In [None]:
import numpy as np 
import matplotlib.pyplot as plt
import pandas as pd
import h5py

from mpl_toolkits.mplot3d import Axes3D

import sys
sys.path.append("/home/habjan.e/TNG/TNG_cluster_dynamics")
import TNG_DA


# Root of the working tree; the other paths in this cell are built from it.
dirc_path = '/home/habjan.e/'
# The helper module lives outside the package path, so extend sys.path before importing it.
sys.path.append(dirc_path + 'TNG/Codes')
import iapi_TNG as iapi

dirc = dirc_path + 'TNG/TNG_workshop/'
# Simulation name handed to the iapi subhalo-field queries further down.
sim = 'TNG300-1'

baseUrl = 'http://www.tng-project.org/api/'
# NOTE(review): presumably queries the TNG web API; `r` is not used in the visible cells — confirm it is still needed.
r=iapi.get(baseUrl)
# Prefix of the cached subhalo-field files read later in the notebook.
TNG_data_path = dirc_path + 'TNG/Data/'

### Pick a cluster index

In [None]:
# Index of the cluster analysed throughout the notebook: it selects the particle cutout,
# the ROCKSTAR catalogue/member files and the SubhaloGrNr match used below.
cluster_ind = 50

### Import subhalo information

In [None]:
# Subhalo-level properties of the selected cluster, as returned by the project helper.
# NOTE(review): later cells index `pos` and `vel` as (n_subhalos, >=3) arrays — confirm the
# shapes and units against TNG_DA.get_cluster_props.
pos, vel, groups, subhalo_masses, h, halo_mass = TNG_DA.get_cluster_props(cluster_ind)

### Import particle data

In [None]:
# HDF5 cutout holding the PartType1 particles of the selected cluster.
fName = f'/home/habjan.e/TNG/Data/TNG_data/halo_cutouts_dm_{cluster_ind}.hdf5'

with h5py.File(fName, 'r') as f:
    part_type = f['PartType1']
    part_coordinates = part_type['Coordinates'][:]
    velocities = part_type['Velocities'][:]
    ids = part_type['ParticleIDs'][:]

# Every particle gets the same mass, so build a constant array of matching length.
masses = np.full(part_coordinates.shape[0], 5.9 * 10**7, dtype=float)

# Coordinate correction done by the project helper (presumably a centre-of-mass shift — confirm in TNG_DA).
coordinates = TNG_DA.coord_cm_corr(coordinates=part_coordinates, cluster_ind=cluster_ind)

### Import ROCKSTAR pandas df

In [None]:
suffix = '_minp_4661'

output_path = (
    '/projects/mccleary_group/habjan.e/TNG/Data/rockstar_output/'
    f'rockstar_subhalos_{cluster_ind}' + suffix + '.list'
)

# Column labels for the whitespace-separated ROCKSTAR table; lines starting with '#' are skipped.
# pos_0..pos_2 are used as positions and pos_3..pos_5 as velocities elsewhere in this notebook.
column_names = ["id"] + [f"pos_{axis}" for axis in range(6)] + [
    "mass_grav", "vrms", "vmax", "rvmax", "p_start", "num_p",
]
df = pd.read_csv(output_path, names=column_names, comment="#", sep=r"\s+")
df

### Add member lists to pandas df

In [None]:
members_path = f'/projects/mccleary_group/habjan.e/TNG/Data/rockstar_output/rockstar_subhalo_members_{cluster_ind}.list'

# Two-column table: one row per (halo, particle) assignment.
members = pd.read_csv(members_path, names=["halo_id", "particle_id"], sep=r"\s+")

# Plain array copies of both columns, used for masking in later cells.
member_id = members['particle_id'].to_numpy(copy=True)
mem_halo_id = members['halo_id'].to_numpy(copy=True)

# Attach to each catalogue row the list of particle ids assigned to that halo id.
grouped = members.groupby("halo_id")["particle_id"].apply(list)
df["member_ids"] = df["id"].map(grouped)

# Displayed value: number of subhalos with strictly between 10^3 and 10^4 particles.
in_range = (df['num_p'] > 10**3) & (df['num_p'] < 10**4)
len(df.loc[in_range])

### Look at the substructure along some slice of the z-axis

In [None]:
fig, ax = plt.subplots()

# Bounds of the slab along the z-axis, in the units of df['pos_2'].
z_low, z_up = -100, 100

# Catalogue x/y are rescaled by 10**-3 to match the [Mpc] axis labels.
sub_x_pos = np.array(df.loc[:, 'pos_0']) * 10**-3
sub_y_pos = np.array(df.loc[:, 'pos_1']) * 10**-3
num_p = np.array(df.loc[:, 'num_p'])

sub_mask = np.where((df['pos_2'] > z_low) & (df['pos_2'] < z_up))[0]

# One vectorised call instead of one artist per subhalo: marker area follows the
# particle count, edge width its log10. Counts are floored at 1 so an empty
# subhalo cannot yield a -inf line width.
ax.scatter(sub_x_pos[sub_mask], sub_y_pos[sub_mask], s=num_p[sub_mask],
           facecolors='none', edgecolors='k',
           linewidths=np.log10(np.maximum(num_p[sub_mask], 1)) / 7.5, marker='o')

# label axes
ax.set_xlabel('X-coordinate [Mpc]')
ax.set_ylabel('Y-coordinate [Mpc]')

scale = 1.5

# Skip the limits when the slab is empty (np.min/np.max would raise on an empty array).
if sub_mask.size > 0:

    min_x, max_x = np.min(sub_x_pos[sub_mask]), np.max(sub_x_pos[sub_mask])
    min_y, max_y = np.min(sub_y_pos[sub_mask]), np.max(sub_y_pos[sub_mask])

    # Multiplying by `scale` only widens a bound that points away from zero
    # (negative minimum, positive maximum). For the other sign, divide instead so
    # the view never shrinks and clips points.
    ax.set_xlim(min_x * scale if min_x < 0 else min_x / scale,
                max_x * scale if max_x > 0 else max_x / scale)
    ax.set_ylim(min_y * scale if min_y < 0 else min_y / scale,
                max_y * scale if max_y > 0 else max_y / scale)

plt.tight_layout()
plt.show()

### Make a histogram of the number of particles in each substructure

In [None]:
num_p = np.array(df['num_p'])
# Zero-particle rows are dropped before taking the logarithm.
num_p_log = np.log10(num_p[num_p > 0])

fig, ax = plt.subplots()

ax.hist(num_p_log, color='blue', bins=np.linspace(1, 7, 200))

# Reference quantiles of log10(particle count), drawn as dashed lines.
lower_b = np.nanquantile(num_p_log, 0.99)
ax.axvline(lower_b, c='red', linestyle='--', label='0.99 quantile')

upper_b = np.nanquantile(num_p_log, 0.9998)
ax.axvline(upper_b, c='green', linestyle='--', label='0.9998 quantile')

ax.set_yscale('log')

ax.set_ylabel('Subhalo Count')
# The histogram is of log10(count), so the axis label says so.
ax.set_xlabel(r'$\log_{10}$(TNG particle count)')
ax.legend(loc='upper right')

plt.show()

### Do a distance calculation to find which subhalo is closest to particles

In [None]:
# Number of nearest particles inspected per galaxy, and the particle-count window
# a ROCKSTAR subhalo must fall in to be considered. (An earlier variant derived
# these from the histogram quantiles; fixed values are used instead.)
k = 5000
upper_b = int(10**6)
lower_b = int(10**2)

print(lower_b, upper_b, k)

sub_mem = np.zeros(pos.shape[0])

real_subs = np.array(df['id'][np.where((df['num_p'] > lower_b) & (df['num_p'] < upper_b))[0]])
real_bool = np.isin(mem_halo_id, real_subs)

# NOTE(review): `real_bool` is built from the member list but also indexes
# `coordinates` and `ids`; that selects the intended particles only if the member
# list has the same length and order as the cutout arrays — confirm.
# These selections do not depend on the galaxy, so build them once instead of
# re-slicing the full particle arrays on every iteration.
sel_coords = coordinates[real_bool][:, :3]
sel_ids = ids[real_bool]
sel_member_id = member_id[real_bool]
sel_halo_id = mem_halo_id[real_bool]

if sel_ids.shape[0] == 0:
    raise ValueError("no particles belong to a subhalo inside the (lower_b, upper_b) window")

# argpartition needs k <= number of candidates.
k_eff = min(k, sel_ids.shape[0])

for i in range(pos.shape[0]):

    # Squared distances rank particles identically to distances, so the sqrt is skipped.
    offset = sel_coords - pos[i, :3]
    dist_sq = np.einsum('ij,ij->i', offset, offset)

    idx = np.argpartition(dist_sq, k_eff - 1)[:k_eff]

    k_ids = sel_ids[idx]

    # NOTE(review): np.unique(...)[0] is the smallest halo id among the neighbours'
    # halos, not the most frequent one — confirm that this is the intended rule.
    sub_mem[i] = np.unique(sel_halo_id[np.isin(sel_member_id, k_ids)])[0]

### Plot the found subhalos in their respective substructure

In [None]:
fig, ax = plt.subplots(subplot_kw={'projection': '3d'})

# One scatter call per membership label, so each group takes the next colour of the cycle.
# Plot axes (x, y, z) show position columns (1, 2, 0), matching the axis labels below.
for label in np.unique(sub_mem):
    in_group = sub_mem == label
    ax.scatter(pos[in_group, 1], pos[in_group, 2], pos[in_group, 0], s=100)

# label axes
ax.set_zlabel('X [kpc]')
ax.set_ylabel('Z [kpc]')
ax.set_xlabel('Y [kpc]')

plt.tight_layout()
plt.show()

### The same plot but in velocity space

In [None]:
fig, ax = plt.subplots(subplot_kw={'projection': '3d'})

# One scatter call per membership label, so each group takes the next colour of the cycle.
# Plot axes (x, y, z) show velocity columns (2, 0, 1), matching the axis labels below.
for label in np.unique(sub_mem):
    in_group = sub_mem == label
    ax.scatter(vel[in_group, 2], vel[in_group, 0], vel[in_group, 1], s=100)

# label axes
ax.set_zlabel(r'$v_{y}$ [$\frac{km}{s}$]')
ax.set_ylabel(r'$v_{x}$ [$\frac{km}{s}$]')
ax.set_xlabel(r'$v_{z}$ [$\frac{km}{s}$]')

plt.tight_layout()
plt.show()

### Overlay rockstar-substructure subhalos on z-axis slice

In [None]:
fig, ax = plt.subplots()

# Bounds of the slab along the z-axis, applied to both df['pos_2'] and pos[:, 2].
z_low, z_up = -200, 200

# ROCKSTAR subhalos above the particle-count floor and inside the slab.
# (An upper bound of 10**6 particles was used previously and has been dropped.)
sub_mask = np.where((df['num_p'] > 10**3.23) & (df['pos_2'] > z_low) & (df['pos_2'] < z_up))[0]

# One vectorised call instead of one artist per subhalo: marker area follows the
# particle count, edge width its log10.
ax.scatter(df.loc[sub_mask, 'pos_0'], df.loc[sub_mask, 'pos_1'], s=df.loc[sub_mask, 'num_p'],
           facecolors='none', edgecolors='k',
           linewidths=np.log10(df.loc[sub_mask, 'num_p']) / 7.5, marker='o')

# Galaxies inside the same slab, one scatter call (hence one colour) per matched substructure.
sub_ind = np.where((pos[:, 2] > z_low) & (pos[:, 2] < z_up))[0]
slab_pos = pos[sub_ind, :]
slab_mem = sub_mem[sub_ind]

for i in np.unique(slab_mem):

    plot_bool = np.where(i == slab_mem)[0]

    ax.scatter(slab_pos[plot_bool, 0], slab_pos[plot_bool, 1])

ax.set_xlabel('X-position [kpc]')
ax.set_ylabel('Y-position [kpc]')

scale = 1.5

# Skip the limits when no subhalo passes the cut (np.min/np.max would raise on empty input).
if sub_mask.size > 0:

    min_x, max_x = np.min(df.loc[sub_mask, 'pos_0']), np.max(df.loc[sub_mask, 'pos_0'])
    min_y, max_y = np.min(df.loc[sub_mask, 'pos_1']), np.max(df.loc[sub_mask, 'pos_1'])

    # Multiplying by `scale` only widens a bound that points away from zero;
    # divide for the other sign so the view never shrinks and clips points.
    ax.set_xlim(min_x * scale if min_x < 0 else min_x / scale,
                max_x * scale if max_x > 0 else max_x / scale)
    ax.set_ylim(min_y * scale if min_y < 0 else min_y / scale,
                max_y * scale if max_y > 0 else max_y / scale)

plt.tight_layout()
plt.show()

# DS+ comparison to ROCKSTAR

### Run DS+

In [None]:
# Project the 3-D subhalo positions and velocities with the project helper, then run DS+ on the projection.
# NOTE(review): no random seed is set in the visible cells; if the n_sims resampling inside
# run_dsp is stochastic, C and P will differ between runs — confirm.
pos_2d, vel_los = TNG_DA.project_3d_to_2d(pos, vel)
dsp_results, C, P = TNG_DA.run_dsp(pos_2d, vel_los, groups, n_sims=5000, Plim_P = 10, Ng_jump=1)
# Display the two values returned alongside the DS+ result table.
C, P

### Compare Results from distance-based matching

In [None]:
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(15, 5), sharey=True)

n_gal = len(subhalo_masses)

# Per-galaxy flags: 1 = in a plotted substructure, 2 = not in one, 0 = never assigned.
tng_arr = np.zeros(n_gal)
dsp_arr = np.zeros(n_gal)

marker_size = 40
gray_size = 15

# A group is drawn as substructure when it has more than one member and fewer than sqrt(N).
max_members = int(np.sqrt(n_gal))

### ROCKSTAR subgroups (nearest-particle matching stored in sub_mem)

# Invisible point whose only job is to put the panel name in the legend.
ax1.scatter(0, 0, c='white', alpha=0, label='ROCKSTAR')

for halo_id in np.unique(sub_mem):

    plot_bool = np.where(halo_id == sub_mem)[0]

    if 1 < len(plot_bool) < max_members:

        ax1.scatter(pos_2d[plot_bool, 0], pos_2d[plot_bool, 1], s=marker_size)
        tng_arr[plot_bool] = 1

    else:

        ax1.scatter(pos_2d[plot_bool, 0], pos_2d[plot_bool, 1], s=gray_size, c='gray', alpha=0.4)
        tng_arr[plot_bool] = 2

ax1.legend(loc='lower right', fontsize=12.5)

### DS+ subgroups

# NOTE(review): columns 8, 9 and 10 of dsp_results[1] are used as group number, x and y,
# and its row order is assumed to match pos_2d — confirm against TNG_DA.run_dsp.
sub_grnu_arr = dsp_results[1][:, 8]
sub_grnu, sub_count = np.unique(sub_grnu_arr, return_counts=True)

ax2.scatter(0, 0, c='white', alpha=0, label='DS+')

for group_id, group_size in zip(sub_grnu, sub_count):

    group_dsp_arr = np.where(sub_grnu_arr == group_id)[0]
    x = dsp_results[1][group_dsp_arr, 9]
    y = dsp_results[1][group_dsp_arr, 10]

    if 1 < group_size < max_members:

        ax2.scatter(x, y, s=marker_size)
        dsp_arr[group_dsp_arr] = 1

    else:

        ax2.scatter(x, y, s=gray_size, c='gray', alpha=0.4)
        dsp_arr[group_dsp_arr] = 2

ax2.legend(loc='lower right', fontsize=12.5)

### Plot comparing ROCKSTAR and DS+ substructure

ax3.scatter(0, 0, c='white', alpha=0, label='ROCKSTAR & DS+')

# DS+ correctly identifies a galaxy that is part of substructure
bool_corr = (tng_arr == 1) & (dsp_arr == 1)
ax3.scatter(pos_2d[bool_corr, 0], pos_2d[bool_corr, 1], c='green', s=marker_size)

# Galaxies that are not part of substructure and were not identified by DS+
bool_indiff = (tng_arr == 2) & (dsp_arr == 2)
ax3.scatter(pos_2d[bool_indiff, 0], pos_2d[bool_indiff, 1], s=gray_size, c='gray', alpha=0.4)

# Galaxies that are part of substructure but were not identified by DS+
bool_noid = (tng_arr == 1) & (dsp_arr == 2)
ax3.scatter(pos_2d[bool_noid, 0], pos_2d[bool_noid, 1], c='black', s=marker_size)

# Galaxies not part of substructure but identified by DS+
bool_wrong = (tng_arr == 2) & (dsp_arr == 1)
ax3.scatter(pos_2d[bool_wrong, 0], pos_2d[bool_wrong, 1], c='red', s=marker_size)

ax3.legend(loc='lower right', fontsize=12.5)

### Adjust layout and show plot

fig.supxlabel(r'X - X$_0$ [kpc]', fontsize=17.5, y=-0.02)
fig.supylabel(r'Y - Y$_0$ [kpc]', fontsize=17.5, x=0.07)

fig.subplots_adjust(wspace=0)

plt.show()

### Import TNG subhalo data that is necessary to do subhalo-based classification

In [None]:
# Every field is requested for the same snapshot. SubhaloGrNr was previously fetched
# without an explicit snapshot, unlike the other three fields, so it could have come
# from whatever default the helper uses.
# NOTE(review): with rewriteFile=0 an existing cached file may be reused as-is — confirm
# the cached SubhaloGrNr file belongs to snapshot 99.
snapshot = 99
field_prefix = TNG_data_path + 'TNG_data/' + sim

SubhaloLenType = iapi.getSubhaloField('SubhaloLenType', simulation=sim, snapshot=snapshot, fileName=field_prefix+'_SubhaloLenType', rewriteFile=0)
Group_num = iapi.getSubhaloField('SubhaloGrNr', simulation=sim, snapshot=snapshot, fileName=field_prefix+'_SubhaloGrNr', rewriteFile=0)
SubhaloStellarPhotometrics = iapi.getSubhaloField('SubhaloStellarPhotometrics', snapshot=snapshot, simulation=sim, fileName=field_prefix+'_SubhaloStellarPhotometrics', rewriteFile=0)

### To look at subhalos with no DM particles
SubhaloMassType = iapi.getSubhaloField('SubhaloMassType', snapshot=snapshot, simulation=sim, fileName=field_prefix+'_SubhaloMassType', rewriteFile=0)

# Rows of the catalogue that belong to the selected cluster.
sub_ind = np.where(Group_num == cluster_ind)[0]

sub_lens, sub_photo, sub_type = SubhaloLenType[sub_ind, :], SubhaloStellarPhotometrics[sub_ind, :], SubhaloMassType[sub_ind, :]

# Keep subhalos brighter than the cut in photometric column 4 (the band the notebook
# calls "g-band" in the histogram section).
mag_cut = -18
bright_ind = sub_photo[:, 4] < mag_cut

sub_lens_type, sub_mass_type = sub_lens[bright_ind, :], sub_type[bright_ind, :]

### Compare the number of particles in TNG subhalos with ROCKSTAR subhalos

In [None]:
# Mass assigned to every dark-matter particle; converts particle counts into masses.
part_mass = 5.9 * 10**7

num_p = np.array(df['num_p'])
num_p_log = np.log10(num_p[num_p > 0] * part_mass)

# Use only column 1 of SubhaloLenType, the same column the subhalo-matching loops
# read as the dark-matter particle count. Masking the full 2-D array would flatten the
# counts of every particle type into the sample and scale them all by the DM particle mass.
sub_dm_len = sub_lens[:, 1]
sub_lens_log = np.log10(sub_dm_len[sub_dm_len > 0] * part_mass)

fig, ax = plt.subplots()

mass_bins = np.linspace(8, 15, 100)
ax.hist(num_p_log, color='blue', bins=mass_bins, label='ROCKSTAR', alpha=0.9)
ax.hist(sub_lens_log, color='red', bins=mass_bins, label='SUBFIND', alpha=0.7)

# Quantiles of the ROCKSTAR log-mass distribution (kept for inspection; not drawn).
lower_b = np.nanquantile(num_p_log, 0.99)
upper_b = np.nanquantile(num_p_log, 0.9998)

ax.set_yscale('symlog', linthresh=10)

ax.set_ylabel('Subhalo Count', fontsize=20)
# The histograms are of log10(mass), so the axis label says so.
ax.set_xlabel(r'$\log_{10}$(Subhalo Mass / $M_{\odot}$)', fontsize=20)

ax.legend(loc='upper right')

plt.show()

### Code to sort subhalos into a substructure

In [None]:
n_gal = pos.shape[0]

# ROCKSTAR halo id assigned to each bright subhalo (NaN when none of its particles is in the member list).
sub_mem_len = np.zeros(n_gal)

# Running offset into `ids` and index of the next bright subhalo to fill.
# NOTE(review): assumes the cutout stores each subhalo's particles contiguously, in
# catalogue order — confirm for this cutout file.
part_count = 0
bright_sub = 0

for i in range(sub_lens.shape[0]):

    # Stop once every slot is filled. The previous `bright_sub + 1 == n_gal` test
    # stopped one subhalo early and left the last entry at its initial 0.
    if bright_sub == n_gal:
        break

    # Column 1 of SubhaloLenType is read as this subhalo's particle count in `ids`.
    n_dm = int(sub_lens[i, 1])
    start, stop = int(part_count), int(part_count) + n_dm
    part_count = stop  # advance for every subhalo, bright or not

    if not (sub_photo[i, 4] < mag_cut):
        continue

    sub_parts = ids[start:stop]
    sub_members = mem_halo_id[np.isin(member_id, sub_parts)]

    if sub_members.size > 0:
        # NOTE(review): np.unique(...)[0] is the smallest matching halo id, not the
        # most frequent one — confirm that this is the intended rule.
        sub_mem_len[bright_sub] = np.unique(sub_members)[0]
    else:
        # No particle of this subhalo appears in the ROCKSTAR member list.
        sub_mem_len[bright_sub] = np.nan

    bright_sub += 1

### What are the masses of subhalos that do not have any DM particles

In [None]:
# Rows of SubhaloMassType for the bright subhalos flagged NaN above, i.e. those with no
# particle in the ROCKSTAR member list. The mask and the table must have the same length.
sub_mass_type[np.isnan(sub_mem_len), :]

### Where do these subhalos lie in the g-band distribution?

In [None]:
plt.hist(sub_photo[:, 4], bins=np.linspace(-25, -10, 100), color='blue')

plt.axvline(-18, c='red', linestyle = '--', label = '-18 Magnitude Cut')

bright_g = sub_photo[bright_ind, 4]

# Magnitudes of the bright subhalos that were flagged NaN by the matching loop.
dmless_g = bright_g[np.isnan(sub_mem_len)]

# Only the first line carries the legend label, so the legend gets a single entry.
if len(dmless_g) > 0:
    plt.axvline(dmless_g[0], c='green', linestyle = '--', label = 'DMless Subhalo')

for mag in dmless_g[1:]:
    plt.axvline(mag, c='green', linestyle = '--')

plt.legend(loc='upper left')

plt.xlabel('Magnitude')
plt.ylabel('Count')

### Print the TNG-based substructure results

In [None]:
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(15, 5), sharey=True)

n_gal = len(subhalo_masses)

# Per-galaxy flags: 1 = in a plotted substructure, 2 = not in one, 0 = never assigned
# (galaxies whose membership is NaN stay at 0 and are not drawn in the third panel).
tng_arr = np.zeros(n_gal)
dsp_arr = np.zeros(n_gal)

marker_size = 40
gray_size = 15

# A group is drawn as substructure when it has more than one member and fewer than sqrt(N).
max_members = int(np.sqrt(n_gal))

### ROCKSTAR subgroups (particle-membership matching stored in sub_mem_len)

# Invisible point whose only job is to put the panel name in the legend.
ax1.scatter(0, 0, c='white', alpha=0, label='ROCKSTAR')

for halo_id in np.unique(sub_mem_len):

    # NaN marks subhalos with no matched particle; NaN never equals itself, so skip it.
    if np.isnan(halo_id):
        continue

    plot_bool = np.where(halo_id == sub_mem_len)[0]

    if 1 < len(plot_bool) < max_members:

        ax1.scatter(pos_2d[plot_bool, 0], pos_2d[plot_bool, 1], s=marker_size)
        tng_arr[plot_bool] = 1

        # Look the halo up by its catalogue id rather than by row label, so the marker
        # is right even if the DataFrame index and the ROCKSTAR ids differ.
        # NOTE(review): df positions are drawn on the pos_2d axes — confirm both share
        # the same frame and units.
        centre = df.loc[df['id'] == halo_id, ['pos_0', 'pos_1']]
        ax1.scatter(centre['pos_0'], centre['pos_1'], s=marker_size)

    else:

        ax1.scatter(pos_2d[plot_bool, 0], pos_2d[plot_bool, 1], s=gray_size, c='gray', alpha=0.4)
        tng_arr[plot_bool] = 2

ax1.legend(loc='lower right', fontsize=12.5)

### DS+ subgroups

# NOTE(review): columns 8, 9 and 10 of dsp_results[1] are used as group number, x and y,
# and its row order is assumed to match pos_2d — confirm against TNG_DA.run_dsp.
sub_grnu_arr = dsp_results[1][:, 8]
sub_grnu, sub_count = np.unique(sub_grnu_arr, return_counts=True)

ax2.scatter(0, 0, c='white', alpha=0, label='DS+')

for group_id, group_size in zip(sub_grnu, sub_count):

    group_dsp_arr = np.where(sub_grnu_arr == group_id)[0]
    x = dsp_results[1][group_dsp_arr, 9]
    y = dsp_results[1][group_dsp_arr, 10]

    if 1 < group_size < max_members:

        ax2.scatter(x, y, s=marker_size)
        dsp_arr[group_dsp_arr] = 1

    else:

        ax2.scatter(x, y, s=gray_size, c='gray', alpha=0.4)
        dsp_arr[group_dsp_arr] = 2

ax2.legend(loc='lower right', fontsize=12.5)

### Plot comparing ROCKSTAR and DS+ substructure

ax3.scatter(0, 0, c='white', alpha=0, label='ROCKSTAR & DS+')

# DS+ correctly identifies a galaxy that is part of substructure
bool_corr = (tng_arr == 1) & (dsp_arr == 1)
ax3.scatter(pos_2d[bool_corr, 0], pos_2d[bool_corr, 1], c='green', s=marker_size)

# Galaxies that are not part of substructure and were not identified by DS+
bool_indiff = (tng_arr == 2) & (dsp_arr == 2)
ax3.scatter(pos_2d[bool_indiff, 0], pos_2d[bool_indiff, 1], s=gray_size, c='gray', alpha=0.4)

# Galaxies that are part of substructure but were not identified by DS+
bool_noid = (tng_arr == 1) & (dsp_arr == 2)
ax3.scatter(pos_2d[bool_noid, 0], pos_2d[bool_noid, 1], c='black', s=marker_size)

# Galaxies not part of substructure but identified by DS+
bool_wrong = (tng_arr == 2) & (dsp_arr == 1)
ax3.scatter(pos_2d[bool_wrong, 0], pos_2d[bool_wrong, 1], c='red', s=marker_size)

ax3.legend(loc='lower right', fontsize=12.5)

### Adjust layout and show plot

fig.supxlabel(r'X - X$_0$ [kpc]', fontsize=17.5, y=-0.02)
fig.supylabel(r'Y - Y$_0$ [kpc]', fontsize=17.5, x=0.07)

fig.subplots_adjust(wspace=0)

plt.show()

### Most bound particle identification

In [None]:
n_gal = pos.shape[0]

# ROCKSTAR halo id of each bright subhalo's first matched particle (NaN when none matches).
sub_mem_bound = np.zeros(n_gal)

# Running offset into `ids` and index of the next bright subhalo to fill.
# NOTE(review): assumes the cutout stores each subhalo's particles contiguously, in
# catalogue order, with the most bound particle first — confirm for this cutout file.
part_count = 0
bright_sub = 0

for i in range(sub_lens.shape[0]):

    # Stop once every slot is filled. The previous `bright_sub + 1 == n_gal` test
    # stopped one subhalo early and left the last entry at its initial 0.
    if bright_sub == n_gal:
        break

    # Column 1 of SubhaloLenType is read as this subhalo's particle count in `ids`.
    n_dm = int(sub_lens[i, 1])
    start, stop = int(part_count), int(part_count) + n_dm
    part_count = stop  # advance for every subhalo, bright or not

    if not (sub_photo[i, 4] < mag_cut):
        continue

    sub_parts = ids[start:stop]

    # Walk the subhalo's particles in their own order and take the first one ROCKSTAR
    # assigned to a halo. Masking the member list instead would return whichever
    # particle comes first in the member file, regardless of its rank in the subhalo.
    matched = np.flatnonzero(np.isin(sub_parts, member_id))

    if matched.size > 0:
        first_pid = sub_parts[matched[0]]
        sub_mem_bound[bright_sub] = mem_halo_id[member_id == first_pid][0]
    else:
        # No particle of this subhalo appears in the ROCKSTAR member list.
        sub_mem_bound[bright_sub] = np.nan

    bright_sub += 1

### Most bound particle plot

In [None]:
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(15, 5), sharey=True)

n_gal = len(subhalo_masses)

# Per-galaxy flags: 1 = in a plotted substructure, 2 = not in one, 0 = never assigned
# (galaxies whose membership is NaN stay at 0 and are not drawn in the third panel).
tng_arr = np.zeros(n_gal)
dsp_arr = np.zeros(n_gal)

marker_size = 40
gray_size = 15

# A group is drawn as substructure when it has more than one member and fewer than sqrt(N).
max_members = int(np.sqrt(n_gal))

### ROCKSTAR subgroups (most-bound-particle matching stored in sub_mem_bound)

# Invisible point whose only job is to put the panel name in the legend.
ax1.scatter(0, 0, c='white', alpha=0, label='ROCKSTAR')

for halo_id in np.unique(sub_mem_bound):

    # NaN marks subhalos with no matched particle; NaN never equals itself, so skip it.
    if np.isnan(halo_id):
        continue

    plot_bool = np.where(halo_id == sub_mem_bound)[0]

    if 1 < len(plot_bool) < max_members:

        ax1.scatter(pos_2d[plot_bool, 0], pos_2d[plot_bool, 1], s=marker_size)
        tng_arr[plot_bool] = 1

        # Look the halo up by its catalogue id rather than by row label, so the marker
        # is right even if the DataFrame index and the ROCKSTAR ids differ.
        # NOTE(review): df positions are drawn on the pos_2d axes — confirm both share
        # the same frame and units.
        centre = df.loc[df['id'] == halo_id, ['pos_0', 'pos_1']]
        ax1.scatter(centre['pos_0'], centre['pos_1'], s=marker_size)

    else:

        ax1.scatter(pos_2d[plot_bool, 0], pos_2d[plot_bool, 1], s=gray_size, c='gray', alpha=0.4)
        tng_arr[plot_bool] = 2

ax1.legend(loc='lower right', fontsize=12.5)

### DS+ subgroups

# NOTE(review): columns 8, 9 and 10 of dsp_results[1] are used as group number, x and y,
# and its row order is assumed to match pos_2d — confirm against TNG_DA.run_dsp.
sub_grnu_arr = dsp_results[1][:, 8]
sub_grnu, sub_count = np.unique(sub_grnu_arr, return_counts=True)

ax2.scatter(0, 0, c='white', alpha=0, label='DS+')

for group_id, group_size in zip(sub_grnu, sub_count):

    group_dsp_arr = np.where(sub_grnu_arr == group_id)[0]
    x = dsp_results[1][group_dsp_arr, 9]
    y = dsp_results[1][group_dsp_arr, 10]

    if 1 < group_size < max_members:

        ax2.scatter(x, y, s=marker_size)
        dsp_arr[group_dsp_arr] = 1

    else:

        ax2.scatter(x, y, s=gray_size, c='gray', alpha=0.4)
        dsp_arr[group_dsp_arr] = 2

ax2.legend(loc='lower right', fontsize=12.5)

### Plot comparing ROCKSTAR and DS+ substructure

ax3.scatter(0, 0, c='white', alpha=0, label='ROCKSTAR & DS+')

# DS+ correctly identifies a galaxy that is part of substructure
bool_corr = (tng_arr == 1) & (dsp_arr == 1)
ax3.scatter(pos_2d[bool_corr, 0], pos_2d[bool_corr, 1], c='green', s=marker_size)

# Galaxies that are not part of substructure and were not identified by DS+
bool_indiff = (tng_arr == 2) & (dsp_arr == 2)
ax3.scatter(pos_2d[bool_indiff, 0], pos_2d[bool_indiff, 1], s=gray_size, c='gray', alpha=0.4)

# Galaxies that are part of substructure but were not identified by DS+
bool_noid = (tng_arr == 1) & (dsp_arr == 2)
ax3.scatter(pos_2d[bool_noid, 0], pos_2d[bool_noid, 1], c='black', s=marker_size)

# Galaxies not part of substructure but identified by DS+
bool_wrong = (tng_arr == 2) & (dsp_arr == 1)
ax3.scatter(pos_2d[bool_wrong, 0], pos_2d[bool_wrong, 1], c='red', s=marker_size)

ax3.legend(loc='lower right', fontsize=12.5)

### Adjust layout and show plot

fig.supxlabel(r'X - X$_0$ [kpc]', fontsize=17.5, y=-0.02)
fig.supylabel(r'Y - Y$_0$ [kpc]', fontsize=17.5, x=0.07)

fig.subplots_adjust(wspace=0)

plt.show()

# BAHAMAS comparisons

### Import BAHAMAS data

In [None]:
bahamas_cl_id = 10

column_names = ["id", "pos_0", "pos_1", "pos_2", "pos_3", "pos_4", "pos_5", "mass_grav", "vrms", "vmax", "rvmax", "p_start", "num_p"]
path = '/projects/mccleary_group/habjan.e/TNG/Data/rockstar_output'
bahamas_path = '/projects/mccleary_group/habjan.e/TNG/Data/BAHAMAS_data/dataGiuliaCerini/'

# For each dark-matter model, read in order: the ROCKSTAR catalogue, its member list
# and the .npz file of the same cluster.
bahamas_runs = {}
for model in ("CDMb", "SIDM0.1b", "SIDM0.3b", "SIDM1b", "vdSIDMb"):
    output_path = path + f'/bahamas_rockstar_subhalos_{model}_{bahamas_cl_id}.list'
    catalogue = pd.read_csv(output_path, sep=r"\s+", comment="#", names=column_names)
    members_path = path + f'/bahamas_rockstar_subhalo_members_{model}_{bahamas_cl_id}.list'
    member_table = pd.read_csv(members_path, sep=r"\s+", names=["halo_id","particle_id"])
    # NOTE(review): the file name hard-codes a single leading "0" before the id — confirm
    # the naming scheme before using ids that are not two digits long.
    raw_data = np.load(bahamas_path + f"{model}/GrNm_0{bahamas_cl_id}.npz")
    bahamas_runs[model] = (catalogue, member_table, raw_data)

# Expose the per-model objects under the names the rest of the notebook uses.
df_CDMb, members_CDMb, data_CDMb = bahamas_runs["CDMb"]
df_SIDM01b, members_SIDM01b, data_SIDM01b = bahamas_runs["SIDM0.1b"]
df_SIDM03b, members_SIDM03b, data_SIDM03b = bahamas_runs["SIDM0.3b"]
df_SIDM1b, members_SIDM1b, data_SIDM1b = bahamas_runs["SIDM1b"]
df_vdSIDMb, members_vdSIDMb, data_vdSIDMb = bahamas_runs["vdSIDMb"]

### Function for making a binned scatter plot

In [None]:
def binned_mass_function(x, bins, mode="per_host", n_hosts=1, use_log=True):
    """
    Histogram `x` into `bins` and return plot-ready points with Poisson errors.

    Parameters
    ----------
    x : array-like
        Values to bin (e.g. log10 M). Values outside `bins` are not counted.
    bins : array-like
        Monotonic bin edges in the same units as `x`; lists are accepted.
    mode : {"pdf", "per_host", "cumulative"}
        "pdf"        -> counts / (total counted * bin width).
        "per_host"   -> counts / (n_hosts * bin width).
        "cumulative" -> number of counted values in this bin and all higher bins,
                        divided by n_hosts, with symmetric sqrt(N) errors.
    n_hosts : number
        Normalisation for "per_host" and "cumulative"; values below 1 are treated as 1.
    use_log : bool
        Kept for interface compatibility; it is not read by the computation. The
        result is per unit of `bins`, whatever that unit is.

    Returns
    -------
    centers, xerr, y, yerr_lo, yerr_hi : numpy arrays, one entry per bin (empty bins are
        kept). `xerr` is the half bin width, or zeros in "cumulative" mode. The two
        y-errors are the downward and upward error-bar lengths.

    Raises
    ------
    ValueError
        If `mode` is not one of the three supported strings.
    """
    x = np.asarray(x)
    # Edge arithmetic below needs an array; a plain list would be concatenated instead.
    bins = np.asarray(bins, dtype=float)

    counts, _ = np.histogram(x, bins=bins)
    widths = np.diff(bins)
    centers = 0.5 * (bins[:-1] + bins[1:])
    xerr = 0.5 * widths

    # Gehrels (1986) approximations to the 1-sigma Poisson limits, as error-bar lengths:
    #   upper = 1 + sqrt(n + 3/4),   lower = sqrt(n - 1/4)  (0 for an empty bin).
    # The earlier expression (sqrt(n + 0.75) +/- 1)**2 needs +/- 0.5 inside the square
    # to reproduce these; as written it gave bars roughly twice as long.
    n = counts.astype(float)
    err_hi_counts = 1.0 + np.sqrt(n + 0.75)
    err_lo_counts = np.where(n > 0, np.sqrt(np.maximum(n - 0.25, 0.0)), 0.0)

    denom = max(n_hosts, 1)

    if mode == "pdf":
        total = n.sum()
        if total > 0:
            norm = total * widths
            y, ylo, yhi = n / norm, err_lo_counts / norm, err_hi_counts / norm
        else:
            # Nothing fell inside the bins: return zeros instead of dividing by zero.
            y, ylo, yhi = np.zeros_like(n), np.zeros_like(n), np.zeros_like(n)
    elif mode == "per_host":
        norm = denom * widths
        y, ylo, yhi = n / norm, err_lo_counts / norm, err_hi_counts / norm
    elif mode == "cumulative":
        # Reverse cumulative sum: entry i counts bin i and every bin above it.
        c = counts[::-1].cumsum()[::-1]
        y = c / denom
        # Simple symmetric sqrt(N) errors for the cumulative counts.
        ylo = np.sqrt(c) / denom
        yhi = ylo.copy()
        # A horizontal bar is not meaningful for a cumulative curve.
        xerr = np.zeros_like(centers)
    else:
        raise ValueError("mode must be 'pdf', 'per_host', or 'cumulative'")

    # All bins are returned, including empty ones.
    return centers, xerr, y, ylo, yhi


### Compare the SHMF between different simulations

In [None]:
num_bins = 25
# Shared bin edges in log10(mass); np.linspace(..., num_bins) yields num_bins - 1 bins.
bins = np.linspace(10.5, 12, num_bins)

# Convert ROCKSTAR particle counts into masses with one particle mass per simulation
# (5.5e9 for the BAHAMAS runs, 5.9e7 for TNG), dropping empty rows before the log.
num_p = np.array(df_CDMb['num_p'])
mass_cdm = num_p * 5.5 * 10**9
mass_log_cdm = np.log10(mass_cdm[mass_cdm > 0])

num_p = np.array(df_vdSIDMb['num_p'])
mass_vdSIDM = num_p * 5.5 * 10**9
mass_log_vdSIDM = np.log10(mass_vdSIDM[mass_vdSIDM > 0])

num_p = np.array(df['num_p'])
mass_tng = num_p * 5.9 * 10**7
mass_log_tng = np.log10(mass_tng[mass_tng > 0])

# Normalisation: total number of subhalos over the three samples (the N_SH of the axis label).
# NOTE(review): one shared normalisation is applied to all three curves — confirm this is
# intended rather than a per-simulation count.
n_hosts = mass_log_cdm.shape[0] + mass_log_vdSIDM.shape[0] + mass_log_tng.shape[0]

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5), sharey=False)
plt.subplots_adjust(wspace=0.25)

samples = [
    (mass_log_cdm,    'red',   'BAHAMAS CDM'),
    (mass_log_vdSIDM, 'blue',  'BAHAMAS vdSIDM'),
    (mass_log_tng,    'green', 'TNG CDM'),
]

### Left panel: cumulative mass function; right panel: differential (per bin width) mass function

for ax, mode in ((ax1, "cumulative"), (ax2, "per_host")):

    for data, color, label in samples:

        x_mean, x_half_width, y, y_lo, y_hi = binned_mass_function(data, bins, mode=mode, n_hosts=n_hosts)

        # Horizontal bars span the bin; vertical bars are the lower/upper Poisson errors.
        ax.errorbar(x_mean, y, xerr=x_half_width, yerr=[y_lo, y_hi],
                    fmt='o', ms=3, capsize=2, elinewidth=1,
                    color=color, label=label, linestyle='none', alpha=0.9)

    ax.set_yscale('log')
    ax.legend()

# Raw strings: '\l' and '\o' are not valid escape sequences in a normal string literal.
ax1.set_ylabel(r'N(> $\log(M_{\odot})$)', fontsize = 15)
ax1.set_xlabel(r'Subhalo Mass [$\log(M_{\odot})$]', fontsize = 15)

ax2.set_ylabel(r'$\frac{\mathrm{d} N_{bin}}{\mathrm{d}\log(M_{\odot})}$ $(N_{SH})^{-1}$', fontsize = 15)
ax2.set_xlabel(r'Subhalo Mass [$\log(M_{\odot})$]', fontsize = 15)

plt.show()

### Center of mass velocity for different simulations

In [None]:
num_bins = 25
# Shared speed bin edges; np.linspace(..., num_bins) yields num_bins - 1 bins.
bins = np.linspace(300, 3000, num_bins)

# Speed of each subhalo from catalogue columns pos_3, pos_4, pos_5 (used here as the
# three velocity components).
vel_sum_CDMb   = np.array(df_CDMb['pos_3'])**2   + np.array(df_CDMb['pos_4'])**2   + np.array(df_CDMb['pos_5'])**2
vel_com_CDMb   = np.sqrt(vel_sum_CDMb)

vel_sum_vdSIDMb = np.array(df_vdSIDMb['pos_3'])**2 + np.array(df_vdSIDMb['pos_4'])**2 + np.array(df_vdSIDMb['pos_5'])**2
vel_com_vdSIDMb = np.sqrt(vel_sum_vdSIDMb)

vel_sum_TNG = np.array(df['pos_3'])**2 + np.array(df['pos_4'])**2 + np.array(df['pos_5'])**2
vel_com_TNG = np.sqrt(vel_sum_TNG)

# --- make figure ---
plt.figure(figsize=(7,5))

for data, color, label in [
    (vel_com_CDMb,   'red',   'BAHAMAS CDM'),
    (vel_com_vdSIDMb,'blue',  'BAHAMAS vdSIDM'),
    (vel_com_TNG,    'green', 'TNG CDM'),
]:
    # `binned_points_with_errors` is not defined in this notebook; the "pdf" mode of
    # binned_mass_function gives the normalised density this y-axis label describes.
    x_mean, x_half_width, y, y_lo, y_hi = binned_mass_function(data, bins, mode="pdf")
    # Horizontal bars span the bin; vertical bars are the lower/upper Poisson errors.
    plt.errorbar(x_mean, y, xerr=x_half_width, yerr=[y_lo, y_hi],
                 fmt='o', ms=3, capsize=2, elinewidth=1,
                 color=color, label=label, linestyle='none', alpha=0.9)

plt.yscale('log')
plt.xlabel(r'Subhalo COM velocity [$\mathrm{km}\ \mathrm{s}^{-1}$]')
plt.ylabel(r'Probability density')
plt.legend()
plt.tight_layout()
plt.show()


### Create Velocity Dispersion arrays for each simulation

In [None]:
# 'vrms' column of each ROCKSTAR catalogue as a plain array, one per simulation.
vrms_TNG, vrms_CDMb, vrms_vdSIDMb = (
    np.array(catalogue['vrms']) for catalogue in (df, df_CDMb, df_vdSIDMb)
)

### Make a VSHD

In [None]:
num_bins = 25
# Shared velocity-dispersion bin edges; np.linspace(..., num_bins) yields num_bins - 1 bins.
bins = np.linspace(0, 500, num_bins)

# Normalisation: total number of subhalos over the three samples (the N_SH of the axis label).
# NOTE(review): one shared normalisation is applied to all three curves — confirm this is
# intended rather than a per-simulation count.
n_hosts = vrms_TNG.shape[0] + vrms_CDMb.shape[0] + vrms_vdSIDMb.shape[0]

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5), sharey=False)
plt.subplots_adjust(wspace=0.25)

samples = [
    (vrms_CDMb,    'red',   'BAHAMAS CDM'),
    (vrms_vdSIDMb, 'blue',  'BAHAMAS vdSIDM'),
    (vrms_TNG,     'green', 'TNG CDM'),
]

### Left panel: cumulative distribution; right panel: differential (per bin width) distribution

for ax, mode in ((ax1, "cumulative"), (ax2, "per_host")):

    for data, color, label in samples:

        x_mean, x_half_width, y, y_lo, y_hi = binned_mass_function(data, bins, mode=mode, n_hosts=n_hosts)

        # Horizontal bars span the bin; vertical bars are the lower/upper Poisson errors.
        ax.errorbar(x_mean, y, xerr=x_half_width, yerr=[y_lo, y_hi],
                    fmt='o', ms=3, capsize=2, elinewidth=1,
                    color=color, label=label, linestyle='none', alpha=0.9)

    ax.set_yscale('log')
    ax.legend()

# The binned quantity is sigma_v, so the y labels refer to it rather than to mass.
ax1.set_ylabel(r'N(> $\sigma_v$)', fontsize = 15)
ax1.set_xlabel(r'Subhalo $\sigma_v$ [$km$ $s^{-1}$]', fontsize = 15)

ax2.set_ylabel(r'$\frac{\mathrm{d} N_{bin}}{\mathrm{d}\sigma_v}$ $(N_{SH})^{-1}$', fontsize = 15)
ax2.set_xlabel(r'Subhalo $\sigma_v$ [$km$ $s^{-1}$]', fontsize = 15)

plt.show()

### Circular velocity arrays

In [None]:
# 'vmax' column of each BAHAMAS ROCKSTAR catalogue as a plain array.
# The TNG counterpart is disabled. NOTE(review): the reason is not visible here — confirm
# before re-enabling it in this cell and in the plotting cell that follows.
#vmax_TNG = np.array(df['vmax'])
vmax_CDMb = np.array(df_CDMb['vmax'])
vmax_vdSIDMb = np.array(df_vdSIDMb['vmax'])

### Comparing circular velocity

In [None]:
num_bins = 25
# Shared V_max bin edges; np.linspace(..., num_bins) yields num_bins - 1 bins.
bins = np.linspace(0, 500, num_bins)

# Normalisation: total number of subhalos over the plotted samples (the N_SH of the axis label).
# TNG is left out because its vmax array is commented out where the arrays are built.
n_hosts = vmax_CDMb.shape[0] + vmax_vdSIDMb.shape[0]

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5), sharey=False)
plt.subplots_adjust(wspace=0.25)

samples = [
    (vmax_CDMb,    'red',  'BAHAMAS CDM'),
    (vmax_vdSIDMb, 'blue', 'BAHAMAS vdSIDM'),
]

### Left panel: cumulative distribution; right panel: differential (per bin width) distribution

for ax, mode in ((ax1, "cumulative"), (ax2, "per_host")):

    for data, color, label in samples:

        x_mean, x_half_width, y, y_lo, y_hi = binned_mass_function(data, bins, mode=mode, n_hosts=n_hosts)

        # Horizontal bars span the bin; vertical bars are the lower/upper Poisson errors.
        ax.errorbar(x_mean, y, xerr=x_half_width, yerr=[y_lo, y_hi],
                    fmt='o', ms=3, capsize=2, elinewidth=1,
                    color=color, label=label, linestyle='none', alpha=0.9)

    ax.set_yscale('log')
    ax.legend()

# The binned quantity is V_max, so the y labels refer to it rather than to mass.
ax1.set_ylabel(r'N(> $V_{max}$)', fontsize = 15)
ax1.set_xlabel(r'Subhalo $V_{max}$ [$km$ $s^{-1}$]', fontsize = 15)

ax2.set_ylabel(r'$\frac{\mathrm{d} N_{bin}}{\mathrm{d}V_{max}}$ $(N_{SH})^{-1}$', fontsize = 15)
ax2.set_xlabel(r'Subhalo $V_{max}$ [$km$ $s^{-1}$]', fontsize = 15)

plt.show()

### View some substructure in a simulation

In [None]:
# Draw every subhalo of one catalogue that lies inside a slab along the third position
# axis as an open circle in the pos_0 / pos_1 plane, sized by its particle count.
fig = plt.figure()
ax  = fig.add_subplot(111)

# Slab bounds, in the same units as the pos_2 column.
# NOTE(review): the axis labels below say Mpc -- confirm that matches these bounds.
z_low, z_up = -100, 100

dm_model = df_vdSIDMb

# Boolean row mask instead of positional indices from np.where: `.loc` is label-based,
# so positional indices only select the right rows when the frame has a default RangeIndex.
sub_mask = (dm_model['pos_2'] > z_low) & (dm_model['pos_2'] < z_up)
slab = dm_model.loc[sub_mask]
if slab.empty:
    raise ValueError(f'No subhalos with {z_low} < pos_2 < {z_up}; nothing to plot.')

sub_x = slab['pos_0'].to_numpy()
sub_y = slab['pos_1'].to_numpy()
n_part = slab['num_p'].to_numpy()

# One vectorised call; scatter accepts per-point arrays for both marker area and edge width.
ax.scatter(sub_x, sub_y, s=n_part,
           facecolors='none', edgecolors='k',
           linewidths=np.log10(n_part) / 7.5, marker='o')

ax.set_xlabel('X-position [Mpc]')
ax.set_ylabel('Y-position [Mpc]')

min_x, max_x = sub_x.min(), sub_x.max()
min_y, max_y = sub_y.min(), sub_y.max()

scale = 1.5

# Widen the view around the centre of the data extent. Multiplying the raw min/max by
# `scale` only pads the view when min < 0 < max; otherwise it cuts points off. For an
# extent symmetric about zero this yields the same limits as min * scale, max * scale.
x_mid, x_half = 0.5 * (min_x + max_x), 0.5 * (max_x - min_x)
y_mid, y_half = 0.5 * (min_y + max_y), 0.5 * (max_y - min_y)

ax.set_xlim(x_mid - scale * x_half, x_mid + scale * x_half)
ax.set_ylim(y_mid - scale * y_half, y_mid + scale * y_half)

plt.tight_layout()
plt.show()

# Compare BAHAMAS clusters

### Import additional BAHAMAS run

In [None]:
# Load, for each BAHAMAS dark-matter model, the ROCKSTAR subhalo catalogue, the
# halo-to-particle membership list and the .npz archive of the selected cluster.
bahamas_cl_id = '10'
suffix = '_min_200part'

column_names = ["id", "pos_0", "pos_1", "pos_2", "pos_3", "pos_4", "pos_5", "mass_grav", "vrms", "p_start", "num_p"]
path = '/projects/mccleary_group/habjan.e/TNG/Data/rockstar_output'
bahamas_path = '/projects/mccleary_group/habjan.e/TNG/Data/BAHAMAS_data/dataGiuliaCerini/'

catalogs, member_tables, particle_data = {}, {}, {}

# Same read order as before: catalogue, member list, then .npz archive, model by model.
for run in ('CDMb', 'SIDM0.1b', 'SIDM0.3b', 'SIDM1b', 'vdSIDMb'):
    output_path = f'{path}/bahamas_rockstar_subhalos_{run}_{bahamas_cl_id}{suffix}.list'
    catalogs[run] = pd.read_csv(output_path, sep=r"\s+", comment="#", names=column_names)

    members_path = f'{path}/bahamas_rockstar_subhalo_members_{run}_{bahamas_cl_id}{suffix}.list'
    member_tables[run] = pd.read_csv(members_path, sep=r"\s+", names=["halo_id","particle_id"])

    particle_data[run] = np.load(f'{bahamas_path}{run}/GrNm_0{bahamas_cl_id}.npz')

# Expose the results under the names used in the cells below.
# NOTE(review): the `_09` suffix does not match bahamas_cl_id = '10'; the names are kept
# unchanged because the cells below reference them.
df_CDMb_09, members_CDMb_09, data_CDMb_09 = catalogs['CDMb'], member_tables['CDMb'], particle_data['CDMb']
df_SIDM01b_09, members_SIDM01b_09, data_SIDM01b_09 = catalogs['SIDM0.1b'], member_tables['SIDM0.1b'], particle_data['SIDM0.1b']
df_SIDM03b_09, members_SIDM03b_09, data_SIDM03b_09 = catalogs['SIDM0.3b'], member_tables['SIDM0.3b'], particle_data['SIDM0.3b']
df_SIDM1b_09, members_SIDM1b_09, data_SIDM1b_09 = catalogs['SIDM1b'], member_tables['SIDM1b'], particle_data['SIDM1b']
df_vdSIDMb_09, members_vdSIDMb_09, data_vdSIDMb_09 = catalogs['vdSIDMb'], member_tables['vdSIDMb'], particle_data['vdSIDMb']

### Plot the cumulative SHMF

In [None]:
# Cumulative subhalo mass functions. Left panel: the two BAHAMAS CDM catalogues plus TNG.
# Right panel: the two BAHAMAS vdSIDM catalogues.
num_bins = 25
bins = np.linspace(10.5, 13, num_bins)  # bin edges in log10(mass)

# Mass assigned per particle when converting counts to subhalo masses.
# NOTE(review): presumably the BAHAMAS particle mass in solar masses -- confirm.
bahamas_particle_mass = 5.5 * 10**9

num_p = np.array(df_CDMb['num_p'])
mass_cdm_10 = num_p * bahamas_particle_mass
mass_log_cdm_10 = np.log10(mass_cdm_10[mass_cdm_10 > 0])  # drop zero-particle rows before log10

num_p = np.array(df_CDMb_09['num_p'])
mass_CDMb_09 = num_p * bahamas_particle_mass
mass_log_CDMb_09 = np.log10(mass_CDMb_09[mass_CDMb_09 > 0])

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5), sharey=True)

# Normalisation for the left panel, computed from the samples drawn on it. Previously this
# loop ran before n_hosts was assigned in this cell, so it silently used whatever value an
# earlier cell (or an earlier run of this cell) had left behind.
n_hosts = mass_log_cdm_10.shape[0] + mass_log_CDMb_09.shape[0] + mass_log_tng.shape[0]

for data, color, label in [
    (mass_log_cdm_10,   'red',   'BAHAMAS CDM Cluster 10 | min particles: 3'),
    (mass_log_CDMb_09,'blue',  'BAHAMAS CDM Cluster 10 | min particles: 200'),
    (mass_log_tng,    'green', 'TNG CDM Cluster 50'),
]:
    # Only the binned x positions and y values are drawn (as lines, without error bars).
    x_mean, x_std, y, y_err, _ = binned_mass_function(data, bins, mode="cumulative", n_hosts=n_hosts)
    ax1.plot(x_mean, y, color=color, label=label)

num_p = np.array(df_vdSIDMb['num_p'])
mass_vdSIDM_10 = num_p * bahamas_particle_mass
mass_log_vdSIDM_10 = np.log10(mass_vdSIDM_10[mass_vdSIDM_10 > 0])

num_p = np.array(df_vdSIDMb_09['num_p'])
mass_vdSIDM_09 = num_p * bahamas_particle_mass
mass_log_vdSIDM_09 = np.log10(mass_vdSIDM_09[mass_vdSIDM_09 > 0])

# Normalisation for the right panel: the summed sizes of the two vdSIDM samples.
n_hosts = mass_log_vdSIDM_10.shape[0] + mass_log_vdSIDM_09.shape[0]

for data, color, label in [
    (mass_log_vdSIDM_10, 'red',   'BAHAMAS vdSIDM Cluster 10 | min particles: 3'),
    (mass_log_vdSIDM_09, 'blue',  'BAHAMAS vdSIDM Cluster 10 | min particles: 200'),
]:
    x_mean, x_std, y, y_err, _ = binned_mass_function(data, bins, mode="cumulative", n_hosts=n_hosts)
    ax2.plot(x_mean, y, color=color, label=label)

ax1.set_yscale('log')
ax2.set_yscale('log')

# Raw string so the LaTeX backslashes are not parsed as (invalid) Python escape sequences.
ax1.set_ylabel(r'N(> $\log(M_{\odot})$)', fontsize=15)
ax1.set_xlabel(r'Subhalo Mass [$\log(M_{\odot})$]', fontsize=15)
ax2.set_xlabel(r'Subhalo Mass [$\log(M_{\odot})$]', fontsize=15)

ax1.legend(loc='upper right')
ax2.legend(loc='upper right')

# Cap the (shared) y axis at 1; the lower limit is left to autoscale.
ax1.set_ylim(None, 10**0)

### Plot the differential number density per bin for subhalo mass

In [None]:
# Differential ("per_host" mode) subhalo mass functions. Left panel: the two BAHAMAS CDM
# catalogues plus TNG. Right panel: the two BAHAMAS vdSIDM catalogues.
num_bins = 15
bins = np.linspace(10, 13, num_bins)  # bin edges in log10(mass)

# Mass assigned per particle when converting counts to subhalo masses.
# NOTE(review): presumably the BAHAMAS particle mass in solar masses -- confirm.
bahamas_particle_mass = 5.5 * 10**9

num_p = np.array(df_CDMb['num_p'])
mass_cdm_10 = num_p * bahamas_particle_mass
mass_log_cdm_10 = np.log10(mass_cdm_10[mass_cdm_10 > 0])  # drop zero-particle rows before log10

num_p = np.array(df_CDMb_09['num_p'])
mass_CDMb_09 = num_p * bahamas_particle_mass
mass_log_CDMb_09 = np.log10(mass_CDMb_09[mass_CDMb_09 > 0])

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5), sharey=True)

# Normalisation for the left panel, computed from the samples drawn on it. Previously this
# loop ran before n_hosts was assigned in this cell, so it silently used whatever value an
# earlier cell (or an earlier run of this cell) had left behind.
n_hosts = mass_log_cdm_10.shape[0] + mass_log_CDMb_09.shape[0] + mass_log_tng.shape[0]

for data, color, label in [
    (mass_log_cdm_10,   'red',   'BAHAMAS CDM Cluster 10 | min particles: 3'),
    (mass_log_CDMb_09,'blue',  'BAHAMAS CDM Cluster 10 | min particles: 200'),
    (mass_log_tng,    'green', 'TNG CDM Cluster 50'),
]:
    # binned_mass_function's first four return values are used as x position, x error,
    # y value and y error (the fifth is ignored).
    x_mean, x_std, y, y_err, _ = binned_mass_function(data, bins, mode="per_host", n_hosts=n_hosts)
    ax1.errorbar(x_mean, y, xerr=x_std, yerr=y_err,
                 fmt='o', ms=3, capsize=2, elinewidth=1,
                 color=color, label=label, linestyle='none', alpha=0.9)

num_p = np.array(df_vdSIDMb['num_p'])
mass_vdSIDM_10 = num_p * bahamas_particle_mass
mass_log_vdSIDM_10 = np.log10(mass_vdSIDM_10[mass_vdSIDM_10 > 0])

num_p = np.array(df_vdSIDMb_09['num_p'])
mass_vdSIDM_09 = num_p * bahamas_particle_mass
mass_log_vdSIDM_09 = np.log10(mass_vdSIDM_09[mass_vdSIDM_09 > 0])

# Normalisation for the right panel: the summed sizes of the two vdSIDM samples.
n_hosts = mass_log_vdSIDM_10.shape[0] + mass_log_vdSIDM_09.shape[0]

for data, color, label in [
    (mass_log_vdSIDM_10, 'red',   'BAHAMAS vdSIDM Cluster 10 | min particles: 3'),
    (mass_log_vdSIDM_09, 'blue',  'BAHAMAS vdSIDM Cluster 10 | min particles: 200'),
]:
    x_mean, x_std, y, y_err, _ = binned_mass_function(data, bins, mode="per_host", n_hosts=n_hosts)
    ax2.errorbar(x_mean, y, xerr=x_std, yerr=y_err,
                 fmt='o', ms=3, capsize=2, elinewidth=1,
                 color=color, label=label, linestyle='none', alpha=0.9)

ax1.set_yscale('log')
ax2.set_yscale('log')

ax1.set_ylabel(r'$\frac{\mathrm{d} N_{bin}}{\mathrm{d}\log(M_{\odot})}$ $(N_{SH})^{-1}$', fontsize=15)
ax1.set_xlabel(r'Subhalo Mass [$\log(M_{\odot})$]', fontsize=15)
ax2.set_xlabel(r'Subhalo Mass [$\log(M_{\odot})$]', fontsize=15)

ax1.legend(loc='upper right')
ax2.legend(loc='upper right')

# Cap the (shared) y axis at 10; the lower limit is left to autoscale.
ax1.set_ylim(None, 10**1)