In [1]:
import time
import numpy as np
import pandas as pd

import galsim #install with conda install -c conda_forge galsim

import matplotlib.pyplot as plt
from matplotlib import pyplot
import matplotlib.cm as cm
import matplotlib.colors as norm
from matplotlib.gridspec import SubplotSpec
import seaborn as sns

from scipy import stats
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn import svm
from sklearn.svm import SVR

from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
from sklearn.model_selection import cross_validate, KFold, StratifiedKFold
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import cross_val_score
from sklearn.metrics import r2_score
from sklearn.inspection import permutation_importance
from sklearn.pipeline import make_pipeline #This allows one to build different steps together
from sklearn.preprocessing import StandardScaler, RobustScaler

from tqdm import tqdm 

import target_predicting_ML_functions_and_feature_ranking as functions
import RF_target_predicting_and_learning_curves_functions as tp_lc_functions

In [2]:
# Raw (not normalized) dataset exported by TNG300 Notebook v1. This version of the dataset
#   - excludes non-physical galaxies whose Mstar/Mvir (GalpropNormMstar) > 0.2,
#   - applies the stellar mass cut log10Mstar>9.0,
#   - excludes galaxies with fdisk<0.02.
raw_dataset_csv = 'TNG300-SAM_images/v1_TNG300-SAM_cleanup_normalize_dataset/TNG300-NewSAM_Raw_Dataset_fromv1_wo_nonphys_mstar9_and_diskgals_w_smallfdisk.csv'

# .loc[:, :] selects every row and every column, i.e. the frame is kept in full.
df_not_normalized = pd.read_csv(raw_dataset_csv).loc[:, :]
df_not_normalized.shape

(207467, 59)

In [3]:
# Size of the subset with HalopropSpin below 0.02 (28,062 galaxies, see the output below)
low_spin_mask = df_not_normalized['HalopropSpin'] < 0.02
df_not_normalized.loc[low_spin_mask].shape

(28062, 59)

In [4]:
# Set all Spin<0.02 equal to 0.02 and store the result in a new column.
# Series.clip is the vectorized equivalent of the element-wise
# `0.02 if x<0.02 else x` rule: values >= 0.02 are unchanged and NaN stays NaN.
df_not_normalized.loc[:,'HalopropSpin_effective'] = df_not_normalized.loc[:,'HalopropSpin'].clip(lower=0.02)

In [5]:
# Remove the original spin column; only the floored 'HalopropSpin_effective' copy is kept.
df_not_normalized = df_not_normalized.drop(columns='HalopropSpin')

In [6]:
# Give the floored spin column the original name 'HalopropSpin',
# because all the rest of the code refers to the spin under that name.
df_not_normalized = df_not_normalized.rename(columns=dict(HalopropSpin_effective='HalopropSpin'))

In [7]:
# Normalized (in better words, dimensionless) dataset from TNG300 Notebook v1:
# all masses divided by halo mass (Mvir), halfmass radius divided by halo size (Rhalo);
# the version of dataset without non-physical galaxies whose Mstar/Mvir (GalpropNormMstar) > 0.2
# and with stellar mass cut at log10Mstar>9.0
# and without fdisk<0.02 galaxies

# Read the normalized CSV (a different file from the raw CSV loaded earlier in the notebook)
df_normalized_35 = pd.read_csv('TNG300-SAM_images/v1_TNG300-SAM_cleanup_normalize_dataset/TNG300-NewSAM_Normalized_Dataset_fromv1_wo_mstar9_nonphys_and_diskgals_w_smallfdsik.csv')
# Set all Spin<0.02 equal to 0.02. Series.clip is the vectorized equivalent of the
# element-wise `0.02 if x<0.02 else x` rule: values >= 0.02 are unchanged, NaN stays NaN.
df_normalized_35.loc[:,'HalopropSpin_effective'] = df_normalized_35.loc[:,'HalopropSpin'].clip(lower=0.02)
# Drop the original column first, then rename, so the floored spin ends up as the
# last column under the name the rest of the code expects.
df_normalized_35 = df_normalized_35.drop(columns=['HalopropSpin'])
# Rename Spin_effective back to Spin because all the rest of the code has this name used
df_normalized_35 = df_normalized_35.rename(columns={'HalopropSpin_effective': 'HalopropSpin'})

df_normalized_35.shape

(207467, 38)

In [8]:
# One-time setup for the output directory named in the commented-out CSV exports of the
# following cells. os.mkdir raises FileExistsError when the directory already exists,
# which is presumably why the call is left commented out -- TODO confirm.
import os
# os.mkdir('TNG300-SAM_images/v6_TNG300-SAM_Morphologies_definition')

In [9]:
# df_normalized_35.to_csv('TNG300-SAM_images/v6_TNG300-SAM_Morphologies_definition/v6_TNG300-NewSAM_Normalized_Dataset_fromv1_wo_mstar9_nonphys_and_diskgals_w_smallfdsik_w_spineff.csv', index=False)
# df_not_normalized.to_csv('TNG300-SAM_images/v6_TNG300-SAM_Morphologies_definition/v6_TNG300-NewSAM_Raw_Dataset_fromv1_wo_nonphys_mstar9_and_diskgals_w_smallfdisk_w_spineff.csv', index=False)

In [11]:
# Split the sample into morphology groups by bulge-to-stellar-mass ratio ('BulgeMstar_ratio').
# Every group is cut out of both the normalized (dimensionless) frame and the raw frame with
# the same thresholds; the printed shapes let you check that df_N and df_N_raw hold the
# same number of galaxies.

# Directory and switch for exporting the groups as CSV files. The directory must already
# exist when the switch is turned on (DataFrame.to_csv does not create it).
MORPHOLOGY_DIR = 'TNG300-SAM_images/v6_TNG300-SAM_Morphologies_definition'
SAVE_MORPHOLOGY_CSVS = False


def select_bulge_ratio_bin(df, lower=None, upper=None):
    """Return the rows of `df` with lower < BulgeMstar_ratio <= upper.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that has a 'BulgeMstar_ratio' column.
    lower : float or None
        Exclusive lower edge of the bin; None applies no lower cut.
    upper : float or None
        Inclusive upper edge of the bin; None applies no upper cut.

    Returns
    -------
    pandas.DataFrame
        The selected rows, with the index of `df` preserved.
    """
    ratio = df.loc[:, 'BulgeMstar_ratio']
    keep = pd.Series(True, index=df.index)
    if lower is not None:
        keep &= ratio > lower
    if upper is not None:
        keep &= ratio <= upper
    return df.loc[keep]


def morphology_group(label, lower=None, upper=None):
    """Cut one morphology group from both datasets, print its shapes and return it.

    Parameters
    ----------
    label : str
        Group label used in the printed names and in the CSV file names ('1', ..., '14').
    lower, upper : float or None
        Bin edges, passed on to `select_bulge_ratio_bin`.

    Returns
    -------
    tuple of pandas.DataFrame
        (subset of df_normalized_35, subset of df_not_normalized)
    """
    group = select_bulge_ratio_bin(df_normalized_35, lower, upper)
    group_raw = select_bulge_ratio_bin(df_not_normalized, lower, upper)
    if SAVE_MORPHOLOGY_CSVS:
        group.to_csv(MORPHOLOGY_DIR + '/df_{}_Normalized_as_defined_in_TNG300notebook_v6'.format(label), index=False)
        group_raw.to_csv(MORPHOLOGY_DIR + '/df_{}_Raw_as_defined_in_TNG300notebook_v6'.format(label), index=False)
    print('df_{}.shape'.format(label), group.shape)
    print('df_{}_raw.shape'.format(label), group_raw.shape)
    return group, group_raw


# Galaxy counts in the comments are the ones printed in the output of this cell.
df_1, df_1_raw = morphology_group('1', upper=0.10)               # ratio <= 0.10        : 52,053 galaxies
df_2, df_2_raw = morphology_group('2', lower=0.10, upper=0.20)   # 0.10 < ratio <= 0.20 : 64,494 galaxies
df_3, df_3_raw = morphology_group('3', lower=0.20, upper=0.30)   # 0.20 < ratio <= 0.30 : 28,372 galaxies
df_4, df_4_raw = morphology_group('4', lower=0.30, upper=0.40)   # 0.30 < ratio <= 0.40 : 13,582 galaxies
df_5, df_5_raw = morphology_group('5', lower=0.40, upper=0.50)   # 0.40 < ratio <= 0.50 : 13,539 galaxies
df_6, df_6_raw = morphology_group('6', lower=0.50, upper=0.60)   # 0.50 < ratio <= 0.60 : 10,167 galaxies
df_7, df_7_raw = morphology_group('7', lower=0.60, upper=0.70)   # 0.60 < ratio <= 0.70 :  8,367 galaxies
df_8, df_8_raw = morphology_group('8', lower=0.70, upper=0.80)   # 0.70 < ratio <= 0.80 :  6,853 galaxies
df_9, df_9_raw = morphology_group('9', lower=0.80)               # ratio > 0.80         : 10,040 galaxies

# Combined Disk morphologies 1 to 4 in order to fit one physical model to all of them;
# 158,501 galaxies in the 1-4 morphologies, out of a total of ~207,000
df_14, df_14_raw = morphology_group('14', upper=0.40)

df_1.shape (52053, 38)
df_1_raw.shape (52053, 59)
df_2.shape (64494, 38)
df_2_raw.shape (64494, 59)
df_3.shape (28372, 38)
df_3_raw.shape (28372, 59)
df_4.shape (13582, 38)
df_4_raw.shape (13582, 59)
df_5.shape (13539, 38)
df_5_raw.shape (13539, 59)
df_6.shape (10167, 38)
df_6_raw.shape (10167, 59)
df_7.shape (8367, 38)
df_7_raw.shape (8367, 59)
df_8.shape (6853, 38)
df_8_raw.shape (6853, 59)
df_9.shape (10040, 38)
df_9_raw.shape (10040, 59)
df_14.shape (158501, 38)
df_14_raw.shape (158501, 59)


In [8]:
# Morphology groups 5 to 8 (Ellipticals) merged into a single sample so that one physical
# model can be fit to all of them: 0.40 < BulgeMstar_ratio <= 0.80.
# 38,926 galaxies fall in the 5-8 morphologies, out of a total of ~207,000.
ratio_normalized = df_normalized_35.BulgeMstar_ratio
ratio_raw = df_not_normalized.BulgeMstar_ratio
in_58_normalized = (ratio_normalized > 0.40) & (ratio_normalized <= 0.80)
in_58_raw = (ratio_raw > 0.40) & (ratio_raw <= 0.80)

df_58 = df_normalized_35.loc[in_58_normalized]  # normalized
df_58_raw = df_not_normalized.loc[in_58_raw]
# df_58.to_csv('TNG300-SAM_images/v6_TNG300-SAM_Morphologies_definition/df_58_Normalized_as_defined_in_TNG300notebook_v6', index=False)
# df_58_raw.to_csv('TNG300-SAM_images/v6_TNG300-SAM_Morphologies_definition/df_58_Raw_as_defined_in_TNG300notebook_v6', index=False)
print("df_58.shape", df_58.shape)
print("df_58_raw.shape", df_58_raw.shape)

df_58.shape (38926, 38)
df_58_raw.shape (38926, 59)


In [4]:
# Hall-of-fame table of candidate equations ('|'-separated file); judging by the directory
# name it comes from a symbolic-regression equation search on df_7.
# The path is relative to the notebook directory, like every other dataset path in this
# notebook, instead of an absolute path that only exists on one machine.
dd = pd.read_csv('TNG300-SAM_images/SR_df_7_wspineff_run3_ellipticals_eqn_search/hall_of_fame_2023-04-06_234909.339.csv', sep='|')

In [5]:
# Print the first ten candidate equations. head() is positional, so this also works when
# the table holds fewer than ten rows or does not have the default 0..n-1 index
# (label lookups dd.loc[i, 'Equation'] would raise KeyError in those cases).
for equation in dd['Equation'].head(10):
    print(equation)

0.023591585
(0.01835729 / GalpropNormSigmaBulge)
(0.012798642 / square(GalpropNormSigmaBulge))
(pow(HalopropSpin, GalpropNormSigmaBulge) / 4.8259325)
square((-0.104911506 / GalpropNormSigmaBulge) - GalpropNormMcold)
(pow(GalpropNormMstar + GalpropNormMcold, GalpropNormSigmaBulge) / 2.7574792)
square(((-0.08844264 - GalpropNormMcold) - GalpropNormMstar) / GalpropNormSigmaBulge)
((pow(GalpropNormMstar + GalpropNormMcold, GalpropNormSigmaBulge) / 3.4845552) + 0.006824277)
square((-0.103109956 - pow(GalpropNormMcold + GalpropNormMstar, GalpropNormSigmaBulge)) + GalpropNormMcold)
(((pow(GalpropNormMstar + GalpropNormMcold, GalpropNormSigmaBulge) - GalpropNormMcold) / 2.6337867) - -0.005257287)


In [None]:
# Symbolic-regression (SR) equation evaluated in this cell:
# ((pow(GalpropNormMHII, GalpropNormSigmaBulge) + pow(GalpropNormMstar, GalpropNormSigmaBulge)) * 0.45374486)
# The fitted coefficient 0.45374486 is rounded to 0.45 below.

sigma_bulge = df_7.loc[:, 'GalpropNormSigmaBulge']
true_size = df_7.loc[:, 'GalpropNormHalfRadius']

sr_gr7ellip_v1_eqn4 = 0.45 * (df_7.loc[:, 'GalpropNormMHII']**sigma_bulge
                              + df_7.loc[:, 'GalpropNormMstar']**sigma_bulge)

r2_score_gr7ellipticals4 = r2_score(true_size, sr_gr7ellip_v1_eqn4)

fig_complete, ax = plt.subplots(figsize=(7, 5))

# Points are colored by Mbulge/Mstar taken from the raw frame.
# NOTE(review): this assumes df_7_raw lists the same galaxies in the same order as df_7
# (both are cut with the same thresholds and have equal row counts) -- confirm.
# NOTE(review): the color range 0.5-0.9 is wider than the 0.6-0.7 bin of df_7; kept as is.
fig_SR = ax.scatter(true_size, sr_gr7ellip_v1_eqn4,
            c = df_7_raw.loc[:, 'GalpropMbulge']/df_7_raw.loc[:, 'GalpropMstar'],
            cmap='Spectral_r',
            s=10, marker='.', alpha=0.7,label= r'$\frac{M_{bulge}}{M_{star}}$', vmin=0.5, vmax=0.9)
ax.text(0.02, 0.17, '$R^{2}$ score=' + '{:.2f}'.format(r2_score_gr7ellipticals4), size=12)
ax.axis([0.0,0.2, 0.0,0.2])
# One-to-one line: points on it are predicted perfectly
ax.plot([0.0, 0.3], [0.0, 0.3], color = 'black', linewidth = 2)
# The title states the bin that defines df_7 in this notebook (0.60 < ratio <= 0.70) and the
# variables that the equation above actually uses (NormMHII, not NormMbulge).
ax.set_title(r'Group 7 Ellipticals 0.6<$\frac{M_{bulge}}{M_{star}}\leq$0.7' +'\n' + r' Eqn =0.45*[$NormMHII^{NormSigmaBulge}$ + $NormMstar^{NormSigmaBulge}$]')
ax.set_xlabel('True Galaxy Size/Halo Size')
ax.legend(loc = 'lower right', shadow=True)
ax.set_ylabel('Predicted Galaxy Size/Halo Size by SR eqn ')
fig_complete.colorbar(fig_SR, ax=ax)
# plt.savefig('TNG300-SAM_images/v5_TNG300-SAM_wo_DISKgals_w_smallfdisk_wspineff/v5_Group7_ellipticals_SR_v1eqn4_dimensionless_MbulgeMstarcolor.jpeg', dpi=500)
plt.show()