In [2]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import numpy as np  
import seaborn as sns  
import tensorflow as tf  
from tensorflow import keras  
from keras import layers
from sklearn.ensemble import RandomForestRegressor  
from sklearn.preprocessing import StandardScaler  
from sklearn.metrics import mean_squared_error, r2_score  

In [46]:
# Read the two NEP14 Cohen catalogues as whitespace-separated tables.
# `header=N` tells pandas which parsed row supplies the column labels.
df_mag = pd.read_csv('NEP14_CohenMAGS.cat', sep=r'\s+', header=5)
df_sed = pd.read_csv('NEP14_CohenSED.cat', sep=r'\s+', header=3)

# ---- MAG catalogue: realign labels with data ----
# Two header tokens ('#' and '(deg)') are discarded from the label list and
# the last two parsed columns are cut from the data so both have equal width.
# NOTE(review): presumably the header line starts with a '#' marker and has
# "RA (deg)" split in two by the whitespace separator - confirm against file.
mag_col_names = df_mag.columns.drop(['#', '(deg)']).tolist()
mag_col_names[1] = 'RA(deg)'  # restore the full name of the second label
df_mag_clean = (
    df_mag.iloc[:, :-2]
    .set_axis(mag_col_names, axis=1)
    .dropna()  # keep only rows where every column is populated
    .assign(ID=lambda frame: frame['ID'].astype(float))  # float IDs, same dtype as the SED table
)
display(df_mag_clean)

# ---- SED catalogue: same realignment, plus comma-stripped labels ----
# Here the discarded tokens are '#' and 'sq.,'; the label at position 6 is
# then rewritten to 'chi sq.' (presumably "chi sq.," was split in two - confirm).
sed_col_names = [
    label.replace(',', '')
    for label in df_sed.columns.drop(['#', 'sq.,'])
]
sed_col_names[6] = 'chi sq.'
df_sed_clean = (
    df_sed.iloc[1:, :-2]  # first parsed row is skipped (NOTE(review): likely a non-data line - confirm)
    .set_axis(sed_col_names, axis=1)
    .reset_index(drop=True)
    .dropna()
    .assign(ID=lambda frame: frame['ID'].astype(float))
)
display(df_sed_clean)

Unnamed: 0,ID,RA(deg),Dec(deg),F090W,F115W,F150W,F200W,F277W,F356W,F410M,F444W,eF090W,eF115W,eF150W,eF200W,eF277W,eF356W,eF410M,eF444W
0,1.0,260.718976,65.711171,18.7847,18.6773,18.8318,18.9775,19.6117,19.8170,20.2598,20.3863,0.0700,0.0701,0.0700,0.0700,0.0700,0.0700,0.0700,0.0700
1,2.0,260.731025,65.710601,18.3673,18.1124,17.9475,17.8704,18.0502,18.3024,18.4486,18.5508,0.0700,0.0700,0.0700,0.0702,0.0700,0.0700,0.0700,0.0700
2,3.0,260.671726,65.711675,17.7597,17.2200,16.8639,16.6053,16.6891,17.0728,17.0368,17.2670,0.0700,0.0700,0.0700,0.0700,0.0700,0.0700,0.0702,0.1107
3,4.0,260.724918,65.709605,21.6962,21.4863,21.3460,21.4604,23.0741,23.1244,23.2145,23.3691,0.0701,0.0731,0.0727,0.0701,0.0703,0.0702,0.0716,0.0705
4,5.0,260.751843,65.708582,25.5848,25.5662,25.5087,25.3979,24.7714,24.7934,25.1194,24.9991,0.0815,0.0809,0.0787,0.0757,0.0705,0.0704,0.0721,0.0712
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37863,39117.0,260.672816,65.904928,22.0350,21.9733,21.8381,21.7456,21.8421,22.3119,22.4508,22.4028,0.0701,0.0700,0.0700,0.0700,0.0700,0.0700,0.0701,0.0700
37864,36603.0,260.666303,65.901720,28.0913,27.6933,27.2214,26.6518,26.6035,26.7834,26.6136,26.7546,0.3106,0.2174,0.1472,0.0950,0.0755,0.0761,0.0844,0.0818
37865,41620.0,260.661086,65.906960,27.4219,27.4504,27.5667,27.2882,26.8604,26.9123,26.9335,27.0008,0.1446,0.1462,0.1491,0.1110,0.0760,0.0756,0.0875,0.0858
37866,38911.0,260.702667,65.875949,27.4436,26.9181,26.2983,26.2320,26.1292,26.3350,26.4955,26.4607,0.1500,0.1068,0.0811,0.0773,0.0719,0.0724,0.0813,0.0762


Unnamed: 0,ID,RA(J2000),DEC(J2000),z_m1(EAZY),log10(mass),log10(age),chi sq.,Log10(SSFR),Log10(Tau),A_V,nfilt,F444W_AB
0,1.0,260.718976069,65.711171400,0.075000,8.024576,-2.040001,7.724554,1.827012,-2.000000,0.000000,8,20.3863
1,2.0,260.731024544,65.710600509,0.446000,10.559864,-2.220000,0.866112,2.184449,-1.428291,2.400000,8,18.5508
2,3.0,260.671726138,65.711675312,0.259000,10.944576,-0.843454,3.219007,-4.227740,-2.000000,1.800000,8,17.2670
3,4.0,260.724917893,65.709605249,0.018000,5.689755,-2.080001,141.343780,1.886938,-2.000000,0.000000,8,23.3691
4,5.0,260.751843111,65.708582257,4.715000,9.813772,-0.693453,17.567767,-0.052254,-1.142668,0.000000,8,24.9991
...,...,...,...,...,...,...,...,...,...,...,...,...
37863,39117.0,260.672816384,65.904928329,0.454000,8.765012,-1.043455,4.177360,1.043258,2.000000,0.200000,8,22.4028
37864,36603.0,260.666302552,65.901720439,0.499000,7.283634,-2.200001,6.419984,2.127079,-1.714443,4.000000,8,26.7546
37865,41620.0,260.661086119,65.906960439,4.786000,8.939743,-0.893456,2.477451,-0.045416,-1.428291,0.000000,8,27.0008
37866,38911.0,260.702667433,65.875948602,0.423000,7.312332,-2.220000,5.361995,2.150443,-1.714443,3.600000,8,26.4607


In [50]:
# Merge on "ID": attach the MAG photometry columns to each SED-fit row.
# how='right' keeps the SED table's row order. An SED row with no MAG match
# would come back with NaN photometry, and StandardScaler skips NaN in fit but
# passes it through in transform, so those rows would silently reach the models.
# Both input frames were already passed through dropna(), so any NaN in the
# merged frame can only come from an unmatched row; dropping NaN rows here
# therefore removes exactly those and is a no-op when every row matches.
merged_all = df_mag_clean.merge(df_sed_clean, on="ID", how="right")
df_merged = merged_all.dropna().reset_index(drop=True)
n_unmatched = len(merged_all) - len(df_merged)
if n_unmatched:
    print(f"Dropped {n_unmatched} SED rows with no matching MAG entry")
display(df_merged)

# Define input features and target variables
# NOTE(review): F115W is the only one of the eight filter columns that is not
# listed as an input - confirm the omission is intentional.
input_features = ["RA(deg)", "Dec(deg)", "F090W", "F150W", "F200W", "F277W", "F356W", "F410M", "F444W", "z_m1(EAZY)"]
target_variables = ["log10(mass)", "log10(age)", "Log10(SSFR)", "Log10(Tau)", "A_V", "chi sq."]

# Split data into train and test sets
TEST_FRACTION = 0.3  # share of rows held out for evaluation
SPLIT_SEED = 42      # fixed seed so the split is reproducible across runs
X = df_merged[input_features]
y = df_merged[target_variables]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_FRACTION, random_state=SPLIT_SEED
)

# Standardize input features: the scaler is fitted on the training rows only
# and the same fitted transform is then applied to the test rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


Unnamed: 0,ID,RA(deg),Dec(deg),F090W,F115W,F150W,F200W,F277W,F356W,F410M,...,DEC(J2000),z_m1(EAZY),log10(mass),log10(age),chi sq.,Log10(SSFR),Log10(Tau),A_V,nfilt,F444W_AB
0,1.0,260.718976,65.711171,18.7847,18.6773,18.8318,18.9775,19.6117,19.8170,20.2598,...,65.711171400,0.075000,8.024576,-2.040001,7.724554,1.827012,-2.000000,0.000000,8,20.3863
1,2.0,260.731025,65.710601,18.3673,18.1124,17.9475,17.8704,18.0502,18.3024,18.4486,...,65.710600509,0.446000,10.559864,-2.220000,0.866112,2.184449,-1.428291,2.400000,8,18.5508
2,3.0,260.671726,65.711675,17.7597,17.2200,16.8639,16.6053,16.6891,17.0728,17.0368,...,65.711675312,0.259000,10.944576,-0.843454,3.219007,-4.227740,-2.000000,1.800000,8,17.2670
3,4.0,260.724918,65.709605,21.6962,21.4863,21.3460,21.4604,23.0741,23.1244,23.2145,...,65.709605249,0.018000,5.689755,-2.080001,141.343780,1.886938,-2.000000,0.000000,8,23.3691
4,5.0,260.751843,65.708582,25.5848,25.5662,25.5087,25.3979,24.7714,24.7934,25.1194,...,65.708582257,4.715000,9.813772,-0.693453,17.567767,-0.052254,-1.142668,0.000000,8,24.9991
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37719,39117.0,260.672816,65.904928,22.0350,21.9733,21.8381,21.7456,21.8421,22.3119,22.4508,...,65.904928329,0.454000,8.765012,-1.043455,4.177360,1.043258,2.000000,0.200000,8,22.4028
37720,36603.0,260.666303,65.901720,28.0913,27.6933,27.2214,26.6518,26.6035,26.7834,26.6136,...,65.901720439,0.499000,7.283634,-2.200001,6.419984,2.127079,-1.714443,4.000000,8,26.7546
37721,41620.0,260.661086,65.906960,27.4219,27.4504,27.5667,27.2882,26.8604,26.9123,26.9335,...,65.906960439,4.786000,8.939743,-0.893456,2.477451,-0.045416,-1.428291,0.000000,8,27.0008
37722,38911.0,260.702667,65.875949,27.4436,26.9181,26.2983,26.2320,26.1292,26.3350,26.4955,...,65.875948602,0.423000,7.312332,-2.220000,5.361995,2.150443,-1.714443,3.600000,8,26.4607
