In [77]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd


import astropy as ap
from astropy.table import QTable


from sklearn.model_selection import train_test_split, KFold, GridSearchCV

from sklearn.tree import DecisionTreeRegressor, plot_tree
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import AdaBoostRegressor
from xgboost import XGBRegressor
from sklearn.neural_network import MLPRegressor

from sklearn.metrics import precision_score, recall_score, f1_score, matthews_corrcoef, confusion_matrix, explained_variance_score, mean_squared_error, max_error, mean_absolute_error

from sklearn.preprocessing import StandardScaler, MinMaxScaler, PowerTransformer
from sklearn.decomposition import PCA, KernelPCA
# no need for feature selection because there are only a few columns

import read_mist_models

In [53]:
def get_iso_data_panda(file):
    """Read an isochrone file and return selected columns as a pandas DataFrame.

    All isochrones found in the file are stacked into one flat table, in the
    order they appear in ``ISO(file).isos``.

    Parameters
    ----------
    file : path handed unchanged to ``read_mist_models.ISO``.

    Returns
    -------
    The result of ``QTable.to_pandas()`` with the columns ``age``, ``logTeff``,
    ``log_g`` and ``mass`` (in that order).
    """
    isochrones = read_mist_models.ISO(file).isos

    # Output column name -> field name inside each isochrone record.
    field_for_column = {
        'age': 'log10_isochrone_age_yr',
        'logTeff': 'log_Teff',
        'log_g': 'log_g',
        'mass': 'star_mass',
    }

    # Accumulate every isochrone's values into one list per output column.
    stacked = {column: [] for column in field_for_column}
    for isochrone in isochrones:
        for column, field in field_for_column.items():
            stacked[column].extend(isochrone[field])

    table = QTable(
        list(stacked.values()),
        names=tuple(stacked),
        meta={'name': "iso data"},
    )
    return table.to_pandas()

In [55]:
# Load the MIST v1.2 basic isochrone file into a single flat DataFrame.
full_data = get_iso_data_panda(
    file="data/MIST_v1.2_vvcrit0.0_basic_isos/MIST_v1.2_feh_p0.00_afe_p0.0_vvcrit0.0_basic.txt"
)

Reading in: data/MIST_v1.2_vvcrit0.0_basic_isos/MIST_v1.2_feh_p0.00_afe_p0.0_vvcrit0.0_basic.txt


In [56]:
# Preview the complete isochrone table that was just loaded.
display(full_data)

Unnamed: 0,age,logTeff,log_g,mass
0,5.0,3.468541,3.116651,0.100000
1,5.0,3.469176,3.114042,0.101392
2,5.0,3.471116,3.106133,0.105660
3,5.0,3.473035,3.098417,0.109915
4,5.0,3.474944,3.090875,0.114180
...,...,...,...,...
103984,10.3,4.370643,7.779334,0.528715
103985,10.3,4.363336,7.782175,0.528717
103986,10.3,4.356026,7.784962,0.528720
103987,10.3,4.348711,7.787695,0.528724


In [57]:
# Keep only the rows whose mass is below the cut.
# Boolean indexing selects the rows directly; the previous `.where(...).dropna()`
# first replaced every non-matching row with NaN and then dropped it again, which
# costs an extra full-frame pass and upcasts integer columns to float.
# `.dropna()` is kept so rows with missing values are still removed, as before.
MAX_STAR_MASS = 5  # exclusive upper bound on `mass`; presumably solar masses — TODO confirm
no_massive_stars_data = (
    full_data.loc[full_data["mass"] < MAX_STAR_MASS]
    .dropna()
    .reset_index(drop=True)
)

In [58]:
# Preview the table after the mass cut (index was reset by the previous cell).
display(no_massive_stars_data)

Unnamed: 0,age,logTeff,log_g,mass
0,5.0,3.468541,3.116651,0.100000
1,5.0,3.469176,3.114042,0.101392
2,5.0,3.471116,3.106133,0.105660
3,5.0,3.473035,3.098417,0.109915
4,5.0,3.474944,3.090875,0.114180
...,...,...,...,...
82360,10.3,4.370643,7.779334,0.528715
82361,10.3,4.363336,7.782175,0.528717
82362,10.3,4.356026,7.784962,0.528720
82363,10.3,4.348711,7.787695,0.528724


In [63]:
import random  # no longer used by the split itself; kept in case later cells rely on the name

# Basic random train/test split.
# Each row goes to the training set independently with probability TRAIN_FRACTION,
# otherwise to the test set; the original row order is preserved inside each set.
# The generator is seeded so that Restart & Run All reproduces the same split.
# Open question (from the original note): instead of a random split, drop one row
# out of every X, relative to the age step?
TRAIN_FRACTION = 0.70
SPLIT_SEED = 42
SPLIT_COLUMNS = ["age", "mass", "logTeff", "log_g"]

split_rng = np.random.default_rng(SPLIT_SEED)
# One uniform draw per row replaces the per-row Python loop over iterrows().
in_train = split_rng.random(len(no_massive_stars_data)) < TRAIN_FRACTION

train_data_df = no_massive_stars_data.loc[in_train, SPLIT_COLUMNS].reset_index(drop=True)
test_data_df = no_massive_stars_data.loc[~in_train, SPLIT_COLUMNS].reset_index(drop=True)

# Plain list-of-rows views, kept for any later cell that still uses these names.
train_data = train_data_df.values.tolist()
test_data = test_data_df.values.tolist()

# Every row must land in exactly one of the two sets.
assert len(train_data_df) + len(test_data_df) == len(no_massive_stars_data)

display(train_data_df)
display(test_data_df)

Unnamed: 0,age,mass,logTeff,log_g
0,5.0,0.101392,3.469176,3.114042
1,5.0,0.105660,3.471116,3.106133
2,5.0,0.109915,3.473035,3.098417
3,5.0,0.118442,3.476837,3.083524
4,5.0,0.122697,3.478710,3.076377
...,...,...,...,...
57600,10.3,0.528715,4.370643,7.779334
57601,10.3,0.528717,4.363336,7.782175
57602,10.3,0.528720,4.356026,7.784962
57603,10.3,0.528724,4.348711,7.787695


Unnamed: 0,age,mass,logTeff,log_g
0,5.0,0.100000,3.468541,3.116651
1,5.0,0.114180,3.474944,3.090875
2,5.0,0.126952,3.480566,3.069404
3,5.0,0.139721,3.486031,3.049569
4,5.0,0.190028,3.505762,3.006153
...,...,...,...,...
24755,10.3,0.528691,4.537083,7.698825
24756,10.3,0.528692,4.529949,7.703140
24757,10.3,0.528696,4.465235,7.738071
24758,10.3,0.528702,4.421690,7.758184


In [65]:
def gaussian(dsts, kernel_width=.5):
    """Turn distances into Gaussian-shaped weights.

    Computes ``exp(-dsts**2 / kernel_width)`` element-wise. Note that the
    squared distance is divided by ``kernel_width`` directly (not by
    ``2 * sigma**2``), exactly as in the original implementation.

    Parameters
    ----------
    dsts : array-like or scalar
        Distances. Lists/tuples are accepted and converted with ``np.asarray``.
    kernel_width : float, default 0.5
        Positive scale of the kernel; larger values make the weights decay
        more slowly with distance. The default matches the value that used to
        be hard-coded, so calling ``gaussian(dsts)`` gives the same result.

    Returns
    -------
    numpy.ndarray (or NumPy scalar for scalar input)
        Weights with the same shape as ``dsts``; a distance of 0 maps to 1.

    Raises
    ------
    ValueError
        If ``kernel_width`` is not strictly positive.
    """
    if kernel_width <= 0:
        raise ValueError("kernel_width must be strictly positive")
    # asarray lets plain Python sequences work; `list ** 2` would raise TypeError.
    distances = np.asarray(dsts)
    return np.exp(-(distances ** 2) / kernel_width)