In [1]:
# Step up to the parent directory so that relative paths used later in the
# notebook (e.g. the "file:./results/ml-runs" tracking URI) resolve from there.
# NOTE: this is relative to the current directory, so re-running the cell in the
# same kernel moves up one more level each time.
%cd ../

/home/users/dmoreno2016/VT_Model_for_LightCurves_Classification


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


In [2]:
import mlflow
import pandas as pd
import matplotlib.pyplot as plt
import os
#os.environ["PATH"] = "/home/users/dmoreno2016/.TinyTeX/bin/x86_64-linux:" + os.environ["PATH"]

# Figure text styling: LaTeX-rendered text in a serif (Palatino) face.
plt.rcParams["text.usetex"] = True          # render text through LaTeX
plt.rcParams["pgf.texsystem"] = "pdflatex"  # TeX engine setting for the pgf backend
plt.rcParams["font.family"] = "serif"
plt.rcParams["font.serif"] = ["Palatino"]


import warnings
# No category or module filter is given, so this suppresses every warning
# raised from here on, including ones emitted by pandas, matplotlib or mlflow.
warnings.filterwarnings('ignore')

In [5]:
# Point MLflow at a local file-based tracking store. The path is relative, so it
# is resolved against the working directory in effect when this cell runs.
mlflow.set_tracking_uri("file:./results/ml-runs")

In [6]:
# Summarise the "best_params" fine-tuning runs of the multiband MACHO experiment:
# one row per hyper-parameter configuration with mean/std of test F1, test loss
# and run duration.
data_name = 'macho_multiband'
experiment_name = f"ft_classification/{data_name}/best_params"

# get_experiment_by_name returns None for an unknown name. Stop right here in
# that case: carrying on would either raise a NameError on `experiment_id` or,
# worse, silently query whatever id an earlier cell left in the kernel.
experiment = mlflow.get_experiment_by_name(experiment_name)
if experiment is None:
    raise ValueError(f"Experiment '{experiment_name}' not found.")
experiment_id = experiment.experiment_id
print(f"Experiment ID for '{experiment_name}': {experiment_id}")

runs = mlflow.search_runs(experiment_ids=[experiment_id])

# Columns kept from the raw runs table, in display order.
order_columns = [
    'run_id',
    'status',
    'metrics.epoch',
    'metrics.f1/test',
    'metrics.loss/test',
    'params.imgs_params/fig_params/linewidth',
    'params.imgs_params/fig_params/markersize',
    'params.imgs_params/input_type',
    'params.imgs_params/use_err',
    'params.training/lr',
    'params.loader/spc',
    'start_time',
    'end_time',
]

# Take an explicit copy so the new column is added to an independent frame
# rather than to a selection of `runs` (the pattern behind pandas'
# SettingWithCopyWarning, which the global warning filter would otherwise hide).
show_df = runs[order_columns].copy()
show_df['training_time'] = show_df['end_time'] - show_df['start_time']

# Move 'training_time' to the third position, then drop the raw timestamps.
cols = show_df.columns.tolist()
cols.insert(2, cols.pop(cols.index('training_time')))
show_df = show_df[cols].drop(columns=['end_time', 'start_time'])
show_df = show_df.sort_values(by=['metrics.f1/test'], ascending=False)

# Runs sharing all of these parameter values are aggregated together.
group_columns = [
    'params.imgs_params/fig_params/linewidth',
    'params.imgs_params/fig_params/markersize',
    'params.imgs_params/input_type',
    'params.imgs_params/use_err',
    'params.training/lr',
    'params.loader/spc',
]

# Mean and standard deviation of each metric within each configuration.
multiband_stats = show_df.groupby(group_columns).agg({
    'metrics.f1/test': ['mean', 'std'],
    'metrics.loss/test': ['mean', 'std'],
    'training_time': ['mean', 'std']
}).reset_index()

# After the aggregation the columns are (name, statistic) tuples; the group keys
# carry an empty second element. Map each tuple to a flat, readable label.
rename_dict = {
    ('params.imgs_params/fig_params/linewidth', ''): 'Linewidth',
    ('params.imgs_params/fig_params/markersize', ''): 'Markersize',
    ('params.imgs_params/input_type', ''): 'Input Type',
    ('params.imgs_params/use_err', ''): 'Use Error',
    ('params.training/lr', ''): 'Learning Rate',
    ('metrics.f1/test', 'mean'): 'avg_f1/test',
    ('metrics.f1/test', 'std'): 'std_f1/test',
    ('metrics.loss/test', 'mean'): 'avg_loss/test',
    ('metrics.loss/test', 'std'): 'std_loss/test',
    ('training_time', 'mean'): 'avg_training_time',
    ('training_time', 'std'): 'std_training_time',
    ('params.loader/spc', ''): 'SPC'
}

# Flatten the column index; tuples without an entry in the mapping are kept as-is.
multiband_stats.columns = [rename_dict.get(col, col) for col in multiband_stats.columns]
multiband_stats

Experiment ID for 'ft_classification/macho_multiband/best_params': 241827995979661800


Unnamed: 0,Linewidth,Markersize,Input Type,Use Error,Learning Rate,SPC,avg_f1/test,std_f1/test,avg_loss/test,std_loss/test,avg_training_time,std_training_time
0,0.5,2.0,overlay,True,5e-05,20,0.557982,0.049306,1.349229,0.158774,0 days 00:06:40.830000,0 days 00:01:20.634147673
1,1.0,2.0,2grid,False,5e-05,500,0.747423,0.011709,0.948237,0.184621,0 days 00:14:56.757000,0 days 00:02:07.099819468
2,1.0,3.0,overlay,True,5e-05,500,0.728466,0.015244,1.421596,0.14297,0 days 00:28:23.736333333,0 days 00:03:43.682032274
3,2.0,1.0,2grid,True,5e-05,20,0.516427,0.069799,1.533327,0.154623,0 days 00:05:56.434333333,0 days 00:01:57.401976143


In [14]:
# Keep only the SPC settings of interest and order the rows by SPC
# (the values are matched as strings).
multiband_stats = (
    multiband_stats
    .loc[lambda stats: stats["SPC"].isin(["20", "500", "all"])]
    .sort_values(by="SPC")
)

# 'overlay' inputs: mean / std of test F1, scaled to percentages.
overlay_multiband = multiband_stats.loc[multiband_stats['Input Type'].eq('overlay')]
avg_overlay_multiband = 100 * overlay_multiband["avg_f1/test"].to_numpy()
std_overlay_multiband = 100 * overlay_multiband["std_f1/test"].to_numpy()
print('avg overlay:', avg_overlay_multiband)
print('std overlay:', std_overlay_multiband)

# '2grid' inputs: same two quantities.
grid_multiband = multiband_stats.loc[multiband_stats['Input Type'].eq('2grid')]
avg_grid_multiband = 100 * grid_multiband["avg_f1/test"].to_numpy()
std_grid_multiband = 100 * grid_multiband["std_f1/test"].to_numpy()
print('avg 2grid:', avg_grid_multiband)
print('std 2grid:', std_grid_multiband)

avg overlay: [55.79822262 72.84656564]
std overlay: [4.93058069 1.52439798]
avg 2grid: [51.64273977 74.74225163]
std 2grid: [6.97994393 1.17088527]


In [None]:
# One line per input type: "mean ± std" of test F1 (in percent) for the first
# and second rows of the SPC-sorted tables built in the previous cell.
# Indexing is positional, so each array must hold at least two entries.
print(f"Overlay: {avg_overlay_multiband[0]:.2f} ± {std_overlay_multiband[0]:.2f} | {avg_overlay_multiband[1]:.2f} ± {std_overlay_multiband[1]:.2f}")
print(f"2Grid:   {avg_grid_multiband[0]:.2f} ± {std_grid_multiband[0]:.2f} | {avg_grid_multiband[1]:.2f} ± {std_grid_multiband[1]:.2f}")

Overlay: 55.80 ± 4.93 | 72.85 ± 1.52
2Grid:   51.64 ± 6.98 | 74.74 ± 1.17


In [8]:
# Summarise the "best_params" fine-tuning runs of the single-band MACHO
# experiment: one row per hyper-parameter configuration with mean/std of test
# F1, test loss and run duration.
data_name = 'macho'
experiment_name = f"ft_classification/{data_name}/best_params"

# get_experiment_by_name returns None for an unknown name. Fail immediately in
# that case: otherwise `experiment_id` would be undefined, or would still hold
# the id of a different experiment queried earlier in the same kernel, and the
# table below would be computed from the wrong runs without any error.
experiment = mlflow.get_experiment_by_name(experiment_name)
if experiment is None:
    raise ValueError(f"Experiment '{experiment_name}' not found.")
experiment_id = experiment.experiment_id
print(f"Experiment ID for '{experiment_name}': {experiment_id}")

runs = mlflow.search_runs(experiment_ids=[experiment_id])

# Columns kept from the raw runs table, in display order.
order_columns = [
    'run_id',
    'status',
    'metrics.epoch',
    'metrics.f1/test',
    'metrics.loss/test',
    'params.imgs_params/fig_params/linewidth',
    'params.imgs_params/fig_params/markersize',
    'params.imgs_params/input_type',
    'params.imgs_params/use_err',
    'params.training/lr',
    'params.loader/spc',
    'start_time',
    'end_time',
]

# Take an explicit copy so the new column is added to an independent frame
# rather than to a selection of `runs` (the pattern behind pandas'
# SettingWithCopyWarning, which the global warning filter would otherwise hide).
show_df = runs[order_columns].copy()
show_df['training_time'] = show_df['end_time'] - show_df['start_time']

# Move 'training_time' to the third position, then drop the raw timestamps.
cols = show_df.columns.tolist()
cols.insert(2, cols.pop(cols.index('training_time')))
show_df = show_df[cols].drop(columns=['end_time', 'start_time'])
show_df = show_df.sort_values(by=['metrics.f1/test'], ascending=False)

# Runs sharing all of these parameter values are aggregated together.
group_columns = [
    'params.imgs_params/fig_params/linewidth',
    'params.imgs_params/fig_params/markersize',
    'params.imgs_params/input_type',
    'params.imgs_params/use_err',
    'params.training/lr',
    'params.loader/spc',
]

# Mean and standard deviation of each metric within each configuration.
singleband_stats = show_df.groupby(group_columns).agg({
    'metrics.f1/test': ['mean', 'std'],
    'metrics.loss/test': ['mean', 'std'],
    'training_time': ['mean', 'std']
}).reset_index()

# After the aggregation the columns are (name, statistic) tuples; the group keys
# carry an empty second element. Map each tuple to a flat, readable label.
rename_dict = {
    ('params.imgs_params/fig_params/linewidth', ''): 'Linewidth',
    ('params.imgs_params/fig_params/markersize', ''): 'Markersize',
    ('params.imgs_params/input_type', ''): 'Input Type',
    ('params.imgs_params/use_err', ''): 'Use Error',
    ('params.training/lr', ''): 'Learning Rate',
    ('metrics.f1/test', 'mean'): 'avg_f1/test',
    ('metrics.f1/test', 'std'): 'std_f1/test',
    ('metrics.loss/test', 'mean'): 'avg_loss/test',
    ('metrics.loss/test', 'std'): 'std_loss/test',
    ('training_time', 'mean'): 'avg_training_time',
    ('training_time', 'std'): 'std_training_time',
    ('params.loader/spc', ''): 'SPC'
}

# Flatten the column index; tuples without an entry in the mapping are kept as-is.
singleband_stats.columns = [rename_dict.get(col, col) for col in singleband_stats.columns]
singleband_stats

#singleband_stats.drop([2], axis=0, inplace=True)

Experiment ID for 'ft_classification/macho/best_params': 197991169753779357


Unnamed: 0,Linewidth,Markersize,Input Type,Use Error,Learning Rate,SPC,avg_f1/test,std_f1/test,avg_loss/test,std_loss/test,avg_training_time,std_training_time
0,1.0,1.0,overlay,True,5e-06,500,0.694755,0.017918,0.966322,0.066845,0 days 00:24:59.019000,0 days 00:03:40.825794208
1,1.5,5.0,overlay,True,5e-05,20,0.489102,0.014417,2.037017,0.252695,0 days 00:04:27.996666666,0 days 00:00:31.568745878


In [18]:
# Keep only the SPC settings of interest and order the rows by SPC
# (the values are matched as strings).
singleband_stats = (
    singleband_stats
    .loc[lambda stats: stats["SPC"].isin(["20", "500", "all"])]
    .sort_values(by="SPC")
)

# 'overlay' inputs: mean / std of test F1, scaled to percentages.
overlay_singleband = singleband_stats.loc[singleband_stats['Input Type'].eq('overlay')]
avg_f1_swinv2 = 100 * overlay_singleband["avg_f1/test"].to_numpy()
std_f1_swinv2 = 100 * overlay_singleband["std_f1/test"].to_numpy()
print('avg singleband:', avg_f1_swinv2)
print('std singleband:', std_f1_swinv2)

avg singleband: [48.91020656 69.47547197]
std singleband: [1.44165279 1.79177653]


In [19]:
# "mean ± std" of test F1 (in percent) for the first and second rows of the
# SPC-sorted single-band table built in the previous cell. Indexing is
# positional, so each array must hold at least two entries.
print(f"One-band: {avg_f1_swinv2[0]:.2f} ± {std_f1_swinv2[0]:.2f} | {avg_f1_swinv2[1]:.2f} ± {std_f1_swinv2[1]:.2f}")

One-band: 48.91 ± 1.44 | 69.48 ± 1.79
