# MLflow results presentation

Present MLflow results using pandas.

If TensorBoard does not render inline, launch it with `!tensorboard` instead of `%tensorboard` and open it in a new browser tab.

In [6]:
import pandas as pd
from uncertainties import ufloat
import mlflow
from mlflow.tracking import MlflowClient
# NOTE(review): absolute, machine-specific location of the local MLflow file
# store; it has to be adjusted when the notebook runs on another machine.
mlflow.set_tracking_uri('file:///home/zeyusun/work/flare-prediction-smarp/mlruns')
client = MlflowClient()

# Star import: the helpers called in later cells (retrieve, select, style,
# organize, typeset, tensorboard, get_columns) presumably all come from here.
from mlflow_helper import *

# Do not truncate long cell contents when DataFrames are displayed.
pd.set_option('display.max_colwidth', None)

# Notebook extensions: inline TensorBoard, and automatic reloading of edited
# modules before each cell is executed.
%load_ext tensorboard
%load_ext autoreload
%autoreload 2

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


Display borders on pandas tables

In [7]:
%%HTML
<!-- Give every header/data cell of rendered pandas DataFrame tables a solid 1px black border and black text. -->
<style type="text/css">
table.dataframe td, table.dataframe th {
    border: 1px  black solid !important;
  color: black !important;
}
</style>

## experiment: beta
* no random seed
* multiple databases

In [8]:
# Raw MLflow run columns (tags / metrics) -> short names shown in the tables.
columns = {
    'tags.database_name': 'database',
    'tags.dataset_name': 'dataset',
    'tags.estimator_name': 'estimator',
    'metrics.auc': 'auc',
    'metrics.tss_opt': 'tss',
}
# Per-column value abbreviations: estimator class name -> short label.
rows = {
    'estimator': {
        'HistGradientBoostingClassifier': 'HGB',
        'RandomForestClassifier': 'RF',
        'SGDClassifier': 'LG',
    }
}
# Helper semantics are not visible here; presumably retrieve(experiment name,
# run name) fetches the runs and select() applies the two mappings above.
runs_raw = retrieve('experiment', 'beta')
runs = select(runs_raw, columns, rows)
runs

Select iloc 0 from 
                          start_time tags.mlflow.runName
90  2021-05-22 18:13:26.644000+00:00                beta
166 2021-05-21 01:50:33.010000+00:00                beta


Unnamed: 0,database,dataset,estimator,auc,tss
36,M_Q_6hr,combined,HGB,0.999919,0.998669
37,M_Q_6hr,combined,RF,1.0,1.0
38,M_Q_6hr,combined,LG,0.99204,0.921998
39,M_Q_6hr,sharp,HGB,0.996281,0.943164
40,M_Q_6hr,sharp,RF,1.0,1.0
41,M_Q_6hr,sharp,LG,0.992198,0.921169
42,M_Q_6hr,smarp,HGB,0.996993,0.950146
43,M_Q_6hr,smarp,RF,1.0,1.0
44,M_Q_6hr,smarp,LG,0.992351,0.920737
45,M_Q_12hr_balanced,combined,HGB,1.0,1.0


In [4]:
# Keys handed to style(); the rendered table is broken down by all three.
by = ['database', 'dataset', 'estimator']
df_style = style(runs, by=by)
df_style

Unnamed: 0_level_0,database,M_Q_12hr,M_Q_12hr,M_Q_12hr,M_Q_12hr_balanced,M_Q_12hr_balanced,M_Q_12hr_balanced,M_Q_24hr,M_Q_24hr,M_Q_24hr,M_Q_24hr_balanced,M_Q_24hr_balanced,M_Q_24hr_balanced,M_Q_6hr,M_Q_6hr,M_Q_6hr,M_Q_6hr_balanced,M_Q_6hr_balanced,M_Q_6hr_balanced
Unnamed: 0_level_1,dataset,combined,sharp,smarp,combined,sharp,smarp,combined,sharp,smarp,combined,sharp,smarp,combined,sharp,smarp,combined,sharp,smarp
Unnamed: 0_level_2,estimator,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2
auc,HGB,1.0,0.994,0.996,1.0,0.997,0.998,1.0,0.999,0.996,1.0,0.994,0.995,1.0,0.996,0.997,1.0,0.994,0.995
auc,LG,0.992,0.992,0.991,0.994,0.992,0.993,0.991,0.99,0.991,0.991,0.991,0.991,0.992,0.992,0.992,0.991,0.99,0.991
auc,RF,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
tss,HGB,1.0,0.928,0.942,1.0,0.94,0.959,1.0,0.966,0.94,1.0,0.926,0.938,0.999,0.943,0.95,1.0,0.925,0.934
tss,LG,0.91,0.909,0.911,0.915,0.918,0.912,0.912,0.912,0.912,0.919,0.915,0.917,0.922,0.921,0.921,0.917,0.899,0.913
tss,RF,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [5]:
# Relies on `by` assigned in an earlier cell.
# NOTE(review): with std=True every cell of the output reads `+/-nan`,
# presumably because each group contains a single run - confirm.
df = organize(runs, by=by, std=True)
# LaTeX version of the table is kept in df_latex; printing it is disabled below.
df_latex = typeset(df)
#print(df_latex)
df

Unnamed: 0_level_0,database,M_Q_12hr,M_Q_12hr,M_Q_12hr,M_Q_12hr_balanced,M_Q_12hr_balanced,M_Q_12hr_balanced,M_Q_24hr,M_Q_24hr,M_Q_24hr,M_Q_24hr_balanced,M_Q_24hr_balanced,M_Q_24hr_balanced,M_Q_6hr,M_Q_6hr,M_Q_6hr,M_Q_6hr_balanced,M_Q_6hr_balanced,M_Q_6hr_balanced
Unnamed: 0_level_1,dataset,combined,sharp,smarp,combined,sharp,smarp,combined,sharp,smarp,combined,sharp,smarp,combined,sharp,smarp,combined,sharp,smarp
Unnamed: 0_level_2,estimator,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2
auc,HGB,1.000+/-nan,0.994+/-nan,0.996+/-nan,1.000+/-nan,0.997+/-nan,0.998+/-nan,1.000+/-nan,0.999+/-nan,0.996+/-nan,1.000+/-nan,0.994+/-nan,0.995+/-nan,1.000+/-nan,0.996+/-nan,0.997+/-nan,1.000+/-nan,0.994+/-nan,0.995+/-nan
auc,LG,0.992+/-nan,0.992+/-nan,0.991+/-nan,0.994+/-nan,0.992+/-nan,0.993+/-nan,0.991+/-nan,0.990+/-nan,0.991+/-nan,0.991+/-nan,0.991+/-nan,0.991+/-nan,0.992+/-nan,0.992+/-nan,0.992+/-nan,0.991+/-nan,0.990+/-nan,0.991+/-nan
auc,RF,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan
tss,HGB,1.000+/-nan,0.928+/-nan,0.942+/-nan,1.000+/-nan,0.940+/-nan,0.959+/-nan,1.000+/-nan,0.966+/-nan,0.940+/-nan,1.000+/-nan,0.926+/-nan,0.938+/-nan,0.999+/-nan,0.943+/-nan,0.950+/-nan,1.000+/-nan,0.925+/-nan,0.934+/-nan
tss,LG,0.910+/-nan,0.909+/-nan,0.911+/-nan,0.915+/-nan,0.918+/-nan,0.912+/-nan,0.912+/-nan,0.912+/-nan,0.912+/-nan,0.919+/-nan,0.915+/-nan,0.917+/-nan,0.922+/-nan,0.921+/-nan,0.921+/-nan,0.917+/-nan,0.899+/-nan,0.913+/-nan
tss,RF,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan


## leaderboard1: sklearn_dataset

In [6]:
# Raw MLflow run columns (tags / metrics) -> short names shown in the tables.
columns = {
    'tags.database_name': 'database',
    'tags.dataset_name': 'dataset',
    'tags.estimator_name': 'estimator',
    'metrics.auc': 'auc',
    'metrics.tss_opt': 'tss',
}
# Per-column value abbreviations: estimator class name -> short label.
rows = {
    'estimator': {
        'HistGradientBoostingClassifier': 'HGB',
        'RandomForestClassifier': 'RF',
        'SGDClassifier': 'LG',
    }
}
# Helper semantics are not visible here; presumably retrieve(experiment name,
# run name) fetches the runs and select() applies the two mappings above.
runs_raw = retrieve('leaderboard1', 'sklearn_dataset')
runs = select(runs_raw, columns, rows)
runs

Unnamed: 0,database,dataset,estimator,auc,tss
77,M_Q_24hr,fused_sharp,HGB,0.986385,0.892283
78,M_Q_24hr,fused_sharp,RF,0.974761,0.863344
79,M_Q_24hr,fused_sharp,LG,0.983602,0.895498
80,M_Q_24hr,fused_smarp,HGB,0.94781,0.764423
82,M_Q_24hr,fused_smarp,RF,0.938948,0.725962
83,M_Q_24hr,fused_smarp,LG,0.956173,0.810897
84,M_Q_24hr,sharp,HGB,0.991318,0.90836
85,M_Q_24hr,sharp,RF,0.983958,0.890675
86,M_Q_24hr,sharp,LG,0.987159,0.900322
87,M_Q_24hr,smarp,HGB,0.953228,0.801282


In [7]:
# Aggregate the selected runs into `value+/-spread` cells (presumably mean and
# standard deviation across runs), print the LaTeX source, then show the frame.
df = organize(runs, std=True)
print(df.to_latex())
df

\begin{tabular}{llllll}
\toprule
    & dataset &    fused\_sharp &    fused\_smarp &          sharp &          smarp \\
{} & estimator &                &                &                &                \\
\midrule
auc & HGB &  0.990+/-0.003 &  0.963+/-0.010 &  0.992+/-0.002 &  0.970+/-0.009 \\
    & LG &  0.988+/-0.003 &  0.970+/-0.008 &  0.991+/-0.003 &  0.971+/-0.009 \\
    & RF &  0.983+/-0.006 &  0.957+/-0.011 &  0.988+/-0.004 &  0.958+/-0.012 \\
tss & HGB &  0.907+/-0.019 &  0.812+/-0.034 &  0.919+/-0.016 &  0.839+/-0.029 \\
    & LG &  0.906+/-0.007 &  0.841+/-0.026 &  0.913+/-0.018 &  0.843+/-0.029 \\
    & RF &  0.884+/-0.030 &  0.786+/-0.038 &  0.901+/-0.023 &  0.797+/-0.040 \\
\bottomrule
\end{tabular}



Unnamed: 0_level_0,dataset,fused_sharp,fused_smarp,sharp,smarp
Unnamed: 0_level_1,estimator,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
auc,HGB,0.990+/-0.003,0.963+/-0.010,0.992+/-0.002,0.970+/-0.009
auc,LG,0.988+/-0.003,0.970+/-0.008,0.991+/-0.003,0.971+/-0.009
auc,RF,0.983+/-0.006,0.957+/-0.011,0.988+/-0.004,0.958+/-0.012
tss,HGB,0.907+/-0.019,0.812+/-0.034,0.919+/-0.016,0.839+/-0.029
tss,LG,0.906+/-0.007,0.841+/-0.026,0.913+/-0.018,0.843+/-0.029
tss,RF,0.884+/-0.030,0.786+/-0.038,0.901+/-0.023,0.797+/-0.040


In [8]:
# Include the database level in the styled table's column header.
by = ['database', 'dataset', 'estimator']
style(runs, by=by)

Unnamed: 0_level_0,database,M_Q_24hr,M_Q_24hr,M_Q_24hr,M_Q_24hr
Unnamed: 0_level_1,dataset,fused_sharp,fused_smarp,sharp,smarp
Unnamed: 0_level_2,estimator,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
auc,HGB,0.99,0.963,0.992,0.97
auc,LG,0.988,0.97,0.991,0.971
auc,RF,0.983,0.957,0.988,0.958
tss,HGB,0.907,0.812,0.919,0.839
tss,LG,0.906,0.841,0.913,0.843
tss,RF,0.884,0.786,0.901,0.797


## leaderboard1: arnet_dataset

In [9]:
# Raw MLflow run columns -> short names; these runs log test-split metrics
# under 'metrics.test/...'.
columns = {
    'tags.dataset_name': 'dataset',
    'tags.estimator_name': 'estimator',
    'metrics.test/auc': 'AUC',
    'metrics.test/tss': 'TSS',
}
# No value abbreviations are applied for these runs.
rows = {}
# Helper semantics are not visible here; presumably retrieve(experiment name,
# run name) fetches the runs and select() keeps/renames the columns above.
runs_raw = retrieve('leaderboard1', 'arnet_dataset')
runs = select(runs_raw, columns, rows)
runs

Select iloc 0 from 
                         start_time tags.mlflow.runName
96 2021-05-26 05:14:03.491000+00:00       arnet_dataset


Unnamed: 0,dataset,estimator,AUC,TSS
0,fused_smarp,CNN,0.942973,0.753205
1,fused_smarp,CNN,0.961599,0.816193
2,fused_smarp,CNN,0.945356,0.758294
3,fused_smarp,CNN,0.949142,0.773788
4,fused_smarp,CNN,0.929361,0.684978
...,...,...,...,...
75,sharp,MLP,0.976567,0.855305
76,sharp,MLP,0.976531,0.849802
81,sharp,MLP,0.974626,0.815385
91,sharp,MLP,0.986490,0.888298


In [10]:
# Styled summary of the selected runs (metric x estimator rows, dataset columns in the output).
style(runs)

Unnamed: 0_level_0,dataset,fused_sharp,fused_smarp,sharp,smarp
Unnamed: 0_level_1,estimator,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AUC,C3D,0.959,0.942,0.903,0.928
AUC,CNN,0.961,0.946,0.954,0.915
AUC,LSTM,0.991,0.97,0.989,0.967
AUC,MLP,0.989,0.969,0.977,0.962
TSS,C3D,0.81,0.745,0.702,0.737
TSS,CNN,0.801,0.757,0.789,0.701
TSS,LSTM,0.912,0.837,0.903,0.831
TSS,MLP,0.906,0.833,0.855,0.807


In [11]:
# Aggregate into `value+/-spread` cells, print LaTeX with centred
# multi-column headers, then show the frame.
df = organize(runs, std=True)
print(df.to_latex(multicolumn_format='c'))
df

\begin{tabular}{llllll}
\toprule
    & dataset &    fused\_sharp &    fused\_smarp &          sharp &          smarp \\
{} & estimator &                &                &                &                \\
\midrule
AUC & C3D &  0.959+/-0.020 &  0.941+/-0.013 &  0.903+/-0.051 &  0.928+/-0.031 \\
    & CNN &  0.961+/-0.024 &  0.946+/-0.012 &  0.954+/-0.017 &  0.915+/-0.035 \\
    & LSTM &  0.991+/-0.004 &  0.970+/-0.008 &  0.989+/-0.005 &  0.967+/-0.010 \\
    & MLP &  0.989+/-0.004 &  0.969+/-0.007 &  0.977+/-0.006 &  0.962+/-0.006 \\
TSS & C3D &  0.810+/-0.058 &  0.745+/-0.046 &  0.702+/-0.111 &  0.737+/-0.082 \\
    & CNN &  0.801+/-0.062 &  0.757+/-0.047 &  0.789+/-0.058 &  0.701+/-0.076 \\
    & LSTM &  0.912+/-0.024 &  0.837+/-0.028 &  0.903+/-0.031 &  0.831+/-0.033 \\
    & MLP &  0.906+/-0.015 &  0.833+/-0.026 &  0.855+/-0.027 &  0.807+/-0.028 \\
\bottomrule
\end{tabular}



Unnamed: 0_level_0,dataset,fused_sharp,fused_smarp,sharp,smarp
Unnamed: 0_level_1,estimator,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AUC,C3D,0.959+/-0.020,0.941+/-0.013,0.903+/-0.051,0.928+/-0.031
AUC,CNN,0.961+/-0.024,0.946+/-0.012,0.954+/-0.017,0.915+/-0.035
AUC,LSTM,0.991+/-0.004,0.970+/-0.008,0.989+/-0.005,0.967+/-0.010
AUC,MLP,0.989+/-0.004,0.969+/-0.007,0.977+/-0.006,0.962+/-0.006
TSS,C3D,0.810+/-0.058,0.745+/-0.046,0.702+/-0.111,0.737+/-0.082
TSS,CNN,0.801+/-0.062,0.757+/-0.047,0.789+/-0.058,0.701+/-0.076
TSS,LSTM,0.912+/-0.024,0.837+/-0.028,0.903+/-0.031,0.831+/-0.033
TSS,MLP,0.906+/-0.015,0.833+/-0.026,0.855+/-0.027,0.807+/-0.028


## arnet: fusesize_new_more_dataset

In [12]:
# Raw MLflow run columns -> short names; these runs log test-split metrics
# under 'metrics.test/...'.
columns = {
    'tags.dataset_name': 'dataset',
    'tags.estimator_name': 'estimator',
    'metrics.test/auc': 'AUC',
    'metrics.test/tss': 'TSS',
}
# No value abbreviations are applied for these runs.
rows = {}
# Helper semantics are not visible here; presumably retrieve(experiment name,
# run name) fetches the runs and select() keeps/renames the columns above.
runs_raw = retrieve('arnet', 'fusesize_new_more_dataset')
runs = select(runs_raw, columns, rows)
runs

Unnamed: 0,dataset,estimator,AUC,TSS
87,fused_smarp,FusionC3D,0.935290,0.737179
88,fused_smarp,FusionC3D,0.960964,0.823851
89,fused_smarp,FusionC3D,0.963064,0.798578
90,fused_smarp,FusionC3D,0.951671,0.791741
91,fused_smarp,FusionC3D,0.931807,0.756726
...,...,...,...,...
162,sharp,CNN,0.938087,0.716887
163,sharp,CNN,0.886316,0.654784
164,sharp,CNN,0.932091,0.705107
165,sharp,CNN,0.863181,0.664815


In [13]:
# NOTE(review): `df` is assigned but not used in this cell; only the styled
# table is displayed.
df = organize(runs)
df_style = style(runs)
df_style

Unnamed: 0_level_0,dataset,fused_sharp,fused_smarp,sharp,smarp
Unnamed: 0_level_1,estimator,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AUC,C3D,0.959,0.948,0.916,0.947
AUC,CNN,0.961,0.961,0.919,0.94
AUC,FusionC3D,0.962,0.949,0.912,0.931
AUC,FusionCNN,0.961,0.96,0.934,0.94
TSS,C3D,0.81,0.782,0.748,0.762
TSS,CNN,0.802,0.795,0.713,0.747
TSS,FusionC3D,0.806,0.782,0.721,0.745
TSS,FusionCNN,0.805,0.804,0.738,0.743


In [14]:
# Build the value for --logdir_spec from the retrieved runs (helper is opaque
# here) and open TensorBoard inline; IPython substitutes {dirs}.
dirs = tensorboard(runs_raw)
%tensorboard --logdir_spec {dirs}

Reusing TensorBoard on port 6007 (pid 15467), started 5 days, 1:54:44 ago. (Use '!kill 15467' to kill it.)

In [15]:
# Aggregate into `value+/-spread` cells, print LaTeX with centred
# multi-column headers, then show the frame.
df = organize(runs, std=True)
print(df.to_latex(multicolumn_format='c'))
df

\begin{tabular}{llllll}
\toprule
    & dataset &    fused\_sharp &    fused\_smarp &          sharp &          smarp \\
{} & estimator &                &                &                &                \\
\midrule
AUC & C3D &  0.959+/-0.030 &  0.948+/-0.023 &  0.916+/-0.043 &  0.947+/-0.011 \\
    & CNN &  0.961+/-0.027 &  0.961+/-0.006 &  0.919+/-0.045 &  0.940+/-0.015 \\
    & FusionC3D &  0.961+/-0.020 &  0.949+/-0.014 &  0.912+/-0.050 &  0.931+/-0.032 \\
    & FusionCNN &  0.961+/-0.027 &  0.960+/-0.010 &  0.934+/-0.023 &  0.940+/-0.007 \\
TSS & C3D &  0.810+/-0.078 &  0.782+/-0.068 &  0.748+/-0.081 &  0.763+/-0.016 \\
    & CNN &  0.802+/-0.076 &  0.795+/-0.028 &  0.713+/-0.066 &  0.747+/-0.057 \\
    & FusionC3D &  0.806+/-0.047 &  0.782+/-0.035 &  0.721+/-0.115 &  0.745+/-0.072 \\
    & FusionCNN &  0.805+/-0.087 &  0.804+/-0.034 &  0.738+/-0.066 &  0.743+/-0.028 \\
\bottomrule
\end{tabular}



Unnamed: 0_level_0,dataset,fused_sharp,fused_smarp,sharp,smarp
Unnamed: 0_level_1,estimator,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AUC,C3D,0.959+/-0.030,0.948+/-0.023,0.916+/-0.043,0.947+/-0.011
AUC,CNN,0.961+/-0.027,0.961+/-0.006,0.919+/-0.045,0.940+/-0.015
AUC,FusionC3D,0.961+/-0.020,0.949+/-0.014,0.912+/-0.050,0.931+/-0.032
AUC,FusionCNN,0.961+/-0.027,0.960+/-0.010,0.934+/-0.023,0.940+/-0.007
TSS,C3D,0.810+/-0.078,0.782+/-0.068,0.748+/-0.081,0.763+/-0.016
TSS,CNN,0.802+/-0.076,0.795+/-0.028,0.713+/-0.066,0.747+/-0.057
TSS,FusionC3D,0.806+/-0.047,0.782+/-0.035,0.721+/-0.115,0.745+/-0.072
TSS,FusionCNN,0.805+/-0.087,0.804+/-0.034,0.738+/-0.066,0.743+/-0.028


In [16]:
# Styled summary of the selected runs (metric x estimator rows, dataset columns in the output).
style(runs)

Unnamed: 0_level_0,dataset,fused_sharp,fused_smarp,sharp,smarp
Unnamed: 0_level_1,estimator,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AUC,C3D,0.959,0.948,0.916,0.947
AUC,CNN,0.961,0.961,0.919,0.94
AUC,FusionC3D,0.962,0.949,0.912,0.931
AUC,FusionCNN,0.961,0.96,0.934,0.94
TSS,C3D,0.81,0.782,0.748,0.762
TSS,CNN,0.802,0.795,0.713,0.747
TSS,FusionC3D,0.806,0.782,0.721,0.745
TSS,FusionCNN,0.805,0.804,0.738,0.743


## arnet: fusesize_QS

In [17]:
# Column mapping comes from the shared helper instead of an inline dict.
columns = get_columns('arnet')
# No value abbreviations are applied for these runs.
rows = {}
# Helper semantics are not visible here; presumably retrieve(experiment name,
# run name) fetches the runs and select() keeps/renames the mapped columns.
runs_raw = retrieve('arnet', 'fusesize_QS')
runs = select(runs_raw, columns, rows)
runs

Unnamed: 0,dataset,estimator,AUC,TSS
6,fused_smarp,FusionC3D,0.870811,0.655346
7,fused_smarp,FusionC3D,0.893403,0.650000
8,fused_smarp,FusionC3D,0.903260,0.651032
9,fused_smarp,FusionC3D,0.917868,0.695096
10,fused_smarp,FusionC3D,0.856200,0.585851
...,...,...,...,...
81,sharp,CNN,0.855643,0.565943
82,sharp,CNN,0.906057,0.701195
83,sharp,CNN,0.861959,0.589474
84,sharp,CNN,0.631436,0.234286


In [18]:
# Aggregate into `value+/-spread` cells, print LaTeX with centred
# multi-column headers, then show the frame.
df = organize(runs, std=True)
print(df.to_latex(multicolumn_format='c'))
df

\begin{tabular}{llllll}
\toprule
    & dataset &    fused\_sharp &    fused\_smarp &          sharp &          smarp \\
{} & estimator &                &                &                &                \\
\midrule
AUC & C3D &  0.913+/-0.034 &  0.905+/-0.019 &  0.814+/-0.121 &  0.871+/-0.026 \\
    & CNN &  0.902+/-0.044 &  0.889+/-0.015 &  0.832+/-0.115 &  0.876+/-0.009 \\
    & FusionC3D &  0.903+/-0.036 &  0.888+/-0.025 &  0.853+/-0.042 &  0.851+/-0.057 \\
    & FusionCNN &  0.905+/-0.052 &  0.883+/-0.013 &  0.850+/-0.073 &  0.853+/-0.021 \\
TSS & C3D &  0.697+/-0.042 &  0.670+/-0.026 &  0.521+/-0.160 &  0.597+/-0.056 \\
    & CNN &  0.656+/-0.071 &  0.641+/-0.049 &  0.546+/-0.182 &  0.604+/-0.036 \\
    & FusionC3D &  0.671+/-0.072 &  0.647+/-0.039 &  0.604+/-0.022 &  0.569+/-0.092 \\
    & FusionCNN &  0.685+/-0.102 &  0.632+/-0.015 &  0.560+/-0.155 &  0.564+/-0.043 \\
\bottomrule
\end{tabular}



Unnamed: 0_level_0,dataset,fused_sharp,fused_smarp,sharp,smarp
Unnamed: 0_level_1,estimator,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AUC,C3D,0.913+/-0.034,0.905+/-0.019,0.814+/-0.121,0.871+/-0.026
AUC,CNN,0.902+/-0.044,0.889+/-0.015,0.832+/-0.115,0.876+/-0.009
AUC,FusionC3D,0.903+/-0.036,0.888+/-0.025,0.853+/-0.042,0.851+/-0.057
AUC,FusionCNN,0.905+/-0.052,0.883+/-0.013,0.850+/-0.073,0.853+/-0.021
TSS,C3D,0.697+/-0.042,0.670+/-0.026,0.521+/-0.160,0.597+/-0.056
TSS,CNN,0.656+/-0.071,0.641+/-0.049,0.546+/-0.182,0.604+/-0.036
TSS,FusionC3D,0.671+/-0.072,0.647+/-0.039,0.604+/-0.022,0.569+/-0.092
TSS,FusionCNN,0.685+/-0.102,0.632+/-0.015,0.560+/-0.155,0.564+/-0.043


In [19]:
# Styled summary of the selected runs (metric x estimator rows, dataset columns in the output).
style(runs)

Unnamed: 0_level_0,dataset,fused_sharp,fused_smarp,sharp,smarp
Unnamed: 0_level_1,estimator,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AUC,C3D,0.913,0.905,0.814,0.871
AUC,CNN,0.902,0.889,0.832,0.876
AUC,FusionC3D,0.903,0.888,0.853,0.851
AUC,FusionCNN,0.905,0.883,0.85,0.853
TSS,C3D,0.697,0.67,0.521,0.597
TSS,CNN,0.656,0.641,0.546,0.604
TSS,FusionC3D,0.671,0.647,0.604,0.569
TSS,FusionCNN,0.685,0.632,0.56,0.565


## leaderboard2: sklearn

In [20]:
# Column mapping comes from the shared helper instead of an inline dict.
columns = get_columns('sklearn')
# Per-column value abbreviations: estimator class name -> short label.
rows = {
    'estimator': {
        'HistGradientBoostingClassifier': 'HGB',
        'RandomForestClassifier': 'RF',
        'SGDClassifier': 'LG',
    }
}
# Helper semantics are not visible here; presumably retrieve(experiment name,
# run name) fetches the runs and select() applies the two mappings above.
runs_raw = retrieve('leaderboard2', 'sklearn')
runs = select(runs_raw, columns, rows)
runs

Select the first from 
                          start_time tags.mlflow.runName  \
281 2021-06-04 15:43:34.477000+00:00             sklearn   
465 2021-06-04 00:56:40.893000+00:00             sklearn   

                tags.mlflow.source.git.commit  
281  084b4a84b04953ad4e004885cd4fed94986f5979  
465  9ceec256686c5d7c0580736a3ae6a21f7f45d35c  


Unnamed: 0,dataset,estimator,AUC,TSS
161,fused_sharp,HGB,0.947099,0.779633
162,fused_sharp,RF,0.928796,0.741235
163,fused_sharp,LG,0.958088,0.772955
164,fused_sharp,HGB,0.958913,0.804781
165,fused_sharp,RF,0.949705,0.776892
...,...,...,...,...
276,smarp,RF,0.957159,0.787253
277,smarp,LG,0.976000,0.839318
278,smarp,HGB,0.973456,0.826233
279,smarp,RF,0.961006,0.799327


In [21]:
# Aggregate into `value+/-spread` cells, print LaTeX with centred
# multi-column headers, then show the frame.
df = organize(runs, std=True)
print(df.to_latex(multicolumn_format='c'))
df

\begin{tabular}{llllll}
\toprule
    & dataset &    fused\_sharp &    fused\_smarp &          sharp &          smarp \\
{} & estimator &                &                &                &                \\
\midrule
AUC & HGB &  0.970+/-0.023 &  0.946+/-0.025 &  0.972+/-0.023 &  0.951+/-0.023 \\
    & LG &  0.972+/-0.023 &  0.943+/-0.023 &  0.973+/-0.023 &  0.950+/-0.024 \\
    & RF &  0.960+/-0.028 &  0.932+/-0.031 &  0.964+/-0.027 &  0.933+/-0.031 \\
TSS & HGB &  0.852+/-0.079 &  0.764+/-0.071 &  0.854+/-0.077 &  0.781+/-0.070 \\
    & LG &  0.848+/-0.073 &  0.764+/-0.061 &  0.856+/-0.074 &  0.782+/-0.073 \\
    & RF &  0.828+/-0.077 &  0.725+/-0.087 &  0.838+/-0.074 &  0.732+/-0.084 \\
\bottomrule
\end{tabular}



Unnamed: 0_level_0,dataset,fused_sharp,fused_smarp,sharp,smarp
Unnamed: 0_level_1,estimator,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AUC,HGB,0.970+/-0.023,0.946+/-0.025,0.972+/-0.023,0.951+/-0.023
AUC,LG,0.972+/-0.023,0.943+/-0.023,0.973+/-0.023,0.950+/-0.024
AUC,RF,0.960+/-0.028,0.932+/-0.031,0.964+/-0.027,0.933+/-0.031
TSS,HGB,0.852+/-0.079,0.764+/-0.071,0.854+/-0.077,0.781+/-0.070
TSS,LG,0.848+/-0.073,0.764+/-0.061,0.856+/-0.074,0.782+/-0.073
TSS,RF,0.828+/-0.077,0.725+/-0.087,0.838+/-0.074,0.732+/-0.084


In [22]:
# Styled summary of the selected runs (metric x estimator rows, dataset columns in the output).
style(runs)

Unnamed: 0_level_0,dataset,fused_sharp,fused_smarp,sharp,smarp
Unnamed: 0_level_1,estimator,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AUC,HGB,0.97,0.946,0.972,0.951
AUC,LG,0.972,0.943,0.973,0.95
AUC,RF,0.96,0.932,0.964,0.933
TSS,HGB,0.852,0.764,0.854,0.781
TSS,LG,0.848,0.764,0.856,0.781
TSS,RF,0.828,0.725,0.838,0.732


## leaderboard2: arnet

In [3]:
# Start from the shared arnet column mapping and add the BSS metric
# (presumably the Brier skill score). A new dict is built rather than calling
# .update() on the helper's return value, so that object is never mutated and
# re-running other cells cannot pick up the extra column by accident.
columns = {
    **get_columns('arnet'),
    'metrics.test/bss': 'BSS',
}
# No value abbreviations are applied for these runs.
rows = {}
# Helper semantics are not visible here; presumably retrieve(experiment name,
# run name) fetches the runs and select() keeps/renames the mapped columns.
runs_raw = retrieve('leaderboard2', 'arnet')
runs = select(runs_raw, columns, rows)
runs

Select iloc 0 from 
                          start_time tags.mlflow.runName  \
181 2021-06-06 18:45:11.099000+00:00               arnet   
488 2021-06-04 00:56:12.203000+00:00               arnet   

                tags.mlflow.source.git.commit  
181  9ceec256686c5d7c0580736a3ae6a21f7f45d35c  
488  9ceec256686c5d7c0580736a3ae6a21f7f45d35c  


Unnamed: 0,dataset,estimator,AUC,TSS,BSS
21,fused_smarp,C3D,0.894203,0.611321,0.130059
22,fused_smarp,C3D,0.879053,0.654167,0.069913
23,fused_smarp,C3D,0.915493,0.666979,0.194797
24,fused_smarp,C3D,0.919003,0.691898,-0.087610
25,fused_smarp,C3D,0.879287,0.611288,-0.126926
...,...,...,...,...,...
176,sharp,MLP,0.986885,0.912252,0.795789
177,sharp,MLP,0.982534,0.891182,0.748400
178,sharp,MLP,0.986856,0.920923,0.827172
179,sharp,MLP,0.995346,0.938889,0.882912


In [4]:
# Styled summary of the selected runs, now including the BSS rows.
style(runs)

Unnamed: 0_level_0,dataset,fused_sharp,fused_smarp,sharp,smarp
Unnamed: 0_level_1,estimator,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AUC,C3D,0.933,0.925,0.875,0.906
AUC,CNN,0.931,0.93,0.882,0.901
AUC,LSTM,0.971,0.947,0.967,0.943
AUC,MLP,0.971,0.947,0.965,0.944
TSS,C3D,0.741,0.713,0.648,0.678
TSS,CNN,0.722,0.725,0.652,0.663
TSS,LSTM,0.853,0.778,0.834,0.773
TSS,MLP,0.85,0.772,0.836,0.765
BSS,C3D,0.283,0.244,-28.202,-1.731
BSS,CNN,0.189,0.246,-0.142,-0.0


In [4]:
# NOTE(review): this cell repeats the preceding `style(runs)` cell verbatim.
style(runs)

Unnamed: 0_level_0,dataset,fused_sharp,fused_smarp,sharp,smarp
Unnamed: 0_level_1,estimator,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AUC,C3D,0.933,0.925,0.875,0.906
AUC,CNN,0.931,0.93,0.882,0.901
AUC,LSTM,0.971,0.947,0.967,0.943
AUC,MLP,0.971,0.947,0.965,0.944
TSS,C3D,0.741,0.713,0.648,0.678
TSS,CNN,0.722,0.725,0.652,0.663
TSS,LSTM,0.853,0.778,0.834,0.773
TSS,MLP,0.85,0.772,0.836,0.765
BSS,C3D,0.283,0.244,-28.202,-1.731
BSS,CNN,0.189,0.246,-0.142,-0.0


In [5]:
# Aggregate into `value+/-spread` cells, print LaTeX with centred
# multi-column headers, then show the frame.
df = organize(runs, std=True)
print(df.to_latex(multicolumn_format='c'))
df

\begin{tabular}{llllll}
\toprule
    & dataset &    fused\_sharp &    fused\_smarp &             sharp &           smarp \\
{} & estimator &                &                &                   &                 \\
\midrule
AUC & C3D &  0.933+/-0.041 &  0.925+/-0.033 &     0.875+/-0.117 &   0.907+/-0.041 \\
    & CNN &  0.931+/-0.041 &  0.930+/-0.036 &     0.882+/-0.069 &   0.901+/-0.046 \\
    & LSTM &  0.971+/-0.027 &  0.947+/-0.025 &     0.966+/-0.027 &   0.943+/-0.025 \\
    & MLP &  0.970+/-0.026 &  0.947+/-0.026 &     0.964+/-0.031 &   0.944+/-0.027 \\
TSS & C3D &  0.741+/-0.103 &  0.713+/-0.078 &     0.648+/-0.180 &   0.678+/-0.088 \\
    & CNN &  0.722+/-0.089 &  0.725+/-0.093 &     0.652+/-0.144 &   0.663+/-0.094 \\
    & LSTM &  0.853+/-0.082 &  0.778+/-0.074 &     0.834+/-0.089 &   0.773+/-0.073 \\
    & MLP &  0.850+/-0.079 &  0.772+/-0.080 &     0.836+/-0.087 &   0.765+/-0.077 \\
BSS & C3D &  0.283+/-0.335 &  0.244+/-0.242 &  -28.202+/-45.646 &  -1.731+/-5.497 \\
    & CNN 

Unnamed: 0_level_0,dataset,fused_sharp,fused_smarp,sharp,smarp
Unnamed: 0_level_1,estimator,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AUC,C3D,0.933+/-0.041,0.925+/-0.033,0.875+/-0.117,0.907+/-0.041
AUC,CNN,0.931+/-0.041,0.930+/-0.036,0.882+/-0.069,0.901+/-0.046
AUC,LSTM,0.971+/-0.027,0.947+/-0.025,0.966+/-0.027,0.943+/-0.025
AUC,MLP,0.970+/-0.026,0.947+/-0.026,0.964+/-0.031,0.944+/-0.027
TSS,C3D,0.741+/-0.103,0.713+/-0.078,0.648+/-0.180,0.678+/-0.088
TSS,CNN,0.722+/-0.089,0.725+/-0.093,0.652+/-0.144,0.663+/-0.094
TSS,LSTM,0.853+/-0.082,0.778+/-0.074,0.834+/-0.089,0.773+/-0.073
TSS,MLP,0.850+/-0.079,0.772+/-0.080,0.836+/-0.087,0.765+/-0.077
BSS,C3D,0.283+/-0.335,0.244+/-0.242,-28.202+/-45.646,-1.731+/-5.497
BSS,CNN,0.189+/-0.449,0.246+/-0.249,-0.142+/-0.543,-0.000+/-0.495


In [26]:
# Build the value for --logdir_spec from the retrieved runs (helper is opaque
# here) and open TensorBoard inline; IPython substitutes {dirs}.
dirs = tensorboard(runs_raw)
%tensorboard --logdir_spec {dirs}

Reusing TensorBoard on port 6008 (pid 15489), started 5 days, 1:54:34 ago. (Use '!kill 15489' to kill it.)

## CNN: tune_CNN

In [27]:
# Show the column mapping currently held in the kernel; its value depends on
# which cell assigned `columns` most recently.
columns

{'tags.dataset_name': 'dataset',
 'tags.estimator_name': 'estimator',
 'metrics.test/auc': 'AUC',
 'metrics.test/tss': 'TSS'}

In [6]:
# Raw MLflow run columns -> short names. Besides tags and test metrics this
# also pulls two logged parameters (DATA.SHRINKAGE, DATA.THRESH).
columns = {
    'tags.database_name': 'database',
    'tags.dataset_name': 'dataset',
    'tags.estimator_name': 'estimator',
    'params.DATA.SHRINKAGE': 'shrinkage',
    'params.DATA.THRESH': 'thresh',
    'metrics.test/auc': 'AUC',
    'metrics.test/tss': 'TSS',
}
# No value abbreviations are applied for these runs.
rows = {}
# Helper semantics are not visible here; presumably retrieve(experiment name,
# run name) fetches the runs and select() keeps/renames the columns above.
runs_raw = retrieve('CNN', 'tune_CNN')
runs = select(runs_raw, columns, rows)
runs

Select iloc 0 from 
                          start_time tags.mlflow.runName  \
133 2021-06-07 08:39:39.611000+00:00            tune_CNN   

                tags.mlflow.source.git.commit  
133  181f529d41a5631e2cd26e6138d9a0a3baab242c  


Unnamed: 0,database,dataset,estimator,shrinkage,thresh,AUC,TSS
73,M_QS_24hr,fused_sharp,CNN,1/2,150.0,0.901782,0.661842
74,M_QS_24hr,fused_sharp,CNN,1/2,150.0,0.868087,0.609524
75,M_QS_24hr,fused_sharp,CNN,1/2,150.0,0.922934,0.70303
76,M_QS_24hr,fused_sharp,CNN,log,150.0,0.92925,0.727632
77,M_QS_24hr,fused_sharp,CNN,log,150.0,0.849244,0.561905
78,M_QS_24hr,fused_sharp,CNN,log,150.0,0.90543,0.666667
79,M_QS_24hr,fused_sharp,CNN,1/2,50.0,0.933029,0.705263
80,M_QS_24hr,fused_sharp,CNN,1/2,50.0,0.862199,0.571429
81,M_QS_24hr,fused_sharp,CNN,1/2,50.0,0.933362,0.715152
82,M_QS_24hr,fused_sharp,CNN,log,50.0,0.916192,0.668421


In [7]:
# NOTE(review): ufloat is already imported in the notebook's first cell.
from uncertainties import ufloat
# Work on a copy so the `runs` frame stays untouched.
_runs = runs.copy()
# Make shrinkage categorical with categories listed as 'None', '1/2', 'log';
# values outside this list become NaN.
_runs['shrinkage'] = pd.Categorical(_runs['shrinkage'], ['None', '1/2', 'log'])
def _organize(runs, by, std):
    runs = (runs
            #.sort_values(by=by, key=lambda s: s if s.name == 'shrinkage' else s)
            .groupby(by)
            .agg(lambda s: ufloat(s.mean(), s.std()))
            .dropna()
            .unstack([-2, -1])
            .T
    )
    return runs
df = _organize(_runs, by=['database', 'dataset', 'shrinkage', 'thresh'], std=True)
print(df.to_latex(multicolumn_format='c'))
df

\begin{tabular}{lllllll}
\toprule
    &     & database & \multicolumn{2}{c}{M\_QS\_24hr} & \multicolumn{2}{c}{M\_Q\_24hr} \\
    &     & dataset &    fused\_sharp &        sharp &      fused\_sharp &          sharp \\
{} & shrinkage & thresh &                &              &                  &                \\
\midrule
AUC & None & None &    0.90+/-0.06 &  0.83+/-0.08 &    0.978+/-0.007 &  0.932+/-0.010 \\
    & 1/2 & 150 &  0.898+/-0.028 &  0.81+/-0.08 &    0.973+/-0.006 &  0.961+/-0.020 \\
    &     & 50 &    0.91+/-0.04 &  0.84+/-0.07 &  0.9767+/-0.0018 &  0.965+/-0.018 \\
    & log & 150 &    0.89+/-0.04 &  0.80+/-0.06 &    0.974+/-0.004 &  0.968+/-0.008 \\
    &     & 50 &    0.89+/-0.04 &  0.86+/-0.08 &  0.9785+/-0.0026 &  0.967+/-0.027 \\
TSS & None & None &    0.67+/-0.11 &  0.53+/-0.15 &      0.86+/-0.04 &    0.75+/-0.04 \\
    & 1/2 & 150 &    0.66+/-0.05 &  0.49+/-0.14 &    0.823+/-0.014 &    0.81+/-0.07 \\
    &     & 50 &    0.66+/-0.08 &  0.54+/-0.13 &    0.830+/-0.014 &

Unnamed: 0_level_0,Unnamed: 1_level_0,database,M_QS_24hr,M_QS_24hr,M_Q_24hr,M_Q_24hr
Unnamed: 0_level_1,Unnamed: 1_level_1,dataset,fused_sharp,sharp,fused_sharp,sharp
Unnamed: 0_level_2,shrinkage,thresh,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
AUC,,,0.90+/-0.06,0.83+/-0.08,0.978+/-0.007,0.932+/-0.010
AUC,1/2,150.0,0.898+/-0.028,0.81+/-0.08,0.973+/-0.006,0.961+/-0.020
AUC,1/2,50.0,0.91+/-0.04,0.84+/-0.07,0.9767+/-0.0018,0.965+/-0.018
AUC,log,150.0,0.89+/-0.04,0.80+/-0.06,0.974+/-0.004,0.968+/-0.008
AUC,log,50.0,0.89+/-0.04,0.86+/-0.08,0.9785+/-0.0026,0.967+/-0.027
TSS,,,0.67+/-0.11,0.53+/-0.15,0.86+/-0.04,0.75+/-0.04
TSS,1/2,150.0,0.66+/-0.05,0.49+/-0.14,0.823+/-0.014,0.81+/-0.07
TSS,1/2,50.0,0.66+/-0.08,0.54+/-0.13,0.830+/-0.014,0.827+/-0.035
TSS,log,150.0,0.65+/-0.08,0.47+/-0.12,0.820+/-0.029,0.819+/-0.015
TSS,log,50.0,0.64+/-0.06,0.59+/-0.12,0.843+/-0.022,0.835+/-0.022


In [8]:
# Fuse the two tuning parameters into one label so the summary can group on a single key.
_runs = runs.assign(shrinkage_thresh=runs['shrinkage'] + '_' + runs['thresh'])
style(_runs, by=['database', 'dataset', 'shrinkage_thresh'])

Unnamed: 0_level_0,database,M_QS_24hr,M_QS_24hr,M_Q_24hr,M_Q_24hr
Unnamed: 0_level_1,dataset,fused_sharp,sharp,fused_sharp,sharp
Unnamed: 0_level_2,shrinkage_thresh,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
AUC,1/2_150,0.898,0.808,0.973,0.961
AUC,1/2_50,0.909,0.841,0.977,0.965
AUC,None_None,0.903,0.826,0.978,0.932
AUC,log_150,0.895,0.798,0.974,0.968
AUC,log_50,0.895,0.856,0.979,0.967
TSS,1/2_150,0.658,0.494,0.823,0.808
TSS,1/2_50,0.664,0.538,0.83,0.827
TSS,None_None,0.669,0.525,0.859,0.746
TSS,log_150,0.652,0.467,0.82,0.819
TSS,log_50,0.639,0.593,0.843,0.835


In [9]:
# Seed-'0' runs on M_QS_24hr / fused_sharp (the seed is stored as a string).
mask = (
    runs_raw['params.DATA.SEED'].eq('0')
    & runs_raw['tags.database_name'].eq('M_QS_24hr')
    & runs_raw['tags.dataset_name'].eq('fused_sharp')
)
# List which shrinkage / threshold setting each selected run used.
runs_raw.loc[
    mask,
    [
        'tags.database_name',
        'tags.dataset_name',
        'params.DATA.SHRINKAGE',
        'params.DATA.THRESH',
    ],
]

Unnamed: 0,tags.database_name,tags.dataset_name,params.DATA.SHRINKAGE,params.DATA.THRESH
75,M_QS_24hr,fused_sharp,1/2,150.0
78,M_QS_24hr,fused_sharp,log,150.0
81,M_QS_24hr,fused_sharp,1/2,50.0
84,M_QS_24hr,fused_sharp,log,50.0
87,M_QS_24hr,fused_sharp,,


In [10]:
# Open TensorBoard for the runs selected by `mask` only.
dir_str = tensorboard(runs_raw[mask])
%tensorboard --logdir_spec {dir_str}

Reusing TensorBoard on port 6011 (pid 32318), started 5:21:16 ago. (Use '!kill 32318' to kill it.)

It can be observed that training without a value transformation seems to converge slightly faster.

## CNN: Li2020

In [16]:
# Tags that identify a run, mapped to short display names.
# ('params.DATA.SHRINKAGE' and 'params.DATA.THRESH' are left out for this experiment.)
columns = {
    'tags.database_name': 'database',
    'tags.dataset_name': 'dataset',
    'tags.estimator_name': 'estimator',
}
# Test-set metrics to report.
columns.update({
    'metrics.test/auc': 'AUC',
    'metrics.test/tss': 'TSS',
})
rows = dict()  # empty row spec
runs_raw = retrieve('CNN', 'Li2020')
runs = select(runs_raw, columns, rows)
runs

Select iloc 0 from 
                         start_time tags.mlflow.runName  \
72 2021-06-07 17:25:00.343000+00:00              Li2020   

               tags.mlflow.source.git.commit  
72  1099722f18b4edfc0c22ae09798120ec7be2088f  


Unnamed: 0,database,dataset,estimator,AUC,TSS
47,M_QS_24hr,fused_sharp,CNN_Li2020,0.959915,0.776316
49,M_QS_24hr,fused_sharp,CNN_Li2020,0.840317,0.548571
51,M_QS_24hr,fused_sharp,CNN_Li2020,0.921088,0.711111
53,M_QS_24hr,sharp,CNN_Li2020,0.913765,0.725
55,M_QS_24hr,sharp,CNN_Li2020,0.809105,0.495238
57,M_QS_24hr,sharp,CNN_Li2020,0.933654,0.745455
59,M_Q_24hr,fused_sharp,CNN_Li2020,0.981138,0.874794
61,M_Q_24hr,fused_sharp,CNN_Li2020,0.988676,0.9
63,M_Q_24hr,fused_sharp,CNN_Li2020,0.982983,0.9
65,M_Q_24hr,sharp,CNN_Li2020,0.98607,0.894563


In [32]:
# Summary table grouped by database, dataset and estimator.
# NOTE(review): judging by the recorded output, `style` shows per-group means rounded
# to three decimals — confirm in mlflow_helper.
style(runs, by=['database', 'dataset', 'estimator'])

Unnamed: 0_level_0,database,M_QS_24hr,M_QS_24hr,M_Q_24hr,M_Q_24hr
Unnamed: 0_level_1,dataset,fused_sharp,sharp,fused_sharp,sharp
Unnamed: 0_level_2,estimator,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
AUC,CNN_Li2020,0.907,0.885,0.984,0.982
TSS,CNN_Li2020,0.679,0.655,0.892,0.878


In [33]:
# Aggregate per (database, dataset, estimator); with std=True the recorded output shows
# "mean+/-std" entries. Print the LaTeX source, then display the table.
df = organize(runs, by=['database', 'dataset', 'estimator'], std=True)
print(df.to_latex(multicolumn_format='c'))
df

\begin{tabular}{llllll}
\toprule
    & database & \multicolumn{2}{c}{M\_QS\_24hr} & \multicolumn{2}{c}{M\_Q\_24hr} \\
    & dataset &    fused\_sharp &          sharp &    fused\_sharp &          sharp \\
{} & estimator &                &                &                &                \\
\midrule
AUC & CNN\_Li2020 &  0.907+/-0.061 &  0.886+/-0.067 &  0.984+/-0.004 &  0.982+/-0.004 \\
TSS & CNN\_Li2020 &  0.679+/-0.117 &  0.655+/-0.139 &  0.892+/-0.015 &  0.878+/-0.024 \\
\bottomrule
\end{tabular}



Unnamed: 0_level_0,database,M_QS_24hr,M_QS_24hr,M_Q_24hr,M_Q_24hr
Unnamed: 0_level_1,dataset,fused_sharp,sharp,fused_sharp,sharp
Unnamed: 0_level_2,estimator,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
AUC,CNN_Li2020,0.907+/-0.061,0.886+/-0.067,0.984+/-0.004,0.982+/-0.004
TSS,CNN_Li2020,0.679+/-0.117,0.655+/-0.139,0.892+/-0.015,0.878+/-0.024


In [19]:
# Look up the checkpoint path of the seed-'0' run on M_Q_24hr / fused_sharp.
runs_raw.loc[
    runs_raw['tags.database_name'].eq('M_Q_24hr')
    & runs_raw['params.DATA.SEED'].eq('0')
    & runs_raw['tags.dataset_name'].eq('fused_sharp'),
    [
        'tags.dataset_name',
        'tags.estimator_name',
        'tags.checkpoint',
    ],
]

Unnamed: 0,tags.dataset_name,tags.estimator_name,tags.checkpoint
63,fused_sharp,CNN_Li2020,/home/zeyusun/work/flare-prediction-smarp/lightning_logs/version_881/checkpoints/epoch=18-step=3191.ckpt


In [28]:
# Open TensorBoard for the M_Q_24hr / fused_smarp runs with seed '2'
# (the seed is compared as a string).
dirs = tensorboard(runs_raw.loc[
    (runs_raw['tags.database_name'] == 'M_Q_24hr') &
    (runs_raw['params.DATA.SEED'] == '2') &
    (runs_raw['tags.dataset_name'] == 'fused_smarp')
])
%tensorboard --logdir_spec {dirs}

## CNN: Li2020_convbn

In [34]:
# Tags that identify a run, mapped to short display names.
# ('params.DATA.SHRINKAGE' and 'params.DATA.THRESH' are left out for this experiment.)
columns = {
    'tags.database_name': 'database',
    'tags.dataset_name': 'dataset',
    'tags.estimator_name': 'estimator',
}
# Test-set metrics to report.
columns.update({
    'metrics.test/auc': 'AUC',
    'metrics.test/tss': 'TSS',
})
rows = dict()  # empty row spec
runs_raw = retrieve('CNN', 'Li2020_convbn')
runs = select(runs_raw, columns, rows)
runs

Unnamed: 0,database,dataset,estimator,AUC,TSS
1,M_QS_24hr,fused_sharp,CNN_Li2020,0.943956,0.755263
3,M_QS_24hr,fused_sharp,CNN_Li2020,0.854973,0.6
5,M_QS_24hr,fused_sharp,CNN_Li2020,0.935939,0.749495
7,M_QS_24hr,sharp,CNN_Li2020,0.941208,0.742105
9,M_QS_24hr,sharp,CNN_Li2020,0.779258,0.481905
11,M_QS_24hr,sharp,CNN_Li2020,0.932113,0.761616
13,M_Q_24hr,fused_sharp,CNN_Li2020,0.967816,0.803954
15,M_Q_24hr,fused_sharp,CNN_Li2020,0.958759,0.812963
17,M_Q_24hr,fused_sharp,CNN_Li2020,0.979786,0.84878
19,M_Q_24hr,sharp,CNN_Li2020,0.985668,0.904448


In [35]:
# Aggregate per (database, dataset, estimator) with std=True ("mean+/-std" in the
# recorded output), print the LaTeX source, then display the table.
df = organize(runs, by=['database', 'dataset', 'estimator'], std=True)
print(df.to_latex(multicolumn_format='c'))
df

\begin{tabular}{llllll}
\toprule
    & database & \multicolumn{2}{c}{M\_QS\_24hr} & \multicolumn{2}{c}{M\_Q\_24hr} \\
    & dataset &    fused\_sharp &          sharp &    fused\_sharp &          sharp \\
{} & estimator &                &                &                &                \\
\midrule
AUC & CNN\_Li2020 &  0.912+/-0.049 &  0.884+/-0.091 &  0.969+/-0.011 &  0.970+/-0.024 \\
TSS & CNN\_Li2020 &  0.702+/-0.088 &  0.662+/-0.156 &  0.822+/-0.024 &  0.850+/-0.082 \\
\bottomrule
\end{tabular}



Unnamed: 0_level_0,database,M_QS_24hr,M_QS_24hr,M_Q_24hr,M_Q_24hr
Unnamed: 0_level_1,dataset,fused_sharp,sharp,fused_sharp,sharp
Unnamed: 0_level_2,estimator,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
AUC,CNN_Li2020,0.912+/-0.049,0.884+/-0.091,0.969+/-0.011,0.970+/-0.024
TSS,CNN_Li2020,0.702+/-0.088,0.662+/-0.156,0.822+/-0.024,0.850+/-0.082


## CNN: CNN_comparedwithLi

In [3]:
# Tags that identify a run, mapped to short display names.
# ('params.DATA.SHRINKAGE' and 'params.DATA.THRESH' are left out for this experiment.)
columns = {
    'tags.database_name': 'database',
    'tags.dataset_name': 'dataset',
    'tags.estimator_name': 'estimator',
}
# Test-set metrics to report.
columns.update({
    'metrics.test/auc': 'AUC',
    'metrics.test/tss': 'TSS',
})
rows = dict()  # empty row spec
runs_raw = retrieve('CNN', 'CNN_comparedwithLi')
runs = select(runs_raw, columns, rows)
runs

Unnamed: 0,database,dataset,estimator,AUC,TSS
0,M_QS_24hr,fused_sharp,CNN,0.921037,0.685526
1,M_QS_24hr,fused_sharp,CNN,0.805317,0.464762
2,M_QS_24hr,fused_sharp,CNN,0.914888,0.672727
3,M_QS_24hr,sharp,CNN,0.850596,0.567105
4,M_QS_24hr,sharp,CNN,0.731597,0.335238
5,M_QS_24hr,sharp,CNN,0.877594,0.658586
6,M_Q_24hr,fused_sharp,CNN,0.972088,0.848435
7,M_Q_24hr,fused_sharp,CNN,0.959374,0.768519
8,M_Q_24hr,fused_sharp,CNN,0.970654,0.821951
9,M_Q_24hr,sharp,CNN,0.938152,0.736409


In [4]:
# Aggregate per (database, dataset, estimator) with std=True ("mean+/-std" in the
# recorded output), print the LaTeX source, then display the table.
df = organize(runs, by=['database', 'dataset', 'estimator'], std=True)
print(df.to_latex(multicolumn_format='c'))
df

\begin{tabular}{llllll}
\toprule
    & database & \multicolumn{2}{c}{M\_QS\_24hr} & \multicolumn{2}{c}{M\_Q\_24hr} \\
    & dataset &    fused\_sharp &          sharp &    fused\_sharp &          sharp \\
{} & estimator &                &                &                &                \\
\midrule
AUC & CNN &  0.880+/-0.065 &  0.820+/-0.078 &  0.967+/-0.007 &  0.945+/-0.006 \\
TSS & CNN &  0.608+/-0.124 &  0.520+/-0.167 &  0.813+/-0.041 &  0.773+/-0.036 \\
\bottomrule
\end{tabular}



Unnamed: 0_level_0,database,M_QS_24hr,M_QS_24hr,M_Q_24hr,M_Q_24hr
Unnamed: 0_level_1,dataset,fused_sharp,sharp,fused_sharp,sharp
Unnamed: 0_level_2,estimator,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
AUC,CNN,0.880+/-0.065,0.820+/-0.078,0.967+/-0.007,0.945+/-0.006
TSS,CNN,0.608+/-0.124,0.520+/-0.167,0.813+/-0.041,0.773+/-0.036


## CNN: tune_arch

In [18]:
# Tags that identify a run, mapped to short display names.
# ('params.DATA.SHRINKAGE' and 'params.DATA.THRESH' are left out for this experiment.)
columns = {
    'tags.database_name': 'database',
    'tags.dataset_name': 'dataset',
    'tags.estimator_name': 'estimator',
}
# Test-set metrics to report.
columns.update({
    'metrics.test/auc': 'AUC',
    'metrics.test/tss': 'TSS',
})
rows = dict()  # empty row spec
runs_raw = retrieve('CNN', 'tune_arch', p=0)
runs = select(runs_raw, columns, rows)
runs

Select iloc 0 from 
                         start_time tags.mlflow.runName  \
5  2021-06-15 17:49:09.318000+00:00           tune_arch   
26 2021-06-14 01:58:33.648000+00:00           tune_arch   
33 2021-06-14 00:36:03.100000+00:00           tune_arch   

               tags.mlflow.source.git.commit  
5   70646f9b0549efb71ec506e6c42d1a9d7ce32ae9  
26  70646f9b0549efb71ec506e6c42d1a9d7ce32ae9  
33  9baa229455de3fd2f5bf8b9767dc97e655c122c5  


Unnamed: 0,database,dataset,estimator,AUC,TSS
0,M_Q_24hr,fused_sharp,CNN,0.986718,0.89404
1,M_Q_24hr,fused_sharp,CNN,0.951195,0.786116
2,M_Q_24hr,fused_sharp,CNN,0.988331,0.917628
3,M_Q_24hr,fused_sharp,CNN,0.986108,0.87963
4,M_Q_24hr,fused_sharp,CNN,0.990473,0.929268


In [19]:
# Aggregate per (database, dataset, estimator) with std=True ("mean+/-std" in the
# recorded output), print the LaTeX source, then display the table.
df = organize(runs, by=['database', 'dataset', 'estimator'], std=True)
print(df.to_latex(multicolumn_format='c'))
df

\begin{tabular}{lll}
\toprule
    & database &       M\_Q\_24hr \\
    & dataset &    fused\_sharp \\
{} & estimator &                \\
\midrule
AUC & CNN &  0.981+/-0.017 \\
TSS & CNN &  0.881+/-0.057 \\
\bottomrule
\end{tabular}



Unnamed: 0_level_0,database,M_Q_24hr
Unnamed: 0_level_1,dataset,fused_sharp
Unnamed: 0_level_2,estimator,Unnamed: 2_level_2
AUC,CNN,0.981+/-0.017
TSS,CNN,0.881+/-0.057


In [22]:
# NOTE(review): this cell repeats the previous one verbatim, but its recorded output
# covers M_QS_24hr/M_Q_24hr x fused_sharp/sharp, whereas the `runs` shown in this
# section only contains M_Q_24hr / fused_sharp. The output was presumably produced
# with a different `runs` (different execution count) and is likely stale.
df = organize(runs, by=['database', 'dataset', 'estimator'], std=True)
print(df.to_latex(multicolumn_format='c'))
df

\begin{tabular}{llllll}
\toprule
    & database & \multicolumn{2}{c}{M\_QS\_24hr} & \multicolumn{2}{c}{M\_Q\_24hr} \\
    & dataset &    fused\_sharp &          sharp &    fused\_sharp &          sharp \\
{} & estimator &                &                &                &                \\
\midrule
AUC & CNN &  0.900+/-0.035 &  0.826+/-0.066 &  0.976+/-0.005 &  0.959+/-0.020 \\
TSS & CNN &  0.656+/-0.068 &  0.523+/-0.120 &  0.835+/-0.026 &  0.807+/-0.049 \\
\bottomrule
\end{tabular}



Unnamed: 0_level_0,database,M_QS_24hr,M_QS_24hr,M_Q_24hr,M_Q_24hr
Unnamed: 0_level_1,dataset,fused_sharp,sharp,fused_sharp,sharp
Unnamed: 0_level_2,estimator,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
AUC,CNN,0.900+/-0.035,0.826+/-0.066,0.976+/-0.005,0.959+/-0.020
TSS,CNN,0.656+/-0.068,0.523+/-0.120,0.835+/-0.026,0.807+/-0.049


In [24]:
# Open TensorBoard for every run retrieved for this experiment.
dirs = tensorboard(runs_raw)
%tensorboard --logdir_spec {dirs}

## leaderboard3: LSTM_CNN

In [21]:
# Tags and seed that identify a run, mapped to short display names.
columns = {
    'tags.database_name': 'database',
    'tags.dataset_name': 'dataset',
    'tags.estimator_name': 'estimator',
    'params.DATA.SEED': 'seed',
}
# Test-set metrics to report.
columns.update({
    'metrics.test/accuracy': 'ACC',
    'metrics.test/auc': 'AUC',
    'metrics.test/tss': 'TSS',
    'metrics.test/hss2': 'HSS',
    'metrics.test/bss': 'BSS',
})
rows = dict()  # empty row spec
runs_raw = retrieve('leaderboard3', 'LSTM_CNN')  # `p=0` left disabled
runs = select(runs_raw, columns, rows)
# Parse the epoch and step numbers out of each checkpoint file name.
ckpt = (
    runs_raw['tags.checkpoint']
    .str.extract(r'epoch=(?P<epoch>[0-9]+)-step=(?P<step>[0-9]+)')
    .astype(int)
)
# Column-wise concat aligns on the index shared by `runs` and `runs_raw`.
runs = pd.concat([runs, ckpt], axis=1)
(
    runs.style
    .set_caption('The columns `epoch` and `step` are the numbers of training epochs and steps of the selected model. Early stopping is applied in each training process and the model with the highest validation AUC among all epochs is selected. For LSTMs, training lasts for at most 20 epochs (called max_epochs) and early-stopped if the validation AUC is not increasing for 5 epochs (called patience). CNNs has max_epochs = 80 and patience = 5.')
    .background_gradient(axis=0)
)

Select iloc 0 from 
                         start_time tags.mlflow.runName  \
40 2021-06-27 16:03:33.239000+00:00            LSTM_CNN   

               tags.mlflow.source.git.commit  
40  e13810ffddafdb818df7c4de68d9eafd67861e8c  


Unnamed: 0,database,dataset,estimator,seed,ACC,AUC,TSS,HSS,BSS,epoch,step
0,M_Q_24hr,fused_smarp,CNN,4,0.902244,0.948103,0.804487,0.804487,-0.104738,0,167
1,M_Q_24hr,fused_smarp,CNN,3,0.93326,0.978369,0.866521,0.866521,0.623621,20,3149
2,M_Q_24hr,fused_smarp,CNN,2,0.918246,0.966363,0.836493,0.836493,0.224443,5,893
3,M_Q_24hr,fused_smarp,CNN,1,0.912926,0.968733,0.825853,0.825853,0.584576,9,1649
4,M_Q_24hr,fused_smarp,CNN,0,0.882848,0.957071,0.765695,0.765695,0.495979,12,2053
5,M_Q_24hr,fused_smarp,LSTM,4,0.90625,0.953723,0.8125,0.8125,0.563004,19,3359
6,M_Q_24hr,fused_smarp,LSTM,3,0.916849,0.973747,0.833698,0.833698,0.637802,15,2399
7,M_Q_24hr,fused_smarp,LSTM,2,0.946682,0.978889,0.893365,0.893365,0.695828,19,2979
8,M_Q_24hr,fused_smarp,LSTM,1,0.92325,0.972973,0.846499,0.846499,0.567364,14,2474
9,M_Q_24hr,fused_smarp,LSTM,0,0.910314,0.969635,0.820628,0.820628,0.639814,14,2369


In [22]:
# Aggregate with the helper's default grouping (dataset x estimator in the recorded
# output) and std=True, print the LaTeX source, then display the table.
df = organize(runs, std=True)
print(df.to_latex(multicolumn_format='c'))
df

\begin{tabular}{llllll}
\toprule
     & dataset &          fused\_sharp &          fused\_smarp &                sharp &               smarp \\
{} & estimator &                      &                      &                      &                     \\
\midrule
ACC & CNN &        0.943+/-0.030 &        0.910+/-0.019 &        0.934+/-0.011 &       0.888+/-0.021 \\
     & LSTM &        0.957+/-0.012 &        0.921+/-0.016 &        0.951+/-0.012 &       0.918+/-0.017 \\
AUC & CNN &        0.983+/-0.013 &        0.964+/-0.012 &        0.981+/-0.004 &       0.956+/-0.016 \\
     & LSTM &        0.988+/-0.004 &        0.970+/-0.010 &        0.986+/-0.005 &       0.965+/-0.009 \\
TSS & CNN &        0.886+/-0.059 &        0.820+/-0.038 &        0.867+/-0.023 &       0.776+/-0.041 \\
     & LSTM &        0.913+/-0.025 &        0.841+/-0.032 &        0.901+/-0.024 &       0.836+/-0.034 \\
HSS & CNN &        0.886+/-0.059 &        0.820+/-0.038 &        0.867+/-0.023 &       0.776+/-0.041 \\
    

Unnamed: 0_level_0,dataset,fused_sharp,fused_smarp,sharp,smarp
Unnamed: 0_level_1,estimator,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ACC,CNN,0.943+/-0.030,0.910+/-0.019,0.934+/-0.011,0.888+/-0.021
ACC,LSTM,0.957+/-0.012,0.921+/-0.016,0.951+/-0.012,0.918+/-0.017
AUC,CNN,0.983+/-0.013,0.964+/-0.012,0.981+/-0.004,0.956+/-0.016
AUC,LSTM,0.988+/-0.004,0.970+/-0.010,0.986+/-0.005,0.965+/-0.009
TSS,CNN,0.886+/-0.059,0.820+/-0.038,0.867+/-0.023,0.776+/-0.041
TSS,LSTM,0.913+/-0.025,0.841+/-0.032,0.901+/-0.024,0.836+/-0.034
HSS,CNN,0.886+/-0.059,0.820+/-0.038,0.867+/-0.023,0.776+/-0.041
HSS,LSTM,0.913+/-0.025,0.841+/-0.032,0.901+/-0.024,0.836+/-0.034
BSS,CNN,0.343+/-0.388,0.365+/-0.305,0.619+/-0.125,0.274+/-0.493
BSS,LSTM,0.793+/-0.055,0.621+/-0.056,0.757+/-0.062,0.581+/-0.047


In [23]:
# Styled summary with the helper's default grouping (dataset x estimator in the output).
style(runs)

Unnamed: 0_level_0,dataset,fused_sharp,fused_smarp,sharp,smarp
Unnamed: 0_level_1,estimator,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ACC,CNN,0.943,0.91,0.934,0.888
ACC,LSTM,0.957,0.921,0.951,0.918
AUC,CNN,0.983,0.964,0.981,0.956
AUC,LSTM,0.988,0.97,0.986,0.965
TSS,CNN,0.886,0.82,0.867,0.776
TSS,LSTM,0.913,0.841,0.901,0.837
HSS,CNN,0.886,0.82,0.867,0.776
HSS,LSTM,0.913,0.841,0.901,0.837
BSS,CNN,0.343,0.365,0.619,0.274
BSS,LSTM,0.793,0.621,0.757,0.581


In [30]:
# Seed-'0' CNN runs on the 'sharp' and 'fused_sharp' datasets.
# Alternatives tried before (kept for reference):
#   - add 'LSTM' to the estimator list
#   - runs_raw['artifact_uri'].str.contains('ce3844')  # one particularly bad learning curve observed in tensorboard
#   - [10]  # this is the worst in all runs
mask = (
    runs_raw['params.DATA.SEED'].eq('0')
    & runs_raw['tags.dataset_name'].isin(['sharp', 'fused_sharp'])
    & runs_raw['tags.estimator_name'].isin(['CNN'])
)
runs_raw.loc[
    mask,
    [
        'tags.dataset_name',
        'tags.estimator_name',
        'params.DATA.SEED',
        'artifact_uri',
        'tags.checkpoint',
    ],
]

Unnamed: 0,tags.dataset_name,tags.estimator_name,params.DATA.SEED,artifact_uri,tags.checkpoint
24,fused_sharp,CNN,0,file:///home/zeyusun/work/flare-prediction-smarp/mlruns/15/35bc4235f42d4d1dbf413f9141c43713/artifacts,/home/zeyusun/work/flare-prediction-smarp/lightning_logs/version_960/checkpoints/epoch=4-step=839.ckpt
34,sharp,CNN,0,file:///home/zeyusun/work/flare-prediction-smarp/mlruns/15/8abf51da38b447f793c87b3c560efa88/artifacts,/home/zeyusun/work/flare-prediction-smarp/lightning_logs/version_950/checkpoints/epoch=27-step=1539.ckpt


In [31]:
# Open TensorBoard for the runs selected by `mask` only.
dirs = tensorboard(runs_raw.loc[mask])
%tensorboard --logdir_spec {dirs}

## Plain language summary

In [15]:
# Tags and seed that identify a run, mapped to short display names.
columns = {
    'tags.database_name': 'database',
    'tags.dataset_name': 'dataset',
    'tags.estimator_name': 'estimator',
    'params.DATA.SEED': 'seed',
}
# Test-set metrics to report.
columns.update({
    'metrics.test/accuracy': 'ACC',
    'metrics.test/auc': 'AUC',
    'metrics.test/tss': 'TSS',
    'metrics.test/hss2': 'HSS',
    'metrics.test/bss': 'BSS',
})
rows = dict()  # empty row spec
runs_raw = retrieve('leaderboard3', 'LSTM_CNN')  # `p=0` left disabled
runs = select(runs_raw, columns, rows)
# Shade each column by value to make good and bad runs stand out.
runs.style.background_gradient(axis=0)

Select iloc 0 from 
                         start_time tags.mlflow.runName  \
40 2021-06-27 16:03:33.239000+00:00            LSTM_CNN   

               tags.mlflow.source.git.commit  
40  e13810ffddafdb818df7c4de68d9eafd67861e8c  


Unnamed: 0,database,dataset,estimator,seed,ACC,AUC,TSS,HSS,BSS
0,M_Q_24hr,fused_smarp,CNN,4,0.902244,0.948103,0.804487,0.804487,-0.104738
1,M_Q_24hr,fused_smarp,CNN,3,0.93326,0.978369,0.866521,0.866521,0.623621
2,M_Q_24hr,fused_smarp,CNN,2,0.918246,0.966363,0.836493,0.836493,0.224443
3,M_Q_24hr,fused_smarp,CNN,1,0.912926,0.968733,0.825853,0.825853,0.584576
4,M_Q_24hr,fused_smarp,CNN,0,0.882848,0.957071,0.765695,0.765695,0.495979
5,M_Q_24hr,fused_smarp,LSTM,4,0.90625,0.953723,0.8125,0.8125,0.563004
6,M_Q_24hr,fused_smarp,LSTM,3,0.916849,0.973747,0.833698,0.833698,0.637802
7,M_Q_24hr,fused_smarp,LSTM,2,0.946682,0.978889,0.893365,0.893365,0.695828
8,M_Q_24hr,fused_smarp,LSTM,1,0.92325,0.972973,0.846499,0.846499,0.567364
9,M_Q_24hr,fused_smarp,LSTM,0,0.910314,0.969635,0.820628,0.820628,0.639814


In [9]:
# NOTE(review): this whole cell is disabled code (LSTM-only aggregation per dataset);
# it has no effect when run.
# Only LSTM
# runs = runs[runs['estimator'] == 'LSTM']
#
# df = runs.groupby(['dataset']).agg('mean').T
# df.style.background_gradient(axis=1)
#
# df = runs.groupby(['dataset']).agg(lambda s: ufloat(s.mean(), s.std())).T
# df

In [10]:
# Styled summary with the helper's default grouping (dataset x estimator in the output).
style(runs)

Unnamed: 0_level_0,dataset,fused_sharp,fused_smarp,sharp,smarp
Unnamed: 0_level_1,estimator,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ACC,CNN,0.943,0.91,0.934,0.888
ACC,LSTM,0.957,0.921,0.951,0.918
AUC,CNN,0.983,0.964,0.981,0.956
AUC,LSTM,0.988,0.97,0.986,0.965
TSS,CNN,0.886,0.82,0.867,0.776
TSS,LSTM,0.913,0.841,0.901,0.837
HSS,CNN,0.886,0.82,0.867,0.776
HSS,LSTM,0.913,0.841,0.901,0.837
BSS,CNN,0.343,0.365,0.619,0.274
BSS,LSTM,0.793,0.621,0.757,0.581


In [12]:
# Same grouping, with std=True ("mean+/-std" entries in the recorded output).
organize(runs, std=True)

Unnamed: 0_level_0,dataset,fused_sharp,fused_smarp,sharp,smarp
Unnamed: 0_level_1,estimator,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ACC,CNN,0.943+/-0.030,0.910+/-0.019,0.934+/-0.011,0.888+/-0.021
ACC,LSTM,0.957+/-0.012,0.921+/-0.016,0.951+/-0.012,0.918+/-0.017
AUC,CNN,0.983+/-0.013,0.964+/-0.012,0.981+/-0.004,0.956+/-0.016
AUC,LSTM,0.988+/-0.004,0.970+/-0.010,0.986+/-0.005,0.965+/-0.009
TSS,CNN,0.886+/-0.059,0.820+/-0.038,0.867+/-0.023,0.776+/-0.041
TSS,LSTM,0.913+/-0.025,0.841+/-0.032,0.901+/-0.024,0.836+/-0.034
HSS,CNN,0.886+/-0.059,0.820+/-0.038,0.867+/-0.023,0.776+/-0.041
HSS,LSTM,0.913+/-0.025,0.841+/-0.032,0.901+/-0.024,0.836+/-0.034
BSS,CNN,0.343+/-0.388,0.365+/-0.305,0.619+/-0.125,0.274+/-0.493
BSS,LSTM,0.793+/-0.055,0.621+/-0.056,0.757+/-0.062,0.581+/-0.047


In [None]:
# NOTE(review): unfinished cell (never executed). This binds the `tensorboard` helper
# itself to `dirs` instead of calling it. Presumably it was meant to read
# `dirs = tensorboard(runs_raw)` followed by `%tensorboard --logdir_spec {dirs}`,
# as in the other sections — confirm and complete, or delete the cell.
dirs = tensorboard

## leaderboard5: no_max_tss_thresh

In [135]:
# Tags and seed that identify a run, mapped to short display names.
columns = {
    'tags.database_name': 'database',
    'tags.dataset_name': 'dataset',
    'tags.estimator_name': 'estimator',
    'params.DATA.SEED': 'seed',
}
# Test-set metrics to report.
columns.update({
    'metrics.test/accuracy': 'ACC',
    'metrics.test/auc': 'AUC',
    'metrics.test/tss': 'TSS',
    'metrics.test/hss2': 'HSS',
    'metrics.test/bss': 'BSS',
})
rows = dict()  # empty row spec
runs_raw = retrieve('leaderboard5', 'no_max_tss_thresh')  # `p=0` left disabled
runs = select(runs_raw, columns, rows)
# Shade each column by value to make good and bad runs stand out.
runs.style.background_gradient(axis=0)

Select iloc 0 from 
                         start_time tags.mlflow.runName
40 2021-07-27 04:48:10.164000+00:00   no_max_tss_thresh


Unnamed: 0,database,dataset,estimator,seed,ACC,AUC,TSS,HSS,BSS
0,M_Q_24hr,fused_smarp,CNN,4,0.71234,0.94664,0.424679,0.424679,-0.190071
1,M_Q_24hr,fused_smarp,CNN,3,0.926149,0.970513,0.852298,0.852298,0.716083
2,M_Q_24hr,fused_smarp,CNN,2,0.800948,0.947589,0.601896,0.601896,0.283664
3,M_Q_24hr,fused_smarp,CNN,1,0.718582,0.947527,0.437163,0.437163,-0.11521
4,M_Q_24hr,fused_smarp,CNN,0,0.733744,0.902389,0.467489,0.467489,-0.086442
5,M_Q_24hr,fused_smarp,LSTM,4,0.902244,0.955338,0.804487,0.804487,0.600679
6,M_Q_24hr,fused_smarp,LSTM,3,0.908096,0.973316,0.816193,0.816193,0.663962
7,M_Q_24hr,fused_smarp,LSTM,2,0.929502,0.977551,0.859005,0.859005,0.726263
8,M_Q_24hr,fused_smarp,LSTM,1,0.882855,0.973022,0.765709,0.765709,0.553694
9,M_Q_24hr,fused_smarp,LSTM,0,0.911996,0.973727,0.823991,0.823991,0.667196


In [137]:
# Seed-'3' CNN runs on the 'sharp' and 'fused_sharp' datasets.
# Alternatives tried before (kept for reference):
#   - add 'LSTM' to the estimator list
#   - runs_raw['artifact_uri'].str.contains('ce3844')  # one particularly bad learning curve observed in tensorboard
#   - [10]  # this is the worst in all runs
mask = (
    runs_raw['params.DATA.SEED'].eq('3')
    & runs_raw['tags.dataset_name'].isin(['sharp', 'fused_sharp'])
    & runs_raw['tags.estimator_name'].isin(['CNN'])
)
#runs_raw = runs_raw.rename(columns={'tags.checkpoint_best': 'tags.checkpoint'})
runs_raw.loc[
    mask,
    [
        'tags.dataset_name',
        'tags.estimator_name',
        'params.DATA.SEED',
        'artifact_uri',
        'tags.checkpoint',
    ],
]

Unnamed: 0,tags.dataset_name,tags.estimator_name,params.DATA.SEED,artifact_uri,tags.checkpoint
21,fused_sharp,CNN,3,file:///home/zeyusun/work/flare-prediction-smarp/mlruns/17/34dff05c0a4c442ea6c5464889561bb5/artifacts,/home/zeyusun/work/flare-prediction-smarp/lightning_logs/version_1128/checkpoints/last.ckpt
31,sharp,CNN,3,file:///home/zeyusun/work/flare-prediction-smarp/mlruns/17/df30e1cda73d4b74a1bbc3ef459ab2c4/artifacts,/home/zeyusun/work/flare-prediction-smarp/lightning_logs/version_1118/checkpoints/last.ckpt


In [138]:
# Open TensorBoard for the runs selected by `mask` only.
dirs = tensorboard(runs_raw.loc[mask])
%tensorboard --logdir_spec {dirs}

In [116]:
# Use the best checkpoint (not the last).
#
# For every run in `runs_raw`, locate the `epoch=*.ckpt` file stored next to its
# `last.ckpt`, re-run the test loop with it, and record the evaluation as a nested
# MLflow run under the `leaderboard5_1` experiment.

from glob import glob
import pytorch_lightning as pl
import mlflow

from arnet.modeling.learner import Learner
from arnet.dataset import ActiveRegionDataModule

# Model names stored in the learner config -> estimator labels used in this notebook.
estimator_name_map = {
    'SimpleLSTM': 'LSTM',
    'CNN_Li2020': 'CNN',
}
mlflow.set_experiment('leaderboard5_1')
with mlflow.start_run(run_name='checkpoint_best'):
    for checkpoint in runs_raw.loc[:, 'tags.checkpoint']:
        # Resolve the checkpoint BEFORE opening the nested run: a missing file then
        # fails with a clear message and leaves no empty nested run behind.
        pattern = checkpoint.replace('last.ckpt', 'epoch=*.ckpt')
        candidates = sorted(glob(pattern))  # glob order is arbitrary; sort for determinism
        if not candidates:
            raise FileNotFoundError(f'No best checkpoint matching {pattern!r}')
        checkpoint_best = candidates[0]

        with mlflow.start_run(nested=True):
            learner = Learner.load_from_checkpoint(checkpoint_best)
            # hotfix (disabled): prefix the data paths with the project root
            #learner.cfg.DATA.DATABASE = '/home/zeyusun/work/flare-prediction-smarp/' + str(learner.cfg.DATA.DATABASE)
            #learner.cfg.DATA.AUXDATA = '/home/zeyusun/work/flare-prediction-smarp/' + str(learner.cfg.DATA.AUXDATA)
            # Rebuild the trainer from the settings saved in the checkpoint's config.
            kwargs = learner.cfg.TRAINER.todict()
            #kwargs['default_root_dir'] = 'lightning_logs_dev'
            trainer = pl.Trainer(**kwargs)

            dm = ActiveRegionDataModule(learner.cfg)

            trainer.test(learner, datamodule=dm)

            # Record the configuration and the tags the result tables group by.
            mlflow.log_params(learner.cfg.flatten())
            mlflow.set_tag('checkpoint_best', checkpoint_best)
            mlflow.set_tag('dataset_name', learner.cfg.DATA.DATASET)
            mlflow.set_tag('estimator_name', estimator_name_map[learner.cfg.LEARNER.MODEL.NAME])

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
Using native 16bit precision.
  self._construct_datasets(balanced=cfg.DATA.BALANCED)
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Testing: 0it [00:00, ?it/s]



--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test/accuracy': 0.8685897588729858,
 'test/auc': 0.957534670829773,
 'test/bss': 0.511015772819519,
 'test/f1': 0.8586207032203674,
 'test/hss1': 0.7371794581413269,
 'test/hss2': 0.7371794581413269,
 'test/loss': 0.31879207491874695,
 'test/precision': 0.9291045069694519,
 'test/recall': 0.7980769276618958,
 'test/tss': 0.7371795177459717}
--------------------------------------------------------------------------------


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
Using native 16bit precision.
  self._construct_datasets(balanced=cfg.DATA.BALANCED)
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Testing: 0it [00:00, ?it/s]



--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test/accuracy': 0.9277899265289307,
 'test/auc': 0.9730547666549683,
 'test/bss': 0.6827516555786133,
 'test/f1': 0.9285714030265808,
 'test/hss1': 0.8555798530578613,
 'test/hss2': 0.8555798530578613,
 'test/loss': 0.21501336991786957,
 'test/precision': 0.91862952709198,
 'test/recall': 0.9387308359146118,
 'test/tss': 0.8555798530578613}
--------------------------------------------------------------------------------


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
Using native 16bit precision.
  self._construct_datasets(balanced=cfg.DATA.BALANCED)
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Testing: 0it [00:00, ?it/s]



--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test/accuracy': 0.879146933555603,
 'test/auc': 0.9681975841522217,
 'test/bss': 0.44674521684646606,
 'test/f1': 0.8716980814933777,
 'test/hss1': 0.758293867111206,
 'test/hss2': 0.758293867111206,
 'test/loss': 0.2602822482585907,
 'test/precision': 0.9289544224739075,
 'test/recall': 0.821090042591095,
 'test/tss': 0.7582938075065613}
--------------------------------------------------------------------------------


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
Using native 16bit precision.
  self._construct_datasets(balanced=cfg.DATA.BALANCED)
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Testing: 0it [00:00, ?it/s]



--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test/accuracy': 0.8527827858924866,
 'test/auc': 0.9615731835365295,
 'test/bss': 0.303702712059021,
 'test/f1': 0.8351758718490601,
 'test/hss1': 0.7055655121803284,
 'test/hss2': 0.7055655121803284,
 'test/loss': 0.31153252720832825,
 'test/precision': 0.948630154132843,
 'test/recall': 0.7459604740142822,
 'test/tss': 0.7055655121803284}
--------------------------------------------------------------------------------


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
Using native 16bit precision.
  self._construct_datasets(balanced=cfg.DATA.BALANCED)
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Testing: 0it [00:00, ?it/s]

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
Using native 16bit precision.


--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test/accuracy': 0.8873318433761597,
 'test/auc': 0.9592679738998413,
 'test/bss': 0.5803194046020508,
 'test/f1': 0.8899835348129272,
 'test/hss1': 0.7746636867523193,
 'test/hss2': 0.7746636867523193,
 'test/loss': 0.3154093325138092,
 'test/precision': 0.8695186972618103,
 'test/recall': 0.9114349484443665,
 'test/tss': 0.7746636271476746}
--------------------------------------------------------------------------------


  self._construct_datasets(balanced=cfg.DATA.BALANCED)
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Testing: 0it [00:00, ?it/s]

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
Using native 16bit precision.


--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test/accuracy': 0.9022436141967773,
 'test/auc': 0.9553375244140625,
 'test/bss': 0.6006791591644287,
 'test/f1': 0.9037854075431824,
 'test/hss1': 0.8044871687889099,
 'test/hss2': 0.8044871687889099,
 'test/loss': 0.29952675104141235,
 'test/precision': 0.8897515535354614,
 'test/recall': 0.9182692170143127,
 'test/tss': 0.8044871687889099}
--------------------------------------------------------------------------------


  self._construct_datasets(balanced=cfg.DATA.BALANCED)
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Testing: 0it [00:00, ?it/s]

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
Using native 16bit precision.


--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test/accuracy': 0.9059081077575684,
 'test/auc': 0.973426342010498,
 'test/bss': 0.6566917896270752,
 'test/f1': 0.9109731316566467,
 'test/hss1': 0.8118162155151367,
 'test/hss2': 0.8118162155151367,
 'test/loss': 0.25055891275405884,
 'test/precision': 0.8644400835037231,
 'test/recall': 0.9628008604049683,
 'test/tss': 0.8118161559104919}
--------------------------------------------------------------------------------


  self._construct_datasets(balanced=cfg.DATA.BALANCED)
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Testing: 0it [00:00, ?it/s]

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
Using native 16bit precision.


--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test/accuracy': 0.9271327257156372,
 'test/auc': 0.9771498441696167,
 'test/bss': 0.7197312712669373,
 'test/f1': 0.9305477738380432,
 'test/hss1': 0.8542653918266296,
 'test/hss2': 0.8542653918266296,
 'test/loss': 0.20914258062839508,
 'test/precision': 0.8888888955116272,
 'test/recall': 0.9763033390045166,
 'test/tss': 0.8542654514312744}
--------------------------------------------------------------------------------


  self._construct_datasets(balanced=cfg.DATA.BALANCED)
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Testing: 0it [00:00, ?it/s]

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
Using native 16bit precision.


--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test/accuracy': 0.8689407706260681,
 'test/auc': 0.9685486555099487,
 'test/bss': 0.469326913356781,
 'test/f1': 0.8820679187774658,
 'test/hss1': 0.7378814816474915,
 'test/hss2': 0.7378814816474915,
 'test/loss': 0.3834614157676697,
 'test/precision': 0.8017621040344238,
 'test/recall': 0.9802513718605042,
 'test/tss': 0.7378815412521362}
--------------------------------------------------------------------------------


  self._construct_datasets(balanced=cfg.DATA.BALANCED)
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Testing: 0it [00:00, ?it/s]



--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test/accuracy': 0.911995530128479,
 'test/auc': 0.9737269878387451,
 'test/bss': 0.6671963930130005,
 'test/f1': 0.9139726161956787,
 'test/hss1': 0.823991060256958,
 'test/hss2': 0.823991060256958,
 'test/loss': 0.22129638493061066,
 'test/precision': 0.8938906788825989,
 'test/recall': 0.9349775910377502,
 'test/tss': 0.823991060256958}
--------------------------------------------------------------------------------


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
Using native 16bit precision.
  self._construct_datasets(balanced=cfg.DATA.BALANCED)
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Testing: 0it [00:00, ?it/s]



--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test/accuracy': 0.8589743375778198,
 'test/auc': 0.9412611722946167,
 'test/bss': 0.4054771661758423,
 'test/f1': 0.8456140756607056,
 'test/hss1': 0.7179487347602844,
 'test/hss2': 0.7179487347602844,
 'test/loss': 0.38355565071105957,
 'test/precision': 0.934108555316925,
 'test/recall': 0.7724359035491943,
 'test/tss': 0.7179487347602844}
--------------------------------------------------------------------------------


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
Using native 16bit precision.
  self._construct_datasets(balanced=cfg.DATA.BALANCED)
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Testing: 0it [00:00, ?it/s]



--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test/accuracy': 0.8544857501983643,
 'test/auc': 0.9703865051269531,
 'test/bss': 0.442727267742157,
 'test/f1': 0.8339574933052063,
 'test/hss1': 0.7089715600013733,
 'test/hss2': 0.7089715600013733,
 'test/loss': 0.3962964713573456,
 'test/precision': 0.9709302186965942,
 'test/recall': 0.7308533787727356,
 'test/tss': 0.7089715600013733}
--------------------------------------------------------------------------------


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
Using native 16bit precision.
  self._construct_datasets(balanced=cfg.DATA.BALANCED)
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Testing: 0it [00:00, ?it/s]



--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test/accuracy': 0.883293867111206,
 'test/auc': 0.9690574407577515,
 'test/bss': 0.55250084400177,
 'test/f1': 0.8771053552627563,
 'test/hss1': 0.7665876746177673,
 'test/hss2': 0.7665876746177673,
 'test/loss': 0.2388429343700409,
 'test/precision': 0.9262186884880066,
 'test/recall': 0.8329383730888367,
 'test/tss': 0.7665876746177673}
--------------------------------------------------------------------------------


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
Using native 16bit precision.
  self._construct_datasets(balanced=cfg.DATA.BALANCED)
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Testing: 0it [00:00, ?it/s]



--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test/accuracy': 0.8972172141075134,
 'test/auc': 0.969020426273346,
 'test/bss': 0.5439691543579102,
 'test/f1': 0.8929406404495239,
 'test/hss1': 0.7944344878196716,
 'test/hss2': 0.7944344878196716,
 'test/loss': 0.2416781336069107,
 'test/precision': 0.9317073225975037,
 'test/recall': 0.8572710752487183,
 'test/tss': 0.7944344282150269}
--------------------------------------------------------------------------------


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
Using native 16bit precision.
  self._construct_datasets(balanced=cfg.DATA.BALANCED)
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Testing: 0it [00:00, ?it/s]

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
Using native 16bit precision.


--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test/accuracy': 0.8817264437675476,
 'test/auc': 0.9450874328613281,
 'test/bss': 0.5342012643814087,
 'test/f1': 0.8785262107849121,
 'test/hss1': 0.7634528875350952,
 'test/hss2': 0.7634528875350952,
 'test/loss': 0.3201172947883606,
 'test/precision': 0.9029585719108582,
 'test/recall': 0.855381190776825,
 'test/tss': 0.76345294713974}
--------------------------------------------------------------------------------


  self._construct_datasets(balanced=cfg.DATA.BALANCED)
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Testing: 0it [00:00, ?it/s]

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
Using native 16bit precision.


--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test/accuracy': 0.8950320482254028,
 'test/auc': 0.9537593722343445,
 'test/bss': 0.5755957365036011,
 'test/f1': 0.8978955745697021,
 'test/hss1': 0.7900640964508057,
 'test/hss2': 0.7900640964508057,
 'test/loss': 0.3086756765842438,
 'test/precision': 0.874051570892334,
 'test/recall': 0.9230769276618958,
 'test/tss': 0.7900640964508057}
--------------------------------------------------------------------------------


  self._construct_datasets(balanced=cfg.DATA.BALANCED)
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Testing: 0it [00:00, ?it/s]

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
Using native 16bit precision.


--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test/accuracy': 0.9059081077575684,
 'test/auc': 0.9752297401428223,
 'test/bss': 0.6472760438919067,
 'test/f1': 0.9109731316566467,
 'test/hss1': 0.8118162155151367,
 'test/hss2': 0.8118162155151367,
 'test/loss': 0.22602002322673798,
 'test/precision': 0.8644400835037231,
 'test/recall': 0.9628008604049683,
 'test/tss': 0.8118161559104919}
--------------------------------------------------------------------------------


  self._construct_datasets(balanced=cfg.DATA.BALANCED)
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Testing: 0it [00:00, ?it/s]

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
Using native 16bit precision.


--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test/accuracy': 0.9229857921600342,
 'test/auc': 0.9762534499168396,
 'test/bss': 0.7134323120117188,
 'test/f1': 0.9268841743469238,
 'test/hss1': 0.8459715843200684,
 'test/hss2': 0.8459715843200684,
 'test/loss': 0.2330894023180008,
 'test/precision': 0.8822270035743713,
 'test/recall': 0.9763033390045166,
 'test/tss': 0.8459715843200684}
--------------------------------------------------------------------------------


  self._construct_datasets(balanced=cfg.DATA.BALANCED)
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Testing: 0it [00:00, ?it/s]

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
Using native 16bit precision.


--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test/accuracy': 0.9017055630683899,
 'test/auc': 0.9695329666137695,
 'test/bss': 0.6113885045051575,
 'test/f1': 0.907633900642395,
 'test/hss1': 0.8034111261367798,
 'test/hss2': 0.8034111261367798,
 'test/loss': 0.2630195915699005,
 'test/precision': 0.856006383895874,
 'test/recall': 0.9658886790275574,
 'test/tss': 0.8034111261367798}
--------------------------------------------------------------------------------


  self._construct_datasets(balanced=cfg.DATA.BALANCED)
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Testing: 0it [00:00, ?it/s]

GPU available: True, used: True


--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test/accuracy': 0.9131165742874146,
 'test/auc': 0.9710907936096191,
 'test/bss': 0.6692348718643188,
 'test/f1': 0.9150684475898743,
 'test/hss1': 0.8262332081794739,
 'test/hss2': 0.8262332081794739,
 'test/loss': 0.2331949770450592,
 'test/precision': 0.89496248960495,
 'test/recall': 0.9360986351966858,
 'test/tss': 0.8262331485748291}
--------------------------------------------------------------------------------


TPU available: False, using: 0 TPU cores
Using native 16bit precision.
  self._construct_datasets(balanced=cfg.DATA.BALANCED)
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Testing: 0it [00:00, ?it/s]



--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test/accuracy': 0.9271523356437683,
 'test/auc': 0.988074779510498,
 'test/bss': 0.5684245228767395,
 'test/f1': 0.931357204914093,
 'test/hss1': 0.8543046116828918,
 'test/hss2': 0.8543046116828918,
 'test/loss': 0.2248130589723587,
 'test/precision': 0.8805309534072876,
 'test/recall': 0.9884105920791626,
 'test/tss': 0.8543046116828918}
--------------------------------------------------------------------------------


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
Using native 16bit precision.
  self._construct_datasets(balanced=cfg.DATA.BALANCED)
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Testing: 0it [00:00, ?it/s]



--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test/accuracy': 0.8855534791946411,
 'test/auc': 0.9605704545974731,
 'test/bss': 0.47905492782592773,
 'test/f1': 0.8801571726799011,
 'test/hss1': 0.7711069583892822,
 'test/hss2': 0.7711069583892822,
 'test/loss': 0.2655392289161682,
 'test/precision': 0.923711359500885,
 'test/recall': 0.8405253291130066,
 'test/tss': 0.7711069583892822}
--------------------------------------------------------------------------------


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
Using native 16bit precision.
  self._construct_datasets(balanced=cfg.DATA.BALANCED)
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Testing: 0it [00:00, ?it/s]



--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test/accuracy': 0.9489291310310364,
 'test/auc': 0.9902836084365845,
 'test/bss': 0.6976056098937988,
 'test/f1': 0.9498381614685059,
 'test/hss1': 0.8978583216667175,
 'test/hss2': 0.8978583216667175,
 'test/loss': 0.18442068994045258,
 'test/precision': 0.9332273602485657,
 'test/recall': 0.967051088809967,
 'test/tss': 0.8978583216667175}
--------------------------------------------------------------------------------


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
Using native 16bit precision.
  self._construct_datasets(balanced=cfg.DATA.BALANCED)
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Testing: 0it [00:00, ?it/s]



--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test/accuracy': 0.9009259343147278,
 'test/auc': 0.9755829572677612,
 'test/bss': 0.632548451423645,
 'test/f1': 0.8937437534332275,
 'test/hss1': 0.8018518686294556,
 'test/hss2': 0.8018518686294556,
 'test/loss': 0.22783561050891876,
 'test/precision': 0.9635974168777466,
 'test/recall': 0.8333333134651184,
 'test/tss': 0.8018518090248108}
--------------------------------------------------------------------------------


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
Using native 16bit precision.
  self._construct_datasets(balanced=cfg.DATA.BALANCED)
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Testing: 0it [00:00, ?it/s]

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
Using native 16bit precision.


--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test/accuracy': 0.9341463446617126,
 'test/auc': 0.9917935729026794,
 'test/bss': 0.5864051580429077,
 'test/f1': 0.937644362449646,
 'test/hss1': 0.8682926893234253,
 'test/hss2': 0.8682926893234253,
 'test/loss': 0.21680065989494324,
 'test/precision': 0.890350878238678,
 'test/recall': 0.9902439117431641,
 'test/tss': 0.8682926893234253}
--------------------------------------------------------------------------------


  self._construct_datasets(balanced=cfg.DATA.BALANCED)
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Testing: 0it [00:00, ?it/s]

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
Using native 16bit precision.


--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test/accuracy': 0.9594370722770691,
 'test/auc': 0.9909104704856873,
 'test/bss': 0.8471921682357788,
 'test/f1': 0.9595375657081604,
 'test/hss1': 0.9188741445541382,
 'test/hss2': 0.9188741445541382,
 'test/loss': 0.11756695061922073,
 'test/precision': 0.9571663737297058,
 'test/recall': 0.9619205594062805,
 'test/tss': 0.918874204158783}
--------------------------------------------------------------------------------


  self._construct_datasets(balanced=cfg.DATA.BALANCED)
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Testing: 0it [00:00, ?it/s]

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
Using native 16bit precision.


--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test/accuracy': 0.9343339800834656,
 'test/auc': 0.9848058819770813,
 'test/bss': 0.7451460361480713,
 'test/f1': 0.9309664368629456,
 'test/hss1': 0.8686679005622864,
 'test/hss2': 0.8686679005622864,
 'test/loss': 0.1853262335062027,
 'test/precision': 0.9812889695167542,
 'test/recall': 0.8855534791946411,
 'test/tss': 0.8686679005622864}
--------------------------------------------------------------------------------


  self._construct_datasets(balanced=cfg.DATA.BALANCED)
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Testing: 0it [00:00, ?it/s]

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
Using native 16bit precision.


--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test/accuracy': 0.9579901099205017,
 'test/auc': 0.9917140007019043,
 'test/bss': 0.8461650013923645,
 'test/f1': 0.959036111831665,
 'test/hss1': 0.9159802198410034,
 'test/hss2': 0.9159802198410034,
 'test/loss': 0.1196543276309967,
 'test/precision': 0.9357366561889648,
 'test/recall': 0.9835255146026611,
 'test/tss': 0.9159802198410034}
--------------------------------------------------------------------------------


  self._construct_datasets(balanced=cfg.DATA.BALANCED)
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Testing: 0it [00:00, ?it/s]

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
Using native 16bit precision.


--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test/accuracy': 0.970370352268219,
 'test/auc': 0.9960203170776367,
 'test/bss': 0.886390745639801,
 'test/f1': 0.9705882668495178,
 'test/hss1': 0.9407407641410828,
 'test/hss2': 0.9407407641410828,
 'test/loss': 0.08893772959709167,
 'test/precision': 0.9635036587715149,
 'test/recall': 0.9777777791023254,
 'test/tss': 0.9407407641410828}
--------------------------------------------------------------------------------


  self._construct_datasets(balanced=cfg.DATA.BALANCED)
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Testing: 0it [00:00, ?it/s]



--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test/accuracy': 0.9280487895011902,
 'test/auc': 0.9885722398757935,
 'test/bss': 0.7565100789070129,
 'test/f1': 0.9248407483100891,
 'test/hss1': 0.8560975790023804,
 'test/hss2': 0.8560975790023804,
 'test/loss': 0.1612444370985031,
 'test/precision': 0.9679999947547913,
 'test/recall': 0.8853658437728882,
 'test/tss': 0.8560975790023804}
--------------------------------------------------------------------------------


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
Using native 16bit precision.
  self._construct_datasets(balanced=cfg.DATA.BALANCED)
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Testing: 0it [00:00, ?it/s]



--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test/accuracy': 0.8948675394058228,
 'test/auc': 0.9806190133094788,
 'test/bss': 0.4947596788406372,
 'test/f1': 0.885067880153656,
 'test/hss1': 0.7897350788116455,
 'test/hss2': 0.7897350788116455,
 'test/loss': 0.25468453764915466,
 'test/precision': 0.976047933101654,
 'test/recall': 0.809602677822113,
 'test/tss': 0.7897351384162903}
--------------------------------------------------------------------------------


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
Using native 16bit precision.
  self._construct_datasets(balanced=cfg.DATA.BALANCED)
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Testing: 0it [00:00, ?it/s]



--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test/accuracy': 0.9071294665336609,
 'test/auc': 0.9747385382652283,
 'test/bss': 0.644465446472168,
 'test/f1': 0.9020771384239197,
 'test/hss1': 0.8142589330673218,
 'test/hss2': 0.8142589330673218,
 'test/loss': 0.21679474413394928,
 'test/precision': 0.9539749026298523,
 'test/recall': 0.8555347323417664,
 'test/tss': 0.8142589330673218}
--------------------------------------------------------------------------------


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
Using native 16bit precision.
  self._construct_datasets(balanced=cfg.DATA.BALANCED)
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Testing: 0it [00:00, ?it/s]



--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test/accuracy': 0.9003294706344604,
 'test/auc': 0.9795371890068054,
 'test/bss': 0.6693639159202576,
 'test/f1': 0.894690990447998,
 'test/hss1': 0.8006590008735657,
 'test/hss2': 0.8006590008735657,
 'test/loss': 0.19824263453483582,
 'test/precision': 0.9483394622802734,
 'test/recall': 0.8467874526977539,
 'test/tss': 0.8006589412689209}
--------------------------------------------------------------------------------


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
Using native 16bit precision.
  self._construct_datasets(balanced=cfg.DATA.BALANCED)
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Testing: 0it [00:00, ?it/s]



--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test/accuracy': 0.9101851582527161,
 'test/auc': 0.9753857851028442,
 'test/bss': 0.6018029451370239,
 'test/f1': 0.9078822135925293,
 'test/hss1': 0.8203703761100769,
 'test/hss2': 0.8203703761100769,
 'test/loss': 0.219582200050354,
 'test/precision': 0.9317739009857178,
 'test/recall': 0.885185182094574,
 'test/tss': 0.8203703761100769}
--------------------------------------------------------------------------------


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
Using native 16bit precision.
  self._construct_datasets(balanced=cfg.DATA.BALANCED)
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Testing: 0it [00:00, ?it/s]

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
Using native 16bit precision.


--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test/accuracy': 0.9121951460838318,
 'test/auc': 0.9878584146499634,
 'test/bss': 0.5835776925086975,
 'test/f1': 0.918367326259613,
 'test/hss1': 0.8243902325630188,
 'test/hss2': 0.8243902325630188,
 'test/loss': 0.24321012198925018,
 'test/precision': 0.8580508232116699,
 'test/recall': 0.9878048896789551,
 'test/tss': 0.8243902325630188}
--------------------------------------------------------------------------------


  self._construct_datasets(balanced=cfg.DATA.BALANCED)
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Testing: 0it [00:00, ?it/s]

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
Using native 16bit precision.


--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test/accuracy': 0.9470198750495911,
 'test/auc': 0.9849855899810791,
 'test/bss': 0.8017571568489075,
 'test/f1': 0.9473684430122375,
 'test/hss1': 0.8940397500991821,
 'test/hss2': 0.8940397500991821,
 'test/loss': 0.15738053619861603,
 'test/precision': 0.9411764740943909,
 'test/recall': 0.9536423683166504,
 'test/tss': 0.8940396904945374}
--------------------------------------------------------------------------------


  self._construct_datasets(balanced=cfg.DATA.BALANCED)
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Testing: 0it [00:00, ?it/s]

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
Using native 16bit precision.


--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test/accuracy': 0.9249531030654907,
 'test/auc': 0.9819774627685547,
 'test/bss': 0.7413575649261475,
 'test/f1': 0.9229287505149841,
 'test/hss1': 0.8499062061309814,
 'test/hss2': 0.8499062061309814,
 'test/loss': 0.17977632582187653,
 'test/precision': 0.9485148787498474,
 'test/recall': 0.8986867070198059,
 'test/tss': 0.8499062061309814}
--------------------------------------------------------------------------------


  self._construct_datasets(balanced=cfg.DATA.BALANCED)
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Testing: 0it [00:00, ?it/s]

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
Using native 16bit precision.


--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test/accuracy': 0.953871488571167,
 'test/auc': 0.9886782169342041,
 'test/bss': 0.8296545743942261,
 'test/f1': 0.9539474248886108,
 'test/hss1': 0.907742977142334,
 'test/hss2': 0.907742977142334,
 'test/loss': 0.12978167831897736,
 'test/precision': 0.9523809552192688,
 'test/recall': 0.9555189609527588,
 'test/tss': 0.9077430367469788}
--------------------------------------------------------------------------------


  self._construct_datasets(balanced=cfg.DATA.BALANCED)
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Testing: 0it [00:00, ?it/s]

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
Using native 16bit precision.


--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test/accuracy': 0.9546296000480652,
 'test/auc': 0.9928498268127441,
 'test/bss': 0.8207165002822876,
 'test/f1': 0.953990638256073,
 'test/hss1': 0.9092592597007751,
 'test/hss2': 0.9092592597007751,
 'test/loss': 0.12363481521606445,
 'test/precision': 0.9676190614700317,
 'test/recall': 0.9407407641410828,
 'test/tss': 0.9092592597007751}
--------------------------------------------------------------------------------


  self._construct_datasets(balanced=cfg.DATA.BALANCED)
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Testing: 0it [00:00, ?it/s]



--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test/accuracy': 0.9121951460838318,
 'test/auc': 0.9863563179969788,
 'test/bss': 0.6856560707092285,
 'test/f1': 0.9074549674987793,
 'test/hss1': 0.8243902325630188,
 'test/hss2': 0.8243902325630188,
 'test/loss': 0.20721431076526642,
 'test/precision': 0.95923912525177,
 'test/recall': 0.8609756231307983,
 'test/tss': 0.8243902325630188}
--------------------------------------------------------------------------------


In [4]:
# Map raw MLflow column names (tags / params / metrics) to the short labels
# used in the tables below. Insertion order is the displayed column order.
columns = dict([
    # ('tags.database_name', 'database'),
    ('tags.dataset_name', 'dataset'),
    ('tags.estimator_name', 'estimator'),
    ('params.DATA.SEED', 'seed'),
    ('metrics.test/accuracy', 'ACC'),
    ('metrics.test/auc', 'AUC'),
    ('metrics.test/tss', 'TSS'),
    ('metrics.test/hss2', 'HSS'),
    ('metrics.test/bss', 'BSS'),
])
# Empty mapping: no cell values are relabelled for this experiment.
rows = dict()

# Runs named 'checkpoint_best' in the 'leaderboard5_1' experiment.
# (An optional `p=0` argument was left disabled by the original author.)
runs_raw = retrieve('leaderboard5_1', 'checkpoint_best')
runs = select(runs_raw, columns, rows)

# axis=0 shades every column independently, so metrics on different scales
# remain comparable at a glance.
runs.style.background_gradient(axis=0)

Select iloc 0 from 
                         start_time tags.mlflow.runName
40 2021-07-27 19:35:21.705000+00:00     checkpoint_best
41 2021-07-27 19:34:41.225000+00:00     checkpoint_best
79 2021-07-27 16:43:57.339000+00:00     checkpoint_best
80 2021-07-27 16:41:36.790000+00:00     checkpoint_best


Unnamed: 0,dataset,estimator,seed,ACC,AUC,TSS,HSS,BSS
0,sharp,LSTM,0,0.912195,0.986356,0.82439,0.82439,0.685656
1,sharp,LSTM,1,0.95463,0.99285,0.909259,0.909259,0.820717
2,sharp,LSTM,2,0.953871,0.988678,0.907743,0.907743,0.829655
3,sharp,LSTM,3,0.924953,0.981977,0.849906,0.849906,0.741358
4,sharp,LSTM,4,0.94702,0.984986,0.89404,0.89404,0.801757
5,sharp,CNN,0,0.912195,0.987858,0.82439,0.82439,0.583578
6,sharp,CNN,1,0.910185,0.975386,0.82037,0.82037,0.601803
7,sharp,CNN,2,0.900329,0.979537,0.800659,0.800659,0.669364
8,sharp,CNN,3,0.907129,0.974739,0.814259,0.814259,0.644465
9,sharp,CNN,4,0.894868,0.980619,0.789735,0.789735,0.49476


In [27]:
# Indeed, the models are early-stopped
#runs_raw[['tags.checkpoint_best', 'tags.dataset_name', 'tags.estimator_name', 'params.DATA.SEED']]

In [5]:
# Render the selected runs as a summary table through the `style` helper.
# NOTE(review): `style` is not defined in this notebook; presumably it comes
# from the `mlflow_helper` star import and averages metrics per
# estimator/dataset -- confirm against that module.
style(runs)

Unnamed: 0_level_0,dataset,fused_sharp,fused_smarp,sharp,smarp
Unnamed: 0_level_1,estimator,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ACC,CNN,0.919,0.883,0.905,0.875
ACC,LSTM,0.95,0.903,0.939,0.908
AUC,CNN,0.981,0.964,0.98,0.959
AUC,LSTM,0.99,0.97,0.987,0.969
TSS,CNN,0.839,0.766,0.81,0.75
TSS,LSTM,0.9,0.806,0.877,0.816
HSS,CNN,0.839,0.766,0.81,0.75
HSS,LSTM,0.9,0.806,0.877,0.816
BSS,CNN,0.593,0.505,0.599,0.496
BSS,LSTM,0.816,0.623,0.776,0.643


In [122]:
# df = organize(runs, std=True)
# print(df.to_latex(multicolumn_format='c'))
# df

### LSTM

In [123]:
# Per-dataset summary of the LSTM runs, printed as LaTeX and displayed as a frame.
#
# The filtered rows are copied explicitly before being handed to `select`.
# Passing the bare boolean-mask slice of `runs` made pandas emit a
# SettingWithCopyWarning (presumably because `select` relabels values in
# place -- confirm in mlflow_helper); working on an independent copy keeps
# `runs` untouched and silences the warning.
_runs = select(
    runs[runs['estimator'] == 'LSTM'].copy(),
    columns=None,
    rows={
        # raw dataset tag -> label used in the published table
        'dataset': {
            'fused_sharp': 'FUSED_SHARP',
            'sharp': 'SHARP_ONLY',
            'fused_smarp': 'FUSED_SMARP',
            'smarp': 'SMARP_ONLY',
        }
    }
)

# Original author's note: "comment out unstack".
# NOTE(review): presumably refers to an `unstack` step inside `organize` -- confirm.
df = organize(
    _runs,
    by=['dataset'],
    std=True
)
# Explicit column selection fixes the presentation order, so no sort_values is needed.
df = df[['SHARP_ONLY', 'FUSED_SHARP', 'SMARP_ONLY', 'FUSED_SMARP']]
print(typeset(df))
df

\begin{tabular}{lllll}
\toprule
dataset &     SHARP\_ONLY &    FUSED\_SHARP &     SMARP\_ONLY &    FUSED\_SMARP \\
\midrule
ACC &  0.939+/-0.019 &  0.950+/-0.018 &  0.908+/-0.011 &  0.903+/-0.021 \\
AUC &  0.987+/-0.004 &  0.990+/-0.004 &  0.969+/-0.009 &  0.970+/-0.009 \\
TSS &  0.877+/-0.038 &  0.900+/-0.036 &  0.815+/-0.022 &  0.806+/-0.043 \\
HSS &  0.877+/-0.038 &  0.900+/-0.036 &  0.815+/-0.022 &  0.806+/-0.043 \\
BSS &  0.776+/-0.061 &  0.816+/-0.062 &  0.643+/-0.053 &  0.623+/-0.096 \\
\bottomrule
\end{tabular}



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(ilocs[0], value, pi)


dataset,SHARP_ONLY,FUSED_SHARP,SMARP_ONLY,FUSED_SMARP
ACC,0.939+/-0.019,0.950+/-0.018,0.908+/-0.011,0.903+/-0.021
AUC,0.987+/-0.004,0.990+/-0.004,0.969+/-0.009,0.970+/-0.009
TSS,0.877+/-0.038,0.900+/-0.036,0.815+/-0.022,0.806+/-0.043
HSS,0.877+/-0.038,0.900+/-0.036,0.815+/-0.022,0.806+/-0.043
BSS,0.776+/-0.061,0.816+/-0.062,0.643+/-0.053,0.623+/-0.096


### CNN

In [124]:
# Per-dataset summary of the CNN runs, printed as LaTeX and displayed as a frame.
#
# The filtered rows are copied explicitly before being handed to `select`.
# Passing the bare boolean-mask slice of `runs` made pandas emit a
# SettingWithCopyWarning (presumably because `select` relabels values in
# place -- confirm in mlflow_helper); working on an independent copy keeps
# `runs` untouched and silences the warning.
_runs = select(
    runs[runs['estimator'] == 'CNN'].copy(),
    columns=None,
    rows={
        # raw dataset tag -> label used in the published table
        'dataset': {
            'fused_sharp': 'FUSED_SHARP',
            'sharp': 'SHARP_ONLY',
            'fused_smarp': 'FUSED_SMARP',
            'smarp': 'SMARP_ONLY',
        }
    }
)

# Original author's note: "comment out unstack".
# NOTE(review): presumably refers to an `unstack` step inside `organize` -- confirm.
df = organize(
    _runs,
    by=['dataset'],
    std=True
)
# Explicit column selection fixes the presentation order, so no sort_values is needed.
df = df[['SHARP_ONLY', 'FUSED_SHARP', 'SMARP_ONLY', 'FUSED_SMARP']]
print(typeset(df))
df

\begin{tabular}{lllll}
\toprule
dataset &     SHARP\_ONLY &    FUSED\_SHARP &     SMARP\_ONLY &    FUSED\_SMARP \\
\midrule
ACC &  0.905+/-0.007 &  0.919+/-0.026 &  0.875+/-0.018 &  0.883+/-0.028 \\
AUC &  0.980+/-0.005 &  0.981+/-0.013 &  0.959+/-0.014 &  0.964+/-0.007 \\
TSS &  0.810+/-0.014 &  0.839+/-0.051 &  0.750+/-0.036 &  0.766+/-0.056 \\
HSS &  0.810+/-0.014 &  0.839+/-0.051 &  0.750+/-0.036 &  0.766+/-0.056 \\
BSS &  0.599+/-0.067 &  0.593+/-0.081 &  0.496+/-0.067 &  0.505+/-0.142 \\
\bottomrule
\end{tabular}



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(ilocs[0], value, pi)


dataset,SHARP_ONLY,FUSED_SHARP,SMARP_ONLY,FUSED_SMARP
ACC,0.905+/-0.007,0.919+/-0.026,0.875+/-0.018,0.883+/-0.028
AUC,0.980+/-0.005,0.981+/-0.013,0.959+/-0.014,0.964+/-0.007
TSS,0.810+/-0.014,0.839+/-0.051,0.750+/-0.036,0.766+/-0.056
HSS,0.810+/-0.014,0.839+/-0.051,0.750+/-0.036,0.766+/-0.056
BSS,0.599+/-0.067,0.593+/-0.081,0.496+/-0.067,0.505+/-0.142


### LSTM paired t-test

In [104]:
# Per-seed metric differences for the LSTM: fused_sharp minus sharp.
estimator_name = 'LSTM'
numerical = ['ACC', 'AUC', 'TSS', 'HSS', 'BSS']

_is_estimator = runs['estimator'] == estimator_name
_fused_rows = runs.loc[_is_estimator & (runs['dataset'] == 'fused_sharp')]
_sharp_rows = runs.loc[_is_estimator & (runs['dataset'] == 'sharp')]

# The subtraction below pairs rows purely by position (`.values` drops the
# index), so both selections must list the same seeds in the same order for
# the differences to be genuinely paired.
assert _fused_rows['seed'].tolist() == _sharp_rows['seed'].tolist(), \
    'fused_sharp and sharp runs are not aligned by seed'

# Copy before the in-place `-=`: modifying the `.loc` slice directly raised
# SettingWithCopyWarning and leaves it ambiguous whether `runs` is touched.
diff_sharp = _fused_rows.copy()
diff_sharp[numerical] -= _sharp_rows[numerical].values
diff_sharp

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]


Unnamed: 0,dataset,estimator,seed,ACC,AUC,TSS,HSS,BSS
10,fused_sharp,LSTM,0,0.015854,0.002216,0.031707,0.031707,0.070854
11,fused_sharp,LSTM,1,0.015741,0.00317,0.031482,0.031482,0.065674
12,fused_sharp,LSTM,2,0.004119,0.003036,0.008237,0.008237,0.01651
13,fused_sharp,LSTM,3,0.009381,0.002828,0.018762,0.018762,0.003788
14,fused_sharp,LSTM,4,0.012417,0.005925,0.024835,0.024834,0.045435


In [111]:
import numpy as np
from scipy.stats import t

# One-sided paired t-test on the per-seed TSS differences against mu0 = 0.
# Test on the mean, with the variance unknown:
#   H0: diff <= mu0 (or diff == mu0)
#   H1: diff > mu0
X = diff_sharp['TSS'].to_numpy()
n = len(X)  # sample size
mu = np.mean(X)
s = np.std(X, ddof=1)  # sample standard deviation (n - 1 in the denominator)
statistic = np.sqrt(n) * mu / s

# Critical value: reject H0 at level alpha when statistic > thresh.
alpha = 0.05
dof = n - 1
thresh = t.ppf(1 - alpha, dof)  # standard Student's t (loc=0, scale=1 defaults)

statistic, thresh

In [None]:
# SciPy cross-check of the paired t-test on TSS for the current
# `estimator_name`.
from scipy import stats

# Build both samples inside this cell so it does not rely on `before` /
# `after` having been created by a cell that appears later in the notebook.
before = runs.loc[
    (runs['estimator'] == estimator_name) &
    (runs['dataset'] == 'sharp'),
    'TSS'
].values
after = runs.loc[
    (runs['estimator'] == estimator_name) &
    (runs['dataset'] == 'fused_sharp'),
    'TSS'
].values
# ttest_rel tests the mean of a - b (here sharp - fused_sharp) and is
# two-sided by default, so its statistic has the opposite sign to a
# fused_sharp-minus-sharp difference.
stats.ttest_rel(a=before,b=after)

### CNN paired t-test

In [150]:
# Per-seed metric differences (fused_sharp minus sharp) for the CNN runs.
estimator_name = 'CNN'
numerical = ['ACC', 'AUC', 'TSS', 'HSS', 'BSS']
# Take an explicit copy: the in-place subtraction below must modify an
# independent frame, not a slice of `runs` (this is what triggered pandas'
# SettingWithCopyWarning).
diff_sharp = runs.loc[
    (runs['estimator'] == estimator_name) &
    (runs['dataset'] == 'fused_sharp')
].copy()
# `.values` drops the index, so the two selections are paired row-by-row.
# NOTE(review): this assumes both selections list the seeds in the same
# order -- confirm against `runs`.
diff_sharp[numerical] -= runs.loc[
    (runs['estimator'] == estimator_name) &
    (runs['dataset'] == 'sharp'),
    numerical
].values
diff_sharp

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]


Unnamed: 0,database,dataset,estimator,seed,ACC,AUC,TSS,HSS,BSS
20,M_Q_24hr,fused_sharp,CNN,4,0.100166,-0.00587,0.200331,0.200331,0.428269
21,M_Q_24hr,fused_sharp,CNN,3,-0.151032,-0.02866,-0.302064,-0.302064,-0.643304
22,M_Q_24hr,fused_sharp,CNN,2,0.030478,-0.008173,0.060956,0.060956,-0.08942
23,M_Q_24hr,fused_sharp,CNN,1,0.013889,0.005674,0.027778,0.027778,0.052934
24,M_Q_24hr,fused_sharp,CNN,0,-0.041463,-0.004286,-0.082927,-0.082927,-0.13383


In [151]:
# Run the paired t-test helper on the TSS differences held in `diff_sharp`.
# NOTE(review): `paired_t_test` is not defined in this part of the notebook
# (presumably it comes from `mlflow_helper`) -- confirm what the returned
# pair represents.
paired_t_test(diff_sharp.loc[:, 'TSS'].to_numpy())

(-0.22857471191350576, 0.5847951511425598)

In [3]:
# SciPy paired t-test on TSS: 'sharp' runs versus 'fused_sharp' runs for the
# current `estimator_name`.
from scipy import stats

# TSS of the runs evaluated on the 'sharp' dataset.
before = runs.loc[
    (runs['estimator'] == estimator_name) &
    (runs['dataset'] == 'sharp'),
    'TSS'
].values
# TSS of the runs evaluated on the 'fused_sharp' dataset.
after = runs.loc[
    (runs['estimator'] == estimator_name) &
    (runs['dataset'] == 'fused_sharp'),
    'TSS'
].values
# The leftover breakpoint() was removed: it halted execution and dropped into
# the debugger in the middle of the cell.
# ttest_rel tests the mean of a - b (here before - after) and is two-sided by
# default.
stats.ttest_rel(a=before,b=after)

NameError: name 'runs' is not defined

In [153]:
# Display the two TSS samples that were passed to the paired t-test.
(before, after)

(array([0.52980131, 0.75797379, 0.69522238, 0.80370373, 0.85121948]),
 array([0.73013246, 0.45590997, 0.7561779 , 0.83148152, 0.76829273]))

In [154]:
# Display the TSS column of the difference frame.
diff_sharp.loc[:, 'TSS']

20    0.200331
21   -0.302064
22    0.060956
23    0.027778
24   -0.082927
Name: TSS, dtype: float64