# MLflow results presentation

Present MLflow results using pandas.

If tensorboard is not showing inline, use `!` instead of `%` and view tensorboard in a new browser tab.

In [1]:
import pandas as pd
from uncertainties import ufloat
import mlflow
from mlflow.tracking import MlflowClient
mlflow.set_tracking_uri('file:///home/zeyusun/work/flare-prediction-smarp/mlruns')
client = MlflowClient()

from mlflow_helper import *

pd.set_option('display.max_colwidth', None)

%load_ext tensorboard
%load_ext autoreload
%autoreload 2

Display borders on pandas tables.

In [2]:
%%HTML
<style type="text/css">
/* Give every header and data cell of a rendered pandas DataFrame a solid
   black border and black text; !important overrides the notebook theme. */
table.dataframe td, table.dataframe th {
    border: 1px  black solid !important;
  color: black !important;
}
</style>

## experiment: beta
* no random seed
* multiple databases

In [3]:
# Mapping from MLflow run columns (tags / metrics) to the short names used in
# the presentation tables.
columns = {
    'tags.database_name': 'database',
    'tags.dataset_name': 'dataset',
    'tags.estimator_name': 'estimator',
    'metrics.auc': 'auc',
    'metrics.tss_opt': 'tss',
}
# Per-column value relabelling: long scikit-learn class names -> abbreviations.
rows = {
    'estimator': {
        'HistGradientBoostingClassifier': 'HGB',
        'RandomForestClassifier': 'RF',
        'SGDClassifier': 'LG',
    }
}
# presumably (experiment name, run name) -- confirm against mlflow_helper.retrieve
runs_raw = retrieve('experiment', 'beta')
# Keep only the mapped columns and apply the renaming / relabelling above
# (judging by the displayed result; `select` itself is defined in mlflow_helper).
runs = select(runs_raw, columns, rows)
runs

Select the first from 
                          start_time tags.mlflow.runName  \
90  2021-05-22 18:13:26.644000+00:00                beta   
166 2021-05-21 01:50:33.010000+00:00                beta   

                tags.mlflow.source.git.commit  
90   d5d0b545f888847c72dd44d581a77e3e7602b2fe  
166  7149bc5e0355ab3dbc90c029260325a64a1f6044  


Unnamed: 0,database,dataset,estimator,auc,tss
36,M_Q_6hr,combined,HGB,0.999919,0.998669
37,M_Q_6hr,combined,RF,1.0,1.0
38,M_Q_6hr,combined,LG,0.99204,0.921998
39,M_Q_6hr,sharp,HGB,0.996281,0.943164
40,M_Q_6hr,sharp,RF,1.0,1.0
41,M_Q_6hr,sharp,LG,0.992198,0.921169
42,M_Q_6hr,smarp,HGB,0.996993,0.950146
43,M_Q_6hr,smarp,RF,1.0,1.0
44,M_Q_6hr,smarp,LG,0.992351,0.920737
45,M_Q_12hr_balanced,combined,HGB,1.0,1.0


In [4]:
# Group by database as well, since this experiment spans several databases.
by = ['database', 'dataset', 'estimator']
# Styled summary table; keep a handle in df_style and display it.
df_style = style(runs, by=by)
df_style

Unnamed: 0_level_0,database,M_Q_12hr,M_Q_12hr,M_Q_12hr,M_Q_12hr_balanced,M_Q_12hr_balanced,M_Q_12hr_balanced,M_Q_24hr,M_Q_24hr,M_Q_24hr,M_Q_24hr_balanced,M_Q_24hr_balanced,M_Q_24hr_balanced,M_Q_6hr,M_Q_6hr,M_Q_6hr,M_Q_6hr_balanced,M_Q_6hr_balanced,M_Q_6hr_balanced
Unnamed: 0_level_1,dataset,combined,sharp,smarp,combined,sharp,smarp,combined,sharp,smarp,combined,sharp,smarp,combined,sharp,smarp,combined,sharp,smarp
Unnamed: 0_level_2,estimator,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2
auc,HGB,1.0,0.994,0.996,1.0,0.997,0.998,1.0,0.999,0.996,1.0,0.994,0.995,1.0,0.996,0.997,1.0,0.994,0.995
auc,LG,0.992,0.992,0.991,0.994,0.992,0.993,0.991,0.99,0.991,0.991,0.991,0.991,0.992,0.992,0.992,0.991,0.99,0.991
auc,RF,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
tss,HGB,1.0,0.928,0.942,1.0,0.94,0.959,1.0,0.966,0.94,1.0,0.926,0.938,0.999,0.943,0.95,1.0,0.925,0.934
tss,LG,0.91,0.909,0.911,0.915,0.918,0.912,0.912,0.912,0.912,0.919,0.915,0.917,0.922,0.921,0.921,0.917,0.899,0.913
tss,RF,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [5]:
# Same grouping as the previous cell, now with the spread attached to each value.
# NOTE(review): every entry is displayed as "+/-nan"; presumably each group
# holds a single run, so no standard deviation can be computed -- confirm.
df = organize(runs, by=by, std=True)
# LaTeX rendering of the table; printing is left disabled.
df_latex = typeset(df)
#print(df_latex)
df

Unnamed: 0_level_0,database,M_Q_12hr,M_Q_12hr,M_Q_12hr,M_Q_12hr_balanced,M_Q_12hr_balanced,M_Q_12hr_balanced,M_Q_24hr,M_Q_24hr,M_Q_24hr,M_Q_24hr_balanced,M_Q_24hr_balanced,M_Q_24hr_balanced,M_Q_6hr,M_Q_6hr,M_Q_6hr,M_Q_6hr_balanced,M_Q_6hr_balanced,M_Q_6hr_balanced
Unnamed: 0_level_1,dataset,combined,sharp,smarp,combined,sharp,smarp,combined,sharp,smarp,combined,sharp,smarp,combined,sharp,smarp,combined,sharp,smarp
Unnamed: 0_level_2,estimator,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2
auc,HGB,1.000+/-nan,0.994+/-nan,0.996+/-nan,1.000+/-nan,0.997+/-nan,0.998+/-nan,1.000+/-nan,0.999+/-nan,0.996+/-nan,1.000+/-nan,0.994+/-nan,0.995+/-nan,1.000+/-nan,0.996+/-nan,0.997+/-nan,1.000+/-nan,0.994+/-nan,0.995+/-nan
auc,LG,0.992+/-nan,0.992+/-nan,0.991+/-nan,0.994+/-nan,0.992+/-nan,0.993+/-nan,0.991+/-nan,0.990+/-nan,0.991+/-nan,0.991+/-nan,0.991+/-nan,0.991+/-nan,0.992+/-nan,0.992+/-nan,0.992+/-nan,0.991+/-nan,0.990+/-nan,0.991+/-nan
auc,RF,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan
tss,HGB,1.000+/-nan,0.928+/-nan,0.942+/-nan,1.000+/-nan,0.940+/-nan,0.959+/-nan,1.000+/-nan,0.966+/-nan,0.940+/-nan,1.000+/-nan,0.926+/-nan,0.938+/-nan,0.999+/-nan,0.943+/-nan,0.950+/-nan,1.000+/-nan,0.925+/-nan,0.934+/-nan
tss,LG,0.910+/-nan,0.909+/-nan,0.911+/-nan,0.915+/-nan,0.918+/-nan,0.912+/-nan,0.912+/-nan,0.912+/-nan,0.912+/-nan,0.919+/-nan,0.915+/-nan,0.917+/-nan,0.922+/-nan,0.921+/-nan,0.921+/-nan,0.917+/-nan,0.899+/-nan,0.913+/-nan
tss,RF,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan,1.000+/-nan


## leaderboard1: sklearn_dataset

In [6]:
# Mapping from MLflow run columns (tags / metrics) to the short names used in
# the presentation tables.
columns = {
    'tags.database_name': 'database',
    'tags.dataset_name': 'dataset',
    'tags.estimator_name': 'estimator',
    'metrics.auc': 'auc',
    'metrics.tss_opt': 'tss',
}
# Per-column value relabelling: long scikit-learn class names -> abbreviations.
rows = {
    'estimator': {
        'HistGradientBoostingClassifier': 'HGB',
        'RandomForestClassifier': 'RF',
        'SGDClassifier': 'LG',
    }
}
# presumably (experiment name, run name) -- confirm against mlflow_helper.retrieve
runs_raw = retrieve('leaderboard1', 'sklearn_dataset')
# Reduce the raw runs to the mapped columns with relabelled estimator names.
runs = select(runs_raw, columns, rows)
runs

Unnamed: 0,database,dataset,estimator,auc,tss
77,M_Q_24hr,fused_sharp,HGB,0.986385,0.892283
78,M_Q_24hr,fused_sharp,RF,0.974761,0.863344
79,M_Q_24hr,fused_sharp,LG,0.983602,0.895498
80,M_Q_24hr,fused_smarp,HGB,0.94781,0.764423
82,M_Q_24hr,fused_smarp,RF,0.938948,0.725962
83,M_Q_24hr,fused_smarp,LG,0.956173,0.810897
84,M_Q_24hr,sharp,HGB,0.991318,0.90836
85,M_Q_24hr,sharp,RF,0.983958,0.890675
86,M_Q_24hr,sharp,LG,0.987159,0.900322
87,M_Q_24hr,smarp,HGB,0.953228,0.801282


In [7]:
# Summary with spread per (dataset, estimator); no `by` is passed, so the
# helper's default grouping is used.
df = organize(runs, std=True)
# Emit the LaTeX source of the table for copy-pasting, then show the frame.
print(df.to_latex())
df

\begin{tabular}{llllll}
\toprule
    & dataset &    fused\_sharp &    fused\_smarp &          sharp &          smarp \\
{} & estimator &                &                &                &                \\
\midrule
auc & HGB &  0.990+/-0.003 &  0.963+/-0.010 &  0.992+/-0.002 &  0.970+/-0.009 \\
    & LG &  0.988+/-0.003 &  0.970+/-0.008 &  0.991+/-0.003 &  0.971+/-0.009 \\
    & RF &  0.983+/-0.006 &  0.957+/-0.011 &  0.988+/-0.004 &  0.958+/-0.012 \\
tss & HGB &  0.907+/-0.019 &  0.812+/-0.034 &  0.919+/-0.016 &  0.839+/-0.029 \\
    & LG &  0.906+/-0.007 &  0.841+/-0.026 &  0.913+/-0.018 &  0.843+/-0.029 \\
    & RF &  0.884+/-0.030 &  0.786+/-0.038 &  0.901+/-0.023 &  0.797+/-0.040 \\
\bottomrule
\end{tabular}



Unnamed: 0_level_0,dataset,fused_sharp,fused_smarp,sharp,smarp
Unnamed: 0_level_1,estimator,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
auc,HGB,0.990+/-0.003,0.963+/-0.010,0.992+/-0.002,0.970+/-0.009
auc,LG,0.988+/-0.003,0.970+/-0.008,0.991+/-0.003,0.971+/-0.009
auc,RF,0.983+/-0.006,0.957+/-0.011,0.988+/-0.004,0.958+/-0.012
tss,HGB,0.907+/-0.019,0.812+/-0.034,0.919+/-0.016,0.839+/-0.029
tss,LG,0.906+/-0.007,0.841+/-0.026,0.913+/-0.018,0.843+/-0.029
tss,RF,0.884+/-0.030,0.786+/-0.038,0.901+/-0.023,0.797+/-0.040


In [8]:
# Include the database level in the column header of the styled table.
by = ['database', 'dataset', 'estimator']
style(runs, by=by)

Unnamed: 0_level_0,database,M_Q_24hr,M_Q_24hr,M_Q_24hr,M_Q_24hr
Unnamed: 0_level_1,dataset,fused_sharp,fused_smarp,sharp,smarp
Unnamed: 0_level_2,estimator,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
auc,HGB,0.99,0.963,0.992,0.97
auc,LG,0.988,0.97,0.991,0.971
auc,RF,0.983,0.957,0.988,0.958
tss,HGB,0.907,0.812,0.919,0.839
tss,LG,0.906,0.841,0.913,0.843
tss,RF,0.884,0.786,0.901,0.797


## leaderboard1: arnet_dataset

In [9]:
# Column mapping for the neural-network runs: metrics are read from the
# test/* keys and shown under upper-case names.
columns = {
    'tags.dataset_name': 'dataset',
    'tags.estimator_name': 'estimator',
    'metrics.test/auc': 'AUC',
    'metrics.test/tss': 'TSS',
}
# No value relabelling is needed for these estimator names.
rows = {}
# presumably (experiment name, run name) -- confirm against mlflow_helper.retrieve
runs_raw = retrieve('leaderboard1', 'arnet_dataset')
runs = select(runs_raw, columns, rows)
runs

Unnamed: 0,dataset,estimator,AUC,TSS
0,fused_smarp,CNN,0.942973,0.753205
1,fused_smarp,CNN,0.961599,0.816193
2,fused_smarp,CNN,0.945356,0.758294
3,fused_smarp,CNN,0.949142,0.773788
4,fused_smarp,CNN,0.929361,0.684978
...,...,...,...,...
75,sharp,MLP,0.976567,0.855305
76,sharp,MLP,0.976531,0.849802
81,sharp,MLP,0.974626,0.815385
91,sharp,MLP,0.986490,0.888298


In [10]:
# Styled summary of the selected runs with the helper's default grouping.
style(runs)

Unnamed: 0_level_0,dataset,fused_sharp,fused_smarp,sharp,smarp
Unnamed: 0_level_1,estimator,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AUC,C3D,0.959,0.942,0.903,0.928
AUC,CNN,0.961,0.946,0.954,0.915
AUC,LSTM,0.991,0.97,0.989,0.967
AUC,MLP,0.989,0.969,0.977,0.962
TSS,C3D,0.81,0.745,0.702,0.737
TSS,CNN,0.801,0.757,0.789,0.701
TSS,LSTM,0.912,0.837,0.903,0.831
TSS,MLP,0.906,0.833,0.855,0.807


In [11]:
# Summary with spread attached to each value.
df = organize(runs, std=True)
# LaTeX source of the table; multi-column headers (if any) are centred.
print(df.to_latex(multicolumn_format='c'))
df

\begin{tabular}{llllll}
\toprule
    & dataset &    fused\_sharp &    fused\_smarp &          sharp &          smarp \\
{} & estimator &                &                &                &                \\
\midrule
AUC & C3D &  0.959+/-0.020 &  0.941+/-0.013 &  0.903+/-0.051 &  0.928+/-0.031 \\
    & CNN &  0.961+/-0.024 &  0.946+/-0.012 &  0.954+/-0.017 &  0.915+/-0.035 \\
    & LSTM &  0.991+/-0.004 &  0.970+/-0.008 &  0.989+/-0.005 &  0.967+/-0.010 \\
    & MLP &  0.989+/-0.004 &  0.969+/-0.007 &  0.977+/-0.006 &  0.962+/-0.006 \\
TSS & C3D &  0.810+/-0.058 &  0.745+/-0.046 &  0.702+/-0.111 &  0.737+/-0.082 \\
    & CNN &  0.801+/-0.062 &  0.757+/-0.047 &  0.789+/-0.058 &  0.701+/-0.076 \\
    & LSTM &  0.912+/-0.024 &  0.837+/-0.028 &  0.903+/-0.031 &  0.831+/-0.033 \\
    & MLP &  0.906+/-0.015 &  0.833+/-0.026 &  0.855+/-0.027 &  0.807+/-0.028 \\
\bottomrule
\end{tabular}



Unnamed: 0_level_0,dataset,fused_sharp,fused_smarp,sharp,smarp
Unnamed: 0_level_1,estimator,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AUC,C3D,0.959+/-0.020,0.941+/-0.013,0.903+/-0.051,0.928+/-0.031
AUC,CNN,0.961+/-0.024,0.946+/-0.012,0.954+/-0.017,0.915+/-0.035
AUC,LSTM,0.991+/-0.004,0.970+/-0.008,0.989+/-0.005,0.967+/-0.010
AUC,MLP,0.989+/-0.004,0.969+/-0.007,0.977+/-0.006,0.962+/-0.006
TSS,C3D,0.810+/-0.058,0.745+/-0.046,0.702+/-0.111,0.737+/-0.082
TSS,CNN,0.801+/-0.062,0.757+/-0.047,0.789+/-0.058,0.701+/-0.076
TSS,LSTM,0.912+/-0.024,0.837+/-0.028,0.903+/-0.031,0.831+/-0.033
TSS,MLP,0.906+/-0.015,0.833+/-0.026,0.855+/-0.027,0.807+/-0.028


## arnet: fusesize_new_more_dataset

In [12]:
# Column mapping for the neural-network runs: metrics are read from the
# test/* keys and shown under upper-case names.
columns = {
    'tags.dataset_name': 'dataset',
    'tags.estimator_name': 'estimator',
    'metrics.test/auc': 'AUC',
    'metrics.test/tss': 'TSS',
}
# No value relabelling is needed for these estimator names.
rows = {}
# presumably (experiment name, run name) -- confirm against mlflow_helper.retrieve
runs_raw = retrieve('arnet', 'fusesize_new_more_dataset')
runs = select(runs_raw, columns, rows)
runs

Unnamed: 0,dataset,estimator,AUC,TSS
87,fused_smarp,FusionC3D,0.935290,0.737179
88,fused_smarp,FusionC3D,0.960964,0.823851
89,fused_smarp,FusionC3D,0.963064,0.798578
90,fused_smarp,FusionC3D,0.951671,0.791741
91,fused_smarp,FusionC3D,0.931807,0.756726
...,...,...,...,...
162,sharp,CNN,0.938087,0.716887
163,sharp,CNN,0.886316,0.654784
164,sharp,CNN,0.932091,0.705107
165,sharp,CNN,0.863181,0.664815


In [13]:
# NOTE(review): `df` is not displayed in this cell and is reassigned by the
# later `organize(runs, std=True)` cell, so this call looks redundant.
df = organize(runs)
# Styled summary table; keep a handle in df_style and display it.
df_style = style(runs)
df_style

Unnamed: 0_level_0,dataset,fused_sharp,fused_smarp,sharp,smarp
Unnamed: 0_level_1,estimator,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AUC,C3D,0.959,0.948,0.916,0.947
AUC,CNN,0.961,0.961,0.919,0.94
AUC,FusionC3D,0.962,0.949,0.912,0.931
AUC,FusionCNN,0.961,0.96,0.934,0.94
TSS,C3D,0.81,0.782,0.748,0.762
TSS,CNN,0.802,0.795,0.713,0.747
TSS,FusionC3D,0.806,0.782,0.721,0.745
TSS,FusionCNN,0.805,0.804,0.738,0.743


In [14]:
# Build the log-directory spec for the retrieved runs (presumably a
# name:path list as accepted by --logdir_spec -- confirm in mlflow_helper)
# and open TensorBoard inline on it.
dirs = tensorboard(runs_raw)
%tensorboard --logdir_spec {dirs}

Reusing TensorBoard on port 6007 (pid 15467), started 5 days, 1:54:44 ago. (Use '!kill 15467' to kill it.)

In [15]:
# Summary with spread attached to each value.
df = organize(runs, std=True)
# LaTeX source of the table; multi-column headers (if any) are centred.
print(df.to_latex(multicolumn_format='c'))
df

\begin{tabular}{llllll}
\toprule
    & dataset &    fused\_sharp &    fused\_smarp &          sharp &          smarp \\
{} & estimator &                &                &                &                \\
\midrule
AUC & C3D &  0.959+/-0.030 &  0.948+/-0.023 &  0.916+/-0.043 &  0.947+/-0.011 \\
    & CNN &  0.961+/-0.027 &  0.961+/-0.006 &  0.919+/-0.045 &  0.940+/-0.015 \\
    & FusionC3D &  0.961+/-0.020 &  0.949+/-0.014 &  0.912+/-0.050 &  0.931+/-0.032 \\
    & FusionCNN &  0.961+/-0.027 &  0.960+/-0.010 &  0.934+/-0.023 &  0.940+/-0.007 \\
TSS & C3D &  0.810+/-0.078 &  0.782+/-0.068 &  0.748+/-0.081 &  0.763+/-0.016 \\
    & CNN &  0.802+/-0.076 &  0.795+/-0.028 &  0.713+/-0.066 &  0.747+/-0.057 \\
    & FusionC3D &  0.806+/-0.047 &  0.782+/-0.035 &  0.721+/-0.115 &  0.745+/-0.072 \\
    & FusionCNN &  0.805+/-0.087 &  0.804+/-0.034 &  0.738+/-0.066 &  0.743+/-0.028 \\
\bottomrule
\end{tabular}



Unnamed: 0_level_0,dataset,fused_sharp,fused_smarp,sharp,smarp
Unnamed: 0_level_1,estimator,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AUC,C3D,0.959+/-0.030,0.948+/-0.023,0.916+/-0.043,0.947+/-0.011
AUC,CNN,0.961+/-0.027,0.961+/-0.006,0.919+/-0.045,0.940+/-0.015
AUC,FusionC3D,0.961+/-0.020,0.949+/-0.014,0.912+/-0.050,0.931+/-0.032
AUC,FusionCNN,0.961+/-0.027,0.960+/-0.010,0.934+/-0.023,0.940+/-0.007
TSS,C3D,0.810+/-0.078,0.782+/-0.068,0.748+/-0.081,0.763+/-0.016
TSS,CNN,0.802+/-0.076,0.795+/-0.028,0.713+/-0.066,0.747+/-0.057
TSS,FusionC3D,0.806+/-0.047,0.782+/-0.035,0.721+/-0.115,0.745+/-0.072
TSS,FusionCNN,0.805+/-0.087,0.804+/-0.034,0.738+/-0.066,0.743+/-0.028


In [16]:
# Styled summary of the selected runs with the helper's default grouping.
style(runs)

Unnamed: 0_level_0,dataset,fused_sharp,fused_smarp,sharp,smarp
Unnamed: 0_level_1,estimator,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AUC,C3D,0.959,0.948,0.916,0.947
AUC,CNN,0.961,0.961,0.919,0.94
AUC,FusionC3D,0.962,0.949,0.912,0.931
AUC,FusionCNN,0.961,0.96,0.934,0.94
TSS,C3D,0.81,0.782,0.748,0.762
TSS,CNN,0.802,0.795,0.713,0.747
TSS,FusionC3D,0.806,0.782,0.721,0.745
TSS,FusionCNN,0.805,0.804,0.738,0.743


## arnet: fusesize_QS

In [17]:
# Shared column mapping for arnet runs, provided by mlflow_helper.
columns = get_columns('arnet')
# No value relabelling is needed for these estimator names.
rows = {}
# presumably (experiment name, run name) -- confirm against mlflow_helper.retrieve
runs_raw = retrieve('arnet', 'fusesize_QS')
runs = select(runs_raw, columns, rows)
runs

Unnamed: 0,dataset,estimator,AUC,TSS
6,fused_smarp,FusionC3D,0.870811,0.655346
7,fused_smarp,FusionC3D,0.893403,0.650000
8,fused_smarp,FusionC3D,0.903260,0.651032
9,fused_smarp,FusionC3D,0.917868,0.695096
10,fused_smarp,FusionC3D,0.856200,0.585851
...,...,...,...,...
81,sharp,CNN,0.855643,0.565943
82,sharp,CNN,0.906057,0.701195
83,sharp,CNN,0.861959,0.589474
84,sharp,CNN,0.631436,0.234286


In [18]:
# Summary with spread attached to each value.
df = organize(runs, std=True)
# LaTeX source of the table; multi-column headers (if any) are centred.
print(df.to_latex(multicolumn_format='c'))
df

\begin{tabular}{llllll}
\toprule
    & dataset &    fused\_sharp &    fused\_smarp &          sharp &          smarp \\
{} & estimator &                &                &                &                \\
\midrule
AUC & C3D &  0.913+/-0.034 &  0.905+/-0.019 &  0.814+/-0.121 &  0.871+/-0.026 \\
    & CNN &  0.902+/-0.044 &  0.889+/-0.015 &  0.832+/-0.115 &  0.876+/-0.009 \\
    & FusionC3D &  0.903+/-0.036 &  0.888+/-0.025 &  0.853+/-0.042 &  0.851+/-0.057 \\
    & FusionCNN &  0.905+/-0.052 &  0.883+/-0.013 &  0.850+/-0.073 &  0.853+/-0.021 \\
TSS & C3D &  0.697+/-0.042 &  0.670+/-0.026 &  0.521+/-0.160 &  0.597+/-0.056 \\
    & CNN &  0.656+/-0.071 &  0.641+/-0.049 &  0.546+/-0.182 &  0.604+/-0.036 \\
    & FusionC3D &  0.671+/-0.072 &  0.647+/-0.039 &  0.604+/-0.022 &  0.569+/-0.092 \\
    & FusionCNN &  0.685+/-0.102 &  0.632+/-0.015 &  0.560+/-0.155 &  0.564+/-0.043 \\
\bottomrule
\end{tabular}



Unnamed: 0_level_0,dataset,fused_sharp,fused_smarp,sharp,smarp
Unnamed: 0_level_1,estimator,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AUC,C3D,0.913+/-0.034,0.905+/-0.019,0.814+/-0.121,0.871+/-0.026
AUC,CNN,0.902+/-0.044,0.889+/-0.015,0.832+/-0.115,0.876+/-0.009
AUC,FusionC3D,0.903+/-0.036,0.888+/-0.025,0.853+/-0.042,0.851+/-0.057
AUC,FusionCNN,0.905+/-0.052,0.883+/-0.013,0.850+/-0.073,0.853+/-0.021
TSS,C3D,0.697+/-0.042,0.670+/-0.026,0.521+/-0.160,0.597+/-0.056
TSS,CNN,0.656+/-0.071,0.641+/-0.049,0.546+/-0.182,0.604+/-0.036
TSS,FusionC3D,0.671+/-0.072,0.647+/-0.039,0.604+/-0.022,0.569+/-0.092
TSS,FusionCNN,0.685+/-0.102,0.632+/-0.015,0.560+/-0.155,0.564+/-0.043


In [19]:
# Styled summary of the selected runs with the helper's default grouping.
style(runs)

Unnamed: 0_level_0,dataset,fused_sharp,fused_smarp,sharp,smarp
Unnamed: 0_level_1,estimator,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AUC,C3D,0.913,0.905,0.814,0.871
AUC,CNN,0.902,0.889,0.832,0.876
AUC,FusionC3D,0.903,0.888,0.853,0.851
AUC,FusionCNN,0.905,0.883,0.85,0.853
TSS,C3D,0.697,0.67,0.521,0.597
TSS,CNN,0.656,0.641,0.546,0.604
TSS,FusionC3D,0.671,0.647,0.604,0.569
TSS,FusionCNN,0.685,0.632,0.56,0.565


## leaderboard2: sklearn

In [20]:
# Shared column mapping for scikit-learn runs, provided by mlflow_helper.
columns = get_columns('sklearn')
# Per-column value relabelling: long scikit-learn class names -> abbreviations.
rows = {
    'estimator': {
        'HistGradientBoostingClassifier': 'HGB',
        'RandomForestClassifier': 'RF',
        'SGDClassifier': 'LG',
    }
}
# presumably (experiment name, run name) -- confirm against mlflow_helper.retrieve
runs_raw = retrieve('leaderboard2', 'sklearn')
runs = select(runs_raw, columns, rows)
runs

Select the first from 
                          start_time tags.mlflow.runName  \
281 2021-06-04 15:43:34.477000+00:00             sklearn   
465 2021-06-04 00:56:40.893000+00:00             sklearn   

                tags.mlflow.source.git.commit  
281  084b4a84b04953ad4e004885cd4fed94986f5979  
465  9ceec256686c5d7c0580736a3ae6a21f7f45d35c  


Unnamed: 0,dataset,estimator,AUC,TSS
161,fused_sharp,HGB,0.947099,0.779633
162,fused_sharp,RF,0.928796,0.741235
163,fused_sharp,LG,0.958088,0.772955
164,fused_sharp,HGB,0.958913,0.804781
165,fused_sharp,RF,0.949705,0.776892
...,...,...,...,...
276,smarp,RF,0.957159,0.787253
277,smarp,LG,0.976000,0.839318
278,smarp,HGB,0.973456,0.826233
279,smarp,RF,0.961006,0.799327


In [21]:
# Summary with spread attached to each value.
df = organize(runs, std=True)
# LaTeX source of the table; multi-column headers (if any) are centred.
print(df.to_latex(multicolumn_format='c'))
df

\begin{tabular}{llllll}
\toprule
    & dataset &    fused\_sharp &    fused\_smarp &          sharp &          smarp \\
{} & estimator &                &                &                &                \\
\midrule
AUC & HGB &  0.970+/-0.023 &  0.946+/-0.025 &  0.972+/-0.023 &  0.951+/-0.023 \\
    & LG &  0.972+/-0.023 &  0.943+/-0.023 &  0.973+/-0.023 &  0.950+/-0.024 \\
    & RF &  0.960+/-0.028 &  0.932+/-0.031 &  0.964+/-0.027 &  0.933+/-0.031 \\
TSS & HGB &  0.852+/-0.079 &  0.764+/-0.071 &  0.854+/-0.077 &  0.781+/-0.070 \\
    & LG &  0.848+/-0.073 &  0.764+/-0.061 &  0.856+/-0.074 &  0.782+/-0.073 \\
    & RF &  0.828+/-0.077 &  0.725+/-0.087 &  0.838+/-0.074 &  0.732+/-0.084 \\
\bottomrule
\end{tabular}



Unnamed: 0_level_0,dataset,fused_sharp,fused_smarp,sharp,smarp
Unnamed: 0_level_1,estimator,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AUC,HGB,0.970+/-0.023,0.946+/-0.025,0.972+/-0.023,0.951+/-0.023
AUC,LG,0.972+/-0.023,0.943+/-0.023,0.973+/-0.023,0.950+/-0.024
AUC,RF,0.960+/-0.028,0.932+/-0.031,0.964+/-0.027,0.933+/-0.031
TSS,HGB,0.852+/-0.079,0.764+/-0.071,0.854+/-0.077,0.781+/-0.070
TSS,LG,0.848+/-0.073,0.764+/-0.061,0.856+/-0.074,0.782+/-0.073
TSS,RF,0.828+/-0.077,0.725+/-0.087,0.838+/-0.074,0.732+/-0.084


In [22]:
# Styled summary of the selected runs with the helper's default grouping.
style(runs)

Unnamed: 0_level_0,dataset,fused_sharp,fused_smarp,sharp,smarp
Unnamed: 0_level_1,estimator,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AUC,HGB,0.97,0.946,0.972,0.951
AUC,LG,0.972,0.943,0.973,0.95
AUC,RF,0.96,0.932,0.964,0.933
TSS,HGB,0.852,0.764,0.854,0.781
TSS,LG,0.848,0.764,0.856,0.781
TSS,RF,0.828,0.725,0.838,0.732


## leaderboard2: arnet

In [3]:
# Start from the shared arnet column mapping and add the Brier skill score.
# A new dict is built instead of calling .update() on the helper's return
# value: if get_columns hands back a shared object (not visible from here),
# in-place mutation would leak the extra key into every later caller and make
# this cell non-idempotent.
columns = {
    **get_columns('arnet'),
    'metrics.test/bss': 'BSS',
}
# No value relabelling is needed for these estimator names.
rows = {}
# presumably (experiment name, run name) -- confirm against mlflow_helper.retrieve
runs_raw = retrieve('leaderboard2', 'arnet')
runs = select(runs_raw, columns, rows)
runs

Select iloc 0 from 
                          start_time tags.mlflow.runName  \
181 2021-06-06 18:45:11.099000+00:00               arnet   
488 2021-06-04 00:56:12.203000+00:00               arnet   

                tags.mlflow.source.git.commit  
181  9ceec256686c5d7c0580736a3ae6a21f7f45d35c  
488  9ceec256686c5d7c0580736a3ae6a21f7f45d35c  


Unnamed: 0,dataset,estimator,AUC,TSS,BSS
21,fused_smarp,C3D,0.894203,0.611321,0.130059
22,fused_smarp,C3D,0.879053,0.654167,0.069913
23,fused_smarp,C3D,0.915493,0.666979,0.194797
24,fused_smarp,C3D,0.919003,0.691898,-0.087610
25,fused_smarp,C3D,0.879287,0.611288,-0.126926
...,...,...,...,...,...
176,sharp,MLP,0.986885,0.912252,0.795789
177,sharp,MLP,0.982534,0.891182,0.748400
178,sharp,MLP,0.986856,0.920923,0.827172
179,sharp,MLP,0.995346,0.938889,0.882912


In [4]:
# Styled summary of the selected runs with the helper's default grouping.
style(runs)

Unnamed: 0_level_0,dataset,fused_sharp,fused_smarp,sharp,smarp
Unnamed: 0_level_1,estimator,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AUC,C3D,0.933,0.925,0.875,0.906
AUC,CNN,0.931,0.93,0.882,0.901
AUC,LSTM,0.971,0.947,0.967,0.943
AUC,MLP,0.971,0.947,0.965,0.944
TSS,C3D,0.741,0.713,0.648,0.678
TSS,CNN,0.722,0.725,0.652,0.663
TSS,LSTM,0.853,0.778,0.834,0.773
TSS,MLP,0.85,0.772,0.836,0.765
BSS,C3D,0.283,0.244,-28.202,-1.731
BSS,CNN,0.189,0.246,-0.142,-0.0


In [4]:
# NOTE(review): identical to the preceding cell (same call, same execution
# count, same output) -- looks like an accidental duplicate.
style(runs)

Unnamed: 0_level_0,dataset,fused_sharp,fused_smarp,sharp,smarp
Unnamed: 0_level_1,estimator,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AUC,C3D,0.933,0.925,0.875,0.906
AUC,CNN,0.931,0.93,0.882,0.901
AUC,LSTM,0.971,0.947,0.967,0.943
AUC,MLP,0.971,0.947,0.965,0.944
TSS,C3D,0.741,0.713,0.648,0.678
TSS,CNN,0.722,0.725,0.652,0.663
TSS,LSTM,0.853,0.778,0.834,0.773
TSS,MLP,0.85,0.772,0.836,0.765
BSS,C3D,0.283,0.244,-28.202,-1.731
BSS,CNN,0.189,0.246,-0.142,-0.0


In [5]:
# Summary with spread attached to each value.
df = organize(runs, std=True)
# LaTeX source of the table; multi-column headers (if any) are centred.
print(df.to_latex(multicolumn_format='c'))
df

\begin{tabular}{llllll}
\toprule
    & dataset &    fused\_sharp &    fused\_smarp &             sharp &           smarp \\
{} & estimator &                &                &                   &                 \\
\midrule
AUC & C3D &  0.933+/-0.041 &  0.925+/-0.033 &     0.875+/-0.117 &   0.907+/-0.041 \\
    & CNN &  0.931+/-0.041 &  0.930+/-0.036 &     0.882+/-0.069 &   0.901+/-0.046 \\
    & LSTM &  0.971+/-0.027 &  0.947+/-0.025 &     0.966+/-0.027 &   0.943+/-0.025 \\
    & MLP &  0.970+/-0.026 &  0.947+/-0.026 &     0.964+/-0.031 &   0.944+/-0.027 \\
TSS & C3D &  0.741+/-0.103 &  0.713+/-0.078 &     0.648+/-0.180 &   0.678+/-0.088 \\
    & CNN &  0.722+/-0.089 &  0.725+/-0.093 &     0.652+/-0.144 &   0.663+/-0.094 \\
    & LSTM &  0.853+/-0.082 &  0.778+/-0.074 &     0.834+/-0.089 &   0.773+/-0.073 \\
    & MLP &  0.850+/-0.079 &  0.772+/-0.080 &     0.836+/-0.087 &   0.765+/-0.077 \\
BSS & C3D &  0.283+/-0.335 &  0.244+/-0.242 &  -28.202+/-45.646 &  -1.731+/-5.497 \\
    & CNN 

Unnamed: 0_level_0,dataset,fused_sharp,fused_smarp,sharp,smarp
Unnamed: 0_level_1,estimator,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AUC,C3D,0.933+/-0.041,0.925+/-0.033,0.875+/-0.117,0.907+/-0.041
AUC,CNN,0.931+/-0.041,0.930+/-0.036,0.882+/-0.069,0.901+/-0.046
AUC,LSTM,0.971+/-0.027,0.947+/-0.025,0.966+/-0.027,0.943+/-0.025
AUC,MLP,0.970+/-0.026,0.947+/-0.026,0.964+/-0.031,0.944+/-0.027
TSS,C3D,0.741+/-0.103,0.713+/-0.078,0.648+/-0.180,0.678+/-0.088
TSS,CNN,0.722+/-0.089,0.725+/-0.093,0.652+/-0.144,0.663+/-0.094
TSS,LSTM,0.853+/-0.082,0.778+/-0.074,0.834+/-0.089,0.773+/-0.073
TSS,MLP,0.850+/-0.079,0.772+/-0.080,0.836+/-0.087,0.765+/-0.077
BSS,C3D,0.283+/-0.335,0.244+/-0.242,-28.202+/-45.646,-1.731+/-5.497
BSS,CNN,0.189+/-0.449,0.246+/-0.249,-0.142+/-0.543,-0.000+/-0.495


In [26]:
# Build the log-directory spec for the retrieved runs (presumably a
# name:path list as accepted by --logdir_spec -- confirm in mlflow_helper)
# and open TensorBoard inline on it.
dirs = tensorboard(runs_raw)
%tensorboard --logdir_spec {dirs}

Reusing TensorBoard on port 6008 (pid 15489), started 5 days, 1:54:34 ago. (Use '!kill 15489' to kill it.)

## CNN: tune_CNN

In [27]:
# Inspect the column mapping currently held in `columns`.
# NOTE(review): the value depends on which cell last assigned it.
columns

{'tags.dataset_name': 'dataset',
 'tags.estimator_name': 'estimator',
 'metrics.test/auc': 'AUC',
 'metrics.test/tss': 'TSS'}

In [6]:
# Column mapping for the CNN tuning runs: besides the usual tags and test
# metrics, pull in the two data hyper-parameters being tuned.
columns = {
    'tags.database_name': 'database',
    'tags.dataset_name': 'dataset',
    'tags.estimator_name': 'estimator',
    'params.DATA.SHRINKAGE': 'shrinkage',
    'params.DATA.THRESH': 'thresh',
    'metrics.test/auc': 'AUC',
    'metrics.test/tss': 'TSS',
}
# No value relabelling is needed here.
rows = {}
# presumably (experiment name, run name) -- confirm against mlflow_helper.retrieve
runs_raw = retrieve('CNN', 'tune_CNN')
runs = select(runs_raw, columns, rows)
runs

Select iloc 0 from 
                          start_time tags.mlflow.runName  \
133 2021-06-07 08:39:39.611000+00:00            tune_CNN   

                tags.mlflow.source.git.commit  
133  181f529d41a5631e2cd26e6138d9a0a3baab242c  


Unnamed: 0,database,dataset,estimator,shrinkage,thresh,AUC,TSS
73,M_QS_24hr,fused_sharp,CNN,1/2,150.0,0.901782,0.661842
74,M_QS_24hr,fused_sharp,CNN,1/2,150.0,0.868087,0.609524
75,M_QS_24hr,fused_sharp,CNN,1/2,150.0,0.922934,0.70303
76,M_QS_24hr,fused_sharp,CNN,log,150.0,0.92925,0.727632
77,M_QS_24hr,fused_sharp,CNN,log,150.0,0.849244,0.561905
78,M_QS_24hr,fused_sharp,CNN,log,150.0,0.90543,0.666667
79,M_QS_24hr,fused_sharp,CNN,1/2,50.0,0.933029,0.705263
80,M_QS_24hr,fused_sharp,CNN,1/2,50.0,0.862199,0.571429
81,M_QS_24hr,fused_sharp,CNN,1/2,50.0,0.933362,0.715152
82,M_QS_24hr,fused_sharp,CNN,log,50.0,0.916192,0.668421


In [7]:
# Already imported in the first cell; repeated so this cell runs on its own.
from uncertainties import ufloat
# Work on a copy so the dtype change below does not touch `runs`.
_runs = runs.copy()
# Make shrinkage categorical with an explicit category order
# ('None', '1/2', 'log') instead of plain string order. Values outside this
# list become NaN.
_runs['shrinkage'] = pd.Categorical(_runs['shrinkage'], ['None', '1/2', 'log'])
def _organize(runs, by, std):
    """Aggregate run metrics per group and pivot them into a metric-by-setting table.

    Args:
        runs: DataFrame holding the `by` columns plus the metric columns to aggregate.
        by: list of column names to group on. The last two keys end up in the row
            index (below the metric name); the remaining keys become the columns.
        std: if True, each cell is ``ufloat(mean, std)`` of the group (requires
            `ufloat` from `uncertainties` in scope); if False, each cell is the
            plain group mean.

    Returns:
        DataFrame indexed by (metric, by[-2], by[-1]) whose columns are the
        remaining `by` keys.
    """
    def _summarize(values):
        # One table cell per (group, metric column).
        if std:
            return ufloat(values.mean(), values.std())
        return values.mean()

    return (runs
            .groupby(by)
            .agg(_summarize)
            .dropna()
            # Move the two innermost group keys next to the metric name ...
            .unstack([-2, -1])
            # ... and flip so metrics/settings are rows and the outer keys are columns.
            .T)
# Rows: metric x shrinkage x thresh; columns: database x dataset (see the table below).
df = _organize(_runs, by=['database', 'dataset', 'shrinkage', 'thresh'], std=True)
# Also print the LaTeX source of the same table, with centred multicolumn headers.
print(df.to_latex(multicolumn_format='c'))
df

\begin{tabular}{lllllll}
\toprule
    &     & database & \multicolumn{2}{c}{M\_QS\_24hr} & \multicolumn{2}{c}{M\_Q\_24hr} \\
    &     & dataset &    fused\_sharp &        sharp &      fused\_sharp &          sharp \\
{} & shrinkage & thresh &                &              &                  &                \\
\midrule
AUC & None & None &    0.90+/-0.06 &  0.83+/-0.08 &    0.978+/-0.007 &  0.932+/-0.010 \\
    & 1/2 & 150 &  0.898+/-0.028 &  0.81+/-0.08 &    0.973+/-0.006 &  0.961+/-0.020 \\
    &     & 50 &    0.91+/-0.04 &  0.84+/-0.07 &  0.9767+/-0.0018 &  0.965+/-0.018 \\
    & log & 150 &    0.89+/-0.04 &  0.80+/-0.06 &    0.974+/-0.004 &  0.968+/-0.008 \\
    &     & 50 &    0.89+/-0.04 &  0.86+/-0.08 &  0.9785+/-0.0026 &  0.967+/-0.027 \\
TSS & None & None &    0.67+/-0.11 &  0.53+/-0.15 &      0.86+/-0.04 &    0.75+/-0.04 \\
    & 1/2 & 150 &    0.66+/-0.05 &  0.49+/-0.14 &    0.823+/-0.014 &    0.81+/-0.07 \\
    &     & 50 &    0.66+/-0.08 &  0.54+/-0.13 &    0.830+/-0.014 &

Unnamed: 0_level_0,Unnamed: 1_level_0,database,M_QS_24hr,M_QS_24hr,M_Q_24hr,M_Q_24hr
Unnamed: 0_level_1,Unnamed: 1_level_1,dataset,fused_sharp,sharp,fused_sharp,sharp
Unnamed: 0_level_2,shrinkage,thresh,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
AUC,,,0.90+/-0.06,0.83+/-0.08,0.978+/-0.007,0.932+/-0.010
AUC,1/2,150.0,0.898+/-0.028,0.81+/-0.08,0.973+/-0.006,0.961+/-0.020
AUC,1/2,50.0,0.91+/-0.04,0.84+/-0.07,0.9767+/-0.0018,0.965+/-0.018
AUC,log,150.0,0.89+/-0.04,0.80+/-0.06,0.974+/-0.004,0.968+/-0.008
AUC,log,50.0,0.89+/-0.04,0.86+/-0.08,0.9785+/-0.0026,0.967+/-0.027
TSS,,,0.67+/-0.11,0.53+/-0.15,0.86+/-0.04,0.75+/-0.04
TSS,1/2,150.0,0.66+/-0.05,0.49+/-0.14,0.823+/-0.014,0.81+/-0.07
TSS,1/2,50.0,0.66+/-0.08,0.54+/-0.13,0.830+/-0.014,0.827+/-0.035
TSS,log,150.0,0.65+/-0.08,0.47+/-0.12,0.820+/-0.029,0.819+/-0.015
TSS,log,50.0,0.64+/-0.06,0.59+/-0.12,0.843+/-0.022,0.835+/-0.022


In [8]:
# Fuse the two data parameters into one key (e.g. '1/2_150') so they can be
# used as a single grouping level.
_runs = runs.assign(shrinkage_thresh=runs['shrinkage'] + '_' + runs['thresh'])
style(_runs, by=['database', 'dataset', 'shrinkage_thresh'])

Unnamed: 0_level_0,database,M_QS_24hr,M_QS_24hr,M_Q_24hr,M_Q_24hr
Unnamed: 0_level_1,dataset,fused_sharp,sharp,fused_sharp,sharp
Unnamed: 0_level_2,shrinkage_thresh,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
AUC,1/2_150,0.898,0.808,0.973,0.961
AUC,1/2_50,0.909,0.841,0.977,0.965
AUC,None_None,0.903,0.826,0.978,0.932
AUC,log_150,0.895,0.798,0.974,0.968
AUC,log_50,0.895,0.856,0.979,0.967
TSS,1/2_150,0.658,0.494,0.823,0.808
TSS,1/2_50,0.664,0.538,0.83,0.827
TSS,None_None,0.669,0.525,0.859,0.746
TSS,log_150,0.652,0.467,0.82,0.819
TSS,log_50,0.639,0.593,0.843,0.835


In [9]:
# Seed-0 runs on M_QS_24hr / fused_sharp, one per shrinkage/thresh setting.
# `mask` is reused by the TensorBoard cell that follows.
mask = (
    runs_raw['params.DATA.SEED'].eq('0')
    & runs_raw['tags.database_name'].eq('M_QS_24hr')
    & runs_raw['tags.dataset_name'].eq('fused_sharp')
)
runs_raw.loc[mask, ['tags.database_name', 'tags.dataset_name',
                    'params.DATA.SHRINKAGE', 'params.DATA.THRESH']]

Unnamed: 0,tags.database_name,tags.dataset_name,params.DATA.SHRINKAGE,params.DATA.THRESH
75,M_QS_24hr,fused_sharp,1/2,150.0
78,M_QS_24hr,fused_sharp,log,150.0
81,M_QS_24hr,fused_sharp,1/2,50.0
84,M_QS_24hr,fused_sharp,log,50.0
87,M_QS_24hr,fused_sharp,,


In [10]:
# Show the learning curves of the runs selected by `mask` in the previous cell.
dir_str = tensorboard(runs_raw[mask])
%tensorboard --logdir_spec {dir_str}

Reusing TensorBoard on port 6011 (pid 32318), started 5:21:16 ago. (Use '!kill 32318' to kill it.)

It can be observed that not using value transformation seems to converge faster (only slightly).

## CNN: Li2020

In [16]:
# Raw MLflow column name -> short label used in the tables below.
columns = {
    'tags.database_name': 'database',
    'tags.dataset_name': 'dataset',
    'tags.estimator_name': 'estimator',
    # shrinkage/thresh are disabled here, so they are not carried into `runs`
    #'params.DATA.SHRINKAGE': 'shrinkage',
    #'params.DATA.THRESH': 'thresh',
    'metrics.test/auc': 'AUC',
    'metrics.test/tss': 'TSS',
}
rows = {}  # no cell-value renaming for this experiment
# The second `retrieve` argument matches `tags.mlflow.runName` in the printed output.
runs_raw = retrieve('CNN', 'Li2020')
runs = select(runs_raw, columns, rows)
runs

Select iloc 0 from 
                         start_time tags.mlflow.runName  \
72 2021-06-07 17:25:00.343000+00:00              Li2020   

               tags.mlflow.source.git.commit  
72  1099722f18b4edfc0c22ae09798120ec7be2088f  


Unnamed: 0,database,dataset,estimator,AUC,TSS
47,M_QS_24hr,fused_sharp,CNN_Li2020,0.959915,0.776316
49,M_QS_24hr,fused_sharp,CNN_Li2020,0.840317,0.548571
51,M_QS_24hr,fused_sharp,CNN_Li2020,0.921088,0.711111
53,M_QS_24hr,sharp,CNN_Li2020,0.913765,0.725
55,M_QS_24hr,sharp,CNN_Li2020,0.809105,0.495238
57,M_QS_24hr,sharp,CNN_Li2020,0.933654,0.745455
59,M_Q_24hr,fused_sharp,CNN_Li2020,0.981138,0.874794
61,M_Q_24hr,fused_sharp,CNN_Li2020,0.988676,0.9
63,M_Q_24hr,fused_sharp,CNN_Li2020,0.982983,0.9
65,M_Q_24hr,sharp,CNN_Li2020,0.98607,0.894563


In [32]:
style(runs, by=['database', 'dataset', 'estimator'])

Unnamed: 0_level_0,database,M_QS_24hr,M_QS_24hr,M_Q_24hr,M_Q_24hr
Unnamed: 0_level_1,dataset,fused_sharp,sharp,fused_sharp,sharp
Unnamed: 0_level_2,estimator,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
AUC,CNN_Li2020,0.907,0.885,0.984,0.982
TSS,CNN_Li2020,0.679,0.655,0.892,0.878


In [33]:
# Summarise the runs per (database, dataset, estimator) as value+/-uncertainty
# (presumably mean +/- std across runs, given std=True -- confirm in mlflow_helper.organize).
df = organize(runs, by=['database', 'dataset', 'estimator'], std=True)
# Print the LaTeX source of the table, then display it.
print(df.to_latex(multicolumn_format='c'))
df

\begin{tabular}{llllll}
\toprule
    & database & \multicolumn{2}{c}{M\_QS\_24hr} & \multicolumn{2}{c}{M\_Q\_24hr} \\
    & dataset &    fused\_sharp &          sharp &    fused\_sharp &          sharp \\
{} & estimator &                &                &                &                \\
\midrule
AUC & CNN\_Li2020 &  0.907+/-0.061 &  0.886+/-0.067 &  0.984+/-0.004 &  0.982+/-0.004 \\
TSS & CNN\_Li2020 &  0.679+/-0.117 &  0.655+/-0.139 &  0.892+/-0.015 &  0.878+/-0.024 \\
\bottomrule
\end{tabular}



Unnamed: 0_level_0,database,M_QS_24hr,M_QS_24hr,M_Q_24hr,M_Q_24hr
Unnamed: 0_level_1,dataset,fused_sharp,sharp,fused_sharp,sharp
Unnamed: 0_level_2,estimator,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
AUC,CNN_Li2020,0.907+/-0.061,0.886+/-0.067,0.984+/-0.004,0.982+/-0.004
TSS,CNN_Li2020,0.679+/-0.117,0.655+/-0.139,0.892+/-0.015,0.878+/-0.024


In [19]:
# Look up the checkpoint of the seed-0 run on M_Q_24hr / fused_sharp.
runs_raw.loc[
    runs_raw['tags.database_name'].eq('M_Q_24hr')
    & runs_raw['params.DATA.SEED'].eq('0')
    & runs_raw['tags.dataset_name'].eq('fused_sharp'),
    ['tags.dataset_name', 'tags.estimator_name', 'tags.checkpoint'],
]

Unnamed: 0,tags.dataset_name,tags.estimator_name,tags.checkpoint
63,fused_sharp,CNN_Li2020,/home/zeyusun/work/flare-prediction-smarp/lightning_logs/version_881/checkpoints/epoch=18-step=3191.ckpt


In [28]:
# NOTE(review): the Li2020 summary tables above only list the 'fused_sharp' and
# 'sharp' datasets, so this 'fused_smarp' filter may select no runs when the
# notebook is executed top to bottom -- confirm the intended dataset/seed.
dirs = tensorboard(runs_raw.loc[
    (runs_raw['tags.database_name'] == 'M_Q_24hr') &
    (runs_raw['params.DATA.SEED'] == '2') &
    (runs_raw['tags.dataset_name'] == 'fused_smarp')
])
%tensorboard --logdir_spec {dirs}

## CNN: Li2020_convbn

In [34]:
# Raw MLflow column name -> short label used in the tables below.
columns = {
    'tags.database_name': 'database',
    'tags.dataset_name': 'dataset',
    'tags.estimator_name': 'estimator',
    # shrinkage/thresh are disabled here, so they are not carried into `runs`
    #'params.DATA.SHRINKAGE': 'shrinkage',
    #'params.DATA.THRESH': 'thresh',
    'metrics.test/auc': 'AUC',
    'metrics.test/tss': 'TSS',
}
rows = {}  # no cell-value renaming for this experiment
# Same pipeline as the 'Li2020' section, for the 'Li2020_convbn' runs.
runs_raw = retrieve('CNN', 'Li2020_convbn')
runs = select(runs_raw, columns, rows)
runs

Unnamed: 0,database,dataset,estimator,AUC,TSS
1,M_QS_24hr,fused_sharp,CNN_Li2020,0.943956,0.755263
3,M_QS_24hr,fused_sharp,CNN_Li2020,0.854973,0.6
5,M_QS_24hr,fused_sharp,CNN_Li2020,0.935939,0.749495
7,M_QS_24hr,sharp,CNN_Li2020,0.941208,0.742105
9,M_QS_24hr,sharp,CNN_Li2020,0.779258,0.481905
11,M_QS_24hr,sharp,CNN_Li2020,0.932113,0.761616
13,M_Q_24hr,fused_sharp,CNN_Li2020,0.967816,0.803954
15,M_Q_24hr,fused_sharp,CNN_Li2020,0.958759,0.812963
17,M_Q_24hr,fused_sharp,CNN_Li2020,0.979786,0.84878
19,M_Q_24hr,sharp,CNN_Li2020,0.985668,0.904448


In [35]:
# Summarise the runs per (database, dataset, estimator) as value+/-uncertainty
# (presumably mean +/- std across runs, given std=True -- confirm in mlflow_helper.organize).
df = organize(runs, by=['database', 'dataset', 'estimator'], std=True)
# Print the LaTeX source of the table, then display it.
print(df.to_latex(multicolumn_format='c'))
df

\begin{tabular}{llllll}
\toprule
    & database & \multicolumn{2}{c}{M\_QS\_24hr} & \multicolumn{2}{c}{M\_Q\_24hr} \\
    & dataset &    fused\_sharp &          sharp &    fused\_sharp &          sharp \\
{} & estimator &                &                &                &                \\
\midrule
AUC & CNN\_Li2020 &  0.912+/-0.049 &  0.884+/-0.091 &  0.969+/-0.011 &  0.970+/-0.024 \\
TSS & CNN\_Li2020 &  0.702+/-0.088 &  0.662+/-0.156 &  0.822+/-0.024 &  0.850+/-0.082 \\
\bottomrule
\end{tabular}



Unnamed: 0_level_0,database,M_QS_24hr,M_QS_24hr,M_Q_24hr,M_Q_24hr
Unnamed: 0_level_1,dataset,fused_sharp,sharp,fused_sharp,sharp
Unnamed: 0_level_2,estimator,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
AUC,CNN_Li2020,0.912+/-0.049,0.884+/-0.091,0.969+/-0.011,0.970+/-0.024
TSS,CNN_Li2020,0.702+/-0.088,0.662+/-0.156,0.822+/-0.024,0.850+/-0.082


## CNN: CNN_comparedwithLi

In [3]:
# Raw MLflow column name -> short label used in the tables below.
columns = {
    'tags.database_name': 'database',
    'tags.dataset_name': 'dataset',
    'tags.estimator_name': 'estimator',
    # shrinkage/thresh are disabled here, so they are not carried into `runs`
    #'params.DATA.SHRINKAGE': 'shrinkage',
    #'params.DATA.THRESH': 'thresh',
    'metrics.test/auc': 'AUC',
    'metrics.test/tss': 'TSS',
}
rows = {}  # no cell-value renaming for this experiment
# Same pipeline as the 'Li2020' section, for the 'CNN_comparedwithLi' runs.
runs_raw = retrieve('CNN', 'CNN_comparedwithLi')
runs = select(runs_raw, columns, rows)
runs

Unnamed: 0,database,dataset,estimator,AUC,TSS
0,M_QS_24hr,fused_sharp,CNN,0.921037,0.685526
1,M_QS_24hr,fused_sharp,CNN,0.805317,0.464762
2,M_QS_24hr,fused_sharp,CNN,0.914888,0.672727
3,M_QS_24hr,sharp,CNN,0.850596,0.567105
4,M_QS_24hr,sharp,CNN,0.731597,0.335238
5,M_QS_24hr,sharp,CNN,0.877594,0.658586
6,M_Q_24hr,fused_sharp,CNN,0.972088,0.848435
7,M_Q_24hr,fused_sharp,CNN,0.959374,0.768519
8,M_Q_24hr,fused_sharp,CNN,0.970654,0.821951
9,M_Q_24hr,sharp,CNN,0.938152,0.736409


In [4]:
# Summarise the runs per (database, dataset, estimator) as value+/-uncertainty
# (presumably mean +/- std across runs, given std=True -- confirm in mlflow_helper.organize).
df = organize(runs, by=['database', 'dataset', 'estimator'], std=True)
# Print the LaTeX source of the table, then display it.
print(df.to_latex(multicolumn_format='c'))
df

\begin{tabular}{llllll}
\toprule
    & database & \multicolumn{2}{c}{M\_QS\_24hr} & \multicolumn{2}{c}{M\_Q\_24hr} \\
    & dataset &    fused\_sharp &          sharp &    fused\_sharp &          sharp \\
{} & estimator &                &                &                &                \\
\midrule
AUC & CNN &  0.880+/-0.065 &  0.820+/-0.078 &  0.967+/-0.007 &  0.945+/-0.006 \\
TSS & CNN &  0.608+/-0.124 &  0.520+/-0.167 &  0.813+/-0.041 &  0.773+/-0.036 \\
\bottomrule
\end{tabular}



Unnamed: 0_level_0,database,M_QS_24hr,M_QS_24hr,M_Q_24hr,M_Q_24hr
Unnamed: 0_level_1,dataset,fused_sharp,sharp,fused_sharp,sharp
Unnamed: 0_level_2,estimator,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
AUC,CNN,0.880+/-0.065,0.820+/-0.078,0.967+/-0.007,0.945+/-0.006
TSS,CNN,0.608+/-0.124,0.520+/-0.167,0.813+/-0.041,0.773+/-0.036


## CNN: tune_arch

In [18]:
# Raw MLflow column name -> short label used in the tables below.
columns = {
    'tags.database_name': 'database',
    'tags.dataset_name': 'dataset',
    'tags.estimator_name': 'estimator',
    # shrinkage/thresh are disabled here, so they are not carried into `runs`
    #'params.DATA.SHRINKAGE': 'shrinkage',
    #'params.DATA.THRESH': 'thresh',
    'metrics.test/auc': 'AUC',
    'metrics.test/tss': 'TSS',
}
rows = {}  # no cell-value renaming for this experiment
# NOTE(review): `p=0` presumably picks which of the candidate batches listed in
# the "Select iloc 0 from" output is used -- confirm in mlflow_helper.retrieve.
runs_raw = retrieve('CNN', 'tune_arch', p=0)
runs = select(runs_raw, columns, rows)
runs

Select iloc 0 from 
                         start_time tags.mlflow.runName  \
5  2021-06-15 17:49:09.318000+00:00           tune_arch   
26 2021-06-14 01:58:33.648000+00:00           tune_arch   
33 2021-06-14 00:36:03.100000+00:00           tune_arch   

               tags.mlflow.source.git.commit  
5   70646f9b0549efb71ec506e6c42d1a9d7ce32ae9  
26  70646f9b0549efb71ec506e6c42d1a9d7ce32ae9  
33  9baa229455de3fd2f5bf8b9767dc97e655c122c5  


Unnamed: 0,database,dataset,estimator,AUC,TSS
0,M_Q_24hr,fused_sharp,CNN,0.986718,0.89404
1,M_Q_24hr,fused_sharp,CNN,0.951195,0.786116
2,M_Q_24hr,fused_sharp,CNN,0.988331,0.917628
3,M_Q_24hr,fused_sharp,CNN,0.986108,0.87963
4,M_Q_24hr,fused_sharp,CNN,0.990473,0.929268


In [19]:
# Summarise the runs per (database, dataset, estimator) as value+/-uncertainty
# (presumably mean +/- std across runs, given std=True -- confirm in mlflow_helper.organize).
df = organize(runs, by=['database', 'dataset', 'estimator'], std=True)
# Print the LaTeX source of the table, then display it.
print(df.to_latex(multicolumn_format='c'))
df

\begin{tabular}{lll}
\toprule
    & database &       M\_Q\_24hr \\
    & dataset &    fused\_sharp \\
{} & estimator &                \\
\midrule
AUC & CNN &  0.981+/-0.017 \\
TSS & CNN &  0.881+/-0.057 \\
\bottomrule
\end{tabular}



Unnamed: 0_level_0,database,M_Q_24hr
Unnamed: 0_level_1,dataset,fused_sharp
Unnamed: 0_level_2,estimator,Unnamed: 2_level_2
AUC,CNN,0.981+/-0.017
TSS,CNN,0.881+/-0.057


In [22]:
# NOTE(review): this cell repeats the previous cell's code verbatim, yet its
# saved output covers M_QS_24hr/M_Q_24hr x fused_sharp/sharp while the previous
# output covers only M_Q_24hr/fused_sharp. It was presumably executed against a
# different `runs`; re-run or remove it so the table is not stale.
df = organize(runs, by=['database', 'dataset', 'estimator'], std=True)
print(df.to_latex(multicolumn_format='c'))
df

\begin{tabular}{llllll}
\toprule
    & database & \multicolumn{2}{c}{M\_QS\_24hr} & \multicolumn{2}{c}{M\_Q\_24hr} \\
    & dataset &    fused\_sharp &          sharp &    fused\_sharp &          sharp \\
{} & estimator &                &                &                &                \\
\midrule
AUC & CNN &  0.900+/-0.035 &  0.826+/-0.066 &  0.976+/-0.005 &  0.959+/-0.020 \\
TSS & CNN &  0.656+/-0.068 &  0.523+/-0.120 &  0.835+/-0.026 &  0.807+/-0.049 \\
\bottomrule
\end{tabular}



Unnamed: 0_level_0,database,M_QS_24hr,M_QS_24hr,M_Q_24hr,M_Q_24hr
Unnamed: 0_level_1,dataset,fused_sharp,sharp,fused_sharp,sharp
Unnamed: 0_level_2,estimator,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
AUC,CNN,0.900+/-0.035,0.826+/-0.066,0.976+/-0.005,0.959+/-0.020
TSS,CNN,0.656+/-0.068,0.523+/-0.120,0.835+/-0.026,0.807+/-0.049


In [24]:
# Open TensorBoard on every run currently held in `runs_raw`.
dirs = tensorboard(runs_raw)
%tensorboard --logdir_spec {dirs}

## leaderboard3: LSTM_CNN
"Plain language summary" uses this

In [46]:
# Raw MLflow column name -> short label; this section also keeps the seed and
# the accuracy/HSS/BSS test metrics.
columns = {
    'tags.database_name': 'database',
    'tags.dataset_name': 'dataset',
    'tags.estimator_name': 'estimator',
    'params.DATA.SEED': 'seed',
    'metrics.test/accuracy': 'ACC',
    'metrics.test/auc': 'AUC',
    'metrics.test/tss': 'TSS',
    'metrics.test/hss2': 'HSS',
    'metrics.test/bss': 'BSS',
}
rows = {}  # no cell-value renaming for this experiment
runs_raw = retrieve('leaderboard3', 'LSTM_CNN') #, p=0)

# Select only CNN for comparison with 'leaderboard3: CNN_more_epochs'
runs_raw = runs_raw[runs_raw['tags.estimator_name'] == 'CNN']

runs = select(runs_raw, columns, rows)
# Parse the selected epoch and step out of the checkpoint path
# (e.g. '.../epoch=4-step=839.ckpt'). NOTE: a row whose tag does not match the
# pattern yields NaN, which makes `.astype(int)` raise.
ckpt = runs_raw['tags.checkpoint'].str.extract(r'epoch=(?P<epoch>[0-9]+)-step=(?P<step>[0-9]+)').astype(int)
# Column-wise concat aligns on index labels; assumes `select` keeps runs_raw's labels.
runs = pd.concat((runs, ckpt), axis=1)
# Shade each column independently (axis=0) so high and low values stand out per metric.
runs.style \
    .set_caption('The columns `epoch` and `step` are the numbers of training epochs and steps of the selected model. Early stopping is applied in each training process and the model with the highest validation AUC among all epochs is selected. For LSTMs, training lasts for at most 20 epochs (called max_epochs) and early-stopped if the validation AUC is not increasing for 5 epochs (called patience). CNNs has max_epochs = 80 and patience = 5.') \
    .background_gradient(axis=0)#, subset=['BSS'])
#print()

Select iloc 0 from 
                         start_time tags.mlflow.runName  \
82 2021-06-27 16:03:33.239000+00:00            LSTM_CNN   

               tags.mlflow.source.git.commit  
82  e13810ffddafdb818df7c4de68d9eafd67861e8c  


Unnamed: 0,database,dataset,estimator,seed,ACC,AUC,TSS,HSS,BSS,epoch,step
42,M_Q_24hr,fused_smarp,CNN,4,0.902244,0.948103,0.804487,0.804487,-0.104738,0,167
43,M_Q_24hr,fused_smarp,CNN,3,0.93326,0.978369,0.866521,0.866521,0.623621,20,3149
44,M_Q_24hr,fused_smarp,CNN,2,0.918246,0.966363,0.836493,0.836493,0.224443,5,893
45,M_Q_24hr,fused_smarp,CNN,1,0.912926,0.968733,0.825853,0.825853,0.584576,9,1649
46,M_Q_24hr,fused_smarp,CNN,0,0.882848,0.957071,0.765695,0.765695,0.495979,12,2053
52,M_Q_24hr,smarp,CNN,4,0.854167,0.929801,0.708333,0.708333,-0.58258,0,95
53,M_Q_24hr,smarp,CNN,3,0.902079,0.970259,0.804158,0.804158,0.310475,23,2183
54,M_Q_24hr,smarp,CNN,2,0.906398,0.966979,0.812796,0.812796,0.62394,25,2521
55,M_Q_24hr,smarp,CNN,1,0.887792,0.95823,0.775583,0.775584,0.454478,12,1169
56,M_Q_24hr,smarp,CNN,0,0.890695,0.956367,0.78139,0.78139,0.56162,20,1868


In [21]:
# No `by` is given, so organize's default grouping applies (the output below is
# laid out by dataset and estimator). Cells are value+/-uncertainty, presumably
# mean +/- std across seeds -- confirm in mlflow_helper.organize.
df = organize(runs, std=True)
# Print the LaTeX source of the table, then display it.
print(df.to_latex(multicolumn_format='c'))
df

\begin{tabular}{llllll}
\toprule
     & dataset &         fused\_sharp &          fused\_smarp &                sharp &               smarp \\
{} & estimator &                     &                      &                      &                     \\
\midrule
ACC & CNN &       0.943+/-0.030 &        0.910+/-0.019 &        0.934+/-0.011 &       0.888+/-0.021 \\
AUC & CNN &       0.983+/-0.013 &        0.964+/-0.012 &        0.981+/-0.004 &       0.956+/-0.016 \\
TSS & CNN &       0.886+/-0.059 &        0.820+/-0.038 &        0.867+/-0.023 &       0.776+/-0.041 \\
HSS & CNN &       0.886+/-0.059 &        0.820+/-0.038 &        0.867+/-0.023 &       0.776+/-0.041 \\
BSS & CNN &       0.343+/-0.388 &        0.365+/-0.305 &        0.619+/-0.125 &       0.274+/-0.493 \\
epoch & CNN &       6.600+/-4.980 &        9.200+/-7.530 &      28.000+/-19.545 &     16.000+/-10.223 \\
step & CNN &  1301.600+/-878.957 &  1582.200+/-1135.646 &  1668.800+/-1263.607 &  1567.200+/-962.466 \\
\bottomrule
\end

Unnamed: 0_level_0,dataset,fused_sharp,fused_smarp,sharp,smarp
Unnamed: 0_level_1,estimator,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ACC,CNN,0.943+/-0.030,0.910+/-0.019,0.934+/-0.011,0.888+/-0.021
AUC,CNN,0.983+/-0.013,0.964+/-0.012,0.981+/-0.004,0.956+/-0.016
TSS,CNN,0.886+/-0.059,0.820+/-0.038,0.867+/-0.023,0.776+/-0.041
HSS,CNN,0.886+/-0.059,0.820+/-0.038,0.867+/-0.023,0.776+/-0.041
BSS,CNN,0.343+/-0.388,0.365+/-0.305,0.619+/-0.125,0.274+/-0.493
epoch,CNN,6.600+/-4.980,9.200+/-7.530,28.000+/-19.545,16.000+/-10.223
step,CNN,1301.600+/-878.957,1582.200+/-1135.646,1668.800+/-1263.607,1567.200+/-962.466


In [22]:
style(runs)

Unnamed: 0_level_0,dataset,fused_sharp,fused_smarp,sharp,smarp
Unnamed: 0_level_1,estimator,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ACC,CNN,0.943,0.91,0.934,0.888
AUC,CNN,0.983,0.964,0.981,0.956
TSS,CNN,0.886,0.82,0.867,0.776
HSS,CNN,0.886,0.82,0.867,0.776
BSS,CNN,0.343,0.365,0.619,0.274
epoch,CNN,6.6,9.2,28.0,16.0
step,CNN,1301.6,1582.2,1668.8,1567.2


In [23]:
# Seed-0 CNN run on fused_sharp; `mask` is reused by the TensorBoard cell below.
# Filters tried earlier, kept for reference:
#   - estimator list extended with 'LSTM'
#   - runs_raw['artifact_uri'].str.contains('ce3844')  # one particularly bad learning curve observed in tensorboard
#   - [10]  # this is the worst in all runs
mask = (
    runs_raw['params.DATA.SEED'].eq('0')
    & runs_raw['tags.dataset_name'].isin(['fused_sharp'])
    & runs_raw['tags.estimator_name'].isin(['CNN'])
)
# Show where the matching run's artifacts and selected checkpoint live.
runs_raw.loc[mask, ['tags.dataset_name', 'tags.estimator_name', 'params.DATA.SEED',
                    'artifact_uri', 'tags.checkpoint']]

Unnamed: 0,tags.dataset_name,tags.estimator_name,params.DATA.SEED,artifact_uri,tags.checkpoint
66,fused_sharp,CNN,0,file:///home/zeyusun/work/flare-prediction-smarp/mlruns/15/35bc4235f42d4d1dbf413f9141c43713/artifacts,/home/zeyusun/work/flare-prediction-smarp/lightning_logs/version_960/checkpoints/epoch=4-step=839.ckpt


In [19]:
# Open TensorBoard on the run(s) selected by `mask`. `dirs` is read again by the
# combined TensorBoard cell in the 'leaderboard3: CNN_more_epochs' section.
dirs = tensorboard(runs_raw.loc[mask])
%tensorboard --logdir_spec {dirs}

## leaderboard3: CNN_more_epochs

In [24]:
# Raw MLflow column name -> short label; same layout as the
# 'leaderboard3: LSTM_CNN' section so the two tables can be compared.
columns = {
    'tags.database_name': 'database',
    'tags.dataset_name': 'dataset',
    'tags.estimator_name': 'estimator',
    'params.DATA.SEED': 'seed',
    'metrics.test/accuracy': 'ACC',
    'metrics.test/auc': 'AUC',
    'metrics.test/tss': 'TSS',
    'metrics.test/hss2': 'HSS',
    'metrics.test/bss': 'BSS',
}
rows = {}  # no cell-value renaming for this experiment
runs_raw = retrieve('leaderboard3', 'CNN_more_epochs') #, p=0)
runs = select(runs_raw, columns, rows)
# Parse the selected epoch and step out of the checkpoint path
# (e.g. '.../epoch=8-step=1511.ckpt'). NOTE: a row whose tag does not match the
# pattern yields NaN, which makes `.astype(int)` raise.
ckpt = runs_raw['tags.checkpoint'].str.extract(r'epoch=(?P<epoch>[0-9]+)-step=(?P<step>[0-9]+)').astype(int)
# Column-wise concat aligns on index labels; assumes `select` keeps runs_raw's labels.
runs = pd.concat((runs, ckpt), axis=1)
# Shade each column independently (axis=0).
runs.style.background_gradient(axis=0)

Select iloc 0 from 
                         start_time tags.mlflow.runName  \
41 2021-07-04 22:43:57.585000+00:00     CNN_more_epochs   

               tags.mlflow.source.git.commit  
41  ebb736b6b6e1af8ea08fd0f5a179e68f21728e16  


Unnamed: 0,database,dataset,estimator,seed,ACC,AUC,TSS,HSS,BSS,epoch,step
21,M_Q_24hr,fused_smarp,CNN,4,0.898237,0.938048,0.796474,0.796474,0.260386,10,1847
22,M_Q_24hr,fused_smarp,CNN,3,0.910284,0.97002,0.820569,0.820569,0.64526,13,2099
23,M_Q_24hr,fused_smarp,CNN,2,0.927133,0.97376,0.854265,0.854265,0.389934,3,595
24,M_Q_24hr,fused_smarp,CNN,1,0.902154,0.960025,0.804309,0.804309,0.348842,8,1484
25,M_Q_24hr,fused_smarp,CNN,0,0.888453,0.955834,0.776906,0.776906,0.453607,32,5213
26,M_Q_24hr,smarp,CNN,4,0.890224,0.944319,0.780449,0.780449,-1.280633,0,95
27,M_Q_24hr,smarp,CNN,3,0.919584,0.976462,0.839168,0.839168,0.568313,70,6460
28,M_Q_24hr,smarp,CNN,2,0.910545,0.96701,0.82109,0.82109,0.552626,14,1454
29,M_Q_24hr,smarp,CNN,1,0.89991,0.952811,0.79982,0.79982,0.378335,45,4139
30,M_Q_24hr,smarp,CNN,0,0.888453,0.953112,0.776906,0.776906,0.496035,27,2491


* Observations
    * Low performance corresponds to premature stop
    * Effect on CNN after training more epochs (In table: `o` for increased performance with more epochs)
        * fused_smarp: only seed 2 sees an improvement.
        * smarp: seed 0 and 1 see a decrease.
        * fused_sharp: 0, 1, and 2 see a decrease
        * sharp: 0, 1 see an increase
        |             | 0 | 1 | 2 | 3 | 4 |
        |-------------|---|---|---|---|---|
        | fused_smarp | x | x | o | x | x |
        | smarp       | x | x | o | o | o |
        | fused_sharp | x | x | x | o | o |
        | sharp       | o | o | x | x | x |
    * What: On fused_sharp with seed 0, the epoch selected by validation is different (8 vs 4), although both are smaller than 20. Moreover, more training epochs lead to lower test performance.
        * What: In tensorboard, the curves of the two experiments resemble each other but do not agree.
        * Why: They resemble each other because some training samples are difficult, so both curves peak at them. They do not agree because the weights are initialized randomly.
        * What: With more training epochs, validation auc slightly improves (from 0.9689 to 0.9701), but test auc decreases from 0.9914 to 0.9899.
        * Why: By looking at confusion matrices, the test set performance decreases mainly because of more missed events.
            * Thought from the Reliability Diagram: it would be interesting to plot how predicted probabilities change. Use a 2D scatter plot.
            * By looking at SSP, I predict on the 2D scatter plot of more_epoch vs fewer_epoch, all points are close to the diagonal, with the negative sample points floating up and positive points sinking down, (so that the projection on the y-axis gives more mixed marginal distributions)
        * Why: Differences probably lie within the uncertainty caused by finite sample of validation and the test set.
    * What: Interestingly, the validation BSS plateaus much later than the other metrics.
    * What: The validation RD is more confident than test RD, meaning most of the validation samples have a predicted probability close to either 0 or 1.
    * What: From the validation SSP, we observe that in the first few epochs the curve is tilted towards the right. This indicates that the distribution of the predicted probabilities of positive samples has a heavier left tail (so that lowering the threshold increases false alarms while retaining most of the false negatives).
    * Thought: 2D scatter plot of predicted probs of two models can say a lot. We can incorporate marginal distributions, SSP, RD, Precisions, Recalls into this plot. We can also make it evolve with time (epochs the model has seen).

In [25]:
style(runs)

Unnamed: 0_level_0,dataset,fused_sharp,fused_smarp,sharp,smarp
Unnamed: 0_level_1,estimator,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ACC,CNN,0.942,0.905,0.932,0.902
AUC,CNN,0.983,0.96,0.979,0.959
TSS,CNN,0.884,0.81,0.864,0.803
HSS,CNN,0.884,0.81,0.864,0.803
BSS,CNN,0.556,0.42,0.551,0.143
epoch,CNN,14.8,13.2,24.2,31.2
step,CNN,2690.6,2247.6,1431.8,2927.8


In [26]:
organize(runs, std=True)

Unnamed: 0_level_0,dataset,fused_sharp,fused_smarp,sharp,smarp
Unnamed: 0_level_1,estimator,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ACC,CNN,0.942+/-0.017,0.905+/-0.015,0.932+/-0.013,0.902+/-0.013
AUC,CNN,0.983+/-0.009,0.960+/-0.014,0.979+/-0.007,0.959+/-0.013
TSS,CNN,0.884+/-0.033,0.811+/-0.029,0.864+/-0.026,0.803+/-0.027
HSS,CNN,0.884+/-0.033,0.811+/-0.029,0.864+/-0.026,0.803+/-0.027
BSS,CNN,0.556+/-0.108,0.420+/-0.144,0.551+/-0.176,0.143+/-0.799
epoch,CNN,14.800+/-9.121,13.200+/-11.122,24.200+/-18.322,31.200+/-27.308
step,CNN,2690.600+/-1581.116,2247.600+/-1752.878,1431.800+/-1035.400,2927.800+/-2466.327


In [39]:
# Seed-0 CNN run on fused_sharp; `mask` is reused by the TensorBoard cell below.
# Filters tried earlier, kept for reference:
#   - estimator list extended with 'LSTM'
#   - runs_raw['artifact_uri'].str.contains('ce3844')  # one particularly bad learning curve observed in tensorboard
#   - [10]  # this is the worst in all runs
mask = (
    runs_raw['params.DATA.SEED'].eq('0')
    & runs_raw['tags.dataset_name'].isin(['fused_sharp'])
    & runs_raw['tags.estimator_name'].isin(['CNN'])
)
# Show where the matching run's artifacts and selected checkpoint live.
runs_raw.loc[mask, ['tags.dataset_name', 'tags.estimator_name', 'params.DATA.SEED',
                    'artifact_uri', 'tags.checkpoint']]

Unnamed: 0,tags.dataset_name,tags.estimator_name,params.DATA.SEED,artifact_uri,tags.checkpoint
35,fused_sharp,CNN,0,file:///home/zeyusun/work/flare-prediction-smarp/mlruns/15/b4d8694f44e3457f8c6ab4f10b5f3f7f/artifacts,/home/zeyusun/work/flare-prediction-smarp/lightning_logs/version_1027/checkpoints/epoch=8-step=1511.ckpt


In [41]:
# Overlay this section's selected run with the one kept in `dirs`.
# NOTE(review): `dirs` is not defined in this cell; its most recent assignment
# is the TensorBoard cell of the 'leaderboard3: LSTM_CNN' section, which must
# have been run first (hidden-state dependency).
dirs_new = tensorboard(runs_raw.loc[mask])
# Both values are strings; they are comma-joined into a single --logdir_spec.
dirs_combined = ','.join([dirs, dirs_new])
%tensorboard --logdir_spec {dirs_combined}

## debug_cnn: shuffle

* Experiment: CNN on sharp with seed 4
* Observations:
    * Shuffling leads to a much smoother loss curve.
    * Test set performance: With shuffling, AUC decreases from 0.981 to 0.977, while all other metrics increase.
        * Why:
            * SSP: Test SSP is much more robust, contributing to the higher discriminative scores. Validation SSP curves are more similar: both are tilted leftwards, and shuffling seems to yield slightly fewer high-predicted-prob samples.
            * RD: With shuffling, in the test RD most predictions are close to either 0 or 1. The middle part is underforecast. RD is related to SSP, in that an RD above the diagonal corresponds to an SSP tilted leftwards, indicating underforecasting.
                * Read the sklearn cited ICML paper. Sigmoid shaped RD.

## leaderboard6: shuffle

In [54]:
# Raw MLflow column name -> short label; same layout as the leaderboard3 sections.
columns = {
    'tags.database_name': 'database',
    'tags.dataset_name': 'dataset',
    'tags.estimator_name': 'estimator',
    'params.DATA.SEED': 'seed',
    'metrics.test/accuracy': 'ACC',
    'metrics.test/auc': 'AUC',
    'metrics.test/tss': 'TSS',
    'metrics.test/hss2': 'HSS',
    'metrics.test/bss': 'BSS',
}
rows = {}  # no cell-value renaming for this experiment
runs_raw = retrieve('leaderboard6', 'shuffle') #, p=0)

# Select only CNN for comparison with 'leaderboard3: CNN_more_epochs'
runs_raw = runs_raw[runs_raw['tags.estimator_name'] == 'CNN']

runs = select(runs_raw, columns, rows)
# Parse the selected epoch and step out of the checkpoint path
# (e.g. '.../epoch=29-step=5069.ckpt'). NOTE: a row whose tag does not match the
# pattern yields NaN, which makes `.astype(int)` raise.
ckpt = runs_raw['tags.checkpoint'].str.extract(r'epoch=(?P<epoch>[0-9]+)-step=(?P<step>[0-9]+)').astype(int)
# Column-wise concat aligns on index labels; assumes `select` keeps runs_raw's labels.
runs = pd.concat((runs, ckpt), axis=1)
# Shade each column independently (axis=0).
runs.style.background_gradient(axis=0)

Select iloc 0 from 
                         start_time tags.mlflow.runName  \
40 2021-08-09 04:19:53.386000+00:00             shuffle   

               tags.mlflow.source.git.commit  
40  546dac8dd474b80b32b13b188cc630a8e4dd1c3c  


Unnamed: 0,database,dataset,estimator,seed,ACC,AUC,TSS,HSS,BSS,epoch,step
0,M_Q_24hr,fused_smarp,CNN,4,0.901442,0.958763,0.802885,0.802885,0.62422,1,335
1,M_Q_24hr,fused_smarp,CNN,3,0.910832,0.976283,0.821663,0.821663,0.69748,1,299
2,M_Q_24hr,fused_smarp,CNN,2,0.846564,0.971853,0.693128,0.693128,0.391496,2,446
3,M_Q_24hr,fused_smarp,CNN,1,0.897217,0.964522,0.794434,0.794434,0.603338,0,164
4,M_Q_24hr,fused_smarp,CNN,0,0.856502,0.950923,0.713004,0.713004,0.445583,10,1737
10,M_Q_24hr,smarp,CNN,4,0.874199,0.935674,0.748397,0.748397,0.472893,1,191
11,M_Q_24hr,smarp,CNN,3,0.848468,0.951783,0.696936,0.696937,0.411081,14,1364
12,M_Q_24hr,smarp,CNN,2,0.780806,0.963304,0.561611,0.561611,0.173891,32,3200
13,M_Q_24hr,smarp,CNN,1,0.912029,0.970522,0.824057,0.824057,0.65833,1,179
14,M_Q_24hr,smarp,CNN,0,0.881166,0.937594,0.762332,0.762332,0.490265,1,177


In [55]:
organize(runs, std=True)

Unnamed: 0_level_0,dataset,fused_sharp,fused_smarp,sharp,smarp
Unnamed: 0_level_1,estimator,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ACC,CNN,0.832+/-0.094,0.883+/-0.029,0.912+/-0.038,0.859+/-0.049
AUC,CNN,0.975+/-0.018,0.964+/-0.010,0.983+/-0.006,0.952+/-0.015
TSS,CNN,0.665+/-0.187,0.765+/-0.058,0.824+/-0.076,0.719+/-0.099
HSS,CNN,0.665+/-0.187,0.765+/-0.058,0.824+/-0.076,0.719+/-0.099
BSS,CNN,0.359+/-0.381,0.552+/-0.129,0.685+/-0.138,0.441+/-0.175
epoch,CNN,7.200+/-12.215,2.800+/-4.087,6.000+/-1.414,9.800+/-13.627
step,CNN,1387.400+/-2063.611,596.200+/-645.627,394.200+/-108.292,1022.200+/-1320.595


In [56]:
# Seed-3 CNN runs on sharp and fused_sharp; `mask` is reused by the TensorBoard cell below.
# Filters tried earlier, kept for reference:
#   - estimator list extended with 'LSTM'
#   - runs_raw['artifact_uri'].str.contains('ce3844')  # one particularly bad learning curve observed in tensorboard
#   - [10]  # this is the worst in all runs
mask = (
    runs_raw['params.DATA.SEED'].eq('3')
    & runs_raw['tags.dataset_name'].isin(['sharp', 'fused_sharp'])
    & runs_raw['tags.estimator_name'].isin(['CNN'])
)
# Show where the matching runs' artifacts and selected checkpoints live.
runs_raw.loc[mask, ['tags.dataset_name', 'tags.estimator_name', 'params.DATA.SEED',
                    'artifact_uri', 'tags.checkpoint']]

Unnamed: 0,tags.dataset_name,tags.estimator_name,params.DATA.SEED,artifact_uri,tags.checkpoint
21,fused_sharp,CNN,3,file:///home/zeyusun/work/flare-prediction-smarp/mlruns/19/8c917208e6684ab095a5168d817ce8f4/artifacts,/home/zeyusun/work/flare-prediction-smarp/lightning_logs/version_1178/checkpoints/epoch=29-step=5069.ckpt
31,sharp,CNN,3,file:///home/zeyusun/work/flare-prediction-smarp/mlruns/19/a44dbaa0c1ef478ba9d52bf3a750b307/artifacts,/home/zeyusun/work/flare-prediction-smarp/lightning_logs/version_1164/checkpoints/epoch=4-step=274.ckpt


In [58]:
# Open TensorBoard on the runs selected by `mask` only; combining them with the
# earlier `dirs` (commented-out line) is disabled here.
dirs_new = tensorboard(runs_raw.loc[mask])
#dirs_combined = ','.join([dirs, dirs_new])
%tensorboard --logdir_spec {dirs_new}

Reusing TensorBoard on port 6006 (pid 941), started 1 day, 9:54:29 ago. (Use '!kill 941' to kill it.)