# Compare model outputs of AICORE DL Model


### Imports 

In [None]:
import plotly.express as px
import pandas as pd
import os
import glob
from pathlib import Path
import yaml
import datetime

# workaround buggy autocomplete
%config Completer.use_jedi = False

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

### Functions 

In [None]:
def get_props_from_config(config_file):
    """Load a run's YAML config and extract its ``resume`` entry.

    Args:
        config_file: Path of the YAML configuration file to read.

    Returns:
        Tuple ``(config, resume)``. ``config`` is the parsed document
        (loaded with ``yaml.BaseLoader``, so scalars stay strings) and
        ``resume`` is ``config['resume'][1]``, or ``''`` when that entry
        is absent.

    Raises:
        KeyError: If the config has no ``'model'`` section.
    """
    # Use a context manager so the file handle is closed deterministically.
    with open(config_file) as config_stream:
        config = yaml.load(config_stream, Loader=yaml.BaseLoader)

    # Keep failing fast on configs without a 'model' section, as before.
    _ = config['model']

    try:
        resume = config['resume'][1]
    except (KeyError, IndexError, TypeError):
        # No usable 'resume' entry: missing key, too short, or empty value.
        resume = ''
    return config, resume

In [None]:
def get_vals_from_dir(subdir):
    """Read a run's ``train.csv`` and ``val.csv`` and tag them with run info.

    Each returned frame gets three extra columns: ``basename`` (the
    directory name), ``type`` (``'train'`` or ``'val'``) and ``region``
    (the third ``'__'``-separated part of the directory name).

    Args:
        subdir: ``pathlib.Path`` of the run directory containing both CSVs.

    Returns:
        Tuple ``(df_tr, df_v)`` with the tagged train and validation frames.

    Raises:
        IndexError: If the directory name has fewer than three
            ``'__'``-separated parts.
    """
    basename = subdir.name
    region = basename.split('__')[2]

    tagged = []
    for split in ('train', 'val'):
        # Both files are read from `subdir`; the original read val.csv from
        # an unrelated name `s`, which is not defined in this function.
        frame = pd.read_csv(subdir / f'{split}.csv')
        frame['basename'] = basename
        frame['type'] = split
        frame['region'] = region
        tagged.append(frame)

    df_tr, df_v = tagged
    return df_tr, df_v

In [None]:
def _summarize_scores(csv_path, prefix):
    """Return the max, 5th-best-by-F1 and min summaries of a score CSV.

    Each summary is a Series whose labels are prefixed with
    ``'<prefix>_score_max_'``, ``'<prefix>_score_5th_'`` and
    ``'<prefix>_score_min_'`` respectively.
    """
    scores = pd.read_csv(csv_path)
    return [
        scores.max().add_prefix(f'{prefix}_score_max_'),
        # Row with the 5th-highest F1; raises IndexError for fewer than 5 rows.
        scores.sort_values(by='F1', ascending=False).iloc[4].add_prefix(f'{prefix}_score_5th_'),
        scores.min().add_prefix(f'{prefix}_score_min_'),
    ]


def load_stats_from_logs(logdirs):
    """Build one summary row per run directory from its config and score CSVs.

    For every directory, ``config.yml`` is parsed and ``train.csv`` /
    ``val.csv`` are reduced to their column-wise max, min and the row with
    the 5th-highest ``F1``. Runs that cannot be processed are reported on
    stdout (with the reason) and skipped.

    Args:
        logdirs: Iterable of ``pathlib.Path`` run directories.

    Returns:
        DataFrame with one row per successfully processed run, or an empty
        DataFrame when no run could be processed.
    """
    run_rows = []

    for s in logdirs:
        try:
            config, resume = get_props_from_config(s / 'config.yml')

            # Explicit object dtype: the row mixes strings, paths, ints and
            # datetimes, and an untyped empty Series is deprecated in pandas.
            df = pd.Series(dtype=object)
            df['name'] = s.name
            df['log_dir_path'] = s.absolute()
            df['architecture'] = config['model']['architecture']
            df['backbone'] = config['model']['encoder']
            df['encoder_weights'] = config['model']['encoder_weights']
            df['loss_function'] = config['loss_function']
            df['resume'] = resume
            df['stack_height'] = int(config['model_args']['stack_height'])
            df['timestamp'] = datetime.datetime.strptime(config['run_info']['timestamp'], '%Y-%m-%d_%H-%M-%S')
            df['learning_rate_scheduler'] = config['learning_rate_scheduler']
            df['learning_rate'] = config['learning_rate']
            df['augment_types'] = config['datasets']['train']['augment_types']
            df['input_channels'] = int(config['model']['input_channels'])

            run_rows.append(pd.concat([
                df,
                *_summarize_scores(s / 'train.csv', 'tr'),
                *_summarize_scores(s / 'val.csv', 'val'),
            ]))
        except Exception as err:
            # Best-effort loading: skip broken runs, but say why they failed
            # and let KeyboardInterrupt/SystemExit propagate.
            print(f"Error on dataset: {s.name} ({type(err).__name__}: {err})")

    if not run_rows:
        # pd.concat raises on an empty list; return an empty table instead.
        return pd.DataFrame()

    return pd.concat(run_rows, axis=1).T.reset_index()

### Settings 

**Note:** You need to open the directory in the file explorer first in order to connect to it!

In [None]:
# Insert your main logdir: the root directory that is searched for matching paths.
LOGDIR = Path(r'/isipd/projects/p_aicore_pf/initze/experiments')
# Despite its name this is a glob pattern (it is passed to Path.glob), relative to LOGDIR.
SUB_REGEX = '*/RTS*'

In [None]:
# Collect every path below LOGDIR that matches the SUB_REGEX glob pattern.
subdirs = list(LOGDIR.glob(SUB_REGEX))

In [None]:
# Number of matched paths (quick sanity check that the glob found something).
len(subdirs)

### Data Loading 

In [None]:
# Build the per-run summary table from all matched paths.
df_stats = load_stats_from_logs(subdirs)

In [None]:
# List the available summary columns.
df_stats.columns

#### Show best 3 runs (by 5th-best validation F1 score)

In [None]:
# Rank runs by the F1 of their 5th-best validation row (not the single best) and show the top 3.
df_stats.sort_values(by='val_score_5th_F1', ascending=False).head(3)

### Visualize Evaluation 

#### Overall results 

In [None]:
# Heatmap of the best validation F1 and IoU per run: one row per run, one
# column per metric. Only the two numeric score columns are passed as the
# image (cast to float because df_stats holds object-dtype columns), so the
# x/y label lengths match the image dimensions.
# NOTE(review): the figure is only assigned here, never displayed; call
# `fig.show()` if it should be rendered.
fig = px.imshow(df_stats[['val_score_max_F1', 'val_score_max_IoU']].astype(float),
                x=['val_score_max_F1', 'val_score_max_IoU'],
                y=df_stats['name'].tolist()
               )

#### Individual results

In [None]:
def read_raw_results(log_dir_path):
    """Load a run's train and validation CSVs side by side.

    Args:
        log_dir_path: ``pathlib.Path`` of the run directory holding
            ``train.csv`` and ``val.csv``.

    Returns:
        DataFrame with the train columns (suffixed ``_train``) followed by
        the validation columns (suffixed ``_val``), aligned on the row index.
    """
    sources = (('train.csv', '_train'), ('val.csv', '_val'))
    suffixed_frames = [
        pd.read_csv(log_dir_path / file_name).add_suffix(column_suffix)
        for file_name, column_suffix in sources
    ]
    return pd.concat(suffixed_frames, axis=1)

In [None]:
# Load the raw train/val CSV contents of the run at index label 4 of df_stats
# (hard-coded; change the label to inspect another run).
df = read_raw_results(df_stats.loc[4].log_dir_path)

In [None]:
# Plot the validation F1, precision and recall columns against the validation epoch column.
px.line(data_frame=df, x='Epoch_val', y=['F1_val', 'Precision_val', 'Recall_val'])