# Compare model outputs of AICORE DL Model


### Imports 

In [None]:
import plotly.express as px
import pandas as pd
import os
import glob
from pathlib import Path
import yaml
import datetime

# workaround buggy autocomplete
%config Completer.use_jedi = False

### Functions 

In [None]:
def get_props_from_config(config_file):
    """Read a run's YAML configuration and extract its resume entry.

    Args:
        config_file: Path of the YAML configuration file to read.

    Returns:
        Tuple ``(config, resume)``. ``config`` is the parsed YAML document.
        ``resume`` is ``config['resume'][1]``, or ``''`` when that entry
        does not exist or cannot be indexed.
    """
    # The context manager closes the file handle even if parsing fails.
    # BaseLoader constructs no arbitrary Python objects and leaves every
    # scalar as a plain string.
    with open(config_file) as fh:
        config = yaml.load(fh, Loader=yaml.BaseLoader)
    try:
        resume = config['resume'][1]
    except (KeyError, IndexError, TypeError):
        # No usable resume entry: missing key, entry too short, or a
        # value that cannot be indexed this way.
        resume = ''
    return config, resume

In [None]:
def get_vals_from_dir(subdir):
    """Load the train and validation metric tables of one run directory.

    Args:
        subdir: ``pathlib.Path`` of a run directory that contains
            ``train.csv`` and ``val.csv``. The directory name is split on
            ``'__'`` and its third part is stored as the region.

    Returns:
        Tuple ``(df_tr, df_v)`` of DataFrames read from ``train.csv`` and
        ``val.csv``. Each gets three extra columns: ``basename`` (the
        directory name), ``type`` (``'train'`` or ``'val'``) and ``region``.

    Raises:
        IndexError: If the directory name has fewer than three
            ``'__'``-separated parts.
        Errors raised by ``pd.read_csv`` (e.g. a missing file) propagate.
    """
    basename = subdir.name
    region = basename.split('__')[2]

    frames = []
    for split in ('train', 'val'):
        # Resolve both files against the ``subdir`` argument so the function
        # does not depend on any module-level loop variable.
        df = pd.read_csv(subdir / f'{split}.csv')
        df['basename'] = basename
        df['type'] = split
        df['region'] = region
        frames.append(df)

    df_tr, df_v = frames
    return df_tr, df_v

### Settings 

Note: open the log directory in the file explorer first so that the network drive is connected!

In [None]:
# Root directory that is searched for run directories.
# Alternative locations are kept commented out for reference.
#LOGDIR = Path('logs') / '0.3.1'
#LOGDIR = Path(r'O:\p_aicore\initze\aicore-uc2_augmentation\logs')
LOGDIR = Path(r'L:\initze\aicore-uc2_augmentation\logs')
# Despite the name, this is a glob pattern (it is passed to LOGDIR.glob),
# not a regular expression. '*' would select every entry below LOGDIR.
#SUB_REGEX = '*'
SUB_REGEX = '*CV6*'

In [None]:
# Materialise every entry below LOGDIR that matches the glob pattern.
subdirs = [*LOGDIR.glob(SUB_REGEX)]

### Data Loading 
- add config reader

In [None]:
# Collect the train / validation tables of every run directory and annotate
# each table with the properties read from the run's config.yml.
df_list_tr = []
df_list_v = []
for s in subdirs:
    try:
        df_tr, df_v = get_vals_from_dir(s)
    except Exception as err:
        # Best effort: entries without readable metric files are skipped,
        # but reported so that missing runs do not go unnoticed.
        print(f'Skipping {s.name}: {err!r}')
        continue
    config_file = s / 'config.yml'
    config, resume = get_props_from_config(config_file)

    # Per-run values are identical for both tables, so compute them once.
    run_timestamp = datetime.datetime.strptime(config['run_info']['timestamp'], '%Y-%m-%d_%H-%M-%S')
    run_sources = config['data_sources']

    for df in [df_tr, df_v]:
        df['architecture'] = config['model']['architecture']
        df['backbone'] = config['model']['encoder']
        df['loss_function'] = config['loss_function']
        df['a-b'] = df['architecture'] + '_' + df['backbone']
        df['resume'] = resume
        df['stack_height'] = config['model_args']['stack_height']
        df['timestamp'] = run_timestamp
        # Store the same data_sources value in every row. An explicit Series
        # keeps a list value as one object per row, without a row-wise apply.
        df['data_sources'] = pd.Series([run_sources] * len(df), index=df.index)
        df['P-R'] = df['Precision'] - df['Recall']

    df_list_tr.append(df_tr)
    df_list_v.append(df_v)

if not df_list_tr:
    # pd.concat would fail with a much less helpful message.
    raise ValueError(
        f'No runs could be loaded from {len(subdirs)} candidate directories; '
        'check that the log directory is reachable and the pattern matches.'
    )

# reset_index() keeps each table's original row number in an 'index' column.
df_train = pd.concat(df_list_tr).reset_index()
df_val = pd.concat(df_list_v).reset_index()

In [None]:
# Number of validation rows loaded across all runs.
df_val.shape[0]

#### Filter by date

In [None]:
# Keep only runs whose config timestamp is later than this cut-off date.
date = datetime.datetime(2021,3,22)
# .copy() makes the filtered tables independent frames, so the in-place
# edits in the following cell do not trigger SettingWithCopyWarning.
df_train = df_train[df_train.timestamp > date].copy()
df_val = df_val[df_val.timestamp > date].copy()

#### Merge resumed trainings

In [None]:
# Fold resumed trainings into the run they continue: rows with a non-empty
# 'resume' value take that value as basename and get their epochs shifted.
# NOTE(review): the fixed shift of 50 presumably is the epoch count of the
# run being resumed - confirm. Re-running this cell shifts the epochs again.
for df in (df_train, df_val):
    is_resumed = df['resume'] != ''
    df.loc[is_resumed, 'basename'] = df.loc[is_resumed, 'resume']
    df.loc[is_resumed, 'Epoch'] = df.loc[is_resumed, 'Epoch'] + 50

### Data Export

In [None]:
# Persist the filtered validation table in the working directory.
df_val.to_pickle('df_val_filtered.pkl')

In [None]:
# Persist the filtered training table in the working directory.
df_train.to_pickle('df_train_filtered.pkl')

### Data Analysis 

#### Best runs result per run
1. Maximum
2. 95 %
3. 50 %

#### Show best individual run per config sorted by IoU:

In [None]:
# Rank all validation rows by IoU and keep the first (i.e. best) row of
# every basename, indexed by basename.
(
    df_val
    .sort_values(by='IoU', ascending=False)
    .drop_duplicates(subset='basename')
    .set_index('basename')
)

In [None]:
#df_val.groupby(by='basename').quantile(1)

In [None]:
#df_val.groupby(by='basename').quantile(1).sort_values(by='IoU', ascending=False).head(10)

In [None]:
#df_val.groupby(by='basename').quantile(.95).sort_values(by='IoU', ascending=False).head(5)

#### Show median individual run per config sorted by IoU:

In [None]:
# Median of every numeric column per run, ranked by median IoU.
# numeric_only=True is needed because df_val also carries non-numeric
# columns (architecture, backbone, ...), for which a median is undefined and
# which make pandas >= 2.0 raise a TypeError.
(
    df_val
    .groupby(by='basename')
    .median(numeric_only=True)
    .sort_values(by='IoU', ascending=False)
)

In [None]:
# `site` is first assigned in a later cell, so define it here as well to let
# the notebook run top to bottom on a fresh kernel. The value is the first
# site used below - adjust as needed.
site = 'V__Horton'
# Column-wise maximum per run for the selected site, ranked by IoU.
(
    df_val[df_val.basename.str.contains(site)]
    .groupby(by='basename')
    .max()
    .sort_values(by='IoU', ascending=False)
    .set_index('a-b')
)

#### Site-specific performance 

In [None]:
# `site` is first assigned in a later cell, so define it here as well to let
# the notebook run top to bottom on a fresh kernel. The value is the first
# site used below - adjust as needed.
site = 'V__Horton'
# Best validation row (highest IoU) of every run for the selected site.
(
    df_val[df_val.basename.str.contains(site)]
    .sort_values(by='IoU', ascending=False)
    .drop_duplicates(subset=['basename'])
)  #.set_index('a-b')

In [None]:
site = 'V__Horton'
# Five validation rows with the highest IoU among runs whose basename
# matches the site.
# NOTE(review): drop_duplicates() without subset compares every column,
# including data_sources; if that column holds lists this may raise a
# TypeError - confirm.
(
    df_val[df_val.basename.str.contains(site)]
    .sort_values(by='IoU', ascending=False)
    .drop_duplicates()
    .head(5)
)

In [None]:
site = 'V__Lena'
# Five validation rows with the highest IoU among runs whose basename
# matches the site.
# NOTE(review): drop_duplicates() without subset compares every column,
# including data_sources; if that column holds lists this may raise a
# TypeError - confirm.
(
    df_val[df_val.basename.str.contains(site)]
    .sort_values(by='IoU', ascending=False)
    .drop_duplicates()
    .head(5)
)

### Plots 

In [None]:
# put data into plottable shape
id_vars = ['Epoch', 'basename', 'type', 'architecture', 'a-b', 'backbone']

df_train_melt = df_train.melt(id_vars=id_vars, value_vars=['IoU']).sort_values(by='Epoch')
df_val_melt = df_val.melt(id_vars=id_vars, value_vars=['IoU']).sort_values(by='Epoch')
# merge
df_melt = pd.concat([df_val_melt])
#df_melt = pd.concat([df_train_melt, df_val_melt])

### Training data 

In [None]:
#px.line(df_melt, x='Epoch' , y='value' , color='basename', line_group='type')
# Training IoU per epoch, one coloured line per run.
df_train_melt = (
    df_train
    .melt(id_vars=id_vars, value_vars=['IoU'])
    .sort_values(by='Epoch')
)
p = px.line(
    df_train_melt,
    x='Epoch',
    y='value',
    color='basename',
    line_group='architecture',
)
p.update_layout(legend_orientation='h')

### Validation data 

#### Performance per Epoch IoU

In [None]:
# Validation IoU per epoch: colour by run, dash style by architecture.
df_val_melt = (
    df_val
    .melt(id_vars=id_vars, value_vars=['IoU'])
    .sort_values(by='Epoch')
)
p = px.line(
    df_val_melt.sort_values(by=['Epoch', 'architecture', 'backbone']),
    x='Epoch',
    y='value',
    color='basename',
    line_dash='architecture',
    line_group='backbone',
)
p.update_layout(legend_orientation='h')

In [None]:
# Precision minus recall ('P-R') per epoch on the validation data:
# colour by run, dash style by architecture.
df_val_melt_PR = (
    df_val
    .melt(id_vars=id_vars, value_vars=['P-R'])
    .sort_values(by='Epoch')
)
p = px.line(
    df_val_melt_PR.sort_values(by=['Epoch', 'architecture', 'backbone']),
    x='Epoch',
    y='value',
    color='basename',
    line_dash='architecture',
    line_group='backbone',
)
p.update_layout(legend_orientation='h')

#### Plot mean values of configurations 

In [None]:
# Mean training IoU per epoch for every architecture.
group_vals = ['Epoch', 'architecture']
# Plain column copy; a row-wise apply is not needed to duplicate a column.
df_train_melt['ds'] = df_train_melt['architecture']
# numeric_only=True: the melted table also holds string columns, which
# cannot be averaged and make pandas >= 2.0 raise a TypeError.
mean_vals = df_train_melt.groupby(group_vals).mean(numeric_only=True).reset_index(drop=False)
px.line(mean_vals, x='Epoch', y='value', color=group_vals[1])

### Backbone 

#### Mean performance 

In [None]:
# Display the mean_vals table as it currently stands, i.e. as assigned by
# whichever cell computed it last.
mean_vals

In [None]:
# Mean validation IoU per epoch for every backbone.
group_vals = ['Epoch', 'backbone']
# Plain column copy; a row-wise apply is not needed to duplicate a column.
# NOTE(review): 'ds' copies the architecture although this cell groups by
# backbone; the column is not used by the plot - confirm it is still needed.
df_val_melt['ds'] = df_val_melt['architecture']
# numeric_only=True: the melted table also holds string columns, which
# cannot be averaged and make pandas >= 2.0 raise a TypeError.
mean_vals = df_val_melt.groupby(group_vals).mean(numeric_only=True).reset_index(drop=False)
px.line(mean_vals, x='Epoch', y='value', color=group_vals[1])

In [None]:
# Mean validation IoU per epoch for every architecture.
group_vals = ['Epoch', 'architecture']
# Plain column copy; a row-wise apply is not needed to duplicate a column.
df_val_melt['ds'] = df_val_melt['architecture']
# numeric_only=True: the melted table also holds string columns, which
# cannot be averaged and make pandas >= 2.0 raise a TypeError.
mean_vals = df_val_melt.groupby(group_vals).mean(numeric_only=True).reset_index(drop=False)
px.line(mean_vals, x='Epoch', y='value', color=group_vals[1])

### Individual configs per site 

#### Regional comparison UNet 

In [None]:
# Configuration whose runs are compared in the next cell.
backbone = 'resnet50'
architecture = 'UNet'
# The query in the next cell wraps this value in quotes, so it is compared
# as the string "6".
stack_height = 6
loss = 'FocalLoss'
# NOTE(review): not referenced by the query in the next cell - confirm
# whether a data_sources filter was intended.
data_sources = '[planet, ndvi, tcvis, relative_elevation, slope]'

In [None]:
# Select the validation rows of the configured backbone / architecture /
# stack height / loss combination; every value is compared as a string.
query = f'backbone == "{backbone}" & architecture == "{architecture}" & stack_height == "{stack_height}" & loss_function == "{loss}"'
# Order by epoch before plotting: plotly connects points in row order, so
# unsorted rows (e.g. merged resumed runs) would make the lines jump back.
plot_data = df_val.query(query).sort_values(by='Epoch')
px.line(plot_data, x='Epoch', y='IoU', color='basename')

#### Regional comparison UnetPlusPlus

In [None]:
# Configuration whose runs are compared in the next cell.
backbone = 'resnet50'
architecture = 'UnetPlusPlus'
# The query in the next cell wraps this value in quotes, so it is compared
# as the string "6".
stack_height = 6
loss = 'FocalLoss'
# NOTE(review): not referenced by the query in the next cell - confirm
# whether a data_sources filter was intended.
data_sources = '[planet, ndvi, tcvis, relative_elevation, slope]'

In [None]:
# Select the validation rows of the configured backbone / architecture /
# stack height / loss combination; every value is compared as a string.
query = f'backbone == "{backbone}" & architecture == "{architecture}" & stack_height == "{stack_height}" & loss_function == "{loss}"'
# Order by epoch before plotting: plotly connects points in row order, so
# unsorted rows (e.g. merged resumed runs) would make the lines jump back.
plot_data = df_val.query(query).sort_values(by='Epoch')
px.line(plot_data, x='Epoch', y='IoU', color='basename')