# Compare model outputs of AICORE DL Model


### Imports 

In [None]:
import plotly.express as px
import pandas as pd
import os
import glob
from pathlib import Path
import yaml
import datetime
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl

# workaround buggy autocomplete
%config Completer.use_jedi = False

In [None]:
def calc_rank(df, feature='basename'):
    """Add a per-group ``rank`` column ordered by descending ``IoU``.

    Rows are grouped by the values of ``feature``. Within each group the row
    with the highest ``IoU`` gets rank 1, the next one rank 2, and so on up
    to the number of rows in that group.

    Args:
        df: DataFrame with an ``IoU`` column and a ``feature`` column.
            It is modified in place.
        feature: Name of the column that identifies a group (one run).

    Returns:
        The same ``df`` object, now carrying the ``rank`` column.
    """
    for value in pd.unique(df[feature]):
        # Filter on the requested column, not on a hard-coded ``basename``.
        group = df[df[feature] == value]
        idx = group.sort_values(by='IoU', ascending=False).index
        # Size the ranks to the group instead of assuming exactly 100 rows;
        # the removed ``np.int`` alias is no longer needed either.
        df.loc[idx, 'rank'] = np.arange(1, len(idx) + 1)
    return df

In [None]:
def print_overview(df):
    """Print, for each region, the maximum ``Epoch`` found per run.

    For every unique value in ``df.region`` the rows whose ``basename``
    contains that value are selected, reduced to the per-``basename``
    maximum, sorted by ``a_b`` in descending order and printed as a Series
    of ``Epoch`` values indexed by ``a_b``.

    Args:
        df: DataFrame with ``region``, ``basename``, ``a_b`` and ``Epoch``
            columns.
    """
    for region_name in pd.unique(df.region):
        # The region name is matched as a pattern inside the basename.
        subset = df[df.basename.str.contains(region_name)]
        print('\n', region_name)
        per_run_max = subset.groupby(by='basename').max()
        ordered = per_run_max.sort_values(by='a_b', ascending=False)
        print(ordered.set_index('a_b')['Epoch'])

### Import Data

In [None]:
# Load the validation results table from a pickle in the working directory.
# NOTE: unpickling can execute arbitrary code; only load files you trust.
df_val = pd.read_pickle('df_val_filtered.pkl')

In [None]:
# Load the training results table from a pickle in the working directory.
# NOTE: unpickling can execute arbitrary code; only load files you trust.
df_train = pd.read_pickle('df_train_filtered.pkl')

### Check data completeness 

In [None]:
# Print the maximum Epoch per run for each region in the validation table.
print_overview(df_val)

### Data + Feature Engineering 

In [None]:
# Add the per-run 'rank' column (ordered by descending IoU) to both tables.
df_val = calc_rank(df_val)
df_train = calc_rank(df_train)

### Data Analysis 

#### Best runs result per run
1. Maximum
2. 95 %
3. 50 %

#### Show best individual run per config sorted by IoU:

#### Plot best (1, 5, 10, 20, 50) IoU for each configuration

In [None]:
# IoU of every configuration (basename) at a handful of ranks:
# rows are configurations, columns are ranks, cells shaded green by value.
query = 'rank in (1, 5, 10, 20, 50)'
cm = sns.light_palette("green", as_cmap=True)
df_vals_by_basename = (
    df_val
    .pivot_table(values='IoU', index='rank', columns='basename')
    .query(query)
    .T
)
df_vals_by_basename.style.background_gradient(cmap=cm)

#### Show results of best config per region 

In [None]:
# Distinct region names present in the validation table.
sites = df_val['region'].unique()

In [None]:
# For every region, keep the configuration whose rank-1 IoU is highest and
# collect its IoU at the selected ranks (one row per region's best config).
query1 = 'rank in (1, 5, 10, 20, 50)'
stack = []
for site in sites:
    query2 = f'region == "{site}"'
    # Filter with query1 from this cell; the global `query` is reassigned by
    # other cells, so relying on it made the result depend on execution order.
    filtered = df_val.query(query2).query(query1).pivot(index='basename', columns='rank', values='IoU')
    # Sort configurations by their best (rank 1) IoU and take the top one.
    out = filtered.sort_values(by=1.0, axis=0, ascending=False).iloc[0]
    stack.append(out)
region_best = pd.concat(stack, axis=1).T

cm = sns.light_palette("green", as_cmap=True)
region_best.style.background_gradient(cmap=cm)

#### show stats by architecture/backbone 

In [None]:
# Mean IoU per `a_b` group at selected ranks:
# rows are `a_b` values, columns are ranks, cells shaded green by value.
query = 'rank in (1, 5, 10, 20, 50, 75, 100)'
cm = sns.light_palette("green", as_cmap=True)
df_vals_by_basename = (
    df_val
    .query(query)
    .pivot_table(values='IoU', index='rank', columns='a_b', aggfunc='mean')
    .T
)
df_vals_by_basename.style.background_gradient(cmap=cm)

## Plots 

In [None]:
# Reshape both tables to long format (one row per run, epoch and metric)
# so they can be handed to the plotting calls.
id_vars = ['Epoch', 'basename', 'type', 'architecture', 'a_b', 'backbone']

df_train_melt = (
    df_train
    .melt(id_vars=id_vars, value_vars=['IoU'])
    .sort_values(by='Epoch')
)
df_val_melt = (
    df_val
    .melt(id_vars=id_vars, value_vars=['IoU'])
    .sort_values(by='Epoch')
)
# Only the validation table goes into the merged frame for now; the training
# table can be added by concatenating [df_train_melt, df_val_melt] instead.
df_melt = pd.concat([df_val_melt])

### Training data 

### Validation data 

#### Performance per Epoch IoU

In [None]:
# Show the first three rows of the validation table.
df_val.head(3)

#### Plot  difference in region, architecture, backbone

In [None]:
# List the columns available for grouping and plotting.
df_val.columns

In [None]:
# Data used by the following plot; this is an alias of df_val, not a copy.
filt = df_val

In [None]:
# Compare validation IoU between groups in two panels: left over the training
# epoch, right over the per-run rank. The figure is written to figures/.
# Columns tried for `group`: 'backbone', 'architecture', 'region'
group = 'backbone'
fig, axes = plt.subplots(1, 2, figsize=(12, 3))
for ax, x_column in zip(axes, ('Epoch', 'rank')):
    sns.lineplot(data=filt, x=x_column, y='IoU', hue=group, ax=ax)
    # Place the legend below the axes so it does not cover the lines.
    ax.legend(loc='upper left', bbox_to_anchor=(0, -0.2), ncol=3)
    ax.grid()
    ax.set_xlim(1, 100)
    ax.set_ylim(0, 0.6)

# savefig does not create missing directories, so make sure the target exists.
os.makedirs('figures', exist_ok=True)
# The explicit bounding box extends below the axes to include the legends.
fig.savefig(f'figures/{group}_epoch_rank', dpi=300, bbox_inches=mpl.transforms.Bbox([[0, -1], [12, 3]]))

In [None]:
# Interactive validation IoU curves: one colour per run (basename),
# dash style by architecture, lines grouped by backbone.
df_val_melt = df_val.melt(id_vars=id_vars, value_vars=['IoU']).sort_values(by='Epoch')
p = px.line(
    df_val_melt.sort_values(by=['Epoch', 'architecture', 'backbone']),
    x='Epoch',
    y='value',
    color='basename',
    line_dash='architecture',
    line_group='backbone',
)
p.update_layout(legend_orientation='h')

In [None]:
# Same line plot as for IoU, but for the 'P-R' column of the validation table.
df_val_melt_PR = df_val.melt(id_vars=id_vars, value_vars=['P-R']).sort_values(by='Epoch')
p = px.line(
    df_val_melt_PR.sort_values(by=['Epoch', 'architecture', 'backbone']),
    x='Epoch',
    y='value',
    color='basename',
    line_dash='architecture',
    line_group='backbone',
)
p.update_layout(legend_orientation='h')

#### Plot mean values of configurations 

In [None]:
# Mean training IoU per epoch, one line per architecture.
group_vals = ['Epoch', 'architecture']
# 'ds' is a plain copy of the architecture column; no row-wise apply needed.
df_train_melt['ds'] = df_train_melt['architecture']
# numeric_only=True keeps the string columns out of the mean, which would
# otherwise raise on current pandas versions.
mean_vals = df_train_melt.groupby(group_vals).mean(numeric_only=True).reset_index(drop=False)
px.line(mean_vals, x='Epoch', y='value', color=group_vals[1])

### Backbone 

#### Mean performance 

In [None]:
# Display the most recently computed table of group means.
mean_vals

In [None]:
# Mean validation IoU per epoch, one line per backbone.
group_vals = ['Epoch', 'backbone']
# 'ds' is a plain copy of the architecture column; no row-wise apply needed.
df_val_melt['ds'] = df_val_melt['architecture']
# numeric_only=True keeps the string columns out of the mean, which would
# otherwise raise on current pandas versions.
mean_vals = df_val_melt.groupby(group_vals).mean(numeric_only=True).reset_index(drop=False)
px.line(mean_vals, x='Epoch', y='value', color=group_vals[1])

In [None]:
# Mean validation IoU per epoch, one line per architecture.
group_vals = ['Epoch', 'architecture']
# 'ds' is a plain copy of the architecture column; no row-wise apply needed.
df_val_melt['ds'] = df_val_melt['architecture']
# numeric_only=True keeps the string columns out of the mean, which would
# otherwise raise on current pandas versions.
mean_vals = df_val_melt.groupby(group_vals).mean(numeric_only=True).reset_index(drop=False)
px.line(mean_vals, x='Epoch', y='value', color=group_vals[1])

### Individual configs per site

#### Regional comparison UNet 

In [None]:
# Configuration selected for the UNet regional comparison.
backbone = 'resnet50'
architecture = 'UNet'
stack_height = 6
loss = 'FocalLoss'
# NOTE(review): data_sources is not referenced by the query in the next cell.
data_sources = '[planet, ndvi, tcvis, relative_elevation, slope]'

In [None]:
# Validation IoU per epoch for all runs matching the selected configuration,
# one line per run (basename).
# NOTE(review): stack_height is compared against a quoted string; confirm the
# column holds strings, otherwise no row will match.
query = (
    f'backbone == "{backbone}" & architecture == "{architecture}" '
    f'& stack_height == "{stack_height}" & loss_function == "{loss}"'
)
plot_data = df_val.query(query)
px.line(plot_data, x='Epoch', y='IoU', color='basename')

#### Regional comparison DeepLabV3

In [None]:
# Configuration selected for the DeepLabV3 regional comparison.
backbone = 'resnet50'
architecture = 'DeepLabV3'
stack_height = 6
loss = 'FocalLoss'
# NOTE(review): data_sources is not referenced by the query in the next cell.
data_sources = '[planet, ndvi, tcvis, relative_elevation, slope]'

In [None]:
# Validation IoU per epoch for all runs matching the selected configuration,
# one line per run (basename).
# NOTE(review): stack_height is compared against a quoted string; confirm the
# column holds strings, otherwise no row will match.
query = (
    f'backbone == "{backbone}" & architecture == "{architecture}" '
    f'& stack_height == "{stack_height}" & loss_function == "{loss}"'
)
plot_data = df_val.query(query)
px.line(plot_data, x='Epoch', y='IoU', color='basename')

#### Regional comparison UnetPlusPlus

In [None]:
# Configuration selected for the UnetPlusPlus regional comparison.
backbone = 'resnet50'
architecture = 'UnetPlusPlus'
stack_height = 6
loss = 'FocalLoss'
# NOTE(review): data_sources is not referenced by the query in the next cell.
data_sources = '[planet, ndvi, tcvis, relative_elevation, slope]'

In [None]:
# Validation IoU per epoch for all runs matching the selected configuration,
# one line per run (basename).
# NOTE(review): stack_height is compared against a quoted string; confirm the
# column holds strings, otherwise no row will match.
query = (
    f'backbone == "{backbone}" & architecture == "{architecture}" '
    f'& stack_height == "{stack_height}" & loss_function == "{loss}"'
)
plot_data = df_val.query(query)
px.line(plot_data, x='Epoch', y='IoU', color='basename')

#### Sorted IoU by config

In [None]:

# List the distinct runs (basenames) in the current plot selection.
pd.unique(plot_data.basename)

In [None]:
# IoU of every Horton run, sorted from best to worst epoch (x = rank).
query = 'region == "Horton"'
# Work on a copy so that writing the rank column does not target a slice of df_val.
plot_data = df_val.query(query).copy()
basenames = pd.unique(plot_data.basename)
for basename in basenames:
    idx = (plot_data[plot_data.basename == basename]).sort_values(by='IoU', ascending=False).index
    # Zero-based rank for this plot, sized to the run instead of a fixed 100;
    # the removed np.int alias is no longer used.
    plot_data.loc[idx, 'rank'] = np.arange(len(idx))
metric = 'IoU'
px.line(plot_data.sort_values(by=metric), x='rank', y=metric, color='basename')

In [None]:
# NOTE(review): `regions` is not assigned in any visible cell, so this raises
# NameError unless it is defined elsewhere; `sites` presumably holds the
# intended list of regions — confirm.
regions

In [None]:
# IoU of every Lena run, sorted from best to worst epoch (x = rank).
query = 'region == "Lena"'
# Work on a copy so that writing the rank column does not target a slice of df_val.
plot_data = df_val.query(query).copy()
# Recompute the run names for this selection; the list left over from the
# Horton cell describes a different region's runs.
basenames = pd.unique(plot_data.basename)
for basename in basenames:
    idx = (plot_data[plot_data.basename == basename]).sort_values(by='IoU', ascending=False).index
    # Zero-based rank for this plot, sized to the run instead of a fixed 100;
    # the removed np.int alias is no longer used.
    plot_data.loc[idx, 'rank'] = np.arange(len(idx))
metric = 'IoU'
px.line(plot_data.sort_values(by=metric), x='rank', y=metric, color='basename')

In [None]:
# Substring used by the next cell to select runs by their basename.
site = 'V__Horton'
# Disabled: table of the five best rows for this site.
#df_val[df_val.basename.str.contains(site)].sort_values(by='IoU', ascending=False).drop_duplicates().head(5)

In [None]:
# IoU per epoch for every run whose basename matches `site`,
# one line per run.
plot_data = df_val.loc[df_val.basename.str.contains(site)]
px.line(plot_data, x='Epoch', y='IoU', color='basename')