# Imports 

In [1]:
import pandas as pd
import numpy as np
import torch
from torch.nn import functional as F
import scipy
import sklearn
from umap import UMAP

import plotly_express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt

from netcal.metrics import ECE, MCE
from netcal.scaling import TemperatureScaling
from netcal.presentation import ReliabilityDiagram
from sklearn.metrics import brier_score_loss
from sklearn.preprocessing import OneHotEncoder

2023-05-14 15:34:54.480447: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-05-14 15:34:54.598742: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-05-14 15:34:55.147105: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /opt/ros/noetic/lib:/opt/ros/noetic/lib/x86_64-linux-gnu
2023-05-14 15:34:55.147161: W tensorflow/

# Analysis Functions 

In [2]:
def test_report(l, confidence=0.95):
    for df in l:
        df.columns = list(df.columns[1:]) + ['model_name']
        df['accuracy'] = 100 * df['accuracy']
        a = df['accuracy'].to_numpy()
        n = len(a)
        m, se = np.mean(a), scipy.stats.sem(a)
        h = se * scipy.stats.t.ppf((1 + confidence) / 2., n-1)
        print(m, h)

In [3]:
def conf_int(data, confidence=0.95):
    a = data.to_numpy()
    n = len(a)
    m, se = np.mean(a), scipy.stats.sem(a)
    h = se * scipy.stats.t.ppf((1 + confidence) / 2., n-1)
    return m, h

In [4]:
def prob_metrics(df):
    df.columns = ['task', '0', '1', '2', '3', '4', 'actual']
    df[['0', '1', '2', '3', '4']] = F.softmax(torch.tensor(
        df[['0', '1', '2', '3', '4']].to_numpy()), dim=1).numpy()
    
    # Calculate ECE, MCE
    n_bins = 10
    confidences = df[['0', '1', '2', '3', '4']].to_numpy()
    ground_truth = df['actual'].to_numpy()
    ece = ECE(n_bins)
    mce = MCE(n_bins)
    uncalibrated_score_ece = ece.measure(confidences, ground_truth)
    temperature = TemperatureScaling()
    temperature.fit(confidences, ground_truth)
    calibrated = temperature.transform(confidences)
    calibrated_score_ece = ece.measure(calibrated, ground_truth)
    
    uncalibrated_score_mce = mce.measure(confidences, ground_truth)
    calibrated_score_mce = mce.measure(calibrated, ground_truth)

    # Reliability diagram
    diagram = ReliabilityDiagram(n_bins)
    diagram.plot(calibrated, ground_truth)
    
    return calibrated_score_ece, calibrated_score_mce

In [5]:
def report(df, tasks, confidence=0.95):
    df.columns = list(df.columns[1:]) + ['model_name']
    df['accuracy'] = 100 * df['accuracy']
    a = df['accuracy'].to_numpy()
    a = np.split(a, len(a)/tasks, axis=0)
    for accs in a:
        n = len(accs)
        m, se = np.mean(accs), scipy.stats.sem(accs)
        h = se * scipy.stats.t.ppf((1 + confidence) / 2., n-1)
        print(m, h)

In [15]:
def plot_summarize(dfs, columns, meta_batch_size, batch_size):
    dfs[0] = dfs[0].groupby(np.arange(len(dfs[0])) // meta_batch_size).mean()
    
    dfs[1].columns = list(dfs[1].columns[1:]) + ['model_name']
    valid = dfs[1].groupby('model_name', as_index=False)[dfs[1].columns].mean()
    valid['iter'] = valid['model_name'].apply(
        lambda x: int(x[x.index('_') + 1:-3]))
    valid.sort_values(by=['iter'], inplace=True)
    valid.drop(columns=['model_name'], inplace=True)
    print(valid)
    valid['accuracy'] = 100 * valid['accuracy'] 

    # Summarizing best valid scores
    ind = valid[valid['accuracy'] == valid['accuracy'].max()]['iter'].values[0]
    t = dfs[1][dfs[1]['model_name'] == 'model_' + str(ind) + '.pt']
    t['accuracy'] = 100 * t['accuracy']
    print('Validation Metrics Stats of Best Model at {}-th Iteration'.format(ind))
    display(t.describe())

    dfs[0].drop(columns=['task'], inplace=True)
    dfs[0].reset_index(drop=True, inplace=True)
    valid.reset_index(drop=True, inplace=True)
    dfs[0].columns = [str(c)+'_train' for c in columns]
    dfs[0]['accuracy_train'] = 100 * dfs[0]['accuracy_train']
    dfs[0]['iter'] = dfs[0].index
    
    # Plotting
    ctv = [str(c)+'_train' for c in columns]
    fig = px.line(dfs[0], x=dfs[0].index, y=ctv)
    fig.add_scatter(x=valid['iter'], y=valid['accuracy'], name='accuracy_valid')
    fig.show()

    #return df, test

# Analysing runs from CSV's

In [31]:
path_train = "/home/dfki.uni-bremen.de/csingh/DFKI/PhysWM/trident_primitives/output/meta_lrng/files/folder/learning_to_meta-learn/logs/TRIDENT_primitives_5-way_4-shot_1-queries/exp1"
path_test = "/home/dfki.uni-bremen.de/csingh/DFKI/PhysWM/trident_primitives/output/meta_lrng/files/folder/learning_to_meta-learn/logs/TRIDENT_test_primitives_5-way_4-shot_1-queries/exp1"
path = path_train

train_path = path + "/train.csv"
test_path = path + "/test.csv"
valid_path = path + "/valid.csv"

df_train = pd.read_csv(train_path)
df_valid = pd.read_csv(valid_path)
df_valid = df_test
df_test = pd.read_csv(test_path)

df_test = df_test[~(df_test['CE_Loss'] == 'model_last.pt')]
df_valid = df_valid[~(df_valid['CE_Loss'] == 'model_last.pt')]
print(df_valid)

Empty DataFrame
Columns: [task, accuracy, ELBO, Label_KL, Style_KL, Reconst_Loss, CE_Loss]
Index: []


In [32]:
plot_summarize([df_train, df_valid], ['accuracy', 'ELBO', 'Label_KL', 'Semantic_KL', 'Reconst_Loss', 'CE_Loss'], 20, 500)

Empty DataFrame
Columns: [iter]
Index: []


KeyError: 'accuracy'

In [None]:
df_test = pd.read_csv(f'PATH to test.csv')
test_report([df_test]) # Can also run test.py multiple times for multiple test.csv's and add in the argument list  

# Prob Metrics 

In [None]:
df_preds = pd.read_csv('PATH TO preds.csv')
ece, mce = prob_metrics(df_preds)
print(ece,mce)

In [None]:
def brier_multi(targets, probs):
    return np.mean(np.sum((probs - targets)**2, axis=1))
encoder = OneHotEncoder(sparse=False)
onehot = encoder.fit_transform(np.array(df_preds['actual']).reshape(-1, 1))
brier_multi(onehot, df_preds[['0','1','2','3','4']])

# Visualizing the Latents 

In [None]:
latents0 = torch.load('PATH to a random latents_0')
latents = torch.load('PATH to a random latents')

In [None]:
# Making Before and After latent-datasets 
df0 = pd.DataFrame(np.array(latents0['label_latents'][0]))
df0['class'] = pd.Series(list(np.full((10,), 0)) + list(np.full((10,), 1)) + list(
    np.full((10,), 2)) + list(np.full((10,), 3)) + list(np.full((10,), 4)))
features0 = df0.iloc[:, :-1]
df = pd.DataFrame(np.array(latents['label_latents'][0]))
df['class'] = pd.Series(list(np.full((10,), 0)) + list(np.full((10,), 1)) + list(
    np.full((10,), 2)) + list(np.full((10,), 3)) + list(np.full((10,), 4)))
features = df.iloc[:, :-1]

# UMAP projection to 2D space 
umap = UMAP(n_components=2, init='random', random_state=0)
proj_2d0 = umap.fit_transform(features0)
umap = UMAP(n_components=2, init='random', random_state=0)
proj_2d = umap.fit_transform(features)

In [None]:
sklearn.metrics.davies_bouldin_score(proj_2d0, df['class'])

In [None]:
sklearn.metrics.davies_bouldin_score(proj_2d, df['class'])

In [None]:
plt.figure(figsize=(25, 12))
sns.relplot(x=proj_2d[:, 0], y=proj_2d[:, 1], hue=df['class'].astype(
    int), palette="Dark2", style=df['class'].astype(int), s=250, legend=False)
a=sns.kdeplot(x=proj_2d[:, 0], y=proj_2d[:, 1],
            hue=df['class'].astype(int), palette="Pastel2", legend=False)
sns.despine(right=True)

In [None]:
plt.figure(figsize=(10, 6))
sns.relplot(x=proj_2d0[:, 0], y=proj_2d0[:, 1], hue=df['class'].astype(
    int), palette="Dark2", style=df['class'].astype(int), s=250, legend=False)
a=sns.kdeplot(x=proj_2d0[:, 0], y=proj_2d0[:, 1],
            hue=df['class'].astype(int), palette="Pastel2", legend=False)
sns.despine(right=True)

In [None]:
sfig = a.get_figure()
sfig.savefig('label_latents0.jpeg', dpi=1000)

In [None]:
# Making Before and After latent-datasets 
df0 = pd.DataFrame(np.array(latents0['semantic_latents'][0]))
df0['class'] = pd.Series(list(np.full((10,), 0)) + list(np.full((10,), 1)) + list(
    np.full((10,), 2)) + list(np.full((10,), 3)) + list(np.full((10,), 4)))
features0 = df0.iloc[:, :-1]
df = pd.DataFrame(np.array(latents['semantic_latents'][0]))
df['class'] = pd.Series(list(np.full((10,), 0)) + list(np.full((10,), 1)) + list(
    np.full((10,), 2)) + list(np.full((10,), 3)) + list(np.full((10,), 4)))
features = df.iloc[:, :-1]

# UMAP projection to 2D space
umap = UMAP(n_components=2, init='random', random_state=0)
proj_2d0 = umap.fit_transform(features0)
umap = UMAP(n_components=2, init='random', random_state=0)
proj_2d = umap.fit_transform(features)

In [None]:
sklearn.metrics.davies_bouldin_score(proj_2d0, df['class'])

In [None]:
sklearn.metrics.davies_bouldin_score(proj_2d, df['class'])

In [None]:
plt.figure(figsize=(10, 6))
sns.relplot(x=proj_2d[:, 0], y=proj_2d[:, 1], hue=df['class'].astype(
    int), palette="Dark2", style=df['class'].astype(int), s=250, legend=False)
a=sns.kdeplot(x=proj_2d[:, 0], y=proj_2d[:, 1],
            hue=df['class'].astype(int), palette="Pastel2", legend=False)
sns.despine(right=True)

In [None]:
#plt.figure(figsize=(10, 6))
sns.relplot(x=proj_2d0[:, 0], y=proj_2d0[:, 1], hue=df['class'].astype(
    int), palette="Dark2", style=df['class'].astype(int), s=250, legend=False)
a= sns.kdeplot(x=proj_2d0[:, 0], y=proj_2d0[:, 1],
            hue=df['class'].astype(int), palette="Pastel2", legend=False)
sns.despine(right=True)

In [None]:
sfig = a.get_figure()
sfig.savefig('style_latents0.jpeg', dpi=1000)