In [1]:
%matplotlib inline 

In [11]:
import os
import json
import re 

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from pandas.io.json import json_normalize

plt.rcParams['figure.figsize'] = [16, 9]

In [12]:
def CustomParser(data):
    return json.loads(data)


FOLDER = '../results/avulsos/'
regex = re.compile(r'.csv')
folders = set(os.listdir(FOLDER))
folders = filter(lambda i: regex.search(i), folders)
folders = [i for i in folders if regex.search(i)]

df = pd.DataFrame()
# Read csv file
for filename in folders:
    ddf = pd.read_csv(FOLDER + filename, sep=';', converters={'db_parameters': CustomParser, 'model_cfg': CustomParser})
    ddf[sorted(ddf['db_parameters'][0].keys())] = ddf['db_parameters'].apply(pd.Series)
    ddf[sorted(ddf['model_cfg'][0].keys())] = ddf['model_cfg'].apply(pd.Series)
    
    df = pd.concat([df, ddf])

FileNotFoundError: [Errno 2] No such file or directory: '../results/avulsos/'

In [None]:
df.shape

In [None]:
FOLDER = '../results/'
folders = set(os.listdir(FOLDER))

regex = re.compile(r'.csv')
# use only one of the following lines, whichever you prefer
folders = filter(lambda i: not regex.search(i), folders)
folders = [i for i in folders if not regex.search(i)]

folders.remove('_sources')
folders.remove('avulsos')
folders

In [None]:
result = {
    'acc': [],
    'f1': [],
    'db': [],
    'task': [],
    'model': [],
    'db_parameters': []
}

reports = pd.DataFrame()

report_id = 0
for root_folder in folders:
    for fold in os.listdir(FOLDER + root_folder):
        for run_id in os.listdir(FOLDER + root_folder + '/'+fold):
            current_folder = FOLDER + root_folder + '/' + fold + '/' + run_id

            metrics_file = current_folder + '/metrics-test.csv'
            config_file = current_folder + '/config.json'
            report_file = current_folder + '/report-train.csv'

            if not os.path.exists(metrics_file) or not os.path.exists(config_file):
                continue
                
            config_df = pd.read_json(config_file)
#             if config_df['dataset_cfg']['reader'] != "pypagai.preprocessing.dataset_babi.BaBIDataset":
#                 continue

            db = 'babi'
            task = config_df['dataset_cfg']['task']
            model = config_df['model_default_cfg']['model'].split('.')[-1]            
            
            if os.path.exists(report_file):
                report_df = pd.read_csv(report_file)
                report_df = report_df.reset_index()
                report_df['dataset'] = db
                report_df['task'] = task
                report_df['model'] = model
                report_df['id'] = report_id
                reports = pd.concat([reports,report_df])
                report_id += 1

            metrics_df = pd.read_csv(metrics_file)
            acc = metrics_df['accuracy'][0]
            f1 = metrics_df['f1_micro'][0]

            result['f1'].append(f1)
            result['db'].append(db)
            result['acc'].append(acc)
            result['task'].append(task)
            result['model'].append(model)
            result['db_parameters'].append(config_file)

new_results = pd.DataFrame(result)
df = pd.concat([df, new_results])

In [None]:
# Show best result of each model for each babi task
# model_list = ['ConvInputsRN','ConvQueryRN','ConvRN','ConvStoryRN','EmbedLSTM','EncoderModel','RNNModel','RNNoLSTM','SimpleLSTM']

ddf = df[df['db'] == 'babi']
# ddf = ddf[~(ddf['model'].isin(model_list))]
ddf = ddf.groupby(['db', 'model', 'task'])
ddf = ddf.max()
ddf = ddf.reset_index()
pivot = ddf[['model', 'acc', 'task']].pivot('task', 'model')
pivot = pivot['acc']

plt.figure(figsize = (16,9))
sns.heatmap(pivot, cmap="Blues", linewidths=1, annot=True, cbar=False, vmin=0.949, vmax=0.95)
pivot

In [None]:
# Show best result of each model for each babi task
model_list = ['ConvInputsRN','ConvQueryRN','ConvRN','ConvStoryRN','ConvLSTM','RNNoLSTM']

ddf = df[df['db'] == 'babi']
ddf = ddf[~(ddf['model'].isin(model_list))]
ddf = ddf.groupby(['db', 'model', 'task'])
ddf = ddf.max()
ddf = ddf.reset_index()
pivot = ddf[['model', 'acc', 'task']].pivot('task', 'model')
pivot = pivot['acc']

plt.figure(figsize = (16,9))
sns.heatmap(pivot, cmap="Blues", linewidths=1, annot=True, cbar=False, vmin=0.949, vmax=0.95)

# Epoch evolution

In [None]:
ids = list(reports[reports['index'] > 500]['id'].unique())

In [None]:
import seaborn as sns
sns.set(style="darkgrid")

selected_models = ['RNNModel', 'N2NMemory', 'RN']
reports['selected'] = False
reports.loc[reports['model'].isin(selected_models), 'selected'] = True

f, (ax1, ax2) = plt.subplots(1,2)

# Plot the responses for different events and regions
sns.lineplot(x="index", y="acc", data=reports[reports['id'].isin(ids)], ax=ax1)

sns.lineplot(x="index", y="acc", data=reports[reports['id'].isin(ids)], hue='selected', ax=ax2)

In [None]:
sns.lineplot(x="index", y="acc", hue='model', data=reports[(reports['id'].isin(ids)) & (~reports['model'].isin(model_list))])

In [None]:
sns.lineplot(x="index", y="acc", hue='task', data=reports[(reports['id'].isin(ids)) & (~reports['model'].isin(model_list))])

In [None]:
reports['task'] = reports['task'].astype(int)
reports['task'].unique()