# Results analysis

In [None]:
import os
import numpy as np
import pandas as pd
from scipy.stats import t

import matplotlib.pyplot as plt
import seaborn as sns

Load the metrics from the individual result files and save them as a single file.

In [None]:
# Path of the aggregated metrics CSV (the seed-42 run, judging by the file name).
metrics_path = 'res_seed_42.csv'

## Read results downloaded from Google Colab


In [None]:
# One-off aggregation step, kept commented out for reference: it walks the
# results folder, reads every `metrics.csv`, tags it with a language taken from
# the directory path and writes the combined table to `metrics_path`.
# NOTE(review): `DataFrame.append` was removed in pandas 2.0 -- switch to
# `pd.concat` before re-enabling this cell.
# results_folder = os.path.join('..' , '..', 'results')
# df = pd.DataFrame()
# for root, subdirs, files in os.walk(results_folder):
#     for f in files:
#         if f == 'metrics.csv':
#             d = pd.read_csv(os.path.join(root,f))
#             if 'PL' in root:
#                 d['lang'] = 'PL'
#             elif 'ENG' in root:
#                 d['lang'] = 'ENG'
#             df = df.append(d)
# df = df.drop(columns=['Unnamed: 0']).reset_index(drop=True)
# df.to_csv(metrics_path, index=False)

## Load files for analysis

In [None]:
# Load the aggregated metrics. The rounded frame is only displayed; `df_42`
# itself keeps the full precision.
df_42 = pd.read_csv(metrics_path)
df_42.round(3)

In [None]:
def encode_size(r):
    """Translate a ``(dataset_size, lang)`` pair into a numeric size code.

    Parameters
    ----------
    r : sequence or pandas.Series
        Its first element is the size label (``'small'``, ``'medium'`` or
        ``'large'``) and its second element the language (``'ENG'`` or
        ``'PL'``). Any further elements are ignored.

    Returns
    -------
    int
        ``15`` for large/ENG, ``7`` for large/PL, ``3`` for medium and ``1``
        for everything else (including ``'large'`` with an unknown language).
        NOTE(review): the codes presumably express the size relative to the
        small split -- confirm with the dataset description.
    """
    # Unpack by iteration instead of `r[0]` / `r[1]`: when `r` is a row of a
    # DataFrame its index holds column labels, and integer keys on such a
    # Series rely on the positional fallback that pandas has deprecated.
    # Iteration yields the values in order for Series, lists and tuples alike.
    size_label, lang, *_ = r

    if size_label == 'large':
        if lang == 'ENG':
            return 15
        if lang == 'PL':
            return 7
        return 1
    if size_label == 'medium':
        return 3
    return 1


In [None]:
# Combine the runs stored in `multiple_run.csv` with the seed-42 run.
df_multiple = pd.read_csv('multiple_run.csv')
# `DataFrame.append` was removed in pandas 2.0. `pd.concat` stacks the two
# frames the same way and, like `append`, keeps each frame's own row labels.
df = pd.concat([df_multiple, df_42])
# Numeric size code for every row, derived from the size label and language.
df['size'] = df[['dataset_size', 'lang']].apply(encode_size, axis=1)
# Columns that together identify one experiment configuration.
experiment_ident = ['model','dataset_type','dataset_size', 'lang']

In [None]:
# Number of non-null values per column for every experiment configuration,
# i.e. how many runs are available for each one.
df.groupby(experiment_ident).count()

## Mean values

In [None]:
# Mean of every remaining column per experiment configuration. The rounding
# only affects the displayed table, not `df_mean`.
df_mean = df.groupby(experiment_ident).mean()
df_mean.round(4)

## Standard Error

In [None]:
# Sample size used for the t-distribution.
# NOTE(review): presumably the number of runs per configuration -- confirm
# against the per-group counts computed earlier in the notebook.
n = 4
# Two-sided confidence level.
conf = 0.95
# Student-t quantile for a two-sided interval with n-1 degrees of freedom.
t_correct = t.ppf((1+conf)/2, n-1)
t_correct

In [None]:
# Sample standard deviation of every metric per experiment configuration.
df_std = df.groupby(experiment_ident).std()
# Half-width of the confidence interval of the mean: t * s / sqrt(n).
# The standard deviation has to be scaled to the standard error (s / sqrt(n))
# before applying the t quantile; multiplying the raw standard deviation by
# the quantile overstates the interval by a factor of sqrt(n).
(df_std / np.sqrt(n) * t_correct).round(4)

## Plots

In [None]:
# Global seaborn look for all figures below: white background, "paper"
# context with fonts scaled by 1.5.
sns.set_style('white')
sns.set_context("paper", font_scale = 1.5)

In [None]:
# Build a long-format table (one row per experiment and metric) for plotting.
d = df_mean.reset_index()
# `ignore_index=False` keeps the row labels of `d`, so every melted row can
# still be matched to the experiment it came from. `melt` stores the metric
# name in `metric` and the number in the default `value` column.
metrics_df = d[['f1', 'precision', 'recall']].melt(ignore_index=False, var_name='metric')
info_df = d[['model', 'size', 'dataset_size', 'dataset_type', 'lang']]
# Join on the preserved row labels to attach the experiment description.
metrics_info_df = metrics_df.merge(info_df, left_index=True, right_index=True)

In [None]:
def plot_f1_prec_rec_for_model(data, model, model_label, bbox_anchor, xticksrot=0):
    """Plot every metric against the dataset size for a single model.

    One facet is drawn per value of the ``metric`` column and one coloured
    line with markers per ``dataset_type``. The figure is saved as
    ``img/<model_label>_metrics.svg`` and then shown.

    Parameters
    ----------
    data : pandas.DataFrame
        Long-format table with the columns ``model``, ``metric``, ``value``,
        ``size`` and ``dataset_type``.
    model : str
        Value of the ``model`` column whose rows are plotted.
    model_label : str
        Prefix of the output file name.
    bbox_anchor : tuple
        Forwarded to the legend as ``bbox_to_anchor``.
    xticksrot : int, optional
        Rotation of the x tick labels.
    """
    selected = data[data['model'] == model].sort_values(by='size', ascending=False)
    g = sns.FacetGrid(selected, col="metric", hue='dataset_type', height=3.5)
    g.map(sns.lineplot, 'size', 'value', legend=None, err_style=None)
    g.map(sns.scatterplot, 'size', 'value')
    # Sort the tick positions explicitly so that they always line up with the
    # small/medium/large labels, whatever the row order of `data` is.
    g.set(xticks=sorted(data['size'].unique()))
    g.set_xticklabels(['small', 'medium', 'large'], rotation=xticksrot)
    ncol = data['dataset_type'].nunique()
    g.add_legend(title='Dataset type', ncol=ncol,
        loc='center', bbox_to_anchor=bbox_anchor,
        markerscale=2.0)
    # `savefig` does not create missing directories.
    os.makedirs('img', exist_ok=True)
    plt.savefig(f'img/{model_label}_metrics.svg', bbox_inches = 'tight', pad_inches = 0)
    plt.show()

### Polish dataset

In [None]:
# Long-format metrics restricted to the Polish (PL) experiments.
polish_df = metrics_info_df[metrics_info_df['lang'] == 'PL']

In [None]:
# Multilingual BERT on the Polish data; the tuple is the legend anchor.
plot_f1_prec_rec_for_model(polish_df,'bert-base-multilingual-uncased', 'bert_PL', (0.41, -0.10))

In [None]:
# XLM-RoBERTa on the Polish data; the tuple is the legend anchor.
plot_f1_prec_rec_for_model(polish_df, 'xlm-roberta-base', 'XLM-RoBERTa_PL', (0.41, -0.10))

### English dataset

In [None]:
# Long-format metrics restricted to the English (ENG) experiments.
english_df = metrics_info_df[metrics_info_df['lang'] == 'ENG']

In [None]:
# Multilingual BERT on the English data; tick labels are rotated by 45 degrees.
plot_f1_prec_rec_for_model(english_df,'bert-base-multilingual-uncased', 'bert_ENG', (0.355, -0.15), xticksrot=45)

In [None]:
# XLM-RoBERTa on the English data; tick labels are rotated by 45 degrees.
plot_f1_prec_rec_for_model(english_df, 'xlm-roberta-base', 'XLM-RoBERTa_ENG', (0.355, -0.15), xticksrot=45)

## Dataset sizes

In [None]:
# Dataset size `n` for every dataset type / size label combination.
# The counts of each type are listed in the order of `_WDC_SIZE_LABELS`.
_WDC_SIZE_LABELS = ('small', 'medium', 'large')
_WDC_COUNTS = {
    'cameras': (1886, 5255, 20036),
    'computers': (2834, 8094, 33359),
    'watches': (2255, 6413, 27027),
    'shoes': (2063, 5805, 22989),
}
# Expand the compact mapping into one row per (type, size label).
wdc_sizes = pd.DataFrame([
    {'dataset_type': category, 'dataset_size': label, 'n': count}
    for category, counts in _WDC_COUNTS.items()
    for label, count in zip(_WDC_SIZE_LABELS, counts)
])

In [None]:
def plot_f1_size(data, save, ylim=(0.83, 0.95)):
    """Scatter the F1 score against the dataset size, coloured by model.

    Parameters
    ----------
    data : pandas.DataFrame
        Table with the columns ``n`` (x axis), ``f1`` (y axis) and ``model``
        (colour).
    save : str
        File name of the figure; it is written into the ``img`` directory.
    ylim : tuple of float, optional
        Lower and upper limit of the y axis. Points outside this range are
        not visible.
    """
    sns.scatterplot(x='n', y='f1', hue='model', data=data, s=25)
    plt.ylim(*ylim)
    plt.ylabel('F1 score')
    plt.xlabel('Dataset size')
    sns.despine()
    plt.legend(title='Model',frameon=False, loc='center left', bbox_to_anchor=(1.02, 1))
    # `savefig` does not create missing directories.
    os.makedirs('img', exist_ok=True)
    # The legend is anchored outside the axes; a tight bounding box keeps it
    # from being cut off in the saved file.
    plt.savefig(f'img/{save}', bbox_inches='tight')

In [None]:
# Attach the dataset size `n` to every English run and plot F1 against it.
d = df[df['lang'] == 'ENG'].merge(wdc_sizes, on=['dataset_type', 'dataset_size'])
plot_f1_size(d, 'f1_size_eng.svg')
plt.show()

In [None]:
# Count the rows of every Polish dataset file to get its size `n`.
POLISH_DATASET_PATH = '../data/PolishDataset'
types = ['all_train', 'chemia_train', 'napoje_train']
sizes = []
# The loop variable must not be named `t`: that would overwrite the
# `scipy.stats.t` import and break the t-quantile cell on a re-run.
for subset in types:
    subset_dir = os.path.join(POLISH_DATASET_PATH, subset)
    for root, subdirs, files in os.walk(subset_dir):
        for file_name in files:
            # Skip anything that is not a gzipped JSON-lines dataset file
            # (e.g. hidden files), which `read_json` could not parse.
            if not file_name.endswith('.json.gz'):
                continue
            records = pd.read_json(os.path.join(root, file_name), compression='gzip', lines=True)
            # The third and fourth underscore-separated parts of the file
            # name are used as dataset type and size label.
            name = file_name.replace('.json.gz', '').split('_')
            sizes.append({
                'dataset_type': name[2],
                'dataset_size': name[3],
                'n': len(records)
            })
sizes_pl_df = pd.DataFrame(sizes)
sizes_pl_df

In [None]:
# Attach the dataset size `n` to every Polish run, leaving out the rows of the
# size table whose dataset type is 'all', and plot F1 against it.
d = df[df['lang'] == 'PL'].merge(sizes_pl_df[~(sizes_pl_df['dataset_type'] == 'all')], on=['dataset_type', 'dataset_size'])
plot_f1_size(d, 'f1_size_pl.svg')
plt.show()


## Fit time

In [None]:
# Attach the dataset size `n` to every run, using both size tables.
d1 = df.merge(wdc_sizes, on=['dataset_type', 'dataset_size'])
d2 = df.merge(sizes_pl_df, on=['dataset_type', 'dataset_size'])
# `DataFrame.append` was removed in pandas 2.0; `pd.concat` stacks the two
# frames the same way.
d = pd.concat([d1, d2])
# One point per distinct size (mean time) plus a robust regression line.
sns.regplot(x='n', y='time', data=d, x_estimator=np.mean, robust=True)
# Tick positions are in seconds, the labels show the same values in minutes.
plt.yticks([1800, 3600, 5400, 7200], labels=[30, 60, 90, 120])
plt.ylabel('time [min]')
plt.xlabel('dataset size')
# `savefig` does not create missing directories.
os.makedirs('img', exist_ok=True)
plt.savefig('img/fit_time.svg')