# Analysis of the results of experiment 4

In [1]:
import pandas as pd
import numpy as np

In [2]:
def strip(text):
    """Return ``text`` with surrounding whitespace removed.

    Values that have no ``strip`` method (the call raises
    ``AttributeError``) are handed back unchanged.
    """
    try:
        cleaned = text.strip()
    except AttributeError:
        # Not string-like: pass the value through untouched.
        return text
    else:
        return cleaned

def strip_time(text):
    """Keep the first six characters of ``text`` and trim surrounding whitespace."""
    leading = text[0:6]
    return leading.strip()

def make_int(text):
    """Parse ``text`` as an ``int`` after trimming surrounding whitespace."""
    cleaned = text.strip()
    return int(cleaned)

def make_float(text):
    """Parse ``text`` as a ``float`` after trimming surrounding whitespace."""
    cleaned = text.strip()
    return float(cleaned)

# Intended dtype for each column, keyed by the same names used in `converters`
# (presumably the raw CSV header names; every key but the first starts with a
# space). Insertion order matters: the key order is reused to rename the
# DataFrame columns after loading.
dtypes = {
    'Dataset': 'category',
    ' Model': 'category',
    ' Seed': np.int32,
    ' Aug': 'category',
    ' BestEpoch': np.int32,
    ' TestAcc': np.float64,
    ' TestLoss': np.float64,
    ' ValAcc': np.float64,
    ' ValLoss': np.float64,
    ' Device': 'category',
    ' Time': np.float64,
}

# Per-column parsers handed to `pd.read_csv`. Text columns are whitespace-
# trimmed, numeric columns are trimmed and parsed, and ' Time' is cut to its
# first six characters and trimmed (it is still a string afterwards).
converters = {
    'Dataset': strip,
    ' Model': strip,
    ' Seed': make_int,
    ' Aug': strip,
    ' BestEpoch': make_int,
    ' TestAcc': make_float,
    ' TestLoss': make_float,
    ' ValAcc': make_float,
    ' ValLoss': make_float,
    ' Device': strip,
    ' Time': strip_time,
}

In [4]:
# Load the experiment log; every column is parsed by its entry in `converters`.
results = pd.read_csv('../results/train_log_exp4.csv', header=0, converters=converters)

# Replace the header positionally: reuse the key order of `dtypes`, trimming
# the whitespace from each name and relabelling ' Model' as 'CorruptionRate'.
columns = ['CorruptionRate' if name == ' Model' else name.strip() for name in dtypes]
results.columns = columns

# `strip_time` returns a string, so convert the column to float here.
results['Time'] = results['Time'].astype('float64')

In [5]:
# Preview the first five rows to check the renamed columns and parsed values.
results.head(n=5)

Unnamed: 0,Dataset,CorruptionRate,Seed,Aug,BestEpoch,TestAcc,TestLoss,ValAcc,ValLoss,Device,Time
0,IMDB-BINARY,0.1,1314,Vanilla,3,0.75,0.547,0.67,0.582,cuda,20.24
1,IMDB-BINARY,0.1,1314,G-Mixup,97,0.77,0.6438,0.69,0.6946,cuda,18.49
2,IMDB-BINARY,0.1,1314,DropEdge,50,0.695,0.6012,0.68,0.646,cuda,38.5
3,IMDB-BINARY,0.1,11314,Vanilla,6,0.7,0.5459,0.75,0.5567,cuda,16.25
4,IMDB-BINARY,0.1,11314,G-Mixup,14,0.72,0.5076,0.73,0.554,cuda,18.61


In [7]:
# Summarise test accuracy and run time for every
# (Dataset, CorruptionRate, Aug) combination: mean, standard deviation and
# number of runs.
#
# * Only the two reported columns are selected, and they are selected *before*
#   aggregating, so no other column is aggregated just to be thrown away.
# * Aggregations are given as strings. Passing NumPy callables (np.mean,
#   np.std, ...) to `.agg()` is deprecated in recent pandas; for np.std the
#   announced change would replace the groupby sample std (ddof=1) with
#   NumPy's population std (ddof=0). 'std' pins the sample std.
# * 'count' replaces np.count_nonzero, which would under-count a group that
#   contains a legitimate 0 value. The second-level column label is therefore
#   'count' instead of 'count_nonzero'.
grouped = (
    results
    .groupby(['Dataset', 'CorruptionRate', 'Aug'])[['TestAcc', 'Time']]
    .agg(['mean', 'std', 'count'])
)
grouped

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,TestAcc,TestAcc,TestAcc,Time,Time,Time
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,count_nonzero,mean,std,count_nonzero
Dataset,CorruptionRate,Aug,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
IMDB-BINARY,0.1,DropEdge,0.7,0.037786,10,38.121,0.773153,10
IMDB-BINARY,0.1,G-Mixup,0.7105,0.04512,10,18.647,0.261026,10
IMDB-BINARY,0.1,Vanilla,0.7205,0.041663,10,16.798,1.25412,10
IMDB-BINARY,0.2,DropEdge,0.68,0.045277,10,35.925,0.483925,10
IMDB-BINARY,0.2,G-Mixup,0.695,0.046547,10,17.052,0.138146,10
IMDB-BINARY,0.2,Vanilla,0.6765,0.042101,10,14.99,0.109341,10
IMDB-BINARY,0.3,DropEdge,0.6725,0.044237,10,35.777,0.158188,10
IMDB-BINARY,0.3,G-Mixup,0.686,0.043512,10,17.043,0.118795,10
IMDB-BINARY,0.3,Vanilla,0.664,0.03604,10,15.002,0.073907,10
IMDB-BINARY,0.4,DropEdge,0.5975,0.04118,10,35.732,0.270752,10


# Total GPU Time

In [8]:
# Total of the Time column per device, divided by 60*60
# (NOTE(review): this yields hours if Time is logged in seconds - confirm).
#
# Only 'Time' is selected before summing; summing every column first would
# also "sum" the text columns for no purpose. The aggregation is named by
# string because passing NumPy callables such as np.sum to `.agg()` is
# deprecated in recent pandas. The result keeps the same (Time, sum) column.
grouped = results.groupby(['Device'])[['Time']].agg(['sum']) / (60 * 60)
grouped

Unnamed: 0_level_0,Time
Unnamed: 0_level_1,sum
Device,Unnamed: 1_level_2
cuda,4.035917
