# Analysis of the results for experiment 6-2

In [1]:
import pandas as pd
import numpy as np

In [2]:
def strip(text):
    """Return ``text`` with surrounding whitespace removed.

    Values that have no ``strip`` attribute (anything raising
    ``AttributeError`` on the call) are returned unchanged.
    """
    try:
        cleaned = text.strip()
    except AttributeError:
        # Not string-like: pass the value through untouched.
        cleaned = text
    return cleaned

def strip_time(text):
    """Return the first six characters of ``text``, whitespace-trimmed.

    NOTE(review): anything beyond the sixth character is discarded, so a
    wider numeric field would be truncated -- confirm against the raw log
    format.
    """
    leading = text[:6]
    return leading.strip()

def make_int(text):
    """Parse ``text`` as an ``int`` after trimming surrounding whitespace."""
    trimmed = text.strip()
    return int(trimmed)

def make_float(text):
    """Parse ``text`` as a ``float`` after trimming surrounding whitespace."""
    trimmed = text.strip()
    return float(trimmed)

# Intended dtype of every column in the results log, in column order.
# In the visible cells only the key order is consumed (to name the loaded
# columns); the dtype values themselves are not passed to ``read_csv``.
dtypes = {
    # experiment configuration
    'Dataset': 'category',
    'CorruptionRatio': 'category',
    'Seed': np.int32,
    'Aug': 'category',
    'Type': 'category',
    # training outcome
    'BestEpoch': np.int32,
    'TestAcc': np.float64,
    'TestLoss': np.float64,
    'ValAcc': np.float64,
    'ValLoss': np.float64,
    # runtime information
    'Device': 'category',
    'Time': np.float64,
}

# Per-column cleaning callables handed to ``pd.read_csv(converters=...)``.
# Text columns are whitespace-trimmed, numeric columns are parsed after
# trimming, and ``Time`` is cut to its first six characters and trimmed.
converters = {
    # experiment configuration
    'Dataset': strip,
    'CorruptionRatio': strip,
    'Seed': make_int,
    'Aug': strip,
    'Type': strip,
    # training outcome
    'BestEpoch': make_int,
    'TestAcc': make_float,
    'TestLoss': make_float,
    'ValAcc': make_float,
    'ValLoss': make_float,
    # runtime information
    'Device': strip,
    'Time': strip_time,
}

In [3]:
# Load the raw training log; each column is cleaned by its converter while parsing.
# NOTE(review): converters are matched by header name, so they only take effect
# when the CSV header spells the column names exactly as the keys do -- confirm.
results = pd.read_csv('../results/train_log_exp5.csv', header=0, converters=converters)

# Rename the columns positionally to the canonical names (the keys of ``dtypes``).
# The former ``' Model'`` -> ``'CorruptionRate'`` special case could never match
# any key, and the keys carry no whitespace to strip, so the plain key list is
# the same result. pandas raises ValueError if the column count differs.
columns = list(dtypes)
results.columns = columns

# ``strip_time`` yields strings, so ``Time`` is converted to float explicitly.
results['Time'] = results['Time'].astype('float64')

In [4]:
# Preview the first five rows to sanity-check parsing and column names.
results.head(n=5)

Unnamed: 0,Dataset,CorruptionRatio,Seed,Aug,Type,BestEpoch,TestAcc,TestLoss,ValAcc,ValLoss,Device,Time
0,REDDIT-BINARY,0.1,1314,Vanilla,Add,235,0.87,0.3741,0.87,0.3828,cuda,73.25
1,REDDIT-BINARY,0.1,1314,G-Mixup,Add,284,0.7875,0.4709,0.765,0.455,cuda,96.04
2,REDDIT-BINARY,0.1,1314,DropEdge,Add,290,0.89,0.3171,0.885,0.3229,cuda,119.24
3,REDDIT-BINARY,0.1,11314,Vanilla,Add,261,0.8525,0.3177,0.88,0.352,cuda,71.55
4,REDDIT-BINARY,0.1,11314,G-Mixup,Add,276,0.885,0.2889,0.89,0.3332,cuda,94.88


In [5]:
# Summary per (Type, Aug, Dataset, CorruptionRatio): mean, sample standard
# deviation and number of runs for test accuracy and run time.
#
# The two reported columns are selected *before* aggregating, so unrelated
# columns are never aggregated. String aggregation names are used because
# passing NumPy callables to ``agg`` is deprecated in recent pandas, and
# 'count' gives the number of non-null runs, whereas ``np.count_nonzero``
# would skip any run whose value is exactly 0 and would count NaN.
summary_keys = ['Type', 'Aug', 'Dataset', 'CorruptionRatio']
grouped = (
    results
    .groupby(summary_keys)[['TestAcc', 'Time']]
    .agg(['mean', 'std', 'count'])
)
grouped

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,TestAcc,TestAcc,TestAcc,Time,Time,Time
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,mean,std,count_nonzero,mean,std,count_nonzero
Type,Aug,Dataset,CorruptionRatio,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
Add,DropEdge,REDDIT-BINARY,0.1,0.7975,0.069332,10,119.201,0.685086,10
Add,DropEdge,REDDIT-BINARY,0.2,0.81025,0.068297,10,120.805,0.775002,10
Add,DropEdge,REDDIT-BINARY,0.3,0.805,0.049385,10,122.693,0.707045,10
Add,DropEdge,REDDIT-BINARY,0.4,0.7915,0.038028,10,124.571,0.8559,10
Add,G-Mixup,REDDIT-BINARY,0.1,0.85825,0.036402,10,95.983,0.814099,10
Add,G-Mixup,REDDIT-BINARY,0.2,0.8225,0.065944,10,97.512,0.857669,10
Add,G-Mixup,REDDIT-BINARY,0.3,0.82075,0.048334,10,99.233,0.991576,10
Add,G-Mixup,REDDIT-BINARY,0.4,0.81125,0.04142,10,101.211,0.978439,10
Add,Vanilla,REDDIT-BINARY,0.1,0.82375,0.053167,10,72.274,0.565964,10
Add,Vanilla,REDDIT-BINARY,0.2,0.8205,0.050919,10,73.665,0.494396,10


# Total GPU Hours

In [6]:
# Total run time per device, converted to hours by dividing by 60*60
# (presumably ``Time`` is logged in seconds -- confirm).
#
# ``Time`` is selected before summing so that text columns are not summed
# as well, and the aggregation is named by string because passing
# ``np.sum`` to ``agg`` is deprecated in recent pandas.
# NOTE: this rebinds the name ``grouped``.
grouped = results.groupby(['Device'])[['Time']].agg(['sum']) / (60*60)
grouped

Unnamed: 0_level_0,Time
Unnamed: 0_level_1,sum
Device,Unnamed: 1_level_2
cuda,6.236192
