# Results May 2019

In [None]:
import os
from pathlib import Path
import time
from datetime import datetime
import itertools

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
import seaborn as sns
from sklearn.model_selection import KFold, RepeatedStratifiedKFold
from sklearn import preprocessing
from sklearn.metrics import confusion_matrix, roc_curve, roc_auc_score

In [None]:
# Global seaborn theme shared by every figure in this notebook.
sns.set(style="whitegrid")
colors = sns.color_palette()
# `colorsp1` is the default palette rotated left by one position (the first
# colour moves to the end). It is built as a *new* list: the earlier
# `colorsp1 = colors` only aliased the palette, so the pop/append pair also
# mutated `colors` and re-appended the second colour instead of the first,
# dropping the original first colour altogether.
colorsp1 = list(colors[1:]) + list(colors[:1])

def format_plot(ax, plt):
    """Apply the common formatting used by the accuracy box plots.

    Sets the x label to 'Dataset' and the y label to 'validation accuracy',
    renders y tick values as whole percentages, fixes the y limits to
    0.35-1.05 and finishes with ``tight_layout``.

    ax: axes object whose labels and y-axis formatter are set.
    plt: object providing ``ylim`` and ``tight_layout``; the callers in this
        file pass the ``matplotlib.pyplot`` module.
    """
    def as_percent(value, _position):
        # FuncFormatter calls this with (tick value, tick position);
        # only the value is rendered.
        return '{0:.0%}'.format(value)

    ax.set_xlabel('Dataset')
    ax.set_ylabel('validation accuracy')
    ax.yaxis.set_major_formatter(FuncFormatter(as_percent))
    plt.ylim(bottom=0.35, top=1.05)
    plt.tight_layout()
    
def print_stats(data, name):
    """Print the quartiles and the mean of ``data['val_acc']`` under a heading.

    data: object indexable by 'val_acc' whose value provides ``quantile``
        and ``mean`` (the test below passes a pandas DataFrame).
    name: label printed first, followed by ' :'.
    """
    print(name, ':')
    val_acc = data['val_acc']
    quartiles = val_acc.quantile([0.25, 0.5, 0.75])
    print('Quantiles:\n', quartiles)
    mean_acc = val_acc.mean()
    print('Mean:', mean_acc)
    
def print_means(data, names):
    """Print a 'Mean averages:' header, then one '<mean> <name>' line per pair.

    data: iterable of objects indexable by 'val_acc' whose value provides
        ``mean``.
    names: iterable of labels paired positionally with ``data``; pairing
        stops at the shorter of the two iterables.
    """
    print('Mean averages:')
    for frame, label in zip(data, names):
        mean_acc = frame['val_acc'].mean()
        print(mean_acc, label)

# Confirm performance of untuned DNNs : GunPoint

In [None]:
# GunPoint, single train/test split: for the hard-coded 1-NN baseline and each
# network summary, print the validation accuracy and then the error (1 - acc).
def _report_single_split(path, *tag):
    """Read one summary CSV, print its path (plus an optional tag), the mean
    validation accuracy and the matching error; return the loaded frame."""
    frame = pd.read_csv(path, header=None,
                        names=['run', 'loss', 'val_acc', 'epoch', 'time'])
    print(path, *tag)
    mean_acc = frame['val_acc'].mean()
    print(mean_acc)
    print(1 - mean_acc, '\n')
    return frame


print('Single train and test. Reporting validation accuracy and error.', '\n')

# 1-NN baseline: the accuracy is a hard-coded value, not read from a log file.
print('1-NN (1-nearest neighbor)')
print('0.9133333333333333')
print(1 - 0.9133333333333333, '\n')

file = '../../logs/2019-05-09T06:59/GunPoint/mlpwang_summary.csv'
data1 = _report_single_split(file)

file = '../../logs/2019-03-31T18:07/GunPoint/devnet_summary.csv'
data2 = _report_single_split(file, '(FCN Wang)')

# NOTE(review): this run is tagged ResNet but reads a devnet_summary.csv,
# like the FCN run above - confirm the path is the intended one.
file = '../../logs/2019-03-29T15:29/GunPoint/devnet_summary.csv'
data2 = _report_single_split(file, '(ResNet Wang)')


In [None]:
# 10-fold cross validation
# For each model on GunPoint: print the mean and standard deviation of the
# validation accuracy, plus the number of recorded runs.
def _report_cv_run(path, tag):
    """Read one summary CSV, print its path with the model tag, then the mean,
    std and count of the 'val_acc' column; return the loaded frame."""
    frame = pd.read_csv(path, header=None,
                        names=['run', 'loss', 'val_acc', 'epoch', 'time'])
    print(path, tag)
    val_acc = frame['val_acc']
    print(val_acc.mean())
    print(val_acc.std())
    print('Number of samples', val_acc.count(), '\n')
    return frame


# 1-NN baseline: hard-coded mean and spread, not read from a log file.
print('1-NN (1-nearest neighbor)')
print(0.9450000000000001)
print(0.043779751788545644, '\n')

file = '../../logs/2019-03-17T16:35/GunPoint/mlpwang_summary.csv'
data1 = _report_cv_run(file, '(MLP Wang)')
name1 = 'GunPoint'

file = '../../logs/2019-05-09T09:25/GunPoint/devnet_summary.csv'
data2 = _report_cv_run(file, '(FCN Wang)')

file = '../../logs/2019-03-18T17:32/GunPoint/resnet_summary.csv'
data2 = _report_cv_run(file, '(ResNet Wang)')

# Untuned models : all dogs data (balanced dataset)

In [None]:
# Compare the untuned models on the balanced all-dogs dataset: collect each
# model's validation accuracies, print [name, mean, std] per model and save a
# box plot with the individual runs overlaid as a swarm plot.
all_data = []
all_names = []
results = []

summary_cols = ['run', 'loss', 'val_acc', 'epoch', 'time']
# (model label, summary csv, csv column names, echo the path after loading?)
model_runs = [
    ('1-NN',
     '../../logs/2019-05-09T13:04/private_balanced/nearestneighbours_summary.csv',
     ['val_acc'], True),
    ('MLP',
     '../../logs/2019-03-21T14:23/private_balanced/mlpwang_summary.csv',
     summary_cols, False),
    # TODO FCN on private_balanced - in progress
    ('FCN',
     '../../logs/2019-05-09T12:30/private_balanced/devnet_summary.csv',
     summary_cols, False),
    ('ResNet',
     '../../logs/2019-03-20T19:47/private_balanced/resnet_summary.csv',
     summary_cols, False),
]
for label, file, cols, echo_path in model_runs:
    data = pd.read_csv(file, header=None, names=cols)
    if echo_path:
        print(file)
    val_acc = data['val_acc']
    all_data.append(val_acc)
    all_names.append(label)
    results.append([label, val_acc.mean(), val_acc.std()])

print(results)

# Both plot calls draw on the current axes; the swarm is layered over the boxes.
sns.boxplot(data=all_data)
ax = sns.swarmplot(data=all_data, color='black')
plt.suptitle('all_dogs')
plt.xticks(list(range(4)), all_names[:4])
format_plot(ax, plt)
# format_plot labels the x axis 'Dataset'; this figure compares models.
ax.set_xlabel('Model')
plt.savefig('boxplot_alldogs.png', bbox_inches='tight')


# MLP : all dogs - dog0 - dog0 correct

In [None]:
# Untuned MLP on three datasets (all dogs, dog0, dog0_correct): collect the
# validation accuracies, print [name, mean, std] per dataset and save a box
# plot with the individual runs overlaid as a swarm plot.
all_data = []
all_names = []
results = []

# (tick label, summary csv) in plotting order.
mlp_runs = [
    ('all_dogs', '../../logs/2019-03-21T14:23/private_balanced/mlpwang_summary.csv'),
    ('dog0', '../../logs/2019-03-17T14:56/private_dog0/mlpwang_summary.csv'),
    ('dog0_correct', '../../logs/2019-03-17T18:16/private_dog0_correct/mlpwang_summary.csv'),
]
for label, file in mlp_runs:
    data = pd.read_csv(file, header=None,
                       names=['run', 'loss', 'val_acc', 'epoch', 'time'])
    val_acc = data['val_acc']
    all_data.append(val_acc)
    all_names.append(label)
    results.append([label, val_acc.mean(), val_acc.std()])

print(results)

# Both plot calls draw on the current axes; the swarm is layered over the boxes.
sns.boxplot(data=all_data)
ax = sns.swarmplot(data=all_data, color='black')
plt.suptitle('MLP (untuned)')
plt.xticks(list(range(3)), all_names[:3])
format_plot(ax, plt)
plt.savefig('boxplot_MLPWang.png', bbox_inches='tight')

# MLP : dog1 and dog2 all vs correct

In [None]:
# Untuned MLP on dog0, dog1 and dog2, each followed by its "_correct"
# counterpart: collect the validation accuracies, print [name, mean, std] per
# dataset and save a two-colour box plot with the runs overlaid as a swarm.
all_data = []
all_names = []
results = []

# (tick label, summary csv) in plotting order.
mlp_runs = [
    ('dog0', '../../logs/2019-03-17T14:56/private_dog0/mlpwang_summary.csv'),
    ('dog0_correct', '../../logs/2019-03-17T18:16/private_dog0_correct/mlpwang_summary.csv'),
    ('dog1', '../../logs/2019-03-23T08:51/private_dog1/mlpwang_summary.csv'),
    ('dog1_correct', '../../logs/2019-03-24T09:56/private_dog1_correct/mlpwang_summary.csv'),
    ('dog2', '../../logs/2019-03-23T22:02/private_dog2/mlpwang_summary.csv'),
    ('dog2_correct', '../../logs/2019-03-23T19:47/private_dog2_correct/mlpwang_summary.csv'),
]
for label, file in mlp_runs:
    data = pd.read_csv(file, header=None,
                       names=['run', 'loss', 'val_acc', 'epoch', 'time'])
    val_acc = data['val_acc']
    all_data.append(val_acc)
    all_names.append(label)
    results.append([label, val_acc.mean(), val_acc.std()])

print(results)

# Alternate two colours so each dataset and its "_correct" variant differ.
clrs = [colorsp1[0], colorsp1[1]] * 3
# Both plot calls draw on the current axes; the swarm is layered over the boxes.
sns.boxplot(data=all_data, palette=clrs)
ax = sns.swarmplot(data=all_data, color='black')
plt.suptitle('MLP (untuned)')
plt.xticks(list(range(6)), all_names)
format_plot(ax, plt)
# Override the lower y limit applied by format_plot (0.35) with 0.20.
plt.ylim(bottom=0.20, top=1.05)
plt.savefig('boxplot_MLPWang_dog12Correct.png', bbox_inches='tight')

# All models : dog0 correct

In [None]:
# Compare all models on the dog0_correct data: collect each model's validation
# accuracies, print [name, mean, std] per model and save a box plot with the
# individual runs overlaid as a swarm plot.
all_data = []
all_names = []
results = []

summary_cols = ['run', 'loss', 'val_acc', 'epoch', 'time']
# (model label, summary csv, csv column names)
# NOTE(review): the 1-NN and FCN logs live under private_dog0_correct_plus,
# while MLP and ResNet use private_dog0_correct - confirm these are comparable.
model_runs = [
    ('1-NN',
     '../../logs/2019-05-09T14:07/private_dog0_correct_plus/nearestneighbours_summary.csv',
     ['val_acc']),
    ('MLP',
     '../../logs/2019-03-17T18:16/private_dog0_correct/mlpwang_summary.csv',
     summary_cols),
    ('FCN',
     '../../logs/2019-05-09T15:15/private_dog0_correct_plus/devnet_summary.csv',
     summary_cols),
    ('ResNet',
     '../../logs/2019-03-16T19:35/private_dog0_correct/resnet_summary.csv',
     summary_cols),
]
for label, file, cols in model_runs:
    data = pd.read_csv(file, header=None, names=cols)
    val_acc = data['val_acc']
    all_data.append(val_acc)
    all_names.append(label)
    results.append([label, val_acc.mean(), val_acc.std()])

print(results)

# Both plot calls draw on the current axes; the swarm is layered over the boxes.
sns.boxplot(data=all_data)
ax = sns.swarmplot(data=all_data, color='black')
plt.suptitle('dog0_correct')
plt.xticks(list(range(4)), all_names[:4])
format_plot(ax, plt)
# format_plot labels the x axis 'Dataset'; this figure compares models.
ax.set_xlabel('Model')
plt.savefig('boxplot_allModels_dog0Correct.png', bbox_inches='tight')