In [2]:
import tensorflow as tf
import numpy as np
import os, sys
from os import listdir
from os.path import isfile, join
import pandas as pd

%matplotlib inline
import matplotlib.pyplot as plt

In [3]:
def read_summary_file(summary_file):
    loss=[]
    accuracy=[]
    precision=[]
    recall=[]
    for e in tf.train.summary_iterator(summary_file):
        for v in e.summary.value:
            if v.tag == 'loss_1':
                loss.append(v.simple_value)
            if v.tag == 'accuracy_1':
                accuracy.append(v.simple_value)
            if v.tag == 'precision':
                precision.append(v.simple_value)
            if v.tag == 'recall':
                recall.append(v.simple_value)
    return np.array(loss), np.array(accuracy), np.array(precision), np.array(recall)

In [4]:
loss, accuracy, precision, recall = read_summary_file('num_filter_256_fold_0/1512891150/')

FailedPreconditionError: num_filter_256_fold_0/1512891150

In [113]:
num_filters = [32, 64, 128, 256]
prefix='num_filter_'
results = []

for num_filter in num_filters:
    experiment = prefix+str(num_filter)
    loss=[]
    accuracy=[]
    precision=[]
    recall=[]
    for fold in range(4):
        experiment_dir = experiment + "_fold_%s"%fold
        dev = os.path.join(experiment_dir, [x for x in listdir(experiment_dir) if '.' not in x][0], 'summaries/dev')
        train = os.path.join(experiment_dir, [x for x in listdir(experiment_dir) if '.' not in x][0], 'summaries/train')
        train_summary = os.path.join(train, listdir(train)[0])
        dev_summary = os.path.join(dev, listdir(dev)[0])
        l, acc, p, r = read_summary_file(dev_summary)
        loss.append(l)
        accuracy.append(acc)
        recall.append(r)
        precision.append(p)
    loss = np.stack(loss)
    accuracy = np.stack(accuracy)
    precision = np.stack(precision)
    recall = np.stack(recall)
    average_loss=np.mean(loss, axis=0)
    average_accuracy=np.mean(accuracy, axis=0)
    average_precision=np.mean(precision, axis=0)
    average_recall=np.mean(recall, axis=0)
    average_f1_score=2/((1/average_precision) + (1/average_recall))
    step = np.argmax(average_f1_score)
    cross_val_loss = average_loss[step]
    cross_val_precision = average_precision[step]
    cross_val_accuracy = average_accuracy[step]
    cross_val_recall = average_recall[step]
    cross_val_f1 = average_f1_score[step]
    results.append([experiment, cross_val_accuracy, cross_val_precision, cross_val_recall, cross_val_f1])

In [114]:
df = pd.DataFrame(results, columns=['experiment', 'accuracy', 'precision', 'recall', 'f1'])

In [115]:
df.to_csv('num_filter.csv', index=False)

In [116]:
df.round(3)

Unnamed: 0,experiment,accuracy,precision,recall,f1
0,num_filter_32,0.968,0.125,0.787,0.216
1,num_filter_64,0.961,0.127,0.799,0.218
2,num_filter_128,0.954,0.131,0.786,0.225
3,num_filter_256,0.942,0.12,0.793,0.209


# Batch sizes

In [99]:
num_filters = [32, 64, 128, 256, 512, 1024]
prefix='batch_size_'
results = []

for num_filter in num_filters:
    experiment = prefix+str(num_filter)
    loss=[]
    accuracy=[]
    precision=[]
    recall=[]
    for fold in range(4):
        experiment_dir = experiment + "_fold_%s"%fold
        dev = os.path.join(experiment_dir, [x for x in listdir(experiment_dir) if '.' not in x][0], 'summaries/dev')
        train = os.path.join(experiment_dir, [x for x in listdir(experiment_dir) if '.' not in x][0], 'summaries/train')
        train_summary = os.path.join(train, listdir(train)[0])
        dev_summary = os.path.join(dev, listdir(dev)[0])
        l, acc, p, r = read_summary_file(dev_summary)
        loss.append(l[:20])
        accuracy.append(acc[:20])
        recall.append(r[:20])
        precision.append(p[:20])
    loss = np.stack(loss)
    accuracy = np.stack(accuracy)
    precision = np.stack(precision)
    recall = np.stack(recall)
    average_loss=np.mean(loss, axis=0)
    average_accuracy=np.mean(accuracy, axis=0)
    average_precision=np.mean(precision, axis=0)
    average_recall=np.mean(recall, axis=0)
    average_f1_score=2/((1/average_precision) + (1/average_recall))
    step = np.argmax(average_f1_score)
    cross_val_loss = average_loss[step]
    cross_val_precision = average_precision[step]
    cross_val_accuracy = average_accuracy[step]
    cross_val_recall = average_recall[step]
    cross_val_f1 = average_f1_score[step]
    results.append([experiment, cross_val_accuracy, cross_val_precision, cross_val_recall, cross_val_f1])

In [100]:
experiment_dir

'batch_size_1024_fold_3'

In [101]:
df = pd.DataFrame(results, columns=['experiment', 'accuracy', 'precision', 'recall', 'f1'])

In [105]:
df.round(3).to_csv('batch_size.csv', index=False)

In [106]:
df.round(3)

Unnamed: 0,experiment,accuracy,precision,recall,f1
0,batch_size_32,0.956,0.066,0.761,0.121
1,batch_size_64,0.937,0.088,0.793,0.158
2,batch_size_128,0.958,0.085,0.843,0.154
3,batch_size_256,0.938,0.076,0.864,0.139
4,batch_size_512,0.931,0.064,0.867,0.12
5,batch_size_1024,0.919,0.051,0.847,0.097


# Leaving embedding out

In [8]:
num_filters = ['leave_pos_embedding_out', 'leave_position_embedding_out', 'leave_word_embedding_out']
prefix=''
results = []

for num_filter in num_filters:
    experiment = prefix+str(num_filter)
    loss=[]
    accuracy=[]
    precision=[]
    recall=[]
    for fold in range(4):
        experiment_dir = experiment + "_fold_%s"%fold
        dev = os.path.join(experiment_dir, [x for x in listdir(experiment_dir) if '.' not in x][0], 'summaries/dev')
        train = os.path.join(experiment_dir, [x for x in listdir(experiment_dir) if '.' not in x][0], 'summaries/train')
        train_summary = os.path.join(train, listdir(train)[0])
        dev_summary = os.path.join(dev, listdir(dev)[0])
        l, acc, p, r = read_summary_file(dev_summary)
        loss.append(l)
        accuracy.append(acc)
        recall.append(r)
        precision.append(p)
    loss = np.stack(loss)
    accuracy = np.stack(accuracy)
    precision = np.stack(precision)
    recall = np.stack(recall)
    average_loss=np.mean(loss, axis=0)
    average_accuracy=np.mean(accuracy, axis=0)
    average_precision=np.mean(precision, axis=0)
    average_recall=np.mean(recall, axis=0)
    average_f1_score=2/((1/average_precision) + (1/average_recall))
    step = np.argmax(average_f1_score)
    cross_val_loss = average_loss[step]
    cross_val_precision = average_precision[step]
    cross_val_accuracy = average_accuracy[step]
    cross_val_recall = average_recall[step]
    cross_val_f1 = average_f1_score[step]
    results.append([experiment, cross_val_accuracy, cross_val_precision, cross_val_recall, cross_val_f1])

In [9]:
df = pd.DataFrame(results, columns=['experiment', 'accuracy', 'precision', 'recall', 'f1'])

In [10]:
df.round(3).to_csv('leave_one_embedding_out.csv', index=False)

In [11]:
df.round(3)

Unnamed: 0,experiment,accuracy,precision,recall,f1
0,leave_pos_embedding_out,0.953,0.082,0.893,0.15
1,leave_position_embedding_out,0.952,0.067,0.869,0.124
2,leave_word_embedding_out,0.625,0.032,0.832,0.061


# Embedding size

In [17]:
num_filters = [32, 64, 128, 256]
prefix='embedding_size_'
results = []

for num_filter in num_filters:
    experiment = prefix+str(num_filter)
    loss=[]
    accuracy=[]
    precision=[]
    recall=[]
    for fold in range(4):
        experiment_dir = experiment + "_fold_%s"%fold
        dev = os.path.join(experiment_dir, [x for x in listdir(experiment_dir) if '.' not in x][0], 'summaries/dev')
        train = os.path.join(experiment_dir, [x for x in listdir(experiment_dir) if '.' not in x][0], 'summaries/train')
        train_summary = os.path.join(train, listdir(train)[0])
        dev_summary = os.path.join(dev, listdir(dev)[0])
        l, acc, p, r = read_summary_file(dev_summary)
        loss.append(l[:20])
        accuracy.append(acc[:20])
        recall.append(r[:20])
        precision.append(p[:20])
    loss = np.stack(loss)
    accuracy = np.stack(accuracy)
    precision = np.stack(precision)
    recall = np.stack(recall)
    average_loss=np.mean(loss, axis=0)
    average_accuracy=np.mean(accuracy, axis=0)
    average_precision=np.mean(precision, axis=0)
    average_recall=np.mean(recall, axis=0)
    average_f1_score=2/((1/average_precision) + (1/average_recall))
    step = np.argmax(average_f1_score)
    cross_val_loss = average_loss[step]
    cross_val_precision = average_precision[step]
    cross_val_accuracy = average_accuracy[step]
    cross_val_recall = average_recall[step]
    cross_val_f1 = average_f1_score[step]
    results.append([experiment, cross_val_accuracy, cross_val_precision, cross_val_recall, cross_val_f1])

FileNotFoundError: [Errno 2] No such file or directory: 'embedding_size_64_fold_0'

In [None]:
df = pd.DataFrame(results, columns=['experiment', 'accuracy', 'precision', 'recall', 'f1'])

In [None]:
df.round(3).to_csv('embedding_size.csv', index=False)

In [None]:
print(df.round(3))

# Filter sizes

In [18]:
num_filters = [[3,4], [4,5], [3,5], [3], [4], [5]]
prefix='filter_sizes_'
results = []

for num_filter in num_filters:
    experiment = prefix+'_'.join([str(x) for x in num_filter])
    loss=[]
    accuracy=[]
    precision=[]
    recall=[]
    for fold in range(4):
        experiment_dir = experiment + "_fold_%s"%fold
        dev = os.path.join(experiment_dir, [x for x in listdir(experiment_dir) if '.' not in x][0], 'summaries/dev')
        train = os.path.join(experiment_dir, [x for x in listdir(experiment_dir) if '.' not in x][0], 'summaries/train')
        train_summary = os.path.join(train, listdir(train)[0])
        dev_summary = os.path.join(dev, listdir(dev)[0])
        l, acc, p, r = read_summary_file(dev_summary)
        loss.append(l)
        accuracy.append(acc)
        recall.append(r)
        precision.append(p)
    loss = np.stack(loss)
    accuracy = np.stack(accuracy)
    precision = np.stack(precision)
    recall = np.stack(recall)
    average_loss=np.mean(loss, axis=0)
    average_accuracy=np.mean(accuracy, axis=0)
    average_precision=np.mean(precision, axis=0)
    average_recall=np.mean(recall, axis=0)
    average_f1_score=2/((1/average_precision) + (1/average_recall))
    step = np.argmax(average_f1_score)
    cross_val_loss = average_loss[step]
    cross_val_precision = average_precision[step]
    cross_val_accuracy = average_accuracy[step]
    cross_val_recall = average_recall[step]
    cross_val_f1 = average_f1_score[step]
    results.append([experiment, cross_val_accuracy, cross_val_precision, cross_val_recall, cross_val_f1])

FileNotFoundError: [Errno 2] No such file or directory: 'filter_sizes_3_4_fold_0'