In [None]:
%matplotlib notebook

import os
import glob

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import tensorflow as tf

In [None]:
def extract_tensorboard_data(eventfile_path, keys):
    """Collect selected scalar summaries from one TensorBoard event file.

    Parameters
    ----------
    eventfile_path : str
        Path handed to ``tf.train.summary_iterator``.
    keys : container of str
        Summary tags to keep; values with any other tag are ignored.

    Returns
    -------
    pandas.DataFrame
        One row per step that carried a summary (index named ``"epoch"``),
        one column per tag that was found.  A step whose summary holds none
        of the requested tags still gets a row.
    """
    per_step = {}
    for record in tf.train.summary_iterator(eventfile_path):
        # Events without a summary payload carry no scalar values.
        if record.HasField("summary"):
            row = per_step.setdefault(record.step, {})
            row.update(
                (entry.tag, entry.simple_value)
                for entry in record.summary.value
                if entry.tag in keys
            )
    # The dict is keyed by step, so the frame is built tag-by-step and then
    # transposed to get one row per step.
    frame = pd.DataFrame(per_step).T
    frame.index.name = "epoch"
    return frame

In [None]:
# Tags to extract: the training metrics plus their "val_"-prefixed counterparts.
base_metrics = ['loss', 'binary_accuracy']
keys = base_metrics + ['val_' + name for name in base_metrics]

# Every entry under this directory is treated as one experiment.
experiment_dirs = glob.glob("/data/log/cnn/fd/large-fromCF/tuesday1/*")

# Maps experiment directory -> frame of its metrics, sorted by index.
data = {}
for experiment_dir in experiment_dirs:
    pattern = os.path.join(experiment_dir, "*.tfevents.*")
    event_files = [
        extract_tensorboard_data(experiment_path, keys)
        for experiment_path in glob.glob(pattern)
    ]
    # Directories without any event file are left out entirely.
    if not event_files:
        continue
    merged = pd.concat(event_files).sort_index()
    # Tag every row with its experiment so the frames can be stacked later.
    merged['experiment_dir'] = pd.Series(experiment_dir, index=merged.index)
    data[experiment_dir] = merged

In [None]:
# Stack the per-experiment frames into one long frame, then move index
# level 0 into an ordinary column so the row index becomes a plain range.
stacked = pd.concat(list(data.values()))
df = stacked.reset_index(level=0)
df

In [None]:
# One summary row per experiment: take the 'last' aggregate of every column,
# then order the experiments by validation loss, smallest first.
g = df.groupby('experiment_dir', as_index=False)
last_per_experiment = g.agg('last')
a = last_per_experiment.sort_values(by='val_loss')
# Disabled alternatives, kept for reference:
# for experiment_dir, data in df.groupby('experiment_dir'):
#     print(data['val_loss'].min())
# a['overfit'] = a['val_loss'] / a['loss']
# a.sort_values(by='overfit')


def extract_experiment_params(experiment_path):
    """Parse the hyper-parameters encoded in an experiment directory name.

    The last component of ``experiment_path`` is read as space-separated
    ``key:value`` tokens, e.g. ``"AR:0.01 KR:0 DS:big"``.

    Parameters
    ----------
    experiment_path : str
        "/"-separated path of the experiment directory.  Trailing slashes
        are ignored.

    Returns
    -------
    dict
        Maps each key to ``float(value)`` when the value parses as a float,
        otherwise to the raw value string.  Tokens without a ":" (including
        the empty tokens produced by doubled spaces) are skipped.
    """
    # Drop trailing slashes first, otherwise the last component is empty.
    experiment_name = experiment_path.rstrip("/").split("/")[-1]
    fields = {}
    for field in experiment_name.split(" "):
        # partition splits on the first ":" only, so a value that itself
        # contains ":" is kept whole instead of breaking the unpacking.
        key, separator, value_str = field.partition(":")
        if not separator:
            # Not a key:value token; skip it rather than abort the parse.
            continue
        try:
            fields[key] = float(value_str)
        except ValueError:
            fields[key] = value_str
    return fields
# Expand each experiment's directory name into one column per hyper-parameter
# and append those columns to the per-experiment summary.
# Series.tolist() is called directly: Series.as_matrix() was removed in
# pandas 1.0, and tolist() yields the same list of dicts on every version.
experiment_params = pd.DataFrame(
    a['experiment_dir'].apply(extract_experiment_params).tolist(),
    index=a.index,
)
a = pd.concat([a, experiment_params], axis=1)
# Display everything except the path and the listed parameters.  A boolean
# mask keeps the columns in their existing order, where a set difference
# would return them in arbitrary order.
hidden_columns = {'experiment_dir', 'DS', 'AC', 'LF', 'CW'}
a.loc[:, ~a.columns.isin(hidden_columns)]

In [None]:
# a = df[df['experiment_dir'].apply(lambda x: x[-3:]) == '432']
# # a['val_loss'].diff()
# a.index

In [None]:
# One figure per validation metric, two panels each: the left panel plots the
# metric against AR for rows with KR == 0, the right panel against KR for rows
# with AR == 0.
for metric in ['val_loss', 'val_binary_accuracy']:
    # val_loss is drawn on log-log axes, the accuracy on a log x axis only.
    scale = {'loglog': True} if metric == 'val_loss' else {'logx': True}
    plt.figure()
    for panel, (fixed, swept) in enumerate((('KR', 'AR'), ('AR', 'KR')), start=1):
        ax = plt.subplot(1, 2, panel)
        subset = a[a[fixed] == 0][[swept, metric]]
        # NOTE(review): the metric is passed through np.log here and, for
        # val_loss, is then also drawn on a logarithmic y axis -- confirm
        # that taking the log twice is intended.
        subset.set_index(swept).apply(np.log).plot(style='.', ax=ax, **scale)
