In [None]:
%matplotlib inline
import os
import pandas as pd
import numpy as np
from tools import train_test
from tools import plots

# pandas displaying options
# NOTE: 'display.height' is intentionally not set. It is a deprecated option
# that current pandas releases no longer recognise (set_option raises for an
# unknown key); the number of rows shown is controlled by 'display.max_rows'.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

# read all results into infos dataframe
# Each run directory contributes one frame, re-indexed by the name of the
# directory (the run's timestamp). Frames are collected in a list and
# concatenated once at the end instead of growing `infos` inside the loop,
# which would copy the accumulated data on every iteration.
rdir = './results'
_info_frames = []
for root, subdirs, files in os.walk(rdir):
    # keep only directories exactly three levels below rdir
    # (two separators in the path relative to rdir)
    if root[len(rdir)+1:].count(os.sep) != 2:
        continue
    if 'model_info.tsv' not in files:
        continue

    info = pd.read_csv(os.path.join(root, 'model_info.tsv'), sep='\t')
    timestamp = str(root.split(os.sep)[-1])

    # add loss stats to info (only for runs whose log_type is 'epoch')
    if 'learning_curve.tsv' in files:
        learning_curve = pd.read_csv(os.path.join(root, 'learning_curve.tsv'), sep='\t')
        if info['log_type'].iloc[0] == 'epoch':
            info['epoch_loss_min'] = learning_curve['epoch_loss'].min()
            info['epoch_loss_last'] = learning_curve['epoch_loss'].iloc[-1]

    # assumes model_info.tsv holds exactly one row per run
    info.index = [timestamp]
    _info_frames.append(info)

# pd.concat rejects an empty list, so fall back to an empty frame when no
# results were found.
infos = pd.concat(_info_frames) if _info_frames else pd.DataFrame()
            
# define helper functions
def filter_by_timestamps(df, timestamps):
    """Return the rows of ``df`` whose index label is one of ``timestamps``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to filter by index label.
    timestamps : iterable
        Labels to keep. Each one is converted with ``str`` before matching,
        so the integer ``1527012161`` matches the label ``'1527012161'``.

    Returns
    -------
    pandas.DataFrame
        The matching rows, in their original order; empty if nothing matches.
    """
    # `str` rather than the Python-2-only `unicode` builtin, which is a
    # NameError on Python 3.
    wanted = [str(t) for t in timestamps]
    return df.loc[df.index.isin(wanted)]

def learning_curve_from_df(df, timestamps=[]):
    """Plot the learning curve of every row of ``df`` selected by ``timestamps``.

    For each selected row the curve is read from
    ``./results/<dataset_name>/<model_name>/<index label>/learning_curve.tsv``
    and handed, together with the row, to ``plots.plot_learning_curve``.
    Nothing is returned.
    """
    selected = filter_by_timestamps(df, timestamps)
    for stamp, info_row in selected.iterrows():
        run_dir = './results/' + '/'.join(
            (info_row['dataset_name'], info_row['model_name'], stamp)
        )
        curve = pd.read_csv(run_dir + '/learning_curve.tsv', sep='\t')
        plots.plot_learning_curve(curve, info_row)
        
def export_model_infos(filepath, df, timestamps=[]):
    """Write the rows of ``df`` selected by ``timestamps`` to a TSV file.

    Parameters
    ----------
    filepath : str
        Destination passed to ``DataFrame.to_csv``.
    df : pandas.DataFrame
        Frame to select rows from.
    timestamps : iterable
        Index labels of the rows to export (see ``filter_by_timestamps``).
    """
    selected = filter_by_timestamps(df, timestamps)
    # Write the filtered frame; the previous code referenced an undefined
    # name (`output`) and raised NameError on every call.
    selected.to_csv(filepath, sep='\t')
    
def compare_models(df, timestamps=[]):
    """Return the selected rows restricted to the columns in which they differ.

    A column is kept when ``Series.unique`` reports more than one value for
    it among the rows selected by ``timestamps``.
    """
    selected = filter_by_timestamps(df, timestamps)
    varying = [
        name
        for name in selected.columns.tolist()
        if len(selected[name].unique()) > 1
    ]
    return selected.loc[:, varying]

# Analysis

## Filter data

In [None]:
# Keep only the TransE runs on FB13, ordered by index label (timestamp).
filt = infos.loc[
    infos['model_name'].eq('TransE') & infos['dataset_name'].eq('FB13')
].sort_index()

filt

In [None]:
# Show a single run; transposed so every column of the info table becomes a row.
filter_by_timestamps(filt, timestamps=[1527012161]).T

## Plot learning curve

In [None]:
# Plot the learning curve of one run, identified by its timestamp.
learning_curve_from_df(filt, timestamps=[1527013864])

## Compare two or more models

In [None]:
# Side-by-side view of the columns in which these runs differ (one row per column).
compare_models(filt, timestamps=[1527012161, 1527013864]).T

# Export a set of models

In [None]:
# Export from the full `infos` table: no cell in this notebook defines `df`,
# so the previous call only worked with leftover kernel state.
# NOTE(review): `infos` rather than `filt` is assumed here because the listed
# timestamps may not all be TransE/FB13 runs; confirm.
export_model_infos(
    '~/Downloads/best_models.tsv',
    infos,
    timestamps=[1526710056, 1526710447, 1526711822, 1526417226, 1526535074])

# Debug