In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import pickle
from ggplot import *
from collections import OrderedDict
pd.options.mode.chained_assignment = None

In [None]:
def get_df(history_file, n_hidden=(16, 32, 64, 128), cols=("n_hidden", "loss", "val_loss", "acc", "val_acc")):
    """Load pickled training histories for several hidden sizes into one DataFrame.

    Parameters
    ----------
    history_file : str
        Path template containing one ``{}`` placeholder; it is filled in with
        each value of ``n_hidden`` via ``str.format``.
    n_hidden : iterable of int
        Hidden-layer sizes whose history files should be loaded.
    cols : iterable of str
        Columns (and their order) to keep in the returned frame.

    Returns
    -------
    pandas.DataFrame
        One row per history entry. The index has two levels: the hidden size
        as a string and the position of the entry within its history. The
        ``n_hidden`` column holds the hidden size as a string.

    Raises
    ------
    KeyError
        If a pickled history lacks one of "loss", "val_loss", "acc", "val_acc",
        or if ``cols`` names a column that does not exist.
    """
    metrics = ("loss", "val_loss", "acc", "val_acc")
    # Materialise once: the sizes are needed for loading AND for the concat keys,
    # so a one-shot iterable (generator) must not be consumed twice.
    sizes = list(n_hidden)

    frames = []
    for n in sizes:
        # NOTE(security): pickle.load can execute arbitrary code; only point this
        # at history files you produced yourself.
        with open(history_file.format(n), mode="rb") as f:
            history = pickle.load(f)
        frame = pd.DataFrame({name: history[name] for name in metrics})
        frame.insert(0, "n_hidden", n)
        frames.append(frame)

    history_seq = pd.concat(frames, keys=[str(n) for n in sizes])
    # Convert on the freshly concatenated frame (not on a column slice), so the
    # assignment never writes into a possible copy and does not depend on
    # SettingWithCopyWarning being globally disabled.
    history_seq["n_hidden"] = history_seq["n_hidden"].astype(np.int32).astype(str)
    return history_seq[list(cols)]

In [None]:
def get_best(file):
    """Summarise, per hidden size, the history entry with the highest ``val_acc``.

    Parameters
    ----------
    file : str
        Path template with a ``{}`` placeholder, passed unchanged to ``get_df``.

    Returns
    -------
    pandas.DataFrame
        One row per hidden size with the columns ``n_hidden``, ``epoch``
        (0-based position of the best entry within its history), ``n_epochs``
        (number of entries in that history), ``loss``, ``val_loss``, ``acc``
        and ``val_acc``, on a default integer index. Empty (columns only) if
        the history frame has no rows.
    """
    columns = ["n_hidden", "epoch", "n_epochs", "loss", "val_loss", "acc", "val_acc"]
    metric_cols = ["loss", "val_loss", "acc", "val_acc"]

    history = get_df(file)

    # Collect plain records and build the frame once: DataFrame.append was
    # removed in pandas 2.0, and growing a frame row by row is quadratic.
    records = []
    # NOTE(review): groupby sorts the string keys, so rows come out in
    # lexicographic order ("128" before "16"); kept as in the original.
    for _, df_select in history.groupby(level=0):
        # Index entries are (n_hidden-as-string, position) pairs, so idxmax
        # returns a tuple whose second element is the position of the best entry.
        best_index = df_select["val_acc"].idxmax()
        best_row = df_select.loc[best_index]
        record = {
            "n_hidden": best_row["n_hidden"],
            # Plain ints become int64 columns; the former int8 cast overflowed
            # for histories longer than 127 entries.
            "epoch": int(best_index[1]),
            "n_epochs": len(df_select),
        }
        for metric in metric_cols:
            record[metric] = best_row[metric]
        records.append(record)

    return pd.DataFrame(records, columns=columns)

In [None]:
# seq ... sequential model (GRU-GRU-Dense)
# Path template: get_df fills the "{}" placeholder with each hidden-layer size.
history_seq_file = "results/activity_model-117k-grid-150_smiles/activity-model-117k-70_30_train_test-150_smiles-history-{}_hidden.pickle"
# One row per hidden size, taken at the entry with the highest val_acc.
best_seq = get_best(history_seq_file)
# Bare expression so the notebook displays the summary table.
best_seq

In [None]:
# shared_a ... shared model (GRU-GRU-GRU)-Merge-GRU-Dense
# Path template: get_df fills the "{}" placeholder with each hidden-layer size.
history_shared_a_file = "results/activity_model-3_shared_a-117k-grid-50_smiles/activity_model-3_shared_a-117k-grid-50_smiles-history-{}_hidden.pickle"
# One row per hidden size, taken at the entry with the highest val_acc.
best_shared_a = get_best(history_shared_a_file)
# Bare expression so the notebook displays the summary table.
best_shared_a

In [None]:
# shared_b ... shared model (GRU-GRU-GRU)-(GRU-GRU-GRU)-Merge-Dense
# Path template: get_df fills the "{}" placeholder with each hidden-layer size.
history_shared_b_file = "results/activity_model-3_shared_b-117k-grid-50_smiles/activity_model-3_shared_b-117k-grid-50_smiles-history-{}_hidden.pickle"
# One row per hidden size, taken at the entry with the highest val_acc.
best_shared_b = get_best(history_shared_b_file)
# Bare expression so the notebook displays the summary table.
best_shared_b

In [None]:
# shared ... shared model (GRU-GRU-GRU)-Merge-Dense
# Shouldn't be compared with the other shared models, because it was trained on SMILES with max. length 150.
# Path template: get_df fills the "{}" placeholder with each hidden-layer size.
history_shared_file = "results/activity_model-3_shared-117k-grid-150_smiles/activity_model-3_shared-117k-grid-150_smiles-history-{}_hidden.pickle"
# One row per hidden size, taken at the entry with the highest val_acc.
best_shared = get_best(history_shared_file)
# Bare expression so the notebook displays the summary table.
best_shared

In [None]:
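# Disabled experiment: seaborn plot of loss vs. val_loss, coloured by n_hidden.
# NOTE(review): `history_seq` is only a local inside get_df in the cells shown here;
# presumably it must be built first (e.g. get_df(history_seq_file)) before re-enabling.
#sns.set()
#sns.set_style("whitegrid")
#plot = sns.factorplot(x="loss", y="val_loss", data=history_seq, hue="n_hidden", scale=0.7, size=10, join=True)
#plot.set(xticks=np.arange(0, 1.1, 0.1))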

In [None]:
# Disabled ggplot experiment, kept as a bare string literal instead of live code.
# NOTE(review): as the cell's only expression, the string itself is echoed as the cell output.
# NOTE(review): `history_seq` is only a local inside get_df in the cells shown here;
# presumably it must be built first (e.g. get_df(history_seq_file)) before re-enabling.
"""
fig, ax = plt.subplots(1, 4, figsize=(8, 8))
g = ggplot(history_seq, aes(x="loss", y="val_loss", color="n_hidden")) + \
        geom_line(size=2.0) + \
        xlab("loss") + \
        ylab("val_loss") + \
        ggtitle("loss vs. validation loss | sequential model")
g
"""