In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import pickle
from ggplot import *
from collections import OrderedDict
pd.options.mode.chained_assignment = None

In [2]:
def get_df(history_file, n_hidden=[16, 32, 64, 128], cols=["n_hidden", "loss", "val_loss", "acc", "val_acc"]):
    """Load pickled training histories and stack them into one DataFrame.

    Parameters
    ----------
    history_file : str
        Path pattern containing a ``{}`` placeholder; it is formatted with
        every value of ``n_hidden`` to obtain the file to read.
    n_hidden : list of int
        Values substituted into ``history_file``; one pickle is read per value.
    cols : list of str
        Columns (and their order) kept in the returned frame.

    Returns
    -------
    pandas.DataFrame
        Rows of all histories concatenated, with an outer index level holding
        ``str(n)`` for the originating file and an inner level holding the
        row position within that file. ``n_hidden`` is stored as a string.

    Notes
    -----
    The files are read with ``pickle.load``; only use this on trusted files.
    """
    metric_names = ("loss", "val_loss", "acc", "val_acc")

    def _load_one(size):
        # Read a single history pickle and turn it into a frame tagged with its size.
        with open(history_file.format(size), mode="br") as handle:
            record = pickle.load(handle)
        data = {"n_hidden": size}
        data.update((metric, record[metric]) for metric in metric_names)
        return pd.DataFrame(data)

    frames = [_load_one(size) for size in n_hidden]
    outer_keys = [str(size) for size in n_hidden]

    combined = pd.concat(frames, keys=outer_keys)[cols]
    # Normalise to int32 first so the string form is a plain integer.
    combined["n_hidden"] = combined.n_hidden.astype(np.int32).astype(str)
    return combined

In [3]:
def get_best(file):
    """Summarise the best epoch of every run stored under a history-file pattern.

    The histories are loaded with ``get_df(file)`` and grouped by the first
    index level (one group per run). For each run the row with the highest
    ``val_acc`` is kept; on ties ``idxmax`` keeps the first occurrence.

    Parameters
    ----------
    file : str
        Path pattern forwarded unchanged to ``get_df``.

    Returns
    -------
    pandas.DataFrame
        One row per run, indexed ``0..n-1``, with the columns ``n_hidden``,
        ``epoch`` (inner index label of the best row), ``n_epochs`` (number of
        rows recorded for the run), ``loss``, ``val_loss``, ``acc`` and
        ``val_acc``. ``epoch`` and ``n_epochs`` are 64-bit integers.
    """
    columns = ["n_hidden", "epoch", "n_epochs", "loss", "val_loss", "acc", "val_acc"]
    history = get_df(file)

    # Collect plain dicts and build the frame once: DataFrame.append no longer
    # exists in pandas >= 2.0 and growing a frame row by row is quadratic.
    records = []
    for _, run in history.groupby(level=0):
        best_label = run["val_acc"].idxmax()  # (run key, epoch) tuple
        record = run.loc[best_label, :].to_dict()
        record["epoch"] = best_label[1]
        record["n_epochs"] = len(run)
        records.append(record)

    best = pd.DataFrame(records, columns=columns)
    # int64 instead of int8: int8 silently wraps for runs longer than 127 epochs.
    return best.astype({"epoch": np.int64, "n_epochs": np.int64})

In [4]:
# seq ... sequential model (GRU-GRU-Dense)
# The "{}" in the path is the placeholder that get_df() fills with each n_hidden value.
history_seq_file = "results/activity_model-117k-grid-150_smiles/activity-model-117k-70_30_train_test-150_smiles-history-{}_hidden.pickle"
# One row per n_hidden run: the epoch with the highest val_acc.
best_seq = get_best(history_seq_file)
best_seq

Unnamed: 0,n_hidden,epoch,n_epochs,loss,val_loss,acc,val_acc
0,16,29,30,0.313078,0.326423,0.857953,0.852975
1,32,28,30,0.269232,0.31627,0.883827,0.863782
2,64,14,15,0.253726,0.318738,0.892474,0.862669
3,128,10,11,0.223962,0.326513,0.907087,0.865635


In [5]:
# shared_a ... shared model (GRU-GRU-GRU)-Merge-GRU-Dense
# The "{}" in the path is the placeholder that get_df() fills with each n_hidden value.
history_shared_a_file = "results/activity_model-3_shared_a-117k-grid-50_smiles/activity_model-3_shared_a-117k-grid-50_smiles-history-{}_hidden.pickle"
# One row per n_hidden run: the epoch with the highest val_acc.
best_shared_a = get_best(history_shared_a_file)
best_shared_a

Unnamed: 0,n_hidden,epoch,n_epochs,loss,val_loss,acc,val_acc
0,16,11,16,0.342068,0.365419,0.841689,0.830204
1,32,10,14,0.329517,0.380016,0.852216,0.831199
2,64,5,10,0.353607,0.376347,0.838079,0.824623
3,128,3,5,0.266837,0.506233,0.894161,0.786154


In [6]:
# shared_b ... shared model (GRU-GRU-GRU)-(GRU-GRU-GRU)-Merge-Dense
# The "{}" in the path is the placeholder that get_df() fills with each n_hidden value.
history_shared_b_file = "results/activity_model-3_shared_b-117k-grid-50_smiles/activity_model-3_shared_b-117k-grid-50_smiles-history-{}_hidden.pickle"
# One row per n_hidden run: the epoch with the highest val_acc.
best_shared_b = get_best(history_shared_b_file)
best_shared_b

Unnamed: 0,n_hidden,epoch,n_epochs,loss,val_loss,acc,val_acc
0,16,22,27,0.341188,0.356912,0.845425,0.837527
1,32,9,14,0.333089,0.37271,0.850959,0.831909
2,64,3,10,0.332656,0.400454,0.85198,0.812285
3,128,1,5,0.343461,0.437523,0.846333,0.79861


In [10]:
# shared ... shared model (GRU-GRU-GRU)-Merge-Dense
# NOTE: should not be compared with the other shared models (shared_a, shared_b), because this one
# was trained on SMILES with max. length 150 (their result paths are tagged "50_smiles").
# The "{}" in the path is the placeholder that get_df() fills with each n_hidden value.
history_shared_file = "results/activity_model-3_shared-117k-grid-150_smiles/activity_model-3_shared-117k-grid-150_smiles-history-{}_hidden.pickle"
# One row per n_hidden run: the epoch with the highest val_acc.
best_shared = get_best(history_shared_file)
best_shared

Unnamed: 0,n_hidden,epoch,n_epochs,loss,val_loss,acc,val_acc
0,16,28,30,0.344035,0.349315,0.840676,0.839373
1,32,26,30,0.318622,0.334057,0.857276,0.851841
2,64,19,21,0.282884,0.324444,0.877039,0.85916
3,128,14,15,0.254224,0.325103,0.8908,0.861288


In [8]:
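# Disabled seaborn experiment (kept for reference; nothing in this cell is executed).
# NOTE(review): `history_seq` is not defined at notebook level in the visible cells -
# presumably it must first be created with get_df(history_seq_file); confirm before re-enabling.
#sns.set()
#sns.set_style("whitegrid")
#plot = sns.factorplot(x="loss", y="val_loss", data=history_seq, hue="n_hidden", scale=0.7, size=10, join=True)
#plot.set(xticks=np.arange(0, 1.1, 0.1))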

In [9]:
# Disabled ggplot experiment parked inside a string literal, so nothing is plotted;
# running the cell only echoes the string back as its output.
# NOTE(review): `history_seq` is not defined at notebook level in the visible cells -
# presumably it must first be created with get_df(history_seq_file); confirm before re-enabling.
"""
fig, ax = plt.subplots(1, 4, figsize=(8, 8))
g = ggplot(history_seq, aes(x="loss", y="val_loss", color="n_hidden")) + \
        geom_line(size=2.0) + \
        xlab("loss") + \
        ylab("val_loss") + \
        ggtitle("loss vs. validation loss | sequential model")
g
"""

'\nfig, ax = plt.subplots(1, 4, figsize=(8, 8))\ng = ggplot(history_seq, aes(x="loss", y="val_loss", color="n_hidden")) +         geom_line(size=2.0) +         xlab("loss") +         ylab("val_loss") +         ggtitle("loss vs. validation loss | sequential model")\ng\n'