In [8]:
from datetime import date
from random import randint
import glob
import os

from bokeh.io import output_file, show
from bokeh.layouts import widgetbox
from bokeh.models import ColumnDataSource
from bokeh.models.widgets import DataTable, DateFormatter, TableColumn

import pandas as pd



grid_search_path = 'results/grid_search_1121_2043'
path = 'results/grid_search_1121_2043/experiment_000738.csv'
cv_path = 'results/grid_search_1123_1102/summary.csv'

cv_df = pd.read_csv(cv_path)
df = pd.read_csv(path)


In [34]:
csv_files = sorted(glob.glob(os.path.join(grid_search_path, 'experiment*.csv')))
csvs = [pd.read_csv(c) for c in csv_files]

# Get csv file names in a dict
csv_names = {}
for i, f in enumerate(csv_files):
    csv_names[f[-10:-4]] = {
        'file_name': f,
        'id': i
    }

csvs[0].columns

Index(['0', 'batch_size', 'lr', 'max_epochs', 'module__activation_fct',
       'module__channel_list', 'module__conv_type', 'module__dropout',
       'module__fc_hidden_layers', 'module__preprocess', 'train_loss_0',
       'train_loss_1', 'train_loss_min', 'train_loss_max', 'train_loss_mean',
       'train_loss_std', 'valid_acc_0', 'valid_acc_1', 'valid_acc_min',
       'valid_acc_max', 'valid_acc_mean', 'valid_acc_std', 'valid_loss_0',
       'valid_loss_1', 'valid_loss_min', 'valid_loss_max', 'valid_loss_mean',
       'valid_loss_std'],
      dtype='object')

In [39]:
dict(csvs[0].iloc[0,1:10])

{'batch_size': 64.0,
 'lr': 0.0001,
 'max_epochs': 2000.0,
 'module__activation_fct': 'prelu',
 'module__channel_list': '[(108, 3), (21, 3), (117, 3), (57, 3), (108, 3)]',
 'module__conv_type': 'temporal',
 'module__dropout': 0.4183401938717719,
 'module__fc_hidden_layers': '[1936, 64]',
 'module__preprocess': nan}

In [56]:
summary = pd.DataFrame(columns=['exp_name','final_acc','final_std','hyper_params'])

for i, c in enumerate(csvs):
    exp_name = [list(csv_names.keys())[i]]
    
    # with patience set to 100 in EarlyStopper callback, valid_acc is spposed not to move in the last 100 epochs
    c_final = c.iloc[-100:,:].mean()
    
    final_acc = [c_final['valid_acc_mean']]
    final_std = [c_final['valid_acc_std']]
    
    hyper_params = [dict(c.iloc[0,1:10])]
    
    summary = pd.concat([summary, pd.DataFrame({
            'exp_name':exp_name,
    'final_acc':final_acc,
    'final_std':final_std,
    'hyper_params':hyper_params,
    })])
    
summary

Unnamed: 0,exp_name,final_acc,final_std,hyper_params
0,12,0.830056,0.006884,"{'batch_size': 64.0, 'lr': 0.0001, 'max_epochs..."
0,738,0.906032,0.007344,"{'batch_size': 64.0, 'lr': 0.0001, 'max_epochs..."
0,2433,0.826256,0.027488,"{'batch_size': 64.0, 'lr': 0.0001, 'max_epochs..."
0,4835,0.91495,0.005919,"{'batch_size': 64.0, 'lr': 0.0001, 'max_epochs..."
0,10102,0.902525,0.017406,"{'batch_size': 64.0, 'lr': 0.0001, 'max_epochs..."
0,12219,0.914179,0.01021,"{'batch_size': 64.0, 'lr': 0.0001, 'max_epochs..."
0,12941,0.861082,0.004732,"{'batch_size': 64.0, 'lr': 0.0001, 'max_epochs..."
0,14843,0.921841,0.009138,"{'batch_size': 64.0, 'lr': 0.0001, 'max_epochs..."
0,15516,0.926853,0.007094,"{'batch_size': 64.0, 'lr': 0.0001, 'max_epochs..."
0,20113,0.867226,0.005103,"{'batch_size': 64.0, 'lr': 0.0001, 'max_epochs..."


In [2]:
output_file("data_table.html")

data = dict(
        dates=[date(2014, 3, i+1) for i in range(10)],
        downloads=[randint(0, 100) for i in range(10)],
    )
source = ColumnDataSource(data)

columns = [
        TableColumn(field="dates", title="Date", formatter=DateFormatter()),
        TableColumn(field="downloads", title="Downloads"),
    ]
data_table = DataTable(source=source, columns=columns, width=400, height=280)

show(widgetbox(data_table))

In [23]:
test_scores = df.loc[-10:,'valid_acc_0']
test_scores

0      0.070896
1      0.070896
2      0.070896
3      0.067164
4      0.067164
5      0.074627
6      0.070896
7      0.070896
8      0.078358
9      0.074627
10     0.070896
11     0.074627
12     0.070896
13     0.070896
14     0.070896
15     0.074627
16     0.070896
17     0.067164
18     0.070896
19     0.074627
20     0.059701
21     0.074627
22     0.074627
23     0.067164
24     0.074627
25     0.074627
26     0.085821
27     0.078358
28     0.070896
29     0.100746
         ...   
310    0.917910
311    0.906716
312    0.910448
313    0.895522
314    0.910448
315    0.895522
316    0.906716
317    0.914179
318    0.910448
319    0.906716
320    0.906716
321    0.906716
322    0.910448
323    0.899254
324    0.906716
325    0.906716
326    0.902985
327    0.899254
328    0.914179
329    0.910448
330    0.910448
331    0.910448
332    0.906716
333    0.899254
334    0.914179
335    0.921642
336    0.921642
337    0.921642
338    0.921642
339    0.921642
Name: valid_acc_0, Lengt

In [21]:
cv = pd.DataFrame({
    'mean_test_score':[0],
    'std_test_score':[0],
    'param_max_epochs':[int(df.loc[0,'max_epochs'])],
    'param_batch_size':[int(df.loc[0,'batch_size'])],
    'param_lr':[df.loc[0,'lr']],
    'param_module__preprocess':[df.loc[0,'module__preprocess']],
    'param_module__conv_type':[df.loc[0,'module__conv_type']],
    'param_module__channel_list':[df.loc[0,'module__channel_list']],
    'param_module__fc_hidden_layers':[df.loc[0,'module__fc_hidden_layers']],
    'param_module__activation_fct':[df.loc[0,'module__activation_fct']],
    'param_module__dropout':[df.loc[0,'module__dropout']],
    'mean_fit_time':[0],
})

cv.to_csv('~/Desktop/test.csv')

In [16]:
df.columns

Index(['0', 'batch_size', 'lr', 'max_epochs', 'module__activation_fct',
       'module__channel_list', 'module__conv_type', 'module__dropout',
       'module__fc_hidden_layers', 'module__preprocess', 'train_loss_0',
       'train_loss_1', 'train_loss_2', 'train_loss_min', 'train_loss_max',
       'train_loss_mean', 'train_loss_std', 'valid_acc_0', 'valid_acc_1',
       'valid_acc_2', 'valid_acc_min', 'valid_acc_max', 'valid_acc_mean',
       'valid_acc_std', 'valid_loss_0', 'valid_loss_1', 'valid_loss_2',
       'valid_loss_min', 'valid_loss_max', 'valid_loss_mean',
       'valid_loss_std'],
      dtype='object')

In [27]:
from bokeh.models import Band, TableColumn, DataTable
import glob
import os

import pandas
from bokeh.io import curdoc, show
from bokeh.layouts import row, column, widgetbox
from bokeh.models import Band
from bokeh.models import ColumnDataSource, Select
from bokeh.plotting import figure

grid_search_path = 'results/grid_search_1121_2043'

# output_file("test.html", title="Test")


# In[13]:


csv_files = sorted(glob.glob(os.path.join(grid_search_path, 'experiment*.csv')))
# Replace Nan values with '' to avoid JSON serialization error
csvs = [pandas.read_csv(c).fillna('None') for c in csv_files]

# Get csv file names in a dict
csv_names = {}
for i, f in enumerate(csv_files):
    csv_names[f[-10:-4]] = {
        'file_name': f,
        'id': i
    }


# In[14]:


def get_dataset(exp_id):
    df = csvs[exp_id]

    df['lower'] = df['valid_acc_mean'] - 2 * df['valid_acc_std']
    df['upper'] = df['valid_acc_mean'] + 2 * df['valid_acc_std']

    return ColumnDataSource(df.reset_index())


# In[15]:


def update_plot(attrname, old, new):
    exp = experiment_select.value

    exp_id = csv_names[exp]['id']
    src = get_dataset(exp_id)

    experiment_source.data.update(src.data)


# In[21]:


def make_plot(source):
    TOOLS = "pan,wheel_zoom,box_zoom,reset,save"

    p = figure(plot_width=1000, plot_height=600, tools=TOOLS)
    p.scatter(x='0', y='valid_acc_mean', fill_alpha=0.3, size=5, source=source)

    band = Band(base='0', lower='lower', upper='upper', source=source, level='underlay',
                fill_alpha=1.0, line_width=1, line_color='black')
    p.add_layout(band)

    p.title.text = 'Grid Search {}'.format(grid_search_path)
    p.xgrid[0].grid_line_color = None
    p.ygrid[0].grid_line_alpha = 0.5
    p.xaxis.axis_label = 'epoch'
    p.yaxis.axis_label = 'valid_acc'

    return p


def get_summary_table():
    summary = pandas.DataFrame(columns=['exp_name', 'final_acc', 'final_std', 'hyper_params'])

    for i, c in enumerate(csvs):
        exp_name = [list(csv_names.keys())[i]]

        # with patience set to 100 in EarlyStopper callback, valid_acc is spposed not to move in the last 100 epochs
        c_final = c.iloc[-100:, :].mean()

        final_acc = [c_final['valid_acc_mean']]
        final_std = [c_final['valid_acc_std']]

        #hyper_params = [dict(c.iloc[0, 1:10])]

        summary = pandas.concat([summary, pandas.DataFrame({
            'exp_name': exp_name,
            'final_acc': final_acc,
            'final_std': final_std,
            #'hyper_params': hyper_params,
        })])
        
    print(summary)

    return ColumnDataSource(summary.reset_index())


# In[22]:


# Summary table
table_source = get_summary_table()
columns = [
    TableColumn(field="exp_name", title="Experiment"),
    TableColumn(field="final_acc", title="Accuracy"),
    TableColumn(field="final_std", title="Std"),
]
data_table = DataTable(source=table_source, columns=columns, width=400, height=600)

# Selected experiment graph
experiment_select = Select(value='0', title='Experiment', options=sorted(csv_names.keys()))

experiment_source = get_dataset(exp_id=0)
plot = make_plot(experiment_source)

experiment_select.on_change('value', update_plot)
controls = column(experiment_select)

# HTML layout
show(widgetbox(data_table))
# curdoc().add_root(row(column(plot, controls), data_table))
# curdoc().title = "Grid Search Results"


  exp_name  final_acc  final_std hyper_params
0   000012   0.830056   0.006884          NaN
0   000738   0.906032   0.007344          NaN
0   002433   0.826256   0.027488          NaN
0   004835   0.914950   0.005919          NaN
0   010102   0.902525   0.017406          NaN
0   012219   0.914179   0.010210          NaN
0   012941   0.861082   0.004732          NaN
0   014843   0.921841   0.009138          NaN
0   015516   0.926853   0.007094          NaN
0   020113   0.867226   0.005103          NaN
0   021835   0.873184   0.029135          NaN
0   024149   0.909677   0.009098          NaN
0   205704   0.915871   0.009180          NaN
0   212142   0.898905   0.009461          NaN
0   213310   0.824104   0.026874          NaN
0   215844   0.796704   0.031566          NaN
0   222557   0.911766   0.006808          NaN
0   223707   0.860100   0.015943          NaN
0   233310   0.922040   0.008397          NaN
0   233830   0.908122   0.015121          NaN


In [25]:
print(pandas.DataFrame({
    'exp_name': ['bla'],
    'final_acc': [0.9],
    'final_std': [0.01],
}))

  exp_name  final_acc  final_std
0      bla        0.9       0.01


In [26]:
get_summary_table()

  exp_name  final_acc  final_std  \
0   000012   0.830056   0.006884   
0   000738   0.906032   0.007344   
0   002433   0.826256   0.027488   
0   004835   0.914950   0.005919   
0   010102   0.902525   0.017406   
0   012219   0.914179   0.010210   
0   012941   0.861082   0.004732   
0   014843   0.921841   0.009138   
0   015516   0.926853   0.007094   
0   020113   0.867226   0.005103   
0   021835   0.873184   0.029135   
0   024149   0.909677   0.009098   
0   205704   0.915871   0.009180   
0   212142   0.898905   0.009461   
0   213310   0.824104   0.026874   
0   215844   0.796704   0.031566   
0   222557   0.911766   0.006808   
0   223707   0.860100   0.015943   
0   233310   0.922040   0.008397   
0   233830   0.908122   0.015121   

                                        hyper_params  
0  {'batch_size': 64.0, 'lr': 0.0001, 'max_epochs...  
0  {'batch_size': 64.0, 'lr': 0.0001, 'max_epochs...  
0  {'batch_size': 64.0, 'lr': 0.0001, 'max_epochs...  
0  {'batch_size': 64.0,