In [21]:
import os
from datetime import datetime
import h5py
import numpy as np
import pandas as pd
from sklearn import preprocessing
from data_helpers import format_timeseries
from tempConv import tempConvDecoder

%load_ext autoreload
%autoreload 2
%matplotlib inline

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [44]:
conf = {
    'bs' : 256,
    'eps' : 2,
    'lr' : 0.0005,
    'kernel' : 2,
    'nb_filter' : 5,
    'window' : 50,
    'offset' : -20,
    'regressor' : True,
    'pyramidal' : True,
    'resample_data' : False,
    'sample_size' : 40000,
    'run_id' : datetime.now(),
    'verbose' : False,
    'key' : ['absolute pitch'],
    'dataset' : None
}

In [45]:
## produce dataset_paths, format: [[path/to/X1, path/to/y1], [path/to/X2, path/to/y2]..]
## example of collecting some datasets into a list of tuples
rat_path = 'datasets/GRat31/'
data_keys = [folder for folder in os.listdir('datasets/GRat31/')]
folders = [rat_path+folder for folder in os.listdir('datasets/GRat31/')]

X_fname = 'mua_firing_rates_100hz.hdf5'
y_fname = 'all_head_data_100hz.hdf5'
dataset_paths = []
for folder in folders:
    data_file_list = os.listdir(folder)
    dataset_paths.append([
        folder+'/'+data_file_list[data_file_list.index(X_fname)],
        folder+'/'+data_file_list[data_file_list.index(y_fname)]
    ])

# lets train on the first dataset and test on the second two
test_paths = [dataset_paths.pop(0)]
test_paths, dataset_paths

X_train, y_train = format_timeseries(
    dataset_paths, 
    conf['window'],
    conf['offset'],
    (0,4,1), 
    (0,1)
)

X_test, y_test = format_timeseries(
    test_paths, 
    conf['window'],
    conf['offset'],
    (0,4,1), 
    (0,1)
)

dataset = {
    'train' : (X_train, y_train),
    'test' : (X_test, y_test)
}

conf['dataset'] = dataset

In [46]:
stats = []

In [47]:
TCD = tempConvDecoder(**conf)
TCD.fit()
R2s,rs = TCD.determine_fit()
stats.append([R2s, rs])

Train on 184560 samples, validate on 92280 samples
Epoch 1/2
Epoch 2/2


In [48]:
print(stats)

[[[0.66102943245724788], [0.83742087752279881]]]


In [44]:
TCD.model.evaluate(TCD.X_test,TCD.y_test)



[0.52790511442393295, 0.74992975554931163]

In [12]:
path = 'datasets/GRat31'
X_val, y_val = format_timeseries(
    path, 
    conf['window'],
    conf['offset'],
    (0,12,1), 
    (7,8),
    resample_data=conf['resample_data'],
    sample_size=conf['sample_size'],
    regressor=conf['regressor'],
    all_test=True
)

X_val.shape

['636429913515267697.hdf5', '636428029026710180.hdf5', '636430663717571697.hdf5', '636427282621202061.hdf5', '636431765535543697.hdf5', '636438658377315948.hdf5', '636439164041965948.hdf5', '636428953768193973.hdf5']


(493063, 60, 12)

In [13]:
dataset['train'][0].shape

(394453, 60, 12)