In [1]:
import l2f_tda as tda
import numpy as np
import pandas as pd
import datetime as dt
import gudhi as gd
import sklearn as sk
import dask_ml as daml
import pickle as pkl
from sklearn_tda.hera_wasserstein import wasserstein
from l2f_tda.DiagramDistance import kernel_l2_distance

import matplotlib.pyplot as plt
%matplotlib inline

import sklearn.utils as skutils
from sklearn.model_selection import TimeSeriesSplit, KFold
import sklearn.preprocessing as skprep
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV

from keras.models import Sequential
import keras.layers as klayers
import keras.optimizers as koptimizers
import keras.callbacks as kcallbacks
from keras.wrappers.scikit_learn import KerasRegressor

Using TensorFlow backend.


In [2]:
import os
# Hide all CUDA devices from this process so that computation stays on the CPU.
# NOTE(review): keras/TensorFlow were already imported in the first cell; this
# presumably still takes effect because no session has been created yet -- confirm.
os.environ["CUDA_VISIBLE_DEVICES"]="-1" 

In [3]:
import keras
import tensorflow as tf

# Session configuration handed to the Keras backend.
# The cell used to build ConfigProto(device_count={'CPU': 48}) first and then
# immediately overwrite it, so only the GPU setting ever reached the session.
# The dead assignment is removed; the effective configuration is unchanged.
# NOTE(review): CUDA_VISIBLE_DEVICES is set to "-1" in the previous cell, so the
# GPU entry below presumably has no effect -- confirm whether the CPU setting
# was the one actually intended.
config = tf.ConfigProto(device_count={'GPU': 1})

# Create the session from that configuration and register it with Keras.
sess = tf.Session(config=config)
keras.backend.set_session(sess)

In [4]:
# Read the AAPL price series; column names are supplied explicitly.
data = pd.read_csv('AAPL.csv', names=['time', 'price'], parse_dates=['time'])

# Interpret the timestamps as UTC, express them in New York time, and use them
# as the index. The single remaining column is renamed to the integer 0.
data['time'] = pd.to_datetime(data['time'], utc=True).dt.tz_convert(tz='America/New_York')
data = data.set_index('time', drop=True)
data.columns = range(1)
print(data.shape)

# Times of day handed to the sampler: 09:30, 13:30 and 17:30.
samplingTimeList = [dt.time(hour, 30, 0) for hour in range(9, 20, 4)]

(1586477, 1)


In [5]:
# Chronological split: the first three quarters of the rows form the training set.
numberTrain = data.shape[0] * 3 // 4
X_train = data[:numberTrain]
# Placeholder target with one row per training sample. np.zeros is used instead
# of np.empty so the placeholder is deterministic rather than uninitialised
# memory (np.empty can contain arbitrary values, including NaN).
# NOTE(review): presumably the pipeline's labelling step produces the real
# targets and this array is only a stand-in -- confirm.
y_train = np.zeros((X_train.shape[0], 1))
X_train.head(10)

Unnamed: 0_level_0,0
time,Unnamed: 1_level_1
2014-12-31 19:00:00-05:00,110.75
2014-12-31 19:01:00-05:00,110.7
2014-12-31 19:02:00-05:00,110.7
2014-12-31 19:03:00-05:00,110.7
2014-12-31 19:04:00-05:00,110.7
2014-12-31 19:05:00-05:00,110.7
2014-12-31 19:06:00-05:00,110.7
2014-12-31 19:07:00-05:00,110.65
2014-12-31 19:08:00-05:00,110.65
2014-12-31 19:09:00-05:00,110.67


In [6]:
# The quarter of the rows that follows the training slice forms the test set.
# NOTE(review): because both sizes use floor division, numberTrain + numberTest
# can be smaller than the number of rows, leaving trailing rows unused --
# confirm that this is acceptable.
numberTest = data.shape[0] // 4
X_test = data[numberTrain:numberTrain+numberTest]
# Deterministic placeholder target (np.zeros rather than uninitialised np.empty).
# NOTE(review): presumably replaced by the pipeline's labelling step -- confirm.
y_test = np.zeros((X_test.shape[0], 1))
X_test.head()

Unnamed: 0_level_0,0
time,Unnamed: 1_level_1
2017-12-11 01:54:00-05:00,169.2
2017-12-11 01:55:00-05:00,169.2
2017-12-11 01:56:00-05:00,169.2
2017-12-11 01:57:00-05:00,169.2
2017-12-11 01:58:00-05:00,169.2


In [7]:
# Ordered (name, estimator) stages of the workflow: every stage but the last is
# a transformer, and the last one is the Keras regressor.
steps = [
    (
        'sampling',
        tda.Sampler(
            transformationType='return',
            removeWeekends=True,
            samplingType='fixed',
            samplingTimeList=samplingTimeList,
        ),
    ),
    ('embedding', tda.TakensEmbedder()),
    ('labelling', tda.Labeller(labellingType='variation', function=np.std)),
    ('diagram', tda.VietorisRipsDiagram()),
    ('distance', tda.DiagramDistance()),
    ('physical', tda.MDS()),
    ('derivatives', tda.Derivatives()),
    ('scaling', tda.ScalerWrapper(copy=True)),
    ('formulation', tda.FormulationTransformer()),
    ('regression', tda.KerasRegressorWrapper()),
]

# Three views built from the same step tuples: transformers only, regressor
# only, and the complete chain.
*transform_steps, regression_step = steps
pipeline_transform = Pipeline(transform_steps)
pipeline_estimate = Pipeline([regression_step])

pipeline = Pipeline(steps)

In [8]:
# Query the hyper-parameters exposed by each pipeline variant.
# Only the value of the last expression is rendered as the cell output, so the
# first two calls are evaluated but their results are not displayed.
pipeline_transform.get_params()
pipeline_estimate.get_params()
pipeline.get_params()

{'memory': None,
 'steps': [('sampling',
   Sampler(removeWeekends=True, samplingPeriod='2h', samplingType='fixed')),
  ('embedding', TakensEmbedder(innerWindowDuration=5, innerWindowStride=1,
           outerWindowDuration=20, outerWindowStride=2)),
  ('labelling', Labeller(deltaT=0, function=<function std at 0x7f86f43eab70>,
        labellingType='variation')),
  ('diagram', VietorisRipsDiagram(dataType='points', homologyDimensions=[0, 1],
             maxEdgeLength=inf)),
  ('distance',
   DiagramDistance(metric=<built-in function bottleneck_distance>, n_jobs=1)),
  ('physical',
   MDS(dissimilarity='precomputed', eps=0.001, max_iter=300, metric=True,
     n_components=2, n_init=4, n_jobs=None, random_state=None, verbose=0)),
  ('derivatives', Derivatives(orders=[0, 1, 2])),
  ('scaling', ScalerWrapper(copy=True, feature_range=(0, 1))),
  ('formulation',
   FormulationTransformer(numberStepsInPast=10, stepInFuture=1)),
  ('regression',
   <l2f_tda.KerasWrapper.KerasRegressorWrapper 

In [9]:
def _prefixed(stepName, params):
    """Re-key ``params`` as ``<stepName>__<name>`` so a Pipeline grid can address that step."""
    return {stepName + '__' + name: values for name, values in params.items()}


def _singleRecurrentStack(layerClass, units, rateInput, rateLSTM):
    """Layer specs: BatchNormalization, Dropout, one recurrent layer, Dropout, Dense(1)."""
    return [
        {'layerClass': klayers.normalization.BatchNormalization},
        {'layerClass': klayers.Dropout, 'rate': rateInput},
        {'layerClass': layerClass, 'units': units, 'activation': 'tanh'},
        {'layerClass': klayers.Dropout, 'rate': rateLSTM},
        {'layerClass': klayers.Dense, 'units': 1},
    ]


def _doubleRecurrentStack(layerClass, units, rateInput, rateLSTM1):
    """Layer specs: BatchNormalization, Dropout, recurrent, Dropout, recurrent, Dense(1).

    NOTE(review): stacked Keras recurrent layers normally need
    ``return_sequences=True`` on the first one; whether KerasRegressorWrapper
    adds it cannot be seen from this notebook -- confirm.
    """
    return [
        {'layerClass': klayers.normalization.BatchNormalization},
        {'layerClass': klayers.Dropout, 'rate': rateInput},
        {'layerClass': layerClass, 'units': units, 'activation': 'tanh'},
        {'layerClass': klayers.Dropout, 'rate': rateLSTM1},
        {'layerClass': layerClass, 'units': units, 'activation': 'tanh'},
        {'layerClass': klayers.Dense, 'units': 1},
    ]


# Sampling (nothing is searched for this step)
sampling_param = {}
sampling_param_grid = _prefixed('sampling', sampling_param)

# Embedding
embedding_param = {
    'outerWindowDuration': [100, 200],
    'outerWindowStride': [10, 20],
    'innerWindowDuration': [10, 40],
    'innerWindowStride': [1],
}
embedding_param_grid = _prefixed('embedding', embedding_param)

# Labelling
labelling_param = {'deltaT': [10]}
labelling_param_grid = _prefixed('labelling', labelling_param)

# Diagram
diagram_param = {'homologyDimensions': [[0, 1]]}
diagram_param_grid = _prefixed('diagram', diagram_param)

# Distance
distance_param = {'metric': [kernel_l2_distance]}
distance_param_grid = _prefixed('distance', distance_param)

# Physical
physical_param = {'n_components': [10]}
physical_param_grid = _prefixed('physical', physical_param)

# Derivatives
derivatives_param = {'orders': [[0, 1, 2]]}
derivatives_param_grid = _prefixed('derivatives', derivatives_param)

# Scaling (nothing is searched for this step)
# scaling_param['scaler'] = [ skprep.MinMaxScaler, skprep.StandardScaler ]
scaling_param = {}
scaling_param_grid = _prefixed('scaling', scaling_param)

# Formulation
formulation_param = {
    'numberStepsInPast': [20],
    'stepInFuture': [1],
}
formulation_param_grid = _prefixed('formulation', formulation_param)

# Regression: candidate architectures, enumerated in the same nesting order
# (layer class, units, input dropout rate, recurrent dropout rate).
single_stacks = [
    _singleRecurrentStack(layerClass, units, rateInput, rateLSTM)
    for layerClass in [klayers.LSTM]
    for units in [4, 16]
    for rateInput in [0, 0.2]
    for rateLSTM in [0, 0.2]
]
double_stacks = [
    _doubleRecurrentStack(layerClass, units, rateInput, rateLSTM1)
    for layerClass in [klayers.LSTM]
    for units in [2, 8]
    for rateInput in [0, 0.2]
    for rateLSTM1 in [0, 0.2]
]

regression_param = {
    # Input width = number of embedding components times number of derivative orders.
    'numberFeatures': [physical_param['n_components'][0] * len(derivatives_param['orders'][0])],
    # Same list object as the formulation step, so both always agree.
    'numberStepsInPast': formulation_param['numberStepsInPast'],
    'modelSteps': single_stacks + double_stacks,
    'optimizerClass': [koptimizers.Adam, koptimizers.RMSprop],
    'optimizer_kwargs': [{'lr': lr} for lr in [0.01, 0.1]],
    # 'callbacks': [ [kcallbacks.ModelCheckpoint('./model.sk', monitor='loss', save_best_only=True)] ],
    'loss': ['mean_squared_error'],
    'batch_size': [100, 500],
    'epochs': [5000, 10000],
}
regression_param_grid = _prefixed('regression', regression_param)

# Merge the per-step grids: transformers first, then the regressor.
param_grid_transform = {}
for stage_grid in (
    sampling_param_grid,
    embedding_param_grid,
    labelling_param_grid,
    diagram_param_grid,
    distance_param_grid,
    physical_param_grid,
    derivatives_param_grid,
    scaling_param_grid,
    formulation_param_grid,
):
    param_grid_transform.update(stage_grid)

param_grid_estimate = dict(regression_param_grid)

param_grid = dict(param_grid_transform)
param_grid.update(param_grid_estimate)

print(param_grid_transform)
print(param_grid_estimate)

{'embedding__outerWindowDuration': [100, 200], 'embedding__outerWindowStride': [10, 20], 'embedding__innerWindowDuration': [10, 40], 'embedding__innerWindowStride': [1], 'labelling__deltaT': [10], 'diagram__homologyDimensions': [[0, 1]], 'distance__metric': [<function kernel_l2_distance at 0x7f86c7fb2620>], 'physical__n_components': [10], 'derivatives__orders': [[0, 1, 2]], 'formulation__numberStepsInPast': [20], 'formulation__stepInFuture': [1]}
{'regression__numberFeatures': [30], 'regression__numberStepsInPast': [20], 'regression__modelSteps': [[{'layerClass': <class 'keras.layers.normalization.BatchNormalization'>}, {'layerClass': <class 'keras.layers.core.Dropout'>, 'rate': 0}, {'layerClass': <class 'keras.layers.recurrent.LSTM'>, 'units': 4, 'activation': 'tanh'}, {'layerClass': <class 'keras.layers.core.Dropout'>, 'rate': 0}, {'layerClass': <class 'keras.layers.core.Dense'>, 'units': 1}], [{'layerClass': <class 'keras.layers.normalization.BatchNormalization'>}, {'layerClass': <c

In [None]:
# NOTE(review): both imports bind the same name, GridSearchCV. The second
# (scikit-learn) import rebinds it, so the dask_ml class imported on the first
# line is never the one used by the cells below.
from dask_ml.model_selection import GridSearchCV
from sklearn.model_selection import GridSearchCV

# from dask.distributed import Client
# client = Client()
# skutils.parallel_backend(backend='multiprocessing')

In [None]:
%%time

# Exhaustive search over the complete pipeline using ordered time-series folds.
# NOTE(review): the recorded run of this cell ended with
# "ValueError: buffer source array is read-only" while joblib was memory-mapping
# the inputs for its parallel workers -- the cell does not complete as written.
cv = TimeSeriesSplit(n_splits=5)
grid_estimate = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    cv=cv,
    n_jobs=-1,
    verbose=100,
    error_score='raise',
)  # iid=False ???
# NOTE(review): X_train is passed as both X and y here, whereas the transform
# cell below passes y_train -- confirm which is intended.
grid_result_estimate = grid_estimate.fit(X_train, X_train)


Fitting 5 folds for each of 4096 candidates, totalling 20480 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 48 concurrent workers.
Memmapping (shape=(1189857,), dtype=datetime64[ns]) to new file /dev/shm/joblib_memmapping_folder_24012_8392265189/24012-139690792686368-cbd86508173441758a8969421865e51d.pkl
Memmapping (shape=(1, 1189857), dtype=float64) to new file /dev/shm/joblib_memmapping_folder_24012_8392265189/24012-139690792686368-2742565ba5c642b0ab5a7b21f46ac2fe.pkl
Memmapping (shape=(198312,), dtype=int64) to new file /dev/shm/joblib_memmapping_folder_24012_8392265189/24012-139690792686368-7a1fa1959b4944f78df99ce93901b993.pkl
Memmapping (shape=(198309,), dtype=int64) to new file /dev/shm/joblib_memmapping_folder_24012_8392265189/24012-139690792686368-78795f1cf9ad447d80b7e5336ec7be99.pkl
Memmapping (shape=(1189857,), dtype=datetime64[ns]) to old file /dev/shm/joblib_memmapping_folder_24012_8392265189/24012-139690792686368-cbd86508173441758a8969421865e51d.pkl
Memmapping (

Memmapping (shape=(1189857,), dtype=datetime64[ns]) to old file /dev/shm/joblib_memmapping_folder_24012_8392265189/24012-139690792686368-cbd86508173441758a8969421865e51d.pkl
Memmapping (shape=(1, 1189857), dtype=float64) to old file /dev/shm/joblib_memmapping_folder_24012_8392265189/24012-139690792686368-2742565ba5c642b0ab5a7b21f46ac2fe.pkl
Memmapping (shape=(991548,), dtype=int64) to old file /dev/shm/joblib_memmapping_folder_24012_8392265189/24012-139690792686368-03c110f683ed40528568dff5d66a89f1.pkl
Memmapping (shape=(198309,), dtype=int64) to old file /dev/shm/joblib_memmapping_folder_24012_8392265189/24012-139690792686368-c3dbb2fbfc744861ac0f77e6847eb792.pkl
Memmapping (shape=(1189857,), dtype=datetime64[ns]) to old file /dev/shm/joblib_memmapping_folder_24012_8392265189/24012-139690792686368-cbd86508173441758a8969421865e51d.pkl
Memmapping (shape=(1, 1189857), dtype=float64) to old file /dev/shm/joblib_memmapping_folder_24012_8392265189/24012-139690792686368-2742565ba5c642b0ab5a7b2

Memmapping (shape=(1189857,), dtype=datetime64[ns]) to old file /dev/shm/joblib_memmapping_folder_24012_8392265189/24012-139690792686368-cbd86508173441758a8969421865e51d.pkl
Memmapping (shape=(1, 1189857), dtype=float64) to old file /dev/shm/joblib_memmapping_folder_24012_8392265189/24012-139690792686368-2742565ba5c642b0ab5a7b21f46ac2fe.pkl
Memmapping (shape=(396621,), dtype=int64) to old file /dev/shm/joblib_memmapping_folder_24012_8392265189/24012-139690792686368-b0ea3072dc634c458d2f18c6d5b6f65f.pkl
Memmapping (shape=(198309,), dtype=int64) to old file /dev/shm/joblib_memmapping_folder_24012_8392265189/24012-139690792686368-a460bb995c654cc5adbc6aff1ee1de2f.pkl
Memmapping (shape=(1189857,), dtype=datetime64[ns]) to old file /dev/shm/joblib_memmapping_folder_24012_8392265189/24012-139690792686368-cbd86508173441758a8969421865e51d.pkl
Memmapping (shape=(1, 1189857), dtype=float64) to old file /dev/shm/joblib_memmapping_folder_24012_8392265189/24012-139690792686368-2742565ba5c642b0ab5a7b2

[Parallel(n_jobs=-1)]: Done   3 tasks      | elapsed:    6.4s
Memmapping (shape=(1189857,), dtype=datetime64[ns]) to old file /dev/shm/joblib_memmapping_folder_24012_8392265189/24012-139690792686368-cbd86508173441758a8969421865e51d.pkl
Memmapping (shape=(1, 1189857), dtype=float64) to old file /dev/shm/joblib_memmapping_folder_24012_8392265189/24012-139690792686368-2742565ba5c642b0ab5a7b21f46ac2fe.pkl
Memmapping (shape=(396621,), dtype=int64) to old file /dev/shm/joblib_memmapping_folder_24012_8392265189/24012-139690792686368-b0ea3072dc634c458d2f18c6d5b6f65f.pkl
Memmapping (shape=(198309,), dtype=int64) to old file /dev/shm/joblib_memmapping_folder_24012_8392265189/24012-139690792686368-a460bb995c654cc5adbc6aff1ee1de2f.pkl
[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:    6.5s
Memmapping (shape=(1189857,), dtype=datetime64[ns]) to old file /dev/shm/joblib_memmapping_folder_24012_8392265189/24012-139690792686368-cbd86508173441758a8969421865e51d.pkl
Memmapping (shape=(1, 1189857),

[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    7.3s
Memmapping (shape=(1189857,), dtype=datetime64[ns]) to old file /dev/shm/joblib_memmapping_folder_24012_8392265189/24012-139690792686368-cbd86508173441758a8969421865e51d.pkl
Memmapping (shape=(1, 1189857), dtype=float64) to old file /dev/shm/joblib_memmapping_folder_24012_8392265189/24012-139690792686368-2742565ba5c642b0ab5a7b21f46ac2fe.pkl
Memmapping (shape=(198312,), dtype=int64) to old file /dev/shm/joblib_memmapping_folder_24012_8392265189/24012-139690792686368-7a1fa1959b4944f78df99ce93901b993.pkl
Memmapping (shape=(198309,), dtype=int64) to old file /dev/shm/joblib_memmapping_folder_24012_8392265189/24012-139690792686368-78795f1cf9ad447d80b7e5336ec7be99.pkl
[Parallel(n_jobs=-1)]: Done  19 tasks      | elapsed:    7.3s
[Parallel(n_jobs=-1)]: Done  20 tasks      | elapsed:    7.4s
Memmapping (shape=(1189857,), dtype=datetime64[ns]) to old file /dev/shm/joblib_memmapping_folder_24012_8392265189/24012-139690792686368-cbd865

[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed:    7.7s
Memmapping (shape=(1189857,), dtype=datetime64[ns]) to old file /dev/shm/joblib_memmapping_folder_24012_8392265189/24012-139690792686368-cbd86508173441758a8969421865e51d.pkl
Memmapping (shape=(1, 1189857), dtype=float64) to old file /dev/shm/joblib_memmapping_folder_24012_8392265189/24012-139690792686368-2742565ba5c642b0ab5a7b21f46ac2fe.pkl
Memmapping (shape=(991548,), dtype=int64) to old file /dev/shm/joblib_memmapping_folder_24012_8392265189/24012-139690792686368-03c110f683ed40528568dff5d66a89f1.pkl
Memmapping (shape=(198309,), dtype=int64) to old file /dev/shm/joblib_memmapping_folder_24012_8392265189/24012-139690792686368-c3dbb2fbfc744861ac0f77e6847eb792.pkl
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    7.8s
[Parallel(n_jobs=-1)]: Done  35 tasks      | elapsed:    7.8s
Memmapping (shape=(1189857,), dtype=datetime64[ns]) to old file /dev/shm/joblib_memmapping_folder_24012_8392265189/24012-139690792686368-cbd865

[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    8.2s
Memmapping (shape=(1189857,), dtype=datetime64[ns]) to old file /dev/shm/joblib_memmapping_folder_24012_8392265189/24012-139690792686368-cbd86508173441758a8969421865e51d.pkl[Parallel(n_jobs=-1)]: Done  57 tasks      | elapsed:    8.2s

Memmapping (shape=(1, 1189857), dtype=float64) to old file /dev/shm/joblib_memmapping_folder_24012_8392265189/24012-139690792686368-2742565ba5c642b0ab5a7b21f46ac2fe.pkl
[Parallel(n_jobs=-1)]: Done  58 tasks      | elapsed:    8.2s
Memmapping (shape=(991548,), dtype=int64) to old file /dev/shm/joblib_memmapping_folder_24012_8392265189/24012-139690792686368-03c110f683ed40528568dff5d66a89f1.pkl
Memmapping (shape=(198309,), dtype=int64) to old file /dev/shm/joblib_memmapping_folder_24012_8392265189/24012-139690792686368-c3dbb2fbfc744861ac0f77e6847eb792.pkl
Memmapping (shape=(1189857,), dtype=datetime64[ns]) to old file /dev/shm/joblib_memmapping_folder_24012_8392265189/24012-139690792686368-cbd865

[Parallel(n_jobs=-1)]: Done 118 tasks      | elapsed:    8.4s
[Parallel(n_jobs=-1)]: Done 119 tasks      | elapsed:    8.4s
[Parallel(n_jobs=-1)]: Done 120 tasks      | elapsed:    8.4s
[Parallel(n_jobs=-1)]: Done 121 tasks      | elapsed:    8.4s
[Parallel(n_jobs=-1)]: Done 122 tasks      | elapsed:    8.4s
[Parallel(n_jobs=-1)]: Done 123 tasks      | elapsed:    8.4s
Memmapping (shape=(1189857,), dtype=datetime64[ns]) to old file /dev/shm/joblib_memmapping_folder_24012_8392265189/24012-139690792686368-cbd86508173441758a8969421865e51d.pkl[Parallel(n_jobs=-1)]: Done 124 tasks      | elapsed:    8.4s

Memmapping (shape=(1, 1189857), dtype=float64) to old file /dev/shm/joblib_memmapping_folder_24012_8392265189/24012-139690792686368-2742565ba5c642b0ab5a7b21f46ac2fe.pkl
Memmapping (shape=(793239,), dtype=int64) to old file /dev/shm/joblib_memmapping_folder_24012_8392265189/24012-139690792686368-726f07ffb72e430c8ca436d60642532c.pkl
Memmapping (shape=(198309,), dtype=int64) to old file /dev/

Memmapping (shape=(1189857,), dtype=datetime64[ns]) to old file /dev/shm/joblib_memmapping_folder_24012_8392265189/24012-139690792686368-cbd86508173441758a8969421865e51d.pkl
Memmapping (shape=(1, 1189857), dtype=float64) to old file /dev/shm/joblib_memmapping_folder_24012_8392265189/24012-139690792686368-2742565ba5c642b0ab5a7b21f46ac2fe.pkl
Memmapping (shape=(793239,), dtype=int64) to old file /dev/shm/joblib_memmapping_folder_24012_8392265189/24012-139690792686368-726f07ffb72e430c8ca436d60642532c.pkl
Memmapping (shape=(198309,), dtype=int64) to old file /dev/shm/joblib_memmapping_folder_24012_8392265189/24012-139690792686368-f769375614ad4c6a933cb06dc83e4c06.pkl
Memmapping (shape=(1189857,), dtype=datetime64[ns]) to old file /dev/shm/joblib_memmapping_folder_24012_8392265189/24012-139690792686368-cbd86508173441758a8969421865e51d.pkl
Memmapping (shape=(1, 1189857), dtype=float64) to old file /dev/shm/joblib_memmapping_folder_24012_8392265189/24012-139690792686368-2742565ba5c642b0ab5a7b2

ValueError: buffer source array is read-only

In [10]:
%%time

# Take the first candidate value of every transformer hyper-parameter so the
# transform-only pipeline can be fitted once with a fixed configuration.
param_transform = {name: candidates[0] for name, candidates in param_grid_transform.items()}
print(param_transform)

# set_params returns the pipeline itself, so the fit call can be chained.
pipeline_transform.set_params(**param_transform).fit(X_train, y_train)

{'embedding__outerWindowDuration': 100, 'embedding__outerWindowStride': 10, 'embedding__innerWindowDuration': 10, 'embedding__innerWindowStride': 1, 'labelling__deltaT': 10, 'diagram__homologyDimensions': [0, 1], 'distance__metric': <function kernel_l2_distance at 0x7f86c7fb2620>, 'physical__n_components': 10, 'derivatives__orders': [0, 1, 2], 'formulation__numberStepsInPast': 20, 'formulation__stepInFuture': 1}
(217, 10) (215, 30) (217, 1) (215, 1)
CPU times: user 3min 8s, sys: 164 ms, total: 3min 9s
Wall time: 1min 49s


In [11]:
%%time

# Run the fitted transformer chain on the training data and cache the result on disk.
X_train_transformed = pipeline_transform.transform(X_train)
# The context manager closes (and flushes) the file even if pickling fails;
# previously the handle returned by open() was never closed.
with open('XAAPL_train_transformed.pkl', 'wb') as cache_file:
    pkl.dump(X_train_transformed, cache_file)


invalid value encountered in add


invalid value encountered in greater



KeyboardInterrupt: 

In [None]:
%%time

# Run the fitted transformer chain on the test data and cache the result on disk.
X_test_transformed = pipeline_transform.transform(X_test)
# The context manager closes (and flushes) the file even if pickling fails;
# previously the handle returned by open() was never closed.
with open('XAAPL_test_transformed.pkl', 'wb') as cache_file:
    pkl.dump(X_test_transformed, cache_file)


invalid value encountered in add


invalid value encountered in greater



(50, 10) (48, 30) (50, 1) (48, 1)
CPU times: user 1min 17s, sys: 127 ms, total: 1min 17s
Wall time: 59.6 s


In [None]:
%%time
# Reload the cached training features; the file handle is now closed deterministically.
# NOTE(review): unpickling can execute arbitrary code -- only load cache files
# produced by this notebook.
with open('XAAPL_train_transformed.pkl', 'rb') as cache_file:
    X_train_transformed = pkl.load(cache_file)

# Search only the regression stage on the pre-computed features.
# NOTE(review): shuffle=True without random_state means the folds change from
# run to run; consider fixing a seed for reproducibility.
cv = KFold(n_splits=3, shuffle=True)
grid_estimate = GridSearchCV(estimator=pipeline_estimate, param_grid=param_grid_estimate, cv=cv, n_jobs=-1, verbose=100, error_score='raise') #iid=False ???
# Elements 0 and 1 of the cached object are passed as X and y respectively.
grid_result_estimate = grid_estimate.fit(X_train_transformed[0], X_train_transformed[1])


Fitting 3 folds for each of 256 candidates, totalling 768 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 48 concurrent workers.
Memmapping (shape=(797, 20, 30), dtype=float64) to new file /dev/shm/joblib_memmapping_folder_33003_3838996862/33003-140385306321136-6ce5829dca6f45a4a234c6fa12a96015.pkl
Pickling array (shape=(797, 1), dtype=float64).
Pickling array (shape=(531,), dtype=int64).
Pickling array (shape=(266,), dtype=int64).
Memmapping (shape=(797, 20, 30), dtype=float64) to old file /dev/shm/joblib_memmapping_folder_33003_3838996862/33003-140385306321136-6ce5829dca6f45a4a234c6fa12a96015.pkl
Pickling array (shape=(797, 1), dtype=float64).
Pickling array (shape=(531,), dtype=int64).
Pickling array (shape=(266,), dtype=int64).
Memmapping (shape=(797, 20, 30), dtype=float64) to old file /dev/shm/joblib_memmapping_folder_33003_3838996862/33003-140385306321136-6ce5829dca6f45a4a234c6fa12a96015.pkl
Pickling array (shape=(797, 1), dtype=float64).
Pickling array (shape=(532,), 

Memmapping (shape=(797, 20, 30), dtype=float64) to old file /dev/shm/joblib_memmapping_folder_33003_3838996862/33003-140385306321136-6ce5829dca6f45a4a234c6fa12a96015.pkl
Pickling array (shape=(797, 1), dtype=float64).
Pickling array (shape=(531,), dtype=int64).
Pickling array (shape=(266,), dtype=int64).
Memmapping (shape=(797, 20, 30), dtype=float64) to old file /dev/shm/joblib_memmapping_folder_33003_3838996862/33003-140385306321136-6ce5829dca6f45a4a234c6fa12a96015.pkl
Pickling array (shape=(797, 1), dtype=float64).
Pickling array (shape=(531,), dtype=int64).
Pickling array (shape=(266,), dtype=int64).
Memmapping (shape=(797, 20, 30), dtype=float64) to old file /dev/shm/joblib_memmapping_folder_33003_3838996862/33003-140385306321136-6ce5829dca6f45a4a234c6fa12a96015.pkl
Pickling array (shape=(797, 1), dtype=float64).
Pickling array (shape=(532,), dtype=int64).
Pickling array (shape=(265,), dtype=int64).
Memmapping (shape=(797, 20, 30), dtype=float64) to old file /dev/shm/joblib_memmap

In [None]:
# summarize results
# Report the best candidate, then the mean and standard deviation of the test
# score for every candidate in the search.
print("Best: {:f} using {}".format(grid_result_estimate.best_score_, grid_result_estimate.best_params_))

cv_results = grid_result_estimate.cv_results_
means = cv_results['mean_test_score']
stds = cv_results['std_test_score']
params = cv_results['params']

for mean, stdev, param in zip(means, stds, params):
    print("{:f} ({:f}) with: {!r}".format(mean, stdev, param))

In [None]:
# Predict on the cached training features with the fitted grid search.
# NOTE(review): the two-name unpacking implies predict() returns a pair here,
# presumably (predictions, ground truth) judging by the variable names --
# confirm against KerasRegressorWrapper.
y_predict_train, y_true_train = grid_result_estimate.predict(X_train_transformed)

In [None]:
# Overlay predictions and reference values for the training set. The figure
# now carries a title, axis labels and a legend so it can be read on its own.
fig, ax = plt.subplots()
ax.plot(y_predict_train, marker='x', label='prediction')
ax.plot(y_true_train, label='ground truth')
ax.set_title('Training set: prediction vs. ground truth')
ax.set_xlabel('sample index')
ax.set_ylabel('target value')
ax.legend();

In [None]:
# Reload the cached test features; the file handle is now closed deterministically.
# NOTE(review): unpickling can execute arbitrary code -- only load cache files
# produced by this notebook.
with open('XAAPL_test_transformed.pkl', 'rb') as cache_file:
    X_test_transformed = pkl.load(cache_file)

# NOTE(review): predict() is unpacked into two names, presumably
# (predictions, ground truth) -- confirm against KerasRegressorWrapper.
y_predict_test, y_true_test = grid_result_estimate.predict(X_test_transformed)

In [None]:
# Overlay predictions and reference values for the test set. The figure now
# carries a title, axis labels and a legend so it can be read on its own.
fig, ax = plt.subplots()
ax.plot(y_predict_test, marker='x', label='prediction')
ax.plot(y_true_test, label='ground truth')
ax.set_title('Test set: prediction vs. ground truth')
ax.set_xlabel('sample index')
ax.set_ylabel('target value')
ax.legend();