In [159]:
import sys
sys.path.append("pyESN")
import numpy as np
from matplotlib import pyplot as plt
import pandas

from pyESN import ESN
import sklearn.metrics as sm
from scipy.special import expit

from sklearn import preprocessing


from sklearn.grid_search import GridSearchCV
from sklearn.base import BaseEstimator, RegressorMixin

from sklearn.cross_validation import train_test_split

%matplotlib inline

In [198]:
def getDataWindowed(data,inSize,outSize):
    if outSize > inSize:
        biggest = outSize
    else:
        biggest = inSize
    
    matrixIn = np.zeros((len(data)-biggest, inSize))
    matrixOut = np.zeros((len(data)-biggest, outSize))
    
    for i in range(len(data)-2*biggest):
        matrixIn[i,:] = data[i:i+inSize]
        matrixOut[i,:] = data[i+inSize+1:i+inSize+outSize+1]
    return matrixIn,matrixOut

In [208]:
def createFolds(dataSize, k):
    vector = np.arange(dataSize)
    splitted = np.array_split(vector,k+1)
    
    folds = []
    
    test_set = []
    for i in range(k-1):
        test_set = np.hstack((test_set, splitted[i]))
        val_set = splitted[i+1]
        folds.append((test_set.astype('int'),val_set.astype('int')))
    return folds

In [264]:
class skESN(BaseEstimator):
    def __init__(self, n_inputs=12,n_outputs=12,n_reservoir = 1000,
                 spectral_radius = 1.5, sparsity=0.3, teacher_forcing= True):
        self.n_inputs = n_inputs
        self.n_outputs = n_outputs
        self.n_reservoir = n_reservoir
        self.spectral_radius = spectral_radius
        self.sparsity = sparsity
        self.teacher_forcing = teacher_forcing
    
    def get_params(self,deep=True):
        return {'n_inputs': self.n_inputs, 'n_outputs': self.n_outputs, 'n_reservoir':self.n_reservoir,
               'spectral_radius':self.spectral_radius, 'teacher_forcing':self.teacher_forcing}
   
    def fit(self,X,y):
        print self.get_params()
        self.esn = ESN(n_inputs = self.n_inputs,
          n_outputs = self.n_outputs,
          n_reservoir = self.n_reservoir,
          random_state=42,
          spectral_radius = self.spectral_radius,
          sparsity= self.sparsity,
          teacher_forcing= self.teacher_forcing,
          silent= True)
        self.esn.fit(X,y)
        return self

    def predict(self,X):
        return self.esn.predict(X)
    
    def score(self,X,y):
        y_pred = self.esn.predict(X)
        score = sm.mean_squared_error(y_pred,y)
        print "Score: " + str(score)
        return score

# Testing

In [256]:
b08 = pandas.read_csv('../data/b08c2.csv')
b08_viento = b08["viento"].as_matrix()

prediction_steps = 12
input_steps = 12

In [257]:
train_perc = 0.8

datalen = len(b08_viento)
trainlen = int(train_perc*(datalen))

minmax = preprocessing.MinMaxScaler() 
standarization = preprocessing.StandardScaler()

minmax.fit(b08_viento[:trainlen].reshape(-1,1))
standarization.fit(b08_viento[:trainlen].reshape(-1,1))

preproc = minmax

data = preproc.transform(b08_viento.reshape(-1,1)).reshape((-1,))

In [258]:
X,y = getDataWindowed(data,input_steps,prediction_steps)

In [259]:
train_data,train_output = X[:trainlen],y[:trainlen]
test_data, test_output  = X[trainlen:-prediction_steps],y[trainlen:-prediction_steps]

In [262]:
params = {'n_reservoir': [200],'spectral_radius':np.linspace(0,15,10)}

In [263]:
#folds = createFolds(train_data.shape[0],5)
clf = skESN()
gs = GridSearchCV(clf, params, cv=5, verbose=True)
gs.fit(train_data,train_output)

Fitting 5 folds for each of 10 candidates, totalling 50 fits
{'n_outputs': 12, 'teacher_forcing': True, 'spectral_radius': 0.0, 'n_inputs': 12, 'n_reservoir': 200}
0.0133032086474
{'n_outputs': 12, 'teacher_forcing': True, 'spectral_radius': 0.0, 'n_inputs': 12, 'n_reservoir': 200}
0.0155236262124
{'n_outputs': 12, 'teacher_forcing': True, 'spectral_radius': 0.0, 'n_inputs': 12, 'n_reservoir': 200}
0.0240860067715
{'n_outputs': 12, 'teacher_forcing': True, 'spectral_radius': 0.0, 'n_inputs': 12, 'n_reservoir': 200}
0.0224030532013
{'n_outputs': 12, 'teacher_forcing': True, 'spectral_radius': 0.0, 'n_inputs': 12, 'n_reservoir': 200}
0.0147926867784
{'n_outputs': 12, 'teacher_forcing': True, 'spectral_radius': 1.6666666666666667, 'n_inputs': 12, 'n_reservoir': 200}
0.0208111178001
{'n_outputs': 12, 'teacher_forcing': True, 'spectral_radius': 1.6666666666666667, 'n_inputs': 12, 'n_reservoir': 200}
0.0184288584429
{'n_outputs': 12, 'teacher_forcing': True, 'spectral_radius': 1.666666666666

[Parallel(n_jobs=1)]: Done  49 tasks       | elapsed:   23.5s


0.0345059094382
{'n_outputs': 12, 'teacher_forcing': True, 'spectral_radius': 11.666666666666668, 'n_inputs': 12, 'n_reservoir': 200}


[Parallel(n_jobs=1)]: Done  50 out of  50 | elapsed:   23.9s finished


GridSearchCV(cv=5, error_score='raise',
       estimator=skESN(n_inputs=12, n_outputs=12, n_reservoir=1000, spectral_radius=1.5,
   teacher_forcing=True),
       fit_params={}, iid=True, n_jobs=1,
       param_grid={'spectral_radius': array([  0.     ,   1.66667,   3.33333,   5.     ,   6.66667,   8.33333,
        10.     ,  11.66667,  13.33333,  15.     ]), 'n_reservoir': [200]},
       pre_dispatch='2*n_jobs', refit=True, scoring=None, verbose=True)