In [2]:
import os
import pandas as pd
import numpy as np
from numba import jit

from sklearn import preprocessing
from sklearn.grid_search import GridSearchCV
from sklearn.base import BaseEstimator, RegressorMixin


import matplotlib.pyplot as plt
plt.rcParams['image.cmap'] = 'jet'
%matplotlib inline



In [3]:
files = ["informe_Lagunillas_20170728235145.xls",  "informe_Lota_Urbana_20170729002437.xls",
"informe_Lota_Rural_20170728235710.xls",  "informe_Meteorologia_20170729002107.xls"]
names = ["lagunillas", "lota_u", "lota_r", "meteo"]

for file,name in zip(files,names):
    data = pd.read_html("../data/"+file,header=0)[0]
    d = data["Velocidad Viento"]
    datetime = pd.to_datetime(data["Fecha"] + " " + data["Hora"],format="%d-%m-%Y %H:%M")
    d.index = datetime
    d.to_csv("../data/"+name+".csv")

In [4]:
@jit
def getDataWindowed(data,inSize,outSize):
    biggest = np.max([inSize,outSize])
    
    matrixIn = np.zeros((len(data)-2*biggest, inSize))
    matrixOut = np.zeros((len(data)-2*biggest, outSize))
    for i in range(len(data)-2*biggest):
        matrixIn[i,:] = data[i:i+inSize]
        matrixOut[i,:] = data[i+inSize+1:i+inSize+outSize+1]
    return matrixIn,matrixOut

In [5]:
def createFolds(dataSize, k):
    vector = np.arange(dataSize)
    splitted = np.array_split(vector,k+1)
    
    folds = []
    
    test_set = []
    for i in range(k):
        test_set = np.hstack((test_set, splitted[i]))
        val_set = splitted[i+1]
        folds.append((test_set.astype('int'),val_set.astype('int')))
    return folds

In [6]:
data = pd.read_csv("../data/meteo.csv",index_col=0,names=["datetime","windspeed"])["windspeed"]

In [28]:
X,y = getDataWindowed(data, 12,5)

In [125]:
class skESN(BaseEstimator):
    def __init__(self, n_reservoir = 1000,
                 spectral_radius = 1.5, sparsity=0.3, leaking_rate=0.3, regularization=1, teacher_forcing= True):
        self.n_inputs = None
        self.n_outputs = None
        self.n_reservoir = n_reservoir
        self.spectral_radius = spectral_radius
        self.sparsity = sparsity
        self.teacher_forcing = teacher_forcing
        self.leaking_rate = leaking_rate
        self.regularization = regularization
        self.last_state = None
        self.activation = np.tanh
        
        
    def get_params(self,deep=True):
        params =  {'n_reservoir':self.n_reservoir,'spectral_radius':self.spectral_radius, 
                   'teacher_forcing':self.teacher_forcing, 'sparsity':self.sparsity,
                  'leaking_rate': self.leaking_rate, "regularization":self.regularization}
        if self.n_inputs and self.n_outputs:
            params["n_inputs"] = self.n_inputs
            params["n_outputs"] = self.n_outputs
        return params
    def fit(self,X,y):
        in_rows,self.n_inputs = X.shape
        out_rows, self.n_outputs = y.shape
        initLen = int(0.01*in_rows)
        
        #Raise exception
        assert(in_rows == out_rows)
        
        #Input length
        N = in_rows
        
        self.Win = (np.random.rand(self.n_reservoir,1+self.n_inputs)-0.5) * 1
        self.W = np.random.rand(self.n_reservoir,self.n_reservoir)-0.5
        self.W *= self.spectral_radius
        
        X_states = np.zeros((1+self.n_inputs+self.n_reservoir,N-initLen))

        self.last_state  = np.zeros(self.n_reservoir)
        
        for t in range(N):
            u = X[t]
            self.last_state = (1-self.leaking_rate)*self.last_state  + self.leaking_rate*np.tanh( np.dot( self.Win, np.hstack((1,u)) ) \
                                                                    + np.dot( self.W, self.last_state  ) )
            if t >= initLen:
                X_states[:,t-initLen] = np.hstack((1,u,self.last_state ))
                

        Y_T = y[initLen:].T
#         self.Wout =np.linalg.lstsq(X,y)[0]

        X_sqrd = np.dot(X_states,X_states.T)+  self.regularization*np.eye(1+self.n_inputs+self.n_reservoir) 
        Y_sqrd = np.dot(Y_T,X_states.T).T

        self.Wout=np.linalg.solve(X_sqrd,Y_sqrd)
        
        return self

    def predict(self,X):
        Y = np.empty((self.n_outputs,self.len(X)))
        
        for t in len(X):
                self.last_state = (1 - self.leaking_rate) * self.last_state + self.leaking_rate*np.tanh
        return 
    
    def score(self,X,y):
        return

In [126]:
t = skESN()
t.fit(X[:500],y[:500])


skESN(leaking_rate=0.3, n_inputs=12, n_outputs=5, n_reservoir=1000,
   regularization=1, sparsity=0.3, spectral_radius=1.5,
   teacher_forcing=True)

In [127]:
reg = 1e-8  # regularization coefficient