In [25]:
# import required packages
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# ESN
from scipy.sparse import rand as sprand
from scipy.sparse import csr_matrix
from scipy.sparse.linalg import eigs as speigs

# Generate Network

# Input Data

In [27]:
# load csv data for input and output

# 7 columns each for joint angles, angular velocities and angular acceleraions
uVec = pd.read_csv("Sarcos.csv", usecols=np.arange(21),  header=None,
                  names = ['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7', 
                           'v1', 'v2', 'v3', 'v4', 'v5', 'v6', 'v7', 'a1', 'a2', 'a3', 'a4', 'a5', 'a6', 'a7'])
# 7 columns for joint torques
yVec = pd.read_csv("Sarcos.csv", usecols=np.arange(21, 28, 1), header=None, 
                  names = ['T1', 'T2', 'T3', 'T4', 'T5', 'T6', 'T7'])

# to numpy array
u = uVec.values[:, :, np.newaxis]
y = yVec.values[:, :, np.newaxis]


# Self Organized Layer

In [None]:
class SelfOrganizedLayer:
    '''''Self organized layer for Neural Networks using the Generalized Hebbian Learning (GHL) algorithm to update weights. 
    Functions:
        * __init__ : initilizing the network layer, creating a random weight matrix and related variables
        * updateOutput: calcualtes the output for a given input vector. The input is centerd within the function
        * updateWeightMatrix: updates the weight accoring to the GHL algorithm'''''
    
    def __init__(self, layer_size, input_size, eta):
        '''layer_size - number of neurons
        input_size - number of inputs
        eta - learning rate'''
        
        # initalize W with random weights
        self.W = np.random.rand(size = (layer_size, input_size))*2-1
        # self.W = self.W/np.sum(self.W)
        
        # save learning rate
        self.eta = eta
        
        # initalize input sum and mean
        self.uMean = 0 
        self.uAbs = 0.00001
        self.i = 0
    
    def updateOutput(self, u):
        '''u - network input vector'''
        
        # normalize and center input
        self.uMean = self.uMean + (u-self.uMean)/(self.i+1)
        uCenter = u - self.uMean
        self.uAbs = np.maximum(self.uAbs, np.absolute(uCenter))
        uNormCenter = uCenter/self.uAbs
        
        # update index
        self.i += 1
        
        # calculate output
        s = np.tanh(self.W@uNormCenter)
        
        return (uNormCenter, s)
    
    def updateWeights(self, u, s):
        '''u - network input vector
        s - network output'''
        # transpose
        uT = np.transpose(u)
        sT = np.transpose(s)
        
        # calculate GHL update 
        triang = np.tril(s@sT);
        dW = self.eta*(s@uT - triang@self.W);
        
        # update W matrix
        self.W += dW

# ESN

In [None]:
#def computeRMSE():

def normalize_input(inputSequence):
    iMu = mean(inputSequence, axis=0)
    iMax = amax(abs(inputSequence), axis=0)
    normInputSequence = (inputSequence-iMu)/iMax
    return normInputSequence

class ESN():
    def __init__(self,nInputUnits,nReservoirUnits,nOutputUnits,spectralRadius):
        print('Creating Echo State Network...')
        ### STRUCTURE
        self.nInputUnits = nInputUnits
        self.nReservoirUnits = nReservoirUnits
        self.nOutputUnits = nOutputUnits
        self.nTotalUnits = nInputUnits + nReservoirUnits + nOutputUnits
        
        self.spectralRadius = spectralRadius

        ### INITIALIZE WEIGHTS
        ## JAEGER (Sparse reservoir weights) (Polydoros et. al. Algorithm 1)
        success = 0                                             
        while success == 0: # following block might fail
            try:
                self.Wres = sprand(nReservoirUnits, nReservoirUnits, density=10/nReservoirUnits)
                self.Wres = self.Wres.toarray()
                self.Wres[self.Wres!=0] -= 0.5 # modify only nonzero elements
                self.Wres = csr_matrix(self.Wres) # back to sparse
                maxVal = max(abs(speigs(A=self.Wres, k=1, which='LM')[0]))
                self.Wres /= maxVal
                success = 1
            except:
                success = 0   
        self.Wres *= self.spectralRadius
        
        ## MANTAS (Standard array)
        #self.Wres = random.rand(nReservoirUnits, nReservoirUnits)-0.5
        #rho = max(abs(scipy.linalg.eig(self.Wres)[0]))
        #self.Wres *= 1.25 / rho

        self.Win = 2.0 * random.rand(nReservoirUnits, nInputUnits)- 1.0
        self.Wout = zeros((nOutputUnits, nReservoirUnits + nInputUnits))
        self.Wfb = (2.0 * random.rand(nReservoirUnits, nOutputUnits)- 1.0)

        ### INIT DEFAULT PARAMETERS
        self.inputScaling = ones((nInputUnits, 1)) # MAKE SURE INPUT IS NORMALIZED!!!
        self.inputShift = zeros((nInputUnits, 1))
        self.teacherScaling = ones((nOutputUnits, 1)) # DOES TEACHER SCALING MAKE ANY DIFFERENCE???
        self.teacherShift = zeros((nOutputUnits, 1))
        self.teacherForcing = True # Desired output y_teacher instead of predicted y -> Ridge regression!!!  
        self.feedbackScaling = zeros((nOutputUnits, 1))
        
        self.noiseLevel = 0.0
        self.leakingRate = 1
        self.forgetPoints = 100
        self.reg = 1e-5 # If ridge regression!!! (something else than 1)
        
        self.RLS_lambda = 0.9999995
        self.RLS_delta = 0.000001

        self.trained = 0
        self.pseudo = True
        
        print('Successful!')
    
    def train_single(self,inputSequence,outputSequence):
        ## STATE COLLECTION
        nDataPoints = outputSequence.shape[0]
        self.stateCollect = zeros((nDataPoints - self.forgetPoints, self.nInputUnits + self.nReservoirUnits))
        self.totalState = zeros((self.nInputUnits + self.nReservoirUnits + self.nOutputUnits, 1))
        self.reservoirState = zeros((self.nReservoirUnits, 1))
        
        print('Training...')
        collectIndex = 0;
        for i in range(nDataPoints):
            IN = self.inputScaling * array([inputSequence[i,:]]).T + self.inputShift
            self.totalState[self.nReservoirUnits:self.nReservoirUnits+self.nInputUnits, :] = IN;

            # PLAIN ESN (OR LEAKY) # Wres.array if sparse
            self.reservoirState = tanh(hstack((self.Wres.toarray(), self.Win, self.Wfb@ \
                                               diag(self.feedbackScaling[:,0])))@self.totalState)
        
            # Adding noise, more computational but seems to stabilize solutions in models with output feedback
            self.reservoirState += self.noiseLevel *(random.rand(self.nReservoirUnits,1) - 0.5)
            
            if self.teacherForcing:
                self.netOut = self.teacherScaling * array([outputSequence[i,:]]).T + self.teacherShift
            else:
                self.netOut = self.Wout @ vstack((self.reservoirState, IN)) # activation function identity
                
            self.totalState = vstack((self.reservoirState, IN, self.netOut))
            
            if i > self.forgetPoints:
                collectIndex = collectIndex + 1;
                self.stateCollect[collectIndex,:] = hstack((self.reservoirState.T, IN.T))
        
        ## TEACHER COLLECTION
        nOutputPoints  = outputSequence.shape[0]
        self.teacherCollect = zeros(((nOutputPoints - self.forgetPoints), self.nOutputUnits))
        
        outputSequence = outputSequence[self.forgetPoints:,:]
        nOutputPoints = outputSequence.shape[0] # update the size of outputSequence

        self.teacherCollect = array((diag(self.teacherScaling[:,0]) @ outputSequence.T).T + \
                                    matlib.repmat(self.teacherShift.T,nOutputPoints, 1))
        
        if self.pseudo: # (WIENER-HOPF, faster but less stable than pin, H.Jaeger)
            self.Wout = (linalg.pinv(self.stateCollect)@self.teacherCollect).T
                               
        else: # Ridge regression 'Tikhonov' (the larger alpha the smoother output,zero then same)
            covMat = self.stateCollect.T @ self.stateCollect / self.stateCollect.shape[0]
            pVec = self.stateCollect.T @ self.teacherCollect / self.stateCollect.shape[0]
            self.Wout = (linalg.inv(covMat+(self.reg**2)*eye(covMat.shape[0])) @ pVec).T
        
        self.trained = 1
        print('Training finished!')
        
        
    def train_online(self,trainInput,trainOutput): # FROM H.JAEGER TOOLBOX (RLS ALGORITHM)
        nSampleInput = trainInput.shape[0]
        self.stateCollect = zeros((nSampleInput, self.nReservoirUnits + self.nInputUnits))
        SInverse = 1 / self.RLS_delta * eye(self.nReservoirUnits + self.nInputUnits)
        self.totalState = zeros((self.nTotalUnits,1))
        reservoirState = zeros((self.nReservoirUnits,1)) 
        error = zeros((nSampleInput , 1)) 
        weights = zeros((nSampleInput , 1)) 
        
        print('Training...')
        for j in range(nSampleInput):
            IN = self.inputScaling * array([trainInput[j,:]]).T + self.inputShift 
            
            #write input into totalstate
            self.totalState[self.nReservoirUnits:self.nReservoirUnits+self.nInputUnits,:] = IN
            
            # update totalstate except at input positions
            self.reservoirState = tanh(hstack((self.Wres.todense(), self.Win, self.Wfb@ \
                                               diag(self.feedbackScaling[:,0])))@self.totalState)
        
            # Adding noise, more computational but seems to stabilize solutions in models with output feedback
            self.reservoirState += self.noiseLevel *(random.rand(self.nReservoirUnits,1) - 0.5)
            
            self.netOut = self.Wout @ vstack((self.reservoirState, IN)) # activation function identity
            self.totalState = vstack((self.reservoirState, IN, self.netOut))  
            
            state = vstack((self.reservoirState, IN)) 
            
            self.stateCollect[j, :] = state.T
            phi = state.T * SInverse
            k = phi.T/(self.RLS_lambda + phi * state)
            e = self.teacherScaling * trainOutput[j,0] + self.teacherShift - self.netOut[0,:]
            
            # update the weights 
            self.Wout[0,:] = self.Wout[0,:] + (k*e).T 
                                               
            SInverse = (SInverse-k*phi) / self.RLS_lambda
            
        self.trained = 1
        print('Training finished!')
        
                               
    def test(self, inputSequence):
        nDataPoints = inputSequence.shape[0]
        self.stateCollect = zeros((nDataPoints - self.forgetPoints, self.nInputUnits + self.nReservoirUnits))
        self.totalState = zeros((self.nInputUnits + self.nReservoirUnits + self.nOutputUnits, 1))
        self.reservoirState = zeros((self.nReservoirUnits, 1))
        
        collectIndex = 0;
        for i in range(nDataPoints):
            IN = self.inputScaling * array([inputSequence[i,:]]).T + self.inputShift
            self.totalState[self.nReservoirUnits:self.nReservoirUnits+self.nInputUnits, :] = IN;

            # Plain update
            self.reservoirState = tanh(hstack((self.Wres.toarray(), self.Win, self.Wfb@ \
                                               diag(self.feedbackScaling[:,0])))@self.totalState)
        
            # Adding noise, more computational but seems to stabilize solutions in models with output feedback
            self.reservoirState += self.noiseLevel *(random.rand(self.nReservoirUnits,1) - 0.5)
            
            self.netOut = self.Wout @ vstack((self.reservoirState, IN)) # activation function identity
                
            self.totalState = vstack((self.reservoirState, IN, self.netOut))
            
            if i > self.forgetPoints:
                collectIndex = collectIndex + 1;
                self.stateCollect[collectIndex,:] = hstack((self.reservoirState.T, IN.T))
                
        yPred = self.stateCollect @ self.Wout.T
        
        # plot prediction vs true sequence
        plt.figure(2)
        plt.plot(yPred[0:200,0], 'c--',label='Predicted')
        plt.xlabel('Sample')
        plt.ylabel('Torque [Nm]')
        plt.legend()

# Output Layer

In [60]:
class output_layer:
    def __init__(self, s_size, r_size, n_output, sigma_2 = 0.1, phi_2 = 1):
        
        # save parameters
        self.sigma_2 = sigma_2
        self.phi_2 = phi_2
        
        # init weights
        self.W_train = np.zeros((n_output, s_size + r_size))
        
        #Initialize V
        self.V = self.sigma_2 * np.identity(s_size + r_size)
        
    def updateWeights(self, c_t, tau):
        # save previous V
        V_prev = self.V
        
        self.V = np.linalg.inv(self.V + (1/self.sigma_2)* c_t @ c_t.T)
        
        a = self.V @ np.linalg.inv(V_prev) @ self.W_train.T
        b = 1/self.sigma_2 * self.V @ c_t @ tau.T
        print(b.shape)
        print(a.shape)
        self.W_train = np.sum([a.T, b.T], axis=0)
            
    def updateOutput(self, c_t):
        o_dot = np.dot(self.W_train, c_t)
        return o_dot

# Bin

In [61]:
Layer = output_layer(21, 100, 7)

In [66]:
c_t = np.ones((21+100, 1))
tau = np.ones((7,1))
o = Layer.updateOutput(c_t)
Layer.updateWeights(c_t, tau)
Layer.W_train
print(o)

(121, 7)
(121, 7)
[[0.99991736]
 [0.99991736]
 [0.99991736]
 [0.99991736]
 [0.99991736]
 [0.99991736]
 [0.99991736]]
