In [2]:
import csv
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
from scipy import random
from numpy.linalg import pinv
from numpy.linalg import inv

from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_percentage_error
from timeit import default_timer as timer

random.seed(0)
from sklearn import preprocessing

path = "../datasets/"


numLags = 100
predictionStep = 1

nDimInput = numLags
nDimOutput = 1
lamb=0.0001
outputWeightFF = 0.92 

In [3]:
def getTimeEmbeddedMatrix(sequence, numLags=100, predictionStep=1):
  inDim = numLags
  X = np.zeros(shape=(len(sequence), inDim))
  T = np.zeros(shape=(len(sequence), 1))
  for i in range(numLags-1, len(sequence)-predictionStep):
    X[i, :] = np.array(sequence[columna][(i-numLags+1):(i+1)])
    T[i, :] = sequence[columna][i+predictionStep]
  return (X, T)

def linear(features,weights,bias):
   return np.dot(features, np.transpose(weights)) + bias

def sigmoidActFunc(features):
    return 1.0 / (1.0 + np.exp(-features))


def reluActFunc(features):
  return np.maximum(0,features)


def computeSquareDeviation(predictions, truth):
  squareDeviation = np.square(predictions-truth)
  return squareDeviation


class OSELM(object):
  def __init__(self, inputs, outputs, numHiddenNeurons, forgettingFactor=0.999,act = "sigmoid"):
    self.name = 'OSELM'
    self.inputs = inputs
    self.outputs = outputs
    self.numHiddenNeurons = numHiddenNeurons
    self.act = act
    # input to hidden weights
    self.inputWeights = None
    # bias of hidden units
    self.bias = None
    # hidden to output layer connection
    self.beta = None
    # auxiliary matrix used for sequential learning
    self.M = None

    self.forgettingFactor = forgettingFactor

  def calculateHiddenLayerActivation(self, features):
    """
    Calculate activation level of the hidden layer
    :param features feature matrix with dimension (numSamples, numInputs)
    :return: activation level (numSamples, numHiddenNeurons)
    """
    V = linear(features, self.inputWeights,self.bias)
    if(self.act == "sigmoid"):
        return sigmoidActFunc(V)
    if(self.act == "relu"):
        return reluActFunc(V)


  def initializePhase(self, lamb=0.0001):
    """
    Step 1: Initialization phase
    """
    # randomly initialize the input->hidden connections
    self.inputWeights = np.random.random((self.numHiddenNeurons, self.inputs))
    self.inputWeights = self.inputWeights * 2 - 1
    self.bias = np.random.random((1, self.numHiddenNeurons)) * 2 - 1
    # auxiliary matrix used for sequential learning
    self.M = inv(lamb*np.eye(self.numHiddenNeurons))
    # hidden to output layer connection
    self.beta = np.zeros([self.numHiddenNeurons,self.outputs])



  def train(self, features, targets):
    """
    Step 2: Sequential learning phase
    :param features feature matrix with dimension (numSamples, numInputs)
    :param targets target matrix with dimension (numSamples, numOutputs)
    """
    (numSamples, numOutputs) = targets.shape
    assert features.shape[0] == targets.shape[0]

    H = self.calculateHiddenLayerActivation(features)
    Ht = np.transpose(H)


    self.M = (1/self.forgettingFactor) * self.M - np.dot((1/self.forgettingFactor) * self.M,
                                     np.dot(Ht, np.dot(
                                       pinv(np.eye(numSamples) + np.dot(H, np.dot((1/self.forgettingFactor) * self.M, Ht))),
                                       np.dot(H, (1/self.forgettingFactor) * self.M))))
    self.beta = self.beta + np.dot(self.M, np.dot(Ht, targets - np.dot(H, self.beta)))

  def predict(self, features):
    """
    Make prediction with feature matrix
    :param features: feature matrix with dimension (numSamples, numInputs)
    :return: predictions with dimension (numSamples, numOutputs)
    """
    H = self.calculateHiddenLayerActivation(features)
    prediction = np.dot(H, self.beta)
    return prediction


def TrainModel(columna='none',net = None):
    meanSeq = np.mean(df[columna])
    stdSeq = np.std(df[columna])
    df[columna] = (df[columna] - meanSeq)/stdSeq
    predictions= []
    target= []
    (X, T) = getTimeEmbeddedMatrix(df, numLags, predictionStep)
    for i in range(numLags, len(df)-predictionStep-1):
        net.train(X[[i], :], T[[i], :])
        Y = net.predict(X[[i+1], :])
        predictions.append(Y[0][0])
        target.append(T[i][0])
    predictions = np.array(predictions)
    target = np.array(target)
    predictions = predictions * stdSeq + meanSeq
    target = target * stdSeq + meanSeq
    #Calculate NRMSE from skip_eval to the end
    skip_eval=100
    squareDeviation = computeSquareDeviation(predictions, target)
    squareDeviation[:skip_eval] = None
    nrmse = np.sqrt(np.nanmean(squareDeviation)) / np.nanstd(predictions)
    #print("NRMSE {}".format(nrmse))
    predictions = np.array(predictions)
    target = np.array(target)
    predictions = predictions * stdSeq + meanSeq
    target = target * stdSeq + meanSeq
    
    return nrmse,predictions,target

In [4]:


dataset = "nyc_taxi.csv"
columna = 'passenger_count'

df = pd.read_csv(path + dataset, header=0, skiprows=[1])


for j in [25,50,100]:
    t_ = []
    errors = []
    for i in range(1):

        net = OSELM(inputs=nDimInput,outputs=nDimOutput, numHiddenNeurons=j,forgettingFactor=outputWeightFF,act = "sigmoid")
        net.initializePhase(lamb=lamb)

        start = timer()
        nrmse,predictions,target = TrainModel(columna=columna,net = net)
        end = timer()
        t_.append(end - start)
        errors.append(nrmse)
    print("\nNeurons =",j,"SIGMOID")
    print("Tiempo promedio",np.mean(t_))
    print("NRMSE promedio",np.mean(errors))
    

for j in [25,50,100]:
    t_ = []
    errors = []
    for i in range(1):

        net = OSELM(inputs=nDimInput,outputs=nDimOutput, numHiddenNeurons=j,forgettingFactor=outputWeightFF,act = "relu")
        net.initializePhase(lamb=lamb)

        start = timer()
        nrmse,predictions,target = TrainModel(columna=columna,net = net)
        end = timer()
        t_.append(end - start)
        errors.append(nrmse)
    print("\nNeurons =",j,"RELU")
    print("Tiempo promedio",np.mean(t_))
    print("NRMSE promedio",np.mean(errors))


Neurons = 25 SIGMOID
Tiempo promedio 2.156020099999992
NRMSE promedio 0.36210672993961707

Neurons = 50 SIGMOID
Tiempo promedio 2.423478700000004
NRMSE promedio 0.3995955830848154

Neurons = 100 SIGMOID
Tiempo promedio 7.7942865000000126
NRMSE promedio 0.4895630133964439

Neurons = 25 RELU
Tiempo promedio 2.121854199999987
NRMSE promedio 0.34521223620205127

Neurons = 50 RELU
Tiempo promedio 2.408497699999998
NRMSE promedio 0.3770730002191724

Neurons = 100 RELU
Tiempo promedio 8.16218459999999
NRMSE promedio 0.5051942428815163


In [9]:
#ABALONE DATASET

dataset = "Abalone.csv"
columna = 'rings'

df = pd.read_csv(path + dataset, header=0, skiprows=[1])
map = {'M':0,'F':1,'I':2}
df = df.replace({'sex':map})

for j in [25,50,100]:
    t_ = []
    errors = []
    for i in range(1):

        net = OSELM(inputs=nDimInput,outputs=nDimOutput, numHiddenNeurons=j,forgettingFactor=outputWeightFF,act = "sigmoid")
        net.initializePhase(lamb=lamb)

        start = timer()
        nrmse,predictions,target = TrainModel(columna=columna,net = net)
        end = timer()
        t_.append(end - start)
        errors.append(nrmse)
    print("\nNeurons =",j,"SIGMOID")
    print("Tiempo promedio",np.mean(t_))
    print("NRMSE promedio",np.mean(errors))
    

for j in [25,50,100]:
    t_ = []
    errors = []
    for i in range(1):

        net = OSELM(inputs=nDimInput,outputs=nDimOutput, numHiddenNeurons=j,forgettingFactor=outputWeightFF,act = "relu")
        net.initializePhase(lamb=lamb)

        start = timer()
        nrmse,predictions,target = TrainModel(columna=columna,net = net)
        end = timer()
        t_.append(end - start)
        errors.append(nrmse)
    print("\nNeurons =",j,"RELU")
    print("Tiempo promedio",np.mean(t_))
    print("NRMSE promedio",np.mean(errors))


Neurons = 25 SIGMOID
Tiempo promedio 0.5020526000000132
NRMSE promedio 0.9994812240801328

Neurons = 50 SIGMOID
Tiempo promedio 0.5673383999999828
NRMSE promedio 0.9779684365596123

Neurons = 100 SIGMOID
Tiempo promedio 1.9438456999999971
NRMSE promedio 1.0077015886912708

Neurons = 25 RELU
Tiempo promedio 0.4808353999999895
NRMSE promedio 0.9880062203599944

Neurons = 50 RELU
Tiempo promedio 0.5420799000000329
NRMSE promedio 0.9880873117621674

Neurons = 100 RELU
Tiempo promedio 1.9304614000000129
NRMSE promedio 0.9951909590679145


In [12]:
#AUTO MPG DATASET

dataset = "auto-mpg.csv"
columna = 'mpg'

df = pd.read_csv(path + dataset, header=0, skiprows=[1])
df.drop(['car name'],inplace=True,axis=1)


for j in [25,50,100]:
    t_ = []
    errors = []
    for i in range(1):

        net = OSELM(inputs=nDimInput,outputs=nDimOutput, numHiddenNeurons=j,forgettingFactor=outputWeightFF,act = "sigmoid")
        net.initializePhase(lamb=lamb)

        start = timer()
        nrmse,predictions,target = TrainModel(columna=columna,net = net)
        end = timer()
        t_.append(end - start)
        errors.append(nrmse)
    print("\nNeurons =",j,"SIGMOID")
    print("Tiempo promedio",np.mean(t_))
    print("NRMSE promedio",np.mean(errors))
    

for j in [25,50,100]:
    t_ = []
    errors = []
    for i in range(50):

        net = OSELM(inputs=nDimInput,outputs=nDimOutput, numHiddenNeurons=j,forgettingFactor=outputWeightFF,act = "relu")
        net.initializePhase(lamb=lamb)

        start = timer()
        nrmse,predictions,target = TrainModel(columna=columna,net = net)
        end = timer()
        t_.append(end - start)
        errors.append(nrmse)
    print("\nNeurons =",j,"RELU")
    print("Tiempo promedio",np.mean(t_))
    print("NRMSE promedio",np.mean(errors))


Neurons = 25 SIGMOID
Tiempo promedio 0.036880399999972724
NRMSE promedio 0.7373368765759339

Neurons = 50 SIGMOID
Tiempo promedio 0.04201039999998102
NRMSE promedio 0.9657369694132972

Neurons = 100 SIGMOID
Tiempo promedio 0.13783990000001722
NRMSE promedio 1.1327530930291878

Neurons = 25 RELU
Tiempo promedio 0.03559915199998841
NRMSE promedio 0.6854579642783827

Neurons = 50 RELU
Tiempo promedio 0.041432399999994235
NRMSE promedio 0.793832515742108

Neurons = 100 RELU
Tiempo promedio 0.14008707199999434
NRMSE promedio 0.9746713872667465


In [13]:
#COVID CHILE DATASET
dataset = "covid_chile.csv"
columna = 'confirmed'

df = pd.read_csv(path + dataset, header=0, skiprows=[1])
df["day"] = [t.day for t in pd.DatetimeIndex(df.date)]
df["month"] = [t.month for t in pd.DatetimeIndex(df.date)]
df.drop(["date"],inplace=True,axis=1)

for j in [25,50,100]:
    t_ = []
    errors = []
    for i in range(1):

        net = OSELM(inputs=nDimInput,outputs=nDimOutput, numHiddenNeurons=j,forgettingFactor=outputWeightFF,act = "sigmoid")
        net.initializePhase(lamb=lamb)

        start = timer()
        nrmse,predictions,target = TrainModel(columna=columna,net = net)
        end = timer()
        t_.append(end - start)
        errors.append(nrmse)
    print("\nNeurons =",j,"SIGMOID")
    print("Tiempo promedio",np.mean(t_))
    print("NRMSE promedio",np.mean(errors))
    

for j in [25,50,100]:
    t_ = []
    errors = []
    for i in range(50):

        net = OSELM(inputs=nDimInput,outputs=nDimOutput, numHiddenNeurons=j,forgettingFactor=outputWeightFF,act = "relu")
        net.initializePhase(lamb=lamb)

        start = timer()
        nrmse,predictions,target = TrainModel(columna=columna,net = net)
        end = timer()
        t_.append(end - start)
        errors.append(nrmse)
    print("\nNeurons =",j,"RELU")
    print("Tiempo promedio",np.mean(t_))
    print("NRMSE promedio",np.mean(errors))


Neurons = 25 SIGMOID
Tiempo promedio 0.04336820000003172
NRMSE promedio 0.01249398212992536

Neurons = 50 SIGMOID
Tiempo promedio 0.048648399999933645
NRMSE promedio 0.013205255826171727

Neurons = 100 SIGMOID
Tiempo promedio 0.1429217999999537
NRMSE promedio 0.014103597821836119

Neurons = 25 RELU
Tiempo promedio 0.04230641999999534
NRMSE promedio 0.017995989561990902

Neurons = 50 RELU
Tiempo promedio 0.046721845999998096
NRMSE promedio 0.020039154453769106

Neurons = 100 RELU
Tiempo promedio 0.15567198799999005
NRMSE promedio 0.020905930176133288


In [15]:
#Google Stock Price DATASET
dataset = "Google_Stock_Price_Train.csv"
columna = 'Open'

df = pd.read_csv(path + dataset, header=0, skiprows=[1])
df["day"] = [t.day for t in pd.DatetimeIndex(df.Date)]
df["month"] = [t.month for t in pd.DatetimeIndex(df.Date)]
df['year'] = [t.year for t in pd.DatetimeIndex(df.Date)]
df.drop(['Date',"day","month"],inplace=True,axis=1)

df['Volume'] = df['Volume'].str.replace(',', '')
df['Close'] = df['Close'].str.replace(',', '')


for j in [25,50,100]:
    t_ = []
    errors = []
    for i in range(50):

        net = OSELM(inputs=nDimInput,outputs=nDimOutput, numHiddenNeurons=j,forgettingFactor=outputWeightFF,act = "sigmoid")
        net.initializePhase(lamb=lamb)

        start = timer()
        nrmse,predictions,target = TrainModel(columna=columna,net = net)
        end = timer()
        t_.append(end - start)
        errors.append(nrmse)
    print("\nNeurons =",j,"SIGMOID")
    print("Tiempo promedio",np.mean(t_))
    print("NRMSE promedio",np.mean(errors))
    

for j in [25,50,100]:
    t_ = []
    errors = []
    for i in range(50):

        net = OSELM(inputs=nDimInput,outputs=nDimOutput, numHiddenNeurons=j,forgettingFactor=outputWeightFF,act = "relu")
        net.initializePhase(lamb=lamb)

        start = timer()
        nrmse,predictions,target = TrainModel(columna=columna,net = net)
        end = timer()
        t_.append(end - start)
        errors.append(nrmse)
    print("\nNeurons =",j,"RELU")
    print("Tiempo promedio",np.mean(t_))
    print("NRMSE promedio",np.mean(errors))


Neurons = 25 SIGMOID
Tiempo promedio 0.14259181799999623
NRMSE promedio 0.3913034833447124


  squareDeviation = np.square(predictions-truth)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  sqr = np.multiply(arr, arr, out=arr)
  nrmse = np.sqrt(np.nanmean(squareDeviation)) / np.nanstd(predictions)



Neurons = 50 SIGMOID
Tiempo promedio 0.16513215800000808
NRMSE promedio nan

Neurons = 100 SIGMOID
Tiempo promedio 0.5355518120000079
NRMSE promedio nan

Neurons = 25 RELU
Tiempo promedio 0.13886373399999685
NRMSE promedio 0.9856651344959794

Neurons = 50 RELU
Tiempo promedio 0.15804688400001396
NRMSE promedio 1.0468024339018858

Neurons = 100 RELU
Tiempo promedio 0.5321892339999863
NRMSE promedio 1.0467893346785346


In [16]:
#REC CENTER DATASET
dataset = "rec-center-hourly.csv"
columna = 'kw_energy_consumption'

df = pd.read_csv(path + dataset, header=0, skiprows=[1,2])

df["day"] = [t.day for t in pd.DatetimeIndex(df.timestamp)]
df["month"] = [t.month for t in pd.DatetimeIndex(df.timestamp)]
df['year'] = [t.year for t in pd.DatetimeIndex(df.timestamp)]
df.drop(['timestamp'],inplace=True,axis=1)


for j in [25,50,100]:
    t_ = []
    errors = []
    for i in range(50):

        net = OSELM(inputs=nDimInput,outputs=nDimOutput, numHiddenNeurons=j,forgettingFactor=outputWeightFF,act = "sigmoid")
        net.initializePhase(lamb=lamb)

        start = timer()
        nrmse,predictions,target = TrainModel(columna=columna,net = net)
        end = timer()
        t_.append(end - start)
        errors.append(nrmse)
    print("\nNeurons =",j,"SIGMOID")
    print("Tiempo promedio",np.mean(t_))
    print("NRMSE promedio",np.mean(errors))
    

for j in [25,50,100]:
    t_ = []
    errors = []
    for i in range(50):

        net = OSELM(inputs=nDimInput,outputs=nDimOutput, numHiddenNeurons=j,forgettingFactor=outputWeightFF,act = "relu")
        net.initializePhase(lamb=lamb)

        start = timer()
        nrmse,predictions,target = TrainModel(columna=columna,net = net)
        end = timer()
        t_.append(end - start)
        errors.append(nrmse)
    print("\nNeurons =",j,"RELU")
    print("Tiempo promedio",np.mean(t_))
    print("NRMSE promedio",np.mean(errors))

Unnamed: 0,timestamp,kw_energy_consumption
0,7/2/10 0:00,21.2
1,7/2/10 1:00,16.4
2,7/2/10 2:00,4.7
3,7/2/10 3:00,4.7
4,7/2/10 4:00,4.6



Neurons = 25 SIGMOID
Tiempo promedio 0.5282479059999968
NRMSE promedio 0.6623482198861683

Neurons = 50 SIGMOID
Tiempo promedio 0.5882494620000103
NRMSE promedio 0.7515940320766212

Neurons = 100 SIGMOID
Tiempo promedio 1.9999759660000063
NRMSE promedio 0.8436845031495867

Neurons = 25 RELU
Tiempo promedio 0.5114657379999972
NRMSE promedio 0.6411522426117733

Neurons = 50 RELU
Tiempo promedio 0.5764492740000015
NRMSE promedio 0.7151724815103746

Neurons = 100 RELU
Tiempo promedio 2.003557045999987
NRMSE promedio 0.8242836820979802


In [17]:
dataset = "train_bikeDemand.csv"
columna = 'count'

df = pd.read_csv(path + dataset, header=0, skiprows=[1,2])
df = df.drop(df.columns[0], axis=1)

for j in [25,50,100]:
    t_ = []
    errors = []
    for i in range(50):

        net = OSELM(inputs=nDimInput,outputs=nDimOutput, numHiddenNeurons=j,forgettingFactor=outputWeightFF,act = "sigmoid")
        net.initializePhase(lamb=lamb)

        start = timer()
        nrmse,predictions,target = TrainModel(columna=columna,net = net)
        end = timer()
        t_.append(end - start)
        errors.append(nrmse)
    print("\nNeurons =",j,"SIGMOID")
    print("Tiempo promedio",np.mean(t_))
    print("NRMSE promedio",np.mean(errors))
    

for j in [25,50,100]:
    t_ = []
    errors = []
    for i in range(50):

        net = OSELM(inputs=nDimInput,outputs=nDimOutput, numHiddenNeurons=j,forgettingFactor=outputWeightFF,act = "relu")
        net.initializePhase(lamb=lamb)

        start = timer()
        nrmse,predictions,target = TrainModel(columna=columna,net = net)
        end = timer()
        t_.append(end - start)
        errors.append(nrmse)
    print("\nNeurons =",j,"RELU")
    print("Tiempo promedio",np.mean(t_))
    print("NRMSE promedio",np.mean(errors))


Neurons = 25 SIGMOID
Tiempo promedio 1.33391348000001
NRMSE promedio 0.7022228833275322

Neurons = 50 SIGMOID
Tiempo promedio 1.5139906540000083
NRMSE promedio 0.7641704342515264

Neurons = 100 SIGMOID
Tiempo promedio 5.15114004600001
NRMSE promedio 0.8554179023612407

Neurons = 25 RELU
Tiempo promedio 1.281992204000012
NRMSE promedio 0.9845614068377293

Neurons = 50 RELU
Tiempo promedio 1.4406652340000163
NRMSE promedio 1.0040065753814726


  squareDeviation = np.square(predictions-truth)
  sqr = np.multiply(arr, arr, out=arr)
  nrmse = np.sqrt(np.nanmean(squareDeviation)) / np.nanstd(predictions)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)



Neurons = 100 RELU
Tiempo promedio 4.961359275999993
NRMSE promedio nan


In [18]:
dataset = 'filtered_avocados.csv'
avocados = pd.read_csv(path + dataset)
new_columns = pd.DataFrame(avocados["Date"].str.split('-').tolist(), columns = ['yr', 'month', 'day'])


new_columns["day"] = new_columns["day"].astype(np.int32)
new_columns["month"] = new_columns["month"].astype(np.int32)

avocados["day"] = new_columns["day"]
avocados["month"] = new_columns["month"]
avocados["type"]=pd.Categorical(avocados["type"])
avocados["region"]=pd.Categorical(avocados["region"])
df_type = pd.get_dummies(avocados['type'], prefix = 'category')
df_region = pd.get_dummies(avocados['region'], prefix = 'category')


del avocados["Date"]
del avocados["type"]
del avocados["region"]
avocados_cleaned=pd.concat([avocados, df_type, df_region], axis=1, sort=False)

df = avocados_cleaned
columna = 'AveragePrice'


for j in [25,50,100]:
    t_ = []
    errors = []
    for i in range(20):

        net = OSELM(inputs=nDimInput,outputs=nDimOutput, numHiddenNeurons=j,forgettingFactor=outputWeightFF,act = "sigmoid")
        net.initializePhase(lamb=lamb)

        start = timer()
        nrmse,predictions,target = TrainModel(columna=columna,net = net)
        end = timer()
        t_.append(end - start)
        errors.append(nrmse)
    print("\nNeurons =",j,"SIGMOID")
    print("Tiempo promedio",np.mean(t_))
    print("NRMSE promedio",np.mean(errors))
    

for j in [25,50,100]:
    t_ = []
    errors = []
    for i in range(20):

        net = OSELM(inputs=nDimInput,outputs=nDimOutput, numHiddenNeurons=j,forgettingFactor=outputWeightFF,act = "relu")
        net.initializePhase(lamb=lamb)

        start = timer()
        nrmse,predictions,target = TrainModel(columna=columna,net = net)
        end = timer()
        t_.append(end - start)
        errors.append(nrmse)
    print("\nNeurons =",j,"RELU")
    print("Tiempo promedio",np.mean(t_))
    print("NRMSE promedio",np.mean(errors))


Neurons = 25 SIGMOID
Tiempo promedio 0.8891213950000179
NRMSE promedio 1.2982807540289565

Neurons = 50 SIGMOID
Tiempo promedio 0.9997458900000197
NRMSE promedio 1.1349685422860085

Neurons = 100 SIGMOID
Tiempo promedio 3.3965286500000276
NRMSE promedio 1.064693447499741

Neurons = 25 RELU
Tiempo promedio 0.8718761449999874
NRMSE promedio 1.2444976880301095

Neurons = 50 RELU
Tiempo promedio 0.9715577750000648
NRMSE promedio 1.1279664473166917

Neurons = 100 RELU
Tiempo promedio 3.33625261500008
NRMSE promedio 1.0013193960936833
