In [1]:
from pyoselm.oselm import OSELMRegressor

from sklearn.model_selection import train_test_split


from sklearn.metrics import r2_score
from sklearn import preprocessing
from timeit import default_timer as timer
import math

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import warnings
warnings.filterwarnings('ignore')
path = "../datasets/"


In [2]:
def LeferradOSELM(hidden,X,y,f,shuffle_):
    oselmr = OSELMRegressor(n_hidden=hidden, activation_func=f, random_state=123)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, shuffle=shuffle_)
    n_batch = 1
    
    batches_x = [X_train[:hidden]] + [[x_i] for x_i in X_train[hidden:]]
    batches_y = [y_train[:hidden]] + [[y_i] for y_i in y_train[hidden:]]
    for b_x, b_y in zip(batches_x, batches_y):
        if len(batches_x) > 0:
            oselmr.fit(b_x, b_y)
    return oselmr,X_test,y_test

def metrics(true,pred):
    squareDeviation = computeSquareDeviation(pred, true)
    nmrse = np.sqrt(np.nanmean(squareDeviation)) / np.nanstd(pred)
    return nmrse
    
def computeSquareDeviation(predictions, truth):
  squareDeviation = np.square(predictions-truth)
  return squareDeviation

In [3]:
#NYC DATASET

dataset = "nyc_taxi.csv"
raw = pd.read_csv(path + dataset, header=0, skiprows=[1])


raw["hour"] = [t.hour for t in pd.DatetimeIndex(raw.timestamp)]
raw["minute"] = [t.minute for t in pd.DatetimeIndex(raw.timestamp)]
raw["day"] = [t.dayofweek for t in pd.DatetimeIndex(raw.timestamp)]
raw["month"] = [t.month for t in pd.DatetimeIndex(raw.timestamp)]
raw['year'] = [t.year for t in pd.DatetimeIndex(raw.timestamp)]
raw.drop(['timestamp'],inplace=True,axis=1)

data = raw.to_numpy()

X = data[:,1:]
y = data[:,:1]

means = np.mean(y,axis=0)
stds = np.std(y,axis=0)
y = (y-means)/stds

std_Ys = stds[0]
mean_Ys = means[0]





for j in [25,50,100]:
    t_ = []
    errores = []
    for i in range(1):
        start = timer()
        model,X_test,y_test = LeferradOSELM(j,X,y,"sigmoid",True)
        end = timer()
        t_.append(end - start)
        pred = model.predict(X_test)
        predictions = (pred * std_Ys) + mean_Ys
        target = (y_test * std_Ys) + mean_Ys
        errores.append(metrics(target,predictions))
    print("\nNeurons =",j,"SIGMOID")
    print("Tiempo promedio",np.mean(t_))
    print("NRMSE promedio",np.mean(errores))

    
    
for j in [25,50,100]:
    t_ = []
    errores = []
    for i in range(1):
        start = timer()
        model,X_test,y_test = LeferradOSELM(j,X,y,"relu",True)
        end = timer()
        t_.append(end - start)
        pred = model.predict(X_test)
        predictions = (pred * std_Ys) + mean_Ys
        target = (y_test * std_Ys) + mean_Ys
        errores.append(metrics(target,predictions))
    print("\nNeurons =",j,"RELU")
    print("Tiempo promedio",np.mean(t_))
    print("NRMSE promedio",np.mean(errores))


Neurons = 25 SIGMOID
Tiempo promedio 5.587795900000003
NRMSE promedio 0.795498516402602

Neurons = 50 SIGMOID
Tiempo promedio 14.208216800000002
NRMSE promedio 0.7813191888304126

Neurons = 100 SIGMOID
Tiempo promedio 41.640399900000006
NRMSE promedio 0.7162594218847093

Neurons = 25 RELU
Tiempo promedio 6.296189999999996
NRMSE promedio 0.6945730084413646

Neurons = 50 RELU
Tiempo promedio 14.235566399999996
NRMSE promedio 0.7015411344265146

Neurons = 100 RELU
Tiempo promedio 41.72449500000002
NRMSE promedio 0.6004136954068064


In [6]:
#COVID DATASET
dataset = "covid_chile.csv"
raw = pd.read_csv(path + dataset, header=0, skiprows=[1])
raw["day"] = [t.day for t in pd.DatetimeIndex(raw.date)]
raw["month"] = [t.month for t in pd.DatetimeIndex(raw.date)]

raw.drop(["date"],inplace=True,axis=1)

data = raw.to_numpy()

y = data[:,:1]
X = data[:,1:]

means = np.mean(y,axis=0)
stds = np.std(y,axis=0)
y = (y-means)/stds

std_Ys = stds[0]
mean_Ys = means[0]






min_max_scaler = preprocessing.MinMaxScaler()
X = min_max_scaler.fit_transform(X)






for j in [25,50,100]:
    t_ = []
    errores = []
    for i in range(1):
        start = timer()
        model,X_test,y_test = LeferradOSELM(j,X,y,"sigmoid",True)
        end = timer()
        t_.append(end - start)
        pred = model.predict(X_test)
        predictions = (pred * std_Ys) + mean_Ys
        target = (y_test * std_Ys) + mean_Ys
        errores.append(metrics(target,predictions))
    print("\nNeurons =",j,"SIGMOID")
    print("Tiempo promedio",np.mean(t_))
    print("NRMSE promedio",np.mean(errores))

    
    
for j in [25,50,100]:
    t_ = []
    errores = []
    for i in range(1):
        start = timer()
        model,X_test,y_test = LeferradOSELM(j,X,y,"relu",True)
        end = timer()
        t_.append(end - start)
        pred = model.predict(X_test)
#         predictions = pred 
#         target = y_test
        predictions = (pred * std_Ys) + mean_Ys
        target = (y_test * std_Ys) + mean_Ys
        errores.append(metrics(target,predictions))
    print("\nNeurons =",j,"RELU")
    print("Tiempo promedio",np.mean(t_))
    print("NRMSE promedio",np.mean(errores))


Neurons = 25 SIGMOID
Tiempo promedio 0.151529499999981
NRMSE promedio 0.020431605875264575

Neurons = 50 SIGMOID
Tiempo promedio 0.3566361000000029
NRMSE promedio 0.018891195412161256

Neurons = 100 SIGMOID
Tiempo promedio 0.8205302000000074
NRMSE promedio 0.005238476062413792

Neurons = 25 RELU
Tiempo promedio 0.1477444999999875
NRMSE promedio 0.024011371331107437

Neurons = 50 RELU
Tiempo promedio 0.30529319999999416
NRMSE promedio 0.01912895033154531

Neurons = 100 RELU
Tiempo promedio 0.7214710000000082
NRMSE promedio 0.023104170349323893


In [7]:
#BIKE DATASET
dataset = "train_bikeDemand.csv"
raw = pd.read_csv(path + dataset, header=0, skiprows=[1])
raw.drop(raw.columns[0], axis=1,inplace=True)


raw = raw[['count','holiday', 'workingday', 'temp', 'atemp', 'humidity', 'windspeed', 'season_1', 'season_2', 'season_3', 'season_4', 'weather_1', 'weather_2', 'weather_3', 'weather_4', 'hour', 'day', 'month', 'year']]

data = raw.to_numpy()

y = data[:,:1]
X = data[:,1:]

means = np.mean(y,axis=0)
stds = np.std(y,axis=0)
y = (y-means)/stds

std_Ys = stds[0]
mean_Ys = means[0]




# min_max_scaler = preprocessing.MinMaxScaler()
# X = min_max_scaler.fit_transform(X)






for j in [25,50,100]:
    t_ = []
    errores = []
    for i in range(1):
        start = timer()
        model,X_test,y_test = LeferradOSELM(j,X,y,"sigmoid",True)
        end = timer()
        t_.append(end - start)
        pred = model.predict(X_test)
        predictions = (pred * std_Ys) + mean_Ys
        target = (y_test * std_Ys) + mean_Ys
        errores.append(metrics(target,predictions))
    print("\nNeurons =",j,"SIGMOID")
    print("Tiempo promedio",np.mean(t_))
    print("NRMSE promedio",np.mean(errores))

    
    
for j in [25,50,100]:
    t_ = []
    errores = []
    for i in range(1):
        start = timer()
        model,X_test,y_test = LeferradOSELM(j,X,y,"relu",True)
        end = timer()
        t_.append(end - start)
        pred = model.predict(X_test)
#         predictions = pred 
#         target = y_test
        predictions = (pred * std_Ys) + mean_Ys
        target = (y_test * std_Ys) + mean_Ys
        errores.append(metrics(target,predictions))
    print("\nNeurons =",j,"RELU")
    print("Tiempo promedio",np.mean(t_))
    print("NRMSE promedio",np.mean(errores))


Neurons = 25 SIGMOID
Tiempo promedio 3.980520800000022
NRMSE promedio 1.0004671423829923

Neurons = 50 SIGMOID
Tiempo promedio 9.750371400000006
NRMSE promedio 1.1474090486920272

Neurons = 100 SIGMOID
Tiempo promedio 29.00394749999998
NRMSE promedio 1.0240688374995912

Neurons = 25 RELU
Tiempo promedio 4.186543900000004
NRMSE promedio 1.288376243649557

Neurons = 50 RELU
Tiempo promedio 9.872567899999979
NRMSE promedio 1.0785286069603626

Neurons = 100 RELU
Tiempo promedio 29.707523000000037
NRMSE promedio 0.9701791162137201


In [18]:
dataset = "rec-center-hourly.csv"
raw = pd.read_csv(path + dataset, header=0, skiprows=[1,2])

raw["day"] = [t.day for t in pd.DatetimeIndex(raw.timestamp)]
raw["month"] = [t.month for t in pd.DatetimeIndex(raw.timestamp)]

raw.drop(['timestamp'],inplace=True,axis=1)

data = raw.to_numpy()

y = data[:,:1]
X = data[:,1:]

means = np.mean(y,axis=0)
stds = np.std(y,axis=0)
y = (y-means)/stds

std_Ys = stds[0]
mean_Ys = means[0]

min_max_scaler = preprocessing.MinMaxScaler()
X = min_max_scaler.fit_transform(X)



for j in [25,50,100]:
    t_ = []
    errores = []
    for i in range(1):
        start = timer()
        model,X_test,y_test = LeferradOSELM(j,X,y,"sigmoid",False)
        end = timer()
        t_.append(end - start)
        pred = model.predict(X_test)
        predictions = (pred * std_Ys) + mean_Ys
        target = (y_test * std_Ys) + mean_Ys
        errores.append(metrics(target,predictions))
    print("\nNeurons =",j,"SIGMOID")
    print("Tiempo promedio",np.mean(t_))
    print("NRMSE promedio",np.mean(errores))

    
    
for j in [25,50,100]:
    t_ = []
    errores = []
    for i in range(1):
        start = timer()
        model,X_test,y_test = LeferradOSELM(j,X,y,"relu",True)
        end = timer()
        t_.append(end - start)
        pred = model.predict(X_test)
        predictions = (pred * std_Ys) + mean_Ys
        target = (y_test * std_Ys) + mean_Ys
        errores.append(metrics(target,predictions))
    print("\nNeurons =",j,"RELU")
    print("Tiempo promedio",np.mean(t_))
    print("NRMSE promedio",np.mean(errores))


Neurons = 25 SIGMOID
Tiempo promedio 1.4323341000000482
NRMSE promedio 1.668934570246403

Neurons = 50 SIGMOID
Tiempo promedio 3.745005499999934
NRMSE promedio 1.0216372756717191

Neurons = 100 SIGMOID
Tiempo promedio 10.482266399999958
NRMSE promedio 1.6255548959965151

Neurons = 25 RELU
Tiempo promedio 1.3587956000000077
NRMSE promedio 6.232156863085215

Neurons = 50 RELU
Tiempo promedio 3.4679859999999962
NRMSE promedio 5.378899906225576

Neurons = 100 RELU
Tiempo promedio 10.068835199999967
NRMSE promedio 5.606826967485764


In [21]:
#AVOCADOS

avocados = pd.read_csv(path + 'filtered_avocados.csv')
new_columns = pd.DataFrame(avocados["Date"].str.split('-').tolist(), columns = ['yr', 'month', 'day'])

# Change data type of new columns from str to int32
new_columns["day"] = new_columns["day"].astype(np.int32)
new_columns["month"] = new_columns["month"].astype(np.int32)
# Attach new columns
avocados["day"] = new_columns["day"]
avocados["month"] = new_columns["month"]
avocados["type"]=pd.Categorical(avocados["type"])
avocados["region"]=pd.Categorical(avocados["region"])
df_type = pd.get_dummies(avocados['type'], prefix = 'category')
df_region = pd.get_dummies(avocados['region'], prefix = 'category')

# Drop Date, type and region
del avocados["Date"]
del avocados["type"]
del avocados["region"]

# Attach the one-hot encodings columns to other integer ones
avocados_cleaned=pd.concat([avocados, df_type, df_region], axis=1, sort=False)


data = avocados_cleaned.to_numpy()

y = data[:,:1]
X = data[:,1:]

means = np.mean(y,axis=0)
stds = np.std(y,axis=0)
y = (y-means)/stds

std_Ys = stds[0]
mean_Ys = means[0]

min_max_scaler = preprocessing.MinMaxScaler()
X = min_max_scaler.fit_transform(X)



for j in [25,50,100]:
    t_ = []
    errores = []
    for i in range(1):
        start = timer()
        model,X_test,y_test = LeferradOSELM(j,X,y,"sigmoid",False)
        end = timer()
        t_.append(end - start)
        pred = model.predict(X_test)
        predictions = (pred * std_Ys) + mean_Ys
        target = (y_test * std_Ys) + mean_Ys
        errores.append(metrics(target,predictions))
    print("\nNeurons =",j,"SIGMOID")
    print("Tiempo promedio",np.mean(t_))
    print("NRMSE promedio",np.mean(errores))

    
    
for j in [25,50,100]:
    t_ = []
    errores = []
    for i in range(1):
        start = timer()
        model,X_test,y_test = LeferradOSELM(j,X,y,"relu",True)
        end = timer()
        t_.append(end - start)
        pred = model.predict(X_test)
        predictions = (pred * std_Ys) + mean_Ys
        target = (y_test * std_Ys) + mean_Ys
        errores.append(metrics(target,predictions))
    print("\nNeurons =",j,"RELU")
    print("Tiempo promedio",np.mean(t_))
    print("NRMSE promedio",np.mean(errores))


Neurons = 25 SIGMOID
Tiempo promedio 3.1108348000000206
NRMSE promedio 1.1147748988658535

Neurons = 50 SIGMOID
Tiempo promedio 7.8864366000000246
NRMSE promedio 0.9233905055462382

Neurons = 100 SIGMOID
Tiempo promedio 22.14843369999994
NRMSE promedio 0.8239541924839744

Neurons = 25 RELU
Tiempo promedio 3.0003513999999996
NRMSE promedio 1.0302852304919201

Neurons = 50 RELU
Tiempo promedio 7.64829409999993
NRMSE promedio 1.014789296212759

Neurons = 100 RELU
Tiempo promedio 21.659707000000026
NRMSE promedio 0.8425791741555198


In [29]:
#GOOGLE DATASET
dataset = "Google_Stock_Price_Train.csv"
raw = pd.read_csv(path + dataset, header=0, skiprows=[1])

raw["day"] = [t.day for t in pd.DatetimeIndex(raw.Date)]
raw["month"] = [t.month for t in pd.DatetimeIndex(raw.Date)]
raw['year'] = [t.year for t in pd.DatetimeIndex(raw.Date)]
raw.drop(['Date',"day","month"],inplace=True,axis=1)

raw['Volume'] = raw['Volume'].str.replace(',', '')
raw['Close'] = raw['Close'].str.replace(',', '')

display(raw.head())

data = raw.to_numpy(dtype = 'float')

y = data[:,:1]
X = data[:,1:]

means = np.mean(y,axis=0)
stds = np.std(y,axis=0)
y = (y-means)/stds

std_Ys = stds[0]
mean_Ys = means[0]

min_max_scaler = preprocessing.MinMaxScaler()
X = min_max_scaler.fit_transform(X)



for j in [25,50,100]:
    t_ = []
    errores = []
    for i in range(1):
        start = timer()
        model,X_test,y_test = LeferradOSELM(j,X,y,"sigmoid",True)
        end = timer()
        t_.append(end - start)
        pred = model.predict(X_test)
        predictions = (pred * std_Ys) + mean_Ys
        target = (y_test * std_Ys) + mean_Ys
        errores.append(metrics(target,predictions))
    print("\nNeurons =",j,"SIGMOID")
    print("Tiempo promedio",np.mean(t_))
    print("NRMSE promedio",np.mean(errores))

    
    
for j in [25,50,100]:
    t_ = []
    errores = []
    for i in range(1):
        start = timer()
        model,X_test,y_test = LeferradOSELM(j,X,y,"relu",True)
        end = timer()
        t_.append(end - start)
        pred = model.predict(X_test)
        predictions = (pred * std_Ys) + mean_Ys
        target = (y_test * std_Ys) + mean_Ys
        errores.append(metrics(target,predictions))
    print("\nNeurons =",j,"RELU")
    print("Tiempo promedio",np.mean(t_))
    print("NRMSE promedio",np.mean(errores))

Unnamed: 0,Open,High,Low,Close,Volume,year
0,331.27,333.87,329.08,666.45,5749400,2012
1,329.83,330.75,326.89,657.21,6590300,2012
2,328.34,328.77,323.68,648.24,5405900,2012
3,322.04,322.29,309.46,620.76,11688800,2012
4,313.7,315.72,307.3,621.43,8824000,2012



Neurons = 25 SIGMOID
Tiempo promedio 0.4325461000000814
NRMSE promedio 0.0229462724642386

Neurons = 50 SIGMOID
Tiempo promedio 1.1917350999999599
NRMSE promedio 0.020272779813461636

Neurons = 100 SIGMOID
Tiempo promedio 3.106396300000142
NRMSE promedio 0.09614258154692411

Neurons = 25 RELU
Tiempo promedio 0.4151047999998809
NRMSE promedio 0.021815088229928965

Neurons = 50 RELU
Tiempo promedio 0.9712715999999091
NRMSE promedio 0.023438810892846246

Neurons = 100 RELU
Tiempo promedio 2.7113299000000097
NRMSE promedio 0.05676719839398335


In [33]:
#AbALONE DATASET

dataset = "Abalone.csv"
raw = pd.read_csv(path + dataset, header=0, skiprows=[1])
map = {'M':0,'F':1,'I':2}
raw = raw.replace({'sex':map})
raw = raw[["rings","length","diameter","height","weight.w","weight.s","weight.v","weight.sh","sex"]]


data = raw.to_numpy(dtype = 'float')

y = data[:,:1]
X = data[:,1:]

means = np.mean(y,axis=0)
stds = np.std(y,axis=0)
y = (y-means)/stds

std_Ys = stds[0]
mean_Ys = means[0]

min_max_scaler = preprocessing.MinMaxScaler()
X = min_max_scaler.fit_transform(X)



for j in [25,50,100]:
    t_ = []
    errores = []
    for i in range(1):
        start = timer()
        model,X_test,y_test = LeferradOSELM(j,X,y,"sigmoid",True)
        end = timer()
        t_.append(end - start)
        pred = model.predict(X_test)
        predictions = (pred * std_Ys) + mean_Ys
        target = (y_test * std_Ys) + mean_Ys
        errores.append(metrics(target,predictions))
    print("\nNeurons =",j,"SIGMOID")
    print("Tiempo promedio",np.mean(t_))
    print("NRMSE promedio",np.mean(errores))

    
    
for j in [25,50,100]:
    t_ = []
    errores = []
    for i in range(1):
        start = timer()
        model,X_test,y_test = LeferradOSELM(j,X,y,"relu",True)
        end = timer()
        t_.append(end - start)
        pred = model.predict(X_test)
        predictions = (pred * std_Ys) + mean_Ys
        target = (y_test * std_Ys) + mean_Ys
        errores.append(metrics(target,predictions))
    print("\nNeurons =",j,"RELU")
    print("Tiempo promedio",np.mean(t_))
    print("NRMSE promedio",np.mean(errores))


Neurons = 25 SIGMOID
Tiempo promedio 1.5721671000001152
NRMSE promedio 0.9047026815435472

Neurons = 50 SIGMOID
Tiempo promedio 4.024708700000019
NRMSE promedio 0.8609001279804894

Neurons = 100 SIGMOID
Tiempo promedio 11.56558300000006
NRMSE promedio 0.8557301770094331

Neurons = 25 RELU
Tiempo promedio 1.4481640999999854
NRMSE promedio 0.8562627937491328

Neurons = 50 RELU
Tiempo promedio 3.686470800000052
NRMSE promedio 0.8429061629338797

Neurons = 100 RELU
Tiempo promedio 10.371818000000076
NRMSE promedio 0.8640124953670569


In [36]:
dataset = "auto-mpg.csv"
raw = pd.read_csv(path + dataset, header=0, skiprows=[1])
raw.drop(["car name"],inplace=True,axis=1)
data = raw.to_numpy(dtype = 'float')

y = data[:,:1]
X = data[:,1:]

means = np.mean(y,axis=0)
stds = np.std(y,axis=0)
y = (y-means)/stds

std_Ys = stds[0]
mean_Ys = means[0]

min_max_scaler = preprocessing.MinMaxScaler()
X = min_max_scaler.fit_transform(X)



for j in [25,50,100]:
    t_ = []
    errores = []
    for i in range(1):
        start = timer()
        model,X_test,y_test = LeferradOSELM(j,X,y,"sigmoid",True)
        end = timer()
        t_.append(end - start)
        pred = model.predict(X_test)
        predictions = (pred * std_Ys) + mean_Ys
        target = (y_test * std_Ys) + mean_Ys
        errores.append(metrics(target,predictions))
    print("\nNeurons =",j,"SIGMOID")
    print("Tiempo promedio",np.mean(t_))
    print("NRMSE promedio",np.mean(errores))

    
    
for j in [25,50,100]:
    t_ = []
    errores = []
    for i in range(1):
        start = timer()
        model,X_test,y_test = LeferradOSELM(j,X,y,"relu",True)
        end = timer()
        t_.append(end - start)
        pred = model.predict(X_test)
        predictions = (pred * std_Ys) + mean_Ys
        target = (y_test * std_Ys) + mean_Ys
        errores.append(metrics(target,predictions))
    print("\nNeurons =",j,"RELU")
    print("Tiempo promedio",np.mean(t_))
    print("NRMSE promedio",np.mean(errores))


Neurons = 25 SIGMOID
Tiempo promedio 0.14608320000002095
NRMSE promedio 0.41800383937158986

Neurons = 50 SIGMOID
Tiempo promedio 0.32143699999983255
NRMSE promedio 0.47322740458846496

Neurons = 100 SIGMOID
Tiempo promedio 0.705336299999999
NRMSE promedio 0.4978323542369855

Neurons = 25 RELU
Tiempo promedio 0.1310429000000113
NRMSE promedio 0.44417759143475144

Neurons = 50 RELU
Tiempo promedio 0.27657030000000304
NRMSE promedio 0.4349965774955417

Neurons = 100 RELU
Tiempo promedio 0.6559745999998086
NRMSE promedio 0.4071110702648536
