In [1]:
from pyoselm.oselm import OSELMRegressor

from sklearn.model_selection import train_test_split


from sklearn.metrics import r2_score
from sklearn import preprocessing
from timeit import default_timer as timer
import math

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import warnings
# Suppress all warnings: no category, module or message filter is given.
warnings.filterwarnings('ignore')
# Directory prefix that is concatenated with a dataset file name when a CSV is loaded.
path = "../datasets/"

In [2]:
def LeferradOSELM(hidden,X,y,f,shuffle_,random_state=None):
    """Fit an OS-ELM regressor chunk by chunk and return it with the held-out split.

    The data are split 67/33 with ``train_test_split``. The first ``hidden``
    training rows are handed to ``fit`` as one chunk; every remaining training
    row is then handed to ``fit`` as its own single-sample chunk.

    Parameters
    ----------
    hidden : int
        Number of hidden neurons (``n_hidden``); also the size of the first chunk.
        NOTE(review): presumably the first chunk must hold at least ``n_hidden``
        samples for the initial OS-ELM phase - confirm against the pyoselm docs.
    X, y : array-like
        Features and targets, forwarded to ``train_test_split``.
    f : str
        Activation name forwarded as ``activation_func``.
    shuffle_ : bool
        Forwarded as ``shuffle`` to ``train_test_split``.
    random_state : int or None, optional
        Forwarded to ``train_test_split`` so the split can be made reproducible.
        The default ``None`` keeps the previous, unseeded behaviour.

    Returns
    -------
    tuple
        ``(fitted_regressor, X_test, y_test)``.
    """
    oselmr = OSELMRegressor(n_hidden=hidden, activation_func=f, random_state=123)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, shuffle=shuffle_, random_state=random_state
    )

    batches_x = [X_train[:hidden]] + [[x_i] for x_i in X_train[hidden:]]
    batches_y = [y_train[:hidden]] + [[y_i] for y_i in y_train[hidden:]]
    for b_x, b_y in zip(batches_x, batches_y):
        # Skip empty chunks. The guard has to look at the current chunk; testing
        # the list of all chunks (as before) was always true inside this loop.
        if len(b_x) > 0:
            oselmr.fit(b_x, b_y)
    return oselmr,X_test,y_test

def metrics(true,pred):
    """Return the RMSE between ``pred`` and ``true`` divided by the NaN-ignoring std of ``pred``.

    NaN entries in the squared deviations are ignored when averaging.
    """
    # NOTE(review): the normaliser is the spread of the predictions, not of the
    # ground truth - confirm this is the intended NRMSE definition.
    squared_errors = computeSquareDeviation(pred, true)
    rmse = np.sqrt(np.nanmean(squared_errors))
    prediction_spread = np.nanstd(pred)
    return rmse / prediction_spread
    
def computeSquareDeviation(predictions, truth):
  """Return the element-wise squared difference between ``predictions`` and ``truth``.

  Both inputs are converted with ``np.asarray`` so plain lists are accepted.
  When the two arrays hold the same number of elements but have different
  shapes (for example ``(n,)`` against ``(n, 1)``), ``predictions`` is reshaped
  to the shape of ``truth`` before subtracting; otherwise NumPy broadcasting
  would silently produce an ``(n, n)`` matrix instead of ``n`` deviations.
  Inputs of different sizes still follow normal broadcasting rules.
  """
  predictions = np.asarray(predictions)
  truth = np.asarray(truth)
  if predictions.shape != truth.shape and predictions.size == truth.size:
    # Same element count, different layout: align instead of broadcasting.
    predictions = predictions.reshape(truth.shape)
  squareDeviation = np.square(predictions-truth)
  return squareDeviation

In [3]:
# NYC taxi dataset: derive calendar features from the timestamp column, then
# sweep hidden-layer sizes for both activation functions.

dataset = "nyc_taxi.csv"
# skiprows=[1] drops the first line that follows the header row.
raw = pd.read_csv(path + dataset, header=0, skiprows=[1])

# Parse the timestamps once and reuse the index for every derived column.
stamps = pd.DatetimeIndex(raw.timestamp)
for column, attribute in (
    ("hour", "hour"),
    ("minute", "minute"),
    ("day", "dayofweek"),
    ("month", "month"),
    ("year", "year"),
):
    raw[column] = [getattr(t, attribute) for t in stamps]
raw = raw.drop(columns=['timestamp'])

data = raw.to_numpy()

# The first column is the target; the remaining columns are the features.
# No feature scaling is applied in this cell.
y = data[:,:1]
X = data[:,1:]

for activation, label in (("sigmoid", "SIGMOID"), ("relu", "RELU")):
    for j in [25,50,100]:
        elapsed = []
        scores = []
        for run in range(1):
            start = timer()
            model,X_test,y_test = LeferradOSELM(j,X,y,activation,True)
            end = timer()
            elapsed.append(end - start)
            pred = model.predict(X_test)
            scores.append(metrics(y_test,pred))
        print("\nNeurons =",j,label)
        print("Tiempo promedio",np.mean(elapsed))
        print("NRMSE promedio",np.mean(scores))


Neurons = 25 SIGMOID
Tiempo promedio 5.593715399999994
NRMSE promedio 0.7922285768964041

Neurons = 50 SIGMOID
Tiempo promedio 14.408046799999994
NRMSE promedio 0.7873130758253564

Neurons = 100 SIGMOID
Tiempo promedio 40.191344799999996
NRMSE promedio 0.72447787594832

Neurons = 25 RELU
Tiempo promedio 5.5566240000000136
NRMSE promedio 0.7023035523168265

Neurons = 50 RELU
Tiempo promedio 13.807851499999998
NRMSE promedio 0.70408017993319

Neurons = 100 RELU
Tiempo promedio 42.653465900000015
NRMSE promedio 0.6163212395377745


In [5]:
# COVID (Chile) dataset: add day/month features from the date column, scale the
# features to [0, 1], then sweep hidden-layer sizes for both activations.
dataset = "covid_chile.csv"
# skiprows=[1] drops the first line that follows the header row.
raw = pd.read_csv(path + dataset, header=0, skiprows=[1])

dates = pd.DatetimeIndex(raw.date)
raw["day"] = [t.day for t in dates]
raw["month"] = [t.month for t in dates]
raw = raw.drop(columns=["date"])

data = raw.to_numpy()

# The first column is the target; the remaining columns are the features.
y = data[:,:1]
X = data[:,1:]

# NOTE(review): the scaler is fitted on all rows, before the train/test split.
min_max_scaler = preprocessing.MinMaxScaler()
X = min_max_scaler.fit_transform(X)

for activation, label in (("sigmoid", "SIGMOID"), ("relu", "RELU")):
    for j in [25,50,100]:
        elapsed = []
        scores = []
        for run in range(1):
            start = timer()
            model,X_test,y_test = LeferradOSELM(j,X,y,activation,True)
            end = timer()
            elapsed.append(end - start)
            pred = model.predict(X_test)
            scores.append(metrics(y_test,pred))
        print("\nNeurons =",j,label)
        print("Tiempo promedio",np.mean(elapsed))
        print("NRMSE promedio",np.mean(scores))


Neurons = 25 SIGMOID
Tiempo promedio 0.14938730000000078
NRMSE promedio 0.01939045486401937

Neurons = 50 SIGMOID
Tiempo promedio 0.35263349999999605
NRMSE promedio 0.01735231617041292

Neurons = 100 SIGMOID
Tiempo promedio 0.8289634999999862
NRMSE promedio 0.006698356856153685

Neurons = 25 RELU
Tiempo promedio 0.14216880000000742
NRMSE promedio 0.024083205289077265

Neurons = 50 RELU
Tiempo promedio 0.30531519999999546
NRMSE promedio 0.01620185739224892

Neurons = 100 RELU
Tiempo promedio 0.7247572999999932
NRMSE promedio 0.018833378858127945


In [7]:
# Bike demand dataset: keep a fixed column order with the target first, scale
# the features to [0, 1], then sweep hidden-layer sizes for both activations.
dataset = "train_bikeDemand.csv"
# skiprows=[1] drops the first line that follows the header row.
raw = pd.read_csv(path + dataset, header=0, skiprows=[1])
# Discard the first column of the file before selecting the modelling columns.
raw = raw.drop(columns=raw.columns[0])

selected_columns = [
    'count','holiday', 'workingday', 'temp', 'atemp', 'humidity', 'windspeed',
    'season_1', 'season_2', 'season_3', 'season_4',
    'weather_1', 'weather_2', 'weather_3', 'weather_4',
    'hour', 'day', 'month', 'year',
]
raw = raw[selected_columns]

data = raw.to_numpy()

# 'count' (first column) is the target; the remaining columns are the features.
y = data[:,:1]
X = data[:,1:]

# NOTE(review): the scaler is fitted on all rows, before the train/test split.
min_max_scaler = preprocessing.MinMaxScaler()
X = min_max_scaler.fit_transform(X)

for activation, label in (("sigmoid", "SIGMOID"), ("relu", "RELU")):
    for j in [25,50,100]:
        elapsed = []
        scores = []
        for run in range(1):
            start = timer()
            model,X_test,y_test = LeferradOSELM(j,X,y,activation,True)
            end = timer()
            elapsed.append(end - start)
            pred = model.predict(X_test)
            scores.append(metrics(y_test,pred))
        print("\nNeurons =",j,label)
        print("Tiempo promedio",np.mean(elapsed))
        print("NRMSE promedio",np.mean(scores))


Neurons = 25 SIGMOID
Tiempo promedio 4.152931200000012
NRMSE promedio 1.2901061290497853

Neurons = 50 SIGMOID
Tiempo promedio 11.46947449999999
NRMSE promedio 1.1281020282118035

Neurons = 100 SIGMOID
Tiempo promedio 33.83839820000003
NRMSE promedio 0.9901728559987408

Neurons = 25 RELU
Tiempo promedio 4.348999400000025
NRMSE promedio 1.5292528110045043

Neurons = 50 RELU
Tiempo promedio 11.769761399999993
NRMSE promedio 1.2085651714822394

Neurons = 100 RELU
Tiempo promedio 33.280370800000014
NRMSE promedio 1.0345458607633518


In [15]:
# Rec-center hourly dataset: add day/month features from the timestamp, scale
# the features to [0, 1], then sweep hidden-layer sizes for both activations
# on an unshuffled split.

dataset = "rec-center-hourly.csv"
# skiprows=[1,2] drops the two lines that follow the header row.
raw = pd.read_csv(path + dataset, header=0, skiprows=[1,2])

stamps = pd.DatetimeIndex(raw.timestamp)
raw["day"] = [t.day for t in stamps]
raw["month"] = [t.month for t in stamps]
raw = raw.drop(columns=['timestamp'])

data = raw.to_numpy(dtype= 'float')

# The first column is the target; the remaining columns are the features.
y = data[:,:1]
X = data[:,1:]

# NOTE(review): the scaler is fitted on all rows, before the train/test split.
min_max_scaler = preprocessing.MinMaxScaler()
X = min_max_scaler.fit_transform(X)

for activation, label in (("sigmoid", "SIGMOID"), ("relu", "RELU")):
    for j in [25,50,100]:
        elapsed = []
        scores = []
        for run in range(1):
            start = timer()
            # shuffle is disabled: the split keeps the original row order.
            model,X_test,y_test = LeferradOSELM(j,X,y,activation,False)
            end = timer()
            elapsed.append(end - start)
            pred = model.predict(X_test)
            scores.append(metrics(y_test,pred))
        print("\nNeurons =",j,label)
        print("Tiempo promedio",np.mean(elapsed))
        print("NRMSE promedio",np.mean(scores))


Neurons = 25 SIGMOID
Tiempo promedio 1.5410825000001296
NRMSE promedio 1.6689360991267652

Neurons = 50 SIGMOID
Tiempo promedio 3.8727086999999756
NRMSE promedio 1.021638663303797

Neurons = 100 SIGMOID
Tiempo promedio 11.30968689999986
NRMSE promedio 1.6255567929494565

Neurons = 25 RELU
Tiempo promedio 1.4044375000000855
NRMSE promedio 7.972177789560509

Neurons = 50 RELU
Tiempo promedio 3.844607399999859
NRMSE promedio 6.079421319479798

Neurons = 100 RELU
Tiempo promedio 10.745680500000162
NRMSE promedio 1.4240648370129514


In [16]:
# Avocados dataset: extract day/month from the Date string, one-hot encode the
# 'type' and 'region' columns, scale the features to [0, 1], then sweep
# hidden-layer sizes for both activations.

avocados = pd.read_csv(path + 'filtered_avocados.csv')

# Split 'Date' on '-' into three string parts and keep day and month as int32.
date_parts = pd.DataFrame(avocados["Date"].str.split('-').tolist(), columns = ['yr', 'month', 'day'])
for part in ("day", "month"):
    avocados[part] = date_parts[part].astype(np.int32)

# One-hot encode the two categorical columns.
for categorical in ("type", "region"):
    avocados[categorical] = pd.Categorical(avocados[categorical])
df_type = pd.get_dummies(avocados['type'], prefix = 'category')
df_region = pd.get_dummies(avocados['region'], prefix = 'category')

# Remove the raw Date, type and region columns, then append the encodings.
avocados = avocados.drop(columns=["Date", "type", "region"])
avocados_cleaned = pd.concat([avocados, df_type, df_region], axis=1, sort=False)

data = avocados_cleaned.to_numpy()

# The first column is the target; the remaining columns are the features.
y = data[:,:1]
X = data[:,1:]

# NOTE(review): the scaler is fitted on all rows, before the train/test split.
min_max_scaler = preprocessing.MinMaxScaler()
X = min_max_scaler.fit_transform(X)

# NOTE(review): the sigmoid sweep uses an unshuffled split while the relu sweep
# uses a shuffled one, so the two sweeps are evaluated on different test sets.
# Confirm which setting is intended and use it for both.
for activation, label, shuffle_split in (
    ("sigmoid", "SIGMOID", False),
    ("relu", "RELU", True),
):
    for j in [25,50,100]:
        elapsed = []
        scores = []
        for run in range(1):
            start = timer()
            model,X_test,y_test = LeferradOSELM(j,X,y,activation,shuffle_split)
            end = timer()
            elapsed.append(end - start)
            pred = model.predict(X_test)
            scores.append(metrics(y_test,pred))
        print("\nNeurons =",j,label)
        print("Tiempo promedio",np.mean(elapsed))
        print("NRMSE promedio",np.mean(scores))




Neurons = 25 SIGMOID
Tiempo promedio 3.191766300000154
NRMSE promedio 1.1067013990753456

Neurons = 50 SIGMOID
Tiempo promedio 8.45430999999985
NRMSE promedio 0.9129759972917441

Neurons = 100 SIGMOID
Tiempo promedio 25.25314419999995
NRMSE promedio 0.8223211823321848

Neurons = 25 RELU
Tiempo promedio 3.1322018000000753
NRMSE promedio 1.0306106000088575

Neurons = 50 RELU
Tiempo promedio 8.418966999999839
NRMSE promedio 1.0144162798403835

Neurons = 100 RELU
Tiempo promedio 23.854961399999866
NRMSE promedio 0.8395106320168128


In [17]:
# Google stock price dataset: replace the Date column with a year feature,
# strip thousands separators from the string-typed numeric columns, scale the
# features to [0, 1], then sweep hidden-layer sizes for both activations.
dataset = "Google_Stock_Price_Train.csv"
# skiprows=[1] drops the first line that follows the header row.
raw = pd.read_csv(path + dataset, header=0, skiprows=[1])

# Only the year is kept from the date. The previous version also built 'day'
# and 'month' columns and dropped them again in the very next statement.
raw['year'] = [t.year for t in pd.DatetimeIndex(raw.Date)]
raw = raw.drop(columns=['Date'])

# 'Volume' and 'Close' are read as strings containing commas; remove the commas
# so the frame can be converted to float below.
for column in ('Volume', 'Close'):
    raw[column] = raw[column].str.replace(',', '')

display(raw.head())

data = raw.to_numpy(dtype = 'float')

# The first column is the target; the remaining columns are the features.
y = data[:,:1]
X = data[:,1:]

# NOTE(review): the scaler is fitted on all rows, before the train/test split.
min_max_scaler = preprocessing.MinMaxScaler()
X = min_max_scaler.fit_transform(X)

for activation, label in (("sigmoid", "SIGMOID"), ("relu", "RELU")):
    for j in [25,50,100]:
        t_ = []
        errores = []
        for i in range(1):
            start = timer()
            model,X_test,y_test = LeferradOSELM(j,X,y,activation,True)
            end = timer()
            t_.append(end - start)
            pred = model.predict(X_test)
            errores.append(metrics(y_test,pred))
        print("\nNeurons =",j,label)
        print("Tiempo promedio",np.mean(t_))
        print("NRMSE promedio",np.mean(errores))

Unnamed: 0,Open,High,Low,Close,Volume,year
0,331.27,333.87,329.08,666.45,5749400,2012
1,329.83,330.75,326.89,657.21,6590300,2012
2,328.34,328.77,323.68,648.24,5405900,2012
3,322.04,322.29,309.46,620.76,11688800,2012
4,313.7,315.72,307.3,621.43,8824000,2012



Neurons = 25 SIGMOID
Tiempo promedio 0.447335700000167
NRMSE promedio 0.024017578450759084

Neurons = 50 SIGMOID
Tiempo promedio 1.2265127999999095
NRMSE promedio 0.01935443452306518

Neurons = 100 SIGMOID
Tiempo promedio 3.3178928999998334
NRMSE promedio 0.036004628695201414

Neurons = 25 RELU
Tiempo promedio 0.41161299999998846
NRMSE promedio 0.024750563963725915

Neurons = 50 RELU
Tiempo promedio 1.0006677000001218
NRMSE promedio 0.023457743168134752

Neurons = 100 RELU
Tiempo promedio 2.8644308000000365
NRMSE promedio 0.025959077360333968


In [18]:
# Abalone dataset: encode 'sex' as an integer code, put the target ('rings')
# first, scale the features to [0, 1], then sweep hidden-layer sizes for both
# activations.

dataset = "Abalone.csv"
# skiprows=[1] drops the first line that follows the header row.
raw = pd.read_csv(path + dataset, header=0, skiprows=[1])

# Named 'sex_codes' rather than 'map': a global called 'map' would shadow the
# builtin map() for every cell executed afterwards in the same kernel.
sex_codes = {'M':0,'F':1,'I':2}
raw = raw.replace({'sex':sex_codes})
raw = raw[["rings","length","diameter","height","weight.w","weight.s","weight.v","weight.sh","sex"]]

data = raw.to_numpy(dtype = 'float')

# 'rings' (first column) is the target; the remaining columns are the features.
y = data[:,:1]
X = data[:,1:]

# NOTE(review): the scaler is fitted on all rows, before the train/test split.
min_max_scaler = preprocessing.MinMaxScaler()
X = min_max_scaler.fit_transform(X)

for activation, label in (("sigmoid", "SIGMOID"), ("relu", "RELU")):
    for j in [25,50,100]:
        t_ = []
        errores = []
        for i in range(1):
            start = timer()
            model,X_test,y_test = LeferradOSELM(j,X,y,activation,True)
            end = timer()
            t_.append(end - start)
            pred = model.predict(X_test)
            errores.append(metrics(y_test,pred))
        print("\nNeurons =",j,label)
        print("Tiempo promedio",np.mean(t_))
        print("NRMSE promedio",np.mean(errores))


Neurons = 25 SIGMOID
Tiempo promedio 1.6273573999999371
NRMSE promedio 0.8988357967758727

Neurons = 50 SIGMOID
Tiempo promedio 4.441134899999952
NRMSE promedio 0.8244244843876786

Neurons = 100 SIGMOID
Tiempo promedio 12.377235400000018
NRMSE promedio 0.8593665062906908

Neurons = 25 RELU
Tiempo promedio 1.459498100000019
NRMSE promedio 0.884433172540677

Neurons = 50 RELU
Tiempo promedio 3.901646600000049
NRMSE promedio 0.8003851921805201

Neurons = 100 RELU
Tiempo promedio 11.336138500000061
NRMSE promedio 0.8587490110409793


In [20]:
# Auto-MPG dataset: drop the free-text 'car name' column, scale the features to
# [0, 1], then sweep hidden-layer sizes for both activations.
dataset = "auto-mpg.csv"
# skiprows=[1] drops the first line that follows the header row.
raw = pd.read_csv(path + dataset, header=0, skiprows=[1])
raw = raw.drop(columns=["car name"])
data = raw.to_numpy(dtype = 'float')

# The first column is the target; the remaining columns are the features.
y = data[:,:1]
X = data[:,1:]

# NOTE(review): the scaler is fitted on all rows, before the train/test split.
min_max_scaler = preprocessing.MinMaxScaler()
X = min_max_scaler.fit_transform(X)

for activation, label in (("sigmoid", "SIGMOID"), ("relu", "RELU")):
    for j in [25,50,100]:
        elapsed = []
        scores = []
        for run in range(1):
            start = timer()
            model,X_test,y_test = LeferradOSELM(j,X,y,activation,True)
            end = timer()
            elapsed.append(end - start)
            pred = model.predict(X_test)
            scores.append(metrics(y_test,pred))
        print("\nNeurons =",j,label)
        print("Tiempo promedio",np.mean(elapsed))
        print("NRMSE promedio",np.mean(scores))


Neurons = 25 SIGMOID
Tiempo promedio 0.14580369999998766
NRMSE promedio 0.43070430568884294

Neurons = 50 SIGMOID
Tiempo promedio 0.32222279999996317
NRMSE promedio 0.37911965524659774

Neurons = 100 SIGMOID
Tiempo promedio 0.7691647999999986
NRMSE promedio 0.40005520724824606

Neurons = 25 RELU
Tiempo promedio 0.13280009999994036
NRMSE promedio 0.3958405627257386

Neurons = 50 RELU
Tiempo promedio 0.3065317000000505
NRMSE promedio 0.4266070619510049

Neurons = 100 RELU
Tiempo promedio 0.7204800000001796
NRMSE promedio 0.47167926204901095
