In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from datetime import datetime
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Input, BatchNormalization, Dense
from tensorflow.keras.layers import Activation, Flatten, Dropout
from tensorflow.keras.callbacks import EarlyStopping


from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_percentage_error

from timeit import default_timer as timer
from sklearn import preprocessing
# Resets 
tf.compat.v1.reset_default_graph()
path = "../datasets/"

n_input_nodes = 100 
n_output_nodes = 1


In [2]:
def computeSquareDeviation(predictions, truth):
  squareDeviation = np.square(predictions-truth)
  return squareDeviation


# Function that initializes weights and biases for a given layer of the network architecture.
def layer(inputs,neurons,layer_name):
    # We need to initialize the weights with the constraint of a standard deviation
    weights = tf.Variable(tf.random.normal((inputs.shape[1],neurons), stddev = 1/neurons ), name = layer_name)
    # With bias we don't need to worry about this as it is used mainly to recenter the function line to the origin
    bias = tf.Variable(tf.zeros([neurons]), name = layer_name)
    return weights,bias

# Function that receives the inputs and network architecture and initializes every layer.
def create_network(X,layers):
    network = []
    variables = []
    # First previous are the data inputs.
    previous = X
    for ix, neurons in enumerate(layers):
        weights,bias = layer(previous,neurons,f'layer_{ix}')
        network.append( (weights,bias) )
        variables.extend( (weights,bias) )
        # Chain weights to next layer.
        previous = weights
    return network, variables

# Function that will iterate through the network architecture and apply the linear (multiplication of the weight and sum of the bias) and non linear (apply the Leaky ReLU activation) tranformations.
def predict(X, network,f):
    net = X
    layer = 1
    # For every layer except the output one (last one) apply the linear transformation and activation
    for weights,bias in network[:-1]:
        with tf.name_scope(f'Layer_{layer}'):
            net = tf.add(tf.matmul(net, weights), bias,name='net')
            if f == "relu":
                net = tf.nn.relu(net, name="relu")
            else:
                net = tf.nn.sigmoid(net, name="sigmoid")
        layer += 1
    weights,bias = network[-1]
    # Output layer
    with tf.name_scope('Output'):
        net = tf.add(tf.matmul(net, weights), bias)
    return net


# Loss function that will calculate the quadratic error between a predicted value and it's target.
def mean_squared_error(predicted, y):
    cost = tf.reduce_mean(tf.math.square(y-predicted))
    return cost

# Function that will create the GradientTape object that will trace the computations and compute the derivatives.
# Receive variables because they already are in a list (variable, gradient)
def grad(X, Y, network, variables,f):
    with tf.GradientTape() as tape:
        predicted = predict(X, network,f)
        loss_val = mean_squared_error(predicted,Y)
    return tape.gradient(loss_val, variables),variables

        
def AndreTensor(X,Y,size,steps,n_input_nodes,n_hidden_nodes,n_output_nodes,valid_X,valid_Y,f,use_earlyStopFunction):
    # Define net architecture
    layers=[n_input_nodes,n_hidden_nodes,n_output_nodes]
    # Define batch size
    batch_size = size

    # Get the network layer initialization
    network, variables = create_network(X,layers)
    # Calculate the batches per epoch
    batches_per_epoch = X.shape[0]//batch_size
    #batches_per_epoch = 1
    learning_rate = 0.0001
    # Initialize the optimizer to be used.
    optimizer = Adam(learning_rate = learning_rate)

    # Number of epochs to be trained
    epochs = steps
    er_ = []
    # Iterate through the epochs
    for epoch in range(epochs):
        print("\r",epoch,"/",epochs,end="")
        # Get indexes for batch row choice
        shuffled = np.arange(len(Y))
        # Randomize these indexes
        np.random.shuffle(shuffled)
        # Iterate through the batches
        for batch_num in range(batches_per_epoch):
            # Get batch starting position
            start = batch_num*batch_size
            # Get training inputs batch and labels input batch, also turn them into float32 as it is numerically safer
            batch_xs = tf.constant(X[shuffled[start:start+batch_size],:].astype(np.float32))
            batch_ys = tf.constant(Y[shuffled[start:start+batch_size]].astype(np.float32))
            # Go get the gradients to be computed
            gradients,variables = grad(batch_xs, batch_ys, network, variables,f)
            # Compute the gradients using the optimizer and update network
            optimizer.apply_gradients(zip(gradients, variables))
        train_pred = predict(tf.constant(X.astype(np.float32)), network,f)
        # Predict the labels for the whole input validation list 
        val_pred = predict(tf.constant(valid_X.astype(np.float32)), network,f)

        # Compute the quadratic error for train and test predicted sets
        train_error = mean_squared_error(train_pred, Y)
        val_error = mean_squared_error(val_pred, valid_Y)
        if(use_earlyStopFunction and epoch >= 2):
            ar = er_[-3:]
            if(val_error >= ar[0] and val_error >= ar[1] and val_error >= ar[2]):
                break
        er_.append(val_error)

    return network

In [3]:
dataset = "Google_Stock_Price_Train.csv"
raw = pd.read_csv(path + dataset, header=0, skiprows=[1])

raw["day"] = [t.day for t in pd.DatetimeIndex(raw.Date)]
raw["month"] = [t.month for t in pd.DatetimeIndex(raw.Date)]
raw['year'] = [t.year for t in pd.DatetimeIndex(raw.Date)]
raw.drop(['Date',"day","month"],inplace=True,axis=1)

raw['Volume'] = raw['Volume'].str.replace(',', '')
raw['Close'] = raw['Close'].str.replace(',', '')

data = raw.to_numpy(dtype='float')

X = data[:,1:]
y = data[:,:1]

means = np.mean(y,axis=0)
stds = np.std(y,axis=0)
y = (y-means)/stds

std_Ys = stds[0]
mean_Ys = means[0]

min_max_scaler = preprocessing.MinMaxScaler()
X = min_max_scaler.fit_transform(X)


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=123)


batch = 1
steps = int(len(X_train) / batch)


use_earlyStopFunction = True

for f in ["relu","sigmoid"]:
    for n_ in [25,50,100]:
        start = timer()
        model_  = AndreTensor(X_train,y_train,batch,steps,n_input_nodes,n_,n_output_nodes,X_test,y_test,f,use_earlyStopFunction)
        end = timer()
        t_ = end - start
        test_predicted = predict(tf.constant(X_test.astype(np.float32)), model_,f)
        predictions = (test_predicted * std_Ys) + mean_Ys
        target = (y_test * std_Ys) + mean_Ys
        squareDeviation = computeSquareDeviation(predictions, target)
        print("\n")
        print(f,n_)
        print("Time",t_)
        print("NRMSE",np.sqrt(np.nanmean(squareDeviation)) / np.nanstd(predictions))

In [5]:
dataset = "Abalone.csv"
raw = pd.read_csv(path + dataset, header=0, skiprows=[1])
map = {'M':0,'F':1,'I':2}
raw = raw.replace({'sex':map})

raw = raw.reindex(columns=['rings','length','diameter','height','weight.w','weight.s','weight.v','weight.sh','sex'])
display(raw.head())

data = raw.to_numpy(dtype='float')

X = data[:,1:]
y = data[:,:1]

means = np.mean(y,axis=0)
stds = np.std(y,axis=0)
y = (y-means)/stds

std_Ys = stds[0]
mean_Ys = means[0]

min_max_scaler = preprocessing.MinMaxScaler()
X = min_max_scaler.fit_transform(X)


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=123)


batch = 1
steps = int(len(X_train) / batch)


use_earlyStopFunction = True

for f in ["relu","sigmoid"]:
    for n_ in [25,50,100]:
        start = timer()
        model_  = AndreTensor(X_train,y_train,batch,steps,n_input_nodes,n_,n_output_nodes,X_test,y_test,f,use_earlyStopFunction)
        end = timer()
        t_ = end - start
        test_predicted = predict(tf.constant(X_test.astype(np.float32)), model_,f)
        predictions = (test_predicted * std_Ys) + mean_Ys
        target = (y_test * std_Ys) + mean_Ys
        squareDeviation = computeSquareDeviation(predictions, target)
        print("\n")
        print(f,n_)
        print("Time",t_)
        print("NRMSE",np.sqrt(np.nanmean(squareDeviation)) / np.nanstd(predictions))

Unnamed: 0,rings,length,diameter,height,weight.w,weight.s,weight.v,weight.sh,sex
0,7,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,0
1,9,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,1
2,10,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,0
3,7,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,2
4,8,0.425,0.3,0.095,0.3515,0.141,0.0775,0.12,2


In [8]:
#COVID DATASET
dataset = "covid_chile.csv"
raw = pd.read_csv(path + dataset, header=0, skiprows=[1])
raw["day"] = [t.day for t in pd.DatetimeIndex(raw.date)]
raw["month"] = [t.month for t in pd.DatetimeIndex(raw.date)]
raw['year'] = [t.year for t in pd.DatetimeIndex(raw.date)]
raw.drop(["date"],inplace=True,axis=1)
raw = raw.reindex(columns=['confirmed','day','month','year','deaths','recovered'])



data = raw.to_numpy(dtype='float')

X = data[:,1:]
y = data[:,:1]

means = np.mean(y,axis=0)
stds = np.std(y,axis=0)
y = (y-means)/stds

std_Ys = stds[0]
mean_Ys = means[0]

min_max_scaler = preprocessing.MinMaxScaler()
X = min_max_scaler.fit_transform(X)


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=123)


batch = 1
steps = int(len(X_train) / batch)


use_earlyStopFunction = False

for f in ["relu","sigmoid"]:
    for n_ in [25,50,100]:
        start = timer()
        model_  = AndreTensor(X_train,y_train,batch,steps,n_input_nodes,n_,n_output_nodes,X_test,y_test,f,use_earlyStopFunction)
        end = timer()
        t_ = end - start
        test_predicted = predict(tf.constant(X_test.astype(np.float32)), model_,f)
        predictions = (test_predicted * std_Ys) + mean_Ys
        target = (y_test * std_Ys) + mean_Ys
        squareDeviation = computeSquareDeviation(predictions, target)
        print("\n")
        print(f,n_)
        print("Time",t_)
        print("NRMSE",np.sqrt(np.nanmean(squareDeviation)) / np.nanstd(predictions))

 299 / 300

relu 25
Time 254.13177789999997
NRMSE 0.012236692
 299 / 300

relu 50
Time 261.5967908000001
NRMSE 0.01706225
 299 / 300

relu 100
Time 266.8086251
NRMSE 0.009368287
 299 / 300

sigmoid 25
Time 270.9827737999999
NRMSE 0.064026564
 299 / 300

sigmoid 50
Time 259.53861059999986
NRMSE 0.06955298
 299 / 300

sigmoid 100
Time 261.8416264000002
NRMSE 0.07746383


In [10]:
#AVOCADOS

avocados = pd.read_csv(path + 'filtered_avocados.csv')
new_columns = pd.DataFrame(avocados["Date"].str.split('-').tolist(), columns = ['yr', 'month', 'day'])

# Change data type of new columns from str to int32
new_columns["day"] = new_columns["day"].astype(np.int32)
new_columns["month"] = new_columns["month"].astype(np.int32)
# Attach new columns
avocados["day"] = new_columns["day"]
avocados["month"] = new_columns["month"]
avocados["type"]=pd.Categorical(avocados["type"])
avocados["region"]=pd.Categorical(avocados["region"])
df_type = pd.get_dummies(avocados['type'], prefix = 'category')
df_region = pd.get_dummies(avocados['region'], prefix = 'category')

# Drop Date, type and region
del avocados["Date"]
del avocados["type"]
del avocados["region"]

# Attach the one-hot encodings columns to other integer ones
avocados_cleaned=pd.concat([avocados, df_type, df_region], axis=1, sort=False)

# Shuffle data using the pandas function sample, with fraction as 1 since we want all data 
avocados = avocados_cleaned.sample(frac=1)
# Converto to numpy array
avocados = avocados.to_numpy()

X = avocados[:,1:]
y = avocados[:,:1]

means = np.mean(y,axis=0)
stds = np.std(y,axis=0)
y = (y-means)/stds

std_Ys = stds[0]
mean_Ys = means[0]

min_max_scaler = preprocessing.MinMaxScaler()
X = min_max_scaler.fit_transform(X)


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=123)


batch = 1
steps = int(len(X_train) / batch)


use_earlyStopFunction = True

for f in ["relu","sigmoid"]:
    for n_ in [25,50,100]:
        start = timer()
        model_  = AndreTensor(X_train,y_train,batch,steps,n_input_nodes,n_,n_output_nodes,X_test,y_test,f,use_earlyStopFunction)
        end = timer()
        t_ = end - start
        test_predicted = predict(tf.constant(X_test.astype(np.float32)), model_,f)
        predictions = (test_predicted * std_Ys) + mean_Ys
        target = (y_test * std_Ys) + mean_Ys
        squareDeviation = computeSquareDeviation(predictions, target)
        print("\n")
        print(f,n_)
        print("Time",t_)
        print("NRMSE",np.sqrt(np.nanmean(squareDeviation)) / np.nanstd(predictions))

 15 / 4891

relu 25
Time 214.50988310000002
NRMSE 0.60192126
 20 / 4891

relu 50
Time 285.7526662999999
NRMSE 0.52774817
 14 / 4891

relu 100
Time 201.3653757000002
NRMSE 0.6197592
 7 / 4891

sigmoid 25
Time 106.09863450000012
NRMSE 0.92879117
 9 / 4891

sigmoid 50
Time 133.03386109999974
NRMSE 0.8584741
 8 / 4891

sigmoid 100
Time 123.00270439999986
NRMSE 0.8897776


In [9]:
dataset = "auto-mpg.csv"
raw = pd.read_csv(path + dataset, header=0, skiprows=[1])
raw.drop(["car name"],inplace=True,axis=1)
display(raw.head())

data = raw.to_numpy(dtype='float')

X = data[:,1:]
y = data[:,:1]

means = np.mean(y,axis=0)
stds = np.std(y,axis=0)
y = (y-means)/stds

std_Ys = stds[0]
mean_Ys = means[0]

min_max_scaler = preprocessing.MinMaxScaler()
X = min_max_scaler.fit_transform(X)


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=123)


batch = 1
steps = int(len(X_train) / batch)


use_earlyStopFunction = False

for f in ["relu","sigmoid"]:
    for n_ in [25,50,100]:
        start = timer()
        model_  = AndreTensor(X_train,y_train,batch,steps,n_input_nodes,n_,n_output_nodes,X_test,y_test,f,use_earlyStopFunction)
        end = timer()
        t_ = end - start
        test_predicted = predict(tf.constant(X_test.astype(np.float32)), model_,f)
        predictions = (test_predicted * std_Ys) + mean_Ys
        target = (y_test * std_Ys) + mean_Ys
        squareDeviation = computeSquareDeviation(predictions, target)
        print("\n")
        print(f,n_)
        print("Time",t_)
        print("NRMSE",np.sqrt(np.nanmean(squareDeviation)) / np.nanstd(predictions))

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model year,origin
0,15.0,8,350.0,165,3693,11.5,70,1
1,18.0,8,318.0,150,3436,11.0,70,1
2,16.0,8,304.0,150,3433,12.0,70,1
3,17.0,8,302.0,140,3449,10.5,70,1
4,15.0,8,429.0,198,4341,10.0,70,1


 264 / 265

relu 25
Time 189.21876919999977
NRMSE 0.42196378
 264 / 265

relu 50
Time 189.71898310000006
NRMSE 0.42817438
 264 / 265

relu 100
Time 190.6256042
NRMSE 0.42347154
 264 / 265

sigmoid 25
Time 189.71132549999993
NRMSE 0.44586417
 264 / 265

sigmoid 50
Time 192.02405810000027
NRMSE 0.430094
 264 / 265

sigmoid 100
Time 194.91069620000007
NRMSE 0.4809105


In [None]:
#NYC DATASET

dataset = "nyc_taxi.csv"
raw = pd.read_csv(path + dataset, header=0, skiprows=[1])


raw["hour"] = [t.hour for t in pd.DatetimeIndex(raw.timestamp)]
raw["minute"] = [t.minute for t in pd.DatetimeIndex(raw.timestamp)]
raw["day"] = [t.dayofweek for t in pd.DatetimeIndex(raw.timestamp)]
raw["month"] = [t.month for t in pd.DatetimeIndex(raw.timestamp)]
raw['year'] = [t.year for t in pd.DatetimeIndex(raw.timestamp)]
raw.drop(['timestamp'],inplace=True,axis=1)

data = raw.to_numpy()

X = data[:,1:]
y = data[:,:1]

means = np.mean(y,axis=0)
stds = np.std(y,axis=0)
y = (y-means)/stds

std_Ys = stds[0]
mean_Ys = means[0]

min_max_scaler = preprocessing.MinMaxScaler()
X = min_max_scaler.fit_transform(X)


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=123)


batch = 1
steps = int(len(X_train) / batch)


use_earlyStopFunction = True

for f in ["relu","sigmoid"]:
    for n_ in [25,50,100]:
        start = timer()
        model_  = AndreTensor(X_train,y_train,batch,steps,n_input_nodes,n_,n_output_nodes,X_test,y_test,f,use_earlyStopFunction)
        end = timer()
        t_ = end - start
        test_predicted = predict(tf.constant(X_test.astype(np.float32)), model_,f)
        predictions = (test_predicted * std_Ys) + mean_Ys
        target = (y_test * std_Ys) + mean_Ys
        squareDeviation = computeSquareDeviation(predictions, target)
        print("\n")
        print(f,n_)
        print("Time",t_)
        print("NRMSE",np.sqrt(np.nanmean(squareDeviation)) / np.nanstd(predictions))

In [None]:
#BIKE DATASET
dataset = "train_bikeDemand.csv"
raw = pd.read_csv(path + dataset, header=0, skiprows=[1])
raw.drop(raw.columns[0], axis=1,inplace=True)
cols = list(raw.columns)
a, b = cols.index('holiday'), cols.index('count')
cols[b], cols[a] = cols[a], cols[b]
raw = raw[cols]

data = raw.to_numpy(dtype='float')

X = data[:,1:]
y = data[:,:1]

means = np.mean(y,axis=0)
stds = np.std(y,axis=0)
y = (y-means)/stds

std_Ys = stds[0]
mean_Ys = means[0]

min_max_scaler = preprocessing.MinMaxScaler()
X = min_max_scaler.fit_transform(X)


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=123)


batch = 1
steps = int(len(X_train) / batch)


use_earlyStopFunction = True

for f in ["relu","sigmoid"]:
    for n_ in [25,50,100]:
        start = timer()
        model_  = AndreTensor(X_train,y_train,batch,steps,n_input_nodes,n_,n_output_nodes,X_test,y_test,f,use_earlyStopFunction)
        end = timer()
        t_ = end - start
        test_predicted = predict(tf.constant(X_test.astype(np.float32)), model_,f)
        predictions = (test_predicted * std_Ys) + mean_Ys
        target = (y_test * std_Ys) + mean_Ys
        squareDeviation = computeSquareDeviation(predictions, target)
        print("\n")
        print(f,n_)
        print("Time",t_)
        print("NRMSE",np.sqrt(np.nanmean(squareDeviation)) / np.nanstd(predictions))

In [None]:
#REC CENTER DATASET
dataset = "rec-center-hourly.csv"
raw = pd.read_csv(path + dataset, header=0, skiprows=[1,2])

raw["day"] = [t.day for t in pd.DatetimeIndex(raw.timestamp)]
raw["month"] = [t.month for t in pd.DatetimeIndex(raw.timestamp)]
raw.drop(["timestamp"],inplace=True,axis=1)
display(raw.head())

data = raw.to_numpy(dtype='float')


X = data[:,1:]
y = data[:,:1]

means = np.mean(y,axis=0)
stds = np.std(y,axis=0)
y = (y-means)/stds

std_Ys = stds[0]
mean_Ys = means[0]

min_max_scaler = preprocessing.MinMaxScaler()
X = min_max_scaler.fit_transform(X)


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=123)


batch = 1
steps = int(len(X_train) / batch)


use_earlyStopFunction = True

for f in ["relu","sigmoid"]:
    for n_ in [25,50,100]:
        start = timer()
        model_  = AndreTensor(X_train,y_train,batch,steps,n_input_nodes,n_,n_output_nodes,X_test,y_test,f,use_earlyStopFunction)
        end = timer()
        t_ = end - start
        test_predicted = predict(tf.constant(X_test.astype(np.float32)), model_,f)
        predictions = (test_predicted * std_Ys) + mean_Ys
        target = (y_test * std_Ys) + mean_Ys
        squareDeviation = computeSquareDeviation(predictions, target)
        print("\n")
        print(f,n_)
        print("Time",t_)
        print("NRMSE",np.sqrt(np.nanmean(squareDeviation)) / np.nanstd(predictions))