In [7]:
import pandas as pd
import numpy as np

from tensorflow import keras
from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor, KerasClassifier
from tensorflow.keras import losses
from tensorflow.keras.constraints import MaxNorm as maxnorm
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, cross_validate
from sklearn.preprocessing import MinMaxScaler, RobustScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
import pickle
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
#from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, roc_auc_score
from tensorflow.keras.models import load_model
import matplotlib.pyplot as plt
%matplotlib inline
from IPython.display import Audio
sound_file = 'beep.wav'
from tensorflow.keras.callbacks import *


import numpy as np
import pandas as pd   
import matplotlib.pyplot as plt
import random
import os

from sklearn.metrics import mean_absolute_error

import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras.models import *
from tensorflow.keras.callbacks import * 
from tensorflow.keras.optimizers import *
from tensorflow.keras import backend as K

In [149]:
class T2V(Layer):
    
    def __init__(self, output_dim=None, **kwargs):
        self.output_dim = output_dim
        super(T2V, self).__init__(**kwargs)
        
    def build(self, input_shape):

        self.W = self.add_weight(name='W',
                                shape=(input_shape[-1], self.output_dim),
                                initializer='uniform',
                                trainable=True)

        self.P = self.add_weight(name='P',
                                shape=(input_shape[1], self.output_dim),
                                initializer='uniform',
                                trainable=True)

        self.w = self.add_weight(name='w',
                                shape=(input_shape[1], 1),
                                initializer='uniform',
                                trainable=True)

        self.p = self.add_weight(name='p',
                                shape=(input_shape[1], 1),
                                initializer='uniform',
                                trainable=True)

        super(T2V, self).build(input_shape)
        
    def call(self, x):
        
        original = self.w * x + self.p
        sin_trans = K.sin(K.dot(x, self.W) + self.P)
        
        return K.concatenate([sin_trans, original], -1)


In [8]:
np.random.seed(7)

In [285]:
# load dataset
dataframe = pd.read_csv("../data/pca_all_reg.csv", sep=',')

In [286]:
dataframe.head(3)

Unnamed: 0,comp_1,comp_2,comp_3,comp_4,comp_5,comp_6,priceUSD
0,0.074775,-0.058775,-0.016531,-0.026882,-0.018071,0.034708,0.0495
1,0.103432,-0.097974,0.025112,0.008138,-0.00961,0.003282,0.0726
2,0.078321,-0.038384,-0.004899,-0.021897,-0.02156,0.029447,0.0859


In [342]:
dataframe.shape

(735, 7)

In [288]:
length=dataframe.shape[1]-1

In [289]:
length

6

In [290]:
# split into input (X) and output (Y) variables
X = dataframe.iloc[:,0:length]
y = dataframe['priceUSD']

In [291]:
X.head(3)

Unnamed: 0,comp_1,comp_2,comp_3,comp_4,comp_5,comp_6
0,0.074775,-0.058775,-0.016531,-0.026882,-0.018071,0.034708
1,0.103432,-0.097974,0.025112,0.008138,-0.00961,0.003282
2,0.078321,-0.038384,-0.004899,-0.021897,-0.02156,0.029447


In [292]:
y=np.ravel(y)

In [293]:
y

array([ 0.0495,  0.0726,  0.0859,  0.0783,  0.0767,  0.0649,  0.0566,
        0.0581,  0.053 ,  0.053 ,  0.058 ,  0.0595,  0.0648,  0.0663,
        0.0664,  0.0611,  0.0613,  0.06  ,  0.0597,  0.0596,  0.0622,
        0.0599,  0.06  ,  0.066 ,  0.069 ,  0.0635,  0.069 ,  0.0655,
        0.0663,  0.0661,  0.0643,  0.0678,  0.069 ,  0.0673,  0.0661,
        0.066 ,  0.0655,  0.0655,  0.065 ,  0.0656,  0.0644,  0.0645,
        0.0646,  0.0644,  0.0648,  0.0621,  0.0625,  0.0622,  0.0609,
        0.0618,  0.062 ,  0.0622,  0.0613,  0.0615,  0.0611,  0.0614,
        0.0628,  0.0618,  0.0621,  0.062 ,  0.0615,  0.0619,  0.0599,
        0.06  ,  0.0618,  0.0621,  0.0629,  0.0623,  0.0622,  0.0621,
        0.0621,  0.062 ,  0.0621,  0.0619,  0.062 ,  0.0618,  0.0619,
        0.0616,  0.0613,  0.0612,  0.0614,  0.0622,  0.0651,  0.0769,
        0.0884,  0.0948,  0.093 ,  0.0945,  0.0985,  0.103 ,  0.103 ,
        0.101 ,  0.102 ,  0.103 ,  0.0985,  0.0995,  0.102 ,  0.105 ,
        0.107 ,  0.1

In [294]:
shape=X.shape[1]

In [314]:
X.shape

(735, 6)

In [295]:
X_train,X_test, y_train, y_test = train_test_split(X,y,test_size=0.2, train_size=0.8, shuffle=True, random_state=7)

In [296]:
estimators=[]

In [297]:
estimators.append(['mixmax',MinMaxScaler()])

In [298]:
estimators.append(['robust',RobustScaler()])

In [299]:
scale=Pipeline(estimators,verbose=True)

In [300]:
scale.fit(X_train)

[Pipeline] ............ (step 1 of 2) Processing mixmax, total=   0.0s
[Pipeline] ............ (step 2 of 2) Processing robust, total=   0.0s


Pipeline(memory=None,
         steps=[('mixmax', MinMaxScaler(copy=True, feature_range=(0, 1))),
                ['robust',
                 RobustScaler(copy=True, quantile_range=(25.0, 75.0),
                              with_centering=True, with_scaling=True)]],
         verbose=True)

In [301]:
X_train=scale.transform(X_train)

In [302]:
X_test=scale.transform(X_test)

In [303]:
def lr_schedule(epoch):
    """Learning Rate Schedule

    Learning rate is scheduled to be reduced after 80, 120, 160, 180 epochs.
    Called automatically every epoch as part of callbacks during training.

    # Arguments
        epoch (int): The number of epochs

    # Returns
        lr (float32): learning rate
    """
    lr = 1e-3
    if epoch > 180:
        lr *= 0.5e-3
    elif epoch > 160:
        lr *= 1e-3
    elif epoch > 120:
        lr *= 1e-2
    elif epoch > 80:
        lr *= 1e-1
    print('Learning rate: ', lr)
    return lr

In [333]:
# define neural network model
def sequential_model(initializer='normal', activation='relu', neurons=300, NUM_FEATURES=shape):
    # create model
    model = Sequential()
    model.add(Dense(neurons, input_shape=(NUM_FEATURES,), kernel_initializer=initializer, activation=activation))
    model.add(Dense(neurons, activation=activation))
    model.add(Dense(neurons, activation=activation))
    model.add(Dense(1, activation=activation, kernel_initializer=initializer))
    model.add(T2V(neurons))
    model.add(Dense(1, activation=activation, kernel_initializer=initializer))
    #model.add(LSTM(neurons, activation=activation))
    # Compile model
    adam=keras.optimizers.Adam(lr=lr_schedule(0), amsgrad=True)
    #sgd=keras.optimizers.SGD(learning_rate=0.08, momentum=0.9, nesterov=False)
    model.compile(loss='logcosh', optimizer=adam, metrics=['mae'])
    return model

In [334]:
mcp_save = ModelCheckpoint('ANN_reg_seven_new.hdf5', save_best_only=True, monitor='val_loss', mode='auto')
earlyStopping = EarlyStopping(monitor='val_loss', patience=10,verbose=1, mode='auto')

In [335]:
regressor=KerasRegressor(build_fn=sequential_model, batch_size=64, epochs=100,verbose=1, shuffle=True,
                         validation_split=0.1,validation_freq=1,
                        use_multiprocessing=True) #callbacks=[mcp_save,earlyStopping])

In [336]:

regressor.fit(X_train,y_train)


Learning rate:  0.001
Train on 529 samples, validate on 59 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100


Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100


Epoch 98/100
Epoch 99/100
Epoch 100/100


<tensorflow.python.keras.callbacks.History at 0x7f4110e85ed0>

In [337]:
y_train_pred=regressor.predict(X_train)
#X_train[0:]
#list(zip(*X_train))[0]
y_train.shape



(588,)

In [338]:
r2_score(y_train, y_train_pred) #training score

0.5861677600893735

In [339]:
y_pred=regressor.predict(X_test)



In [340]:
pd.DataFrame(zip(y_test,y_pred),columns=['y_test','y_pred'])

Unnamed: 0,y_test,y_pred
0,6.6260,7.007493
1,5.7810,0.392555
2,4.9300,5.016972
3,6.7560,3.284644
4,16.4450,17.839413
...,...,...
142,0.0859,0.392555
143,14.6500,9.418877
144,3.2620,3.574455
145,5.0990,6.324788


In [341]:
r2=r2_score(y_test,y_pred) #testing score/ r^2
r2

0.561940050316341

In [136]:
mae=mean_absolute_error(y_test,y_pred) #mae
mae

2797.461182416228

In [137]:
rmse=np.sqrt(mean_squared_error(y_test,y_pred)) #rmse
rmse

4686.802277097698

In [138]:
#mape=mean_absolute_percentage_error(y_test,y_pred) #mape
#mape

In [139]:
pd.DataFrame(zip(['MAE','RMSE','R^2'],[mae,rmse,r2])).transpose()

Unnamed: 0,0,1,2
0,MAE,RMSE,R^2
1,2797.46,4686.8,-404.997


In [140]:
regressor.model.save('ANN_reg_seven_new.h5')

In [141]:
regressor.model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_16 (Dense)             (None, 300)               2100      
_________________________________________________________________
dense_17 (Dense)             (None, 300)               90300     
_________________________________________________________________
dense_18 (Dense)             (None, 300)               90300     
_________________________________________________________________
dense_19 (Dense)             (None, 1)                 301       
Total params: 183,001
Trainable params: 183,001
Non-trainable params: 0
_________________________________________________________________


In [275]:
### DEFINE T2V LAYER ###


class T2V(Layer):
    
    def __init__(self, output_dim=None, **kwargs):
        self.output_dim = output_dim
        super(T2V, self).__init__(**kwargs)
        
    def build(self, input_shape):

        self.W = self.add_weight(name='W',
                                shape=(input_shape[-1], self.output_dim),
                                initializer='uniform',
                                trainable=True)

        self.P = self.add_weight(name='P',
                                shape=(input_shape[1], self.output_dim),
                                initializer='uniform',
                                trainable=True)

        self.w = self.add_weight(name='w',
                                shape=(input_shape[1], 1),
                                initializer='uniform',
                                trainable=True)

        self.p = self.add_weight(name='p',
                                shape=(input_shape[1], 1),
                                initializer='uniform',
                                trainable=True)

        super(T2V, self).build(input_shape)
        
    def call(self, x):
        
        original = self.w * x + self.p
        sin_trans = K.sin(K.dot(x, self.W) + self.P)
        
        return K.concatenate([sin_trans, original], -1)
    
### CREATE GENERATOR FOR LSTM AND T2V ###

sequence_length = 30 # orig: 24 .  note, 24*7 = 168

def gen_sequence(id_df, seq_length, seq_cols):
    
    data_matrix = id_df[seq_cols].values
    num_elements = data_matrix.shape[0]

    for start, stop in zip(range(0, num_elements-seq_length), range(seq_length, num_elements)):
        yield data_matrix[start:stop, :]

def gen_labels(id_df, seq_length, label):
    
    data_matrix = id_df[label].values
    num_elements = data_matrix.shape[0]
    
    return data_matrix[seq_length:num_elements, :]
### DEFINE MODEL STRUCTURES ###

def set_seed_TF2(seed):
    
    tf.random.set_seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    random.seed(seed)
    
    
def T2V_NN(param, dim):
    
    inp = Input(shape=(dim,1))
    x = T2V(param['t2v_dim'])(inp)
    x = LSTM(param['unit'], activation=param['act'])(x)
    x = Dropout(0.5)(x)
    x = Dense(1)(x)
    
    m = Model(inp, x)
    m.compile(loss='mse', optimizer=Adam(lr=param['lr']))
    
    return m


def NN(param, dim):
    
    inp = Input(shape=(dim,1))
    x = LSTM(param['unit'], activation=param['act'])(inp)
    x = Dense(1)(x)
    
    m = Model(inp, x)
    m.compile(loss='mse', optimizer=Adam(lr=param['lr']))
    
    return m
### PREPARE DATA TO FEED MODELS ###
df["Data"] = df["Date"].dt.date
df["Ora solare"] = df["Date"].dt.hour
df.drop(["datetime"],axis=1,inplace=True, errors="ignore")
df = df.dropna()

print(df.shape)
df.head()

X, Y = [], []
for sequence in gen_sequence(df, sequence_length, ['priceUSD']):
    X.append(sequence)
    
for sequence in gen_labels(df, sequence_length, ['priceUSD']):
    Y.append(sequence)
    
X = np.asarray(X)
Y = np.asarray(Y)
### TRAIN TEST SPLIT ###

train_dim = int(0.9*len(df))
X_train, X_test = X[:train_dim], X[train_dim:]
y_train, y_test = Y[:train_dim], Y[train_dim:]

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

### DEFINE PARAM GRID FOR HYPERPARM OPTIMIZATION ###

base_param = {
    'unit': 64,
    't2v_dim': 64,
    'lr': 2e-3, 
    'act': 'relu', 
    'epochs': 100, # 200,
    'batch_size': 256
}

model = NN(param=base_param, dim=sequence_length)
model.fit(X_train, y_train, validation_split=0.2, shuffle=False, epochs=5000)
# pred_t2v = kgs_t2v.best_model.predict(X_test).ravel()
# pred_t2v = model.predict(X_test).ravel()
# print("MAE")
# mean_absolute_error(y_test.ravel(), pred_t2v)

(4020, 742)
(3618, 30, 1) (3618, 1)
(372, 30, 1) (372, 1)
Train on 2894 samples, validate on 724 samples
Epoch 1/5000
Epoch 2/5000
Epoch 3/5000
Epoch 4/5000
Epoch 5/5000
Epoch 6/5000
Epoch 7/5000
Epoch 8/5000

KeyboardInterrupt: 