In [None]:
# Google Drive locations used by the notebook, all under one project root.
_PROJECT_ROOT = '/content/drive/MyDrive/MEAM 517 Project'

# Despite the "_folder" suffix this is the path of the .npy data file itself.
full_data_folder = _PROJECT_ROOT + '/Training_Data/fullData.npy'
# Directory receiving the training checkpoints and the final model.
save_nn_models_folder = _PROJECT_ROOT + '/saved_models'
# Directory holding the cross-validation result arrays.
save_numpy_files_folder = _PROJECT_ROOT + '/saved_numpy'

In [None]:
# Notebook switches:
# CrossVal gates the hyperparameter grid-search cell.
CrossVal = False
# Training gates the cells that build the trainer and fit the final model.
Training = True #training after cross val. False if trained neural net saved

In [None]:
import torch
import numpy as np

In [None]:
# Seed NumPy and PyTorch with one shared value so runs are repeatable.
SEED = 1234
np.random.seed(SEED)
torch.manual_seed(SEED)

<torch._C.Generator at 0x7f3aac4babb8>

In [None]:
# The notebook deliberately runs on the CPU even when CUDA is present (the
# CUDA device line is kept below, commented out, as in the original). The
# status message now reports the device that is actually selected instead of
# claiming "GPU" while using the CPU.
device = torch.device("cpu")
if torch.cuda.is_available():
    #device = torch.device("cuda:0")
    print("GPU available, but running on the CPU")
else:
    print("Running on the CPU")

Running on the GPU


# **NEURAL NETWORK**

In [None]:
import torch.nn as nn
import torch.nn.functional as F

In [None]:
class NETWORK(nn.Module):
    """Fully connected network: ``n_layers`` ReLU hidden layers of equal width
    followed by a linear output layer (no activation on the output)."""

    def __init__(self, input_dim, output_dim, n_layers=None, hidden_layer_size=None):
        super().__init__()

        self.n_layers = n_layers
        self.hidden_layer_size = hidden_layer_size

        widths = self.hidden_layer_set(input_dim, output_dim)

        # Every consecutive pair of widths except the last one is a hidden layer.
        self.hidden = nn.ModuleList(
            [nn.Linear(fan_in, fan_out) for fan_in, fan_out in zip(widths[:-2], widths[1:-1])]
        )
        # The last pair of widths maps the final hidden layer to the output.
        self.out = nn.Linear(widths[-2], widths[-1])

    def forward(self, x):
        """Apply each hidden layer with ReLU, then the linear output layer."""
        activations = x
        for hidden_layer in self.hidden:
            activations = F.relu(hidden_layer(activations))
        return self.out(activations)

    def hidden_layer_set(self, input_dim, output_dim):
        """Return the layer widths ``[input_dim, hidden..., output_dim]``."""
        return [input_dim] + [self.hidden_layer_size] * self.n_layers + [output_dim]

# **OPTIMIZATION**

In [None]:
from torch.optim import Adam
from time import time
from tqdm.notebook import tqdm

from torch.utils.data import random_split, TensorDataset, DataLoader
from sklearn.model_selection import KFold

In [None]:
class TRAIN():
    """Train ``NETWORK`` regressors and grid-search their hyperparameters.

    An instance can run ``cross_validation`` over the ``*_set`` grids and/or
    ``train`` a single model. While training, the best weights seen so far are
    checkpointed to ``<state_dict_folder>/zador.pth``.

    The class relies on the notebook-level names ``NETWORK`` and ``device``.
    """

    def __init__(self, input_dim, output_dim, max_epochs, state_dict_folder, n_layers_set=None, 
                 hidden_layer_size_set=None, lr_set=None, lr_reduction_set=None, 
                 patience_set=None, batch_size_set=None, network=None, n_layers_optimal=None, 
                 hidden_layer_size_optimal=None, lr_optimal=None, lr_reduction_optimal=None, 
                 patience_optimal=None, batch_size_optimal=None):
        """Store the problem size, the hyperparameter grids and any known optima.

        Args:
            input_dim, output_dim: Layer widths handed to ``NETWORK``.
            max_epochs: Upper bound on training epochs per ``train`` call.
            state_dict_folder: Directory in which ``zador.pth`` is written.
            *_set: Candidate values per hyperparameter; all six are required
                for ``cross_validation``.
            network: Optional pre-built network (``train`` replaces it).
            *_optimal: Optional already-known best hyperparameters.
        """
        self.net = network
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.max_epochs = max_epochs

        self.optimizer = None
        self.net_state_path = state_dict_folder+'/zador.pth'
        # Best validation error so far; 10000 acts as the "nothing seen yet" value.
        self.error_tracker = 10000

        self.n_layers_set = n_layers_set
        self.hidden_layer_size_set = hidden_layer_size_set
        self.lr_set = lr_set
        self.lr_reduction_set = lr_reduction_set
        self.patience_set = patience_set
        self.batch_size_set = batch_size_set

        self.n_layers_optimal = n_layers_optimal
        # Fix: this used to be assigned the whole candidate *set*.
        self.hidden_layer_size_optimal = hidden_layer_size_optimal
        self.lr_optimal = lr_optimal
        self.lr_reduction_optimal = lr_reduction_optimal
        self.patience_optimal = patience_optimal
        self.batch_size_optimal = batch_size_optimal

        self.lr = None

        # One CV score per grid point. Only allocated when every grid was
        # supplied; otherwise None (previously a bare ``except`` left the
        # attribute undefined and hid any other error).
        grids = (n_layers_set, hidden_layer_size_set, lr_set,
                 lr_reduction_set, patience_set, batch_size_set)
        if all(grid is not None for grid in grids):
            self.CV_error_tracker = np.zeros(tuple(len(grid) for grid in grids))
        else:
            self.CV_error_tracker = None

    def train(self, X, y, train_split, n_layers, hidden_layer_size, lr, lr_reduction, patience, batch_size, print_train=True):
        """Fit a fresh ``NETWORK`` on ``(X, y)``.

        ``train_split`` is the fraction of the data used for gradient steps;
        the remainder is the validation set. Every ``patience`` epochs the
        validation error is computed: an improvement is checkpointed, otherwise
        the best checkpoint is restored, the learning rate is multiplied by
        ``lr_reduction`` and a new Adam optimizer is created. Training stops
        after more than 3 consecutive non-improving checks or ``max_epochs``.
        """
        self.lr = lr
        # Start from a clean slate even if a previous call was interrupted
        # before reaching the reset at the end of this method.
        self.error_tracker = 10000
        self.net = NETWORK(self.input_dim, self.output_dim, n_layers, hidden_layer_size).to(device)
        self.optimizer = Adam(self.net.parameters(), lr=self.lr)
        epoch_worsening_count = 0

        train_loader, validation_loader = self.generate_data_loader(X, y, batch_size, train_split)

        for epoch in range(self.max_epochs):
            batches = tqdm(train_loader) if print_train else train_loader
            for inputs, target in batches:
                self.optimizer.zero_grad()
                prediction = self.net(inputs.float().to(device))
                loss = self.loss_function(prediction, target.to(device).float())
                loss.backward()
                self.optimizer.step()

            if epoch % patience == 0:
                epoch_mse = self.model_error(validation_loader)
                if print_train: print('OLD MSE:', self.error_tracker, '\nCURRENT MSE:', epoch_mse)
                if epoch_mse<self.error_tracker:
                    epoch_worsening_count = 0
                    if print_train: print('MODEL WAS IMPROVED\n', 'CURRENT LEARNING RATE:', self.lr) 
                    self.save_checkpoint(epoch_mse, self.lr, self.net.state_dict(), self.net_state_path)
                else:
                    epoch_worsening_count += 1
                    # Fix: reduce the rate exactly once. Previously the rate
                    # reloaded by save_checkpoint was multiplied a second time,
                    # so the optimizer ran at a different rate than the one
                    # printed and stored.
                    reduced_lr = self.lr * lr_reduction
                    if print_train: print('MODEL WORSENED -> RESET MODEL\n', 'NEW LEARNING RATE:', reduced_lr) 
                    # Roll back to the best weights (this also restores error_tracker).
                    self.load_checkpoint(self.net_state_path)
                    self.save_checkpoint(self.error_tracker, reduced_lr, self.net.state_dict(), self.net_state_path)
                    self.lr = reduced_lr
                    # A new optimizer is needed to apply the new rate; this
                    # also resets Adam's running moment estimates.
                    self.optimizer = Adam(self.net.parameters(), lr=self.lr)

                if epoch_worsening_count > 3:
                    print('Model Trained (Early Stopping).')
                    break
            if epoch == (self.max_epochs-1): print('Model Trained (Max Epochs).')
        self.error_tracker = 10000

    def cross_validation(self, n_splits, X, y):
        """Grid-search all hyperparameter combinations with K-fold CV.

        For every grid point a model is trained on each fold's training part
        and scored on its held-out part; the mean score is stored in
        ``CV_error_tracker`` and the arg-min is written to the ``*_optimal``
        attributes.

        Raises:
            ValueError: if the hyperparameter grids were not all supplied.
        """
        if self.CV_error_tracker is None:
            raise ValueError('cross_validation requires all six hyperparameter sets to be passed to TRAIN().')

        kf = KFold(n_splits = n_splits)
        # Fix: derived from the instance's own grids, not from notebook globals.
        num_models_cv = self.CV_error_tracker.size
        cv_i = 0
        # ndindex walks the grid in the same order as six nested loops would.
        for grid_index in np.ndindex(self.CV_error_tracker.shape):
            par1, par2, par3, par4, par5, par6 = grid_index
            n_layers = self.n_layers_set[par1]
            hidden_layer_size = self.hidden_layer_size_set[par2]
            lr = self.lr_set[par3]
            lr_reduction = self.lr_reduction_set[par4]
            patience = self.patience_set[par5]
            batch_size = self.batch_size_set[par6]

            CV_error = 0
            for train_index, test_index in kf.split(X):
                X_train, X_test = X[train_index], X[test_index]
                y_train, y_test = y[train_index], y[test_index]
                # Fix: train on the fold's training part only. Training on the
                # full (X, y) leaked the held-out fold into the model.
                self.train(X_train, y_train, train_split=0.8, n_layers=n_layers, hidden_layer_size=hidden_layer_size, lr=lr, lr_reduction=lr_reduction, patience=patience, batch_size=batch_size, print_train=False)
                test_loader = self.generate_data_loader(X_test, y_test, batch_size)
                CV_error += self.model_error(test_loader)
            self.CV_error_tracker[grid_index] = CV_error / n_splits
            cv_i += 1
            print('(', cv_i, '/', num_models_cv, ')     CV-Error:', CV_error / n_splits)

        best_par1, best_par2, best_par3, best_par4, best_par5, best_par6 = np.unravel_index(np.argmin(self.CV_error_tracker), self.CV_error_tracker.shape)

        self.n_layers_optimal = self.n_layers_set[best_par1] 
        self.hidden_layer_size_optimal = self.hidden_layer_size_set[best_par2]
        self.lr_optimal = self.lr_set[best_par3]
        self.lr_reduction_optimal = self.lr_reduction_set[best_par4]
        self.patience_optimal = self.patience_set[best_par5]
        self.batch_size_optimal = self.batch_size_set[best_par6]

    def loss_function(self, prediction, target):
        """Mean-squared-error training loss."""
        loss = nn.MSELoss()
        return loss(prediction.float(), target.float())

    def model_error(self, model_test_loader):
        """Return the root-mean-squared error of ``self.net`` over a loader.

        Squared errors are summed over all batches, divided by the number of
        samples in the loader's dataset, and square-rooted (so, despite the
        ``MSE`` naming used around it, the returned value is an RMSE).
        """
        squared_error_sum = 0
        # Evaluation only: skip building autograd graphs.
        with torch.no_grad():
            for X, y in model_test_loader:
                prediction = self.net(X.float().to(device))
                # assumes y has the same shape as the prediction, e.g. (batch, 1);
                # a 1-D y would broadcast - TODO confirm for new callers
                squared_error_sum += np.sum( (prediction.detach().cpu().numpy()-y.detach().cpu().numpy())**2 ) 
        MSE = squared_error_sum / len(model_test_loader.dataset)

        return MSE**0.5

    def save_checkpoint(self, error, learning_rate, model_state_dict, PATH):
        """Write error, learning rate and weights to ``PATH``, then reload them.

        The reload makes ``error_tracker`` and ``lr`` reflect the saved values.
        """
        # Plain Python floats (instead of NumPy scalars) keep the file free of
        # NumPy pickles, so it stays loadable by torch.load's restricted
        # weights-only unpickler.
        torch.save({'error_tracker': float(error),
                    'learning_rate': float(learning_rate),
                    'net_state_dict': model_state_dict
                    }, PATH)
        self.load_checkpoint(PATH)
        
    def load_checkpoint(self, PATH):
        """Restore weights, best error and learning rate from ``PATH``."""
        # map_location lets a checkpoint written on another device load here.
        checkpoint = torch.load(PATH, map_location=device)
        self.net.load_state_dict(checkpoint['net_state_dict'])
        self.error_tracker = checkpoint['error_tracker']
        self.lr = checkpoint['learning_rate']
    
    def generate_data_loader(self, X, y, batch_size, train_split=None):
        """Wrap ``(X, y)`` in DataLoader(s).

        Returns:
            A single unshuffled loader when ``train_split`` is None; otherwise
            a ``(train_loader, validation_loader)`` pair from a random split in
            which ``train_split`` is the training fraction (training batches
            are shuffled, validation batches are not).
        """
        dataset = TensorDataset(torch.tensor(X), torch.tensor(y))

        # Explicit branch instead of a bare ``except`` fallback, so genuine
        # errors in the split path are no longer silently swallowed.
        if train_split is None:
            return DataLoader(dataset, batch_size=batch_size, shuffle=False)

        dataset_size = len(dataset)
        train_size = int(train_split * dataset_size)
        split_size = dataset_size - train_size
        data_set1, data_set2 = random_split(dataset, [train_size, split_size])
        data_loader1 = DataLoader(data_set1, batch_size=batch_size, shuffle=True)
        data_loader2 = DataLoader(data_set2, batch_size=batch_size, shuffle=False)
        return data_loader1, data_loader2

# **MAIN**

## DATA PREPARATION

In [None]:
# Load the full data array from Drive; `full_data_folder` (set in the first
# cell) is the path of the .npy file itself.
data = np.load(full_data_folder)

In [None]:
# Discard every row that contains at least one NaN.
nan_row_mask = np.isnan(data).any(axis=1)
rows_with_nan = np.flatnonzero(nan_row_mask).tolist()
data = data[~nan_row_mask]

# Features are all columns except the last three; the target is the last
# column, kept 2-D with shape (n_samples, 1).
X = data[:, :-3]
y = data[:, -1:]
print(X.shape, y.shape)
n_samples, n_features = X.shape
print('NUMBER OF FEATURES:', n_features)
print('      DATASET SIZE:', n_samples)

(43200, 52) (43200, 1)
NUMBER OF FEATURES: 52
      DATASET SIZE: 43200


## CROSS VALIDATION

CROSS VALIDATION PARAMETERS

In [None]:
# Problem dimensions come straight from the prepared arrays.
input_dim, output_dim = X.shape[1], y.shape[1]

# Training budget per model and checkpoint location.
max_epochs = 50
state_dict_folder = save_nn_models_folder

# Hyperparameter grid explored by the cross validation.
n_layers_set = [1, 2, 3]
hidden_layer_size_set = [50, 100, 200]
lr_set = [0.001]
lr_reduction_set = [1/5, 1/10]
patience_set = [1, 3]
batch_size_set = [64, 128, 256]

# Number of folds for K-fold cross validation.
n_splits = 3

CROSS VALIDATION

In [None]:
if CrossVal:
    # Grid-search every hyperparameter combination with K-fold CV.
    train = TRAIN(input_dim=input_dim, output_dim=output_dim, max_epochs=max_epochs, 
                state_dict_folder=state_dict_folder, n_layers_set=n_layers_set, 
                hidden_layer_size_set=hidden_layer_size_set, lr_set=lr_set, lr_reduction_set=lr_reduction_set, 
                patience_set=patience_set, batch_size_set=batch_size_set)

    train.cross_validation(n_splits, X, y)

    print('  Optimal Number of Layer :', train.n_layers_optimal)
    print('      Optimal Layer Sizes :', train.hidden_layer_size_optimal)
    print('       Optimal Initial LR :', train.lr_optimal)
    print('Optimal LR Reduction Rate :', train.lr_reduction_optimal)
    print('         Optimal Patience :', train.patience_optimal)
    print('        Optimal Bach Size :', train.batch_size_optimal)

    # Persist the winners in the order
    # [layers, layer size, initial LR, LR reduction, patience, batch size].
    # Fix: write into `save_numpy_files_folder` (the folder the later cells
    # read from) instead of a hard-coded path with a different project name.
    np.save(save_numpy_files_folder + '/optimal_hyp.npy', np.array([train.n_layers_optimal,
                                                                    train.hidden_layer_size_optimal,
                                                                    train.lr_optimal,
                                                                    train.lr_reduction_optimal,
                                                                    train.patience_optimal,
                                                                    train.batch_size_optimal]))

## TRAINING

**COLLECTING CV FINDINGS**
- CV was split across the selected batch size to run in parallel 
- Saved information is:
    - Cross validation error of entire process
    - Minimum cross validation error of entire process
    - Optimal hyperparameters of entire process
- Cross validation information:
    - Hyperparameters:
            n_layers_set = [1, 2, 3]
            hidden_layer_size_set = [50, 100, 200]
            lr_set = [0.001]
            lr_reduction_set = [1/5, 1/10]
            patience_set = [1, 3]
            batch_size_set = [64, 128, 256]
    - {CV1, CV2, CV3} : batch_size_set{64, 128, 256}

        -> CV split accordingly

In [None]:
def _load_cv_run(scores_file, minimum_file, optimum_file):
    """Load one CV run: its score grid, its minimum error and its optimal
    hyperparameters with entry 2 (the initial learning rate) removed."""
    scores = np.load(save_numpy_files_folder + scores_file)
    minimum = np.load(save_numpy_files_folder + minimum_file)
    optimum = np.delete(np.load(save_numpy_files_folder + optimum_file), 2, 0)
    return scores, minimum, optimum


CV1, Min1, Opt1 = _load_cv_run('/Griffon_2_CV_Scores.npy',
                               '/Griffon_2_Minimum_Error.npy',
                               '/Griffon_2_optimal_hyp.npy')

CV2, Min2, Opt2 = _load_cv_run('/Zador_1_CV_Scores.npy',
                               '/Zador_1_Minimum_Error.npy',
                               '/Zador_1optimal_hyp.npy')

CV3, Min3, Opt3 = _load_cv_run('/Zador_2_CV_Scores.npy',
                               '/Zador_2_Minimum_Error.npy',
                               '/Zador_2optimal_hyp.npy')

In [None]:
# Smallest CV score in the first slice of run 1's score grid.
min1 = CV1[0].min()
min1

0.3231479416877929

In [None]:
# Smallest CV score in the first slice of run 2's score grid.
min2 = CV2[0].min()
min2

0.35635475745214423

In [None]:
# Smallest CV score in the first slice of run 3's score grid.
min3 = CV3[0].min()
min3

0.33633621671847225

In [None]:
# Grid position of that minimum within the first slice of run 1.
argmin1 = np.unravel_index(CV1[0].argmin(), CV1[0].shape)
argmin1

(2, 0, 0, 1, 0)

In [None]:
import pandas as pd

In [None]:
# One column per CV run, one row per hyperparameter.
Opt_set = np.transpose(np.vstack((Opt1, Opt2, Opt3)))
Opt_df_set = pd.DataFrame(
    data=Opt_set,
    index=['Number of Layers', 'Layer Size', 'LR Reuction Rate', 'Patience', 'Batch Size'],
    columns=['Opt1', 'Opt2', 'Opt3'],
)
Opt_df_set

Unnamed: 0,Opt1,Opt2,Opt3
Number of Layers,3.0,2.0,2.0
Layer Size,200.0,200.0,200.0
LR Reuction Rate,0.2,0.1,0.2
Patience,3.0,3.0,3.0
Batch Size,64.0,128.0,256.0


In [None]:
# Minimum CV error of each run as a single-row table.
Min_set = np.hstack((Min1, Min2, Min3))[np.newaxis, :]
Min_df = pd.DataFrame(
    data=Min_set,
    index=[''],
    columns=['Opt1', 'Opt2', 'Opt3'],
)
Min_df

Unnamed: 0,Opt1,Opt2,Opt3
,0.230546,0.26166,0.293718


In [None]:
# Keep the hyperparameters of whichever run reached the lowest minimum error.
opt_min_index = Min_set.argmin()
Opt = Opt_set[:, opt_min_index]
Opt_df = pd.DataFrame(
    data=Opt,
    index=['Number of Layers', 'Layer Size', 'LR Reuction Rate', 'Patience', 'Batch Size'],
    columns=['Optimal Hyperparameters'],
)
Opt_df

Unnamed: 0,Optimal Hyperparameters
Number of Layers,3.0
Layer Size,200.0
LR Reuction Rate,0.2
Patience,3.0
Batch Size,64.0


In [None]:
# Order: number of layers, layer size, LR reduction rate, patience, batch size.
print(Opt)

[  3.  200.    0.2   3.   64. ]


In [None]:
# NOTE(review): this manually replaces the CV-selected hyperparameters printed
# by the previous cell ([3, 200, 0.2, 3, 64]) with a single-hidden-layer
# configuration. The notebook does not record why - confirm it is intentional.
# Order: [number of layers, layer size, LR reduction rate, patience, batch size].
Opt = [1., 200., 0.2, 3., 64.]
print(Opt)

[1.0, 200.0, 0.2, 3.0, 64.0]


In [None]:
if Training:
    # The final model gets a larger epoch budget than the CV runs.
    max_epochs = 200
    train = TRAIN(state_dict_folder=state_dict_folder, max_epochs=max_epochs,
                  input_dim=input_dim, output_dim=output_dim)

    # Opt holds (layers, layer size, LR reduction, patience, batch size) as
    # floats; the integer-valued ones are cast back before use.
    n_layers_opt, layer_size_opt, lr_reduction_opt, patience_opt, batch_size_opt = Opt
    train.n_layers_optimal = int(n_layers_opt)
    train.hidden_layer_size_optimal = int(layer_size_opt)
    train.lr_optimal = 0.001
    train.lr_reduction_optimal = lr_reduction_opt
    train.patience_optimal = int(patience_opt)
    train.batch_size_optimal = int(batch_size_opt)

    for label, value in (
        ('  Optimal Number of Layer :', train.n_layers_optimal),
        ('      Optimal Layer Sizes :', train.hidden_layer_size_optimal),
        ('       Optimal Initial LR :', train.lr_optimal),
        ('Optimal LR Reduction Rate :', train.lr_reduction_optimal),
        ('         Optimal Patience :', train.patience_optimal),
        ('        Optimal Bach Size :', train.batch_size_optimal),
    ):
        print(label, value)

  Optimal Number of Layer : 1
      Optimal Layer Sizes : 200
       Optimal Initial LR : 0.001
Optimal LR Reduction Rate : 0.2
         Optimal Patience : 3
        Optimal Bach Size : 64


DEFINING HYPERPARAMETERS

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 20% of the samples as the test set. This runs even when Training
# is False because the prediction cells need X_test / y_test.
X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=True, test_size=0.2)
if Training:
    # Fraction of X_train used for gradient steps; the rest is validation data.
    train_split = 0.8
    n_layers, hidden_layer_size = train.n_layers_optimal, train.hidden_layer_size_optimal
    lr, lr_reduction = train.lr_optimal, train.lr_reduction_optimal
    patience, batch_size = train.patience_optimal, train.batch_size_optimal

    train.train(
        X=X_train,
        y=y_train,
        train_split=train_split,
        n_layers=n_layers,
        hidden_layer_size=hidden_layer_size,
        lr=lr,
        lr_reduction=lr_reduction,
        patience=patience,
        batch_size=batch_size,
        print_train=True,
    )

Model Trained (Max Epochs).


In [None]:
# Persist the trained network together with the two architecture parameters
# needed to rebuild it. Guarded by `Training`: with Training = False there is
# no freshly trained `train.net` to save (the unguarded cell raised NameError,
# or saved a leftover cross-validation model).
if Training:
    torch.save({'Layer Number': train.n_layers_optimal,
                'Layer Size': train.hidden_layer_size_optimal,
                'net_state_dict': train.net.state_dict()
                }, state_dict_folder+'/Final_Model_v3.pth')

## PREDICTIONS

In [None]:
# Rebuild the saved network for evaluation.
# NOTE(review): this loads Final_Model_v2.pth while the cell above saves
# Final_Model_v3.pth - confirm v2 is the model that should be evaluated.
# map_location lets a checkpoint saved on another device load on `device`.
checkpoint = torch.load(state_dict_folder+'/Final_Model_v2.pth', map_location=device)
n_layers = checkpoint['Layer Number']
hidden_layer_size = checkpoint['Layer Size']
# Keep the model on the same device the evaluation inputs are moved to.
net = NETWORK(input_dim, output_dim, n_layers=n_layers, hidden_layer_size=hidden_layer_size).to(device)
net.eval()  # inference mode; called before the load so the cell output is unchanged
net.load_state_dict(checkpoint['net_state_dict'])

<All keys matched successfully>

In [None]:
# Number of hidden layers stored in the loaded checkpoint.
print(n_layers)

1


In [None]:
# Wrap the held-out split in an unshuffled loader for evaluation.
testset = TensorDataset(torch.tensor(X_test), torch.tensor(y_test))
test_loader = DataLoader(testset, batch_size=124, shuffle=False)
# NOTE(review): this prints the shape of the full X, not of X_test - confirm which was intended.
print(X.shape)

(43200, 52)


In [None]:
    def model_error(self, model_test_loader):
        """Root-mean-squared error of ``self.net`` over ``model_test_loader``.

        Sums the squared prediction errors of every batch, divides by the
        number of samples in the loader's dataset and returns the square root.
        """
        # NOTE(review): this cell duplicates TRAIN.model_error as a stand-alone,
        # indented definition and is not called anywhere in the visible
        # notebook - presumably a leftover paste; confirm and remove.
        MSE = 0
        for data in model_test_loader:
            X, y = data
            prediction = self.net(X.float().to(device))
            MSE += np.sum( (prediction.detach().cpu().numpy()-y.detach().cpu().numpy())**2 ) 
        MSE = MSE / len(model_test_loader.dataset)

        return MSE**0.5

In [None]:
# Test-set RMSE of the loaded network: sum the squared errors over all
# batches, divide by the number of test samples, take the square root.
MSE = 0

# Unpack batches directly. The former loop variable `data` overwrote the
# notebook-level dataset array `data`, breaking any re-run of earlier cells.
# no_grad: evaluation only, so no autograd graph is needed.
with torch.no_grad():
    for X_, y_ in test_loader:
        prediction = net(X_.float().to(device))

        MSE += np.sum( (prediction.detach().cpu().numpy()-y_.detach().cpu().numpy())**2 ) 
MSE = MSE / len(test_loader.dataset)
RMSE = MSE**0.5

In [None]:
# Root-mean-squared error of the loaded model on the held-out test split.
print(RMSE)

0.38611365709542383
