# Environment Setup

In [1]:
## Standard libraries
import os
import time
import json
import math
import numpy as np
import scipy
from scipy.linalg import fractional_matrix_power as frac_mat_pow

## Imports for plotting
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set()

## Progress bar
from tqdm.notebook import tqdm

## PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
%load_ext tensorboard

import projection
device = projection.setup_gpu()

2023-12-20 21:25:40.635969: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-20 21:25:40.636027: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-20 21:25:40.637336: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-12-20 21:25:40.644020: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Using device ->  cpu


## Data Loading

In [2]:
problem_instance = torch.load("problem.pt")

N = problem_instance.N
T = problem_instance.T 
d = problem_instance.d
Mu = problem_instance.mu
Varsigma = problem_instance.varsigma 
Sigma = problem_instance.Sigma 
Lambda = problem_instance.Lambda 
memory = problem_instance.memory
X = problem_instance.X
Drift = problem_instance.Drift
Diffusion = problem_instance.Diffusion
Z = problem_instance.input_output_pairs

## Dataset creation for each model

In [3]:
class CNN_Dataset(data.Dataset):
    def __init__(self, X, Y):
        super().__init__()
        self.generate_dataset(X, Y)

    def generate_dataset(self, X, Y):
        self.data = X
        self.size = len(self.data)
        # labels = []
        # for index in range(len(Y)):
        #     label = []
        #     label.extend(Y[index][0])
        #     matrix_upper_values = []
        #     for row in range(Y[index][1].shape[0]):
        #         matrix_upper_values.extend(Y[index][1][row][row:])
        #     label.extend(matrix_upper_values)
        #     label = np.array(label)
        #     labels.append(label)
        # self.label = torch.from_numpy(np.array(labels)).type(torch.FloatTensor)
        self.data = Y

    def __len__(self):
        return self.size

    def __getitem__(self, index):
        data_point = self.data[index]
        data_label = self.label[index]
        return data_point, data_label

In [4]:
CNNs_datasets = []
for index in range(1,T+1):
    # X_Index = []
    # Y_Index = []
    # for n in range(N):
    #     X_Index.append(torch.cat((Z_N[n][0][index], Z_N[n][0][index+1]),0)) #Z_N[n][0][index]
    #     Y_Index.append(Z_N[n][1][index])
    # dataset_index = CNN_Dataset(X_Index, Y_Index)
    dataset_index = CNN_Dataset(Z[index][0], Z[index][1])
    CNNs_datasets.append(dataset_index)

In [None]:
# # TESTING
# dataset = CNNs_datasets[1]

# print("Size of dataset:", len(dataset))
# print("Dataset : ", [dataset[i] for i in range(len(dataset))])
# print(dataset[0])

In [None]:
# # TESTING
# data_loader = data.DataLoader(dataset, batch_size=1, shuffle=False, drop_last=True)

# for batch in data_loader:
#     data_inputs, data_labels = next(iter(data_loader))
#     print("Data inputs", data_inputs.shape, "\n", data_inputs)
#     print("Data labels", data_labels.shape, "\n", data_labels)

## Convolutional Network Architecture

### CNN Model Architecture Definition

In [5]:
class Affine_layer(nn.Module):
    def __init__(self, input_size: int):
        super(Affine_layer, self).__init__()
        # initiating the weights and biases
        self.weights = nn.Parameter(torch.rand(input_size), requires_grad=True)
        self.bias = nn.Parameter(torch.rand(input_size), requires_grad=True)

    def forward(self, input_data: torch.tensor):
        # simple elementwise multiplication and addition
        return ((self.weights * input_data) + self.bias)

In [6]:
class Exp_layer(nn.Module):
    def __init__(self):
        super(Exp_layer, self).__init__()

    def forward(self, input_data: torch.tensor):
        inp = input_data[0] # 1 x d(d+1)/2
        inp = inp[d:]
        inp = inp.to(device)
        temp = torch.zeros(d,d)
        temp = temp.to(device)

        indices = torch.triu_indices(d, d)
        indices.to(device)

        temp[indices[0], indices[1]] = inp
        matrix = torch.linalg.matrix_exp(temp)

        result = input_data
        result[0][d:] = matrix[indices[0], indices[1]]

        return (result)

In [7]:
class MainNetwork(nn.Module):

    def __init__(self, input_size):

        super().__init__()

        # Create the network based on the specified hidden layers
        layers = []

        layers += [nn.Linear(input_size,out_features=input_size*1000)] # Fully Connected layer

        # layers += [nn.Conv1d(in_channels=1, out_channels=1, kernel_size=3, padding='same', padding_mode='replicate')]
        layers += [nn.ReLU()]

        layers += [nn.Conv1d(in_channels=1, out_channels=1, kernel_size=3, padding='same', padding_mode='replicate')]
        layers += [nn.ReLU()]

        # layers += [nn.Conv1d(in_channels=1, out_channels=1, kernel_size=3, padding='same', padding_mode='replicate')]
        # layers += [nn.ReLU()]

        layers += [nn.Linear(in_features=input_size*1000, out_features= (d)+(d*(d+1)//2))] # Fully Connected layer

        layers += [Exp_layer()]

        self.layers = nn.Sequential(*layers)


    def forward(self, input_data):
        result = self.layers(input_data)
        return result

In [None]:
# # TESTING
# # Printing model architecture

# MainNetwork_Model = MainNetwork(d*2)
# print("==== Hyper Network Details ====\n",MainNetwork_Model,"\n")

# for name, param in MainNetwork_Model.named_parameters():
#     print(f"Parameter {name}, shape {param.shape}")

# num_parameters = sum(p.numel() for p in MainNetwork_Model.parameters())
# print("\n Number of parameters = ", num_parameters, "\n")

### Model Training and Evaluation Functions

In [8]:
class CustomLoss(nn.Module):
    def __init__(self):
        super(CustomLoss, self).__init__()

    def forward(self, predictions, targets):
        preds = predictions[0] # 1 x d(d+1)/2
        mu_pred = preds[:d]
        cov_pred = preds[d:]
        cov_pred = cov_pred.to(device)
        temp = torch.zeros(d,d)
        temp = temp.to(device)
        indices = torch.triu_indices(d, d)
        indices.to(device)
        temp[indices[0], indices[1]] = cov_pred
        cov_pred = temp

        trgt = targets[0] # 1 x d(d+1)/2
        mu_trgt = trgt[:d]
        cov_trgt = trgt[d:]
        cov_trgt = cov_trgt.to(device)
        temp = torch.zeros(d,d)
        temp = temp.to(device)
        indices = torch.triu_indices(d, d)
        indices.to(device)
        temp[indices[0], indices[1]] = cov_trgt
        cov_trgt = temp

        loss = 0.0
        # Mean Component
        loss += torch.mean(torch.pow(mu_pred-mu_trgt,2))
        # Covariant Component
        A = torch.matmul(torch.linalg.pinv(cov_pred), cov_trgt)
        A_eigen = torch.linalg.eig(A).eigenvalues
        A_eigen = torch.real(A_eigen)
        A_eigen = torch.log(A_eigen)**2
        A_eigen = torch.mean(A_eigen)
        loss += A_eigen/2

        return loss.mean()

In [9]:
from torch.nn.modules.loss import MSELoss
A = torch.eye(2)
A_eigen = torch.linalg.eig(A).eigenvalues
A_eigen = torch.real(A_eigen)
A_eigen = torch.log(A_eigen)**2
A_eigen = torch.mean(A_eigen)
A_eigen

tensor(0.)

In [10]:
def CNN_train_model_with_logger(model, optimizer, data_loader, loss_module, model_index, num_epochs):
    # Create TensorBoard logger
    logging_dir='logger/CNN_'+str(model_index)
    writer = SummaryWriter(logging_dir)
    model_plotted = False
    # Set model to train mode
    model.train()
    # Training loop
    for epoch in tqdm(range(num_epochs)):
        epoch_loss = 0.0
        for data_inputs, data_labels in data_loader:

            ## Step 1: Move input data to device (only strictly necessary if we use GPU)
            data_inputs = data_inputs.to(device)
            data_labels = data_labels.to(device)

            # For the very first batch, we visualize the computation graph in TensorBoard
            if not model_plotted:
                writer.add_graph(model, data_inputs)
                model_plotted = True

            ## Step 2: Run the model on the input data
            preds = model(data_inputs)
            preds = preds.squeeze(dim=1) # Output is [Batch size, 1], but we want [Batch size]

            ## Step 3: Calculate the loss
            loss = loss_module(preds, data_labels.float())

            ## Step 4: Perform backpropagation
            # Before calculating the gradients, we need to ensure that they are all zero.
            # The gradients would not be overwritten, but actually added to the existing ones.
            optimizer.zero_grad()
            # Perform backpropagation
            loss.backward()

            ## Step 5: Update the parameters
            optimizer.step()

            ## Step 6: Take the running average of the loss
            epoch_loss += loss.item()

        # Add average loss to TensorBoard
        epoch_loss /= len(data_loader)
        writer.add_scalar('training_loss',
                          epoch_loss,
                          global_step = epoch + 1)
    writer.close()

In [11]:
def CNN_train_model(model, optimizer, data_loader, loss_module, num_epochs):
    # Set model to train mode
    model.train()
    # Training loop
    for epoch in tqdm(range(num_epochs)):
        for data_inputs, data_labels in data_loader:

            ## Step 1: Move input data to device (only strictly necessary if we use GPU)
            data_inputs = data_inputs.to(device)
            data_labels = data_labels.to(device)

            ## Step 2: Run the model on the input data
            preds = model(data_inputs)
            preds = preds.squeeze(dim=1) # Output is [Batch size, 1], but we want [Batch size]

            ## Step 3: Calculate the loss
            loss = loss_module(preds, data_labels.float())

            ## Step 4: Perform backpropagation
            # Before calculating the gradients, we need to ensure that they are all zero.
            # The gradients would not be overwritten, but actually added to the existing ones.
            optimizer.zero_grad()
            # Perform backpropagation
            loss.backward()

            ## Step 5: Update the parameters
            optimizer.step()


In [12]:
def CNN_eval_model(model, data_loader, loss_module, model_index):
    # Create TensorBoard logger
    logging_dir='logger/CNN_'+str(model_index)
    writer = SummaryWriter(logging_dir)
    model_plotted = False

    # Set model to eval mode
    model.eval()

    data_index = 0
    num_preds = 0
    losses = []

    with torch.no_grad(): # Deactivate gradients for the following code
        for data_inputs, data_labels in data_loader:
            data_inputs = data_inputs.to(device)
            data_labels = data_labels.to(device)

            preds = model(data_inputs)
            preds = preds.squeeze(dim=1)
            loss = loss_module(preds, data_labels.float())
            losses.append(loss)

            num_preds += data_labels.shape[0]
            data_index += 1
            writer.add_scalar('eval_loss', loss, global_step = data_index)

    writer.close()
    Average_loss = sum(losses) / num_preds
    print("---- EVAL RESULTS ----\n","Average loss : ", Average_loss)

In [13]:
def CNN_predict(model, data):
    model.eval() # Set model to eval mode
    with torch.no_grad(): # Deactivate gradients for the following code
        data = data.to(device)
        preds = model(data)
        preds = preds.squeeze(dim=1)
    return preds

## Creating And Training The CNN Instances

In [14]:
def DataLoader_for_Model(model_index,  test_ratio=0.2, batch_size=1):
    full_dataset = CNNs_datasets[model_index]
    test_size = int(test_ratio * len(full_dataset))
    train_size = len(full_dataset) - test_size
    train_dataset, test_dataset = torch.utils.data.random_split(full_dataset, [train_size, test_size])
    train_data_loader = data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
    test_data_loader = data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False, drop_last=False)
    return train_data_loader, test_data_loader

def Model_Maker():
    model = MainNetwork(2*d)
    model = model.to(device)
    return model

def Model_Trainer(model, train_data_loader, test_data_loader, model_index=0, with_logger=False, with_eval=False, num_epochs=100):
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    loss_module = nn.MSELoss()
    if with_logger:
        CNN_train_model_with_logger(model, optimizer, train_data_loader, loss_module, model_index, num_epochs)
    else:
        CNN_train_model(model, optimizer, train_data_loader, loss_module, num_epochs)
    if with_eval:
        CNN_eval_model(model, test_data_loader, loss_module, model_index)
    return model

def Model_Param_Extractor(model):
    paramete_vectors_sizes = []
    paramete_vectors_sizes_flatten = []
    paramete_vectors_values = []

    for param_tensor in model.state_dict():
        paramete_vectors_sizes.append(model.state_dict()[param_tensor].shape)
        paramete_vectors_sizes_flatten.append(torch.flatten(model.state_dict()[param_tensor]).shape[0])
        paramete_vectors_values += (torch.flatten(model.state_dict()[param_tensor]).tolist())

    model_params_and_info = [paramete_vectors_sizes, paramete_vectors_sizes_flatten, paramete_vectors_values]
    return model_params_and_info

def MainModel_Saver(model, model_index):
    if not os.path.exists("CNN_Models/"):
        os.mkdir("CNN_Models/")
    PATH = "./CNN_Models/CNN"+str(model_index)+".pt"
    torch.save(model.state_dict(), PATH)
    return

def MainModel_Loader(PATH):
    model = torch.load(PATH)
    model.eval()
    return model

In [15]:
number_of_CNNs = T
CNNs_parameters = []

for index in range(number_of_CNNs):
    print("="*20, "\n", "Start working on CNN", index + 1,  "out of ", number_of_CNNs)
    print("---- Creating Datasets ----")
    CNN_train_data, CNN_test_data = DataLoader_for_Model(index)
    print("---- Creating model instance ----")
    if index == 0:
        CNN_Model = Model_Maker()
    print("---- Training model instance ----")
    if index != 0:
        Model_Trainer(CNN_Model, CNN_train_data, CNN_test_data, index, with_logger=True, with_eval=True, num_epochs=10)
    print("---- Extracting model parameters ----")
    CNN_Params_and_Info = Model_Param_Extractor(CNN_Model)
    CNNs_parameters.append(CNN_Params_and_Info)
    print("---- Saving model state ----")
    MainModel_Saver(CNN_Model, index)
    print("---- Clearing GPU's cache memory ----")
    torch.cuda.empty_cache()
    print("Done with CNN", index, "\n")

 Start working on CNN 1 out of  100
---- Creating Datasets ----


AttributeError: 'CNN_Dataset' object has no attribute 'size'

In [None]:
!kill $(ps -e | grep 'tensorboard' | awk '{print $1}')
%tensorboard --logdir /content/logger/CNN_2/

In [None]:
!kill $(ps -e | grep 'tensorboard' | awk '{print $1}')
%tensorboard --logdir /content/logger/CNN_4/

## Saving Results

In [None]:
import shutil

torch.save(CNNs_parameters, 'CNNs_parameters.pt')
shutil.make_archive('CNN_Models', 'zip', './CNN_Models/')
shutil.make_archive('CNN_Logs', 'zip', './logger/')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# from google.colab import files

# files.download('CNNs_parameters.pt')
# files.download('CNN_Models.zip')
# files.download('CNN_Logs.zip')