In [1]:
import torch
import requests
import os
import pandas as pd
import numpy as np
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from torch.utils.data import random_split

In [2]:
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
device

'cuda'

In [3]:
Noise_0_dataframe = pd.read_csv("Data/Assignment1/data_0_noise")
Noise_Low_dataframe = pd.read_csv("Data/Assignment1/data_Low_noise")
Noise_High_dataframe = pd.read_csv("Data/Assignment1/data_High_noise")

In [4]:
Noise_0_dataframe.columns

Index(['Open_n_val', 'High_n_val', 'Low_n_val', 'Close_n_val', 'Volume_n_val',
       'SMA_10_val', 'SMA_20_val', 'CMO_14_val', 'High_n-Low_n_val',
       'Open_n-Close_n_val', 'SMA_20-SMA_10_val', 'Close_n_slope_3_val',
       'Close_n_slope_5_val', 'Close_n_slope_10_val', 'Open_n_changelen_val',
       'High_n_changelen_val', 'Low_n_changelen_val', 'Close_n_changelen_val',
       'High_n-Low_n_changelen_val', 'Open_n-Close_n_changelen_val',
       'SMA_20-SMA_10_changelen_val', 'Close_n_slope_3_changelen_val',
       'Close_n_slope_5_changelen_val', 'Close_n_slope_10_changelen_val',
       'row_num', 'day', 'era', 'target_10_val', 'target_5_val'],
      dtype='object')

In [5]:
dataframe=Noise_High_dataframe
target_column_1="target_5_val"
target_column_2="target_10_val"

In [6]:
class_index = list(dataframe[target_column_1].unique())
def encode(value, class_index = class_index):
    return class_index.index(value)

dataframe[target_column_1] = dataframe[target_column_1].apply(encode)

In [7]:
class_index = list(dataframe[target_column_2].unique())
def encode(value, class_index = class_index):
    return class_index.index(value)

dataframe[target_column_2] = dataframe[target_column_2].apply(encode)

## Setting up Dataset & Dataloaders

In [8]:
class CustomDataset(Dataset):
    def __init__(self, dataframe, noise, transform=None, target_transform=None,drop = None,target=None):
        self.dataframe = dataframe
        if drop != None:
            self.X = dataframe.drop(drop, axis=1).values
        else:
            self.X = dataframe.values
        self.y = dataframe[target].values
        self.transform = transform
        self.target_transform = target_transform
        self.noise = noise

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        item, label = self.X[idx], self.y[idx]
        return item, label

    def get_noise(self):
        return self.noise

In [9]:
dataset = CustomDataset(dataframe, "0",drop = ["row_num","day","era","target_10_val","target_5_val","data_type"],target=[target_column_1,target_column_2])
Noise_train, Noise_test = random_split(dataset, [int(0.8 * len(dataset)), len(dataset) - int(0.8 * len(dataset))])

In [10]:
Noise_train_loader = DataLoader(Noise_train, batch_size=64, shuffle=True)
Noise_test_loader = DataLoader(Noise_test, batch_size=64, shuffle=True)

In [11]:
dataframe.columns

Index(['Open_n_val', 'High_n_val', 'Low_n_val', 'Close_n_val', 'Volume_n_val',
       'SMA_10_val', 'SMA_20_val', 'CMO_14_val', 'High_n-Low_n_val',
       'Open_n-Close_n_val', 'SMA_20-SMA_10_val', 'Close_n_slope_3_val',
       'Close_n_slope_5_val', 'Close_n_slope_10_val', 'Open_n_changelen_val',
       'High_n_changelen_val', 'Low_n_changelen_val', 'Close_n_changelen_val',
       'High_n-Low_n_changelen_val', 'Open_n-Close_n_changelen_val',
       'SMA_20-SMA_10_changelen_val', 'Close_n_slope_3_changelen_val',
       'Close_n_slope_5_changelen_val', 'Close_n_slope_10_changelen_val',
       'row_num', 'day', 'era', 'target_10_val', 'target_5_val', 'data_type'],
      dtype='object')

## TWO-MLP

In [12]:
import torch.nn as nn
import torch.nn.init as init

In [13]:
class TWO_MLP(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(TWO_MLP, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_dim)
        self.fc3 = nn.Linear(hidden_dim,output_dim)
        self.softmax=nn.Softmax()
        self.relu = nn.ReLU()
        self.sigm =nn.Sigmoid()
    
    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        y_hat_1 = self.fc2(x)
        y_hat_1 = self.softmax(y_hat_1)
        y_hat_2 = self.fc3(x)
        y_hat_2 =self.softmax(y_hat_2)
        return y_hat_1,y_hat_2

In [14]:
import torch.nn as nn

class CustomLoss(nn.Module):
    def __init__(self,reg,no_of_class=12):
        super(CustomLoss, self).__init__()
        self.reg=reg
        self.no_of_class=12

    def forward(self, inputs, targets):
        p=torch.Tensor(inputs[0]).to(device).t()
        # Converting to 64 X 12 to 64 X 1
        # _,p=torch.max(p,1)
        targets=torch.eye(self.no_of_class).to(device)[targets].t()
        tou=torch.Tensor(inputs[1]).to(device)
        tou=tou.t()
        # print("tou",tou)
        # print(tou)
        loss_a= torch.t(targets)@torch.log(tou*(p-targets)+targets)
        loss_b=self.reg*torch.log(tou)
        loss_a=loss_a.diag().t()
        # print("Loss a",loss_a)
        # print("Loss b",loss_b)
        loss=loss_a+loss_b
        # print("loss",loss.shape)
        # print("return",loss.mean().shape)
        return -loss.mean()

    def custom_p(p):
        return p

In [15]:
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt

def train_model(model, criterion, optimizer, train_loader, val_loader, num_epochs=10,verbose=True):
    train_losses = []
    val_losses = []
    train_accuracies = []
    val_accuracies = []
    model.to(device)
    for epoch in range(num_epochs):
        model.train()
        running_train_loss = 0.0
        correct_train = 0
        total_train = 0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device).to(torch.float32), labels.to(device).to(torch.long)
            optimizer.zero_grad()
            outputs_0,outputs_1 = model(inputs)
            # print(len(outputs),len(outputs[0]))
            loss1 = criterion(outputs_0, labels[:,0])
            loss2 = criterion(outputs_1, labels[:,1])
            loss  = loss1 + loss2
            loss.backward()
            optimizer.step()
            running_train_loss += loss.item()
            _, predicted = torch.max(outputs_1, 1)
            total_train += labels.size(0)
            correct_train += (predicted == labels[:,1]).sum().item()

        train_loss = running_train_loss / len(train_loader)
        train_accuracy = correct_train / total_train
        train_losses.append(train_loss)
        train_accuracies.append(train_accuracy)

        model.eval()
        running_val_loss = 0.0
        correct_val = 0
        total_val = 0

        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device).to(torch.float32), labels.to(device).to(torch.long)
                outputs_0,outputs_1 = model(inputs)
                loss = criterion(outputs_1, labels[:,1])
                running_val_loss += loss.item()

                _, predicted = torch.max(outputs_1, 1)
                total_val += labels.size(0)
                correct_val += (predicted == labels[:,1]).sum().item()

        val_loss = running_val_loss / len(val_loader)
        val_accuracy = correct_val / total_val
        val_losses.append(val_loss)
        val_accuracies.append(val_accuracy)

        print(f'Epoch [{epoch+1}/{num_epochs}], '
              f'Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.4f}, '
              f'Val Loss: {val_loss:.4f}, Val Acc: {val_accuracy:.4f}')

    # Plotting
    plt.figure(figsize=(10, 5))
    plt.subplot(1, 2, 1)
    plt.plot(range(1, num_epochs + 1), train_losses, label='Train')
    plt.plot(range(1, num_epochs + 1), val_losses, label='Validation')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.title('Training and Validation Loss')
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(range(1, num_epochs + 1), train_accuracies, label='Train')
    plt.plot(range(1, num_epochs + 1), val_accuracies, label='Validation')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.title('Training and Validation Accuracy')
    plt.legend()

    plt.show()

In [16]:
Model = TWO_MLP(24,128,5)
Model = Model.to(device)

In [17]:
optimizer = optim.Adam(Model.parameters(), lr=0.001, weight_decay=0.001)

In [18]:
train_model(Model,torch.nn.CrossEntropyLoss(), optimizer,Noise_train_loader, Noise_test_loader,num_epochs=10)

  y_hat_1 = self.softmax(y_hat_1)
  y_hat_2 =self.softmax(y_hat_2)


# Successive training on target_5 and target_10

In [None]:
class MLP_ONE(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(TWO_MLP, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_dim)
        self.softmax=nn.Softmax()
        self.relu = nn.ReLU()
        self.sigm =nn.Sigmoid()
    
    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        y_hat_1 = self.fc2(x)
        y_hat_1 = self.softmax(y_hat_1)
        return y_hat_1