In [1]:
import torch
import requests
import os
import pandas as pd
import numpy as np
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from torch.utils.data import random_split
from tqdm import tqdm
device = torch.device(
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)

In [2]:
Noise_0_dataframe = pd.read_csv("Data/Assignment1/data_0_noise")
Noise_Low_dataframe = pd.read_csv("Data/Assignment1/data_Low_noise")
Noise_High_dataframe = pd.read_csv("Data/Assignment1/data_High_noise")

In [133]:
dataframe=Noise_High_dataframe
validation_dataframe=Noise_High_dataframe
target_columns="target_10_val"

In [134]:
to_encode = "target_10_val"
class_index = list(dataframe[to_encode].unique())
def encode(value, class_index = class_index):
    return class_index.index(value)

dataframe[to_encode] = dataframe[to_encode].apply(encode)
validation_dataframe[to_encode] = validation_dataframe[to_encode].apply(encode)

In [135]:
class CustomDataset(Dataset):
    def __init__(self, dataframe, noise, transform=None, target_transform=None, drop=None, target=None):
        self.dataframe = dataframe
        if drop != None:
            self.X = dataframe.drop(drop, axis=1).values
        else:
            self.X = dataframe.values
        
        self.y = dataframe[target].values
        self.transform = transform
        self.target_transform = target_transform
        self.noise = noise

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        item, label = self.X[idx], self.y[idx]
        return item, label

    def get_noise(self):
        return self.noise

In [136]:
dataset = CustomDataset(dataframe, "0",drop = ["row_num","day","era","target_10_val","target_5_val","data_type"],target=target_columns)
Noise_train, Noise_test = random_split(dataset, [int(0.8 * len(dataset)), len(dataset) - int(0.8 * len(dataset))])
Noise_train_loader = DataLoader(Noise_train, batch_size=128, shuffle=True)
Noise_test_loader = DataLoader(Noise_test, batch_size=128, shuffle=True)

In [137]:
class EncoderClassifier(torch.nn.Module):
    def __init__(self,encoder,latent_dim,linear,subset_size,overlap,activation = torch.nn.ReLU()):
        super().__init__()
        self.encoder = encoder 
        self.linear = [torch.nn.Linear(linear[i],linear[i+1]) for i in range(len(linear)-1)]
        self.linear = torch.nn.Sequential(*[l for layer in self.linear for l in (layer, activation)])
        self.subset_size = subset_size
        self.overlap = overlap
        self.softmax = torch.nn.Softmax(dim = 1)
        self.att_layer = torch.nn.Linear(latent_dim,1)
        self.sigmoid = torch.nn.Sigmoid()
    
    def forward(self,X):
        subsets = []
        num_columns = X.shape[1]
        for i in range(0, num_columns-self.subset_size,self.subset_size-self.overlap):
            subsets.append(X[:,i:i+self.subset_size])
        
        preds = 0
        h = 0
        for subset in subsets:
            pred = self.encoder(subset)
            h = self.att_layer(pred)
            pred = self.linear(pred)
            pred = self.softmax(pred)
            preds += pred
        
        h = h/len(subsets)
        h = self.sigmoid(h)     
        preds = preds/len(subsets)
        
        return preds,h

In [138]:
class SubsetAutoencoder (torch.nn.Module):
    def __init__(self, encoder_sizes,decoder_sizes,activation = torch.nn.ReLU()):
        super().__init__()
        linear_encoder = [torch.nn.Linear(encoder_sizes[i],encoder_sizes[i+1]) for i in range(len(encoder_sizes)-1)]
        linear_decoder = [torch.nn.Linear(decoder_sizes[i],decoder_sizes[i+1]) for i in range(len(decoder_sizes)-1)]
        self.encoder = torch.nn.Sequential(*[l for layer in linear_encoder for l in (layer, activation)])
        self.decoder = torch.nn.Sequential(*[l for layer in linear_decoder for l in (layer, activation)])
        
    def forward(self,X):
        X = self.encoder(X)
        X = self.decoder(X)
        return X
    
    def get_encoder(self):
        return self.encoder
    
    def get_decoder(self):
        return self.decoder

In [139]:
import torch.nn as nn

class CustomLoss(nn.Module):
    def __init__(self,reg,no_of_class=12):
        super(CustomLoss, self).__init__()
        self.reg=reg
        self.no_of_class=no_of_class

    def forward(self, inputs, targets):
        p=torch.Tensor(inputs[0]).to(device).t()
        # Converting to 64 X 12 to 64 X 1
        # _,p=torch.max(p,1)
        targets=torch.eye(self.no_of_class).to(device)[targets].t()
        tou=torch.Tensor(inputs[1]).to(device)
        tou=tou.t()
        # print("tou",tou)
        # print(tou)
        loss_a= torch.t(targets)@torch.log(tou*(p-targets)+targets)
        loss_b=self.reg*torch.log(tou)
        loss_a=loss_a.diag().t()
        # print("Loss a",loss_a)
        # print("Loss b",loss_b)
        loss=loss_a+loss_b
        # print("loss",loss.shape)
        # print("return",loss.mean().shape)
        return -loss.mean()

    def custom_p(p):
        return p

In [141]:
def get_subsets(current_batch,subset_size,overlap):
        subsets = []
        num_columns = current_batch.shape[1]
        for i in range(0, num_columns-subset_size,subset_size-overlap):
            subsets.append(current_batch[:,i:i+subset_size])
        return subsets


def train_ae(model,criterion,optimizer,epochs,trainloader,testloader,subset_size = 10,overlap = 4,lr = 0.001 , verbose = True):
    optimizer = optimizer(model.parameters(), lr=lr)
    train_loss = []
    val_loss = []
    for epoch in range(epochs):
        current_train_loss = 0
        current_accuracy = []
        for data, _ in tqdm(trainloader,desc = "Training Epoch "+str(epoch)):
            data= data.to(device).float()
            subsets = get_subsets(data,subset_size,overlap)
            optimizer.zero_grad()
            recons = []
            subset_loss = 0
            for subset in subsets:
                output = model(subset)
                recons.append(output)
                subset_loss += criterion(data,output)
            subset_loss = subset_loss.mean()
            subset_loss.backward()
            optimizer.step()
        print(f"epoch-{epoch} loss:",subset_loss)
        
def train_classifier(model,criterion,optimizer,epochs,trainloader,testloader,lr=0.001,verbose = True,subset_size = 10,overlap = 2):
    optimizer = optimizer(model.parameters(), lr=lr)
    train_loss = []
    val_loss = []
    for epoch in range(epochs):
        running_train_loss = 0
        total_train = 0
        correct_train = 0
        current_accuracy = []
        for data, target in tqdm(trainloader,desc = "Training Epoch "+str(epoch)):
            data, target = data.to(device).float(), target.to(device).long()
            subsets = get_subsets(data,subset_size,overlap)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output,target)
            loss.backward()
            optimizer.step()
            
            _, predicted = torch.max(output[0], 1)
            total_train += target.size(0)
            correct_train += (predicted == target).sum().item()
            current_accuracy.append(correct_train/total_train)
            running_train_loss+=loss
        
        running_train_loss /= len(trainloader)    
        print(f"epoch-{epoch} loss:{running_train_loss} accuracy:{correct_train/total_train}")

In [142]:
model = SubsetAutoencoder(encoder_sizes=[12,32,16,8],decoder_sizes=[8,16,32,24])
model.to(device)

SubsetAutoencoder(
  (encoder): Sequential(
    (0): Linear(in_features=12, out_features=32, bias=True)
    (1): ReLU()
    (2): Linear(in_features=32, out_features=16, bias=True)
    (3): ReLU()
    (4): Linear(in_features=16, out_features=8, bias=True)
    (5): ReLU()
  )
  (decoder): Sequential(
    (0): Linear(in_features=8, out_features=16, bias=True)
    (1): ReLU()
    (2): Linear(in_features=16, out_features=32, bias=True)
    (3): ReLU()
    (4): Linear(in_features=32, out_features=24, bias=True)
    (5): ReLU()
  )
)

In [143]:
optimizer = torch.optim.Adam
criterion = torch.nn.MSELoss()
train_ae(model,criterion,optimizer,20,Noise_train_loader,Noise_test_loader,subset_size = 12,overlap = 2)

Training Epoch 0: 100%|██████████| 1560/1560 [00:07<00:00, 222.23it/s]


epoch-0 loss: tensor(0.2082, device='cuda:0', grad_fn=<MeanBackward0>)


Training Epoch 1: 100%|██████████| 1560/1560 [00:06<00:00, 224.30it/s]


epoch-1 loss: tensor(0.2196, device='cuda:0', grad_fn=<MeanBackward0>)


Training Epoch 2: 100%|██████████| 1560/1560 [00:07<00:00, 222.41it/s]


epoch-2 loss: tensor(0.2081, device='cuda:0', grad_fn=<MeanBackward0>)


Training Epoch 3: 100%|██████████| 1560/1560 [00:06<00:00, 224.28it/s]


epoch-3 loss: tensor(0.2107, device='cuda:0', grad_fn=<MeanBackward0>)


Training Epoch 4: 100%|██████████| 1560/1560 [00:06<00:00, 236.96it/s]


epoch-4 loss: tensor(0.1855, device='cuda:0', grad_fn=<MeanBackward0>)


Training Epoch 5: 100%|██████████| 1560/1560 [00:06<00:00, 236.49it/s]


epoch-5 loss: tensor(0.1887, device='cuda:0', grad_fn=<MeanBackward0>)


Training Epoch 6: 100%|██████████| 1560/1560 [00:06<00:00, 224.20it/s]


epoch-6 loss: tensor(0.1974, device='cuda:0', grad_fn=<MeanBackward0>)


Training Epoch 7: 100%|██████████| 1560/1560 [00:06<00:00, 227.68it/s]


epoch-7 loss: tensor(0.1941, device='cuda:0', grad_fn=<MeanBackward0>)


Training Epoch 8: 100%|██████████| 1560/1560 [00:06<00:00, 226.97it/s]


epoch-8 loss: tensor(0.1831, device='cuda:0', grad_fn=<MeanBackward0>)


Training Epoch 9: 100%|██████████| 1560/1560 [00:06<00:00, 225.70it/s]


epoch-9 loss: tensor(0.1848, device='cuda:0', grad_fn=<MeanBackward0>)


Training Epoch 10: 100%|██████████| 1560/1560 [00:06<00:00, 232.29it/s]


epoch-10 loss: tensor(0.1798, device='cuda:0', grad_fn=<MeanBackward0>)


Training Epoch 11: 100%|██████████| 1560/1560 [00:06<00:00, 237.91it/s]


epoch-11 loss: tensor(0.1791, device='cuda:0', grad_fn=<MeanBackward0>)


Training Epoch 12: 100%|██████████| 1560/1560 [00:06<00:00, 241.86it/s]


epoch-12 loss: tensor(0.1803, device='cuda:0', grad_fn=<MeanBackward0>)


Training Epoch 13: 100%|██████████| 1560/1560 [00:06<00:00, 229.80it/s]


epoch-13 loss: tensor(0.1822, device='cuda:0', grad_fn=<MeanBackward0>)


Training Epoch 14: 100%|██████████| 1560/1560 [00:07<00:00, 221.07it/s]


epoch-14 loss: tensor(0.1783, device='cuda:0', grad_fn=<MeanBackward0>)


Training Epoch 15: 100%|██████████| 1560/1560 [00:06<00:00, 248.91it/s]


epoch-15 loss: tensor(0.1752, device='cuda:0', grad_fn=<MeanBackward0>)


Training Epoch 16: 100%|██████████| 1560/1560 [00:06<00:00, 243.24it/s]


epoch-16 loss: tensor(0.1870, device='cuda:0', grad_fn=<MeanBackward0>)


Training Epoch 17: 100%|██████████| 1560/1560 [00:06<00:00, 241.99it/s]


epoch-17 loss: tensor(0.1795, device='cuda:0', grad_fn=<MeanBackward0>)


Training Epoch 18: 100%|██████████| 1560/1560 [00:06<00:00, 239.00it/s]


epoch-18 loss: tensor(0.1693, device='cuda:0', grad_fn=<MeanBackward0>)


Training Epoch 19: 100%|██████████| 1560/1560 [00:06<00:00, 241.64it/s]

epoch-19 loss: tensor(0.1825, device='cuda:0', grad_fn=<MeanBackward0>)





In [144]:
encoder = model.get_encoder()

In [145]:
classifier = EncoderClassifier(encoder,8,[8,5],12,6)
classifier = classifier.to(device)
print(classifier)

EncoderClassifier(
  (encoder): Sequential(
    (0): Linear(in_features=12, out_features=32, bias=True)
    (1): ReLU()
    (2): Linear(in_features=32, out_features=16, bias=True)
    (3): ReLU()
    (4): Linear(in_features=16, out_features=8, bias=True)
    (5): ReLU()
  )
  (linear): Sequential(
    (0): Linear(in_features=8, out_features=5, bias=True)
    (1): ReLU()
  )
  (softmax): Softmax(dim=1)
  (att_layer): Linear(in_features=8, out_features=1, bias=True)
  (sigmoid): Sigmoid()
)


In [146]:
train_classifier(classifier,CustomLoss(50,no_of_class=5),torch.optim.Adam,20,Noise_train_loader,Noise_test_loader,subset_size = 12,overlap = 6)

Training Epoch 0: 100%|██████████| 1560/1560 [00:08<00:00, 181.75it/s]


epoch-0 loss:2.7344799041748047 accuracy:0.39201221955128207


Training Epoch 1: 100%|██████████| 1560/1560 [00:13<00:00, 119.30it/s]


epoch-1 loss:1.2297755479812622 accuracy:0.4793569711538462


Training Epoch 2: 100%|██████████| 1560/1560 [00:07<00:00, 210.55it/s]


epoch-2 loss:1.1868349313735962 accuracy:0.5060546875


Training Epoch 3: 100%|██████████| 1560/1560 [00:07<00:00, 196.48it/s]


epoch-3 loss:1.1596285104751587 accuracy:0.527774439102564


Training Epoch 4: 100%|██████████| 1560/1560 [00:08<00:00, 191.75it/s]


epoch-4 loss:1.1336559057235718 accuracy:0.5445362580128205


Training Epoch 5: 100%|██████████| 1560/1560 [00:07<00:00, 195.05it/s]


epoch-5 loss:1.1135388612747192 accuracy:0.551747796474359


Training Epoch 6: 100%|██████████| 1560/1560 [00:07<00:00, 203.88it/s]


epoch-6 loss:1.1018449068069458 accuracy:0.5546173878205128


Training Epoch 7: 100%|██████████| 1560/1560 [00:08<00:00, 191.10it/s]


epoch-7 loss:1.0944889783859253 accuracy:0.5579577323717949


Training Epoch 8: 100%|██████████| 1560/1560 [00:07<00:00, 196.07it/s]


epoch-8 loss:1.0870729684829712 accuracy:0.5611378205128205


Training Epoch 9: 100%|██████████| 1560/1560 [00:13<00:00, 116.57it/s]


epoch-9 loss:1.0798307657241821 accuracy:0.5624048477564103


Training Epoch 10: 100%|██████████| 1560/1560 [00:11<00:00, 138.16it/s]


epoch-10 loss:1.0744620561599731 accuracy:0.5659104567307692


Training Epoch 11: 100%|██████████| 1560/1560 [00:09<00:00, 170.93it/s]


epoch-11 loss:1.0699840784072876 accuracy:0.5660606971153846


Training Epoch 12: 100%|██████████| 1560/1560 [00:08<00:00, 183.97it/s]


epoch-12 loss:1.0650140047073364 accuracy:0.5678986378205129


Training Epoch 13: 100%|██████████| 1560/1560 [00:07<00:00, 208.43it/s]


epoch-13 loss:1.061963438987732 accuracy:0.5717247596153846


Training Epoch 14: 100%|██████████| 1560/1560 [00:08<00:00, 176.96it/s]


epoch-14 loss:1.058691143989563 accuracy:0.5733373397435897


Training Epoch 15: 100%|██████████| 1560/1560 [00:07<00:00, 198.61it/s]


epoch-15 loss:1.0545731782913208 accuracy:0.5752453926282052


Training Epoch 16: 100%|██████████| 1560/1560 [00:07<00:00, 204.68it/s]


epoch-16 loss:1.0525054931640625 accuracy:0.5768529647435897


Training Epoch 17: 100%|██████████| 1560/1560 [00:07<00:00, 210.82it/s]


epoch-17 loss:1.0499213933944702 accuracy:0.5791165865384615


Training Epoch 18: 100%|██████████| 1560/1560 [00:08<00:00, 186.79it/s]


epoch-18 loss:1.0466463565826416 accuracy:0.581064703525641


Training Epoch 19: 100%|██████████| 1560/1560 [00:07<00:00, 196.02it/s]

epoch-19 loss:1.0435656309127808 accuracy:0.5849909855769231



