In [3]:
import torch
import requests
import os
import pandas as pd
import numpy as np
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from torch.utils.data import random_split
from tqdm import tqdm
device = torch.device(
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)

In [5]:
Noise_0_dataframe = pd.read_csv("../Data/Assignment1/data_0_noise")
Noise_Low_dataframe = pd.read_csv("../Data/Assignment1/data_Low_noise")
Noise_High_dataframe = pd.read_csv("../Data/Assignment1/data_High_noise")

In [40]:
dataframe=Noise_Low_dataframe
validation_dataframe=Noise_High_dataframe
target_columns="target_10_val"

In [42]:
to_encode = "target_10_val"
class_index = list(dataframe[to_encode].unique())
def encode(value, class_index = class_index):
    return class_index.index(value)

dataframe[to_encode] = dataframe[to_encode].apply(encode)
validation_dataframe[to_encode] = validation_dataframe[to_encode].apply(encode)

In [43]:
class CustomDataset(Dataset):
    def __init__(self, dataframe, noise, transform=None, target_transform=None, drop=None, target=None):
        self.dataframe = dataframe
        if drop != None:
            self.X = dataframe.drop(drop, axis=1).values
        else:
            self.X = dataframe.values
        
        self.y = dataframe[target].values
        self.transform = transform
        self.target_transform = target_transform
        self.noise = noise

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        item, label = self.X[idx], self.y[idx]
        return item, label

    def get_noise(self):
        return self.noise

In [44]:
dataset = CustomDataset(dataframe, "0",drop = ["row_num","day","era","target_10_val","target_5_val","data_type"],target=target_columns)
Noise_train, Noise_test = random_split(dataset, [int(0.8 * len(dataset)), len(dataset) - int(0.8 * len(dataset))])
Noise_train_loader = DataLoader(Noise_train, batch_size=128, shuffle=True)
Noise_test_loader = DataLoader(Noise_test, batch_size=128, shuffle=True)

In [56]:
class EncoderClassifier(torch.nn.Module):
    def __init__(self, encoder, latent_dim, layers, subset_size, overlap, activation=torch.nn.ReLU()):
        super().__init__()
        self.encoder = encoder
        self.layers = []
        linear = [torch.nn.Linear(layers[i], layers[i+1]) for i in range(len(layers)-1)]
        self.latent_layer = torch.nn.Linear(latent_dim, layers[0])
        
        for i in range(len(linear)):
            if (i == 0 or i == len(linear)-1):
                self.layers.append(linear[i])
            else:
                self.layers.append(linear[i])
                self.layers.append(activation)

        self.layers = torch.nn.Sequential(*self.layers)
        self.subset_size = subset_size
        self.overlap = overlap
        self.softmax = torch.nn.Softmax(dim=1)
        self.att_layer = torch.nn.Linear(latent_dim, 1)
        self.sigmoid = torch.nn.Sigmoid()
        self.lrelu = torch.nn.LeakyReLU()

    def forward(self, X):
        subsets = []
        num_columns = X.shape[1]
        for i in range(0, num_columns-self.subset_size, self.subset_size-self.overlap):
            subsets.append(X[:, i:i+self.subset_size])

        preds = 0
        h = 0
        for subset in subsets:
            pred = self.encoder(subset)
            pred = self.latent_layer(pred)
            h = self.att_layer(pred)
            pred = self.lrelu(pred)
            pred = self.layers(pred)
            pred = self.softmax(pred)
            preds += pred

        h = h/len(subsets)
        h = self.sigmoid(h)
        preds = preds/len(subsets)

        return preds, h

In [22]:
class SubsetAutoencoder (torch.nn.Module):
    def __init__(self, encoder_sizes,decoder_sizes,activation = torch.nn.ReLU()):
        super().__init__()
        linear_encoder = [torch.nn.Linear(encoder_sizes[i],encoder_sizes[i+1]) for i in range(len(encoder_sizes)-1)]
        linear_decoder = [torch.nn.Linear(decoder_sizes[i],decoder_sizes[i+1]) for i in range(len(decoder_sizes)-1)]
        self.encoder = torch.nn.Sequential(*[l for layer in linear_encoder for l in (layer, activation)])
        self.decoder = torch.nn.Sequential(*[l for layer in linear_decoder for l in (layer, activation)])
        
    def forward(self,X):
        X = self.encoder(X)
        X = self.decoder(X)
        return X
    
    def get_encoder(self):
        return self.encoder
    
    def get_decoder(self):
        return self.decoder

In [23]:
import torch.nn as nn

class CustomLoss(nn.Module):
    def __init__(self,reg,no_of_class=12):
        super(CustomLoss, self).__init__()
        self.reg=reg
        self.no_of_class=no_of_class

    def forward(self, inputs, targets):
        p=torch.Tensor(inputs[0]).to(device).t()
        # Converting to 64 X 12 to 64 X 1
        # _,p=torch.max(p,1)
        targets=torch.eye(self.no_of_class).to(device)[targets].t()
        tou=torch.Tensor(inputs[1]).to(device)
        tou=tou.t()
        # print("tou",tou)
        # print(tou)
        loss_a= torch.t(targets)@torch.log(tou*(p-targets)+targets)
        loss_b=self.reg*torch.log(tou)
        loss_a=loss_a.diag().t()
        # print("Loss a",loss_a)
        # print("Loss b",loss_b)
        loss=loss_a+loss_b
        # print("loss",loss.shape)
        # print("return",loss.mean().shape)
        return -loss.mean()

    def custom_p(p):
        return p

In [24]:
def get_subsets(current_batch,subset_size,overlap):
        subsets = []
        num_columns = current_batch.shape[1]
        for i in range(0, num_columns-subset_size,subset_size-overlap):
            subsets.append(current_batch[:,i:i+subset_size])
        return subsets


def train_ae(model,criterion,optimizer,epochs,trainloader,testloader,subset_size = 10,overlap = 4,lr = 0.001 , verbose = True):
    optimizer = optimizer(model.parameters(), lr=lr)
    train_loss = []
    val_loss = []
    for epoch in range(epochs):
        current_train_loss = 0
        current_accuracy = []
        for data, _ in tqdm(trainloader,desc = "Training Epoch "+str(epoch)):
            data= data.to(device).float()
            subsets = get_subsets(data,subset_size,overlap)
            optimizer.zero_grad()
            recons = []
            subset_loss = 0
            for subset in subsets:
                output = model(subset)
                recons.append(output)
                subset_loss += criterion(data,output)
            subset_loss = subset_loss.mean()
            subset_loss.backward()
            optimizer.step()
        print(f"epoch-{epoch} loss:",subset_loss)
        
def train_classifier(model,criterion,optimizer,epochs,trainloader,testloader,lr=0.001,verbose = True,subset_size = 10,overlap = 2):
    optimizer = optimizer(model.parameters(), lr=lr)
    train_loss = []
    val_loss = []
    for epoch in range(epochs):
        running_train_loss = 0
        total_train = 0
        correct_train = 0
        current_accuracy = []
        for data, target in tqdm(trainloader,desc = "Training Epoch "+str(epoch)):
            data, target = data.to(device).float(), target.to(device).long()
            subsets = get_subsets(data,subset_size,overlap)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output,target)
            loss.backward()
            optimizer.step()
            
            _, predicted = torch.max(output[0], 1)
            total_train += target.size(0)
            correct_train += (predicted == target).sum().item()
            current_accuracy.append(correct_train/total_train)
            running_train_loss+=loss
        
        running_train_loss /= len(trainloader)    
        print(f"epoch-{epoch} loss:{running_train_loss} accuracy:{correct_train/total_train}")

In [25]:
model = SubsetAutoencoder(encoder_sizes=[12,32,16,8],decoder_sizes=[8,16,32,24])
model.to(device)

SubsetAutoencoder(
  (encoder): Sequential(
    (0): Linear(in_features=12, out_features=32, bias=True)
    (1): ReLU()
    (2): Linear(in_features=32, out_features=16, bias=True)
    (3): ReLU()
    (4): Linear(in_features=16, out_features=8, bias=True)
    (5): ReLU()
  )
  (decoder): Sequential(
    (0): Linear(in_features=8, out_features=16, bias=True)
    (1): ReLU()
    (2): Linear(in_features=16, out_features=32, bias=True)
    (3): ReLU()
    (4): Linear(in_features=32, out_features=24, bias=True)
    (5): ReLU()
  )
)

In [26]:
optimizer = torch.optim.Adam
criterion = torch.nn.MSELoss()
train_ae(model,criterion,optimizer,20,Noise_train_loader,Noise_test_loader,subset_size = 12,overlap = 2)

  from .autonotebook import tqdm as notebook_tqdm
Training Epoch 0: 100%|██████████| 1560/1560 [00:08<00:00, 188.39it/s]


epoch-0 loss: tensor(0.1827, device='cuda:0', grad_fn=<MeanBackward0>)


Training Epoch 1: 100%|██████████| 1560/1560 [00:14<00:00, 105.95it/s]


epoch-1 loss: tensor(0.1614, device='cuda:0', grad_fn=<MeanBackward0>)


Training Epoch 2: 100%|██████████| 1560/1560 [00:12<00:00, 120.12it/s]


epoch-2 loss: tensor(0.1546, device='cuda:0', grad_fn=<MeanBackward0>)


Training Epoch 3: 100%|██████████| 1560/1560 [00:13<00:00, 119.56it/s]


epoch-3 loss: tensor(0.1613, device='cuda:0', grad_fn=<MeanBackward0>)


Training Epoch 4: 100%|██████████| 1560/1560 [00:11<00:00, 140.33it/s]


epoch-4 loss: tensor(0.1530, device='cuda:0', grad_fn=<MeanBackward0>)


Training Epoch 5: 100%|██████████| 1560/1560 [00:10<00:00, 151.95it/s]


epoch-5 loss: tensor(0.1555, device='cuda:0', grad_fn=<MeanBackward0>)


Training Epoch 6: 100%|██████████| 1560/1560 [00:12<00:00, 122.67it/s]


epoch-6 loss: tensor(0.1542, device='cuda:0', grad_fn=<MeanBackward0>)


Training Epoch 7: 100%|██████████| 1560/1560 [00:09<00:00, 167.35it/s]


epoch-7 loss: tensor(0.1545, device='cuda:0', grad_fn=<MeanBackward0>)


Training Epoch 8: 100%|██████████| 1560/1560 [00:07<00:00, 205.78it/s]


epoch-8 loss: tensor(0.1544, device='cuda:0', grad_fn=<MeanBackward0>)


Training Epoch 9: 100%|██████████| 1560/1560 [00:12<00:00, 122.85it/s]


epoch-9 loss: tensor(0.1559, device='cuda:0', grad_fn=<MeanBackward0>)


Training Epoch 10: 100%|██████████| 1560/1560 [00:07<00:00, 220.66it/s]


epoch-10 loss: tensor(0.1493, device='cuda:0', grad_fn=<MeanBackward0>)


Training Epoch 11: 100%|██████████| 1560/1560 [00:07<00:00, 211.78it/s]


epoch-11 loss: tensor(0.1455, device='cuda:0', grad_fn=<MeanBackward0>)


Training Epoch 12: 100%|██████████| 1560/1560 [00:06<00:00, 223.05it/s]


epoch-12 loss: tensor(0.1468, device='cuda:0', grad_fn=<MeanBackward0>)


Training Epoch 13: 100%|██████████| 1560/1560 [00:06<00:00, 228.87it/s]


epoch-13 loss: tensor(0.1485, device='cuda:0', grad_fn=<MeanBackward0>)


Training Epoch 14: 100%|██████████| 1560/1560 [00:06<00:00, 237.43it/s]


epoch-14 loss: tensor(0.1498, device='cuda:0', grad_fn=<MeanBackward0>)


Training Epoch 15: 100%|██████████| 1560/1560 [00:06<00:00, 228.95it/s]


epoch-15 loss: tensor(0.1481, device='cuda:0', grad_fn=<MeanBackward0>)


Training Epoch 16: 100%|██████████| 1560/1560 [00:07<00:00, 217.50it/s]


epoch-16 loss: tensor(0.1436, device='cuda:0', grad_fn=<MeanBackward0>)


Training Epoch 17: 100%|██████████| 1560/1560 [00:06<00:00, 235.55it/s]


epoch-17 loss: tensor(0.1385, device='cuda:0', grad_fn=<MeanBackward0>)


Training Epoch 18: 100%|██████████| 1560/1560 [00:07<00:00, 205.91it/s]


epoch-18 loss: tensor(0.1502, device='cuda:0', grad_fn=<MeanBackward0>)


Training Epoch 19: 100%|██████████| 1560/1560 [00:07<00:00, 205.17it/s]

epoch-19 loss: tensor(0.1500, device='cuda:0', grad_fn=<MeanBackward0>)





In [45]:
encoder = model.get_encoder()

In [57]:
classifier = EncoderClassifier(encoder,8,[8,5],12,6)
classifier = classifier.to(device)
print(classifier)

EncoderClassifier(
  (encoder): Sequential(
    (0): Linear(in_features=12, out_features=32, bias=True)
    (1): ReLU()
    (2): Linear(in_features=32, out_features=16, bias=True)
    (3): ReLU()
    (4): Linear(in_features=16, out_features=8, bias=True)
    (5): ReLU()
  )
  (latent_layer): Linear(in_features=8, out_features=8, bias=True)
  (layers): Sequential(
    (0): Linear(in_features=8, out_features=5, bias=True)
  )
  (softmax): Softmax(dim=1)
  (att_layer): Linear(in_features=8, out_features=1, bias=True)
  (sigmoid): Sigmoid()
  (lrelu): LeakyReLU(negative_slope=0.01)
)


In [58]:
train_classifier(classifier,CustomLoss(50,no_of_class=5),torch.optim.Adam,20,Noise_train_loader,Noise_test_loader,subset_size = 12,overlap = 6)

Training Epoch 0: 100%|██████████| 1950/1950 [00:11<00:00, 166.80it/s]


epoch-0 loss:1.0764548778533936 accuracy:0.6514543269230769


Training Epoch 1: 100%|██████████| 1950/1950 [00:10<00:00, 178.55it/s]


epoch-1 loss:0.7552322149276733 accuracy:0.737479967948718


Training Epoch 2: 100%|██████████| 1950/1950 [00:11<00:00, 174.29it/s]


epoch-2 loss:0.737666666507721 accuracy:0.7393429487179487


Training Epoch 3: 100%|██████████| 1950/1950 [00:10<00:00, 177.54it/s]


epoch-3 loss:0.7285879254341125 accuracy:0.7414663461538461


Training Epoch 4: 100%|██████████| 1950/1950 [00:11<00:00, 175.63it/s]


epoch-4 loss:0.7212272882461548 accuracy:0.7417227564102564


Training Epoch 5: 100%|██████████| 1950/1950 [00:11<00:00, 174.80it/s]


epoch-5 loss:0.7141486406326294 accuracy:0.7441346153846153


Training Epoch 6: 100%|██████████| 1950/1950 [00:11<00:00, 175.30it/s]


epoch-6 loss:0.7087969183921814 accuracy:0.7444551282051282


Training Epoch 7: 100%|██████████| 1950/1950 [00:11<00:00, 163.70it/s]


epoch-7 loss:0.7038829922676086 accuracy:0.7446394230769231


Training Epoch 8: 100%|██████████| 1950/1950 [00:11<00:00, 166.31it/s]


epoch-8 loss:0.6993588209152222 accuracy:0.7461658653846154


Training Epoch 9: 100%|██████████| 1950/1950 [00:11<00:00, 164.05it/s]


epoch-9 loss:0.696841299533844 accuracy:0.7463701923076923


Training Epoch 10: 100%|██████████| 1950/1950 [00:11<00:00, 163.23it/s]


epoch-10 loss:0.6933028697967529 accuracy:0.7468349358974359


Training Epoch 11: 100%|██████████| 1950/1950 [00:11<00:00, 170.07it/s]


epoch-11 loss:0.691066324710846 accuracy:0.7474639423076923


Training Epoch 12: 100%|██████████| 1950/1950 [00:11<00:00, 170.24it/s]


epoch-12 loss:0.6883957386016846 accuracy:0.7477564102564103


Training Epoch 13: 100%|██████████| 1950/1950 [00:11<00:00, 169.22it/s]


epoch-13 loss:0.6868290305137634 accuracy:0.7483533653846154


Training Epoch 14: 100%|██████████| 1950/1950 [00:11<00:00, 176.27it/s]


epoch-14 loss:0.6835590600967407 accuracy:0.7492668269230769


Training Epoch 15: 100%|██████████| 1950/1950 [00:11<00:00, 170.02it/s]


epoch-15 loss:0.6803105473518372 accuracy:0.7496474358974359


Training Epoch 16: 100%|██████████| 1950/1950 [00:11<00:00, 176.13it/s]


epoch-16 loss:0.6766517162322998 accuracy:0.7511538461538462


Training Epoch 17: 100%|██████████| 1950/1950 [00:11<00:00, 167.17it/s]


epoch-17 loss:0.6740013957023621 accuracy:0.7522996794871795


Training Epoch 18: 100%|██████████| 1950/1950 [00:11<00:00, 176.53it/s]


epoch-18 loss:0.6719038486480713 accuracy:0.7524959935897436


Training Epoch 19: 100%|██████████| 1950/1950 [00:11<00:00, 162.50it/s]

epoch-19 loss:0.6697731614112854 accuracy:0.7527043269230769



