In [33]:
import torch
import requests
import os
import pandas as pd
import numpy as np
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from torch.utils.data import random_split
from tqdm import tqdm
device = torch.device(
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)

In [4]:
Noise_0_dataframe = pd.read_csv("Data/Assignment1/data_0_noise")
Noise_Low_dataframe = pd.read_csv("Data/Assignment1/data_Low_noise")
Noise_High_dataframe = pd.read_csv("Data/Assignment1/data_High_noise")

In [124]:
dataframe=Noise_Low_dataframe
validation_dataframe=Noise_Low_dataframe
target_columns="era"

In [125]:
to_encode = "era"
class_index = list(dataframe[to_encode].unique())
def encode(value, class_index = class_index):
    return class_index.index(value)

dataframe[to_encode] = dataframe[to_encode].apply(encode)
validation_dataframe[to_encode] = validation_dataframe[to_encode].apply(encode)

In [127]:
class CustomDataset(Dataset):
    def __init__(self, dataframe, noise, transform=None, target_transform=None, drop=None, target=None, encoderdecoder=False):
        self.dataframe = dataframe
        if drop != None:
            self.X = dataframe.drop(drop, axis=1).values
        else:
            self.X = dataframe.values

        if not encoderdecoder:
            self.y = dataframe[target].values
        else:
            self.y = self.X
        self.transform = transform
        self.target_transform = target_transform
        self.noise = noise

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        item, label = self.X[idx], self.y[idx]
        return item, label

    def get_noise(self):
        return self.noise

In [128]:
dataset = CustomDataset(dataframe, "0",drop = ["row_num","day","era","target_10_val","target_5_val","data_type"],target=target_columns)
Noise_train, Noise_test = random_split(dataset, [int(0.8 * len(dataset)), len(dataset) - int(0.8 * len(dataset))])
Noise_train_loader = DataLoader(Noise_train, batch_size=128, shuffle=True)
Noise_test_loader = DataLoader(Noise_test, batch_size=128, shuffle=True)

In [153]:
class MyMLP(torch.nn.Module):
    def __init__(self,ModuleList):
        super(MyMLP, self).__init__()
        module_list = torch.nn.ModuleList(ModuleList)
        self.layers = torch.nn.Sequential(*module_list)
        self.softmax = torch.nn.Softmax(dim=1)
    
    def forward(self,X):
        X = self.layers(X)
        X = self.softmax(X)
        return X

In [43]:
class SubTabEncoderDecoder(torch.nn.Module):
    def __init__(self,encoder_sizes,decoder_sizes,activation = torch.nn.ReLU()):
        super().__init__()
        self.encoder_sizes = encoder_sizes
        self.decoder_sizes = decoder_sizes
        linear_encoder = [torch.nn.Linear(encoder_sizes[i],encoder_sizes[i+1]) for i in range(len(encoder_sizes)-1)]
        linear_decoder = [torch.nn.Linear(decoder_sizes[i],decoder_sizes[i+1]) for i in range(len(decoder_sizes)-1)]
        self.encoder = torch.nn.Sequential(*[l for layer in linear_encoder for l in (layer, activation)])
        self.decoder = torch.nn.Sequential(*[l for layer in linear_decoder for l in (layer, activation)])
        
    def forward(self,x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x
    
    def get_encoder(self):
        return self.encoder
    
    def get_decoder(self):
        return self.decoder

In [98]:
model = SubTabEncoderDecoder([24,32,16],[16,32,24])
model = model.to(device)

In [168]:
def train(model,criterion,optimizer,train_loader,val_loader,num_epochs = 100,verbos = True, lr = 0.001):
    optimizer = optimizer(model.parameters(), lr=lr)
    train_loss = []
    val_loss = []
    for epoch in range(num_epochs):
        model.train()
        current_train_loss = 0
        current_accuracy = []
        for data, target in tqdm(train_loader,desc = "Training Epoch "+str(epoch)):
            data, target = data.to(device).float(), target.to(device).long()
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            current_train_loss += loss.item()
            current_accuracy.append(torch.sum(torch.argmax(output,dim=1) == target).item()/len(target))
            
        if (verbos):
            print("Epoch: ",epoch," Loss: ",current_train_loss/len(train_loader))
            print("Accuracy: ",np.mean(current_accuracy))
            
        train_loss.append(current_train_loss/len(train_loader))
        model.eval()
        current_val_loss = 0
        current_val_accuracy = []
        with torch.no_grad():
            for data, target in val_loader:
                data, target = data.to(device).float(), target.to(device).long()
                output = model(data)
                loss = criterion(output, target)
                current_val_loss += loss.item()
                current_val_accuracy.append(torch.sum(torch.argmax(output,dim=1) == target).item()/len(target))
            val_loss.append(current_val_loss/len(val_loader))
            if (verbos):
                print("Validation Loss: ",current_val_loss/len(val_loader))
                print("Validation Accuracy: ",np.mean(current_val_accuracy))

In [114]:
encoder = model.get_encoder()
torch.save(encoder, "encoder.pth")

In [179]:
encoder = torch.load("encoder.pth")
for param in encoder.parameters():
    param.requires_grad = False
    print(param)

modules = [encoder]
modules.append(torch.nn.Linear(16, 16))
modules.append(torch.nn.Linear(16,12))
model = MyMLP(modules)
model = model.to(device)

Parameter containing:
tensor([[ 5.4385e-02,  8.8948e-02,  4.4324e-02,  1.4275e-01, -5.7704e-02,
         -2.7618e-01,  7.2716e-02,  8.1099e-02,  2.0728e-01,  2.6289e-01,
          4.9600e-01, -7.6064e-02, -2.2410e-03, -8.9613e-02,  1.0795e-01,
          1.6437e-01, -2.9043e-01,  1.4872e-01,  2.6488e-01,  9.9153e-02,
          4.2092e-01,  1.4136e-01,  4.8081e-02, -4.3162e-02],
        [-1.6995e-01,  4.2841e-02, -8.1688e-02, -6.8102e-02,  4.0787e-02,
          3.1971e-01,  3.5013e-01,  1.4828e-01,  3.4598e-01, -1.2330e-01,
         -3.8470e-02,  2.1789e-01, -1.3521e-01,  1.6085e-01, -9.4290e-02,
          3.1923e-01,  2.3472e-01,  9.9152e-02,  7.9975e-03,  4.6274e-01,
         -1.2566e-01, -5.0012e-02,  4.3919e-02,  4.3021e-01],
        [-1.0232e-01, -4.2814e-03,  2.1391e-01,  4.5100e-03, -1.0470e-01,
          2.6734e-02,  1.2554e-01,  2.1296e-01, -2.4479e-02, -1.5362e-01,
          1.4884e-02, -5.5587e-02,  1.8573e-01,  8.2411e-02,  1.6707e-01,
          5.8941e-02,  2.2441e-02,  2.67

In [178]:
train(model,torch.nn.CrossEntropyLoss(),torch.optim.Adam,Noise_train_loader,Noise_test_loader,num_epochs=100)

Training Epoch 0: 100%|██████████| 1950/1950 [00:05<00:00, 359.14it/s]


Epoch:  0  Loss:  2.3669971600556985
Accuracy:  0.2544471153846154
Validation Loss:  2.2934760622313766
Validation Accuracy:  0.3462634477459016


Training Epoch 1: 100%|██████████| 1950/1950 [00:04<00:00, 409.18it/s]


Epoch:  1  Loss:  2.270648723504482
Accuracy:  0.3690665064102564
Validation Loss:  2.2456896818075025
Validation Accuracy:  0.393890881147541


Training Epoch 2: 100%|██████████| 1950/1950 [00:04<00:00, 400.99it/s]


Epoch:  2  Loss:  2.2306870327240382
Accuracy:  0.40306490384615384
Validation Loss:  2.2215047082940087
Validation Accuracy:  0.4067622950819672


Training Epoch 3: 100%|██████████| 1950/1950 [00:05<00:00, 388.62it/s]


Epoch:  3  Loss:  2.217719182479076
Accuracy:  0.4103485576923077
Validation Loss:  2.2130843142016987
Validation Accuracy:  0.41446273053278687


Training Epoch 4: 100%|██████████| 1950/1950 [00:05<00:00, 383.18it/s]


Epoch:  4  Loss:  2.2093342067033817
Accuracy:  0.41665865384615386
Validation Loss:  2.2059392660367685
Validation Accuracy:  0.419921875


Training Epoch 5: 100%|██████████| 1950/1950 [00:04<00:00, 410.98it/s]


Epoch:  5  Loss:  2.2041846643350063
Accuracy:  0.4198076923076923
Validation Loss:  2.2023264575200003
Validation Accuracy:  0.42333183913934425


Training Epoch 6: 100%|██████████| 1950/1950 [00:04<00:00, 431.29it/s]


Epoch:  6  Loss:  2.200918201911144
Accuracy:  0.42232772435897437
Validation Loss:  2.198693265191844
Validation Accuracy:  0.42482069672131145


Training Epoch 7: 100%|██████████| 1950/1950 [00:04<00:00, 415.65it/s]


Epoch:  7  Loss:  2.198523012063442
Accuracy:  0.42396233974358977
Validation Loss:  2.1962737934511214
Validation Accuracy:  0.4276063012295082


Training Epoch 8: 100%|██████████| 1950/1950 [00:04<00:00, 392.97it/s]


Epoch:  8  Loss:  2.1965549081411115
Accuracy:  0.42582131410256413
Validation Loss:  2.1965509394153218
Validation Accuracy:  0.42477266905737704


Training Epoch 9: 100%|██████████| 1950/1950 [00:04<00:00, 430.41it/s]


Epoch:  9  Loss:  2.194837774619078
Accuracy:  0.42692708333333335
Validation Loss:  2.193482826479146
Validation Accuracy:  0.42755827356557374


Training Epoch 10: 100%|██████████| 1950/1950 [00:04<00:00, 405.32it/s]


Epoch:  10  Loss:  2.193172877629598
Accuracy:  0.428349358974359
Validation Loss:  2.1920139418273674
Validation Accuracy:  0.42971951844262296


Training Epoch 11: 100%|██████████| 1950/1950 [00:04<00:00, 394.54it/s]


Epoch:  11  Loss:  2.1909453099813216
Accuracy:  0.43107772435897435
Validation Loss:  2.190336556219664
Validation Accuracy:  0.4326652151639344


Training Epoch 12: 100%|██████████| 1950/1950 [00:04<00:00, 398.64it/s]


Epoch:  12  Loss:  2.189569254899636
Accuracy:  0.4326522435897436
Validation Loss:  2.188042845393791
Validation Accuracy:  0.433257556352459


Training Epoch 13: 100%|██████████| 1950/1950 [00:04<00:00, 399.14it/s]


Epoch:  13  Loss:  2.1883527655479234
Accuracy:  0.43404647435897437
Validation Loss:  2.187578053259459
Validation Accuracy:  0.43458632172131145


Training Epoch 14: 100%|██████████| 1950/1950 [00:04<00:00, 431.38it/s]


Epoch:  14  Loss:  2.187247787255507
Accuracy:  0.43513221153846154
Validation Loss:  2.186283449169065
Validation Accuracy:  0.4357870133196721


Training Epoch 15: 100%|██████████| 1950/1950 [00:04<00:00, 409.09it/s]


Epoch:  15  Loss:  2.186188565034133
Accuracy:  0.4359815705128205
Validation Loss:  2.1862297155818
Validation Accuracy:  0.4359951331967213


Training Epoch 16: 100%|██████████| 1950/1950 [00:05<00:00, 388.36it/s]


Epoch:  16  Loss:  2.1851418550197894
Accuracy:  0.43720753205128204
Validation Loss:  2.184450410917157
Validation Accuracy:  0.4373078893442623


Training Epoch 17: 100%|██████████| 1950/1950 [00:04<00:00, 413.52it/s]


Epoch:  17  Loss:  2.1841817497595764
Accuracy:  0.43778044871794874
Validation Loss:  2.1839540576348537
Validation Accuracy:  0.43748399077868855


Training Epoch 18: 100%|██████████| 1950/1950 [00:04<00:00, 399.83it/s]


Epoch:  18  Loss:  2.1832224002251257
Accuracy:  0.4387580128205128
Validation Loss:  2.182413242879461
Validation Accuracy:  0.4390368852459016


Training Epoch 19: 100%|██████████| 1950/1950 [00:05<00:00, 346.80it/s]


Epoch:  19  Loss:  2.1822624233441474
Accuracy:  0.43969551282051283
Validation Loss:  2.181828617072496
Validation Accuracy:  0.43993340163934425


Training Epoch 20: 100%|██████████| 1950/1950 [00:05<00:00, 339.27it/s]


Epoch:  20  Loss:  2.1812717882792154
Accuracy:  0.4403605769230769
Validation Loss:  2.18109512231389
Validation Accuracy:  0.4395491803278688


Training Epoch 21: 100%|██████████| 1950/1950 [00:05<00:00, 369.12it/s]


Epoch:  21  Loss:  2.180479074258071
Accuracy:  0.4410857371794872
Validation Loss:  2.1797690376883647
Validation Accuracy:  0.4415023053278688


Training Epoch 22: 100%|██████████| 1950/1950 [00:05<00:00, 332.25it/s]


Epoch:  22  Loss:  2.1796536935904087
Accuracy:  0.44181891025641024
Validation Loss:  2.179684088855493
Validation Accuracy:  0.44180648053278687


Training Epoch 23: 100%|██████████| 1950/1950 [00:05<00:00, 368.63it/s]


Epoch:  23  Loss:  2.1789707562862297
Accuracy:  0.44247195512820514
Validation Loss:  2.178694630255465
Validation Accuracy:  0.44313524590163933


Training Epoch 24: 100%|██████████| 1950/1950 [00:04<00:00, 421.24it/s]


Epoch:  24  Loss:  2.178314090508681
Accuracy:  0.44302483974358975
Validation Loss:  2.1785296274990333
Validation Accuracy:  0.44220671106557374


Training Epoch 25: 100%|██████████| 1950/1950 [00:04<00:00, 411.12it/s]


Epoch:  25  Loss:  2.177713516797775
Accuracy:  0.4437139423076923
Validation Loss:  2.178016988468952
Validation Accuracy:  0.44334336577868855


Training Epoch 26: 100%|██████████| 1950/1950 [00:04<00:00, 394.92it/s]


Epoch:  26  Loss:  2.177171616921058
Accuracy:  0.4442067307692308
Validation Loss:  2.177335857856469
Validation Accuracy:  0.4444159836065574


Training Epoch 27: 100%|██████████| 1950/1950 [00:04<00:00, 396.30it/s]


Epoch:  27  Loss:  2.1766752434999517
Accuracy:  0.444599358974359
Validation Loss:  2.1768032590873907
Validation Accuracy:  0.4427510245901639


Training Epoch 28: 100%|██████████| 1950/1950 [00:04<00:00, 410.58it/s]


Epoch:  28  Loss:  2.176154643450028
Accuracy:  0.4453205128205128
Validation Loss:  2.1758969592266397
Validation Accuracy:  0.44603291495901637


Training Epoch 29: 100%|██████████| 1950/1950 [00:04<00:00, 426.12it/s]


Epoch:  29  Loss:  2.175803512671055
Accuracy:  0.4455889423076923
Validation Loss:  2.1755455351266706
Validation Accuracy:  0.44580878586065575


Training Epoch 30: 100%|██████████| 1950/1950 [00:04<00:00, 401.52it/s]


Epoch:  30  Loss:  2.175504060402895
Accuracy:  0.4454607371794872
Validation Loss:  2.1762741618469112
Validation Accuracy:  0.44431992827868855


Training Epoch 31: 100%|██████████| 1950/1950 [00:04<00:00, 390.91it/s]


Epoch:  31  Loss:  2.1750646017759276
Accuracy:  0.44609375
Validation Loss:  2.175939687451378
Validation Accuracy:  0.4439196977459016


Training Epoch 32: 100%|██████████| 1950/1950 [00:05<00:00, 375.25it/s]


Epoch:  32  Loss:  2.1748364347066635
Accuracy:  0.44603365384615384
Validation Loss:  2.1751119650778223
Validation Accuracy:  0.4456326844262295


Training Epoch 33: 100%|██████████| 1950/1950 [00:04<00:00, 416.94it/s]


Epoch:  33  Loss:  2.174640842584463
Accuracy:  0.4462459935897436
Validation Loss:  2.17448107287532
Validation Accuracy:  0.44598488729508196


Training Epoch 34: 100%|██████████| 1950/1950 [00:04<00:00, 398.83it/s]


Epoch:  34  Loss:  2.1744514791782086
Accuracy:  0.44634214743589745
Validation Loss:  2.1759921077822075
Validation Accuracy:  0.44337538422131145


Training Epoch 35: 100%|██████████| 1950/1950 [00:04<00:00, 400.31it/s]


Epoch:  35  Loss:  2.1741723953149257
Accuracy:  0.4463741987179487
Validation Loss:  2.174782210197605
Validation Accuracy:  0.4449442879098361


Training Epoch 36: 100%|██████████| 1950/1950 [00:04<00:00, 401.22it/s]


Epoch:  36  Loss:  2.174058597026727
Accuracy:  0.4466826923076923
Validation Loss:  2.173992547343989
Validation Accuracy:  0.4463370901639344


Training Epoch 37: 100%|██████████| 1950/1950 [00:04<00:00, 428.22it/s]


Epoch:  37  Loss:  2.1739250069398146
Accuracy:  0.44649439102564104
Validation Loss:  2.174544637809034
Validation Accuracy:  0.44603291495901637


Training Epoch 38: 100%|██████████| 1950/1950 [00:04<00:00, 391.01it/s]


Epoch:  38  Loss:  2.1737858371245555
Accuracy:  0.44685897435897437
Validation Loss:  2.174075817964116
Validation Accuracy:  0.44651319159836067


Training Epoch 39: 100%|██████████| 1950/1950 [00:04<00:00, 392.89it/s]


Epoch:  39  Loss:  2.173456114255465
Accuracy:  0.4470633012820513
Validation Loss:  2.173505510951652
Validation Accuracy:  0.4464171362704918


Training Epoch 40: 100%|██████████| 1950/1950 [00:04<00:00, 426.15it/s]


Epoch:  40  Loss:  2.1733987299601236
Accuracy:  0.4471995192307692
Validation Loss:  2.173035312871464
Validation Accuracy:  0.44742571721311475


Training Epoch 41: 100%|██████████| 1950/1950 [00:04<00:00, 408.89it/s]


Epoch:  41  Loss:  2.173200314350617
Accuracy:  0.4472676282051282
Validation Loss:  2.1730696788576784
Validation Accuracy:  0.44672131147540983


Training Epoch 42: 100%|██████████| 1950/1950 [00:04<00:00, 401.04it/s]


Epoch:  42  Loss:  2.173050876152821
Accuracy:  0.44724358974358974
Validation Loss:  2.1736425664581236
Validation Accuracy:  0.4468974129098361


Training Epoch 43: 100%|██████████| 1950/1950 [00:05<00:00, 349.79it/s]


Epoch:  43  Loss:  2.172976403969985
Accuracy:  0.4472956730769231
Validation Loss:  2.173555323334991
Validation Accuracy:  0.44625704405737704


Training Epoch 44: 100%|██████████| 1950/1950 [00:05<00:00, 386.86it/s]


Epoch:  44  Loss:  2.1728582017849654
Accuracy:  0.4474679487179487
Validation Loss:  2.17245708674681
Validation Accuracy:  0.4480500768442623


Training Epoch 45: 100%|██████████| 1950/1950 [00:04<00:00, 410.25it/s]


Epoch:  45  Loss:  2.172590602361239
Accuracy:  0.44774839743589745
Validation Loss:  2.1725079050806704
Validation Accuracy:  0.4471855788934426


Training Epoch 46: 100%|██████████| 1950/1950 [00:05<00:00, 380.27it/s]


Epoch:  46  Loss:  2.1725989708533655
Accuracy:  0.44756410256410256
Validation Loss:  2.1722887730012173
Validation Accuracy:  0.44854636270491804


Training Epoch 47:  72%|███████▏  | 1397/1950 [00:03<00:01, 393.19it/s]


KeyboardInterrupt: 