In [20]:
from datetime import datetime

import pandas as pd
from sklearn.metrics import roc_auc_score
from tqdm import tqdm
import torch
from torch.utils.data import Dataset, DataLoader, random_split

from utils import *
from nn_architecture_test import Net

class TabularDataset(Dataset):
    """
    Preprocessing of categorical variables by dummy
    Regarding the continuous variable the standardization 
    has already been done.
    """
    
    def __init__(self, csv_file):
        self.df_nn = pd.read_csv(csv_file)
        self.df_nn.drop('id', axis=1, inplace=True)

        self.df_nn = pd.get_dummies(self.df_nn)
        
        self.X = self.df_nn.drop("target", axis=1)
        self.Y = self.df_nn.loc[:, "target"]
        
    def __len__(self):
        return len(self.df_nn)
    
    def __getitem__(self, idx):
        return self.X.iloc[idx].values, self.Y.iloc[idx]
    
csv_file = "../dataset/train.csv"

# Load dataset
dataset = TabularDataset(csv_file)

# Split into training and test
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
trainset, testset = random_split(dataset, [train_size, test_size])

# Dataloaders
train_loader = DataLoader(trainset, batch_size=200, shuffle=True)
test_loader = DataLoader(testset, batch_size=200, shuffle=False)

In [22]:
nb_clust = 1

# -1 cause we remove the target
net = Net(634, nb_clust)
print(net)

max_epochs = 5
#loss_fct = nn.CrossEntropyLoss()
loss_fct = nn.BCEWithLogitsLoss()
l_loss = list()
l_loss_test = list()
l_roc_train = list()
l_roc_test = list()

optim = opt.Adam(net.parameters(), lr=0.01)


for epoch in tqdm(range(max_epochs)):
    t0 = datetime.now()
    net.train()
    epoch_loss = 0
    epoch_auc = 0
    for batch, (x, y) in enumerate(train_loader):
        
        optim.zero_grad()
        
        # Predict soft-targets and embeddings
        output = net(x, 1)
        
        loss = loss_fct(output.float(), y.reshape(len(y),1).float())
        auc = roc_auc_score(y.reshape(len(y),1).detach().numpy(), torch.sigmoid(output.float()).detach().numpy())
        loss.backward()
        
        optim.step()
        
        #l_loss.append(loss.item())
        #l_roc_train.append(roc_auc_score(y.detach().numpy(), torch.sigmoid(output).detach().numpy()))
        epoch_loss += loss.item()
        epoch_auc += auc
        
    print(f'Epoch {epoch+0:03}: | Loss: {epoch_loss/len(train_loader):.5f} | AUC: {epoch_auc/len(train_loader):.3f}')        
        

Net(
  (fc1): Linear(in_features=634, out_features=100, bias=True)
  (fc2): Linear(in_features=100, out_features=40, bias=True)
  (fc3): Linear(in_features=40, out_features=1, bias=True)
  (bn1): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn2): BatchNorm1d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (dropout): Dropout(p=0.25, inplace=False)
  (relu): ReLU()
)


 20%|████████████████████████████████▊                                                                                                                                   | 1/5 [01:38<06:35, 98.90s/it]

Epoch 000: | Loss: 0.36497 | AUC: 0.883


 40%|█████████████████████████████████████████████████████████████████▏                                                                                                 | 2/5 [03:20<05:01, 100.60s/it]

Epoch 001: | Loss: 0.35568 | AUC: 0.889


 60%|█████████████████████████████████████████████████████████████████████████████████████████████████▊                                                                 | 3/5 [05:11<03:30, 105.11s/it]

Epoch 002: | Loss: 0.35207 | AUC: 0.892


 80%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                | 4/5 [06:56<01:45, 105.00s/it]

Epoch 003: | Loss: 0.34709 | AUC: 0.895


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [08:35<00:00, 103.13s/it]

Epoch 004: | Loss: 0.34323 | AUC: 0.898





In [26]:

with torch.no_grad():
    net.eval()
    epoch_loss = 0
    epoch_auc = 0
    for batch, (x, y) in enumerate(test_loader):
        output = net(x, 1)
        
        #l_roc_test.append(roc_auc_score(y.detach().numpy(), torch.sigmoid(output).detach().numpy()))
        
        loss = loss_fct(output.float(), y.reshape(len(y),1).float())
        auc = roc_auc_score(y.reshape(len(y),1).detach().numpy(), torch.sigmoid(output.float()).detach().numpy())
        
        epoch_loss += loss.item()
        epoch_auc += auc
        
print(f'Epoch {epoch+0:03}: | Loss: {epoch_loss/len(train_loader):.5f} | AUC: {epoch_auc/len(train_loader):.3f}')        


In [27]:
print(f'Epoch {epoch+0:03}: | Loss: {epoch_loss/len(train_loader):.5f} | AUC: {epoch_auc/len(train_loader):.3f}')        


Epoch 004: | Loss: 0.08884 | AUC: 0.222
