In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.io.arff import loadarff 

from sklearn.model_selection import(
    train_test_split, 
    cross_val_predict,
    KFold,
)

from tqdm import tqdm
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import (
    Dataset, 
    DataLoader,
    TensorDataset,
    random_split,
    SubsetRandomSampler,
    ConcatDataset,
)
from torchvision import transforms, datasets


torch.cuda.is_available()

  from .autonotebook import tqdm as notebook_tqdm


True

### GPU HELL YEAH!

In [2]:
def get_device():
    if torch.cuda.is_available():
        device = torch.device('cuda:0')
        print('Rolling on GPU, babe')
        return device
    print('CPU it is...')
    return torch.device('cpu')

device = get_device()

Rolling on GPU, babe


## MLP

In [3]:
class MLP(nn.Module):

    def __init__(self, fc1, fc2):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(28*28, fc1),
            nn.ReLU(),
            nn.Linear(fc1, fc2),
            nn.ReLU(),
            nn.Linear(fc2, 10),
        )
    
    def forward(self, x):
        return F.log_softmax(self.model(x), dim=1)

    def reset(self):
        return self.model.apply(self.weight_reset)

    def weight_reset(self, m):
        if isinstance(m, nn.Linear):
            m.reset_parameters()

def run_epoch(net, device, dataloader, 
              loss_fn, optimizer=None):
    train = False if optimizer is None else True
    net.train() if train else net.eval()

    correct = 0
    loss_arr = list()
    for X, y in dataloader:
        y = y.type(torch.LongTensor)
        X, y = X.to(device), y.to(device)

        if train:
            optimizer.zero_grad()

        output = net(X)
        scores, predictions = torch.max(output.data, 1)
        loss = loss_fn(output, y)
        correct += (predictions == y).sum().item()
        loss_arr.append(loss.item())

        if train:
            loss.backward()
            optimizer.step()

    return loss_arr, correct


Transforming pd.DataFrame to torch.Tensor

In [4]:
class PDtoTensor(Dataset):
 
  def __init__(self, X, y):
    self.X_train = torch.tensor(X.values, dtype=torch.float)
    self.y_train = torch.tensor(y.values.flatten(), dtype=torch.float)
 
  def __len__(self):
    return len(self.y_train)
   
  def __getitem__(self,idx):
    return self.X_train[idx], self.y_train[idx]

### Training process


In [5]:
def prepare_dataloader(X, y, train_idx, test_idx, batch_size=128):
    train = PDtoTensor(X.iloc[train_idx], y.iloc[train_idx])
    test = PDtoTensor(X.iloc[test_idx], y.iloc[test_idx])

    train_loader = DataLoader(train, batch_size=batch_size)
    test_loader = DataLoader(test, batch_size=batch_size)

    return train_loader, test_loader

    
def kfold_train(net: MLP, kf, epochs, X, y):
    scores = list()

    for fold, (train_idx, test_idx) in enumerate(kf.split(X, y)):  
        train_loader, test_loader = prepare_dataloader(X, y, train_idx, test_idx, 256)

        optimizer = optim.Adam(net.parameters(), lr=1e-4)

        for epoch in range(epochs):
            train_loss, train_correct = run_epoch(
                net,
                device,
                train_loader,
                nn.CrossEntropyLoss(), 
                optimizer
            )
            test_loss, test_correct = run_epoch(
                net,
                device,
                test_loader,
                nn.CrossEntropyLoss(), 
                optimizer=None,
            )
        
        test_acc = test_correct / len(test_loader.sampler)

        scores.append(test_acc)
        net.reset()

    print(f'Accuracy mean (std): {np.mean(scores):.4f} ({np.std(scores):.4f})')

    return scores

### Data

In [6]:
# loading data
raw_data = loadarff('mnist_784.arff')
df = pd.DataFrame(raw_data[0])

### Training the Model

Training with 10-fold CV for evaluation and 10 Epochs as a fixed hyperparameter

In [7]:
# Separando dataset
X = df.iloc[:, 0:-1]
y = df.iloc[:, [-1]].astype('int')

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.1)

kf = KFold(n_splits=10, random_state=1, shuffle=True)
EPOCHS = 10

# Training the model
net = MLP(500,500).to(device)
kfold_train(net, kf, EPOCHS, X_train, y_train)

Accuracy mean (std): 0.9720 (0.0026)


[0.9766666666666667,
 0.9687301587301588,
 0.9692063492063492,
 0.9728571428571429,
 0.9717460317460317,
 0.9726984126984127,
 0.9685714285714285,
 0.9753968253968254,
 0.9722222222222222,
 0.9715873015873016]

### Validation

Treinando em 90% dos dados e utilizando 10% para validar o modelo

In [8]:
# Validations
train = PDtoTensor(X_train, y_train)
val = PDtoTensor(X_val, y_val)

train_loader = DataLoader(train, batch_size=128)
val_loader = DataLoader(val, batch_size=128)

net = MLP(500,500).to(device)
optimizer = optim.Adam(net.parameters(), lr=1e-5)

for epoch in range(50):
    train_loss, train_correct = run_epoch(
        net,
        device,
        train_loader,
        nn.CrossEntropyLoss(), 
        optimizer
    )
    print(f'Epoch: {epoch+1}/{50} Training Loss: {train_loss[-2]:.3f}')

Epoch: 1/50 Training Loss: 0.424
Epoch: 2/50 Training Loss: 0.191
Epoch: 3/50 Training Loss: 0.132
Epoch: 4/50 Training Loss: 0.116
Epoch: 5/50 Training Loss: 0.104
Epoch: 6/50 Training Loss: 0.093
Epoch: 7/50 Training Loss: 0.080
Epoch: 8/50 Training Loss: 0.070
Epoch: 9/50 Training Loss: 0.061
Epoch: 10/50 Training Loss: 0.053
Epoch: 11/50 Training Loss: 0.045
Epoch: 12/50 Training Loss: 0.039
Epoch: 13/50 Training Loss: 0.033
Epoch: 14/50 Training Loss: 0.028
Epoch: 15/50 Training Loss: 0.023
Epoch: 16/50 Training Loss: 0.018
Epoch: 17/50 Training Loss: 0.014
Epoch: 18/50 Training Loss: 0.012
Epoch: 19/50 Training Loss: 0.010
Epoch: 20/50 Training Loss: 0.008
Epoch: 21/50 Training Loss: 0.006
Epoch: 22/50 Training Loss: 0.005
Epoch: 23/50 Training Loss: 0.004
Epoch: 24/50 Training Loss: 0.004
Epoch: 25/50 Training Loss: 0.003
Epoch: 26/50 Training Loss: 0.003
Epoch: 27/50 Training Loss: 0.002
Epoch: 28/50 Training Loss: 0.002
Epoch: 29/50 Training Loss: 0.002
Epoch: 30/50 Training L

In [9]:
correct = 0
total = 0

with torch.no_grad():
    for data in val_loader:
        X, y = data
        X, y = X.to(device), y.to(device)
        output = net(X)
        for idx, i in enumerate(output):
            if torch.argmax(i) == y[idx]:
                correct += 1
            total += 1
            
print("Accuracy: ", round(correct/total, 3))

Accuracy:  0.976
