In [116]:
import cProfile
import timeit

import torch
import torch.nn as nn
from torch.functional import F
import torch.utils.data as data
from torch.utils.data import DataLoader, TensorDataset, Subset
from torchvision import transforms, datasets
from torchsummary import summary

import lightning as L
from lightning import Trainer, seed_everything

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import balanced_accuracy_score, accuracy_score, confusion_matrix
import pandas as pd
import numpy as np
import os
from tqdm import tqdm
import math
import time
import multiprocessing
import matplotlib.pyplot as plt

from cleanlab import Datalab, filter, rank, dataset
from datasets import load_dataset

In [None]:
# old_model: 975, 93
# model1: 1700, 97
# model2: 550, 89

In [544]:
# Load data sets
transform = transforms.ToTensor()
train_set = datasets.MNIST(root="MNIST", download=True, train=True, transform=transform)
test_set = datasets.MNIST(root="MNIST", download=True, train=False, transform=transform)

# use 20% of training data for validation
train_set_size = int(len(train_set) * 0.8)
valid_set_size = len(train_set) - train_set_size

# split the train set into two
seed = torch.Generator().manual_seed(42)
train_set, valid_set = data.random_split(train_set, [train_set_size, valid_set_size], generator=seed)

trainloader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
validloader = DataLoader(valid_set, batch_size=batch_size, shuffle=True)
# testloader = DataLoader(test_set, batch_size=batch_size, shuffle=True)

print(f"Length of training set: {len(train_set)}\nLength of validation set: {len(valid_set)}\nLength of test set: {len(test_set)}")
print(f"Example entry: ({train_set[0][0][0,0,:]},{train_set[0][1]})\nShape: {train_set[0][0].shape}")

Length of training set: 48000
Length of validation set: 12000
Length of test set: 10000
Example entry: (tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0.]),6)
Shape: torch.Size([1, 28, 28])


In [574]:
test_set.dtype

AttributeError: 'MNIST' object has no attribute 'dtype'

In [522]:
class Net(nn.Module):
    def __init__(self):
        super().__init__() 
           
        # Define the CNN as a Sequential module
        # self.conv_layer = nn.Conv2d(1, 2, kernel_size=4, padding=0)
        # horizontal_edge_filter = torch.tensor([[1, 1, 1], [0, 0, 0], [-1, -1, -1]], dtype=torch.float32).view(1, 1, 3, 3)
        # vertical_edge_filter = torch.tensor([[1, 0, -1], [1, 0, -1], [1, 0, -1]], dtype=torch.float32).view(1, 1, 3, 3)

        # # Assign the filters to the convolutional layer
        # self.conv_layer.weight.data[0, 0, :, :] = horizontal_edge_filter
        # self.conv_layer.weight.data[1, 0, :, :] = vertical_edge_filter

        self.cnn = nn.Sequential(
            nn.Conv2d(1, 3, kernel_size=2, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=4, stride=2),
            nn.Conv2d(3, 1, kernel_size=5, padding=0),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=4, stride=2)
        )
        self.linear = nn.Sequential(nn.LazyLinear(16), nn.ReLU())

        self.residual = nn.Sequential(
            nn.Conv2d(1, 1, kernel_size=4, padding=0),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=6, stride=5)
        )
        
        self.output = nn.Linear(16, num_classes)

    def forward(self, x):
        x = self.embeddings(x)
        x = self.output(x)
        return x

    def embeddings(self, x):
        x = torch.flatten(self.cnn(x),1) + torch.flatten(self.residual(x),1)
        x = torch.flatten(x, 1)  # flatten all dimensions except batch
        x = self.linear(x)
        return x

In [689]:
class Net(nn.Module):
    def __init__(self):
        super().__init__() 
           
        self.conv_layer = nn.Conv2d(1, 3, kernel_size=2, padding=2)
        horizontal_edge_filter = torch.tensor([[1, 1], [0, 0]], dtype=torch.float32).view(2,2)
        vertical_edge_filter = torch.tensor([[1, 0], [1, 0]], dtype=torch.float32).view(2,2)
        diagonal_edge_filter = torch.tensor([[1,0], [0,1]], dtype=torch.float32).view(2,2)

        # Assign the filters to the convolutional layer
        self.conv_layer.weight.data[0, :, :] = horizontal_edge_filter
        self.conv_layer.weight.data[1, :, :] = vertical_edge_filter
        self.conv_layer.weight.data[2, :, :] = diagonal_edge_filter

        self.cnn = nn.Sequential(
            self.conv_layer,
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(3, 1, kernel_size=4, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=4, stride=2)
        )
        self.linear = nn.Sequential(nn.LazyLinear(16), nn.ReLU())

        self.residual = nn.Conv2d(1, 1, kernel_size=1, stride=4)
        
        self.output = nn.Linear(16, num_classes)

    def forward(self, x):
        x = self.embeddings(x)
        x = self.output(x)
        return x

    def embeddings(self, x):
        # print(self.cnn(x).shape)
        # print(self.residual(x).shape)
        x = torch.flatten(self.cnn(x)+self.residual(x),1)
        x = self.linear(x)
        return x

class EarlyStopper():
    def __init__(self, min_delta=10, patience=6):
        self.min_delta=10
        self.patience=6
        self.min_loss=float('inf')
        self.count=0
    
    def early_stop(self, loss):
        if loss - 10 > self.min_loss:
            self.count += 1
            if self.count >= self.patience:
                return True
        else:
            self.min_loss = loss
        return False

In [690]:
model = Net()
summary(model, (1,28,28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 3, 31, 31]              15
            Conv2d-2            [-1, 3, 31, 31]              15
              ReLU-3            [-1, 3, 31, 31]               0
         MaxPool2d-4            [-1, 3, 15, 15]               0
            Conv2d-5            [-1, 1, 16, 16]              49
              ReLU-6            [-1, 1, 16, 16]               0
         MaxPool2d-7              [-1, 1, 7, 7]               0
            Conv2d-8              [-1, 1, 7, 7]               2
            Linear-9                   [-1, 16]             800
             ReLU-10                   [-1, 16]               0
           Linear-11                   [-1, 10]             170
Total params: 1,051
Trainable params: 1,051
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/back

In [691]:
# PARAMETERS
# skipping kfold

num_classes = 10
batch_size = 48
n_epochs = 32

patience = 8
min_delta = 10

model = Net()
summary(model, (1,28,28))

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters())

for epoch in range(n_epochs):  # loop over the dataset multiple times
    # start_epoch = time.time()
    
    model.train()
    running_loss = 0.0

    for _, batch in enumerate(trainloader):

        xbatch, ybatch = batch

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(xbatch)
        loss = criterion(outputs, ybatch)
        loss.backward()
        optimizer.step()
        running_loss += loss
    
    with torch.no_grad():
        val_loss = 0.0
        val_acc = []
        for _, batch in enumerate(valid_loader):
            xbatch, ybatch = batch
            
            outputs = model(xbatch)
            # y_hat = torch.argmax(torch.softmax(outputs,axis=1),axis=1)

            val_loss += criterion(outputs, ybatch)
            # val_acc.append(balanced_accuracy_score(ybatch, y_hat))

    print(f'Epoch {epoch}/{n_epochs}: Train loss: {running_loss}\tVal loss: {val_loss}')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 3, 31, 31]              15
            Conv2d-2            [-1, 3, 31, 31]              15
              ReLU-3            [-1, 3, 31, 31]               0
         MaxPool2d-4            [-1, 3, 15, 15]               0
            Conv2d-5            [-1, 1, 16, 16]              49
              ReLU-6            [-1, 1, 16, 16]               0
         MaxPool2d-7              [-1, 1, 7, 7]               0
            Conv2d-8              [-1, 1, 7, 7]               2
            Linear-9                   [-1, 16]             800
             ReLU-10                   [-1, 16]               0
           Linear-11                   [-1, 10]             170
Total params: 1,051
Trainable params: 1,051
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/back

In [None]:
balanced = []
acc = []
with torch.no_grad():
    for xbatch, ybatch in testloader:
        logits = model(xbatch)
        proba = nn.Softmax(dim=1)(logits)
        pred = torch.argmax(proba, dim=1)
        score = balanced_accuracy_score(pred, ybatch)
        balanced.append(score)
        score = accuracy_score(pred, ybatch)
        acc.append(score)
    

print(sum(balanced)/len(balanced))
print(sum(acc)/len(acc))

In [None]:
class Evaluator():
    def __init__(self, model):
        self.model = model
    
    def predict(self, X):
        self.logits = model(X)
        self.proba = nn.Softmax(dim=1)(self.logits)
        self.y_hat = torch.argmax(self.proba, dim=1)

        return self.y_hat
    
    def evaluate(self, X, y):
        
        self.predict(X)

        print(f"Accuracy: {accuracy_score(y, self.y_hat)}")
        print(f"Balanced Accuracy: {balanced_accuracy_score(y, self.y_hat)}")