In [None]:
import torch
from torch import nn
from torch.nn import functional as F
import warnings
from tqdm import trange
from tqdm import tqdm
from scripts.plot_loss import plot_losses

torch.manual_seed(0)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
warnings.filterwarnings("ignore")

In [None]:
class MLP(nn.Module):
    """Two-layer perceptron that returns hard class predictions.

    The network is ``Linear -> LeakyReLU(0.2) -> Linear -> Softmax -> argmax``.

    Args:
        input_size: Number of features in each input row.
        hidden_size: Width of the hidden layer.
        output_size: Number of classes (width of the final layer).

    Note:
        ``forward`` returns integer class indices produced by ``argmax``.
        That output carries no gradient, so this module is suitable for
        inference-style demos only; a loss such as ``nn.CrossEntropyLoss``
        needs the raw logits instead.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)
        self.LeakyReLU = nn.LeakyReLU(0.2)
        # Normalise over the class axis explicitly. Omitting ``dim`` relies on
        # PyTorch's deprecated implicit choice, which emits a warning and picks
        # a different axis depending on the input's rank.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Predict a class index for every row of ``x``.

        Args:
            x: Float tensor of shape ``(batch, input_size)``.

        Returns:
            ``torch.int64`` tensor of shape ``(batch,)`` holding the index of
            the most probable class for each row.
        """
        x = self.LeakyReLU(self.fc1(x))
        x = self.fc2(x)
        x = self.softmax(x)
        # Collapse the class axis (dim 1) to the winning class index.
        x = torch.argmax(x, dim=1)
        return x

In [None]:
class MLP_sequential(nn.Module):
    """Two-layer perceptron built on ``nn.Sequential`` that outputs raw logits.

    Layer stack: ``Linear(input_size, hidden_size)`` -> ``LeakyReLU(0.2)`` ->
    ``Linear(hidden_size, output_size)`` where the last layer has no bias term.

    Args:
        input_size: Number of features in each input row.
        hidden_size: Width of the hidden layer.
        output_size: Number of output units (one logit per class).
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # Layers are created in forward order so parameter initialisation
        # consumes the RNG in the same sequence as the stack is applied.
        hidden_layer = nn.Linear(input_size, hidden_size)
        activation = nn.LeakyReLU(0.2)
        output_layer = nn.Linear(hidden_size, output_size, bias=False)
        self.model = nn.Sequential(hidden_layer, activation, output_layer)

    def forward(self, x):
        """Return the un-normalised scores (logits) for ``x``."""
        return self.model(x)

In [None]:
# Shared configuration constants (also used by later cells).
IMG_SIZE = 24
HIDDEN_SIZE = 128
NUM_CLASSES = 10

# Smoke test: push one random row through MLP and show the predicted class index.
model = MLP(input_size=IMG_SIZE, hidden_size=HIDDEN_SIZE, output_size=NUM_CLASSES).to(device)
# Call the module itself rather than ``.forward`` so that nn.Module.__call__
# runs any registered hooks; size the dummy input from IMG_SIZE so it always
# matches the model's ``input_size``.
output = model(torch.randn(1, IMG_SIZE).to(device))
print(output)

In [None]:
# Smoke test: push one random row through MLP_sequential and show the raw logits.
# IMG_SIZE, HIDDEN_SIZE and NUM_CLASSES are defined once in the previous cell;
# re-declaring identical values here only invited the copies to drift apart.
model = MLP_sequential(input_size=IMG_SIZE, hidden_size=HIDDEN_SIZE, output_size=NUM_CLASSES).to(device)
# Call the module itself rather than ``.forward`` so that nn.Module.__call__
# runs any registered hooks; size the dummy input from IMG_SIZE so it always
# matches the model's ``input_size``.
output = model(torch.randn(1, IMG_SIZE).to(device))
print(output)

# Model 1

In [None]:
NUM_EPOCHS = 10

# The training loop flattens every image from dim 1 onward, so the network
# receives 3 * IMG_SIZE * IMG_SIZE features per sample.
model = MLP_sequential(
    input_size=3 * IMG_SIZE ** 2,
    hidden_size=HIDDEN_SIZE,
    output_size=NUM_CLASSES,
).to(device)

# SGD with momentum and L2 weight decay.
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=1e-3,
    momentum=0.9,
    weight_decay=5e-4,
)

# Cross-entropy consumes raw logits together with integer class targets.
criterion = nn.CrossEntropyLoss()

# Cosine decay of the learning rate over NUM_EPOCHS scheduler steps, bottoming out at 1e-5.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer,
    T_max=NUM_EPOCHS,
    eta_min=1e-5,
)

In [None]:
# Synthetic stand-in data: each "loader" is a list holding one (images, labels)
# batch. Shapes are derived from the shared config constants so they cannot
# drift away from the model's input size and class count.
NUM_SAMPLES = 1024
NUM_CHANNELS = 3

# Images are Gaussian noise; labels are the argmax of independent Gaussian
# noise, i.e. random class indices in [0, NUM_CLASSES).
train_loader = [(
    torch.randn(NUM_SAMPLES, NUM_CHANNELS, IMG_SIZE, IMG_SIZE),
    torch.randn(NUM_SAMPLES, NUM_CLASSES).argmax(dim=1),
)]
test_loader = [(
    torch.randn(NUM_SAMPLES, NUM_CHANNELS, IMG_SIZE, IMG_SIZE),
    torch.randn(NUM_SAMPLES, NUM_CLASSES).argmax(dim=1),
)]

In [None]:
# Per-epoch history, appended to once per epoch and handed to plot_losses.
train_losses, train_accuracies = [], []
test_losses, test_accuracies = [], []
for epoch in trange(NUM_EPOCHS, desc='Epochs'):
    # ---------------- training pass ----------------
    training_loss, training_accuracy = 0., 0.
    num_train_samples = 0
    model.train()
    for batch in train_loader:
        optimizer.zero_grad()
        images = batch[0].to(device)
        label = batch[1].to(device)
        logits = model(torch.flatten(images, start_dim=1))
        loss = criterion(logits, label)
        loss.backward()
        optimizer.step()

        # The loss is scaled by the batch size before accumulation so the
        # epoch figure is a per-sample average even if batch sizes differ.
        training_loss += loss.item() * images.size(0)
        training_accuracy += (torch.sum(torch.argmax(logits, dim=1) == label)).item()
        num_train_samples += images.size(0)
    if scheduler:
        # One scheduler step per epoch.
        scheduler.step()
    # Normalise by the number of samples actually seen instead of a
    # hard-coded dataset size; max(..., 1) guards against an empty loader.
    training_loss /= max(num_train_samples, 1)
    training_accuracy /= max(num_train_samples, 1)

    train_losses.append(training_loss)
    train_accuracies.append(training_accuracy)

    # ---------------- evaluation pass ----------------
    testing_loss, testing_accuracy = 0., 0.
    num_test_samples = 0
    # Evaluation mode: layers such as dropout behave deterministically.
    model.eval()
    # No gradients are needed for evaluation, so nothing has to be zeroed
    # and the whole pass runs without building an autograd graph.
    with torch.no_grad():
        for batch in test_loader:
            images = batch[0].to(device)
            label = batch[1].to(device)
            logits = model(torch.flatten(images, start_dim=1))
            loss = criterion(logits, label)

            testing_loss += loss.item() * images.size(0)
            testing_accuracy += (torch.sum(torch.argmax(logits, dim=1) == label)).item()
            num_test_samples += images.size(0)

    testing_loss /= max(num_test_samples, 1)
    testing_accuracy /= max(num_test_samples, 1)

    test_losses.append(testing_loss)
    test_accuracies.append(testing_accuracy)

    # Called once per epoch with the full history so far (presumably redraws
    # the loss/accuracy curves — see scripts.plot_loss).
    plot_losses(train_losses, test_losses, train_accuracies, test_accuracies)