__DAT341 - Assignment 5 - Group 69__

This notebook is formatted as such :

- First, all the packages are imported and the data is loaded (first cell)
- Then, the utility functions are defined such as :
    - Training function for our models
    - Evaluation function
    - Blind test set prediction function
- Finally, differents models and techniques will be tried in the following cells.

**1 - Preparation**

In [1]:
# Basic packages
import os
import sys
import time
import numpy as np
from tqdm import tqdm

# PyTorch packages
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.utils.data import Dataset

# Torchvision packages
import torchvision
from torchvision.transforms import v2
from torchvision.datasets import ImageFolder
from torchvision.utils import make_grid
from torchvision import models

# Importing data
train_dir = 'a5_data/train'
val_dir = 'a5_data/val'
test_dir = 'a5_data/test_blind'

# Transformations, normalization and augmentation
transform_basic = v2.Compose([
    v2.ToImage(),
    v2.Resize((224, 224)),
    v2.ToDtype(torch.float32, scale=True),  # Normalize expects float input
    v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

transform = v2.Compose([
    v2.Resize((224, 224)),
    v2.ToImage(),
    v2.ToDtype(torch.uint8, scale=True),
    #v2.RandomResizedCrop(size=(224, 224), antialias=True),
    v2.RandomHorizontalFlip(),
    v2.ToDtype(torch.float32, scale=True),  # Normalize expects float input
    v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# No data augmentation
train_dataset_basic = ImageFolder(train_dir, transform=transform_basic)
val_dataset_basic = ImageFolder(val_dir, transform=transform_basic)
test_dataset_basic = ImageFolder(test_dir, transform=transform_basic)

train_loader_basic = DataLoader(train_dataset_basic, batch_size=32, shuffle=True)
val_loader_basic = DataLoader(val_dataset_basic, batch_size=32, shuffle=False)
test_loader_basic = DataLoader(test_dataset_basic, batch_size=32, shuffle=False)
loader_basic = [train_loader_basic, val_loader_basic, test_loader_basic]

# Data augmentation
train_dataset = ImageFolder(train_dir, transform=transform)
val_dataset = ImageFolder(val_dir, transform=transform)
test_dataset = ImageFolder(test_dir, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
loader = [train_loader, val_loader, test_loader]
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

**2 - Utility functions**

In [2]:
class Classifier():
    def __init__(self,model,criterion,optimizer,loader,features_location=None):
        self.model = model
        self.criterion = criterion
        self.optimizer = optimizer
        self.features_location = features_location
        self.loader= loader

        if self.features_location is not None:
            self.train_features = torch.load(f'{self.features_location}/train.pt').to(device)
            self.val_features = torch.load(f'{self.features_location}/val.pt').to(device)
            self.test_features = torch.load(f'{self.features_location}/test.pt').to(device)
            
        
    # To evaluate our model on the validation set
    def validate(self):
        self.model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for i,(images, labels) in tqdm(enumerate(self.loader[1])):
                images, labels = images.to(device), labels.to(device)
                if self.features_location is not None:
                    images = self.val_features[i*32:(i*32 + labels.size(0)),:].to(device)
                outputs = self.model(images)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        print(f'Accuracy on validation set: {100 * correct / total}%')
    
    def predict(self,name):
        if not os.path.exists('predictions'):
            os.makedirs('predictions')
        predictions = []
        with torch.no_grad():
            for i,(images, labels) in tqdm(enumerate(self.loader[2])):
                images, labels = images.to(device), labels.to(device)
                if self.features_location is not None:
                    images = self.test_features[i*32:(i*32 + labels.size(0)),:].to(device)
                outputs = self.model(images)
                _, predicted = torch.max(outputs.data, 1)
                predictions += ['MEL' if label == 0 else 'NV' for label in predicted]

            # Save the predictions to a file
            with open(f'predictions/{name}.txt', 'w') as f:
                for item in predictions:
                    f.write("%s\n" % item)
                    
    def train(self):
        start_time = time.time()
        # Training loop
        num_epochs = 10
        for epoch in range(num_epochs):
            self.model.train()
            running_loss = 0.0
            for i,(images, labels) in tqdm(enumerate(self.loader[0]), total=len(self.loader[0])):
                images, labels = images.to(device), labels.to(device)
                if self.features_location is not None:
                    with torch.no_grad():
                        images = self.train_features[i*32:(i*32 + labels.size(0)),:].to(device)
                self.optimizer.zero_grad()
                outputs = self.model(images).to(device)
                loss = self.criterion(outputs, labels)
                loss.backward()
                self.optimizer.step()
                
                running_loss += loss.item()
            print(f'Epoch {epoch+1}, Loss: {running_loss/len(self.loader[0])}')

        print(f'Training finished - Elapsed time: {time.time() - start_time} s.')

**3 - Models**

1) Simple CNN - No fancy feature extractor or anything

In [43]:
class SimpleCNN(nn.Module):
  def __init__(self):
    super(SimpleCNN, self).__init__()
    self.conv1 = nn.Conv2d(3, 6, 5)
    self.pool = nn.MaxPool2d(2, 2)
    self.conv2 = nn.Conv2d(6, 16, 5)
    self.fc1 = nn.Linear(16 * 53 * 53, 120)
    self.fc2 = nn.Linear(120, 84)
    self.fc3 = nn.Linear(84, 2)

  def forward(self, x):
    x = self.pool(F.relu(self.conv1(x)))
    x = self.pool(F.relu(self.conv2(x)))
    x = x.view(-1, 16 * 53 * 53)
    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    x = self.fc3(x)
    return x
    
model = SimpleCNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

simple_CNN_basic = Classifier(model, criterion, optimizer, loader_basic)
simple_CNN_basic.train()
simple_CNN_basic.validate()
simple_CNN_basic.predict('simpleCNN_basic')

  0%|          | 0/201 [00:00<?, ?it/s]

100%|██████████| 201/201 [00:13<00:00, 15.15it/s]


Epoch 1, Loss: 0.5584259139957712


100%|██████████| 201/201 [00:13<00:00, 15.04it/s]


Epoch 2, Loss: 0.43734067063129956


100%|██████████| 201/201 [00:13<00:00, 15.18it/s]


Epoch 3, Loss: 0.4212458356844252


100%|██████████| 201/201 [00:13<00:00, 15.25it/s]


Epoch 4, Loss: 0.40885882442863425


100%|██████████| 201/201 [00:13<00:00, 15.37it/s]


Epoch 5, Loss: 0.39722740464839174


100%|██████████| 201/201 [00:12<00:00, 15.58it/s]


Epoch 6, Loss: 0.3878880275867472


100%|██████████| 201/201 [00:12<00:00, 15.59it/s]


Epoch 7, Loss: 0.37895053417528446


100%|██████████| 201/201 [00:12<00:00, 15.62it/s]


Epoch 8, Loss: 0.3714367718394123


100%|██████████| 201/201 [00:12<00:00, 15.61it/s]


Epoch 9, Loss: 0.3620221364260906


100%|██████████| 201/201 [00:13<00:00, 15.16it/s]


Epoch 10, Loss: 0.3591586735043953
Training finished - Elapsed time: 130.94874596595764 s.


40it [00:02, 19.42it/s]


Accuracy on validation set: 79.3929712460064%


43it [00:02, 17.49it/s]


In [19]:
simple_CNN= Classifier(model, criterion, optimizer, loader)
simple_CNN.train()
simple_CNN.validate()
simple_CNN.predict('simpleCNN')

100%|██████████| 201/201 [00:15<00:00, 13.04it/s]


Epoch 1, Loss: 0.37341942130333156


100%|██████████| 201/201 [00:15<00:00, 13.07it/s]


Epoch 2, Loss: 0.36089391917435093


100%|██████████| 201/201 [00:15<00:00, 13.13it/s]


Epoch 3, Loss: 0.35700126629860246


100%|██████████| 201/201 [00:15<00:00, 13.11it/s]


Epoch 4, Loss: 0.3466570258733645


100%|██████████| 201/201 [00:15<00:00, 13.02it/s]


Epoch 5, Loss: 0.3423044511779624


100%|██████████| 201/201 [00:15<00:00, 13.09it/s]


Epoch 6, Loss: 0.33369010805490595


100%|██████████| 201/201 [00:15<00:00, 13.00it/s]


Epoch 7, Loss: 0.3324569397749592


100%|██████████| 201/201 [00:15<00:00, 13.07it/s]


Epoch 8, Loss: 0.3197956100773455


100%|██████████| 201/201 [00:15<00:00, 13.02it/s]


Epoch 9, Loss: 0.3136175352320149


100%|██████████| 201/201 [00:15<00:00, 12.84it/s]


Epoch 10, Loss: 0.31357101973757817
Training finished - Elapsed time: 154.18199253082275 s.


40it [00:02, 17.04it/s]


Accuracy on validation set: 80.5111821086262%


43it [00:02, 14.54it/s]


1.2 Batch normalization



In [20]:
class BatchNormCNN(nn.Module):
  def __init__(self):
    super(BatchNormCNN, self).__init__()
    self.conv1 = nn.Conv2d(3, 6, 5)
    self.bn1 = nn.BatchNorm2d(6)
    self.pool = nn.MaxPool2d(2, 2)
    self.conv2 = nn.Conv2d(6, 16, 5)
    self.bn2 = nn.BatchNorm2d(16)
    self.fc1 = nn.Linear(16 * 53 * 53, 120)
    self.fc2 = nn.Linear(120, 84)
    self.fc3 = nn.Linear(84, 2)

  def forward(self, x):
    x = self.pool(F.relu(self.bn1(self.conv1(x))))
    x = self.pool(F.relu(self.bn2(self.conv2(x))))
    x = x.view(-1, 16 * 53 * 53)
    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    x = self.fc3(x)
    return x
    
model = BatchNormCNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

Batchnorm_CNN = Classifier(model, criterion, optimizer,loader)
Batchnorm_CNN.train()
Batchnorm_CNN.validate()
Batchnorm_CNN.predict('BatchNormCNN')

100%|██████████| 201/201 [00:17<00:00, 11.40it/s]


Epoch 1, Loss: 0.4498662324390601


100%|██████████| 201/201 [00:17<00:00, 11.29it/s]


Epoch 2, Loss: 0.3964048334911688


100%|██████████| 201/201 [00:16<00:00, 12.53it/s]


Epoch 3, Loss: 0.3821810374212502


100%|██████████| 201/201 [00:15<00:00, 12.74it/s]


Epoch 4, Loss: 0.3704170362273259


100%|██████████| 201/201 [00:15<00:00, 12.91it/s]


Epoch 5, Loss: 0.3612760957052459


100%|██████████| 201/201 [00:15<00:00, 12.88it/s]


Epoch 6, Loss: 0.34891138526041116


100%|██████████| 201/201 [00:15<00:00, 12.93it/s]


Epoch 7, Loss: 0.33990679866638945


100%|██████████| 201/201 [00:15<00:00, 12.89it/s]


Epoch 8, Loss: 0.3368136321579046


100%|██████████| 201/201 [00:15<00:00, 12.92it/s]


Epoch 9, Loss: 0.3223056897002073


100%|██████████| 201/201 [00:15<00:00, 12.80it/s]


Epoch 10, Loss: 0.3069242251601385
Training finished - Elapsed time: 160.86593914031982 s.


40it [00:02, 16.56it/s]


Accuracy on validation set: 82.50798722044729%


43it [00:02, 14.64it/s]


1.3 - Residual Connections

Here we only do a small one, after we'll use resnet that uses deep residual connections

In [21]:
class BatchNorm_ResidualCNN(nn.Module):
  def __init__(self):
    super(BatchNorm_ResidualCNN, self).__init__()
    self.conv1 = nn.Conv2d(3, 6, 5)
    self.bn1 = nn.BatchNorm2d(6)
    self.pool = nn.MaxPool2d(2, 2)
    self.conv2 = nn.Conv2d(6, 16, 5)
    self.bn2 = nn.BatchNorm2d(16)
    self.fc1 = nn.Linear(16 * 53 * 53, 120)
    self.fc2 = nn.Linear(120, 120)
    self.fc3 = nn.Linear(120, 2)

  def forward(self, x):
    x = self.pool(F.relu(self.bn1(self.conv1(x))))
    x = self.pool(F.relu(self.bn2(self.conv2(x))))
    x = x.view(-1, 16 * 53 * 53)
    
    x = F.relu(self.fc1(x))
    residual = x
    x = F.relu(self.fc2(x) + residual)
    x = self.fc3(x)
    return x
    
model = BatchNorm_ResidualCNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

Batchnorm_ResidualCNN = Classifier(model, criterion, optimizer,loader)
Batchnorm_ResidualCNN.train()
Batchnorm_ResidualCNN.validate()
Batchnorm_ResidualCNN.predict('BatchNorm_ResidualCNN')

100%|██████████| 201/201 [00:16<00:00, 12.47it/s]


Epoch 1, Loss: 0.451251354383592


100%|██████████| 201/201 [00:16<00:00, 12.26it/s]


Epoch 2, Loss: 0.396692874123208


100%|██████████| 201/201 [00:15<00:00, 12.60it/s]


Epoch 3, Loss: 0.37624782627791314


100%|██████████| 201/201 [00:15<00:00, 12.94it/s]


Epoch 4, Loss: 0.3677585212448936


100%|██████████| 201/201 [00:15<00:00, 12.89it/s]


Epoch 5, Loss: 0.3551743513909145


100%|██████████| 201/201 [00:15<00:00, 13.02it/s]


Epoch 6, Loss: 0.3466489354324578


100%|██████████| 201/201 [00:15<00:00, 12.81it/s]


Epoch 7, Loss: 0.33520841998840445


100%|██████████| 201/201 [00:15<00:00, 12.81it/s]


Epoch 8, Loss: 0.33748906376349985


100%|██████████| 201/201 [00:16<00:00, 12.44it/s]


Epoch 9, Loss: 0.3265888925215498


100%|██████████| 201/201 [00:15<00:00, 12.61it/s]


Epoch 10, Loss: 0.31321633466292376
Training finished - Elapsed time: 158.53012371063232 s.


40it [00:02, 17.07it/s]


Accuracy on validation set: 83.86581469648563%


43it [00:02, 14.47it/s]


2 - ResNet

In [22]:
# Load a pre-trained resnet18 model
resnet = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)

# Freeze it
for param in resnet.parameters():
    param.requires_grad = False
    
# Modify the last layer of ResNet to match the input size of the first linear layer
num_ftrs = resnet.fc.in_features
resnet.fc = nn.Identity()

# Define the sequential model with ResNet and additional linear layers
model = nn.Sequential(
    resnet,
    nn.Linear(num_ftrs, 512),   # First linear layer
    nn.ReLU(),                  # ReLU activation
    nn.Linear(512, 256),        # Second linear layer
    nn.ReLU(),                  # ReLU activation
    nn.Linear(256, 2)           # Output layer with output size of 2
)
model.to(device)

optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

Classifier2 = Classifier(model, criterion, optimizer,loader)
Classifier2.train()
Classifier2.validate()
Classifier2.predict('Resnet18')


100%|██████████| 201/201 [00:15<00:00, 12.70it/s]


Epoch 1, Loss: 0.5371244515649122


100%|██████████| 201/201 [00:15<00:00, 12.93it/s]


Epoch 2, Loss: 0.3796119381539264


100%|██████████| 201/201 [00:15<00:00, 12.95it/s]


Epoch 3, Loss: 0.3552131881909584


100%|██████████| 201/201 [00:15<00:00, 12.92it/s]


Epoch 4, Loss: 0.3330404180940704


100%|██████████| 201/201 [00:15<00:00, 12.85it/s]


Epoch 5, Loss: 0.33641766363856807


100%|██████████| 201/201 [00:15<00:00, 12.83it/s]


Epoch 6, Loss: 0.32405732646213836


100%|██████████| 201/201 [00:15<00:00, 12.84it/s]


Epoch 7, Loss: 0.3244410455226898


100%|██████████| 201/201 [00:15<00:00, 12.87it/s]


Epoch 8, Loss: 0.31206340939547883


100%|██████████| 201/201 [00:15<00:00, 12.83it/s]


Epoch 9, Loss: 0.315157765624535


100%|██████████| 201/201 [00:15<00:00, 12.98it/s]


Epoch 10, Loss: 0.3098829149532674
Training finished - Elapsed time: 156.21048378944397 s.


40it [00:03, 12.97it/s]


Accuracy on validation set: 87.30031948881789%


43it [00:03, 12.81it/s]


3 - VGG19

In [4]:
# get the "features" portion of VGG19 (we will not need the "classifier" portion)
vgg = models.vgg19(weights=models.VGG19_Weights.DEFAULT).features
vgg.eval()

# freeze all parameters
for param in vgg.parameters():
    param.requires_grad_(False)
    
# Define the sequential model with VGG and additional linear layers
model = nn.Sequential(
    vgg,
    nn.Flatten(),               
    nn.Linear(25088, 4096),     
    nn.ReLU(),                  
    nn.Dropout(0.5),            
    nn.Linear(4096, 120),      
    nn.ReLU(),                 
    nn.Dropout(0.5),            
    nn.Linear(120, 2)         
)
model.to(device)

optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
criterion = nn.CrossEntropyLoss()
Classifier3 = Classifier(model, criterion, optimizer,loader)
Classifier3.train()
Classifier3.validate()
Classifier3.predict('VGG19')

100%|██████████| 201/201 [00:52<00:00,  3.83it/s]


Epoch 1, Loss: 0.43497797327848214


100%|██████████| 201/201 [00:44<00:00,  4.55it/s]


Epoch 2, Loss: 0.32485222571821354


100%|██████████| 201/201 [00:44<00:00,  4.54it/s]


Epoch 3, Loss: 0.29901145873081625


100%|██████████| 201/201 [00:45<00:00,  4.47it/s]


Epoch 4, Loss: 0.2774289721873269


100%|██████████| 201/201 [00:49<00:00,  4.06it/s]


Epoch 5, Loss: 0.26203544471246093


100%|██████████| 201/201 [00:44<00:00,  4.53it/s]


Epoch 6, Loss: 0.2461916766504743


100%|██████████| 201/201 [00:44<00:00,  4.56it/s]


Epoch 7, Loss: 0.23423738147488873


100%|██████████| 201/201 [00:43<00:00,  4.57it/s]


Epoch 8, Loss: 0.22293791093339968


100%|██████████| 201/201 [00:43<00:00,  4.57it/s]


Epoch 9, Loss: 0.19925953299548496


100%|██████████| 201/201 [00:44<00:00,  4.56it/s]


Epoch 10, Loss: 0.1862778169895286
Training finished - Elapsed time: 455.8945097923279 s.


40it [00:08,  4.94it/s]


Accuracy on validation set: 85.78274760383387%


43it [00:08,  4.85it/s]


4 - State of the art : DINOv2

In [27]:
dinov2 = torch.hub.load("facebookresearch/dinov2", "dinov2_vits14")

# freeze all parameters
for param in dinov2.parameters():
    param.requires_grad_(False)
    
# Define the sequential model with VGG and additional linear layers
model = nn.Sequential(
    dinov2,
    nn.Linear(384, 512),   # First linear layer
    nn.ReLU(),                  # ReLU activation
    nn.Linear(512, 256),        # Second linear layer
    nn.ReLU(),                  # ReLU activation
    nn.Linear(256, 2)           # Output layer with output size of 2
)
model.to(device)

optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

Classifier4 = Classifier(model, criterion, optimizer,loader)
Classifier4.train()
Classifier4.validate()
Classifier4.predict('Dinov2')

Using cache found in /home/quoniam/.cache/torch/hub/facebookresearch_dinov2_main
100%|██████████| 201/201 [00:28<00:00,  7.06it/s]


Epoch 1, Loss: 0.41485689543372956


100%|██████████| 201/201 [00:30<00:00,  6.68it/s]


Epoch 2, Loss: 0.310777237055017


100%|██████████| 201/201 [00:34<00:00,  5.89it/s]


Epoch 3, Loss: 0.28709890319043724


100%|██████████| 201/201 [00:31<00:00,  6.28it/s]


Epoch 4, Loss: 0.2803039468491255


100%|██████████| 201/201 [00:31<00:00,  6.39it/s]


Epoch 5, Loss: 0.25701381318011685


100%|██████████| 201/201 [00:31<00:00,  6.39it/s]


Epoch 6, Loss: 0.2519658329474985


100%|██████████| 201/201 [00:30<00:00,  6.59it/s]


Epoch 7, Loss: 0.24342039283087005


100%|██████████| 201/201 [00:31<00:00,  6.46it/s]


Epoch 8, Loss: 0.23745266670611367


100%|██████████| 201/201 [00:30<00:00,  6.65it/s]


Epoch 9, Loss: 0.22375167795081638


100%|██████████| 201/201 [00:30<00:00,  6.66it/s]


Epoch 10, Loss: 0.2172925591061068
Training finished - Elapsed time: 309.6904399394989 s.


40it [00:05,  6.85it/s]


Accuracy on validation set: 88.81789137380191%


43it [00:06,  6.64it/s]


Let's now try to save vgg19 features and then loading them in the training process

In [5]:
def get_features(loader,name,model,model_name):
    if os.path.exists(f'.features/{model_name}/{name}.pt'):
        return torch.load(f'.features/{model_name}/{name}.pt')
    
    features = []
    for batch, _ in tqdm(loader, desc=f'Extracting features from {name}'):
        batch = batch.to(device)
        # Extract the features
        with torch.no_grad():
            feature_batch = model(batch)
        features.append(feature_batch)
    features = torch.cat(features)
    if not os.path.exists('.features'):
        os.makedirs('.features')
    if not os.path.exists(f'.features/{model_name}'):
        os.makedirs(f'.features/{model_name}')
    torch.save(features,f'.features/{model_name}/{name}.pt')
    return features

train_features = get_features(train_loader,'train',vgg,'vgg')
print(train_features.shape)
val_features = get_features(val_loader,'val',vgg,'vgg')
test_features = get_features(test_loader,'test',vgg,'vgg')

# Define the sequential model with VGG and additional linear layers
model = nn.Sequential(
    nn.Flatten(),               # Flatten the output of VGG
    nn.Linear(25088, 4096),     # First linear layer
    nn.ReLU(),                  # ReLU activation
    nn.Dropout(0.5),            # Dropout with p=0.5
    nn.Linear(4096, 120),      # Second linear layer
    nn.ReLU(),                  # ReLU activation
    nn.Dropout(0.5),            # Dropout with p=0.5
    nn.Linear(120, 2)          # Output layer with output size of 2
)
model.to(device)

optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

Classifier5 = Classifier(model, criterion, optimizer,loader,'.features/vgg')
Classifier5.train()
Classifier5.validate()
Classifier5.predict('VGG19_saved_features')

torch.Size([6426, 512, 7, 7])


100%|██████████| 201/201 [00:15<00:00, 12.86it/s]


Epoch 1, Loss: 0.6955374010759799


100%|██████████| 201/201 [00:15<00:00, 12.78it/s]


Epoch 2, Loss: 0.6946930568016584


100%|██████████| 201/201 [00:15<00:00, 12.77it/s]


Epoch 3, Loss: 0.694805552117267


100%|██████████| 201/201 [00:15<00:00, 12.76it/s]


Epoch 4, Loss: 0.6941017876217022


100%|██████████| 201/201 [00:15<00:00, 12.90it/s]


Epoch 5, Loss: 0.6937692052689358


100%|██████████| 201/201 [00:15<00:00, 13.10it/s]


Epoch 6, Loss: 0.6930531966152476


100%|██████████| 201/201 [00:15<00:00, 12.97it/s]


Epoch 7, Loss: 0.693863444363893


100%|██████████| 201/201 [00:15<00:00, 13.00it/s]


Epoch 8, Loss: 0.6936160076909991


100%|██████████| 201/201 [00:16<00:00, 11.89it/s]


Epoch 9, Loss: 0.6933531458698102


100%|██████████| 201/201 [00:15<00:00, 13.23it/s]


Epoch 10, Loss: 0.6935973787189122
Training finished - Elapsed time: 156.8512303829193 s.


40it [00:02, 17.46it/s]


Accuracy on validation set: 43.45047923322684%


43it [00:02, 15.62it/s]


Now, the loss won't go down. Why?


torch.Size([6426, 25088])
