In [2]:
# unzip files
# !unzip "/home/jovyan/work/Assignment_4/files.zip" -d "./files"

In [4]:
import pytorch_lightning as pl
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision as torchvision
from pytorch_lightning.callbacks import EarlyStopping
from torch.nn.functional import cross_entropy
from sklearn.metrics import accuracy_score
from torch.utils.data import DataLoader, random_split
from torch.optim import Adam
from torch.optim.lr_scheduler import StepLR
from pytorch_lightning import loggers as pl_loggers
from torchmetrics import Accuracy
from pytorch_lightning import LightningModule, Trainer

In [15]:
# Exercise 4: Data Loading
class DataLoading:
    def __init__(self):
        self.filePath = "./files/files/"
        self.batch_size = 16

    def load_data(self):

        train_set = torchvision.datasets.ImageFolder(root=self.filePath + "train",
                                                     transform=torchvision.transforms.Compose(
                                                         [torchvision.transforms.ToTensor(),
                                                          torchvision.transforms.Resize((224,224))]))

        test_set = torchvision.datasets.ImageFolder(root=self.filePath + "test",
                                                    transform=torchvision.transforms.Compose(
                                                        [torchvision.transforms.ToTensor(),
                                                         torchvision.transforms.Resize((224,224))]))
        trainlength = round(0.9 * len(train_set))
        vallength = round(0.1 * len(train_set))
        train_set, val_set = random_split(train_set, [trainlength, vallength])

        train_loader = DataLoader(train_set, batch_size=self.batch_size, shuffle=True)
        val_loader = DataLoader(val_set, batch_size=self.batch_size, shuffle=False)
        test_loader = DataLoader(test_set, batch_size=self.batch_size, shuffle=False)

        return train_loader, val_loader, test_loader

In [17]:
class CustomVGG16(pl.LightningModule):
# Exercise 1: Convolutional Neural Network Architecture Definition
    def __init__(self):
        super(CustomVGG16, self).__init__()
        
        # Load the Pretrained VGG16 model
        self.vgg16 = torchvision.models.vgg16(pretrained=True)
        
        # Freeze the layers of the pretrained VGG16 model
        for param in self.vgg16.parameters():
            param.requires_grad = False
        
        # Replace the last layer of the pre-trained VGG16 model with a custom fully connected 
        # layer containing the appropriate number of output neurons w.r.t. classes
        in_features = self.vgg16.classifier[-1].in_features       
        self.vgg16.classifier[-1] = nn.Linear(in_features=4096, out_features=5)
        
        self.train_acc = Accuracy(task="multiclass", num_classes=5)
        self.val_acc = Accuracy(task="multiclass", num_classes=5)
        self.test_acc = Accuracy(task="multiclass", num_classes=5)
        self.train_acc_history = []
        self.test_step_outputs = []
        
    def forward(self, x):
        return self.vgg16(x)
    
# Exercise 2: Optimizer
    def configure_optimizers(self):
        optimizer = Adam(self.parameters(), lr=0.01)
        lr_scheduler = StepLR(optimizer=optimizer, step_size=1)
        return [optimizer], [lr_scheduler]
    
# Exercise 3: Training, Validation and Test Step
    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self.forward(x)
        loss = cross_entropy(y_hat, y)
        
        # Update train accuracy
        self.train_acc(y_hat, y)
        
        # Log training loss and accuracy
        self.log("train_loss", loss, on_step=True)
        self.log("train_acc", self.train_acc, on_epoch=True, prog_bar=True)
        
        self.train_acc(y_hat, y)
        return loss
    
    def on_train_epoch_end(self):
        # Compute and log the train accuracy for the epoch
        train_acc_epoch = self.train_acc.compute()
        # Store the accuracy in the history list
        self.train_acc_history.append(train_acc_epoch.item())
        print(f"Epoch {self.current_epoch} - Train Accuracy: {train_acc_epoch:.4f}")
        # Reset metric for the next epoch
        self.train_acc.reset()
        
    def on_train_end(self):
        # Log the max train accuracy achieved
        max_train_acc = max(self.train_acc_history)
        print(f"Max Train Accuracy Achieved: {max_train_acc:.4f}")

    def validation_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self.forward(x)
        val_loss = cross_entropy(y_hat, y)
        self.val_acc(y_hat, y)
        self.log("val_loss", val_loss, on_step=False, on_epoch=True)
        self.log("val_acc", self.val_acc, on_epoch=True)
        
    def test_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self.forward(x)
        test_loss = cross_entropy(y_hat, y)
        self.log("test_loss", test_loss, on_step=False, on_epoch=True)
        self.log("test_acc", self.test_acc, on_step=False, on_epoch=True)
        self.test_step_outputs.append({"test_loss": test_loss, "test_acc": self.test_acc(y_hat, y)})

    def on_test_epoch_end(self):
        avg_loss = torch.stack([x["test_loss"] for x in self.test_step_outputs]).mean()
        avg_acc = torch.stack([x["test_acc"] for x in self.test_step_outputs]).mean()
        self.log("avg_test_loss", avg_loss)
        self.log("avg_test_acc", avg_acc)
        self.test_step_outputs.clear()  # Clear the outputs after logging
            
if __name__ == "__main__":
    # Main function of script
    num_epochs = 100
    
    data_load = DataLoading()
    train_loader, val_loader, test_loader = data_load.load_data()
    
# Exercise 5: Training and Evaluation
    
    model = CustomVGG16()
    
    tb_logger = pl_loggers.TensorBoardLogger(save_dir="logs/")
        
    trainer = Trainer(devices=1, accelerator="auto", log_every_n_steps=10, max_epochs=num_epochs, logger=tb_logger,
                         callbacks=EarlyStopping(monitor="val_loss", patience=5))

    trainer.fit(model, train_dataloaders=train_loader, val_dataloaders=val_loader)
    trainer.test(ckpt_path="best", dataloaders=test_loader)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type               | Params
-------------------------------------------------
0 | vgg16     | VGG                | 134 M 
1 | train_acc | MulticlassAccuracy | 0     
2 | val_acc   | MulticlassAccuracy | 0     
3 | test_acc  | MulticlassAccuracy | 0     
-------------------------------------------------
20.5 K    Trainable params
134 M     Non-trainable params
134 M     Total params
537.124   Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/opt/conda/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=127` in the `DataLoader` to improve performance.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 0 - Train Accuracy: 0.5514


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 1 - Train Accuracy: 0.7516


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 2 - Train Accuracy: 0.7906


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 3 - Train Accuracy: 0.7997


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 4 - Train Accuracy: 0.7958


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 5 - Train Accuracy: 0.7867


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 6 - Train Accuracy: 0.7893


Validation: |          | 0/? [00:00<?, ?it/s]

Epoch 7 - Train Accuracy: 0.8075
Max Train Accuracy Achieved: 0.8075


Restoring states from the checkpoint path at logs/lightning_logs/version_1/checkpoints/epoch=7-step=392.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at logs/lightning_logs/version_1/checkpoints/epoch=7-step=392.ckpt
/opt/conda/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=127` in the `DataLoader` to improve performance.


Testing: |          | 0/? [00:00<?, ?it/s]

In [None]:
# Exercise 6: Results
# Train Accuracy: 80.75%
# Test Accuracy: 64.44%