In [18]:
from functools import partial

import lightning as L
import mlflow
import torch
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms.v2 as transforms
from torch import nn
from torch.utils.data import DataLoader
from torchinfo import summary
from torchvision.datasets import ImageFolder

In [19]:
# Dataset layout: one sub-directory per split underneath a common root.
ROOT_PATH = "./datasets/classification/"
TRAIN_PATH = f"{ROOT_PATH}train/"
VAL_PATH = f"{ROOT_PATH}val/"

# Target (height, width) passed to the Resize transform for every image.
IMAGE_SIZE = (128, 128)

In [20]:
# Shared front end of both pipelines: wrap the input as an image tensor and
# convert it to float32 with value scaling enabled.
to_tensor = transforms.Compose(
    [transforms.ToImage(), transforms.ToDtype(torch.float32, scale=True)]
)

# Validation pipeline: tensor conversion followed by a resize, no augmentation.
val_transforms = transforms.Compose(
    [to_tensor, transforms.Resize(IMAGE_SIZE)]
)

# Training pipeline: the same conversion and resize, then random
# horizontal flips and random rotations (degrees=15) as augmentation.
train_transforms = transforms.Compose(
    [
        to_tensor,
        transforms.Resize(IMAGE_SIZE),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(degrees=15),
    ]
)

In [21]:
# One ImageFolder per split, each paired with the transform pipeline
# built for that split.
train_dataset = ImageFolder(root=TRAIN_PATH, transform=train_transforms)
val_dataset = ImageFolder(root=VAL_PATH, transform=val_transforms)

In [22]:
BATCH_SIZE = 32

# Both loaders share the batch size and worker count; only the training
# loader shuffles its samples.
_make_loader = partial(DataLoader, batch_size=BATCH_SIZE, num_workers=11)

train_dataloader = _make_loader(train_dataset, shuffle=True)
val_dataloader = _make_loader(val_dataset)

In [39]:
# 3x3, stride-1 convolution with "same" padding; callers supply the
# input and output channel counts.
conv2d = partial(
    nn.Conv2d,
    kernel_size=3,
    stride=1,
    padding="same",
)


class ConvBlock(nn.Module):
    """Two conv -> ReLU -> BatchNorm stages followed by a 2x2 max-pool.

    Args:
        inp: Number of channels of the incoming feature map.
        l1: Number of channels produced by the first convolution.
        l2: Number of channels produced by the second convolution
            (and therefore by the block).
    """

    def __init__(self, inp, l1, l2):
        super().__init__()

        # Modules are created in the same order they are applied, so the
        # indices inside ``self.layers`` match the forward data flow.
        first_stage = [conv2d(inp, l1), nn.ReLU(), nn.BatchNorm2d(l1)]
        second_stage = [conv2d(l1, l2), nn.ReLU(), nn.BatchNorm2d(l2)]
        downsample = nn.MaxPool2d(kernel_size=2)

        self.layers = nn.Sequential(*first_stage, *second_stage, downsample)

    def forward(self, x):
        """Run ``x`` through the stacked layers and return the result."""
        out = self.layers(x)
        return out


class CNN(L.LightningModule):
    """Small convolutional classifier trained with SGD and tracked in MLflow.

    The network stacks three ``ConvBlock`` modules (3 -> 16 -> 64 -> 256
    channels), averages each feature map down to a single value, and maps
    the resulting 256 features to 5 output logits.

    Args:
        lr: Learning rate passed to the SGD optimizer.
        momentum: Momentum passed to the SGD optimizer.
        criterion: Callable ``criterion(logits, targets)`` returning the
            training loss. Defaults to ``F.cross_entropy``.
    """

    def __init__(self, lr, momentum, criterion=F.cross_entropy):
        super().__init__()

        # Exposes lr / momentum / criterion through ``self.hparams``.
        self.save_hyperparameters()

        self.layers = nn.Sequential(
            ConvBlock(3, 8, 16),
            ConvBlock(16, 32, 64),
            ConvBlock(64, 128, 256),
            # Global average pooling. For 128x128 inputs the feature map
            # here is 16x16, so this matches the former ``AvgPool2d(16)``
            # while also accepting other input resolutions.
            nn.AdaptiveAvgPool2d(1),
            nn.Flatten(),
            nn.Linear(256, 5),
        )

        # Detached per-step losses of the epoch currently in progress.
        self.step_losses = []

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        return self.layers(x)

    def training_step(self, batch, batch_idx):
        """Compute the loss for one ``(inputs, targets)`` batch and return it."""
        X, y = batch
        y_hat = self(X)
        loss = self.hparams.criterion(y_hat, y)
        # Store a detached copy: keeping the attached tensor would keep every
        # step's autograd graph alive until the list is released.
        self.step_losses.append(loss.detach())
        return loss

    def configure_optimizers(self):
        """Build the SGD optimizer from the stored hyperparameters."""
        optimizer = optim.SGD(
            self.parameters(),
            lr=self.hparams.lr,
            momentum=self.hparams.momentum,
        )
        return optimizer

    def on_train_epoch_end(self):
        """Log the mean training loss of the finished epoch to MLflow."""
        # ``torch.stack`` raises on an empty list; nothing to log in that case.
        if not self.step_losses:
            return

        mean_loss = torch.stack(self.step_losses).mean()
        # Reset the buffer so the next epoch's mean only covers its own steps
        # instead of being a running mean over all epochs so far.
        self.step_losses.clear()
        mlflow.log_metric("train_loss", mean_loss.item(), step=self.current_epoch)

In [29]:
# Tracking server and experiment that every run in this notebook reports to.
TRACKING_URI = "http://localhost:5000"
EXPERIMENT_NAME = "/cv-project-1"

mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(experiment_name=EXPERIMENT_NAME)

2025/02/01 04:41:26 INFO mlflow.tracking.fluent: Experiment with name '/cv-project-1' does not exist. Creating a new experiment.


<Experiment: artifact_location='mlflow-artifacts:/235196971337957232', creation_time=1738377686377, experiment_id='235196971337957232', last_update_time=1738377686377, lifecycle_stage='active', name='/cv-project-1', tags={}>

In [40]:
# Seed the random number generators before the model is built so that weight
# initialisation (and, with workers=True, the dataloader workers Lightning
# sets up) is repeatable across kernel restarts.
SEED = 42
L.seed_everything(SEED, workers=True)

# Hyperparameters are passed by keyword so the values are self-describing.
model = CNN(lr=0.01, momentum=0.9)
trainer = L.Trainer(max_epochs=10)

INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


In [41]:
with mlflow.start_run() as run:
    # Record the training configuration alongside the run.
    params = {
        "epochs": trainer.max_epochs,
        "lr": model.hparams.lr,
        "momentum": model.hparams.momentum,
        "criterion": model.hparams.criterion.__name__,
        "optimizer": "SGD",
    }
    mlflow.log_params(params)

    # Write the torchinfo summary to disk and attach it to the run; without
    # the log_artifact call the file only exists locally and is not tracked.
    summary_path = "model_summary.txt"
    with open(summary_path, "w", encoding="utf-8") as f:
        f.write(str(summary(model)))
    mlflow.log_artifact(summary_path)

    trainer.fit(model=model, train_dataloaders=train_dataloader)

    # Store the trained model under the "model" artifact path of this run.
    mlflow.pytorch.log_model(model, "model")

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name   | Type       | Params | Mode 
----------------------------------------------
0 | layers | Sequential | 395 K  | train
----------------------------------------------
395 K     Trainable params
0         Non-trainable params
395 K     Total params
1.583     Total estimated model params size (MB)
31        Modules in train mode
0         Modules in eval mode
/home/abdelazizwf/.pyenv/versions/3.11.4/envs/cv_project/lib/python3.11/site-packages/lightning/pytorch/loops/fit_loop.py:310: The number of training batches (37) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Epoch 9: 100%|██████████| 37/37 [00:04<00:00,  8.95it/s, v_num=3]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 37/37 [00:04<00:00,  8.77it/s, v_num=3]




🏃 View run gentle-shark-403 at: http://localhost:5000/#/experiments/235196971337957232/runs/bbf7f02d220248da9b422c5a20498525
🧪 View experiment at: http://localhost:5000/#/experiments/235196971337957232
