# 1. Install package

In [1]:
! mkdir models

mkdir: cannot create directory ‘models’: File exists


In [1]:
! rm -rf training-data-retrieval
! git clone https://github.com/artemgalyan/training-data-retrieval.git
! cd training-data-retrieval && pip install -q .

Cloning into 'training-data-retrieval'...
remote: Enumerating objects: 92, done.[K
remote: Counting objects: 100% (92/92), done.[K
remote: Compressing objects: 100% (56/56), done.[K
remote: Total 92 (delta 48), reused 75 (delta 31), pack-reused 0 (from 0)[K
Receiving objects: 100% (92/92), 11.42 KiB | 1.27 MiB/s, done.
Resolving deltas: 100% (48/48), done.
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Building wheel for src (pyproject.toml) ... [?25l[?25hdone


# 2. Dataset

## 2.1 Download dataset

In [2]:
! gdown 1zEacYbvBOo3PWZtf9g0ONbTwLos0Kl9u
! unzip -q histology.zip

Downloading...
From (original): https://drive.google.com/uc?id=1zEacYbvBOo3PWZtf9g0ONbTwLos0Kl9u
From (redirected): https://drive.google.com/uc?id=1zEacYbvBOo3PWZtf9g0ONbTwLos0Kl9u&confirm=t&uuid=d4e15b51-4b8f-4cf7-aa4f-0372f0aa81ed
To: /content/histology.zip
100% 2.49G/2.49G [00:37<00:00, 66.5MB/s]


## 2.2 Dataset classes

In [2]:
USED_CLASSES = ['epi', 'nrm']

In [3]:
from glob import glob

import cv2

from torch import Tensor
from torch.utils.data import Dataset


class HistologyDataset(Dataset):
  def __init__(self, split: str, used_classes: list[str], transforms) -> None:
    self.split = split
    self.num_classes = len(used_classes)

    self.images = []
    self.labels = []

    self.classes = used_classes
    self.name_to_idx = {c: i for i, c in enumerate(self.classes)}
    self.transforms = transforms

    for clazz in self.classes:
      files = list(glob(f'{split}/{clazz}/*.png'))
      self.images.extend(files)
      self.labels.extend([clazz] * len(files))

  def __len__(self) -> int:
    return len(self.images)

  def __getitem__(self, idx: int):
    image = cv2.imread(self.images[idx])[..., ::-1].astype('float32') / 255
    label = self.name_to_idx[self.labels[idx]]
    return self.transforms(image), label

In [4]:
import torchvision.transforms.v2 as T

train_transforms = T.Compose([
    T.ToTensor(),
    T.RandomHorizontalFlip(),
    T.RandomRotation(degrees=15),
    T.RandomResizedCrop(size=(128, 128))
])

test_transforms = T.Compose([
    T.ToTensor(),
    T.Resize(size=(128, 128))
])


train_dataset = HistologyDataset('train', USED_CLASSES, train_transforms)
test_dataset = HistologyDataset('test', USED_CLASSES, test_transforms)
len(train_dataset), len(test_dataset)



(8403, 3600)

In [5]:
from torch.utils.data import DataLoader

train_loader = DataLoader(train_dataset, batch_size=64, num_workers=2, shuffle=True, drop_last=True)
val_loader = DataLoader(test_dataset, batch_size=64, num_workers=2, shuffle=False)

# 3. Model

In [17]:
from torchinfo import summary

from src.models import ClassificationResNet

model = ClassificationResNet(
    num_classes=train_dataset.num_classes,
    configuration=[
        (8, 1), # 64x64
        (12, 1), # 32x32
        (20, 1), # 16x16
        (32, 1), # 8x8
        (48, 1), # 4x4
        (64, 1), # 2x2
    ],
    image_channels=3
)

summary(model, input_shape=(1, 3, 128, 128))

Layer (type:depth-idx)                        Param #
ClassificationResNet                          --
├─BCEWithLogitsLoss: 1-1                      --
├─Sequential: 1-2                             --
│    └─ModuleList: 2-1                        --
│    │    └─Conv2d: 3-1                       224
│    │    └─ResidualBlock: 3-2                3,128
│    │    └─ResidualBlock: 3-3                1,784
│    │    └─ResidualBlock: 3-4                6,228
│    │    └─ResidualBlock: 3-5                3,972
│    │    └─ResidualBlock: 3-6                15,500
│    │    └─ResidualBlock: 3-7                10,940
│    │    └─ResidualBlock: 3-8                37,088
│    │    └─ResidualBlock: 3-9                27,872
│    │    └─ResidualBlock: 3-10               80,208
│    │    └─ResidualBlock: 3-11               62,544
│    │    └─ResidualBlock: 3-12               139,712
│    │    └─ResidualBlock: 3-13               111,040
├─Linear: 1-3                                 18
Total params: 500

# 4. Training

In [18]:
NAME = 'res_net'

In [8]:
! wandb login

[34m[1mwandb[0m: Currently logged in as: [33mahalian[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [19]:
import torch

device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [20]:
import lightning as L

from lightning.pytorch.callbacks import ModelCheckpoint, ProgressBar, StochasticWeightAveraging, EarlyStopping
from lightning.pytorch.loggers import WandbLogger


trainer = L.Trainer(
  accelerator=device,
  precision=16,
  max_epochs=30,
  logger=WandbLogger(
      project='Training data retrieval',
      name=NAME
  ),
  check_val_every_n_epoch=5,
  callbacks=[
      ModelCheckpoint(
          monitor='val_loss',
          dirpath='models',
          filename='{epoch}-{val_loss:.2f}',
          save_top_k=3,
          save_last=True
      ),
      ProgressBar(),
  ]
)

/usr/local/lib/python3.10/dist-packages/lightning/fabric/connector.py:571: `precision=16` is supported for historical reasons but its usage is discouraged. Please set your precision to 16-mixed instead!
INFO: Using 16bit Automatic Mixed Precision (AMP)
INFO:lightning.pytorch.utilities.rank_zero:Using 16bit Automatic Mixed Precision (AMP)
INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs


In [21]:
trainer.fit(
    model=model,
    train_dataloaders=train_loader,
    val_dataloaders=val_loader
)

/usr/local/lib/python3.10/dist-packages/lightning/pytorch/loggers/wandb.py:396: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.
/usr/local/lib/python3.10/dist-packages/lightning/pytorch/callbacks/model_checkpoint.py:654: Checkpoint directory /content/models exists and is not empty.
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: 
  | Name       | Type              | Params | Mode 
---------------------------------------------------------
0 | loss       | BCEWithLogitsLoss | 0      | train
1 | modules    | Sequential        | 500 K  | train
2 | classifier | Linear            | 18     | train
---------------------------------------------------------
500 K     Trainable params
0         Non-trainable params
500 K     Total params
2.001     Total estimated model p

TypeError: Module.modules() takes 1 positional argument but 3 were given