# ResNET
#### Experiments using Resnet models and fine-tuning with image dataset.

In [1]:
from scripts.models.resnet import ResNetClassifier

In [2]:
import os

from pathlib import Path

from dotenv import load_dotenv
load_dotenv()
root_data = os.getenv("KAGGLE_FILES_DIR")
dataset_path = Path(os.getcwd(), "..", root_data, 'processed')

#### Prepare trainer, callbacks from Pytorch Lightning

In [3]:
import pytorch_lightning as pl

checkpoint_callback = pl.callbacks.ModelCheckpoint(
    dirpath="../experiments/models/resnet18",
    filename="resnet18-model-{epoch}-{val_loss:.3f}-{val_acc:0.3f}",
    monitor="val_loss",
    save_top_k=2,
    mode="min",
    save_last=True,
)

early_stopping = pl.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0.001,
    patience=3,
    verbose=True,
    mode='min'
)

# Instantiate Lightning trainer and train model
trainer_args = {
    "accelerator": "mps",
    "max_epochs": 100,
    "callbacks": [checkpoint_callback, early_stopping],
    "precision": 32,
}
trainer = pl.Trainer(**trainer_args)

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


### ResNet18 Model
- binary classification
- optimizer: Adam
- learning rate: 1e-4
- tune all layers
- loss function: BCEWithLogitsLoss

In [4]:
resnet18 = ResNetClassifier(
    num_classes=1,
    resnet_version=18,
    train_path=Path(dataset_path, "train"),
    val_path=Path(dataset_path, "val"),
    test_path=Path(dataset_path, "test"),
    optimizer="adam",
    lr=1e-4,
    batch_size=32,
    tune_fc_only=False,
)


In [7]:
trainer.fit(resnet18)


  | Name         | Type              | Params
---------------------------------------------------
0 | loss_fn      | BCEWithLogitsLoss | 0     
1 | accuracy     | BinaryAccuracy    | 0     
2 | resnet_model | ResNet            | 11.2 M
---------------------------------------------------
11.2 M    Trainable params
0         Non-trainable params
11.2 M    Total params
44.708    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved. New best score: 0.560


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.012 >= min_delta = 0.001. New best score: 0.548


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.019 >= min_delta = 0.001. New best score: 0.529


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.031 >= min_delta = 0.001. New best score: 0.498


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.017 >= min_delta = 0.001. New best score: 0.480


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.006 >= min_delta = 0.001. New best score: 0.475


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.002 >= min_delta = 0.001. New best score: 0.473


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.010 >= min_delta = 0.001. New best score: 0.463


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.001 >= min_delta = 0.001. New best score: 0.461


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Monitored metric val_loss did not improve in the last 3 records. Best score: 0.461. Signaling Trainer to stop.


In [8]:
trainer.test(resnet18)

Testing: |          | 0/? [00:00<?, ?it/s]

[{'test_loss': 0.6620050668716431, 'test_acc': 0.7771162390708923}]

Results are promising, but we can try to improve the model by fine-tuning the last layer only.

In [12]:

checkpoint_callback = pl.callbacks.ModelCheckpoint(
    dirpath="../experiments/models/resnet18-fc-only",
    filename="resnet18-model-{epoch}-{val_loss:.3f}-{val_acc:0.3f}",
    monitor="val_loss",
    save_top_k=2,
    mode="min",
    save_last=True,
)

early_stopping = pl.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0.001,
    patience=3,
    verbose=True,
    mode='min'
)

# Instantiate Lightning trainer and train model
trainer_args = {
    "accelerator": "mps",
    "max_epochs": 100,
    "callbacks": [checkpoint_callback, early_stopping],
    "precision": 32,
}
trainer = pl.Trainer(**trainer_args)


resnet18 = ResNetClassifier(
    num_classes=1,
    resnet_version=18,
    train_path=Path(dataset_path, "train"),
    val_path=Path(dataset_path, "val"),
    test_path=Path(dataset_path, "test"),
    optimizer="adam",
    lr=1e-4,
    batch_size=32,
    tune_fc_only=True,
)


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [13]:
trainer.fit(resnet18)


  | Name         | Type              | Params
---------------------------------------------------
0 | loss_fn      | BCEWithLogitsLoss | 0     
1 | accuracy     | BinaryAccuracy    | 0     
2 | resnet_model | ResNet            | 11.2 M
---------------------------------------------------
513       Trainable params
11.2 M    Non-trainable params
11.2 M    Total params
44.708    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved. New best score: 0.582


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.002 >= min_delta = 0.001. New best score: 0.580


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.001 >= min_delta = 0.001. New best score: 0.579


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.002 >= min_delta = 0.001. New best score: 0.577


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.001 >= min_delta = 0.001. New best score: 0.576


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Monitored metric val_loss did not improve in the last 3 records. Best score: 0.576. Signaling Trainer to stop.


In [14]:
trainer.test(resnet18)

Testing: |          | 0/? [00:00<?, ?it/s]

[{'test_loss': 0.5764658451080322, 'test_acc': 0.7273219227790833}]

Results are worse, so return with training all layers.
Next, I check SGD optimizer.


In [20]:
checkpoint_callback = pl.callbacks.ModelCheckpoint(
    dirpath="../experiments/models/resnet18-sgd",
    filename="resnet18-model-{epoch}-{val_loss:.3f}-{val_acc:0.3f}",
    monitor="val_loss",
    save_top_k=2,
    mode="min",
    save_last=True,
)

early_stopping = pl.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0.001,
    patience=3,
    verbose=True,
    mode='min'
)

trainer_args = {
    "accelerator": "mps",
    "max_epochs": 100,
    "callbacks": [checkpoint_callback, early_stopping],
    "precision": 32,
}
trainer = pl.Trainer(**trainer_args)


resnet18 = ResNetClassifier(
    num_classes=1,
    resnet_version=18,
    train_path=Path(dataset_path, "train"),
    val_path=Path(dataset_path, "val"),
    test_path=Path(dataset_path, "test"),
    optimizer="sgd",
    lr=1e-4,
    batch_size=32,
    tune_fc_only=False,
)


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [17]:
trainer.fit(resnet18)


  | Name         | Type              | Params
---------------------------------------------------
0 | loss_fn      | BCEWithLogitsLoss | 0     
1 | accuracy     | BinaryAccuracy    | 0     
2 | resnet_model | ResNet            | 11.2 M
---------------------------------------------------
11.2 M    Trainable params
0         Non-trainable params
11.2 M    Total params
44.708    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved. New best score: 0.582


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.003 >= min_delta = 0.001. New best score: 0.579


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.002 >= min_delta = 0.001. New best score: 0.578


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.001 >= min_delta = 0.001. New best score: 0.576


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.001 >= min_delta = 0.001. New best score: 0.575


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.001 >= min_delta = 0.001. New best score: 0.574


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.002 >= min_delta = 0.001. New best score: 0.572


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.001 >= min_delta = 0.001. New best score: 0.571


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.002 >= min_delta = 0.001. New best score: 0.569


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.001 >= min_delta = 0.001. New best score: 0.568


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.001 >= min_delta = 0.001. New best score: 0.566


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.001 >= min_delta = 0.001. New best score: 0.565


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.001 >= min_delta = 0.001. New best score: 0.564


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.001 >= min_delta = 0.001. New best score: 0.563


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.002 >= min_delta = 0.001. New best score: 0.561


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Monitored metric val_loss did not improve in the last 3 records. Best score: 0.561. Signaling Trainer to stop.


In [18]:
trainer.test(resnet18)

Testing: |          | 0/? [00:00<?, ?it/s]

[{'test_loss': 0.5607972145080566, 'test_acc': 0.7389045357704163}]

Results are worse and training time is much longer, so now with AdamW optimizer.

In [5]:
checkpoint_callback = pl.callbacks.ModelCheckpoint(
    dirpath="../experiments/models/resnet18-adamw",
    filename="resnet18-model-{epoch}-{val_loss:.3f}-{val_acc:0.3f}",
    monitor="val_loss",
    save_top_k=2,
    mode="min",
    save_last=True,
)

early_stopping = pl.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0.001,
    patience=3,
    verbose=True,
    mode='min'
)

trainer_args = {
    "accelerator": "mps",
    "max_epochs": 100,
    "callbacks": [checkpoint_callback, early_stopping],
    "precision": 32,
}
trainer = pl.Trainer(**trainer_args)


resnet18 = ResNetClassifier(
    num_classes=1,
    resnet_version=18,
    train_path=Path(dataset_path, "train"),
    val_path=Path(dataset_path, "val"),
    test_path=Path(dataset_path, "test"),
    optimizer="adamw",
    lr=1e-4,
    batch_size=32,
    tune_fc_only=False,
)


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [6]:
trainer.fit(resnet18)


  | Name         | Type              | Params
---------------------------------------------------
0 | loss_fn      | BCEWithLogitsLoss | 0     
1 | accuracy     | BinaryAccuracy    | 0     
2 | resnet_model | ResNet            | 11.2 M
---------------------------------------------------
11.2 M    Trainable params
0         Non-trainable params
11.2 M    Total params
44.708    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved. New best score: 0.570


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.017 >= min_delta = 0.001. New best score: 0.553


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.009 >= min_delta = 0.001. New best score: 0.544


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.011 >= min_delta = 0.001. New best score: 0.534


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.002 >= min_delta = 0.001. New best score: 0.532


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.032 >= min_delta = 0.001. New best score: 0.500


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.027 >= min_delta = 0.001. New best score: 0.473


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.011 >= min_delta = 0.001. New best score: 0.462


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Monitored metric val_loss did not improve in the last 3 records. Best score: 0.462. Signaling Trainer to stop.


In [7]:
trainer.test(resnet18)

Testing: |          | 0/? [00:00<?, ?it/s]

[{'test_loss': 0.5112970471382141, 'test_acc': 0.7824204564094543}]

Accuracy 0.78 - a bit better than Adam optimizer. So AdamW stays.  
Next I will try learning rate od 1e-3.

In [8]:
checkpoint_callback = pl.callbacks.ModelCheckpoint(
    dirpath="../experiments/models/resnet18-adamw-1e-3",
    filename="resnet18-model-{epoch}-{val_loss:.3f}-{val_acc:0.3f}",
    monitor="val_loss",
    save_top_k=2,
    mode="min",
    save_last=True,
)

early_stopping = pl.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0.001,
    patience=3,
    verbose=True,
    mode='min'
)

trainer_args = {
    "accelerator": "mps",
    "max_epochs": 100,
    "callbacks": [checkpoint_callback, early_stopping],
    "precision": 32,
}
trainer = pl.Trainer(**trainer_args)


resnet18 = ResNetClassifier(
    num_classes=1,
    resnet_version=18,
    train_path=Path(dataset_path, "train"),
    val_path=Path(dataset_path, "val"),
    test_path=Path(dataset_path, "test"),
    optimizer="adamw",
    lr=1e-3,
    batch_size=32,
    tune_fc_only=False,
)


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [9]:
trainer.fit(resnet18)


  | Name         | Type              | Params
---------------------------------------------------
0 | loss_fn      | BCEWithLogitsLoss | 0     
1 | accuracy     | BinaryAccuracy    | 0     
2 | resnet_model | ResNet            | 11.2 M
---------------------------------------------------
11.2 M    Trainable params
0         Non-trainable params
11.2 M    Total params
44.708    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved. New best score: 0.591


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.004 >= min_delta = 0.001. New best score: 0.587


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.037 >= min_delta = 0.001. New best score: 0.550


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.006 >= min_delta = 0.001. New best score: 0.544


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.027 >= min_delta = 0.001. New best score: 0.517


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.005 >= min_delta = 0.001. New best score: 0.512


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.023 >= min_delta = 0.001. New best score: 0.489


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.007 >= min_delta = 0.001. New best score: 0.482


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.016 >= min_delta = 0.001. New best score: 0.466


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.005 >= min_delta = 0.001. New best score: 0.461


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Monitored metric val_loss did not improve in the last 3 records. Best score: 0.461. Signaling Trainer to stop.


In [12]:
trainer.test(resnet18)

Testing: |          | 0/? [00:00<?, ?it/s]

[{'test_loss': 0.4995814263820648, 'test_acc': 0.7792812585830688}]

Previous model has the same results but with much shorter training time.
So I will stick with 1e-4 learning rate, AdamW optimizer and training all layers.
Next is changing ResNet size.

In [14]:
checkpoint_callback = pl.callbacks.ModelCheckpoint(
    dirpath="../experiments/models/resnet34-adamw",
    filename="resnet34-model-{epoch}-{val_loss:.3f}-{val_acc:0.3f}",
    monitor="val_loss",
    save_top_k=2,
    mode="min",
    save_last=True,
)

early_stopping = pl.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0.001,
    patience=3,
    verbose=True,
    mode='min'
)

trainer_args = {
    "accelerator": "mps",
    "max_epochs": 100,
    "callbacks": [checkpoint_callback, early_stopping],
    "precision": 32,
}
trainer = pl.Trainer(**trainer_args)


resnet34 = ResNetClassifier(
    num_classes=1,
    resnet_version=34,
    train_path=Path(dataset_path, "train"),
    val_path=Path(dataset_path, "val"),
    test_path=Path(dataset_path, "test"),
    optimizer="adamw",
    lr=1e-4,
    batch_size=32,
    tune_fc_only=False,
)


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [15]:
trainer.fit(resnet34)


  | Name         | Type              | Params
---------------------------------------------------
0 | loss_fn      | BCEWithLogitsLoss | 0     
1 | accuracy     | BinaryAccuracy    | 0     
2 | resnet_model | ResNet            | 21.3 M
---------------------------------------------------
21.3 M    Trainable params
0         Non-trainable params
21.3 M    Total params
85.141    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved. New best score: 0.558


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.006 >= min_delta = 0.001. New best score: 0.552


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.021 >= min_delta = 0.001. New best score: 0.531


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.036 >= min_delta = 0.001. New best score: 0.495


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.011 >= min_delta = 0.001. New best score: 0.484


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.023 >= min_delta = 0.001. New best score: 0.461


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.002 >= min_delta = 0.001. New best score: 0.459


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.005 >= min_delta = 0.001. New best score: 0.454


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Monitored metric val_loss did not improve in the last 3 records. Best score: 0.454. Signaling Trainer to stop.


In [16]:
trainer.test(resnet34)

Testing: |          | 0/? [00:00<?, ?it/s]

[{'test_loss': 0.47373509407043457, 'test_acc': 0.7960597276687622}]

Result are better, but not much.
I will try ResNet50 model and decide on the best model.

In [6]:
checkpoint_callback = pl.callbacks.ModelCheckpoint(
    dirpath="../experiments/models/resnet50-adamw",
    filename="resnet50-model-{epoch}-{val_loss:.3f}-{val_acc:0.3f}",
    monitor="val_loss",
    save_top_k=2,
    mode="min",
    save_last=True,
)

early_stopping = pl.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0.001,
    patience=3,
    verbose=True,
    mode='min'
)

trainer_args = {
    "accelerator": "mps",
    "max_epochs": 100,
    "callbacks": [checkpoint_callback, early_stopping],
    "precision": 32,
}
trainer = pl.Trainer(**trainer_args)


resnet50 = ResNetClassifier(
    num_classes=1,
    resnet_version=50,
    train_path=Path(dataset_path, "train"),
    val_path=Path(dataset_path, "val"),
    test_path=Path(dataset_path, "test"),
    optimizer="adamw",
    lr=1e-4,
    batch_size=32,
    tune_fc_only=False,
)


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [7]:
trainer.fit(resnet50)


  | Name         | Type              | Params
---------------------------------------------------
0 | loss_fn      | BCEWithLogitsLoss | 0     
1 | accuracy     | BinaryAccuracy    | 0     
2 | resnet_model | ResNet            | 23.5 M
---------------------------------------------------
23.5 M    Trainable params
0         Non-trainable params
23.5 M    Total params
94.040    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved. New best score: 0.564


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.005 >= min_delta = 0.001. New best score: 0.560


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.012 >= min_delta = 0.001. New best score: 0.547


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.010 >= min_delta = 0.001. New best score: 0.537


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.010 >= min_delta = 0.001. New best score: 0.528


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.038 >= min_delta = 0.001. New best score: 0.489


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.013 >= min_delta = 0.001. New best score: 0.477


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.009 >= min_delta = 0.001. New best score: 0.467


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.003 >= min_delta = 0.001. New best score: 0.464


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Monitored metric val_loss did not improve in the last 3 records. Best score: 0.464. Signaling Trainer to stop.


In [8]:
trainer.test(resnet50)

Testing: |          | 0/? [00:00<?, ?it/s]

[{'test_loss': 0.5501084923744202, 'test_acc': 0.7820956707000732}]

## ResNet18 with weights

In [5]:
checkpoint_callback = pl.callbacks.ModelCheckpoint(
    dirpath="../experiments/models/resnet18-adamw-weighted",
    filename="model-{epoch}-{val_loss:.3f}-{val_acc:0.3f}",
    monitor="val_loss",
    save_top_k=2,
    mode="min",
    save_last=True,
)

early_stopping = pl.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0.001,
    patience=3,
    verbose=True,
    mode='min'
)

trainer_args = {
    "accelerator": "mps",
    "max_epochs": 100,
    "callbacks": [checkpoint_callback, early_stopping],
    "precision": 32,
}
trainer = pl.Trainer(**trainer_args)


resnet = ResNetClassifier(
    num_classes=1,
    resnet_version=18,
    train_path=Path(dataset_path, "train"),
    val_path=Path(dataset_path, "val"),
    test_path=Path(dataset_path, "test"),
    optimizer="adamw",
    lr=1e-4,
    batch_size=32,
    tune_fc_only=False,
    pos_weight=2.66,
)


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [None]:
trainer.fit(resnet)

/Users/isulim/Sages/retino-cnn/.venv/lib/python3.12/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:653: Checkpoint directory /Users/isulim/Sages/retino-cnn/experiments/models/resnet18-adamw-weighted exists and is not empty.

  | Name         | Type              | Params
---------------------------------------------------
0 | loss_fn      | BCEWithLogitsLoss | 0     
1 | accuracy     | BinaryAccuracy    | 0     
2 | precision    | BinaryPrecision   | 0     
3 | recall       | BinaryRecall      | 0     
4 | f1           | BinaryF1Score     | 0     
5 | resnet_model | ResNet            | 11.2 M
---------------------------------------------------
11.2 M    Trainable params
0         Non-trainable params
11.2 M    Total params
44.708    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved. New best score: 0.955


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.004 >= min_delta = 0.001. New best score: 0.952


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.038 >= min_delta = 0.001. New best score: 0.914


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.051 >= min_delta = 0.001. New best score: 0.863


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.026 >= min_delta = 0.001. New best score: 0.837


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.010 >= min_delta = 0.001. New best score: 0.827


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.025 >= min_delta = 0.001. New best score: 0.802


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.005 >= min_delta = 0.001. New best score: 0.798


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Monitored metric val_loss did not improve in the last 3 records. Best score: 0.798. Signaling Trainer to stop.


In [5]:
checkpoint_callback = pl.callbacks.ModelCheckpoint(
    dirpath="../experiments/models/resnet18-adamw-weighted-binary-metrics",
    filename="model-{epoch}-{val_loss:.3f}-{val_acc:0.3f}",
    monitor="val_loss",
    save_top_k=2,
    mode="min",
    save_last=True,
)

early_stopping = pl.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0.001,
    patience=3,
    verbose=True,
    mode='min'
)

trainer_args = {
    "accelerator": "mps",
    "max_epochs": 100,
    "callbacks": [checkpoint_callback, early_stopping],
    "precision": 32,
}
trainer = pl.Trainer(**trainer_args)


resnet = ResNetClassifier(
    num_classes=1,
    resnet_version=18,
    train_path=Path(dataset_path, "train"),
    val_path=Path(dataset_path, "val"),
    test_path=Path(dataset_path, "test"),
    optimizer="adamw",
    lr=1e-4,
    batch_size=32,
    tune_fc_only=False,
    pos_weight=2.66,
)


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [6]:
trainer.fit(resnet)


  | Name         | Type              | Params
---------------------------------------------------
0 | loss_fn      | BCEWithLogitsLoss | 0     
1 | accuracy     | BinaryAccuracy    | 0     
2 | precision    | BinaryPrecision   | 0     
3 | recall       | BinaryRecall      | 0     
4 | f1           | BinaryF1Score     | 0     
5 | resnet_model | ResNet            | 11.2 M
---------------------------------------------------
11.2 M    Trainable params
0         Non-trainable params
11.2 M    Total params
44.708    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved. New best score: 0.989


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.036 >= min_delta = 0.001. New best score: 0.954


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.014 >= min_delta = 0.001. New best score: 0.940


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.019 >= min_delta = 0.001. New best score: 0.921


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.034 >= min_delta = 0.001. New best score: 0.888


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.047 >= min_delta = 0.001. New best score: 0.841


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.014 >= min_delta = 0.001. New best score: 0.827


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.008 >= min_delta = 0.001. New best score: 0.819


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.015 >= min_delta = 0.001. New best score: 0.804


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Monitored metric val_loss did not improve in the last 3 records. Best score: 0.804. Signaling Trainer to stop.


In [7]:
trainer.test(resnet)

Testing: |          | 0/? [00:00<?, ?it/s]

[{'test_loss': 0.9600087404251099,
  'test_acc': 0.7606624960899353,
  'test_prec': 0.27603375911712646,
  'test_rec': 0.14332106709480286,
  'test_f1': 0.18669018149375916}]

In [4]:
checkpoint_callback = pl.callbacks.ModelCheckpoint(
    dirpath="../experiments/models/resnet18/adamw-binary-metrics",
    filename="model-{epoch}-{val_loss:.3f}-{val_acc:0.3f}",
    monitor="val_loss",
    save_top_k=2,
    mode="min",
    save_last=True,
)

early_stopping = pl.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0.001,
    patience=3,
    verbose=True,
    mode='min'
)

trainer_args = {
    "accelerator": "mps",
    "max_epochs": 100,
    "callbacks": [checkpoint_callback, early_stopping],
    "precision": 32,
}
trainer = pl.Trainer(**trainer_args)


resnet = ResNetClassifier(
    num_classes=1,
    resnet_version=18,
    train_path=Path(dataset_path, "train"),
    val_path=Path(dataset_path, "val"),
    test_path=Path(dataset_path, "test"),
    optimizer="adamw",
    lr=1e-4,
    batch_size=32,
    tune_fc_only=False,
)


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [5]:
trainer.fit(resnet)

/Users/isulim/Sages/retino-cnn/.venv/lib/python3.12/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:653: Checkpoint directory /Users/isulim/Sages/retino-cnn/experiments/models/resnet18/adamw-binary-metrics exists and is not empty.

  | Name         | Type              | Params
---------------------------------------------------
0 | loss_fn      | BCEWithLogitsLoss | 0     
1 | accuracy     | BinaryAccuracy    | 0     
2 | resnet_model | ResNet            | 11.2 M
---------------------------------------------------
11.2 M    Trainable params
0         Non-trainable params
11.2 M    Total params
44.708    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved. New best score: 0.553


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.005 >= min_delta = 0.001. New best score: 0.548


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.007 >= min_delta = 0.001. New best score: 0.541


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.036 >= min_delta = 0.001. New best score: 0.504


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.011 >= min_delta = 0.001. New best score: 0.493


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.024 >= min_delta = 0.001. New best score: 0.470


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.008 >= min_delta = 0.001. New best score: 0.462


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Monitored metric val_loss did not improve in the last 3 records. Best score: 0.462. Signaling Trainer to stop.


In [6]:
trainer.test(resnet)

Testing: |          | 0/? [00:00<?, ?it/s]

[{'test_loss': 0.4916456341743469, 'test_acc': 0.7943277955055237}]

In [7]:
checkpoint_callback = pl.callbacks.ModelCheckpoint(
    dirpath="../experiments/models/resnet18/adamw/binary-metrics/1e3",
    filename="model-{epoch}-{val_loss:.3f}-{val_acc:0.3f}",
    monitor="val_loss",
    save_top_k=2,
    mode="min",
    save_last=True,
)

early_stopping = pl.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0.001,
    patience=3,
    verbose=True,
    mode='min'
)

trainer_args = {
    "accelerator": "mps",
    "max_epochs": 100,
    "callbacks": [checkpoint_callback, early_stopping],
    "precision": 32,
}
trainer = pl.Trainer(**trainer_args)


resnet18_3 = ResNetClassifier(
    num_classes=1,
    resnet_version=18,
    train_path=Path(dataset_path, "train"),
    val_path=Path(dataset_path, "val"),
    test_path=Path(dataset_path, "test"),
    optimizer="adamw",
    lr=1e-3,
    batch_size=32,
    tune_fc_only=False,
)


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [8]:
trainer.fit(resnet18_3)

trainer.test(resnet18_3)

/Users/isulim/Sages/retino-cnn/.venv/lib/python3.12/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:653: Checkpoint directory /Users/isulim/Sages/retino-cnn/experiments/models/resnet18/adamw/binary-metrics/1e3 exists and is not empty.

  | Name         | Type              | Params
---------------------------------------------------
0 | loss_fn      | BCEWithLogitsLoss | 0     
1 | accuracy     | BinaryAccuracy    | 0     
2 | resnet_model | ResNet            | 11.2 M
---------------------------------------------------
11.2 M    Trainable params
0         Non-trainable params
11.2 M    Total params
44.708    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved. New best score: 0.583


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.019 >= min_delta = 0.001. New best score: 0.564


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.005 >= min_delta = 0.001. New best score: 0.559


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.005 >= min_delta = 0.001. New best score: 0.554


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.004 >= min_delta = 0.001. New best score: 0.549


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.004 >= min_delta = 0.001. New best score: 0.545


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.005 >= min_delta = 0.001. New best score: 0.540


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.015 >= min_delta = 0.001. New best score: 0.525


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.012 >= min_delta = 0.001. New best score: 0.513


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.003 >= min_delta = 0.001. New best score: 0.510


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.024 >= min_delta = 0.001. New best score: 0.487


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.006 >= min_delta = 0.001. New best score: 0.481


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.006 >= min_delta = 0.001. New best score: 0.475


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.007 >= min_delta = 0.001. New best score: 0.469


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Monitored metric val_loss did not improve in the last 3 records. Best score: 0.469. Signaling Trainer to stop.


Testing: |          | 0/? [00:00<?, ?it/s]

[{'test_loss': 0.534894585609436, 'test_acc': 0.7764667868614197}]

In [9]:
checkpoint_callback = pl.callbacks.ModelCheckpoint(
    dirpath="../experiments/models/resnet34/adamw/binary-metrics/1e3",
    filename="model-{epoch}-{val_loss:.3f}-{val_acc:0.3f}",
    monitor="val_loss",
    save_top_k=2,
    mode="min",
    save_last=True,
)

early_stopping = pl.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0.001,
    patience=3,
    verbose=True,
    mode='min'
)

trainer_args = {
    "accelerator": "mps",
    "max_epochs": 100,
    "callbacks": [checkpoint_callback, early_stopping],
    "precision": 32,
}
trainer = pl.Trainer(**trainer_args)


resnet34_adamw_1e3 = ResNetClassifier(
    num_classes=1,
    resnet_version=34,
    train_path=Path(dataset_path, "train"),
    val_path=Path(dataset_path, "val"),
    test_path=Path(dataset_path, "test"),
    optimizer="adamw",
    lr=1e-3,
    batch_size=32,
    tune_fc_only=False,
)


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [10]:
trainer.fit(resnet34_adamw_1e3)

trainer.test(resnet34_adamw_1e3)

/Users/isulim/Sages/retino-cnn/.venv/lib/python3.12/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:653: Checkpoint directory /Users/isulim/Sages/retino-cnn/experiments/models/resnet34/adamw/binary-metrics/1e3 exists and is not empty.

  | Name         | Type              | Params
---------------------------------------------------
0 | loss_fn      | BCEWithLogitsLoss | 0     
1 | accuracy     | BinaryAccuracy    | 0     
2 | resnet_model | ResNet            | 21.3 M
---------------------------------------------------
21.3 M    Trainable params
0         Non-trainable params
21.3 M    Total params
85.141    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved. New best score: 0.618


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.035 >= min_delta = 0.001. New best score: 0.583


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.005 >= min_delta = 0.001. New best score: 0.578


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.015 >= min_delta = 0.001. New best score: 0.563


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.005 >= min_delta = 0.001. New best score: 0.558


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.002 >= min_delta = 0.001. New best score: 0.556


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.001 >= min_delta = 0.001. New best score: 0.555


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.003 >= min_delta = 0.001. New best score: 0.552


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.009 >= min_delta = 0.001. New best score: 0.543


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.031 >= min_delta = 0.001. New best score: 0.512


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.014 >= min_delta = 0.001. New best score: 0.498


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.013 >= min_delta = 0.001. New best score: 0.486


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.016 >= min_delta = 0.001. New best score: 0.469


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.011 >= min_delta = 0.001. New best score: 0.458


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Monitored metric val_loss did not improve in the last 3 records. Best score: 0.458. Signaling Trainer to stop.


Testing: |          | 0/? [00:00<?, ?it/s]

[{'test_loss': 0.480674147605896, 'test_acc': 0.7924875617027283}]

In [11]:
checkpoint_callback = pl.callbacks.ModelCheckpoint(
    dirpath="../experiments/models/resnet34/adam/binary-metrics/1e3",
    filename="model-{epoch}-{val_loss:.3f}-{val_acc:0.3f}",
    monitor="val_loss",
    save_top_k=2,
    mode="min",
    save_last=True,
)

early_stopping = pl.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0.001,
    patience=3,
    verbose=True,
    mode='min'
)

trainer_args = {
    "accelerator": "mps",
    "max_epochs": 100,
    "callbacks": [checkpoint_callback, early_stopping],
    "precision": 32,
}
trainer = pl.Trainer(**trainer_args)


resnet34_adam_1e3 = ResNetClassifier(
    num_classes=1,
    resnet_version=34,
    train_path=Path(dataset_path, "train"),
    val_path=Path(dataset_path, "val"),
    test_path=Path(dataset_path, "test"),
    optimizer="adam",
    lr=1e-3,
    batch_size=32,
    tune_fc_only=False,
)


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [12]:
trainer.fit(resnet34_adam_1e3)

trainer.test(resnet34_adam_1e3)


  | Name         | Type              | Params
---------------------------------------------------
0 | loss_fn      | BCEWithLogitsLoss | 0     
1 | accuracy     | BinaryAccuracy    | 0     
2 | resnet_model | ResNet            | 21.3 M
---------------------------------------------------
21.3 M    Trainable params
0         Non-trainable params
21.3 M    Total params
85.141    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved. New best score: 0.580


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.021 >= min_delta = 0.001. New best score: 0.559


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.001 >= min_delta = 0.001. New best score: 0.558


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.004 >= min_delta = 0.001. New best score: 0.554


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.004 >= min_delta = 0.001. New best score: 0.550


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.024 >= min_delta = 0.001. New best score: 0.527


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.023 >= min_delta = 0.001. New best score: 0.504


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.012 >= min_delta = 0.001. New best score: 0.491


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.017 >= min_delta = 0.001. New best score: 0.475


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.008 >= min_delta = 0.001. New best score: 0.467


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.009 >= min_delta = 0.001. New best score: 0.458


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.008 >= min_delta = 0.001. New best score: 0.450


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Monitored metric val_loss did not improve in the last 3 records. Best score: 0.450. Signaling Trainer to stop.


Testing: |          | 0/? [00:00<?, ?it/s]

[{'test_loss': 0.49680984020233154, 'test_acc': 0.7715955972671509}]

In [13]:
checkpoint_callback = pl.callbacks.ModelCheckpoint(
    dirpath="../experiments/models/resnet34/adam/binary-metrics/1e4",
    filename="model-{epoch}-{val_loss:.3f}-{val_acc:0.3f}",
    monitor="val_loss",
    save_top_k=2,
    mode="min",
    save_last=True,
)

early_stopping = pl.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0.001,
    patience=3,
    verbose=True,
    mode='min'
)

trainer_args = {
    "accelerator": "mps",
    "max_epochs": 100,
    "callbacks": [checkpoint_callback, early_stopping],
    "precision": 32,
}
trainer = pl.Trainer(**trainer_args)


resnet34_adam_1e4 = ResNetClassifier(
    num_classes=1,
    resnet_version=34,
    train_path=Path(dataset_path, "train"),
    val_path=Path(dataset_path, "val"),
    test_path=Path(dataset_path, "test"),
    optimizer="adam",
    lr=1e-4,
    batch_size=32,
    tune_fc_only=False,
)


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [14]:
trainer.fit(resnet34_adam_1e4)

trainer.test(resnet34_adam_1e4)


  | Name         | Type              | Params
---------------------------------------------------
0 | loss_fn      | BCEWithLogitsLoss | 0     
1 | accuracy     | BinaryAccuracy    | 0     
2 | resnet_model | ResNet            | 21.3 M
---------------------------------------------------
21.3 M    Trainable params
0         Non-trainable params
21.3 M    Total params
85.141    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved. New best score: 0.555


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.006 >= min_delta = 0.001. New best score: 0.549


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.010 >= min_delta = 0.001. New best score: 0.539


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.010 >= min_delta = 0.001. New best score: 0.529


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.005 >= min_delta = 0.001. New best score: 0.525


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.030 >= min_delta = 0.001. New best score: 0.494


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.019 >= min_delta = 0.001. New best score: 0.475


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.008 >= min_delta = 0.001. New best score: 0.467


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.008 >= min_delta = 0.001. New best score: 0.459


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Monitored metric val_loss did not improve in the last 3 records. Best score: 0.459. Signaling Trainer to stop.


Testing: |          | 0/? [00:00<?, ?it/s]

[{'test_loss': 0.4854128360748291, 'test_acc': 0.8033124208450317}]

Resnet50 model gave similar result as Resnet34, but training time was much longer.

Next, I try to use pretrained ResNet model with additional Sigmoid layer after FC layer.

In [1]:
import os
import pytorch_lightning as pl
from pathlib import Path

from dotenv import load_dotenv
load_dotenv()
root_data = os.getenv("KAGGLE_FILES_DIR")
dataset_path = Path(os.getcwd(), "..", root_data, 'processed')

from scripts.models.resnet import ResNetClassifier

checkpoint_callback = pl.callbacks.ModelCheckpoint(
    dirpath="../experiments/models/resnet18/adam/binary-output",
    filename="model-{epoch}-{val_loss:.3f}-{val_acc:0.3f}",
    monitor="val_loss",
    save_top_k=2,
    mode="min",
    save_last=True,
)

early_stopping = pl.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0.001,
    patience=3,
    verbose=True,
    mode='min'
)

trainer_args = {
    "accelerator": "mps",
    "max_epochs": 100,
    "callbacks": [checkpoint_callback, early_stopping],
    "precision": 32,
}
trainer = pl.Trainer(**trainer_args)


model = ResNetClassifier(
    num_classes=1,
    resnet_version=18,
    train_path=Path(dataset_path, "train"),
    val_path=Path(dataset_path, "val"),
    test_path=Path(dataset_path, "test"),
    optimizer="adam",
    lr=1e-3,
    batch_size=32,
    tune_fc_only=False,
    pretrained=True
)


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [2]:
trainer.fit(model)

/Users/isulim/Sages/retino-cnn/.venv/lib/python3.12/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:653: Checkpoint directory /Users/isulim/Sages/retino-cnn/experiments/models/resnet18/adam/binary-output exists and is not empty.

  | Name         | Type           | Params
------------------------------------------------
0 | loss_fn      | BCELoss        | 0     
1 | accuracy     | BinaryAccuracy | 0     
2 | resnet_model | ResNet         | 11.2 M
------------------------------------------------
11.2 M    Trainable params
0         Non-trainable params
11.2 M    Total params
44.708    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved. New best score: 0.482


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.033 >= min_delta = 0.001. New best score: 0.449


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.004 >= min_delta = 0.001. New best score: 0.445


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.012 >= min_delta = 0.001. New best score: 0.433


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.014 >= min_delta = 0.001. New best score: 0.419


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.016 >= min_delta = 0.001. New best score: 0.403


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.004 >= min_delta = 0.001. New best score: 0.398


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Monitored metric val_loss did not improve in the last 3 records. Best score: 0.398. Signaling Trainer to stop.


In [3]:
trainer.test(model)

Testing: |          | 0/? [00:00<?, ?it/s]

[{'test_loss': 0.4929027557373047, 'test_acc': 0.8302662968635559}]

In [1]:
import os
import pytorch_lightning as pl
from pathlib import Path

from dotenv import load_dotenv
load_dotenv()
root_data = os.getenv("KAGGLE_FILES_DIR")
dataset_path = Path(os.getcwd(), "..", root_data, 'processed')

from scripts.models.resnet import ResNetClassifier

checkpoint_callback = pl.callbacks.ModelCheckpoint(
    dirpath="../experiments/models/resnet34/adam/binary-output",
    filename="model-{epoch}-{val_loss:.3f}-{val_acc:0.3f}",
    monitor="val_loss",
    save_top_k=2,
    mode="min",
    save_last=True,
)

early_stopping = pl.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0.001,
    patience=3,
    verbose=True,
    mode='min'
)

trainer_args = {
    "accelerator": "mps",
    "max_epochs": 100,
    "callbacks": [checkpoint_callback, early_stopping],
    "precision": 32,
}
trainer = pl.Trainer(**trainer_args)


model = ResNetClassifier(
    num_classes=1,
    resnet_version=34,
    train_path=Path(dataset_path, "train"),
    val_path=Path(dataset_path, "val"),
    test_path=Path(dataset_path, "test"),
    optimizer="adam",
    lr=1e-3,
    batch_size=32,
    tune_fc_only=False,
    pretrained=True
)


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [2]:
trainer.fit(model)

/Users/isulim/Sages/retino-cnn/.venv/lib/python3.12/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:653: Checkpoint directory /Users/isulim/Sages/retino-cnn/experiments/models/resnet34/adam/binary-output exists and is not empty.

  | Name         | Type           | Params
------------------------------------------------
0 | loss_fn      | BCELoss        | 0     
1 | accuracy     | BinaryAccuracy | 0     
2 | resnet_model | ResNet         | 21.3 M
------------------------------------------------
21.3 M    Trainable params
0         Non-trainable params
21.3 M    Total params
85.141    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved. New best score: 0.471


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.023 >= min_delta = 0.001. New best score: 0.447


Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.010 >= min_delta = 0.001. New best score: 0.437


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric val_loss improved by 0.056 >= min_delta = 0.001. New best score: 0.381


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Monitored metric val_loss did not improve in the last 3 records. Best score: 0.381. Signaling Trainer to stop.


In [3]:
trainer.test(model)

Testing: |          | 0/? [00:00<?, ?it/s]

[{'test_loss': 0.42474567890167236, 'test_acc': 0.8421736359596252}]

In [4]:
best_model = ResNetClassifier.load_from_checkpoint("../experiments/models/resnet34/adam/binary-output/model-epoch=6-val_loss=0.381-val_acc=0.849.ckpt")

In [5]:
trainer.test(best_model)

Testing: |          | 0/? [00:00<?, ?it/s]

[{'test_loss': 0.3917483687400818, 'test_acc': 0.8453128337860107}]

Best model: 84.53% accuracy, ResNet34 pretrained.

Export model to ONNX format and Python pickle.

In [7]:
import pickle
with open("../models/resnet34-model.pkl", "wb") as f:
    pickle.dump(best_model, f)

In [8]:
data_loader = best_model.test_dataloader()
sample_batch = next(iter(data_loader))[0]

In [9]:
sample_image = sample_batch[0]
model = best_model.to("mps")
sample_input = sample_image.unsqueeze(0).to("mps")

In [11]:
best_model.to_onnx("../models/resnet34-model.onnx", sample_input, export_params=True)