In [1]:
import os
import sys
import pandas as pd
import torch
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.callbacks.progress import TQDMProgressBar
import tempfile
import numpy as np
import matplotlib.pyplot as plt
import logging

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# The base directory is the parent of the current working directory; the
# src/, images/ and data/ paths used below are all built from it.
working_dir = os.path.abspath(os.getcwd())
dir_path = os.path.dirname(working_dir)

In [3]:
# Make the modules under <dir_path>/src importable from this notebook.
# The membership check keeps the cell idempotent: re-running it no longer
# appends a duplicate entry to sys.path each time.
src_dir = os.path.join(dir_path, "src")
if src_dir not in sys.path:
    sys.path.append(src_dir)

In [4]:
from data_module import ImageDataModule
from resnet import Resnet50

In [5]:
# Filesystem locations that are later unpacked into ImageDataModule as
# keyword arguments. Every path is rooted at dir_path.
images_root = os.path.join(dir_path, "images")
data_root = os.path.join(dir_path, "data")
data_channels = dict(
    image_path=os.path.join(images_root, "raw/"),
    train_dir=os.path.join(data_root, "train/"),
    test_dir=os.path.join(data_root, "test/"),
    valid_dir=os.path.join(data_root, "valid/"),
)

In [6]:
# Display the resolved paths so they can be checked by eye before training.
data_channels

{'image_path': '/home/main/cosc525_final_project/images/raw/',
 'train_dir': '/home/main/cosc525_final_project/data/train/',
 'test_dir': '/home/main/cosc525_final_project/data/test/',
 'valid_dir': '/home/main/cosc525_final_project/data/valid/'}

In [7]:
# --- Experiment configuration -------------------------------------------
# Dataset / schedule.
num_classes = 19
num_epochs = 10   # also used as the Trainer's max_epochs
batch_size = 32   # passed to ImageDataModule

# Optimizer settings, passed to Resnet50 as `lr` and `weight_decay`.
learning_rate = 1e-4
weight_decay = 1e-4

# Loss settings, passed to Resnet50 as `margin` and `alpha`.
margin = 0.1
alpha = 32

# Passed to Resnet50 as `k`; presumably the cutoff for the recall@k metric
# (the logs report val_recall@3) — TODO confirm.
k = 3


In [8]:
# Build the data module from the directory layout and batch size above.
data_module = ImageDataModule(
    batch_size=batch_size,
    **data_channels,
)

In [9]:
# Collect the constructor arguments first so the whole configuration can be
# read (or logged) in one place, then build the project's Resnet50 module.
model_config = dict(
    embedding_size=512,
    num_classes=num_classes,
    lr=learning_rate,
    weight_decay=weight_decay,
    margin=margin,
    alpha=alpha,
    k=k,
    bn_freeze=False,
)
model = Resnet50(**model_config)



In [10]:
# Display the module tree of the freshly built model for inspection.
model

Resnet50(
  (model): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
          (

In [11]:
# Sanity check: the criterion's margin should match the `margin` value
# passed to Resnet50 above.
model.criterion.mrg

0.1

In [12]:
# Sanity check: the criterion's alpha should match the `alpha` value
# passed to Resnet50 above.
model.criterion.alpha

32

In [13]:
# Create a fresh temporary directory and keep its path.
# NOTE(review): `dirpath` is not referenced by any cell visible here, and
# mkdtemp() leaves cleanup to the caller — confirm it is used (or removed) later.
dirpath = tempfile.mkdtemp()

In [14]:
# Progress bar callback for the Trainer; refresh_rate=20 limits how often
# the bar is redrawn.
progress_cb = TQDMProgressBar(refresh_rate=20)

In [15]:
# Stop training early when `val_loss` has not improved by at least
# `min_delta` for `patience` validation checks in a row.
# `patience` is documented as an integer count, so use floor division
# instead of `/` (which produced the float 5.0); max(1, ...) keeps it a
# positive count even for very small `num_epochs`.
early_stop_callback = EarlyStopping(
    monitor="val_loss",
    min_delta=0.0001,
    patience=max(1, num_epochs // 2),
    verbose=False,
    mode="min",
)

In [16]:
# Configure the Lightning Trainer for a full (non-debug) run of at most
# `num_epochs` epochs with 16-bit mixed precision.
# `devices` is fixed to 1 on every backend: the previous
# `1 if torch.cuda.is_available() else None` passed None on machines without
# CUDA, which is not an accepted `devices` value in the Lightning 2.x API
# (the API version that takes precision="16-mixed"). With accelerator="auto"
# a single device is selected on GPU and CPU alike.
trainer = Trainer(
    fast_dev_run=False,
    max_epochs=num_epochs,
    accelerator="auto",
    precision="16-mixed",
    devices=1,
    callbacks=[progress_cb, early_stop_callback],
)

Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [17]:
# Allow lower-precision internal math for float32 matrix multiplications,
# trading some numerical precision for speed on supporting hardware.
# This is set before trainer.fit() so it applies to the training run.
torch.set_float32_matmul_precision("medium")

In [18]:
# Train the model; the data module supplies the data loaders.
# Training may end before `num_epochs` if the early-stopping callback fires.
trainer.fit(model, data_module)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type         | Params
-------------------------------------------
0 | model     | ResNet       | 26.6 M
1 | criterion | Proxy_Anchor | 9.7 K 
-------------------------------------------
26.6 M    Trainable params
0         Non-trainable params
26.6 M    Total params
106.463   Total estimated model params size (MB)
2023-05-13 12:02:18.748915: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-05-13 12:02:18.904988: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.

Epoch 5: 100%|██████████| 244/244 [01:29<00:00,  2.72it/s, v_num=108, train_loss_step=3.310, val_loss=10.70, val_recall@3=0.256, train_loss_epoch=4.380]


In [19]:
# Run one validation pass with the trained weights and report the metrics.
trainer.validate(model, data_module)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Validation DataLoader 0: 100%|██████████| 31/31 [00:09<00:00,  3.32it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
     Validate metric           DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        val_loss            10.691999435424805
      val_recall@3          0.2669404447078705
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'val_loss': 10.691999435424805, 'val_recall@3': 0.2669404447078705}]

In [20]:
# Switch the model to evaluation mode before running manual inference below.
# eval() returns the module itself, which is why its repr is displayed.
model.eval()

Resnet50(
  (model): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
          (

In [21]:
# Number of batches produced by the test data loader.
len(data_module.test_dataloader())

974

In [22]:
# Smoke-test inference: embed only the first test batch and print the result.
with torch.no_grad():  # gradients are not needed for inference
    test_loader = data_module.test_dataloader()
    for idx, batch in enumerate(test_loader):
        # The unconditional `break` below ends the loop after the first
        # batch (idx == 0), so no explicit index check is needed.
        embeddings = model(batch["image"])
        print(embeddings)
        break

tensor([[-3.3167e-02, -9.5203e-03,  2.3766e-03,  5.7854e-02, -4.8207e-03,
          6.4753e-02,  6.6371e-02, -2.0104e-02, -5.4059e-03,  1.2048e-02,
         -5.0111e-02,  3.6570e-03,  1.6672e-02,  2.2422e-02,  6.5586e-02,
         -1.5019e-02, -1.6007e-02, -2.5012e-02, -2.5085e-02, -3.5543e-02,
          6.5518e-02, -6.8236e-02,  2.3967e-02, -5.1299e-03, -2.1761e-02,
         -3.6878e-02, -2.9499e-02, -1.4033e-02,  7.1215e-02, -5.6402e-02,
         -2.0701e-03, -4.1505e-02, -5.8771e-02, -6.8263e-04, -5.7690e-03,
          1.8560e-02,  1.5932e-02, -7.1682e-02,  7.6275e-02,  2.9989e-02,
         -3.0244e-03, -4.6602e-02,  7.7938e-02,  3.7640e-02, -6.4677e-02,
          7.7180e-02,  4.2991e-03, -3.6744e-02,  4.4626e-02,  1.6101e-02,
          7.9797e-02,  4.6904e-02, -7.3361e-03,  3.6558e-02, -1.4768e-02,
          7.0872e-03, -5.3857e-03, -2.0959e-02, -5.4978e-02, -5.3016e-03,
          9.3884e-02, -8.7184e-03, -5.1840e-02,  3.6736e-03,  6.3440e-02,
          3.7912e-02, -2.5796e-02,  2.