In [1]:
import os
import sys
import pandas as pd
import torch
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.callbacks.progress import TQDMProgressBar
import tempfile
import numpy as np
import matplotlib.pyplot as plt
import logging

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Project root: the parent of the kernel's current working directory.
notebook_dir = os.path.abspath(os.getcwd())
dir_path = os.path.dirname(notebook_dir)

In [3]:
# Make the modules under <dir_path>/src importable from this notebook.
# The membership check keeps the cell idempotent: re-running it does not
# pile up duplicate entries on sys.path.
src_dir = os.path.join(dir_path, "src")
if src_dir not in sys.path:
    sys.path.append(src_dir)

In [4]:
from data_module import ImageDataModule
from resnet import Resnet50
from utils import evaluate_cos

In [5]:
# Directories handed to the data module; every entry is rooted at dir_path.
data_root = os.path.join(dir_path, "data")
data_channels = dict(
    image_path=os.path.join(dir_path, "images", "raw/"),
    train_dir=os.path.join(data_root, "train/"),
    test_dir=os.path.join(data_root, "test/"),
    valid_dir=os.path.join(data_root, "valid/"),
)

In [6]:
# Display the resolved paths as the cell output to sanity-check them.
data_channels

{'image_path': '/home/main/cosc525_final_project/images/raw/',
 'train_dir': '/home/main/cosc525_final_project/data/train/',
 'test_dir': '/home/main/cosc525_final_project/data/test/',
 'valid_dir': '/home/main/cosc525_final_project/data/valid/'}

In [7]:
# Training hyperparameters consumed by the cells below:
#   num_classes   -> Resnet50(num_classes=...)
#   num_epochs    -> Trainer(max_epochs=...)
#   batch_size    -> ImageDataModule(batch_size=...)
#   learning_rate -> Resnet50(lr=...)
num_classes, num_epochs, batch_size = 19, 25, 32
learning_rate = 1e-4

In [8]:
# Build the data module from the configured directories and batch size.
data_module = ImageDataModule(batch_size=batch_size, **data_channels)

In [9]:
# Instantiate the project's Resnet50 wrapper with the hyperparameters above.
model = Resnet50(
    lr=learning_rate,
    num_classes=num_classes,
    embedding_size=512,
    bn_freeze=False,
)



In [10]:
# Display the module's repr (its layer structure) as the cell output.
model

Resnet50(
  (model): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
          (

In [11]:
# Create a fresh temporary directory and keep its path.
# NOTE(review): `dirpath` is not referenced by any later cell visible here, and
# tempfile.mkdtemp() leaves deletion of the directory to the caller — confirm it
# is still needed. Also easy to confuse with `dir_path` (the project root).
dirpath = tempfile.mkdtemp()

In [12]:
# Progress-bar callback passed to the Trainer below; refresh_rate is forwarded as-is.
progress_cb = TQDMProgressBar(refresh_rate=20)

In [13]:
# Early-stopping callback passed to the Trainer below. It watches the
# "val_loss" metric in "min" mode with the given min_delta and patience.
early_stop_callback = EarlyStopping(
    monitor="val_loss",
    mode="min",
    min_delta=0.001,
    patience=7,
    verbose=False,
)

In [14]:
# Build the Lightning trainer: up to `num_epochs` epochs, automatic accelerator
# selection, 16-bit mixed precision, with the progress-bar and early-stopping
# callbacks defined above.
trainer = Trainer(
    fast_dev_run=False,
    max_epochs=num_epochs,
    accelerator="auto",
    precision="16-mixed",
    # Use a single GPU when CUDA is present; otherwise let Lightning pick the
    # device count itself. "auto" is used for the fallback because `None` is
    # not a valid `devices` value in Lightning 2.x.
    devices=1 if torch.cuda.is_available() else "auto",
    callbacks=[progress_cb, early_stop_callback],
)

Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [15]:
# Set PyTorch's float32 matrix-multiplication precision to "medium".
# This runs after the Trainer is constructed but before trainer.fit() is called.
torch.set_float32_matmul_precision("medium")

In [16]:
# Train `model` using the data supplied by `data_module`.
trainer.fit(model, data_module)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type         | Params
-------------------------------------------
0 | model     | ResNet       | 26.6 M
1 | criterion | Proxy_Anchor | 9.7 K 
-------------------------------------------
26.6 M    Trainable params
0         Non-trainable params
26.6 M    Total params
106.463   Total estimated model params size (MB)
2023-04-11 16:46:19.708046: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-04-11 16:46:19.869197: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.

Epoch 7: 100%|██████████| 244/244 [01:05<00:00,  3.75it/s, v_num=39, train_loss_step=1.930, val_recall@1=19.00, train_loss_epoch=3.310]


In [17]:
# Run a validation pass with the trained model; the returned list of metric
# dicts is shown as the cell output.
trainer.validate(model, data_module)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Validation DataLoader 0: 100%|██████████| 31/31 [00:03<00:00,  9.31it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
     Validate metric           DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        val_loss             13.36413288116455
      val_recall@1                 19.0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'val_loss': 13.36413288116455, 'val_recall@1': 19.0}]

In [18]:
# Switch the model to evaluation mode before running inference below.
# The call's return value (the module itself) is displayed as the cell output.
model.eval()

Resnet50(
  (model): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
          (

In [19]:
# Length of the test dataloader, displayed as the cell output.
len(data_module.test_dataloader())

974

In [20]:
# Smoke-test the trained model on the first test batch only.
# The batch is pulled straight from the loader's iterator so the remaining
# batches are never loaded; only the first one is needed here.
first_batch = next(iter(data_module.test_dataloader()), None)

# An empty loader yields no batch; in that case nothing is computed or printed.
if first_batch is not None:
    # Gradients are not needed for inference.
    # NOTE(review): assumes the batch tensor and the model are on the same
    # device at this point — confirm when running on GPU.
    with torch.no_grad():
        embeddings = model(first_batch["image"])
    print(embeddings)

tensor([[-8.1233e-02, -4.5965e-02, -2.7784e-02, -3.8943e-02,  1.0456e-02,
          3.2268e-02,  9.0221e-03, -3.0279e-02,  3.5316e-03,  4.0999e-02,
          6.6741e-02, -1.7034e-02,  2.8618e-02, -6.5048e-02,  4.2315e-03,
         -6.5483e-02, -6.9958e-02, -2.3881e-02,  1.5663e-02, -3.6068e-02,
         -3.5423e-03,  1.4557e-02,  8.0127e-02,  3.7797e-03,  5.7838e-02,
          4.3639e-02, -5.9120e-02, -9.8586e-03, -1.0542e-01, -3.2927e-02,
         -3.4020e-02,  2.6795e-02,  2.5762e-02,  3.3061e-02,  3.2211e-02,
         -2.0721e-02, -2.9884e-02, -1.9569e-02, -1.6217e-02,  3.5991e-03,
         -1.2331e-02,  8.6835e-02, -1.8339e-02, -2.2576e-02,  1.5994e-03,
         -7.4349e-02,  7.8263e-02, -1.5512e-02, -4.8689e-02, -5.8430e-03,
         -2.3817e-03,  7.3729e-03,  7.4372e-02, -5.6063e-03, -7.4735e-02,
         -5.9212e-02, -5.6503e-02,  3.1147e-02, -5.1967e-03,  2.8714e-02,
         -3.5339e-02, -1.7844e-02, -9.0230e-03,  5.5152e-02,  2.6785e-02,
         -7.9803e-03, -4.6617e-02, -7.

: 