In [1]:
!nvidia-smi

Fri Nov 17 22:40:39 2023       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 546.17                 Driver Version: 546.17       CUDA Version: 12.3     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                     TCC/WDDM  | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce GTX 1660 Ti   WDDM  | 00000000:01:00.0  On |                  N/A |
| N/A   42C    P8               6W /  80W |    407MiB /  6144MiB |      4%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [2]:
import torch
import cv2

In [3]:

DEVICE = 'cuda' if torch.cuda.is_available() else "cpu"
MODEL_ARCH = 'yolo_nas_s'

In [4]:
DEVICE

'cuda'

In [5]:
from super_gradients.training import models
from super_gradients.training import Trainer
from super_gradients.training import dataloaders
from super_gradients.training.dataloaders.dataloaders import (
    coco_detection_yolo_format_train,
    coco_detection_yolo_format_val
)

from super_gradients.training.losses import PPYoloELoss
from super_gradients.training.metrics import (
    DetectionMetrics_050_095,
    DetectionMetrics_050
)

from super_gradients.training.models.detection_models.pp_yolo_e import PPYoloEPostPredictionCallback
from tqdm.auto import tqdm

import os
import requests
import zipfile
import cv2
import matplotlib.pyplot as plt
import numpy as np
import random
import glob

In [15]:
ROOT_DIR = ''
train_imgs_dir = 'data/annotated data/train-images'
train_labels_dir = 'data/annotated data/train-annotations'
val_imgs_dir = 'data/annotated data/val-images'
val_labels_dir = 'data/annotated data/val-annotations'
test_imgs_dir = 'data/annotated data/test-images'
test_labels_dir = 'data/annotated data/test-annotations'
classes = ['protrusion']

dataset_params = {
    'data_dir':ROOT_DIR,
    'train_images_dir':train_imgs_dir,
    'train_labels_dir':train_labels_dir,
    'val_images_dir':val_imgs_dir,
    'val_labels_dir':val_labels_dir,
    'test_images_dir':test_imgs_dir,
    'test_labels_dir':test_labels_dir,
    'classes':classes
}

In [7]:

# Global parameters.
EPOCHS = 50
BATCH_SIZE = 16
WORKERS = 8

In [17]:
train_data = coco_detection_yolo_format_train(
    dataset_params={
        'data_dir': dataset_params['data_dir'],
        'images_dir': dataset_params['train_images_dir'],
        'labels_dir': dataset_params['train_labels_dir'],
        'classes': dataset_params['classes']
    },
    dataloader_params={
        'batch_size':BATCH_SIZE,
        'num_workers':WORKERS
    }
)

val_data = coco_detection_yolo_format_val(
    dataset_params={
        'data_dir': dataset_params['data_dir'],
        'images_dir': dataset_params['val_images_dir'],
        'labels_dir': dataset_params['val_labels_dir'],
        'classes': dataset_params['classes']
    },
    dataloader_params={
        'batch_size':BATCH_SIZE,
        'num_workers':WORKERS
    }
)

test_data = coco_detection_yolo_format_val(
    dataset_params={
        'data_dir': dataset_params['data_dir'],
        'images_dir': dataset_params['test_images_dir'],
        'labels_dir': dataset_params['test_labels_dir'],
        'classes': dataset_params['classes']
    },
    dataloader_params={
        'batch_size':BATCH_SIZE,
        'num_workers':WORKERS
    }
)

Indexing dataset annotations: 100%|██████████| 79/79 [00:00<00:00, 2456.37it/s]
Indexing dataset annotations: 100%|██████████| 5/5 [00:00<00:00, 625.18it/s]


In [9]:
'''
try later
train_data.dataset.transforms

'''

'\ntry later\ntrain_data.dataset.transforms\n\n'

In [10]:
train_params = {
    'silent_mode': False,
    "average_best_models":True,
    "warmup_mode": "linear_epoch_step",
    "warmup_initial_lr": 1e-6,
    "lr_warmup_epochs": 3,
    "initial_lr": 5e-4,
    "lr_mode": "cosine",
    "cosine_final_lr_ratio": 0.1,
    "optimizer": "Adam",
    "optimizer_params": {"weight_decay": 0.0001},
    "zero_weight_decay_on_bias_and_bn": True,
    "ema": True,
    "ema_params": {"decay": 0.9, "decay_type": "threshold"},
    "max_epochs": EPOCHS,
    "mixed_precision": True,
    "loss": PPYoloELoss(
        use_static_assigner=False,
        num_classes=len(dataset_params['classes']),
        reg_max=16
    ),
    "valid_metrics_list": [
        DetectionMetrics_050(
            score_thres=0.1,
            top_k_predictions=300,
            num_cls=len(dataset_params['classes']),
            normalize_targets=True,
            post_prediction_callback=PPYoloEPostPredictionCallback(
                score_threshold=0.01,
                nms_top_k=1000,
                max_predictions=300,
                nms_threshold=0.7
            )
        ),
        DetectionMetrics_050_095(
            score_thres=0.1,
            top_k_predictions=300,
            num_cls=len(dataset_params['classes']),
            normalize_targets=True,
            post_prediction_callback=PPYoloEPostPredictionCallback(
                score_threshold=0.01,
                nms_top_k=1000,
                max_predictions=300,
                nms_threshold=0.7
            )
        )
    ],
    "metric_to_watch": 'mAP@0.50:0.95'
}

In [11]:
models_to_train = [
    'yolo_nas_s',
#    'yolo_nas_m',
#    'yolo_nas_l'
]

CHECKPOINT_DIR = 'checkpoints'

for model_to_train in models_to_train:
    trainer = Trainer(
        experiment_name=model_to_train,
        ckpt_root_dir=CHECKPOINT_DIR
    )

    model = models.get(
        model_to_train,
        num_classes=len(dataset_params['classes']),
        pretrained_weights="coco"
    )

    trainer.train(
        model=model,
        training_params=train_params,
        train_loader=train_data,
        valid_loader=val_data
    )



Train epoch 0: 100%|██████████| 4/4 [00:34<00:00,  8.68s/it, PPYoloELoss/loss=nan, PPYoloELoss/loss_cls=nan, PPYoloELoss/loss_dfl=nan, PPYoloELoss/loss_iou=nan, gpu_mem=5.83]
Validating: 100%|██████████| 1/1 [00:11<00:00, 11.24s/it]
Train epoch 1: 100%|██████████| 4/4 [00:29<00:00,  7.39s/it, PPYoloELoss/loss=nan, PPYoloELoss/loss_cls=nan, PPYoloELoss/loss_dfl=nan, PPYoloELoss/loss_iou=nan, gpu_mem=5.86]
Validating epoch 1: 100%|██████████| 1/1 [00:11<00:00, 11.25s/it]
Train epoch 2: 100%|██████████| 4/4 [00:28<00:00,  7.25s/it, PPYoloELoss/loss=nan, PPYoloELoss/loss_cls=nan, PPYoloELoss/loss_dfl=nan, PPYoloELoss/loss_iou=nan, gpu_mem=5.79]
Validating epoch 2: 100%|██████████| 1/1 [00:11<00:00, 11.88s/it]
Train epoch 3: 100%|██████████| 4/4 [00:31<00:00,  7.81s/it, PPYoloELoss/loss=nan, PPYoloELoss/loss_cls=nan, PPYoloELoss/loss_dfl=nan, PPYoloELoss/loss_iou=nan, gpu_mem=5.83]
Validating epoch 3: 100%|██████████| 1/1 [00:11<00:00, 11.23s/it]
Train epoch 4: 100%|██████████| 4/4 [00:29<0

In [18]:
best_model = models.get('yolo_nas_s',
                        num_classes=len(dataset_params['classes']),
                        checkpoint_path='./checkpoints/yolo_nas_s/RUN_20231117_224048_830684/ckpt_best.pth'
                        )

In [19]:
trainer.test(
    model=best_model,
    test_loader=test_data,
    test_metrics_list=DetectionMetrics_050(
        score_thres=0.1,
        top_k_predictions=300,
        num_cls=len(dataset_params['classes']),
        normalize_targets=True,
        post_prediction_callback=PPYoloEPostPredictionCallback(
            score_threshold=0.01,
            nms_top_k=1000,
            max_predictions=300,
            nms_threshold=0.7
        )
    )
)

Testing: 100%|██████████| 1/1 [00:36<00:00, 36.24s/it]

{'PPYoloELoss/loss_cls': nan,
 'PPYoloELoss/loss_iou': nan,
 'PPYoloELoss/loss_dfl': nan,
 'PPYoloELoss/loss': nan,
 'Precision@0.50': 0.0,
 'Recall@0.50': 0.0,
 'mAP@0.50': 0.0,
 'F1@0.50': 0.0}