## Notebook 6: fancy model with performance metrics ##

In [1]:
# Imports
import os
from pathlib import Path
import numpy as np
import pandas as pd
import cv2
import glob

# Matplotlib for plotting
from matplotlib import pyplot as plt
from matplotlib.pyplot import cm

# PyTorch packages
import torch
import torch.nn as nn
import lightning.pytorch as pl
from lightning.pytorch import Trainer, seed_everything
from lightning.pytorch.callbacks import ModelCheckpoint
import torchmetrics

# Albumentations library
import albumentations as alb

# Appearance of the Notebook
from IPython.display import display, HTML

# Inject a CSS rule so that the notebook container uses the full page width
display(HTML("<style>.container { width:100% !important; }</style>"))

# Wider text output for numpy arrays
np.set_printoptions(linewidth=110)

# Show more rows/columns and allow wider lines when pandas objects are displayed
for option_name, option_value in (('display.max_rows', 500),
                                  ('display.max_columns', 100),
                                  ('display.width', 1000)):
    pd.set_option(option_name, option_value)

# Import this module with autoreload
# The autoreload extension (mode 2) re-imports modules before a cell is executed,
# so edits to the dentexmodel package are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Project package: file helpers, image processing and the PyTorch dataset wrapper
import dentexmodel as dm
from dentexmodel.fileutils import FileOP
from dentexmodel.imageproc import ImageData
from dentexmodel.torchdataset import DatasetFromDF, load_and_process_image
# Print the installed package version so that the run can be traced back to a code state
print(f'dentexmodel package version:  {dm.__version__}')

dentexmodel package version:  0.0.post1.dev36+gb44e647.d20240101


In [2]:
# Path settings
# os.path.expanduser('~') resolves the home directory even when the HOME
# environment variable is not set (e.g. on Windows), where os.environ['HOME']
# would raise a KeyError. With HOME set, the result is the same as before.
home_dir = os.path.expanduser('~')

# Root directory of the dentex data and the sub-directory of the disease data set
dentex_dir = os.path.join(home_dir, 'data', 'dentex')
data_dir = os.path.join(dentex_dir, 'dentex_disease')

# Directory with the cropped x-ray images
image_dir = os.path.join(data_dir, 'quadrant-enumeration-disease', 'xrays', 'crop')

# Parquet file with the train/val/test split
data_file_name = 'dentex_disease_datasplit.parquet'
data_file = os.path.join(dentex_dir, data_file_name)

In [3]:
# %% Package and GPU checks
print(f'PyTorch version:              {torch.__version__}')
print(f'PyTorch Lightning version:    {pl.__version__}')

# Use the first GPU when CUDA is available, otherwise fall back to the CPU
cuda_available = torch.cuda.is_available()
device = torch.device("cuda:0") if cuda_available else torch.device("cpu")
print(cuda_available)
print(f'Number of GPUs found:  {torch.cuda.device_count()}')

# The queries below need an initialized CUDA runtime and raise on CPU-only
# machines, so they are only executed when a GPU was actually found.
if cuda_available:
    print(f'Current device ID:     {torch.cuda.current_device()}')
    print(f'GPU device name:       {torch.cuda.get_device_name(0)}')
    print(f'CUDNN version:         {torch.backends.cudnn.version()}')
else:
    print('No CUDA device found, using the CPU.')

# Allow faster (reduced internal precision) float32 matrix multiplications
torch.set_float32_matmul_precision(precision='high')

PyTorch version:              2.1.2+cu121
PyTorch Lightning version:    2.1.3
True
Number of GPUs found:  1
Current device ID:     0
GPU device name:       NVIDIA GeForce RTX 3060 Laptop GPU
CUDNN version:         8902


### Create PyTorch datasets from data frame ###

In [4]:
# Always read the ORIGINAL split file. `data_file` is re-pointed to the labelled
# copy at the end of this cell, so reading `data_file` here would make a re-run
# of the cell load its own previous output.
data_df = pd.read_parquet(os.path.join(dentex_dir, data_file_name))

# Convert class names to labels
cl_names = sorted(data_df['label'].unique())
# Let's assign a number to each class (name -> label) and keep the inverse (label -> name)
label_dict = {cl_name: cl for cl, cl_name in enumerate(cl_names)}
cl_dict = {cl: cl_name for cl_name, cl in label_dict.items()}

# np.random.seed() returns None, so the seed value is kept in its own variable.
# It is used both for the global numpy state and, explicitly, for the shuffle.
seed = 123
np.random.seed(seed)

# Add the class labels to the data frame and shuffle the rows reproducibly
data_df = (
    data_df
    .assign(cl=data_df['label'].apply(label_dict.get))
    .sample(frac=1, random_state=seed)
    .reset_index(drop=True)
)

# Show the class labels
display(pd.DataFrame(label_dict, index=[0]).iloc[0])

# Save the data frame with the class labels
data_file_name_cl = 'dentex_disease_datasplit_cl.parquet'
data_file = os.path.join(dentex_dir, data_file_name_cl)
data_df.to_parquet(data_file)

Caries               0
Deep Caries          1
Impacted             2
Periapical Lesion    3
Name: 0, dtype: int64

In [5]:
# Split the data frame into training, validation and test rows
# using the value of the 'dataset' column.
dataset_col = data_df['dataset']
train_df = data_df.loc[dataset_col == 'train']
val_df = data_df.loc[dataset_col == 'val']
test_df = data_df.loc[dataset_col == 'test']

# Sorted unique sample names (column 'box_name') for each split
train_samples = sorted(train_df['box_name'].unique())
val_samples = sorted(val_df['box_name'].unique())
test_samples = sorted(test_df['box_name'].unique())

# Report the size of each split
for split_description, split_samples in (('training', train_samples),
                                         ('validation', val_samples),
                                         ('test', test_samples)):
    print(f'Found {len(split_samples)} samples in the {split_description} set.')
print()

Found 3349 samples in the training set.
Found 60 samples in the validation set.
Found 120 samples in the test set.



In [6]:
# Augmentations
# The image augmentations are part of the PyTorch dataset.

# max_image_size is handed to the datasets below.
# im_size is the side length produced by both transforms; it must match
# the input size that the model requires.
max_image_size = 550
im_size = 224

# Side length before the random crop (32 pixels larger than the final size)
resize_size = im_size + 32

# Normalization statistics provided by the ImageData helper
norm_mean = ImageData().image_net_mean
norm_std = ImageData().image_net_std

# Augmentation pipeline for the training set
train_augmentations = [
    alb.Resize(resize_size, resize_size),
    alb.RandomCrop(im_size, im_size),
    alb.HorizontalFlip(),
    alb.ShiftScaleRotate(),
    alb.Blur(),
    alb.RandomGamma(),
    alb.Sharpen(),
    alb.GaussNoise(),
    alb.CoarseDropout(max_holes=16, max_height=32, max_width=32),
    alb.CLAHE(),
    alb.Normalize(mean=norm_mean, std=norm_std),
]
train_transform = alb.Compose(train_augmentations)

# For validation and testing, we do not want any augmentations,
# but we still need the correct input size and the image normalization.
val_augmentations = [
    alb.Resize(im_size, im_size),
    alb.Normalize(mean=norm_mean, std=norm_std),
]
val_transform = alb.Compose(val_augmentations)

In [7]:
# Create the data sets from the data frame
# All three data sets read the same columns and use the same size limit;
# they differ only in the rows they receive and in the transform.
shared_dataset_kwargs = dict(file_col='box_file',
                             label_col='cl',
                             max_image_size=max_image_size,
                             validate=True)

# Training data: with augmentations
train_dataset = DatasetFromDF(data=train_df,
                              transform=train_transform,
                              **shared_dataset_kwargs)

# Validation and test data: resize and normalization only
val_dataset = DatasetFromDF(data=val_df,
                            transform=val_transform,
                            **shared_dataset_kwargs)

test_dataset = DatasetFromDF(data=test_df,
                             transform=val_transform,
                             **shared_dataset_kwargs)

### Load model from checkpoint ###

In [12]:
# We try the metrics with a trained model
from dentexmodel.models.toothmodel_fancy import ToothModel

# Fetch the checkpoint file into the dentex directory
link = 'https://dsets.s3.amazonaws.com/dentex/toothmodel1_50.ckpt'
checkpoint_file = FileOP().download_from_url(url=link, download_dir=dentex_dir)

# Restore the model on the selected device and attach the data sets.
# NOTE(review): `train_dataset` is created above but not passed here - confirm
# that ToothModel obtains its training data another way before fit() is called.
model = ToothModel.load_from_checkpoint(checkpoint_file,
                                        map_location=device,
                                        val_dataset=val_dataset,
                                        test_dataset=test_dataset,
                                        batch_size=16,
                                        num_classes=4,
                                        num_workers=4)

# Load a test batch
dl = model.test_dataloader()
test_image_batch, test_label_batch = next(iter(dl))
display(test_image_batch.shape)
display(test_label_batch.shape)

# Forward pass on the test batch.
# The batch is moved to the device of the model instead of calling .cuda(),
# so the cell also works with the CPU fallback. This is only a sanity check,
# so no autograd graph is recorded.
with torch.no_grad():
    pred = model(test_image_batch.to(model.device))

File extension is unexpected .ckpt.


File: .ckpt loaded.


torch.Size([16, 3, 224, 224])

torch.Size([16])

### Train the fancy model with tensorboard logging ###

In [13]:
# Create the trainer object and train the model for a few epochs
from lightning.pytorch.loggers import TensorBoardLogger

# Run settings
model_name = 'fancy'
model_version = 1
max_epochs = 6
check_val_every_n_epoch = 2
checkpoint_every_n_epoch = 2

# Logs and checkpoints share one folder: <log_dir>/<model_name>/version_<model_version>
log_dir = os.path.join(dentex_dir, 'log')
Path(log_dir).mkdir(parents=True, exist_ok=True)
checkpoint_dir = os.path.join(log_dir, model_name, f'version_{model_version}', 'checkpoints')

# The logger must use the same version number as checkpoint_dir. A different
# hard-coded version would put the TensorBoard logs and the checkpoints
# into two unrelated version folders.
logger = TensorBoardLogger(save_dir=log_dir,
                           name=model_name,
                           version=model_version)

# Keep every checkpoint (save_top_k=-1) that is written every
# `checkpoint_every_n_epoch` epochs, plus a copy of the last one
checkpoint_callback = ModelCheckpoint(
    dirpath=checkpoint_dir,
    filename='{epoch}',
    save_last=True,
    every_n_epochs=checkpoint_every_n_epoch,
    save_on_train_epoch_end=True,
    save_top_k=-1)

# Seed all random number generators (including data loader workers) before
# the trainer is created
seed_everything(234, workers=True)
tr = Trainer(max_epochs=max_epochs,
             deterministic=True,
             accelerator='gpu',
             default_root_dir=log_dir,
             callbacks=[checkpoint_callback],
             logger=logger,
             check_val_every_n_epoch=check_val_every_n_epoch)

Seed set to 234
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [15]:
# Start the training run with the trainer configured above.
# No data loaders are passed to fit(); presumably the model supplies them
# itself (it exposes test_dataloader()) - verify against ToothModel.
tr.fit(model=model)

2024-01-12 12:22:39.957970: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-01-12 12:22:39.978694: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-01-12 12:22:39.978713: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-01-12 12:22:39.979258: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-01-12 12:22:39.983386: I tensorflow/core/platform/cpu_feature_guar

Sanity Checking: |                                                                                            …



Training: |                                                                                                   …



Validation: |                                                                                                 …

Validation: |                                                                                                 …



Validation: |                                                                                                 …

`Trainer.fit` stopped: `max_epochs=6` reached.
