## Setup

In [1]:
# Enable the autoreload extension; mode 2 reloads imported modules before each cell runs.
%load_ext autoreload
%autoreload 2

# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
import collections
import shutil
import tempfile
import time
import zipfile
from pathlib import Path

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from anomalib.config import get_configurable_parameters
from anomalib.data import get_datamodule
from anomalib.data.inference import InferenceDataset
from anomalib.models import get_model
from anomalib.post_processing.post_process import compute_mask
from anomalib.utils.callbacks import LoadModelCallback, get_callbacks
from anomalib.utils.loggers import configure_logger, get_experiment_logger
from pytorch_lightning import Trainer, seed_everything
from skimage.morphology import dilation
from skimage.segmentation import find_boundaries
from torch.utils.data import DataLoader


  from .autonotebook import tqdm as notebook_tqdm
  warn(f"Failed to load image Python extension: {e}")


## Settings

In [3]:
# Location of the downloaded IDRiD 'B. Disease Grading.zip' archive. It is resolved
# relative to the current user's home directory so the notebook is not tied to one
# machine; adjust it if the archive was saved somewhere else.
idrid_zip_location = Path.home() / 'Downloads' / 'B. Disease Grading.zip'
# Root folder that receives the prepared idrid/normal and idrid/dr/<grade> images.
data_folder = Path('../data')

# PatchCore configuration file and the root directory of the model outputs.
patchcore_config_path = 'cfg/patchcore_config.yaml'
patchcore_output_dir = Path('../models')

## Data preparation

The IDRiD dataset consists of 3 parts (segmentation, disease grading and localization), which can be downloaded from https://ieee-dataport.org/open-access/indian-diabetic-retinopathy-image-dataset-idrid (after registration). In this notebook we only use 'B. Disease Grading.zip'. We copy all images of healthy retinas (retinopathy grade 0) to `data_folder/idrid/normal`; these are used to train the model. We also copy the images of retinas showing signs of diabetic retinopathy (grades 1-4) to `data_folder/idrid/dr/<grade>`; a small sample of these is used to test the model.

In [4]:
# Unpack the archive into a scratch directory (deleted when the block exits) and sort
# the images into data_folder/idrid/normal (grade 0) and data_folder/idrid/dr/<grade>.
with tempfile.TemporaryDirectory() as tmp_data_dir:
    with zipfile.ZipFile(idrid_zip_location, 'r') as idrid_zip_ref:
        idrid_zip_ref.extractall(tmp_data_dir)

        tmp_data_dir = Path(tmp_data_dir)
        grading_root = tmp_data_dir / 'B. Disease Grading'
        gt_path = grading_root / '2. Groundtruths'
        original_img_path = grading_root / '1. Original Images'

        normal_img_dst = data_folder / 'idrid' / 'normal'
        dr_img_dst = data_folder / 'idrid' / 'dr'

        # Map every retinopathy grade to the folder its images are copied into.
        grade_destinations = {0: normal_img_dst}
        for dr_grade in range(1, 5):
            grade_destinations[dr_grade] = dr_img_dst / str(dr_grade)

        dr_img_dst.mkdir(exist_ok=True, parents=True)
        for destination in grade_destinations.values():
            destination.mkdir(exist_ok=True, parents=True)

        # (label CSV, image sub-folder) for the training and the testing split.
        label_sources = [
            ('a. IDRiD_Disease Grading_Training Labels.csv', 'a. Training Set'),
            ('b. IDRiD_Disease Grading_Testing Labels.csv', 'b. Testing Set'),
        ]
        for csv_fn, dir_name in label_sources:
            gt_csv = pd.read_csv(gt_path / csv_fn)
            # The split name is prepended to every copied file name.
            split_prefix = dir_name.strip()

            for grade, destination in grade_destinations.items():
                image_names = gt_csv.loc[gt_csv['Retinopathy grade'] == grade, 'Image name']
                for image_name in image_names:
                    shutil.copy(
                        original_img_path / dir_name / (image_name + '.jpg'),
                        destination / f"{split_prefix}_{image_name}.jpg",
                    )

## Model training

In [7]:
# Load the PatchCore configuration; every object below is built from it.
config = get_configurable_parameters(model_name='patchcore', config_path=patchcore_config_path)

# Seed the random number generators whenever a seed is configured. The check compares
# against None instead of relying on truthiness so that an explicit seed of 0 is
# honoured as well.
if config.project.seed is not None:
    seed_everything(config.project.seed)

datamodule = get_datamodule(config)
model = get_model(config)
experiment_logger = get_experiment_logger(config)
callbacks = get_callbacks(config)



In [8]:
# Build the trainer from the trainer section of the config, attach the logger and
# callbacks created above, and fit the model on the datamodule.
trainer = Trainer(
    logger=experiment_logger,
    callbacks=callbacks,
    **config.trainer,
)
trainer.fit(model=model, datamodule=datamodule)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..
`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(limit_predict_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(val_check_interval=1.0)` was configured so validation will run at the end of the training epoch..
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
  rank_zero_warn(

  | Name                  | Type                     | Params
-------------------------------------------------------------------
0 | image_threshold       | AdaptiveThreshold        | 0     
1 | pixel_threshold       | AdaptiveThreshold        | 0     
2 | model                 | Patchco

Epoch 0:  17%|█▋        | 1/6 [00:09<00:47,  9.43s/it, loss=nan]



Epoch 0:  83%|████████▎ | 5/6 [00:27<00:05,  5.58s/it, loss=nan]

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")


## Evaluate on some images showing signs of diabetic retinopathy

In [14]:
def get_prediction(img_path, model_config_path, model_weight_path):
    """Run a trained model on ``img_path`` and return the trainer's predictions.

    A fresh config, model, callback list and trainer are created on every call.

    Args:
        img_path: Path handed to ``InferenceDataset`` as the input to predict on.
        model_config_path: Path of the model configuration file.
        model_weight_path: Checkpoint to load; stored in
            ``config.trainer.resume_from_checkpoint``. ``None`` is passed through unchanged.

    Returns:
        Whatever ``Trainer.predict`` returns for the single dataloader. Callers in this
        notebook read ``result[0]['anomaly_maps']`` from it.
    """
    config = get_configurable_parameters(config_path=model_config_path)

    # Callers pass pathlib.Path objects. Store the checkpoint as a plain string because
    # the config container is presumably OmegaConf-backed, and not every OmegaConf
    # release accepts Path values (NOTE(review): confirm against the installed version).
    if model_weight_path is not None:
        model_weight_path = str(model_weight_path)
    config.trainer.resume_from_checkpoint = model_weight_path

    # Predictions are plotted by the notebook itself, so disable the built-in visualizer.
    config.visualization.show_images = False
    config.visualization.save_images = False

    model = get_model(config)
    callbacks = get_callbacks(config)
    trainer = Trainer(callbacks=callbacks, **config.trainer)

    # Use the validation transforms when the dataset config defines any.
    transform_config = config.dataset.transform_config.val if "transform_config" in config.dataset.keys() else None

    dataset = InferenceDataset(img_path, image_size=tuple(config.dataset.image_size), transform_config=transform_config)
    dataloader = DataLoader(dataset)

    return trainer.predict(model=model, dataloaders=[dataloader])


def add_boundary(image, anomaly_map, threshold=0.5, thickness=20, color=(255, 0, 0)):
    """Return a copy of ``image`` with the outline of the thresholded anomaly region drawn on it.

    Args:
        image: Image array; it is copied, the input itself is not modified.
        anomaly_map: Anomaly scores passed to ``compute_mask`` (assumed to have the same
            height and width as ``image`` so the outline mask can index it).
        threshold: Threshold passed to ``compute_mask``.
        thickness: Side length of the square footprint used to thicken the outline.
        color: Value written to every outline pixel.

    Returns:
        The annotated copy of ``image``.
    """
    footprint = np.ones((thickness, thickness))
    region_edges = find_boundaries(compute_mask(anomaly_map, threshold))
    thick_edges = dilation(region_edges, footprint)

    annotated = np.copy(image)
    annotated[thick_edges] = color
    return annotated

def add_anomaly_map(image, model_result):
    """Return the predicted anomaly map resized to the height and width of ``image``.

    The function depends only on its arguments; it does not read any notebook globals
    or files.

    Args:
        image: Array whose first two dimensions give the target (height, width).
        model_result: Prediction list; ``model_result[0]['anomaly_maps']`` must provide
            a ``.numpy()`` method.

    Returns:
        The squeezed anomaly map resized with ``cv2.resize`` to the size of ``image``.
    """
    anomaly_map = np.squeeze(model_result[0]['anomaly_maps'].numpy())
    # cv2.resize expects the target size as (width, height).
    target_size = (image.shape[1], image.shape[0])
    return cv2.resize(anomaly_map, target_size)

In [15]:
# Checkpoint to evaluate (expected to come from the training step above).
model_weight_path = patchcore_output_dir / 'patchcore' / 'idrid' / 'weights' / 'model.ckpt'
# Number of example images evaluated per retinopathy grade.
images_per_grade = 4

# patchcore_results[grade][image path] -> predictions returned by get_prediction.
patchcore_results = collections.defaultdict(dict)
for grade in range(1, 5):
    # Only DR grades 1-4 are evaluated here, so images always come from idrid/dr/<grade>.
    img_folder = data_folder / 'idrid' / 'dr' / str(grade)
    img_paths = sorted(img_folder.glob('*.jpg'))[:images_per_grade]
    for img_path in img_paths:
        patchcore_results[grade][img_path] = get_prediction(img_path, patchcore_config_path, model_weight_path)

  rank_zero_deprecation(
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..
`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(limit_predict_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(val_check_interval=1.0)` was configured so validation will run at the end of the training epoch..
  rank_zero_warn(


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  1.44it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..
`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(limit_predict_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(val_check_interval=1.0)` was configured so validation will run at the end of the training epoch..


Predicting DataLoader 0: 100%|██████████| 1/1 [00:01<00:00,  1.22s/it]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..
`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(limit_predict_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(val_check_interval=1.0)` was configured so validation will run at the end of the training epoch..


Predicting DataLoader 0: 100%|██████████| 1/1 [00:01<00:00,  1.03s/it]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..
`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(limit_predict_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(val_check_interval=1.0)` was configured so validation will run at the end of the training epoch..


Predicting DataLoader 0: 100%|██████████| 1/1 [00:01<00:00,  1.13s/it]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..
`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(limit_predict_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(val_check_interval=1.0)` was configured so validation will run at the end of the training epoch..


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  1.19it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..
`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(limit_predict_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(val_check_interval=1.0)` was configured so validation will run at the end of the training epoch..


Predicting DataLoader 0: 100%|██████████| 1/1 [00:01<00:00,  1.05s/it]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..
`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(limit_predict_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(val_check_interval=1.0)` was configured so validation will run at the end of the training epoch..


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  1.21it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..
`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(limit_predict_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(val_check_interval=1.0)` was configured so validation will run at the end of the training epoch..


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  1.01it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..
`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(limit_predict_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(val_check_interval=1.0)` was configured so validation will run at the end of the training epoch..


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  1.02it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..
`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(limit_predict_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(val_check_interval=1.0)` was configured so validation will run at the end of the training epoch..


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  1.03it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..
`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(limit_predict_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(val_check_interval=1.0)` was configured so validation will run at the end of the training epoch..


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  1.02it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..
`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(limit_predict_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(val_check_interval=1.0)` was configured so validation will run at the end of the training epoch..


Predicting DataLoader 0: 100%|██████████| 1/1 [00:01<00:00,  1.38s/it]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..
`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(limit_predict_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(val_check_interval=1.0)` was configured so validation will run at the end of the training epoch..


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  1.20it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..
`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(limit_predict_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(val_check_interval=1.0)` was configured so validation will run at the end of the training epoch..


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  1.01it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..
`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(limit_predict_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(val_check_interval=1.0)` was configured so validation will run at the end of the training epoch..


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  1.13it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..
`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(limit_test_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(limit_predict_batches=1.0)` was configured so 100% of the batches will be used..
`Trainer(val_check_interval=1.0)` was configured so validation will run at the end of the training epoch..


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  1.07it/s]


In [16]:
# One figure per DR grade: each row shows an evaluated image with the anomaly map
# overlaid (left) and with the thresholded anomaly outline drawn on it (right).
for grade in range(1, 5):
    grade_results = patchcore_results[grade]
    if not grade_results:
        # No predictions for this grade; a grid with zero rows cannot be created.
        continue

    fig, axes = plt.subplots(len(grade_results), 2, figsize=(10, 15), squeeze=False)
    for (img_fn, prediction), (overlay_ax, boundary_ax) in zip(grade_results.items(), axes):
        img = cv2.imread(img_fn.as_posix())[:, :, ::-1]  # OpenCV loads BGR; reverse to RGB

        # Bring the anomaly map back to the resolution of the original image.
        anomaly_map = np.squeeze(prediction[0]['anomaly_maps'].numpy())
        anomaly_map_original_size = cv2.resize(anomaly_map, (img.shape[1], img.shape[0]))
        segmentation_boundary = add_boundary(img, anomaly_map_original_size, threshold=0.3)

        overlay_ax.imshow(img)
        overlay_ax.imshow(anomaly_map_original_size, alpha=0.5)
        overlay_ax.axis('off')

        boundary_ax.imshow(segmentation_boundary)
        boundary_ax.axis('off')

    fig.subplots_adjust(wspace=0.05, hspace=0.001)
    fig.suptitle(f'PatchCore results for DR grade {grade}', y=0.9)
    plt.show()
    # Release the figure explicitly so repeated runs do not accumulate open figures.
    plt.close(fig)

after computing anomaly masks 2.1434154510498047
after finding boundaries 2.4010021686553955
after dilation 9.456212282180786
after computing anomaly masks 1.939211368560791
after finding boundaries 2.207524538040161
after dilation 9.42588210105896
after computing anomaly masks 1.971982479095459
after finding boundaries 2.2391531467437744
after dilation 9.69978380203247
after computing anomaly masks 2.393697500228882
after finding boundaries 2.723341464996338
after dilation 13.081697225570679


KeyboardInterrupt: 