In [1]:
# import basic dependencies 
import pandas as pd
import numpy as np
import os
import shutil
from tqdm import tqdm
from PIL.Image import open as pil_open
import matplotlib.pyplot as plt
import random
import json

# import dependencies for evaluation
from transformers import AutoFeatureExtractor, ViTForImageClassification, Trainer, TrainingArguments
from datasets import load_dataset, load_metric, ClassLabel, Features, Image, Value
from sklearn.metrics import confusion_matrix
import numpy as np
import torch

In [2]:
# Register tqdm's pandas integration so that `progress_apply` (used when
# building master_df below) is available on Series/DataFrame objects.
tqdm.pandas()

In [3]:
# Build the master label table: one row per image that is actually present on
# disk, with `level` collapsed to a binary label (0 stays 0, anything else -> 1).
master_df = pd.read_csv("data/labels.csv")
master_df["path"] = "data/images/" + master_df["image"] + ".jpeg"

# The filesystem check is the slow step, so it keeps its progress bar.
master_df["exist"] = master_df["path"].progress_apply(os.path.exists)

# Vectorised binarisation; same result as mapping every non-zero level to 1.
master_df["level"] = master_df["level"].ne(0).astype("int64")

# Keep only rows whose image file exists, and only the columns used downstream.
master_df = master_df.loc[master_df["exist"], ["image", "path", "level"]]
master_df.head(10)

100%|█████████████████████████████████████████████████████████████████████████| 35126/35126 [00:02<00:00, 15118.87it/s]
100%|███████████████████████████████████████████████████████████████████████| 35126/35126 [00:00<00:00, 1112656.88it/s]


Unnamed: 0,image,path,level
0,10_left,data/images/10_left.jpeg,0
1,10_right,data/images/10_right.jpeg,0
2,13_left,data/images/13_left.jpeg,0
3,13_right,data/images/13_right.jpeg,0
4,15_left,data/images/15_left.jpeg,1
5,15_right,data/images/15_right.jpeg,1
6,16_left,data/images/16_left.jpeg,1
7,16_right,data/images/16_right.jpeg,1
8,17_left,data/images/17_left.jpeg,0
9,17_right,data/images/17_right.jpeg,1


In [4]:
# Load the label table stored in data/train_labels.csv (presumably the training
# split written by an earlier preprocessing run - confirm) and preview it.
train_df = pd.read_csv("data/train_labels.csv")
train_df.head()

Unnamed: 0,image,path,level,updated_path
0,4593_right,data/images/4593_right.jpeg,0,data\preprocessed\train\no_dr\4593_right.jpeg
1,18542_right,data/images/18542_right.jpeg,0,data\preprocessed\train\no_dr\18542_right.jpeg
2,16973_right,data/images/16973_right.jpeg,0,data\preprocessed\train\no_dr\16973_right.jpeg
3,33928_right,data/images/33928_right.jpeg,0,data\preprocessed\train\no_dr\33928_right.jpeg
4,36713_right,data/images/36713_right.jpeg,0,data\preprocessed\train\no_dr\36713_right.jpeg


In [5]:
# Load the label table stored in data/test_labels.csv (presumably the test
# split written by an earlier preprocessing run - confirm) and preview it.
test_df = pd.read_csv("data/test_labels.csv")
test_df.head()

Unnamed: 0,image,path,level,updated_path
0,16076_right,data/images/16076_right.jpeg,0,data\preprocessed\test\no_dr\16076_right.jpeg
1,5680_right,data/images/5680_right.jpeg,0,data\preprocessed\test\no_dr\5680_right.jpeg
2,31209_right,data/images/31209_right.jpeg,0,data\preprocessed\test\no_dr\31209_right.jpeg
3,44088_left,data/images/44088_left.jpeg,0,data\preprocessed\test\no_dr\44088_left.jpeg
4,41035_left,data/images/41035_left.jpeg,0,data\preprocessed\test\no_dr\41035_left.jpeg


In [6]:
# Collect every image id already present in the train or test label tables,
# train ids first, so they can be excluded from the evaluation pool.
train_df_images = train_df["image"].tolist()
test_df_images = test_df["image"].tolist()
train_test_images = [*train_df_images, *test_df_images]

In [7]:
# Number of test rows per binary level.
test_df["level"].value_counts()

0    558
1    558
Name: level, dtype: int64

In [8]:
# Drop every image that already belongs to the train or test tables, then show
# how many candidates remain per level.
already_used = master_df["image"].isin(train_test_images)
master_df = master_df.loc[~already_used]
print(master_df["level"].value_counts())
master_df.head()

0    21158
1     7636
Name: level, dtype: int64


Unnamed: 0,image,path,level
0,10_left,data/images/10_left.jpeg,0
1,10_right,data/images/10_right.jpeg,0
2,13_left,data/images/13_left.jpeg,0
3,13_right,data/images/13_right.jpeg,0
5,15_right,data/images/15_right.jpeg,1


In [9]:
# Number of images to draw for each level
desired_samples = 560

# Levels present in the remaining pool (iterated in value_counts order)
level_counts = master_df["level"].value_counts()

# One fixed-seed random sample of `desired_samples` rows per level
selected_samples = [
    master_df.loc[master_df["level"] == level_value].sample(desired_samples, random_state=0)
    for level_value in level_counts.index
]

In [10]:
# Stack the per-level samples into a single evaluation table, print the number
# of rows per level, and preview the first rows.
eval_df = pd.concat(selected_samples)
print(eval_df["level"].value_counts())
eval_df.head()

0    560
1    560
Name: level, dtype: int64


Unnamed: 0,image,path,level
32186,40650_left,data/images/40650_left.jpeg,0
29214,36986_left,data/images/36986_left.jpeg,0
9694,12210_left,data/images/12210_left.jpeg,0
15184,19014_left,data/images/19014_left.jpeg,0
32411,40913_right,data/images/40913_right.jpeg,0


In [11]:
# Maps the binary `level` value to the class folder name used when the
# evaluation images are written to disk.
dr_dict = {0: "no_dr", 1: "with_dr"}

In [12]:
# Preprocess every sampled image into data/preprocessed/eval/<class name>/:
# centre-crop to a square of side h, then resize to new_dim x new_dim.
# The folder is built with os.path.join so the same code works on any OS
# (on Windows it yields the same backslash-separated paths as before).
eval_root = os.path.join("data", "preprocessed", "eval")
new_dim = 512

# Destination path of every processed image, in eval_df row order
eval_new_path = []
for file_path, level in tqdm(zip(eval_df["path"], eval_df["level"]), total=eval_df.shape[0]):
    # One folder per class; exist_ok makes the call safe to repeat
    level_folder = os.path.join(eval_root, dr_dict[int(level)])
    os.makedirs(level_folder, exist_ok=True)
    destination_path = os.path.join(level_folder, os.path.basename(file_path))

    # Read directly from the source image (no intermediate raw copy that would
    # be overwritten anyway) and let the context manager release the handle.
    with pil_open(file_path) as image:
        # Crop the image to hxh, centered on the middle of the image
        w, h = image.size
        left = (w - h) / 2
        right = (w + h) / 2
        square = image.crop((left, 0, right, h))

        # Resize from hxh to new_dim x new_dim and write the result
        square.resize((new_dim, new_dim)).save(destination_path)

    eval_new_path.append(destination_path)

100%|██████████████████████████████████████████████████████████████████████████████| 1120/1120 [00:18<00:00, 59.80it/s]


In [13]:
# Record where each preprocessed image was written. eval_new_path was filled
# while iterating over eval_df, so its order matches eval_df's row order.
eval_df["updated_path"] = eval_new_path
# Export of the evaluation label table is left disabled; this cell writes no CSV.
# eval_df.to_csv("data/preprocessed/eval_labels.csv", index=False)

In [14]:
# Pin work to GPU 0 when CUDA is available. Without this guard the cell raises
# on a CPU-only machine and the rest of the notebook cannot run.
if torch.cuda.is_available():
    torch.cuda.set_device(0)
    active_device = torch.cuda.current_device()
else:
    active_device = "cpu"
active_device

0

In [15]:
# Schema passed to `load_dataset`: a two-class label column and an image column.
dr_class_label = ClassLabel(num_classes=2, names=["no_dr", "with_dr"])
features = Features({"label": dr_class_label, "image": Image()})

In [16]:
# Glob matching everything under the eval folder. It is resolved against the
# current working directory (the same base every other relative path in this
# notebook uses) instead of a machine-specific absolute path.
evaluation = os.path.abspath(os.path.join("data", "preprocessed", "eval", "**"))

# data_files is given as a list (a documented input type) holding the one pattern.
dataset_eval = load_dataset(path="data/preprocessed", data_files=[evaluation], features=features)

# Class names in label-id order, reused when the models are loaded
labels = dataset_eval["train"].features["label"].names

Resolving data files:   0%|          | 0/1120 [00:00<?, ?it/s]

Using custom data configuration preprocessed-fe7bea385f0ce62c


Downloading and preparing dataset imagefolder/preprocessed to C:/Users/Admin/.cache/huggingface/datasets/imagefolder/preprocessed-fe7bea385f0ce62c/0.0.0/37fbb85cc714a338bea574ac6c7d0b5be5aff46c1862c1989b20e0771199e93f...


Downloading data files:   0%|          | 0/1120 [00:00<?, ?it/s]

Downloading data files: 0it [00:00, ?it/s]

Extracting data files: 0it [00:00, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Dataset imagefolder downloaded and prepared to C:/Users/Admin/.cache/huggingface/datasets/imagefolder/preprocessed-fe7bea385f0ce62c/0.0.0/37fbb85cc714a338bea574ac6c7d0b5be5aff46c1862c1989b20e0771199e93f. Subsequent calls will reuse this data.


  0%|          | 0/1 [00:00<?, ?it/s]

In [17]:
# Display the loaded DatasetDict to check its splits, columns and row count.
dataset_eval

DatasetDict({
    train: Dataset({
        features: ['label', 'image'],
        num_rows: 1120
    })
})

In [18]:
def transform(example_batch):
    """Turn a batch of PIL images into model inputs and carry the labels along.

    Each image is converted to RGB and resized to 224x224 before being handed
    to `feature_extractor` with `return_tensors="pt"`.

    `feature_extractor` is not a parameter: it is looked up in the notebook's
    global namespace every time this function runs.
    """
    rgb_images = []
    for pil_image in example_batch["image"]:
        rgb_images.append(pil_image.convert("RGB").resize((224, 224)))

    inputs = feature_extractor(rgb_images, return_tensors="pt")

    # The labels ride along under the "labels" key
    inputs["labels"] = example_batch["label"]
    return inputs

In [19]:
# Attach `transform` to the dataset (per the `datasets` documentation,
# `with_transform` applies the function when rows are accessed).
prepared_ds_eval = dataset_eval.with_transform(transform)

In [20]:
def collate_fn(batch):
    """Merge a list of per-example dicts into one batch dict.

    Each example must provide a "pixel_values" tensor and a "labels" value.
    Returns {"pixel_values": stacked tensor, "labels": 1-D tensor of labels}.
    """
    pixel_tensors = []
    label_values = []
    for example in batch:
        pixel_tensors.append(example["pixel_values"])
        label_values.append(example["labels"])

    return {
        "pixel_values": torch.stack(pixel_tensors),
        "labels": torch.tensor(label_values),
    }

In [28]:
metric = load_metric("accuracy")
def compute_metrics(p):
    """Compute accuracy and the confusion matrix for one evaluation pass.

    Parameters
    ----------
    p : object exposing `predictions` (per-class scores, one row per example)
        and `label_ids` (true class ids).

    Returns
    -------
    dict with
      "accuracy": whatever `metric.compute` returns (a dict with an "accuracy"
          entry in the recorded run; the summary cell unwraps it), and
      "confusion_matrix": array with true classes on rows and predicted
          classes on columns.
    """
    # Predicted class = index of the highest score; computed once and reused
    predicted_ids = np.argmax(p.predictions, axis=1)

    # Pin the label set to every class the model can output, so the matrix
    # keeps a fixed shape even when a class is absent from both the
    # references and the predictions.
    class_ids = np.arange(p.predictions.shape[1])

    return {
        "accuracy": metric.compute(predictions=predicted_ids, references=p.label_ids),
        "confusion_matrix": confusion_matrix(p.label_ids, predicted_ids, labels=class_ids),
    }

In [22]:
# Checkpoint folders: sub-directories of the working directory whose name
# contains "vit-dr". os.listdir gives no ordering guarantee, so the names are
# sorted to keep every per-model table in a deterministic order.
model_names = sorted(
    name for name in os.listdir(".") if os.path.isdir(name) and "vit-dr" in name
)
model_names

['vit-dr-base-lr_1e-05-cos-w-res-wd_0.01',
 'vit-dr-base-lr_3e-05-cos-w-res-wd_0.01',
 'vit-dr-sd-lr_1e-05-cos-w-res-wd_0.01',
 'vit-dr-sd-lr_3e-05-cos-w-res-wd_0.01']

In [23]:
# For every checkpoint, show the evaluation entries recorded in its
# trainer_state.json (one row per logged evaluation).
for model_name in model_names:
    path = os.path.join(model_name, "trainer_state.json")

    # json.load parses the file directly; no need to join the lines by hand
    with open(path, encoding="utf-8") as file:
        log_history = json.load(file)["log_history"]

    # Keep only the log entries that carry evaluation results
    epochs = [entry for entry in log_history if "eval_accuracy" in entry]

    # Naming the columns up front also gives a well-formed (empty) table when
    # a checkpoint has no evaluation entries.
    epoch_df = pd.DataFrame(epochs, columns=["step", "eval_loss", "eval_accuracy"])
    epoch_df.insert(0, "model", model_name)
    display(epoch_df)

Unnamed: 0,model,step,eval_loss,eval_accuracy
0,vit-dr-base-lr_1e-05-cos-w-res-wd_0.01,100,0.821503,0.516895
1,vit-dr-base-lr_1e-05-cos-w-res-wd_0.01,200,0.801081,0.571689
2,vit-dr-base-lr_1e-05-cos-w-res-wd_0.01,300,0.792707,0.60274
3,vit-dr-base-lr_1e-05-cos-w-res-wd_0.01,400,0.750175,0.63653
4,vit-dr-base-lr_1e-05-cos-w-res-wd_0.01,500,0.775402,0.625571
5,vit-dr-base-lr_1e-05-cos-w-res-wd_0.01,600,0.728327,0.648402
6,vit-dr-base-lr_1e-05-cos-w-res-wd_0.01,700,0.716609,0.654795
7,vit-dr-base-lr_1e-05-cos-w-res-wd_0.01,800,0.73952,0.654795
8,vit-dr-base-lr_1e-05-cos-w-res-wd_0.01,900,0.74382,0.649315
9,vit-dr-base-lr_1e-05-cos-w-res-wd_0.01,1000,0.722425,0.657534


Unnamed: 0,model,step,eval_loss,eval_accuracy
0,vit-dr-base-lr_3e-05-cos-w-res-wd_0.01,100,0.995195,0.520548
1,vit-dr-base-lr_3e-05-cos-w-res-wd_0.01,200,0.796279,0.608219
2,vit-dr-base-lr_3e-05-cos-w-res-wd_0.01,300,0.711154,0.63379
3,vit-dr-base-lr_3e-05-cos-w-res-wd_0.01,400,0.68345,0.659361
4,vit-dr-base-lr_3e-05-cos-w-res-wd_0.01,500,0.718489,0.657534
5,vit-dr-base-lr_3e-05-cos-w-res-wd_0.01,600,0.757318,0.67032
6,vit-dr-base-lr_3e-05-cos-w-res-wd_0.01,700,0.713632,0.692237
7,vit-dr-base-lr_3e-05-cos-w-res-wd_0.01,800,0.769052,0.688584
8,vit-dr-base-lr_3e-05-cos-w-res-wd_0.01,900,0.75204,0.684932
9,vit-dr-base-lr_3e-05-cos-w-res-wd_0.01,1000,0.749811,0.693151


Unnamed: 0,model,step,eval_loss,eval_accuracy
0,vit-dr-sd-lr_1e-05-cos-w-res-wd_0.01,100,1.048261,0.527854
1,vit-dr-sd-lr_1e-05-cos-w-res-wd_0.01,200,0.775979,0.595434
2,vit-dr-sd-lr_1e-05-cos-w-res-wd_0.01,300,0.78558,0.589041
3,vit-dr-sd-lr_1e-05-cos-w-res-wd_0.01,400,0.766931,0.606393
4,vit-dr-sd-lr_1e-05-cos-w-res-wd_0.01,500,0.726594,0.63653
5,vit-dr-sd-lr_1e-05-cos-w-res-wd_0.01,600,0.719086,0.652968
6,vit-dr-sd-lr_1e-05-cos-w-res-wd_0.01,700,0.709554,0.652055
7,vit-dr-sd-lr_1e-05-cos-w-res-wd_0.01,800,0.759712,0.649315
8,vit-dr-sd-lr_1e-05-cos-w-res-wd_0.01,900,0.74189,0.643836
9,vit-dr-sd-lr_1e-05-cos-w-res-wd_0.01,1000,0.727547,0.650228


Unnamed: 0,model,step,eval_loss,eval_accuracy
0,vit-dr-sd-lr_3e-05-cos-w-res-wd_0.01,100,1.099175,0.540639
1,vit-dr-sd-lr_3e-05-cos-w-res-wd_0.01,200,0.773464,0.59726
2,vit-dr-sd-lr_3e-05-cos-w-res-wd_0.01,300,0.785473,0.6
3,vit-dr-sd-lr_3e-05-cos-w-res-wd_0.01,400,0.733104,0.627397
4,vit-dr-sd-lr_3e-05-cos-w-res-wd_0.01,500,0.687079,0.663014
5,vit-dr-sd-lr_3e-05-cos-w-res-wd_0.01,600,0.784282,0.647489
6,vit-dr-sd-lr_3e-05-cos-w-res-wd_0.01,700,0.676528,0.66758
7,vit-dr-sd-lr_3e-05-cos-w-res-wd_0.01,800,0.712234,0.677626
8,vit-dr-sd-lr_3e-05-cos-w-res-wd_0.01,900,0.776739,0.67032
9,vit-dr-sd-lr_3e-05-cos-w-res-wd_0.01,1000,0.724376,0.678539


In [29]:
# Evaluate every fine-tuned checkpoint on the evaluation images.
# Nothing is trained here, so the Trainer only receives evaluation settings:
# no training schedule and no train_dataset.
training_args = TrainingArguments(
    output_dir="output",
    per_device_eval_batch_size=32,
    # The dataset is transformed on access by `transform`, which reads the
    # "image" column - presumably why unused columns must not be dropped.
    remove_unused_columns=False,
    report_to="none",
)

eval_metrics = []
for model_name_or_path in model_names:
    # `transform` looks up the global `feature_extractor`, so it has to be
    # rebound to this checkpoint's extractor before the evaluation runs.
    feature_extractor = AutoFeatureExtractor.from_pretrained(model_name_or_path)
    model = ViTForImageClassification.from_pretrained(
        model_name_or_path,
        num_labels=len(labels),
        id2label={str(i): c for i, c in enumerate(labels)},
        label2id={c: str(i) for i, c in enumerate(labels)},
        # NOTE(review): with this flag a checkpoint whose head size differs
        # would load instead of failing - confirm that is intended here.
        ignore_mismatched_sizes=True,
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        data_collator=collate_fn,
        compute_metrics=compute_metrics,
        eval_dataset=prepared_ds_eval["train"],
        tokenizer=feature_extractor,
    )

    # One metrics dict per checkpoint, in model_names order
    metrics = trainer.evaluate()
    eval_metrics.append(metrics)

max_steps is given, it will override any value given in num_train_epochs
***** Running Evaluation *****
  Num examples = 1120
  Batch size = 32


loading configuration file vit-dr-base-lr_3e-05-cos-w-res-wd_0.01\preprocessor_config.json
Feature extractor ViTFeatureExtractor {
  "do_normalize": true,
  "do_resize": true,
  "feature_extractor_type": "ViTFeatureExtractor",
  "image_mean": [
    0.5,
    0.5,
    0.5
  ],
  "image_std": [
    0.5,
    0.5,
    0.5
  ],
  "resample": 2,
  "size": 224
}

loading configuration file vit-dr-base-lr_3e-05-cos-w-res-wd_0.01\config.json
Model config ViTConfig {
  "_name_or_path": "google/vit-base-patch16-224",
  "architectures": [
    "ViTForImageClassification"
  ],
  "attention_probs_dropout_prob": 0.0,
  "encoder_stride": 16,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "id2label": {
    "0": "no_dr",
    "1": "with_dr"
  },
  "image_size": 224,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "no_dr": "0",
    "with_dr": "1"
  },
  "layer_norm_eps": 1e-12,
  "model_type": "vit",
  "num_attention_heads": 12,
  "num_channels": 

loading configuration file vit-dr-sd-lr_1e-05-cos-w-res-wd_0.01\preprocessor_config.json
Feature extractor ViTFeatureExtractor {
  "do_normalize": true,
  "do_resize": true,
  "feature_extractor_type": "ViTFeatureExtractor",
  "image_mean": [
    0.5,
    0.5,
    0.5
  ],
  "image_std": [
    0.5,
    0.5,
    0.5
  ],
  "resample": 2,
  "size": 224
}

loading configuration file vit-dr-sd-lr_1e-05-cos-w-res-wd_0.01\config.json
Model config ViTConfig {
  "_name_or_path": "google/vit-base-patch16-224",
  "architectures": [
    "ViTForImageClassification"
  ],
  "attention_probs_dropout_prob": 0.0,
  "encoder_stride": 16,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "id2label": {
    "0": "no_dr",
    "1": "with_dr"
  },
  "image_size": 224,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "no_dr": "0",
    "with_dr": "1"
  },
  "layer_norm_eps": 1e-12,
  "model_type": "vit",
  "num_attention_heads": 12,
  "num_channels": 3,
 

loading configuration file vit-dr-sd-lr_3e-05-cos-w-res-wd_0.01\preprocessor_config.json
Feature extractor ViTFeatureExtractor {
  "do_normalize": true,
  "do_resize": true,
  "feature_extractor_type": "ViTFeatureExtractor",
  "image_mean": [
    0.5,
    0.5,
    0.5
  ],
  "image_std": [
    0.5,
    0.5,
    0.5
  ],
  "resample": 2,
  "size": 224
}

loading configuration file vit-dr-sd-lr_3e-05-cos-w-res-wd_0.01\config.json
Model config ViTConfig {
  "_name_or_path": "google/vit-base-patch16-224",
  "architectures": [
    "ViTForImageClassification"
  ],
  "attention_probs_dropout_prob": 0.0,
  "encoder_stride": 16,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "id2label": {
    "0": "no_dr",
    "1": "with_dr"
  },
  "image_size": 224,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "no_dr": "0",
    "with_dr": "1"
  },
  "layer_norm_eps": 1e-12,
  "model_type": "vit",
  "num_attention_heads": 12,
  "num_channels": 3,
 

In [30]:
def highlight_max(val):
    """Return a style frame that bolds the row(s) with the highest `eval_accuracy`.

    Meant for `Styler.apply(highlight_max, axis=None)`: it receives the whole
    DataFrame and returns a DataFrame of the same shape holding CSS strings
    ('' means no styling).

    Parameters
    ----------
    val : pandas.DataFrame with an "eval_accuracy" column.
    """
    bold = "font-weight: bold"
    accuracy = val["eval_accuracy"]

    # Compare against the column maximum rather than a hard-coded score, so
    # the highlight follows the best model whatever its accuracy is.
    is_best = accuracy == accuracy.max()

    # Start with no styling anywhere, then bold every cell of the best row(s)
    styles = pd.DataFrame("", index=val.index, columns=val.columns)
    styles.loc[is_best, :] = bold
    return styles

In [31]:
# Raw metrics dicts, one per checkpoint, in the order of model_names.
eval_metrics

[{'eval_loss': 0.7349197864532471,
  'eval_accuracy': {'accuracy': 0.6553571428571429},
  'eval_confusion_matrix': array([[529,  31],
         [355, 205]], dtype=int64),
  'eval_runtime': 23.9958,
  'eval_samples_per_second': 46.675,
  'eval_steps_per_second': 1.459},
 {'eval_loss': 0.6955434679985046,
  'eval_accuracy': {'accuracy': 0.6508928571428572},
  'eval_confusion_matrix': array([[530,  30],
         [361, 199]], dtype=int64),
  'eval_runtime': 11.0272,
  'eval_samples_per_second': 101.567,
  'eval_steps_per_second': 3.174},
 {'eval_loss': 0.6853053569793701,
  'eval_accuracy': {'accuracy': 0.6544642857142857},
  'eval_confusion_matrix': array([[521,  39],
         [348, 212]], dtype=int64),
  'eval_runtime': 11.1602,
  'eval_samples_per_second': 100.357,
  'eval_steps_per_second': 3.136},
 {'eval_loss': 0.6213908791542053,
  'eval_accuracy': {'accuracy': 0.7},
  'eval_confusion_matrix': array([[515,  45],
         [291, 269]], dtype=int64),
  'eval_runtime': 11.037,
  'eval_sa

In [54]:
# Build the final comparison table: one row per checkpoint, with the confusion
# matrix flattened into tn / fp / fn / tp columns, then bold the best row.
summary_columns = [
    "base_model_name", "finetuned_model_name", "finetune_data", "learning_rate",
    "eval_loss", "eval_accuracy", "eval_tn", "eval_fp", "eval_fn", "eval_tp",
]
metrics_frame = pd.DataFrame(eval_metrics)

# Each 2x2 matrix flattens row-major into [tn, fp, fn, tp]
flat_matrices = [matrix.flatten().tolist() for matrix in metrics_frame["eval_confusion_matrix"]]
matrix_frame = pd.DataFrame(
    flat_matrices,
    columns=["eval_tn", "eval_fp", "eval_fn", "eval_tp"],
    index=metrics_frame.index,
)

epoch_df = (
    metrics_frame
    .assign(
        base_model_name="ViT base patch-16 224",
        finetuned_model_name="ViT-DR",
        # Training data and learning rate are read off the checkpoint folder name
        finetune_data=[
            "base" if "base" in model_name else "base + diffusion augmentation"
            for model_name in model_names
        ],
        learning_rate=[1e-05 if "1e" in model_name else 3e-05 for model_name in model_names],
        # The accuracy metric arrives wrapped in a dict; keep only the number
        eval_accuracy=metrics_frame["eval_accuracy"].map(lambda entry: entry["accuracy"]),
    )
    .join(matrix_frame)
    .loc[:, summary_columns]
)

# From here on epoch_df is a Styler, not a DataFrame
epoch_df = epoch_df.style.apply(highlight_max, axis=None)
display(epoch_df)

Unnamed: 0,base_model_name,finetuned_model_name,finetune_data,learning_rate,eval_loss,eval_accuracy,eval_tn,eval_fp,eval_fn,eval_tp
0,ViT base patch-16 224,ViT-DR,base,1e-05,0.73492,0.655357,529,31,355,205
1,ViT base patch-16 224,ViT-DR,base,3e-05,0.695543,0.650893,530,30,361,199
2,ViT base patch-16 224,ViT-DR,base + diffusion augmentation,1e-05,0.685305,0.654464,521,39,348,212
3,ViT base patch-16 224,ViT-DR,base + diffusion augmentation,3e-05,0.621391,0.7,515,45,291,269
