In [3]:
import pandas as pd
import os
import shutil

In [2]:
# Read the three APTOS split manifests; each CSV lists an image id_code and its diagnosis grade.
train_df, val_df, test_df = map(
    pd.read_csv,
    ('data/APTOS/train.csv', 'data/APTOS/valid.csv', 'data/APTOS/test.csv'),
)

# Preview the first rows of every split under its own heading.
for heading, frame in (
    ("Train CSV:", train_df),
    ("\nValidation CSV:", val_df),
    ("\nTest CSV:", test_df),
):
    print(heading)
    print(frame.head())

Train CSV:
        id_code  diagnosis
0  1ae8c165fd53          2
1  1b329a127307          1
2  1b32e1d775ea          4
3  1b3647865779          0
4  1b398c0494d1          0

Validation CSV:
        id_code  diagnosis
0  000c1434d8d7          2
1  001639a390f0          4
2  0024cdab0c1e          1
3  002c21358ce6          0
4  005b95c28852          0

Test CSV:
        id_code  diagnosis
0  e4dcca36ceb4          0
1  e4e343eaae2a          2
2  e4f12411fd85          4
3  e50b0174690d          0
4  e5197d77ec68          0


In [3]:
# Per-grade image counts for the train, validation and test splits (in that order).
for split_df in (train_df, val_df, test_df):
    grade_counts = split_df['diagnosis'].value_counts()
    print(grade_counts)

diagnosis
0    1434
2     808
1     300
4     234
3     154
Name: count, dtype: int64
diagnosis
0    172
2    104
1     40
4     28
3     22
Name: count, dtype: int64
diagnosis
0    199
2     87
4     33
1     30
3     17
Name: count, dtype: int64


In [4]:
# Collapse the diabetic-retinopathy grades into a binary label:
# grade 0 becomes 'Normal', grades 1-4 all become 'DR'.
grade_to_label = {0: 'Normal', 1: 'DR', 2: 'DR', 3: 'DR', 4: 'DR'}
for split_df in (train_df, val_df, test_df):
    # Assigning the column back mutates the same DataFrame object each name refers to.
    split_df['diagnosis'] = split_df['diagnosis'].replace(grade_to_label)

In [5]:
# Class balance of each split after the grades were merged into Normal / DR.
for split_df in (train_df, val_df, test_df):
    label_counts = split_df['diagnosis'].value_counts()
    print(label_counts)

diagnosis
DR        1496
Normal    1434
Name: count, dtype: int64
diagnosis
DR        194
Normal    172
Name: count, dtype: int64
diagnosis
Normal    199
DR        167
Name: count, dtype: int64


In [6]:
# Partition every split into its Normal rows and its DR rows with boolean masks.
train_norm_df = train_df.loc[train_df['diagnosis'].eq('Normal')]
train_dr_df = train_df.loc[train_df['diagnosis'].eq('DR')]
val_norm_df = val_df.loc[val_df['diagnosis'].eq('Normal')]
val_dr_df = val_df.loc[val_df['diagnosis'].eq('DR')]
test_norm_df = test_df.loc[test_df['diagnosis'].eq('Normal')]
test_dr_df = test_df.loc[test_df['diagnosis'].eq('DR')]

In [8]:
# Organize into the file structure required by RETFound: ./data/<split>/<class>/<id_code>.png
# All class folders are created first (Normal for every split, then DR for every split).
for label in ('Normal', 'DR'):
    for split in ('train', 'val', 'test'):
        os.makedirs(f'./data/{split}/{label}/', exist_ok=True)

# (split, class folder, rows to copy) in the order the images are copied.
aptos_copy_plan = (
    ('train', 'Normal', train_norm_df),
    ('train', 'DR', train_dr_df),
    ('val', 'Normal', val_norm_df),
    ('val', 'DR', val_dr_df),
    ('test', 'Normal', test_norm_df),
    ('test', 'DR', test_dr_df),
)

for split, label, frame in aptos_copy_plan:
    # Source images live in ./data/APTOS/<split>_images/ and are named after their id_code.
    for image_id in frame['id_code']:
        shutil.copy(
            f'./data/APTOS/{split}_images/{image_id}.png',
            f'./data/{split}/{label}/{image_id}.png',
        )

In [9]:
# Load the G1020 label table. The preview below shows an imageID (file name) column,
# a binaryLabels column, and an "Unnamed: 0" column carried over from the CSV.
g_1020 = pd.read_csv(filepath_or_buffer='data/G1020/G1020.csv')
g_1020.head(n=5)


Unnamed: 0,imageID,binaryLabels
0,image_0.jpg,0
1,image_1.jpg,0
2,image_3.jpg,0
3,image_4.jpg,0
4,image_5.jpg,0


In [10]:
# Number of G1020 images per binary label.
g1020_label_counts = g_1020['binaryLabels'].value_counts()
g1020_label_counts

binaryLabels
0    724
1    296
Name: count, dtype: int64

In [11]:
from sklearn.model_selection import train_test_split
# Hold out 10% of G1020 for testing, then take 25% of the remainder as validation.
# Both splits are stratified on binaryLabels and use a fixed random_state.
# NOTE: train_df / val_df / test_df are rebound here and no longer refer to the APTOS frames.
train_val_df, test_df = train_test_split(
    g_1020,
    test_size=0.1,
    random_state=42,
    stratify=g_1020['binaryLabels'],
)
train_df, val_df = train_test_split(
    train_val_df,
    test_size=0.25,
    random_state=42,
    stratify=train_val_df['binaryLabels'],
)


In [12]:
# Turn the numeric binaryLabels into class names: 0 becomes 'Normal', 1 becomes 'Glaucoma'.
glaucoma_label_names = {0: 'Normal', 1: 'Glaucoma'}
for split_df in (train_df, val_df, test_df):
    split_df['binaryLabels'] = split_df['binaryLabels'].replace(glaucoma_label_names)

# Partition every split into its Normal rows and its Glaucoma rows.
train_norm_df = train_df.loc[train_df['binaryLabels'].eq('Normal')]
train_glaucoma_df = train_df.loc[train_df['binaryLabels'].eq('Glaucoma')]
val_norm_df = val_df.loc[val_df['binaryLabels'].eq('Normal')]
val_glaucoma_df = val_df.loc[val_df['binaryLabels'].eq('Glaucoma')]
test_norm_df = test_df.loc[test_df['binaryLabels'].eq('Normal')]
test_glaucoma_df = test_df.loc[test_df['binaryLabels'].eq('Glaucoma')]



In [18]:
# Copy the G1020 images into ./data/<split>/<class>/, keeping their original file names.
# G1020 "Normal" images go into the same ./data/<split>/Normal/ folders as the APTOS normals.
#
# Each (split, class) pair is listed exactly once: the previous version repeated the
# val/Normal, val/Glaucoma and test/Normal loops, copying those files twice.
g1020_copy_plan = (
    ('train', 'Normal', train_norm_df),
    ('train', 'Glaucoma', train_glaucoma_df),
    ('val', 'Normal', val_norm_df),
    ('val', 'Glaucoma', val_glaucoma_df),
    ('test', 'Normal', test_norm_df),
    ('test', 'Glaucoma', test_glaucoma_df),
)

for split, label, frame in g1020_copy_plan:
    dest_dir = f'./data/{split}/{label}/'
    # Create the folder if it doesn't exist (Normal included), so this cell does not
    # depend on an earlier cell having made it.
    os.makedirs(dest_dir, exist_ok=True)
    for image_name in frame['imageID']:
        shutil.copy(f'./data/G1020/Images_Square/{image_name}', f'{dest_dir}{image_name}')

In [5]:
from sklearn.model_selection import train_test_split

def images_splitting(source_dir, train_dir, val_dir, test_dir):
    """Split the images in ``source_dir`` and copy them into train/val/test folders.

    10% of the images are held out for test; 25% of the remainder becomes
    validation and the rest is train (roughly 67.5% / 22.5% / 10%).
    Files are copied, not moved, so ``source_dir`` is left untouched.

    Args:
        source_dir: Folder whose ``.jpg`` / ``.png`` / ``.jpeg`` files (any letter
            case) are split. Sub-folders are not searched.
        train_dir: Destination folder for the training images.
        val_dir: Destination folder for the validation images.
        test_dir: Destination folder for the test images.

    Each destination folder is created if it does not exist yet.
    """
    image_exts = ('.jpg', '.png', '.jpeg')
    # os.listdir returns entries in arbitrary order; sorting makes the seeded split
    # below reproducible across machines and runs. The extension check is
    # case-insensitive so files such as "IMG_1.JPG" are not silently dropped.
    images = sorted(f for f in os.listdir(source_dir) if f.lower().endswith(image_exts))
    train, test = train_test_split(images, test_size=0.1, random_state=42)
    train, val = train_test_split(train, test_size=0.25, random_state=42)

    for dest_dir, subset in ((train_dir, train), (val_dir, val), (test_dir, test)):
        # shutil.copy fails when the target folder is missing, so create it up front.
        os.makedirs(dest_dir, exist_ok=True)
        for image in subset:
            shutil.copy(os.path.join(source_dir, image), os.path.join(dest_dir, image))



In [6]:
# Split the images in data/glaucoma into the per-split Glaucoma class folders.
source_dir = 'data/glaucoma'
train_dir = 'data/train/Glaucoma'
val_dir = 'data/val/Glaucoma'
test_dir = 'data/test/Glaucoma'

# Create directories if they don't exist, so this cell also runs on a fresh kernel
# without depending on folders left behind by another cell.
for split_dir in (train_dir, val_dir, test_dir):
    os.makedirs(split_dir, exist_ok=True)

images_splitting(source_dir, train_dir, val_dir, test_dir)

In [7]:
# Split the images in data/cataract into the per-split Cataract class folders.
source_dir = 'data/cataract'
train_dir, val_dir, test_dir = (
    'data/train/Cataract',
    'data/val/Cataract',
    'data/test/Cataract',
)

# Make sure every destination folder is present before copying into it.
for split_dir in (train_dir, val_dir, test_dir):
    os.makedirs(split_dir, exist_ok=True)

images_splitting(
    source_dir=source_dir,
    train_dir=train_dir,
    val_dir=val_dir,
    test_dir=test_dir,
)


In [8]:
import timm
# Show every timm model name matching the '*vit*' wildcard.
vit_model_names = timm.list_models('*vit*')
print(vit_model_names)

  from .autonotebook import tqdm as notebook_tqdm


['convit_base', 'convit_small', 'convit_tiny', 'crossvit_9_240', 'crossvit_9_dagger_240', 'crossvit_15_240', 'crossvit_15_dagger_240', 'crossvit_15_dagger_408', 'crossvit_18_240', 'crossvit_18_dagger_240', 'crossvit_18_dagger_408', 'crossvit_base_240', 'crossvit_small_240', 'crossvit_tiny_240', 'davit_base', 'davit_base_fl', 'davit_giant', 'davit_huge', 'davit_huge_fl', 'davit_large', 'davit_small', 'davit_tiny', 'efficientvit_b0', 'efficientvit_b1', 'efficientvit_b2', 'efficientvit_b3', 'efficientvit_l1', 'efficientvit_l2', 'efficientvit_l3', 'efficientvit_m0', 'efficientvit_m1', 'efficientvit_m2', 'efficientvit_m3', 'efficientvit_m4', 'efficientvit_m5', 'fastvit_ma36', 'fastvit_mci0', 'fastvit_mci1', 'fastvit_mci2', 'fastvit_s12', 'fastvit_sa12', 'fastvit_sa24', 'fastvit_sa36', 'fastvit_t8', 'fastvit_t12', 'flexivit_base', 'flexivit_large', 'flexivit_small', 'gcvit_base', 'gcvit_small', 'gcvit_tiny', 'gcvit_xtiny', 'gcvit_xxtiny', 'levit_128', 'levit_128s', 'levit_192', 'levit_256', 

In [11]:
# Confirm that the backbone passed to finetune_vit.py is registered in this timm install.
available_vits = timm.list_models('*vit*')
'vit_large_patch16_224' in available_vits

True

In [1]:
import torch
# Report the PyTorch build and whether it can see a CUDA device.
torch_env_report = (
    ("PyTorch version", torch.__version__),
    ("CUDA available", torch.cuda.is_available()),
    ("CUDA version used by PyTorch", torch.version.cuda),
)
for caption, value in torch_env_report:
    print(f"{caption}: {value}")

PyTorch version: 2.7.0+cu118
CUDA available: True
CUDA version used by PyTorch: 11.8


In [3]:
# Launch finetune_vit.py on the class folders under ./data/, starting the
# vit_large_patch16_224 model from the RETFound CFP checkpoint and writing to ./finetune_output.
# The meaning of each flag is defined by finetune_vit.py, which is not shown in this notebook
# (--eval_test presumably also evaluates on ./data/test - confirm in the script).
# NOTE(review): the captured run below aborts during the first training step at
# `loss = criterion(outputs, labels)`; the traceback is truncated, so the cause is not visible here.
!python finetune_vit.py \
    --model_name vit_large_patch16_224 \
    --data_path ./data/ \
    --checkpoint_path ./pretrained_vit/RETFound_cfp_weights.pth \
    --output_dir ./finetune_output \
    --batch_size 8 \
    --epochs 50 \
    --blr 1e-4 \
    --layer_decay 0.65 \
    --warmup_epochs 5 \
    --eval_test

Using GPU: Quadro T1000
Found 4997 training images in 4 classes
Found 1057 validation images
Found 673 test images
Classes (4): ['Cataract', 'DR', 'Glaucoma', 'Normal']
Creating model: vit_large_patch16_224
Loading weights from: ./pretrained_vit/RETFound_cfp_weights.pth
Key head.weight not found in checkpoint, will use initialized head
Key head.bias not found in checkpoint, will use initialized head
Weight loading message: _IncompatibleKeys(missing_keys=['fc_norm.weight', 'fc_norm.bias', 'head.weight', 'head.bias'], unexpected_keys=['mask_token', 'decoder_pos_embed', 'decoder_embed.weight', 'decoder_embed.bias', 'decoder_blocks.0.norm1.weight', 'decoder_blocks.0.norm1.bias', 'decoder_blocks.0.attn.qkv.weight', 'decoder_blocks.0.attn.qkv.bias', 'decoder_blocks.0.attn.proj.weight', 'decoder_blocks.0.attn.proj.bias', 'decoder_blocks.0.norm2.weight', 'decoder_blocks.0.norm2.bias', 'decoder_blocks.0.mlp.fc1.weight', 'decoder_blocks.0.mlp.fc1.bias', 'decoder_blocks.0.mlp.fc2.weight', 'decode

  scaler = torch.cuda.amp.GradScaler(enabled=True)

  with torch.cuda.amp.autocast(enabled=True):

Epoch 1/50 [Train]:   0%|          | 0/624 [00:09<?, ?it/s]
Traceback (most recent call last):
  File "d:\Columbia Notes\Spring 2025\Deep Learning on the Edge\Final_Project\finetune_vit.py", line 79, in <module>
    main()
  File "d:\Columbia Notes\Spring 2025\Deep Learning on the Edge\Final_Project\finetune_vit.py", line 56, in main
    best_state = train_and_evaluate(
                 ^^^^^^^^^^^^^^^^^^^
  File "d:\Columbia Notes\Spring 2025\Deep Learning on the Edge\Final_Project\trainer.py", line 23, in train_and_evaluate
    train_loss = train_one_epoch(
                 ^^^^^^^^^^^^^^^^
  File "d:\Columbia Notes\Spring 2025\Deep Learning on the Edge\Final_Project\trainer.py", line 113, in train_one_epoch
    loss = criterion(outputs, labels)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\Columbia Notes\Spring 2025\Deep Learning on the Edge\Final_Project\venv\Lib\site-packages\torc