In [1]:
import os

# Correct dataset path for Kaggle
dataset_path = "/kaggle/input/diabetic-retinopathy-balanced/content/Diabetic_Balanced_Data"

# Define train, validation, and test directories
train_dir = os.path.join(dataset_path, "train")
val_dir = os.path.join(dataset_path, "val")
test_dir = os.path.join(dataset_path, "test")

# Verify dataset structure
print("Dataset Structure:", os.listdir(dataset_path))


Dataset Structure: ['val', 'test', 'train']


In [2]:
import random
import shutil

def select_half_images(source_dir, target_dir):
    """ Selects 50% images from each class and copies them to a new directory. """
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    for class_name in os.listdir(source_dir):
        class_path = os.path.join(source_dir, class_name)
        target_class_path = os.path.join(target_dir, class_name)
        
        if not os.path.exists(target_class_path):
            os.makedirs(target_class_path)
        
        all_images = os.listdir(class_path)
        selected_images = random.sample(all_images, len(all_images) // 2)  # Select 50%

        for image in selected_images:
            shutil.copy(os.path.join(class_path, image), os.path.join(target_class_path, image))

# Define new dataset directories
balanced_train_dir = "/kaggle/working/balanced_train"
balanced_val_dir = "/kaggle/working/balanced_val"

# Select 50% images for train & validation sets
select_half_images(train_dir, balanced_train_dir)
select_half_images(val_dir, balanced_val_dir)


counting images after balanncing

In [3]:
def count_images(directory):
    return sum([len(files) for _, _, files in os.walk(directory)])

print(f"🔹 Training Images (50% Per Class): {count_images(balanced_train_dir)}")
print(f"🔹 Validation Images (50% Per Class): {count_images(balanced_val_dir)}")
print(f"🔹 Test Images: {count_images(test_dir)}")


🔹 Training Images (50% Per Class): 17396
🔹 Validation Images (50% Per Class): 4970
🔹 Test Images: 4971


DATA PROCESSING AND AUGUMENTATION:

In [4]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

IMG_SIZE = (224, 224)
BATCH_SIZE = 32

# Data Augmentation for Training
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=15,
    zoom_range=0.1,
    horizontal_flip=True
)

# Only Rescaling for Validation & Test
val_test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    balanced_train_dir, target_size=IMG_SIZE, batch_size=BATCH_SIZE, class_mode='sparse', shuffle=True
)

val_generator = val_test_datagen.flow_from_directory(
    balanced_val_dir, target_size=IMG_SIZE, batch_size=BATCH_SIZE, class_mode='sparse', shuffle=True
)

test_generator = val_test_datagen.flow_from_directory(
    test_dir, target_size=IMG_SIZE, batch_size=BATCH_SIZE, class_mode='sparse', shuffle=False
)


Found 17396 images belonging to 5 classes.
Found 4970 images belonging to 5 classes.
Found 4971 images belonging to 5 classes.


build vit model

In [5]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Vision Transformer Parameters
PATCH_SIZE = 16  
NUM_CLASSES = 5  
PROJECTION_DIM = 64  
NUM_HEADS = 8  
MLP_UNITS = [128, 64]  
NUM_LAYERS = 8  

# Patch Embedding Layer
class PatchEmbedding(layers.Layer):
    def __init__(self, patch_size, num_patches, projection_dim):
        super().__init__()
        self.projection = layers.Dense(projection_dim)
        self.position_embedding = layers.Embedding(input_dim=num_patches, output_dim=projection_dim)

    def call(self, inputs):
        positions = tf.range(start=0, limit=tf.shape(inputs)[1], delta=1)
        return self.projection(inputs) + self.position_embedding(positions)

# Multi-Head Self-Attention
def transformer_block(inputs, projection_dim, num_heads, mlp_units):
    x = layers.LayerNormalization()(inputs)
    x = layers.MultiHeadAttention(num_heads=num_heads, key_dim=projection_dim)(x, x)
    x = layers.Add()([x, inputs])

    x_skip = layers.LayerNormalization()(x)
    x_mlp = keras.Sequential([layers.Dense(units, activation="gelu") for units in mlp_units])(x_skip)
    return layers.Add()([x_mlp, x])

# Build ViT Model
def build_vit_model(input_shape=(224, 224, 3)):
    inputs = layers.Input(shape=input_shape)

    # Convert Images into Patches
    patches = layers.Conv2D(PROJECTION_DIM, PATCH_SIZE, strides=PATCH_SIZE, padding="valid")(inputs)
    patches = layers.Reshape((-1, PROJECTION_DIM))(patches)

    # Apply Transformer Layers
    for _ in range(NUM_LAYERS):
        patches = transformer_block(patches, PROJECTION_DIM, NUM_HEADS, MLP_UNITS)

    # Classification Head
    representation = layers.LayerNormalization()(patches)
    representation = layers.GlobalAveragePooling1D()(representation)
    outputs = layers.Dense(NUM_CLASSES, activation="softmax")(representation)

    return keras.Model(inputs, outputs)

# Create and Compile Model
model = build_vit_model()
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

# Summary
model.summary()


**Model training**

In [6]:
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=20,
    verbose=1
)


Epoch 1/20


  self._warn_if_super_not_called()


[1m544/544[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1837s[0m 3s/step - accuracy: 0.2447 - loss: 1.6507 - val_accuracy: 0.2853 - val_loss: 1.5164
Epoch 2/20
[1m544/544[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1928s[0m 4s/step - accuracy: 0.2846 - loss: 1.5407 - val_accuracy: 0.3135 - val_loss: 1.4996
Epoch 3/20
[1m544/544[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1924s[0m 4s/step - accuracy: 0.2968 - loss: 1.5221 - val_accuracy: 0.3215 - val_loss: 1.5016
Epoch 4/20
[1m544/544[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1953s[0m 4s/step - accuracy: 0.2973 - loss: 1.5220 - val_accuracy: 0.2964 - val_loss: 1.4850
Epoch 5/20
[1m544/544[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1937s[0m 4s/step - accuracy: 0.3003 - loss: 1.5037 - val_accuracy: 0.3113 - val_loss: 1.4553
Epoch 6/20
[1m544/544[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1951s[0m 4s/step - accuracy: 0.3025 - loss: 1.5042 - val_accuracy: 0.3310 - val_loss: 1.4457
Epoch 7/20
[1m544/544[0m 

Evalvating moel on test data:

In [7]:
from sklearn.metrics import classification_report

# Get Predictions
y_pred = model.predict(test_generator)
y_pred_classes = y_pred.argmax(axis=1)

# True Labels
y_true = test_generator.classes

# Classification Report
print(classification_report(y_true, y_pred_classes))


  self._warn_if_super_not_called()


[1m156/156[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m220s[0m 1s/step
              precision    recall  f1-score   support

           0       0.42      0.77      0.54      1000
           1       0.33      0.31      0.32       971
           2       0.35      0.03      0.05      1000
           3       0.34      0.49      0.40      1000
           4       0.52      0.38      0.44      1000

    accuracy                           0.39      4971
   macro avg       0.39      0.39      0.35      4971
weighted avg       0.39      0.39      0.35      4971



Saving model:

In [8]:
# Save the trained model
model.save("/kaggle/working/diabetic_retinopathy_transformer_balanced.h5")
print("✅ Model saved successfully!")


✅ Model saved successfully!
