In [None]:
import pandas as pd
import numpy as np
import os
import cv2
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from tensorflow.keras.applications import ResNet50, VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, Dropout, GlobalAveragePooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
import matplotlib.pyplot as plt
import seaborn as sns
import h5py

In [None]:
!pip install py7zr
import py7zr

In [None]:
# Extract the split archive (starting at train.zip.001) with the 7-Zip command line tool;
# `x` extracts with full paths and `-o` sets the output folder (extracted_images/).
# The commented-out line is an apt-based 7-Zip install that is not executed here.
#!apt install p7zip-full -y

!"C:/Program Files/7-Zip/7z.exe" x "C:/Users/choun/OneDrive/Desktop/Retinal_dataset/train/train.zip.001" -o"C:/Users/choun/OneDrive/Desktop/Retinal_dataset/train/extracted_images/"

In [None]:
# Root folder of the dataset. Set the RETINAL_DATASET_DIR environment variable to run the
# notebook on another machine; without it the original location is used unchanged.
dataset_root = os.environ.get(
    'RETINAL_DATASET_DIR', 'C:/Users/choun/OneDrive/Desktop/Retinal_dataset'
).rstrip('/\\')

# Label table: one row per image.
df = pd.read_csv(f'{dataset_root}/trainLabels.csv')


# Folder holding the archive parts, and the folder the images are extracted into.
train_dir = f'{dataset_root}/train/'
image_dir = f'{dataset_root}/train/extracted_images/train/'
#test_dir = 'C:/Users/choun/OneDrive/Desktop/Retinal_dataset/'

In [None]:
# Preview the first rows of the labels table.
df.head()

In [None]:
# Split the 'image' string on '_' into two new columns, 'ID' and 'Position'.
# The assignment expects the split to yield exactly two parts per name.
df[['ID', 'Position']] = df['image'].str.split('_', expand=True)
df.head()

In [None]:
# Column dtypes and non-null counts of the labels table.
df.info()

In [None]:
# 'ID' comes out of the string split as text; cast it to int, then summarise every column.
df['ID'] = df['ID'].astype(int)
df.describe(include='all')

In [None]:
# Distinct values of the 'level' label column (returned in order of first appearance).
df.level.unique()

In [None]:
# Display names for the label values, listed from the lowest level value to the highest.
r_names = ["No DR" , "Mild" , "Moderate" , "Severe" , "Proliferative"]

# unique() yields values in order of first appearance in the file, not in numeric order,
# so sort them before pairing; otherwise a name can be attached to the wrong level.
r_map = dict(zip(sorted(df.level.unique()), r_names))
r_map

In [None]:
# Count how many images fall into each level and attach the readable label.
r_freq = df['level'].value_counts()
r_freq_df = r_freq.reset_index()
# `.columns` (not `.column`) is what renames the frame's columns. Setting it explicitly
# also makes the names independent of what reset_index() calls them in a given pandas version.
r_freq_df.columns = ['level','count']
r_freq_df['label'] = r_freq_df['level'].map(r_map)

plt.figure(figsize=(8, 6))
sns.set_style("whitegrid")
barplot = sns.barplot(data=r_freq_df, x='label', y='count', palette='viridis')

# Write the count above each bar; the row index matches the bar position because
# the bars are drawn in the row order of r_freq_df.
for index, row in r_freq_df.iterrows():
    barplot.text(index, row['count'] + 500, f"{row['count']}", color='black', ha="center", fontweight='bold')

plt.title('Frequency of Diabetic Retinopathy Levels')
plt.xlabel('DR Level')
plt.ylabel('Frequency')
plt.tight_layout()
plt.show()

In [None]:
# Full path of every image file: <image_dir>/<image name>.jpeg
df['image_path'] = [os.path.join(image_dir, name + '.jpeg') for name in df['image']]
# Store the level as text; process_row converts it back with int().
df['level'] = df['level'].astype(str)

In [None]:
df.head()

In [None]:
# Side length in pixels: images are resized to IMG_SIZE x IMG_SIZE and the model input uses the same size.
IMG_SIZE = 224

In [None]:
import cv2
import numpy as np
from tensorflow.keras.utils import to_categorical
from concurrent.futures import ThreadPoolExecutor, as_completed



def process_row(row, img_size):
    """Read one image from disk and return it ready for the model.

    Args:
        row: mapping with an 'image_path' entry (file to read) and a 'level'
            entry (label, convertible with ``int``).
        img_size: side length the image is resized to (square).

    Returns:
        ``(image, label)`` where ``image`` is a float32 array scaled by 1/255
        and ``label`` is an int, or ``None`` when the file cannot be read or
        any step raises.
    """
    try:
        img_path, label = row['image_path'], int(row['level'])

        pixels = cv2.imread(img_path, cv2.IMREAD_COLOR)
        if pixels is not None:
            resized = cv2.resize(pixels, (img_size, img_size))
            scaled = resized.astype(np.float32) / 255.0  # Normalize
            return scaled, label

        # imread signals an unreadable/missing file by returning None.
        print(f"Failed to load {img_path}")
        return None
    except Exception as e:
        print(f"Error processing {row['image_path']}: {e}")
        return None

def load_images_fast(df, img_size=IMG_SIZE, max_workers=8):
    """Load every image listed in ``df`` using a pool of worker threads.

    Each row is handed to ``process_row``; rows for which it returns ``None``
    are skipped. Results are collected in the row order of ``df`` rather than
    in thread-completion order, so the returned arrays are the same from run
    to run and a later seeded split stays reproducible.

    Args:
        df: DataFrame whose rows are passed to ``process_row``.
        img_size: side length forwarded to ``process_row``.
        max_workers: number of threads in the pool.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays built from the rows that
        loaded successfully.
    """
    images = []
    labels = []

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [executor.submit(process_row, row, img_size) for _, row in df.iterrows()]

        # The work still runs concurrently; only the collection is done in
        # submission order so that output order does not depend on timing.
        for count, future in enumerate(futures, 1):
            result = future.result()
            if result is not None:
                img, label = result
                images.append(img)
                labels.append(label)

            if count % 1000 == 0:
                print(f"Processed {count} images.")

    return np.array(images), np.array(labels)

# Read all images into memory, then one-hot encode the integer labels (5 classes).
X, y = load_images_fast(df, img_size=IMG_SIZE, max_workers=8)
y_cat = to_categorical(y, num_classes=5)

for title, array in (("Images shape:", X), ("Labels shape:", y_cat)):
    print(title, array.shape)

In [None]:
# Hold out 20% for validation; stratify on the integer labels so both parts
# keep the same class proportions, and fix the seed for the shuffle.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y_cat,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [None]:
# Folder in which the train/validation split is cached as an HDF5 file.
save_path = 'C:/Users/choun/OneDrive/Desktop/Retinal_dataset/train/extracted_images/save/'

In [None]:
# h5py does not create missing parent folders, so make sure the target exists first.
os.makedirs(save_path, exist_ok=True)

# Write the four arrays of the split into one gzip-compressed HDF5 file.
with h5py.File(save_path + 'dataset_split.h5', 'w') as hf:
    for name, array in (
        ('X_train', X_train),
        ('X_val', X_val),
        ('y_train', y_train),
        ('y_val', y_val),
    ):
        hf.create_dataset(name, data=array, compression='gzip')

In [None]:
# Load datasets
with h5py.File(save_path + 'dataset_split.h5', 'r') as hf:
    # Indexing with [:] reads each dataset fully into a NumPy array, so the
    # values stay usable after the file is closed.
    X_train, X_val, y_train, y_val = [
        hf[key][:] for key in ('X_train', 'X_val', 'y_train', 'y_val')
    ]

print("Datasets successfully loaded from your laptop!")

In [None]:
# Show a compact summary instead of the repr of the whole image array.
X_val.shape, X_val.dtype

In [None]:
# Value range of the first 100 validation images (a quick check of the pixel scaling),
# rather than printing the raw pixel values.
X_val[:100].min(), X_val[:100].max()

In [None]:
# Shape of the full image array; avoids printing the array itself.
X.shape

In [None]:
def build_resnet(input_shape=(IMG_SIZE, IMG_SIZE, 3), num_classes=5):
    """Build and compile a ResNet50-based classifier with a frozen backbone.

    Args:
        input_shape: shape of one input image passed to ResNet50.
        num_classes: number of units in the softmax output layer.

    Returns:
        A compiled Keras ``Model`` (Adam with learning rate 0.0001,
        categorical cross-entropy loss, accuracy metric).
    """
    backbone = ResNet50(weights='imagenet', include_top=False, input_shape=input_shape)
    # Freeze base layers
    for backbone_layer in backbone.layers:
        backbone_layer.trainable = False

    # Classification head: pooling -> 512-unit ReLU -> dropout -> softmax.
    pooled = GlobalAveragePooling2D()(backbone.output)
    hidden = Dense(512, activation='relu')(pooled)
    regularised = Dropout(0.5)(hidden)
    probabilities = Dense(num_classes, activation='softmax')(regularised)

    classifier = Model(inputs=backbone.input, outputs=probabilities)
    classifier.compile(
        optimizer=Adam(learning_rate=0.0001),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

# Build the model with the default arguments and print its layer table.
# NOTE(review): `resnet_model` is assigned again by the training cell further down.
resnet_model = build_resnet()
resnet_model.summary()

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.applications import ResNet50
from tensorflow.keras import layers, models, optimizers

# Training settings used by the rest of this cell.
IMG_SIZE = 224  # same value as the earlier IMG_SIZE cell; used for the ResNet50 input shape
BATCH_SIZE = 64  # batch size passed to both data generators
EPOCHS = 20  # upper bound passed to fit(); EarlyStopping may end training sooner

# The image arrays were already divided by 255 when they were loaded, so no `rescale`
# is applied here. Scaling a second time would shrink the inputs to about [0, 0.004]
# during training while model.predict(X_val) later receives values in [0, 1].

# Training generator: random geometric augmentation only.
datagen = ImageDataGenerator(
    rotation_range=20,
    zoom_range=0.15,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.15,
    horizontal_flip=True,
    fill_mode="nearest"
)

# Validation generator: no augmentation and no rescaling, batches are passed through as-is.
val_datagen = ImageDataGenerator()

train_generator = datagen.flow(
    X_train,
    y_train,
    batch_size=BATCH_SIZE,
    shuffle=True
)

# shuffle=False keeps validation batches in the order of X_val / y_val.
val_generator = val_datagen.flow(
    X_val,
    y_val,
    batch_size=BATCH_SIZE,
    shuffle=False
)

# Callbacks
checkpoint_path = 'C:/Users/choun/OneDrive/Desktop/Retinal_dataset/best_model.h5'

# Stop after 5 epochs without a better val_loss and roll back to the best weights.
stop_early = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
# Save the model to disk only when val_loss improves.
keep_best = ModelCheckpoint(filepath=checkpoint_path, monitor='val_loss', save_best_only=True)

callbacks = [stop_early, keep_best]


# ImageNet-pretrained ResNet50 without its classification top, kept frozen.
base_model = ResNet50(include_top=False, weights='imagenet', input_shape=(IMG_SIZE, IMG_SIZE, 3))
base_model.trainable = False

# Use shape from y_train (one-hot) for the number of output units.
n_classes = y_train.shape[1]

resnet_model = models.Sequential()
for stage in (
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(n_classes, activation='softmax'),
):
    resnet_model.add(stage)

resnet_model.compile(
    loss='categorical_crossentropy',
    optimizer=optimizers.Adam(learning_rate=1e-4),
    metrics=['accuracy'],
)

# Train the model
# `workers` / `use_multiprocessing` are not passed: workers=1 is already the default in
# tf.keras 2.x, and Keras 3 no longer accepts these arguments in fit().
history_resnet = resnet_model.fit(
    train_generator,
    epochs=EPOCHS,
    validation_data=val_generator,
    callbacks=callbacks,
)


In [None]:
from tensorflow.keras.models import load_model

# Reload the checkpoint written by ModelCheckpoint and print its layer table.
best_model_path = 'C:/Users/choun/OneDrive/Desktop/Retinal_dataset/best_model.h5'
model = load_model(best_model_path)

model.summary()

In [None]:
# Predicted class = index of the largest softmax output; true class = index of the 1
# in the one-hot label.
val_preds = model.predict(X_val)
val_preds_class = np.argmax(val_preds, axis=1)
y_val_class = np.argmax(y_val, axis=1)

# All class ids, taken from the one-hot width, so the matrix keeps a fixed size
# even if some class never occurs in the validation labels or predictions.
class_ids = list(range(y_val.shape[1]))

# Report
print(classification_report(y_val_class, val_preds_class))
cm = confusion_matrix(y_val_class, val_preds_class, labels=class_ids)
# fmt='d' prints the integer counts in full; the default format switches to
# scientific notation for larger counts.
ax = sns.heatmap(cm, annot=True, fmt='d', xticklabels=class_ids, yticklabels=class_ids)
ax.set_xlabel('Predicted level')
ax.set_ylabel('True level')
plt.show()