In [1]:
!pip install opencv-python




[notice] A new release of pip is available: 24.0 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip





In [2]:
import tensorflow as tf
import pandas as pd
import numpy as np
import cv2
import os
from sklearn.model_selection import train_test_split

In [3]:
# Local dataset locations (Windows paths): the image folder and the label CSV
# both live under the same download directory.
_dataset_root = "C:\\Users\\sejal\\Downloads\\diabetic_retinopathy"
image_dir = _dataset_root + "\\colored_images"
csv_file = _dataset_root + "\\train.csv"

In [4]:
# Read the label table from the CSV file at `csv_file` into a DataFrame.
labels_df = pd.read_csv(filepath_or_buffer=csv_file)

In [5]:
# Lookup table: value of the `id_code` column -> value of the `diagnosis`
# column, built row by row from the label table.
label_map = {
    code: grade
    for code, grade in zip(labels_df['id_code'], labels_df['diagnosis'])
}

def get_filtered_file_paths(directory):
    """Collect the paths of every entry stored one level below `directory`.

    Each sub-directory of `directory` is scanned (non-recursively) and every
    entry found inside it is returned as a joined path. Names starting with
    "._" are skipped at both levels, and top-level entries that are not
    directories are ignored.

    Names are visited in sorted order: `os.listdir` returns entries in
    arbitrary order, so without sorting the result (and anything derived from
    it, such as a seeded train/validation split) could differ between runs or
    machines.

    Args:
        directory: Path of the folder that contains the class sub-folders.

    Returns:
        list[str]: Paths of the kept entries, ordered by folder name and then
        by entry name.

    Raises:
        OSError: If `directory` cannot be listed.
    """
    file_paths = []
    for class_dir in sorted(os.listdir(directory)):
        if class_dir.startswith("._"):
            continue
        folder = os.path.join(directory, class_dir)
        if not os.path.isdir(folder):
            continue
        for file in sorted(os.listdir(folder)):
            if not file.startswith("._"):  # Excluding '._' files
                file_paths.append(os.path.join(folder, file))
    return file_paths

In [6]:
def preprocess_image(image_path, target_size=(224, 224), blur_ksize=5):
    """Load one image file and return it as a normalized grayscale array.

    Pipeline: decode the file as a 3-channel image, convert it to grayscale,
    apply a median blur, resize, and scale the pixel values by 1/255.

    Args:
        image_path: Path of the image file to read.
        target_size: Output size handed to `cv2.resize` as (width, height).
        blur_ksize: Aperture size of the median filter (odd integer > 1).

    Returns:
        numpy.ndarray: float32 array of shape (height, width, 1).
    """
    raw_bytes = tf.io.read_file(image_path)
    # expand_animations=False forces a 3-D (H, W, C) result; without it an
    # animated GIF decodes to a 4-D tensor and the steps below would break.
    img = tf.image.decode_image(raw_bytes, channels=3, expand_animations=False)

    # grayscale -> array of shape (H, W, 1)
    gray_img = tf.image.rgb_to_grayscale(img).numpy()

    # median filter; drop only the channel axis, because an unrestricted
    # np.squeeze would also remove a height or width of 1.
    filtered_img = cv2.medianBlur(np.squeeze(gray_img, axis=-1), ksize=blur_ksize)

    resized_img = cv2.resize(filtered_img, target_size)

    # Cast before dividing so the result is float32 rather than float64,
    # halving the memory needed to hold the whole dataset.
    normalized_img = resized_img.astype(np.float32) / 255.0

    return np.expand_dims(normalized_img, axis=-1)

In [7]:
def preprocess_dataset(image_dir, label_map):
    """Preprocess every labelled image found under `image_dir`.

    A file is used only when its name without extension is a key of
    `label_map`. Files that raise during processing are reported with a
    printed message and skipped.

    Args:
        image_dir: Folder passed to `get_filtered_file_paths`.
        label_map: Mapping from image id to label.

    Returns:
        tuple: (array of preprocessed images, array of the matching labels).
    """
    samples = []
    targets = []
    for file_path in get_filtered_file_paths(image_dir):
        try:
            stem, _extension = os.path.splitext(os.path.basename(file_path))
            if stem not in label_map:
                continue  # no label for this file
            samples.append(preprocess_image(file_path))
            targets.append(label_map[stem])
        except Exception as e:
            print(f"Error processing {file_path}: {e}")
    return np.array(samples), np.array(targets)


# Run the preprocessing over the whole image folder: X holds the images,
# y the matching labels.
X, y = preprocess_dataset(image_dir=image_dir, label_map=label_map)

In [8]:
# Hold out 20% of the samples for validation. Stratify on the labels so both
# subsets keep the class proportions of `y`; if stratification is impossible
# (e.g. a class with a single sample) fall back to a plain random split.
try:
    X_train, X_val, y_train, y_val = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )
except ValueError:
    X_train, X_val, y_train, y_val = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

# Keras ignores the `shuffle` argument of fit() for Dataset inputs, so the
# training pipeline must shuffle itself; otherwise every epoch sees identical
# batches. The buffer is capped to bound the extra memory it holds.
train_data = (
    tf.data.Dataset.from_tensor_slices((X_train, y_train))
    .shuffle(buffer_size=max(1, min(len(X_train), 1024)), seed=42)
    .batch(32)
)
# Validation data is only evaluated, so its order does not matter.
val_data = tf.data.Dataset.from_tensor_slices((X_val, y_val)).batch(32)

In [9]:
# Small CNN: two convolution + max-pooling stages followed by a dense
# classifier head. The input shape is declared with an explicit Input layer,
# since passing `input_shape=` to the first layer of a Sequential model is
# deprecated in current Keras releases.
model = tf.keras.models.Sequential([
    tf.keras.layers.Input(shape=(224, 224, 1)),  # single-channel 224x224 images
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(5, activation='softmax')  # 5 classes
])

# The sparse loss takes integer class ids as targets (no one-hot encoding).
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [10]:
# Train for 10 epochs, passing the validation dataset to fit(); the returned
# History object is kept in `history`.
history = model.fit(x=train_data, validation_data=val_data, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
