In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and echo the full path of every file found in it.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# ***Importing Libraries***

In [None]:
import os
import cv2
import numpy as np
from tqdm import tqdm
import tensorflow as tf
from tensorflow.keras.models import Model, Sequential
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Flatten, Dense, Input, Conv2D, concatenate, BatchNormalization, MaxPooling2D, UpSampling2D, Concatenate, Dropout, Cropping2D, ZeroPadding2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn.metrics import jaccard_score
import matplotlib.pyplot as plt
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.metrics import MeanIoU

## ***1. Data Loading***

In [None]:
# Dataset locations under the Kaggle input directory.
# The TRAIN_* paths point at the DB0 folder and the TEST_* paths at the DB1 folder.
# Each GroundTruth directory is expected to hold one sub-folder per lesion type
# (the notebook reads '<GroundTruth>/<lesion>/<image file name>').
TRAIN_IMAGE_DIR = "/kaggle/input/diabetic-retinopathy-dataset/Daataset_DR/DB0/Images"
TRAIN_MASK_DIR = "/kaggle/input/diabetic-retinopathy-dataset/Daataset_DR/DB0/GroundTruth"
TEST_IMAGE_DIR = "/kaggle/input/diabetic-retinopathy-dataset/Daataset_DR/DB1/Images"
TEST_MASK_DIR = "/kaggle/input/diabetic-retinopathy-dataset/Daataset_DR/DB1/GroundTruth"
# Working-directory folders that receive the merged (combined) masks written by this notebook.
TRAIN_MASKS = "/kaggle/working/CombinedMasksTask1"
TEST_MASKS = "/kaggle/working/TestCombinedMasksTask1"

# ***1. Combining Masks of CWS and HE, both for training and testing***

In [None]:
def create_combined_mask(image_filename, mask_dir=None, subfolders=None):
    """Merge the per-lesion masks of one image into a single mask (bitwise OR).

    Parameters
    ----------
    image_filename : str
        File name of the image. Each lesion mask is looked up at
        ``<mask_dir>/<subfolder>/<image_filename>``.
    mask_dir : str, optional
        Ground-truth root containing one folder per lesion type. Defaults to
        ``TRAIN_MASK_DIR`` so single-argument calls behave as before; pass
        ``TEST_MASK_DIR`` to combine the masks of a test image.
    subfolders : sequence of str, optional
        Lesion folders to merge. Defaults to the module-level ``mask_subfolders``.

    Returns
    -------
    numpy.ndarray
        Grayscale mask that is the OR of every lesion mask that could be read.

    Raises
    ------
    FileNotFoundError
        If no lesion mask for ``image_filename`` can be read.
    """
    if mask_dir is None:
        mask_dir = TRAIN_MASK_DIR
    if subfolders is None:
        subfolders = mask_subfolders

    combined_mask = None
    for subfolder in subfolders:
        mask_path = os.path.join(mask_dir, subfolder, image_filename)
        # cv2.imread does not raise on a missing/unreadable file, it returns None.
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        if mask is None:
            continue
        if combined_mask is None:
            # The first readable mask fixes the shape of the result.
            combined_mask = mask
        else:
            combined_mask = cv2.bitwise_or(combined_mask, mask)

    if combined_mask is None:
        raise FileNotFoundError(
            f"No readable mask for '{image_filename}' under '{mask_dir}' "
            f"(looked in sub-folders: {list(subfolders)})"
        )
    return combined_mask

In [None]:
# Folder that receives the merged training masks (one file per training image).
OUTPUT_MASK_DIR = "/kaggle/working/CombinedMasksTask1"
os.makedirs(OUTPUT_MASK_DIR, exist_ok=True)
# Lesion-type folders to merge; create_combined_mask() also reads this global.
mask_subfolders = ['CWS', 'HE']
image_filenames = os.listdir(TRAIN_IMAGE_DIR)

# Merge the lesion masks of every training image and save the result under the image's file name.
for image_filename in tqdm(image_filenames):
    combined_mask = create_combined_mask(image_filename)
    output_path = os.path.join(OUTPUT_MASK_DIR, image_filename)
    cv2.imwrite(output_path, combined_mask)

In [None]:
# Folder that receives the merged TEST masks (one file per test image).
OUTPUT_MASK_DIR = "/kaggle/working/TestCombinedMasksTask1"
os.makedirs(OUTPUT_MASK_DIR, exist_ok=True)
mask_subfolders = ['CWS', 'HE']
image_filenames = os.listdir(TEST_IMAGE_DIR)

# Calling create_combined_mask(image_filename) with only a file name resolves the
# masks under TRAIN_MASK_DIR, which would pair test images with training masks.
# The test masks are therefore merged here directly from TEST_MASK_DIR.
for image_filename in tqdm(image_filenames):
    combined_mask = None
    for subfolder in mask_subfolders:
        mask_path = os.path.join(TEST_MASK_DIR, subfolder, image_filename)
        # cv2.imread returns None (no exception) when the file is missing or unreadable.
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        if mask is None:
            continue
        combined_mask = mask if combined_mask is None else cv2.bitwise_or(combined_mask, mask)

    if combined_mask is None:
        raise FileNotFoundError(
            f"No readable mask for test image '{image_filename}' under '{TEST_MASK_DIR}'"
        )

    output_path = os.path.join(OUTPUT_MASK_DIR, image_filename)
    cv2.imwrite(output_path, combined_mask)

## ***Checking if the masks are correctly combined***

In [None]:
def display_masks_comparison(train_image_dir, train_mask_dir, output_mask_dir, mask_classes, num_images=5, start_index=15):
    """Plot images next to their individual lesion masks and the merged mask.

    One row is drawn per image: the original image, one panel per entry of
    ``mask_classes``, and finally the combined mask.

    Parameters
    ----------
    train_image_dir : str
        Folder holding the original images.
    train_mask_dir : str
        Ground-truth root; masks are read from ``<train_mask_dir>/<class>/<file>``.
    output_mask_dir : str
        Folder holding the combined masks, stored under the image file names.
    mask_classes : sequence of str
        Lesion folders to show as individual panels.
    num_images : int, default 5
        Number of images (rows) to display.
    start_index : int, default 15
        Position in the directory listing of the first image to display.

    Raises
    ------
    FileNotFoundError
        If an original image cannot be read.
    """
    image_filenames = os.listdir(train_image_dir)
    # Honour num_images instead of a fixed [15:20] slice.
    selected = image_filenames[start_index:start_index + num_images]

    # Grid width follows the number of mask classes: image + each class + combined.
    n_cols = len(mask_classes) + 2
    n_rows = max(len(selected), 1)

    plt.figure(figsize=(15, 10))
    for i, filename in enumerate(selected):
        first_cell = i * n_cols + 1  # subplot indices are 1-based

        # Original image (OpenCV loads BGR; convert for matplotlib).
        orig_image_path = os.path.join(train_image_dir, filename)
        orig_image = cv2.imread(orig_image_path)
        if orig_image is None:
            raise FileNotFoundError(f"Could not read image: {orig_image_path}")
        orig_image = cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB)

        plt.subplot(n_rows, n_cols, first_cell)
        plt.imshow(orig_image)
        plt.title('Original Image')
        plt.axis('off')

        # Individual lesion masks; a missing mask leaves a blank, labelled panel.
        for j, mask_class in enumerate(mask_classes):
            mask_path = os.path.join(train_mask_dir, mask_class, filename)
            mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
            plt.subplot(n_rows, n_cols, first_cell + 1 + j)
            if mask is not None:
                plt.imshow(mask, cmap='gray')
                plt.title(f'Mask: {mask_class}')
            else:
                plt.title(f'Mask: {mask_class} (missing)')
            plt.axis('off')

        # Combined mask goes in the last column of the row.
        combined_mask_path = os.path.join(output_mask_dir, filename)
        combined_mask = cv2.imread(combined_mask_path, cv2.IMREAD_GRAYSCALE)
        plt.subplot(n_rows, n_cols, first_cell + n_cols - 1)
        if combined_mask is not None:
            plt.imshow(combined_mask, cmap='gray')
            plt.title('Combined Mask')
        else:
            plt.title('Combined Mask (missing)')
        plt.axis('off')

    plt.tight_layout()
    plt.show()

# Visual sanity check on the training set: original image, each lesion mask, and the merged mask.
display_masks_comparison(TRAIN_IMAGE_DIR, TRAIN_MASK_DIR, TRAIN_MASKS, mask_subfolders)

# ***2. Loading Images and Masks***

In [None]:
def load_data(image_dir, mask_dir):
    """Load every image in ``image_dir`` together with its same-named mask.

    Images are returned exactly as ``cv2.imread`` delivers them: they are NOT
    resized and NOT normalised. Masks are read as grayscale and binarised so
    that every non-zero pixel becomes 1.

    Parameters
    ----------
    image_dir : str
        Folder containing the input images.
    mask_dir : str
        Folder containing one mask per image, stored under the same file name.

    Returns
    -------
    tuple of numpy.ndarray
        ``(images, masks)`` in sorted file-name order.

    Raises
    ------
    FileNotFoundError
        If an image or its mask cannot be read.
    """
    image_filenames = sorted(os.listdir(image_dir))
    images = []
    masks = []

    for filename in tqdm(image_filenames, desc="Loading data"):
        image_path = os.path.join(image_dir, filename)
        image = cv2.imread(image_path)
        # cv2.imread signals failure by returning None rather than raising.
        if image is None:
            raise FileNotFoundError(f"Could not read image: {image_path}")

        mask_path = os.path.join(mask_dir, filename)
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        if mask is None:
            raise FileNotFoundError(f"Could not read mask: {mask_path}")

        images.append(image)
        masks.append((mask > 0).astype(np.uint8))  # Binarize mask (0 or 1)

    return np.array(images), np.array(masks)

In [None]:
# Load all training images together with their merged, binarised masks.
images, masks = load_data(TRAIN_IMAGE_DIR, TRAIN_MASKS)

# Hold out 10% of the images for validation; the fixed random_state makes the split repeatable.
X_train, X_val, y_train, y_val = train_test_split(images, masks, test_size=0.1, random_state=42)

## ***3. Dividing Images into 16X16 patches***

In [None]:
def data_patches(image_data, patch_size=16):
    """Split an image into non-overlapping ``patch_size x patch_size`` patches.

    Patches are returned in row-major order (left to right, then top to
    bottom). Rows/columns at the bottom/right edge that do not fill a complete
    patch are dropped.

    Parameters
    ----------
    image_data : array-like
        Image of shape ``(rows, cols)`` or ``(rows, cols, channels)``.
    patch_size : int, default 16
        Side length of the square patches.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_patches, patch_size, patch_size[, channels])``.

    Raises
    ------
    ValueError
        If ``patch_size`` is not positive.
    """
    if patch_size <= 0:
        raise ValueError("patch_size must be a positive integer")

    image_data = np.asarray(image_data)
    n_rows = image_data.shape[0] // patch_size
    n_cols = image_data.shape[1] // patch_size
    trailing = image_data.shape[2:]  # () for grayscale, (channels,) otherwise

    # Drop the incomplete edge strips, then expose the patch grid as extra axes.
    trimmed = image_data[:n_rows * patch_size, :n_cols * patch_size]
    grid = trimmed.reshape(n_rows, patch_size, n_cols, patch_size, *trailing)

    # (grid_row, in_row, grid_col, in_col, ...) -> (grid_row, grid_col, in_row, in_col, ...)
    patches_array = grid.swapaxes(1, 2).reshape(n_rows * n_cols, patch_size, patch_size, *trailing)
    return patches_array

In [None]:
# Patchify every training image and stack all patches along the first axis.
train_patches = np.concatenate([data_patches(image) for image in X_train], axis=0)
train_patches.shape

In [None]:
# Patchify every validation image and stack all patches along the first axis.
validation_patches = np.concatenate([data_patches(image) for image in X_val], axis=0)
validation_patches.shape

## ***4. Converting Masks into 16X16 patches and assigning each patch a label accordingly***
Either 1 or 0, 1 means the patch contains disease, 0 means it doesn't

#### ***Patch Size 8X8***

In [None]:
def label_patches(image):
    """Label every complete 8x8 patch of ``image`` with its most frequent value.

    Patches are visited left to right, then top to bottom; edge strips that do
    not fill a whole patch are skipped. Ties go to the smallest value, because
    ``np.argmax`` returns the first maximum of the bin counts.

    Returns a 1-D NumPy array with one label per patch.
    """
    patch_size = 8
    height, width = image.shape[0], image.shape[1]

    # Only start positions that leave room for a full patch are generated.
    majority_labels = [
        np.argmax(np.bincount(image[top:top + patch_size, left:left + patch_size].flatten()))
        for top in range(0, height - patch_size + 1, patch_size)
        for left in range(0, width - patch_size + 1, patch_size)
    ]
    return np.array(majority_labels)

#### ***Patch Size 16X16***

In [None]:
def label_patches2(image, patch_size=16):
    """Label every complete patch of a binary mask: 1 if it contains any pixel equal to 1, else 0.

    Patches are ordered left to right, then top to bottom. Edge strips that do
    not fill a complete patch are ignored.

    Parameters
    ----------
    image : array-like
        Mask whose first two axes are (rows, cols). Only pixels whose value is
        exactly 1 count as positive.
    patch_size : int, default 16
        Side length of the square patches.

    Returns
    -------
    numpy.ndarray
        1-D integer array with one 0/1 label per patch.

    Raises
    ------
    ValueError
        If ``patch_size`` is not positive.
    """
    if patch_size <= 0:
        raise ValueError("patch_size must be a positive integer")

    image = np.asarray(image)
    n_rows = image.shape[0] // patch_size
    n_cols = image.shape[1] // patch_size

    # Drop incomplete edge strips and expose the patch grid as separate axes.
    trimmed = image[:n_rows * patch_size, :n_cols * patch_size]
    grid = trimmed.reshape(n_rows, patch_size, n_cols, patch_size, *image.shape[2:])

    # Reduce over the within-patch axes (and any trailing channel axes).
    within_patch_axes = (1, 3) + tuple(range(4, grid.ndim))
    has_positive = (grid == 1).any(axis=within_patch_axes)

    return has_positive.reshape(n_rows * n_cols).astype(int)

In [None]:
# One label per patch, gathered over all training masks in order.
train_labels = np.array([label for mask in y_train for label in label_patches2(mask)])
train_labels.shape

In [None]:
# One label per patch, gathered over all validation masks in order.
validation_labels = np.array([label for mask in y_val for label in label_patches2(mask)])
validation_labels.shape

## ***5. Model for doing patch classification***

In [None]:
def build_model():
    """Build and compile the CNN that classifies 16x16x3 patches into two classes.

    The network is three blocks of two 3x3 'same' convolutions (the first two
    blocks followed by 2x2 max-pooling), then two dense layers and a 2-unit
    output.

    The output uses ``softmax`` because the loss is
    ``sparse_categorical_crossentropy`` with its default ``from_logits=False``,
    which expects each prediction to be a probability distribution over the
    classes. Independent sigmoid units do not provide that.

    Returns
    -------
    tensorflow.keras.Model
        Compiled model (Adam, learning rate 1e-4, accuracy metric).
    """
    model = Sequential([
        # Explicit Input layer instead of passing input_shape to the first Conv2D.
        Input(shape=(16, 16, 3)),
        Conv2D(128, (3, 3), activation='relu', padding='same'),
        Conv2D(256, (3, 3), activation='relu', padding='same'),
        MaxPooling2D((2, 2)),
        Conv2D(256, (3, 3), activation='relu', padding='same'),
        Conv2D(512, (3, 3), activation='relu', padding='same'),
        MaxPooling2D((2, 2)),
        Conv2D(512, (3, 3), activation='relu', padding='same'),
        Conv2D(1024, (3, 3), activation='relu', padding='same'),
        Flatten(),
        Dense(256, activation='relu'),
        Dense(128, activation='relu'),
        Dense(2, activation='softmax'),
    ])

    # Integer labels (0/1) -> sparse categorical cross-entropy over the 2 outputs.
    model.compile(optimizer=Adam(learning_rate=1e-4), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    return model
model = build_model()

In [None]:
# Print the layer-by-layer structure of the model.
model.summary()

In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint

# Setup the ModelCheckpoint callback to save the best model based on validation accuracy.
# The file is overwritten each time 'val_accuracy' improves, so after training it holds
# the best epoch rather than the last one.
checkpoint = ModelCheckpoint(
    'best_model.keras',  # Path where the model is saved
    monitor='val_accuracy',  # Monitor validation accuracy
    save_best_only=True,  # Only save the model if 'val_accuracy' has improved
    mode='max',  # 'max' because we want to maximize validation accuracy
    verbose=1  # Optional: provides detailed logging about the saved models
)

# Fit the model on the training patches, validating on the held-out patches after every epoch.
# NOTE: the patches are passed exactly as produced by data_patches(), i.e. without any rescaling.
history2 = model.fit(
    train_patches, 
    train_labels, 
    validation_data=(validation_patches, validation_labels),
    epochs=10,
    batch_size=2048,
    callbacks=[checkpoint]  # Include the checkpoint in the callbacks
)

## ***6. Loading Data for testing***

In [None]:
IMAGE_DIR = "/kaggle/input/diabetic-retinopathy-dataset/Daataset_DR/DB1/Images"
MASK_DIR = "/kaggle/working/TestCombinedMasksTask1"

# load_data() is already defined in the "Loading Images and Masks" section; it is
# reused here instead of being redefined with an identical body, so there is a
# single definition to maintain.
X_test, y_test = load_data(IMAGE_DIR, MASK_DIR)

In [None]:
# Patchify every test image and stack all patches along the first axis.
test_patches = np.concatenate([data_patches(image) for image in X_test], axis=0)
test_patches.shape

In [None]:
# One label per patch, gathered over all test masks in order.
test_labels = np.array([label for mask in y_test for label in label_patches2(mask)])
test_labels.shape

## ***7. Taking predictions***
### ***The predictions are per-patch labels; the full mask image has to be reconstructed from them***

In [None]:
# Evaluate the model on the test patches against their patch-level labels.
model.evaluate(test_patches,test_labels)

In [None]:
# Per-patch class scores: one row per test patch.
predictions = model.predict(test_patches)

In [None]:
# Turn the class scores into a single predicted class index per patch.
predict = np.argmax(predictions, axis=1)

## ***8. Reconstructing patches from predicted labels***

In [None]:
# Expand every predicted patch label into a constant 16x16 block.
# Done in one vectorised step instead of building one small array per patch in a Python loop.
pred = np.tile(np.asarray(predict)[:, None, None], (1, 16, 16))

In [None]:
# Group the blocks per test image: (n_images, patches_per_image, 16, 16).
# The counts are derived from X_test instead of being hard-coded, so the cell
# keeps working when the number or size of the test images changes.
pred2 = pred.reshape(len(X_test), -1, 16, 16)

### ***Reconstructing Masks from patches***

In [None]:
# Number of 16x16 patches that fit across one image row, derived from the test
# image width (X_test has shape (n_images, height, width, channels)).
patches_per_row = X_test.shape[2] // 16

predicted_masks = []
for a in pred2:
    # Join each run of `patches_per_row` blocks side by side, then stack the strips vertically.
    strips = [np.concatenate(a[i:i+patches_per_row], axis=1) for i in range(0, len(a), patches_per_row)]
    image = np.concatenate(strips, axis=0)
    predicted_masks.append(image)

In [None]:
# Number of reconstructed masks (one is appended per entry of pred2).
len(predicted_masks)

## ***9. Plotting all the predicted masks***

In [None]:
import matplotlib.pyplot as plt

# 'predicted_masks' is the list of reconstructed masks; the grid is sized from it
# rather than from hard-coded counts.
num_images = len(predicted_masks)
cols = 5
rows = max(1, -(-num_images // cols))  # ceiling division; at least one row

# 3 inches of height per row (18 rows -> the previous 54-inch figure).
fig, axes = plt.subplots(rows, cols, figsize=(15, 3 * rows), squeeze=False)
axes = axes.flatten()  # Flatten the 2D array of axes to simplify the looping

# Loop over all of the positions in the grid
for i in range(rows * cols):
    if i < num_images:
        axes[i].imshow(predicted_masks[i], cmap='gray')  # masks are single-channel
    # Hide the axes for both used and unused grid cells.
    axes[i].axis('off')

plt.tight_layout()
plt.show()

## ***10. Comparing Test Ground truth and predicted masks***

In [None]:
import matplotlib.image as mpimg

test_image_dir = "/kaggle/input/diabetic-retinopathy-dataset/Daataset_DR/DB1/Images"
ground_truth_dir = "/kaggle/working/TestCombinedMasksTask1"
# Sorted so that image i, ground truth i and predicted_masks[i] refer to the same file
# (the test images were loaded in sorted file-name order as well).
image_files = sorted(os.listdir(test_image_dir))
ground_truth_files = sorted(os.listdir(ground_truth_dir))

# Compare against the number of predictions instead of a hard-coded dataset size.
assert len(image_files) == len(ground_truth_files) == len(predicted_masks), "Mismatch in number of files"

num_images = len(image_files)
# squeeze=False keeps `axes` two-dimensional even when there is a single image.
fig, axes = plt.subplots(num_images, 3, figsize=(15, 5 * num_images), squeeze=False)

for i, image_file in enumerate(image_files):
    # Read the image and the ground truth mask
    img_path = os.path.join(test_image_dir, image_file)
    gt_path = os.path.join(ground_truth_dir, ground_truth_files[i])

    image = mpimg.imread(img_path)
    ground_truth = mpimg.imread(gt_path)
    predicted_mask = predicted_masks[i]

    # One row per test image: original | ground truth | prediction
    axes[i, 0].imshow(image)
    axes[i, 0].set_title('Original Image')
    axes[i, 0].axis('off')

    # Both masks use the same gray colormap so they can be compared directly.
    axes[i, 1].imshow(ground_truth, cmap='gray')
    axes[i, 1].set_title('Ground Truth Mask')
    axes[i, 1].axis('off')

    axes[i, 2].imshow(predicted_mask, cmap='gray')
    axes[i, 2].set_title('Predicted Mask')
    axes[i, 2].axis('off')

plt.tight_layout()
plt.show()

#### ***Adding extra padding to restore the original size (the edge pixels were discarded during patchification)***

In [None]:
# Zero-pad each reconstructed mask at the bottom/right up to the ground-truth size.
# The amount is derived from y_test (shape: n_images, height, width) instead of a
# hard-coded 12 columns, so it stays correct for other image or patch sizes.
target_h, target_w = y_test.shape[1:3]
padded_masks = np.array([
    np.pad(
        mask,
        pad_width=((0, target_h - mask.shape[0]), (0, target_w - mask.shape[1])),
        mode='constant',
        constant_values=0,
    )
    for mask in predicted_masks
])

### ***Finding IOU Score***

In [None]:
def iou_score(y_true, y_pred):
    """Intersection-over-Union of two masks, treating non-zero entries as foreground.

    Returns 1.0 when both masks are empty (the union has no pixels); otherwise
    the number of pixels set in both masks divided by the number set in either.
    """
    overlap = np.logical_and(y_true, y_pred).sum()
    combined = np.logical_or(y_true, y_pred).sum()
    # An empty union means neither mask marks anything: count it as a perfect match.
    return overlap / combined if combined != 0 else 1.0

In [None]:
# Ensure both padded_masks and y_test are NumPy arrays and have the same dimensions
# The predicted and ground-truth mask stacks must line up element for element.
assert padded_masks.shape == y_test.shape, "The dimensions of padded_masks and y_test must match."

# Per-image IoU (ground truth first, prediction second), then the average over all images.
iou_scores = list(map(iou_score, y_test, padded_masks))
mean_iou = np.mean(iou_scores)
print("Mean IoU:", mean_iou)

In [None]:
import cv2
import numpy as np
import os
from tensorflow.keras.models import load_model

def data_patches(image_data, patch_size=16):
    """
    Tile a 3-D image into non-overlapping `patch_size x patch_size` patches.

    Patches are collected left to right, then top to bottom. Strips at the
    right/bottom edge that are too small for a full patch are dropped.
    Returns the patches stacked in a NumPy array.
    """
    height, width, _channels = image_data.shape

    # Only start offsets that leave room for a complete patch are generated.
    tiles = [
        image_data[top:top + patch_size, left:left + patch_size]
        for top in range(0, height - patch_size + 1, patch_size)
        for left in range(0, width - patch_size + 1, patch_size)
    ]
    return np.array(tiles)

def reassemble_patches(patches, original_shape, patch_size=16):
    """
    Stitch square patches back together into one 2D mask.

    `original_shape` is (height, width) of the source image; the grid size is
    the number of whole patches that fit in each direction. Patches must be in
    row-major order (left to right, then top to bottom).
    """
    height, width = original_shape
    n_cols = width // patch_size
    n_rows = height // patch_size

    # Normalise the input to (n_rows * n_cols, patch_size, patch_size).
    flat = patches.reshape(n_rows * n_cols, patch_size, patch_size)

    # Each horizontal strip is one grid row of patches joined side by side.
    strips = [
        np.concatenate(flat[r * n_cols:(r + 1) * n_cols], axis=1)
        for r in range(n_rows)
    ]

    # Stack the strips top to bottom.
    return np.concatenate(strips, axis=0)


In [None]:
def predict_mask_for_image(image_path, model, patch_size=16, normalize=False):
    """
    Predict a binary mask for one image with the patch classifier.

    1. Reads the image.
    2. Extracts patches, classifies each one, and reassembles the labels into a mask.
    3. Zero-pads the mask at the bottom/right so it has the same height and
       width as the input image (edge strips smaller than a patch are never
       classified and stay 0).

    Parameters
    ----------
    image_path : str
        Path of the image to segment.
    model : keras model
        Patch classifier returning one score per class for each patch.
    patch_size : int, default 16
        Side length of the square patches.
    normalize : bool, default False
        Divide pixel values by 255 before prediction. The training cells of
        this notebook feed the patches to the model without any rescaling, so
        the default matches that pipeline. Enable it only for a model that was
        trained on [0, 1] inputs.

    Returns
    -------
    numpy.ndarray
        uint8 mask of shape (height, width) holding the predicted class index (0 or 1).

    Raises
    ------
    FileNotFoundError
        If the image cannot be read.
    """
    # --- Load image ---
    image = cv2.imread(image_path)
    # cv2.imread returns None (it does not raise) when the file is missing/unreadable.
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    original_h, original_w = image.shape[:2]

    # --- Extract patches: (num_patches, patch_size, patch_size, 3) ---
    patches = data_patches(image, patch_size=patch_size)
    patches = patches.astype(np.float32)
    if normalize:
        patches /= 255.0

    # --- Predict on patches ---
    predictions = model.predict(patches)  # Shape: (num_patches, n_classes)

    # Argmax over the class dimension => one class index per patch
    patch_labels = np.argmax(predictions, axis=1).astype(np.uint8)

    # --- Expand every label to a constant (patch_size x patch_size) block ---
    labeled_patches = np.tile(patch_labels[:, None, None], (1, patch_size, patch_size))

    # --- Reassemble labeled patches into a single 2D mask ---
    full_mask = reassemble_patches(labeled_patches, (original_h, original_w), patch_size=patch_size)

    # --- Restore the input size lost by dropping incomplete edge patches ---
    pad_h = original_h - full_mask.shape[0]
    pad_w = original_w - full_mask.shape[1]
    if pad_h or pad_w:
        full_mask = np.pad(full_mask, ((0, pad_h), (0, pad_w)), mode='constant', constant_values=0)

    return full_mask


In [None]:
# Load your trained model (either the best_model.keras or the final model)
# NOTE: this rebinds the global `model`, replacing the network built earlier in the notebook.
model = load_model("/kaggle/input/patch1/keras/default/1/best_model.keras")

# Example image path
test_image_path = "/kaggle/input/diabetic-retinopathy-dataset/Daataset_DR/DB1/Images/image003.png"

# Generate the predicted mask
predicted_mask = predict_mask_for_image(test_image_path, model, patch_size=16)

# Save the result to the current working directory.
# The mask holds per-pixel class indices (0/1), so it is multiplied by 255 to be visible in the PNG.
cv2.imwrite("predicted_mask.png", predicted_mask * 255)
