<a href="https://colab.research.google.com/github/fjadidi2001/AD_Prediction/blob/main/Hippo_Seg.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import Libraries


In [1]:
# Purpose: Import necessary libraries for data processing, model building, and visualization.
import os
import random
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from skimage.io import imread, imshow
from skimage.transform import resize
from tqdm import tqdm
from sklearn.utils import shuffle
from tensorflow.keras import backend as K
import seaborn as sns
from sklearn.metrics import confusion_matrix
import shutil
from google.colab import drive
import pandas as pd

# Set random seed for reproducibility
seed = 2019
random.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)

# Google Drive Setup


In [2]:
# Purpose: Mount Google Drive, set paths for dataset and models, and copy dataset if not already present.
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# Define Drive paths
drive_dataset_path = '/content/drive/MyDrive/Hippocampus_Dataset'
drive_model_path = '/content/drive/MyDrive/Hippocampus_Models'
local_dataset_path = '/content/hippocampus_dataset'

In [5]:
# Create directories if they don't exist
os.makedirs(drive_dataset_path, exist_ok=True)
os.makedirs(drive_model_path, exist_ok=True)

In [7]:
# Check if dataset exists in Drive
if not os.path.exists(os.path.join(drive_dataset_path, 'aug')):
    print("Copying dataset to Google Drive...")
    shutil.copy('/content/hippocampus segmentation dataset.zip', drive_dataset_path)
    !unzip -q "{os.path.join(drive_dataset_path, 'hippocampus segmentation dataset.zip')}" -d "{local_dataset_path}"
    shutil.copytree(local_dataset_path, os.path.join(drive_dataset_path, 'hippocampus_dataset'))
else:
    print("Loading dataset from Google Drive...")
    shutil.copytree(os.path.join(drive_dataset_path, 'hippocampus_dataset'), local_dataset_path)

# Debug: Check dataset contents
print("Dataset contents:")
!ls {local_dataset_path}
print("Images folder:")
!ls {os.path.join(local_dataset_path, 'aug/images')}
print("Left masks folder:")
!ls {os.path.join(local_dataset_path, 'aug/masks/left')}
print("Right masks folder:")
!ls {os.path.join(local_dataset_path, 'aug/masks/right')}

Copying dataset to Google Drive...
Dataset contents:
aug
Images folder:
img_aug100.jpg	img_aug191.jpg	img_aug281.jpg	img_aug371.jpg	img_aug461.jpg
img_aug101.jpg	img_aug192.jpg	img_aug282.jpg	img_aug372.jpg	img_aug462.jpg
img_aug102.jpg	img_aug193.jpg	img_aug283.jpg	img_aug373.jpg	img_aug463.jpg
img_aug103.jpg	img_aug194.jpg	img_aug284.jpg	img_aug374.jpg	img_aug464.jpg
img_aug104.jpg	img_aug195.jpg	img_aug285.jpg	img_aug375.jpg	img_aug465.jpg
img_aug105.jpg	img_aug196.jpg	img_aug286.jpg	img_aug376.jpg	img_aug466.jpg
img_aug106.jpg	img_aug197.jpg	img_aug287.jpg	img_aug377.jpg	img_aug467.jpg
img_aug107.jpg	img_aug198.jpg	img_aug288.jpg	img_aug378.jpg	img_aug468.jpg
img_aug108.jpg	img_aug199.jpg	img_aug289.jpg	img_aug379.jpg	img_aug469.jpg
img_aug109.jpg	img_aug19.jpg	img_aug28.jpg	img_aug37.jpg	img_aug46.jpg
img_aug10.jpg	img_aug1.jpg	img_aug290.jpg	img_aug380.jpg	img_aug470.jpg
img_aug110.jpg	img_aug200.jpg	img_aug291.jpg	img_aug381.jpg	img_aug471.jpg
img_aug111.jpg	img_aug201.jpg	img_a

# Data Loading and Preprocessing


In [None]:
# Purpose: Load images and masks, match files, preprocess data, and prepare X_train, Y_train.
# Constants
IMG_WIDTH = 128
IMG_HEIGHT = 128
IMG_CHANNELS = 3

# Define dataset paths
data = os.path.join(local_dataset_path, 'aug/images')
data_left = os.path.join(local_dataset_path, 'aug/masks/left')
data_right = os.path.join(local_dataset_path, 'aug/masks/right')

# Load file paths
train_data = [os.path.join(dirName, f) for dirName, _, fileList in sorted(os.walk(data)) for f in fileList if '.jpg' in f.lower()]
mask_left = [os.path.join(dirName, f) for dirName, _, fileList in sorted(os.walk(data_left)) for f in fileList if '.jpg' in f.lower()]
mask_right = [os.path.join(dirName, f) for dirName, _, fileList in sorted(os.walk(data_right)) for f in fileList if '.jpg' in f.lower()]
print(f"Found {len(train_data)} image files, {len(mask_left)} left mask files, {len(mask_right)} right mask files.")

# Match files
def extract_id(filename, prefix):
    base = os.path.splitext(os.path.basename(filename))[0]
    return base.replace(prefix, '')

image_ids = [extract_id(f, 'img_') for f in train_data]
left_mask_ids = [extract_id(f, 'maskleft_') for f in mask_left]
right_mask_ids = [extract_id(f, 'maskright_') for f in mask_right]
print("Image IDs:", image_ids)
print("Left mask IDs:", left_mask_ids)
print("Right mask IDs:", right_mask_ids)

common_ids = sorted(list(set(image_ids) & set(left_mask_ids) & set(right_mask_ids)))
print(f"Found {len(common_ids)} complete image-mask pairs.")

train_data = [f for f in train_data if extract_id(f, 'img_') in common_ids]
mask_left = [f for f in mask_left if extract_id(f, 'maskleft_') in common_ids]
mask_right = [f for f in mask_right if extract_id(f, 'maskright_') in common_ids]

train_data.sort(key=lambda x: extract_id(x, 'img_'))
mask_left.sort(key=lambda x: extract_id(x, 'maskleft_'))
mask_right.sort(key=lambda x: extract_id(x, 'maskright_'))

# Verify alignment
for img, ml, mr in zip(train_data, mask_left, mask_right):
    img_id = extract_id(img, 'img_')
    ml_id = extract_id(ml, 'maskleft_')
    mr_id = extract_id(mr, 'maskright_')
    if img_id != ml_id or img_id != mr_id:
        print(f"Warning: Mismatch - Image: {img_id}, Left: {ml_id}, Right: {mr_id}")

if not train_data or not mask_left or not mask_right:
    raise ValueError("No images or masks found after matching.")

# Initialize arrays
X_train = np.zeros((len(train_data), IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), dtype=np.float32)
Y_train = np.zeros((len(train_data), IMG_HEIGHT, IMG_WIDTH, 1), dtype=np.float32)

# Load and preprocess
for i in tqdm(range(len(train_data)), desc="Loading images"):
    img = imread(train_data[i])
    img = resize(img, (IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), mode='constant', preserve_range=True)
    img = img / 255.0
    X_train[i] = img

for i in tqdm(range(len(train_data)), desc="Loading masks"):
    maskl = imread(mask_left[i], as_gray=True)
    maskr = imread(mask_right[i], as_gray=True)
    mask = np.maximum(maskl, maskr)
    mask = resize(mask, (IMG_HEIGHT, IMG_WIDTH, 1), mode='constant', preserve_range=True)
    mask = (mask > 0.5).astype(np.float32)
    Y_train[i] = mask

print(f"X_train shape: {X_train.shape}, Y_train shape: {Y_train.shape}")

# Visualize sample
if X_train.shape[0] > 0:
    id = random.randint(0, X_train.shape[0] - 1)
    plt.figure(figsize=(10, 5))
    plt.subplot(1, 2, 1)
    imshow(X_train[id])
    plt.title(f"Sample Image (ID: {extract_id(train_data[id], 'img_')})")
    plt.axis('off')
    plt.subplot(1, 2, 2)
    imshow(Y_train[id][:, :, 0], cmap='gray')
    plt.title("Sample Mask")
    plt.axis('off')
    plt.tight_layout()
    plt.show()