<a href="https://colab.research.google.com/github/ehsanealam/hipkneework/blob/main/hipkneeacp.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install pydicom, which the cells below use to read the DICOM image files
pip install pydicom


Collecting pydicom
  Downloading pydicom-2.4.3-py3-none-any.whl (1.8 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.8/1.8 MB 17.5 MB/s eta 0:00:00
Installing collected packages: pydicom
Successfully installed pydicom-2.4.3


In [None]:
# Install nibabel, which the cells below use to read the NIfTI annotation files
pip install nibabel



In [None]:
import os
import pydicom
import nibabel as nib
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from skimage.transform import resize

# Locations of the hip images, their annotations and the metadata CSV
hip_images_folder, hip_annotations_folder, hip_csv_file = (
    "dataset/JHIR_Hip_Knee_Datasets/JHIR_Hip_Knee_Datasets/Hip/Images",
    "dataset/JHIR_Hip_Knee_Datasets/JHIR_Hip_Knee_Datasets/Hip/Annotations",
    "dataset/JHIR_Hip_Knee_Datasets/JHIR_Hip_Knee_Datasets/Hip/segmentation.csv",
)

# Read the metadata table into a DataFrame
metadata = pd.read_csv(hip_csv_file)

# Function to load DICOM images
def load_dicom_images(folder_path):
    """Read every DICOM file in a folder and stack the pixel data.

    Args:
        folder_path: Directory that directly contains the ``.dcm`` files.

    Returns:
        ``np.ndarray`` built from the ``pixel_array`` of each file, in
        file-name order.
    """
    image_data = []
    # os.listdir() returns entries in arbitrary order; sorting makes the
    # result reproducible across runs and machines.
    for filename in sorted(os.listdir(folder_path)):
        # Case-insensitive match so ".DCM" files are not silently skipped.
        if filename.lower().endswith(".dcm"):
            filepath = os.path.join(folder_path, filename)
            image_data.append(pydicom.dcmread(filepath).pixel_array)
    # NOTE(review): this only yields a regular array when every image has the
    # same shape - confirm for this dataset or resize before stacking.
    return np.array(image_data)

# Function to load NIfTI annotations and resize them to a common shape
def load_and_resize_nifti_annotations(file_path, target_shape):
    """Load a NIfTI annotation volume and resample it to ``target_shape``.

    Nearest-neighbour sampling without anti-aliasing is used so the result
    only contains label values that are present in the file. Smoothing
    interpolation would blend neighbouring labels into fractional values.

    Args:
        file_path: Path to the ``.nii`` / ``.nii.gz`` file.
        target_shape: Output array shape passed to ``skimage.transform.resize``.

    Returns:
        Floating-point array of shape ``target_shape``.
    """
    nifti_data = nib.load(file_path)
    annotations = nifti_data.get_fdata()
    annotations_resized = resize(
        annotations,
        target_shape,
        order=0,  # nearest neighbour: never invents intermediate labels
        anti_aliasing=False,  # Gaussian pre-filtering would blur label edges
        preserve_range=True,
    )
    return annotations_resized

# Function to filter and load DICOM images based on metadata
def image_data_generator(folder_path, metadata, attribute_name, attribute_value, num_samples, target_shape, batch_size=32):
    """Yield ``(image, annotation)`` pairs for rows matching a metadata filter.

    The generator is finite and yields one sample at a time, so
    ``list(image_data_generator(...))`` terminates and ``zip(*samples)``
    separates images from annotations.

    Args:
        folder_path: Directory containing ``<id>.dcm`` image files.
        metadata: DataFrame with an ``id`` column and ``attribute_name``.
        attribute_name: Column used to select rows.
        attribute_value: Value ``attribute_name`` must equal.
        num_samples: Maximum number of matching rows to load.
        target_shape: Shape every image and annotation is resized to.
        batch_size: Unused; kept so existing calls remain valid.

    Yields:
        Tuple ``(image, annotation)`` of arrays with shape ``target_shape``.
    """
    matches = metadata[metadata[attribute_name] == attribute_value]
    selected = matches.head(max(num_samples, 0))
    for image_id in selected['id']:
        filepath = os.path.join(folder_path, f"{image_id}.dcm")
        image = pydicom.dcmread(filepath).pixel_array
        # Resize so that all samples stack into one regular array;
        # preserve_range keeps the stored intensities instead of rescaling
        # integer data to [0, 1].
        image = resize(image, target_shape, anti_aliasing=True, preserve_range=True)
        # Annotations are read from the module-level hip_annotations_folder.
        annotation_filepath = os.path.join(hip_annotations_folder, f"{image_id}.nii.gz")
        annotation_data = load_and_resize_nifti_annotations(annotation_filepath, target_shape)
        # NOTE(review): assumes the DICOM and NIfTI arrays share the same
        # axis order/orientation - confirm on a few samples.
        yield image, annotation_data


num_samples = 200  # Number of samples to load for each subset
target_shape = (256, 256, 1)  # Target shape for images and annotations

male_gender = '1: Male'  # Specify the gender to filter
male_data_generator = image_data_generator(hip_images_folder, metadata, 'P02SEX', male_gender, num_samples, target_shape)

female_gender = '2: Female'  # Specify the gender to filter
female_data_generator = image_data_generator(hip_images_folder, metadata, 'P02SEX', female_gender, num_samples, target_shape)

# Split the dataset into train and test sets for male and female subsets
# NOTE(review): list() only returns if image_data_generator is finite, and
# zip(*...) expects it to yield (image, annotation) pairs - confirm.
male_data = list(male_data_generator)
female_data = list(female_data_generator)

male_X, male_y = zip(*male_data)
female_X, female_y = zip(*female_data)

male_X_train, male_X_test, male_y_train, male_y_test = train_test_split(male_X, male_y, test_size=0.2, random_state=42)
female_X_train, female_X_test, female_y_train, female_y_test = train_test_split(female_X, female_y, test_size=0.2, random_state=42)

# Geometric augmentations. They must be applied identically to an image and
# its mask, otherwise the mask no longer lines up with the anatomy.
augmentation_options = dict(
    rotation_range=20,  # Randomly rotate by up to 20 degrees
    width_shift_range=0.1,  # Randomly shift width by up to 10%
    height_shift_range=0.1,  # Randomly shift height by up to 10%
    shear_range=0.2,  # Shear intensity
    zoom_range=0.2,  # Randomly zoom
    horizontal_flip=True,  # Randomly flip horizontally
    fill_mode='nearest',  # Fill missing pixels using the nearest neighbor strategy
)

# Image generator: geometric augmentation plus intensity rescaling
datagen = ImageDataGenerator(rescale=1.0 / 255.0, **augmentation_options)
# Mask generator: same geometry, no rescaling, nearest-neighbour resampling
# so that label values are not blended
mask_datagen = ImageDataGenerator(interpolation_order=0, **augmentation_options)

# Define the batch size
batch_size = 32

# Shared seed keeps the shuffling and random transforms of both flows in step
augmentation_seed = 42


def paired_augmentation_flow(images, masks):
    """Yield endless ``(image_batch, mask_batch)`` tuples with matching augmentation.

    Both flows are created with the same seed and advanced one after the
    other, so each mask batch receives the same sample order and the same
    random transform as its image batch.
    """
    image_flow = datagen.flow(np.array(images), batch_size=batch_size, shuffle=True, seed=augmentation_seed)
    mask_flow = mask_datagen.flow(np.array(masks), batch_size=batch_size, shuffle=True, seed=augmentation_seed)
    while True:
        yield next(image_flow), next(mask_flow)


# Create data generators for male and female subsets
male_data_generator = paired_augmentation_flow(male_X_train, male_y_train)
female_data_generator = paired_augmentation_flow(female_X_train, female_y_train)

# Define the U-Net model architecture
def unet_model(input_shape):
    """Build a small two-level U-Net for binary segmentation.

    The encoder halves the resolution twice and the decoder doubles it twice,
    concatenating the encoder features of matching resolution at each step,
    so the output mask has the same height and width as the input.

    Args:
        input_shape: ``(height, width, channels)``; height and width must be
            divisible by 4 for the skip connections to line up.

    Returns:
        An uncompiled ``Model`` with a one-channel sigmoid output.
    """
    inputs = Input(input_shape)
    # Encoder
    conv1 = Conv2D(64, 3, activation='relu', padding='same')(inputs)
    pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
    conv2 = Conv2D(128, 3, activation='relu', padding='same')(pool1)
    pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)
    # Decoder: one upsampling step per pooling step
    up1 = UpSampling2D(size=(2, 2))(pool2)
    up1 = Concatenate(axis=3)([up1, conv2])  # skip connection at 1/2 resolution
    conv3 = Conv2D(128, 3, activation='relu', padding='same')(up1)
    up2 = UpSampling2D(size=(2, 2))(conv3)
    up2 = Concatenate(axis=3)([up2, conv1])  # skip connection at full resolution
    conv4 = Conv2D(64, 3, activation='relu', padding='same')(up2)
    outputs = Conv2D(1, 1, activation='sigmoid')(conv4)
    return Model(inputs, outputs)

# Compile and train the male model
male_model = unet_model(input_shape=(256, 256, 1))
male_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Fit the model using the data generator.
# validation_split is not passed: Keras supports it only for array inputs and
# raises an error when the training data comes from a generator.
male_model.fit(male_data_generator, epochs=10, steps_per_epoch=max(1, len(male_X_train) // batch_size))

# Compile and train the female model
female_model = unet_model(input_shape=(256, 256, 1))
female_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Fit the model using the data generator
female_model.fit(female_data_generator, epochs=10, steps_per_epoch=max(1, len(female_X_train) // batch_size))

# The training generator multiplies intensities by 1/255; the test images
# must get the same scaling before they are shown to the model.
test_rescale = 1.0 / 255.0
male_X_test_array = np.array(male_X_test) * test_rescale
female_X_test_array = np.array(female_X_test) * test_rescale

# Evaluate the male model on the test set
male_test_loss, male_test_accuracy = male_model.evaluate(male_X_test_array, np.array(male_y_test))
print(f'Male Test Loss: {male_test_loss}, Male Test Accuracy: {male_test_accuracy}')

# Evaluate the female model on the test set
female_test_loss, female_test_accuracy = female_model.evaluate(female_X_test_array, np.array(female_y_test))
print(f'Female Test Loss: {female_test_loss}, Female Test Accuracy: {female_test_accuracy}')

# Calculate IoU for male and female models on the test set
male_y_pred = male_model.predict(male_X_test_array)
female_y_pred = female_model.predict(female_X_test_array)


def calculate_iou(y_true, y_pred, threshold=0.5):
    """Return the intersection-over-union of two masks binarised at ``threshold``.

    Values strictly greater than ``threshold`` count as foreground. When both
    masks are empty the overlap is defined as 1.0.
    """
    true_mask = np.asarray(y_true) > threshold
    pred_mask = np.asarray(y_pred) > threshold
    union = np.logical_or(true_mask, pred_mask).sum()
    if union == 0:
        return 1.0
    return np.logical_and(true_mask, pred_mask).sum() / union


male_iou = calculate_iou(np.array(male_y_test), male_y_pred)
female_iou = calculate_iou(np.array(female_y_test), female_y_pred)

print(f'Male IoU: {male_iou}')
print(f'Female IoU: {female_iou}')



In [None]:
import os
import pydicom
import nibabel as nib
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from skimage.transform import resize
from tensorflow.keras.applications import ResNet50

# Locations of the hip images, their annotations and the metadata CSV
hip_images_folder, hip_annotations_folder, hip_csv_file = (
    "dataset/JHIR_Hip_Knee_Datasets/JHIR_Hip_Knee_Datasets/Hip/Images",
    "dataset/JHIR_Hip_Knee_Datasets/JHIR_Hip_Knee_Datasets/Hip/Annotations",
    "dataset/JHIR_Hip_Knee_Datasets/JHIR_Hip_Knee_Datasets/Hip/segmentation.csv",
)

# Read the metadata table into a DataFrame
metadata = pd.read_csv(hip_csv_file)

# Function to load DICOM images
def load_dicom_images(folder_path):
    """Read every DICOM file in a folder and stack the pixel data.

    Args:
        folder_path: Directory that directly contains the ``.dcm`` files.

    Returns:
        ``np.ndarray`` built from the ``pixel_array`` of each file, in
        file-name order.
    """
    image_data = []
    # os.listdir() returns entries in arbitrary order; sorting makes the
    # result reproducible across runs and machines.
    for filename in sorted(os.listdir(folder_path)):
        # Case-insensitive match so ".DCM" files are not silently skipped.
        if filename.lower().endswith(".dcm"):
            filepath = os.path.join(folder_path, filename)
            image_data.append(pydicom.dcmread(filepath).pixel_array)
    # NOTE(review): this only yields a regular array when every image has the
    # same shape - confirm for this dataset or resize before stacking.
    return np.array(image_data)

# Function to load NIfTI annotations and resize them to a common shape
def load_and_resize_nifti_annotations(file_path, target_shape):
    """Load a NIfTI annotation volume and resample it to ``target_shape``.

    Nearest-neighbour sampling without anti-aliasing is used so the result
    only contains label values that are present in the file. Smoothing
    interpolation would blend neighbouring labels into fractional values.

    Args:
        file_path: Path to the ``.nii`` / ``.nii.gz`` file.
        target_shape: Output array shape passed to ``skimage.transform.resize``.

    Returns:
        Floating-point array of shape ``target_shape``.
    """
    nifti_data = nib.load(file_path)
    annotations = nifti_data.get_fdata()
    annotations_resized = resize(
        annotations,
        target_shape,
        order=0,  # nearest neighbour: never invents intermediate labels
        anti_aliasing=False,  # Gaussian pre-filtering would blur label edges
        preserve_range=True,
    )
    return annotations_resized

# Function to filter and load DICOM images based on metadata
def image_data_generator(folder_path, metadata, attribute_name, attribute_value, num_samples, target_shape, batch_size=32):
    """Yield ``(image, annotation)`` pairs for rows matching a metadata filter.

    The generator is finite and yields one sample at a time, so
    ``list(image_data_generator(...))`` terminates and ``zip(*samples)``
    separates images from annotations.

    Args:
        folder_path: Directory containing ``<id>.dcm`` image files.
        metadata: DataFrame with an ``id`` column and ``attribute_name``.
        attribute_name: Column used to select rows.
        attribute_value: Value ``attribute_name`` must equal.
        num_samples: Maximum number of matching rows to load.
        target_shape: Shape every image and annotation is resized to.
        batch_size: Unused; kept so existing calls remain valid.

    Yields:
        Tuple ``(image, annotation)`` of arrays with shape ``target_shape``.
    """
    matches = metadata[metadata[attribute_name] == attribute_value]
    selected = matches.head(max(num_samples, 0))
    for image_id in selected['id']:
        filepath = os.path.join(folder_path, f"{image_id}.dcm")
        image = pydicom.dcmread(filepath).pixel_array
        # Resize so that all samples stack into one regular array;
        # preserve_range keeps the stored intensities instead of rescaling
        # integer data to [0, 1].
        image = resize(image, target_shape, anti_aliasing=True, preserve_range=True)
        # Annotations are read from the module-level hip_annotations_folder.
        annotation_filepath = os.path.join(hip_annotations_folder, f"{image_id}.nii.gz")
        annotation_data = load_and_resize_nifti_annotations(annotation_filepath, target_shape)
        # NOTE(review): assumes the DICOM and NIfTI arrays share the same
        # axis order/orientation - confirm on a few samples.
        yield image, annotation_data


num_samples = 200  # Number of samples to load for each subset
target_shape = (256, 256, 1)  # Target shape for images and annotations

male_gender = '1: Male'  # Specify the gender to filter
male_data_generator = image_data_generator(hip_images_folder, metadata, 'P02SEX', male_gender, num_samples, target_shape)

female_gender = '2: Female'  # Specify the gender to filter
female_data_generator = image_data_generator(hip_images_folder, metadata, 'P02SEX', female_gender, num_samples, target_shape)

# Split the dataset into train and test sets for male and female subsets
# NOTE(review): list() only returns if image_data_generator is finite, and
# zip(*...) expects it to yield (image, annotation) pairs - confirm.
male_data = list(male_data_generator)
female_data = list(female_data_generator)

male_X, male_y = zip(*male_data)
female_X, female_y = zip(*female_data)

male_X_train, male_X_test, male_y_train, male_y_test = train_test_split(male_X, male_y, test_size=0.2, random_state=42)
female_X_train, female_X_test, female_y_train, female_y_test = train_test_split(female_X, female_y, test_size=0.2, random_state=42)

# Geometric augmentations. They must be applied identically to an image and
# its mask, otherwise the mask no longer lines up with the anatomy.
augmentation_options = dict(
    rotation_range=20,  # Randomly rotate by up to 20 degrees
    width_shift_range=0.1,  # Randomly shift width by up to 10%
    height_shift_range=0.1,  # Randomly shift height by up to 10%
    shear_range=0.2,  # Shear intensity
    zoom_range=0.2,  # Randomly zoom
    horizontal_flip=True,  # Randomly flip horizontally
    fill_mode='nearest',  # Fill missing pixels using the nearest neighbor strategy
)

# Image generator: geometric augmentation plus intensity rescaling
datagen = ImageDataGenerator(rescale=1.0 / 255.0, **augmentation_options)
# Mask generator: same geometry, no rescaling, nearest-neighbour resampling
# so that label values are not blended
mask_datagen = ImageDataGenerator(interpolation_order=0, **augmentation_options)

# Define the batch size
batch_size = 32

# Shared seed keeps the shuffling and random transforms of both flows in step
augmentation_seed = 42


def paired_augmentation_flow(images, masks):
    """Yield endless ``(image_batch, mask_batch)`` tuples with matching augmentation.

    Both flows are created with the same seed and advanced one after the
    other, so each mask batch receives the same sample order and the same
    random transform as its image batch.
    """
    image_flow = datagen.flow(np.array(images), batch_size=batch_size, shuffle=True, seed=augmentation_seed)
    mask_flow = mask_datagen.flow(np.array(masks), batch_size=batch_size, shuffle=True, seed=augmentation_seed)
    while True:
        yield next(image_flow), next(mask_flow)


# Create data generators for male and female subsets
male_data_generator = paired_augmentation_flow(male_X_train, male_y_train)
female_data_generator = paired_augmentation_flow(female_X_train, female_y_train)

# Define the U-Net model architecture with ResNet-50 backbone
def unet_resnet50_model(input_shape):
    """Build a U-Net style segmentation model on a frozen ResNet-50 encoder.

    The encoder output is upsampled five times by a factor of two. Each of
    the first four decoder stages is concatenated with the encoder feature
    map of the same resolution, and the sigmoid output has the same height
    and width as the input.

    Args:
        input_shape: ``(height, width, channels)``; height and width must be
            multiples of 32 so encoder and decoder resolutions line up.

    Returns:
        An uncompiled ``Model`` mapping an image batch to a one-channel mask.

    Raises:
        ValueError: If height or width is not a multiple of 32.
    """
    height, width = input_shape[0], input_shape[1]
    if height % 32 or width % 32:
        raise ValueError(f"input height and width must be multiples of 32, got {input_shape}")

    # Encoder (ResNet-50)
    base_model = ResNet50(weights='imagenet', include_top=False, input_shape=input_shape)
    for layer in base_model.layers:
        layer.trainable = False  # Freeze the weights of the ResNet-50 layers

    # Skip connections: for each of the 1/2 .. 1/16 resolutions keep the
    # output of the last encoder layer that still runs at that resolution.
    skip_heights = [height // 2 ** level for level in range(1, 5)]
    skip_connections = {}
    for layer in base_model.layers:
        shape = layer.output.shape
        if len(shape) == 4 and shape[1] in skip_heights:
            skip_connections[shape[1]] = layer.output  # later layers overwrite earlier ones

    # Decoder: upsample, merge with the matching encoder features, refine
    x = base_model.output
    for skip_height, filters in zip(reversed(skip_heights), (256, 128, 64, 32)):
        x = Conv2DTranspose(filters, (2, 2), strides=(2, 2), padding='same')(x)
        x = Concatenate(axis=3)([x, skip_connections[skip_height]])
        x = Conv2D(filters, (3, 3), activation='relu', padding='same')(x)
        x = Conv2D(filters, (3, 3), activation='relu', padding='same')(x)

    # Last step back to the input resolution; no encoder feature map is
    # collected at full size, so there is no skip connection here.
    x = Conv2DTranspose(16, (2, 2), strides=(2, 2), padding='same')(x)
    x = Conv2D(16, (3, 3), activation='relu', padding='same')(x)

    outputs = Conv2D(1, (1, 1), activation='sigmoid')(x)

    return Model(base_model.input, outputs)

def _as_three_channels(batch):
    """Repeat a single-channel batch along the last axis; other batches pass through."""
    return np.repeat(batch, 3, axis=-1) if batch.shape[-1] == 1 else batch


def _three_channel_batches(batches):
    """Wrap an ``(images, masks)`` batch iterator so images match the 3-channel model input."""
    for images, masks in batches:
        yield _as_three_channels(images), masks


# The training generator multiplies intensities by 1/255; the test images
# must get the same scaling (and channel layout) before reaching the model.
test_rescale = 1.0 / 255.0
male_X_test_array = _as_three_channels(np.array(male_X_test) * test_rescale)
female_X_test_array = _as_three_channels(np.array(female_X_test) * test_rescale)

# Compile and train the male model with ResNet-50 backbone
male_model = unet_resnet50_model(input_shape=(256, 256, 3))
male_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Fit the model using the data generator.
# validation_split is not passed: Keras supports it only for array inputs and
# raises an error when the training data comes from a generator.
male_model.fit(
    _three_channel_batches(male_data_generator),
    epochs=10,
    steps_per_epoch=max(1, len(male_X_train) // batch_size),
)

# Compile and train the female model with ResNet-50 backbone
female_model = unet_resnet50_model(input_shape=(256, 256, 3))
female_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Fit the model using the data generator
female_model.fit(
    _three_channel_batches(female_data_generator),
    epochs=10,
    steps_per_epoch=max(1, len(female_X_train) // batch_size),
)

# Evaluate the male model on the test set
male_test_loss, male_test_accuracy = male_model.evaluate(male_X_test_array, np.array(male_y_test))
print(f'Male Test Loss: {male_test_loss}, Male Test Accuracy: {male_test_accuracy}')

# Evaluate the female model on the test set
female_test_loss, female_test_accuracy = female_model.evaluate(female_X_test_array, np.array(female_y_test))
print(f'Female Test Loss: {female_test_loss}, Female Test Accuracy: {female_test_accuracy}')

# Calculate IoU for male and female models on the test set
male_y_pred = male_model.predict(male_X_test_array)
female_y_pred = female_model.predict(female_X_test_array)

def calculate_iou(y_true, y_pred, threshold=0.5):
    """Return the intersection-over-union of two masks.

    Both inputs are binarised first: values strictly greater than
    ``threshold`` count as foreground. Without this step every non-zero
    probability in ``y_pred`` would be treated as foreground.

    Args:
        y_true: Ground-truth mask (array-like).
        y_pred: Predicted mask or probabilities, same shape as ``y_true``.
        threshold: Cut-off separating background from foreground.

    Returns:
        IoU in ``[0, 1]``; 1.0 when both masks are empty, since there is
        nothing to overlap and nothing was missed.
    """
    true_mask = np.asarray(y_true) > threshold
    pred_mask = np.asarray(y_pred) > threshold
    union = np.logical_or(true_mask, pred_mask).sum()
    if union == 0:
        return 1.0
    intersection = np.logical_and(true_mask, pred_mask).sum()
    return intersection / union

# Score each model's test predictions against its test annotations
male_iou, female_iou = (
    calculate_iou(np.array(truth), prediction)
    for truth, prediction in ((male_y_test, male_y_pred), (female_y_test, female_y_pred))
)

print(f'Male IoU: {male_iou}')
print(f'Female IoU: {female_iou}')


In [None]:
import os
import pydicom
import nibabel as nib
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from skimage.transform import resize
from tensorflow.keras.applications import DenseNet121

# Locations of the hip images, their annotations and the metadata CSV
hip_images_folder, hip_annotations_folder, hip_csv_file = (
    "dataset/JHIR_Hip_Knee_Datasets/JHIR_Hip_Knee_Datasets/Hip/Images",
    "dataset/JHIR_Hip_Knee_Datasets/JHIR_Hip_Knee_Datasets/Hip/Annotations",
    "dataset/JHIR_Hip_Knee_Datasets/JHIR_Hip_Knee_Datasets/Hip/segmentation.csv",
)

# Read the metadata table into a DataFrame
metadata = pd.read_csv(hip_csv_file)

# Function to load DICOM images
def load_dicom_images(folder_path):
    """Read every DICOM file in a folder and stack the pixel data.

    Args:
        folder_path: Directory that directly contains the ``.dcm`` files.

    Returns:
        ``np.ndarray`` built from the ``pixel_array`` of each file, in
        file-name order.
    """
    image_data = []
    # os.listdir() returns entries in arbitrary order; sorting makes the
    # result reproducible across runs and machines.
    for filename in sorted(os.listdir(folder_path)):
        # Case-insensitive match so ".DCM" files are not silently skipped.
        if filename.lower().endswith(".dcm"):
            filepath = os.path.join(folder_path, filename)
            image_data.append(pydicom.dcmread(filepath).pixel_array)
    # NOTE(review): this only yields a regular array when every image has the
    # same shape - confirm for this dataset or resize before stacking.
    return np.array(image_data)

# Function to load NIfTI annotations and resize them to a common shape
def load_and_resize_nifti_annotations(file_path, target_shape):
    """Load a NIfTI annotation volume and resample it to ``target_shape``.

    Nearest-neighbour sampling without anti-aliasing is used so the result
    only contains label values that are present in the file. Smoothing
    interpolation would blend neighbouring labels into fractional values.

    Args:
        file_path: Path to the ``.nii`` / ``.nii.gz`` file.
        target_shape: Output array shape passed to ``skimage.transform.resize``.

    Returns:
        Floating-point array of shape ``target_shape``.
    """
    nifti_data = nib.load(file_path)
    annotations = nifti_data.get_fdata()
    annotations_resized = resize(
        annotations,
        target_shape,
        order=0,  # nearest neighbour: never invents intermediate labels
        anti_aliasing=False,  # Gaussian pre-filtering would blur label edges
        preserve_range=True,
    )
    return annotations_resized

# Function to filter and load DICOM images based on metadata
def image_data_generator(folder_path, metadata, attribute_name, attribute_value, num_samples, target_shape, batch_size=32):
    """Yield ``(image, annotation)`` pairs for rows matching a metadata filter.

    The generator is finite and yields one sample at a time, so
    ``list(image_data_generator(...))`` terminates and ``zip(*samples)``
    separates images from annotations.

    Args:
        folder_path: Directory containing ``<id>.dcm`` image files.
        metadata: DataFrame with an ``id`` column and ``attribute_name``.
        attribute_name: Column used to select rows.
        attribute_value: Value ``attribute_name`` must equal.
        num_samples: Maximum number of matching rows to load.
        target_shape: Shape every image and annotation is resized to.
        batch_size: Unused; kept so existing calls remain valid.

    Yields:
        Tuple ``(image, annotation)`` of arrays with shape ``target_shape``.
    """
    matches = metadata[metadata[attribute_name] == attribute_value]
    selected = matches.head(max(num_samples, 0))
    for image_id in selected['id']:
        filepath = os.path.join(folder_path, f"{image_id}.dcm")
        image = pydicom.dcmread(filepath).pixel_array
        # Resize so that all samples stack into one regular array;
        # preserve_range keeps the stored intensities instead of rescaling
        # integer data to [0, 1].
        image = resize(image, target_shape, anti_aliasing=True, preserve_range=True)
        # Annotations are read from the module-level hip_annotations_folder.
        annotation_filepath = os.path.join(hip_annotations_folder, f"{image_id}.nii.gz")
        annotation_data = load_and_resize_nifti_annotations(annotation_filepath, target_shape)
        # NOTE(review): assumes the DICOM and NIfTI arrays share the same
        # axis order/orientation - confirm on a few samples.
        yield image, annotation_data


num_samples = 200  # Number of samples to load for each subset
target_shape = (256, 256, 1)  # Target shape for images and annotations

male_gender = '1: Male'  # Specify the gender to filter
male_data_generator = image_data_generator(hip_images_folder, metadata, 'P02SEX', male_gender, num_samples, target_shape)

female_gender = '2: Female'  # Specify the gender to filter
female_data_generator = image_data_generator(hip_images_folder, metadata, 'P02SEX', female_gender, num_samples, target_shape)

# Split the dataset into train and test sets for male and female subsets
# NOTE(review): list() only returns if image_data_generator is finite, and
# zip(*...) expects it to yield (image, annotation) pairs - confirm.
male_data = list(male_data_generator)
female_data = list(female_data_generator)

male_X, male_y = zip(*male_data)
female_X, female_y = zip(*female_data)

male_X_train, male_X_test, male_y_train, male_y_test = train_test_split(male_X, male_y, test_size=0.2, random_state=42)
female_X_train, female_X_test, female_y_train, female_y_test = train_test_split(female_X, female_y, test_size=0.2, random_state=42)

# Geometric augmentations. They must be applied identically to an image and
# its mask, otherwise the mask no longer lines up with the anatomy.
augmentation_options = dict(
    rotation_range=20,  # Randomly rotate by up to 20 degrees
    width_shift_range=0.1,  # Randomly shift width by up to 10%
    height_shift_range=0.1,  # Randomly shift height by up to 10%
    shear_range=0.2,  # Shear intensity
    zoom_range=0.2,  # Randomly zoom
    horizontal_flip=True,  # Randomly flip horizontally
    fill_mode='nearest',  # Fill missing pixels using the nearest neighbor strategy
)

# Image generator: geometric augmentation plus intensity rescaling
datagen = ImageDataGenerator(rescale=1.0 / 255.0, **augmentation_options)
# Mask generator: same geometry, no rescaling, nearest-neighbour resampling
# so that label values are not blended
mask_datagen = ImageDataGenerator(interpolation_order=0, **augmentation_options)

# Define the batch size
batch_size = 32

# Shared seed keeps the shuffling and random transforms of both flows in step
augmentation_seed = 42


def paired_augmentation_flow(images, masks):
    """Yield endless ``(image_batch, mask_batch)`` tuples with matching augmentation.

    Both flows are created with the same seed and advanced one after the
    other, so each mask batch receives the same sample order and the same
    random transform as its image batch.
    """
    image_flow = datagen.flow(np.array(images), batch_size=batch_size, shuffle=True, seed=augmentation_seed)
    mask_flow = mask_datagen.flow(np.array(masks), batch_size=batch_size, shuffle=True, seed=augmentation_seed)
    while True:
        yield next(image_flow), next(mask_flow)


# Create data generators for male and female subsets
male_data_generator = paired_augmentation_flow(male_X_train, male_y_train)
female_data_generator = paired_augmentation_flow(female_X_train, female_y_train)

# Define the U-Net model architecture with DenseNet-121 backbone
def unet_densenet121_model(input_shape):
    """Build a U-Net style segmentation model on a frozen DenseNet-121 encoder.

    The encoder output is upsampled five times by a factor of two. Each of
    the first four decoder stages is concatenated with the encoder feature
    map of the same resolution, and the sigmoid output has the same height
    and width as the input.

    Args:
        input_shape: ``(height, width, channels)``; height and width must be
            multiples of 32 so encoder and decoder resolutions line up.

    Returns:
        An uncompiled ``Model`` mapping an image batch to a one-channel mask.

    Raises:
        ValueError: If height or width is not a multiple of 32.
    """
    height, width = input_shape[0], input_shape[1]
    if height % 32 or width % 32:
        raise ValueError(f"input height and width must be multiples of 32, got {input_shape}")

    # Encoder (DenseNet-121)
    base_model = DenseNet121(weights='imagenet', include_top=False, input_shape=input_shape)
    for layer in base_model.layers:
        layer.trainable = False  # Freeze the weights of the DenseNet-121 layers

    # Skip connections: for each of the 1/2 .. 1/16 resolutions keep the
    # output of the last encoder layer that still runs at that resolution.
    skip_heights = [height // 2 ** level for level in range(1, 5)]
    skip_connections = {}
    for layer in base_model.layers:
        shape = layer.output.shape
        if len(shape) == 4 and shape[1] in skip_heights:
            skip_connections[shape[1]] = layer.output  # later layers overwrite earlier ones

    # Decoder: upsample, merge with the matching encoder features, refine
    x = base_model.output
    for skip_height, filters in zip(reversed(skip_heights), (256, 128, 64, 32)):
        x = Conv2DTranspose(filters, (2, 2), strides=(2, 2), padding='same')(x)
        x = Concatenate(axis=3)([x, skip_connections[skip_height]])
        x = Conv2D(filters, (3, 3), activation='relu', padding='same')(x)
        x = Conv2D(filters, (3, 3), activation='relu', padding='same')(x)

    # Last step back to the input resolution; no encoder feature map is
    # collected at full size, so there is no skip connection here.
    x = Conv2DTranspose(16, (2, 2), strides=(2, 2), padding='same')(x)
    x = Conv2D(16, (3, 3), activation='relu', padding='same')(x)

    outputs = Conv2D(1, (1, 1), activation='sigmoid')(x)

    return Model(base_model.input, outputs)

def _as_three_channels(batch):
    """Repeat a single-channel batch along the last axis; other batches pass through."""
    return np.repeat(batch, 3, axis=-1) if batch.shape[-1] == 1 else batch


def _three_channel_batches(batches):
    """Wrap an ``(images, masks)`` batch iterator so images match the 3-channel model input."""
    for images, masks in batches:
        yield _as_three_channels(images), masks


# The training generator multiplies intensities by 1/255; the test images
# must get the same scaling (and channel layout) before reaching the model.
test_rescale = 1.0 / 255.0
male_X_test_array = _as_three_channels(np.array(male_X_test) * test_rescale)
female_X_test_array = _as_three_channels(np.array(female_X_test) * test_rescale)

# Compile and train the male model with DenseNet-121 backbone
male_model = unet_densenet121_model(input_shape=(256, 256, 3))
male_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Fit the model using the data generator.
# validation_split is not passed: Keras supports it only for array inputs and
# raises an error when the training data comes from a generator.
male_model.fit(
    _three_channel_batches(male_data_generator),
    epochs=10,
    steps_per_epoch=max(1, len(male_X_train) // batch_size),
)

# Compile and train the female model with DenseNet-121 backbone
female_model = unet_densenet121_model(input_shape=(256, 256, 3))
female_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Fit the model using the data generator
female_model.fit(
    _three_channel_batches(female_data_generator),
    epochs=10,
    steps_per_epoch=max(1, len(female_X_train) // batch_size),
)

# Evaluate the male model on the test set
male_test_loss, male_test_accuracy = male_model.evaluate(male_X_test_array, np.array(male_y_test))
print(f'Male Test Loss: {male_test_loss}, Male Test Accuracy: {male_test_accuracy}')

# Evaluate the female model on the test set
female_test_loss, female_test_accuracy = female_model.evaluate(female_X_test_array, np.array(female_y_test))
print(f'Female Test Loss: {female_test_loss}, Female Test Accuracy: {female_test_accuracy}')

# Calculate IoU for male and female models on the test set
male_y_pred = male_model.predict(male_X_test_array)
female_y_pred = female_model.predict(female_X_test_array)

def calculate_iou(y_true, y_pred, threshold=0.5):
    """Return the intersection-over-union of two masks.

    Both inputs are binarised first: values strictly greater than
    ``threshold`` count as foreground. Without this step every non-zero
    probability in ``y_pred`` would be treated as foreground.

    Args:
        y_true: Ground-truth mask (array-like).
        y_pred: Predicted mask or probabilities, same shape as ``y_true``.
        threshold: Cut-off separating background from foreground.

    Returns:
        IoU in ``[0, 1]``; 1.0 when both masks are empty, since there is
        nothing to overlap and nothing was missed.
    """
    true_mask = np.asarray(y_true) > threshold
    pred_mask = np.asarray(y_pred) > threshold
    union = np.logical_or(true_mask, pred_mask).sum()
    if union == 0:
        return 1.0
    intersection = np.logical_and(true_mask, pred_mask).sum()
    return intersection / union

# Score each model's test predictions against its test annotations
male_iou, female_iou = (
    calculate_iou(np.array(truth), prediction)
    for truth, prediction in ((male_y_test, male_y_pred), (female_y_test, female_y_pred))
)

print(f'Male IoU: {male_iou}')
print(f'Female IoU: {female_iou}')


In [None]:
import os
import pydicom
import nibabel as nib
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from skimage.transform import resize
from tensorflow.keras.applications import EfficientNetB0

# Locations of the hip images, their annotations and the metadata CSV
hip_images_folder, hip_annotations_folder, hip_csv_file = (
    "dataset/JHIR_Hip_Knee_Datasets/JHIR_Hip_Knee_Datasets/Hip/Images",
    "dataset/JHIR_Hip_Knee_Datasets/JHIR_Hip_Knee_Datasets/Hip/Annotations",
    "dataset/JHIR_Hip_Knee_Datasets/JHIR_Hip_Knee_Datasets/Hip/segmentation.csv",
)

# Read the metadata table into a DataFrame
metadata = pd.read_csv(hip_csv_file)

# Function to load DICOM images
def load_dicom_images(folder_path):
    """Read every DICOM file in a folder and stack the pixel data.

    Args:
        folder_path: Directory that directly contains the ``.dcm`` files.

    Returns:
        ``np.ndarray`` built from the ``pixel_array`` of each file, in
        file-name order.
    """
    image_data = []
    # os.listdir() returns entries in arbitrary order; sorting makes the
    # result reproducible across runs and machines.
    for filename in sorted(os.listdir(folder_path)):
        # Case-insensitive match so ".DCM" files are not silently skipped.
        if filename.lower().endswith(".dcm"):
            filepath = os.path.join(folder_path, filename)
            image_data.append(pydicom.dcmread(filepath).pixel_array)
    # NOTE(review): this only yields a regular array when every image has the
    # same shape - confirm for this dataset or resize before stacking.
    return np.array(image_data)

# Function to load NIfTI annotations and resize them to a common shape
def load_and_resize_nifti_annotations(file_path, target_shape):
    """Load a NIfTI annotation volume and resample it to ``target_shape``.

    Nearest-neighbour sampling without anti-aliasing is used so the result
    only contains label values that are present in the file. Smoothing
    interpolation would blend neighbouring labels into fractional values.

    Args:
        file_path: Path to the ``.nii`` / ``.nii.gz`` file.
        target_shape: Output array shape passed to ``skimage.transform.resize``.

    Returns:
        Floating-point array of shape ``target_shape``.
    """
    nifti_data = nib.load(file_path)
    annotations = nifti_data.get_fdata()
    annotations_resized = resize(
        annotations,
        target_shape,
        order=0,  # nearest neighbour: never invents intermediate labels
        anti_aliasing=False,  # Gaussian pre-filtering would blur label edges
        preserve_range=True,
    )
    return annotations_resized

# Function to filter and load DICOM images based on metadata
def image_data_generator(folder_path, metadata, attribute_name, attribute_value, num_samples, target_shape, batch_size=32):
    """Yield ``(image, annotation)`` pairs for rows matching a metadata filter.

    The generator is finite and yields one sample at a time, so
    ``list(image_data_generator(...))`` terminates and ``zip(*samples)``
    separates images from annotations.

    Args:
        folder_path: Directory containing ``<id>.dcm`` image files.
        metadata: DataFrame with an ``id`` column and ``attribute_name``.
        attribute_name: Column used to select rows.
        attribute_value: Value ``attribute_name`` must equal.
        num_samples: Maximum number of matching rows to load.
        target_shape: Shape every image and annotation is resized to.
        batch_size: Unused; kept so existing calls remain valid.

    Yields:
        Tuple ``(image, annotation)`` of arrays with shape ``target_shape``.
    """
    matches = metadata[metadata[attribute_name] == attribute_value]
    selected = matches.head(max(num_samples, 0))
    for image_id in selected['id']:
        filepath = os.path.join(folder_path, f"{image_id}.dcm")
        image = pydicom.dcmread(filepath).pixel_array
        # Resize so that all samples stack into one regular array;
        # preserve_range keeps the stored intensities instead of rescaling
        # integer data to [0, 1].
        image = resize(image, target_shape, anti_aliasing=True, preserve_range=True)
        # Annotations are read from the module-level hip_annotations_folder.
        annotation_filepath = os.path.join(hip_annotations_folder, f"{image_id}.nii.gz")
        annotation_data = load_and_resize_nifti_annotations(annotation_filepath, target_shape)
        # NOTE(review): assumes the DICOM and NIfTI arrays share the same
        # axis order/orientation - confirm on a few samples.
        yield image, annotation_data

# Load images and annotations for a specific subset of data (e.g., based on gender)
num_samples = 200  # Number of samples to load for each subset
target_shape = (256, 256, 1)  # Target shape for images and annotations

male_gender = '1: Male'  # Specify the gender to filter
male_data_generator = image_data_generator(hip_images_folder, metadata, 'P02SEX', male_gender, num_samples, target_shape)

female_gender = '2: Female'  # Specify the gender to filter
female_data_generator = image_data_generator(hip_images_folder, metadata, 'P02SEX', female_gender, num_samples, target_shape)

# Split the dataset into train and test sets for male and female subsets
# NOTE(review): list() only returns if image_data_generator is finite, and
# zip(*...) expects it to yield (image, annotation) pairs - confirm.
male_data = list(male_data_generator)
female_data = list(female_data_generator)

male_X, male_y = zip(*male_data)
female_X, female_y = zip(*female_data)

male_X_train, male_X_test, male_y_train, male_y_test = train_test_split(male_X, male_y, test_size=0.2, random_state=42)
female_X_train, female_X_test, female_y_train, female_y_test = train_test_split(female_X, female_y, test_size=0.2, random_state=42)

# Geometric augmentations. They must be applied identically to an image and
# its mask, otherwise the mask no longer lines up with the anatomy.
augmentation_options = dict(
    rotation_range=20,  # Randomly rotate by up to 20 degrees
    width_shift_range=0.1,  # Randomly shift width by up to 10%
    height_shift_range=0.1,  # Randomly shift height by up to 10%
    shear_range=0.2,  # Shear intensity
    zoom_range=0.2,  # Randomly zoom
    horizontal_flip=True,  # Randomly flip horizontally
    fill_mode='nearest',  # Fill missing pixels using the nearest neighbor strategy
)

# Image generator: geometric augmentation plus intensity rescaling
datagen = ImageDataGenerator(rescale=1.0 / 255.0, **augmentation_options)
# Mask generator: same geometry, no rescaling, nearest-neighbour resampling
# so that label values are not blended
mask_datagen = ImageDataGenerator(interpolation_order=0, **augmentation_options)

# Define the batch size
batch_size = 32

# Shared seed keeps the shuffling and random transforms of both flows in step
augmentation_seed = 42


def paired_augmentation_flow(images, masks):
    """Yield endless ``(image_batch, mask_batch)`` tuples with matching augmentation.

    Both flows are created with the same seed and advanced one after the
    other, so each mask batch receives the same sample order and the same
    random transform as its image batch.
    """
    image_flow = datagen.flow(np.array(images), batch_size=batch_size, shuffle=True, seed=augmentation_seed)
    mask_flow = mask_datagen.flow(np.array(masks), batch_size=batch_size, shuffle=True, seed=augmentation_seed)
    while True:
        yield next(image_flow), next(mask_flow)


# Create data generators for male and female subsets
male_data_generator = paired_augmentation_flow(male_X_train, male_y_train)
female_data_generator = paired_augmentation_flow(female_X_train, female_y_train)

# Define the U-Net model architecture with EfficientNet-B0 backbone
def unet_efficientnetb0_model(input_shape):
    """Build a U-Net style segmentation model on a frozen EfficientNet-B0 encoder.

    The encoder output is upsampled five times by a factor of two. Each of
    the first four decoder stages is concatenated with the encoder feature
    map of the same resolution, and the sigmoid output has the same height
    and width as the input.

    Args:
        input_shape: ``(height, width, channels)``; height and width must be
            multiples of 32 so encoder and decoder resolutions line up.

    Returns:
        An uncompiled ``Model`` mapping an image batch to a one-channel mask.

    Raises:
        ValueError: If height or width is not a multiple of 32.
    """
    height, width = input_shape[0], input_shape[1]
    if height % 32 or width % 32:
        raise ValueError(f"input height and width must be multiples of 32, got {input_shape}")

    # Encoder (EfficientNet-B0)
    base_model = EfficientNetB0(weights='imagenet', include_top=False, input_shape=input_shape)
    for layer in base_model.layers:
        layer.trainable = False  # Freeze the weights of the EfficientNet-B0 layers

    # Skip connections: for each of the 1/2 .. 1/16 resolutions keep the
    # output of the last encoder layer that still runs at that resolution.
    skip_heights = [height // 2 ** level for level in range(1, 5)]
    skip_connections = {}
    for layer in base_model.layers:
        shape = layer.output.shape
        if len(shape) == 4 and shape[1] in skip_heights:
            skip_connections[shape[1]] = layer.output  # later layers overwrite earlier ones

    # Decoder: upsample, merge with the matching encoder features, refine
    x = base_model.output
    for skip_height, filters in zip(reversed(skip_heights), (256, 128, 64, 32)):
        x = Conv2DTranspose(filters, (2, 2), strides=(2, 2), padding='same')(x)
        x = Concatenate(axis=3)([x, skip_connections[skip_height]])
        x = Conv2D(filters, (3, 3), activation='relu', padding='same')(x)
        x = Conv2D(filters, (3, 3), activation='relu', padding='same')(x)

    # Last step back to the input resolution; no encoder feature map is
    # collected at full size, so there is no skip connection here.
    x = Conv2DTranspose(16, (2, 2), strides=(2, 2), padding='same')(x)
    x = Conv2D(16, (3, 3), activation='relu', padding='same')(x)

    outputs = Conv2D(1, (1, 1), activation='sigmoid')(x)

    return Model(base_model.input, outputs)

def _as_three_channels(batch):
    """Repeat a single-channel batch along the last axis; other batches pass through."""
    return np.repeat(batch, 3, axis=-1) if batch.shape[-1] == 1 else batch


def _three_channel_batches(batches):
    """Wrap an ``(images, masks)`` batch iterator so images match the 3-channel model input."""
    for images, masks in batches:
        yield _as_three_channels(images), masks


# The training generator multiplies intensities by 1/255; the test images
# must get the same scaling (and channel layout) before reaching the model.
test_rescale = 1.0 / 255.0
male_X_test_array = _as_three_channels(np.array(male_X_test) * test_rescale)
female_X_test_array = _as_three_channels(np.array(female_X_test) * test_rescale)

# Compile and train the male model with EfficientNet-B0 backbone
male_model = unet_efficientnetb0_model(input_shape=(256, 256, 3))
male_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Fit the model using the data generator.
# validation_split is not passed: Keras supports it only for array inputs and
# raises an error when the training data comes from a generator.
male_model.fit(
    _three_channel_batches(male_data_generator),
    epochs=10,
    steps_per_epoch=max(1, len(male_X_train) // batch_size),
)

# Compile and train the female model with EfficientNet-B0 backbone
female_model = unet_efficientnetb0_model(input_shape=(256, 256, 3))
female_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Fit the model using the data generator
female_model.fit(
    _three_channel_batches(female_data_generator),
    epochs=10,
    steps_per_epoch=max(1, len(female_X_train) // batch_size),
)

# Evaluate the male model on the test set
male_test_loss, male_test_accuracy = male_model.evaluate(male_X_test_array, np.array(male_y_test))
print(f'Male Test Loss: {male_test_loss}, Male Test Accuracy: {male_test_accuracy}')

# Evaluate the female model on the test set
female_test_loss, female_test_accuracy = female_model.evaluate(female_X_test_array, np.array(female_y_test))
print(f'Female Test Loss: {female_test_loss}, Female Test Accuracy: {female_test_accuracy}')

# Calculate IoU for male and female models on the test set
male_y_pred = male_model.predict(male_X_test_array)
female_y_pred = female_model.predict(female_X_test_array)

def calculate_iou(y_true, y_pred, threshold=0.5):
    """Return the intersection-over-union of two masks.

    Both inputs are binarised first: values strictly greater than
    ``threshold`` count as foreground. Without this step every non-zero
    probability in ``y_pred`` would be treated as foreground.

    Args:
        y_true: Ground-truth mask (array-like).
        y_pred: Predicted mask or probabilities, same shape as ``y_true``.
        threshold: Cut-off separating background from foreground.

    Returns:
        IoU in ``[0, 1]``; 1.0 when both masks are empty, since there is
        nothing to overlap and nothing was missed.
    """
    true_mask = np.asarray(y_true) > threshold
    pred_mask = np.asarray(y_pred) > threshold
    union = np.logical_or(true_mask, pred_mask).sum()
    if union == 0:
        return 1.0
    intersection = np.logical_and(true_mask, pred_mask).sum()
    return intersection / union

# Score each model's test predictions against its test annotations
male_iou, female_iou = (
    calculate_iou(np.array(truth), prediction)
    for truth, prediction in ((male_y_test, male_y_pred), (female_y_test, female_y_pred))
)

print(f'Male IoU: {male_iou}')
print(f'Female IoU: {female_iou}')
