### Installing & Importing all the necessary packages

Update the system package index, install `libgl1`, and install the `openpyxl` library for handling Excel files. The following Python packages are installed as well:

- **`albumentations`**: Library for image augmentation to enhance training data diversity.
- **`sweetviz`**: Generates high-density visualizations of pandas DataFrames for quick data analysis.
- **`grad-cam`**: Visualizes important image regions for CNN predictions using Grad-CAM.
- **`lime`**: Provides local explanations for machine learning model predictions.
- **`pandas_profiling`**: Creates detailed reports of pandas DataFrames for exploratory data analysis.
- **`shap`**: Explains model predictions by attributing feature contributions using Shapley values.
- **`Keras-Preprocessing`**: Offers utilities for preprocessing data, including image and text transformations.

In [None]:
from IPython.display import clear_output

!sudo apt-get update && apt-get install libgl1 -y
!pip install openpyxl -q
!pip install albumentations sweetviz grad-cam lime pandas_profiling shap Keras-Preprocessing -q

clear_output()

In [None]:
# Standard Libraries
import os
import math
import shutil
import warnings
from typing import Dict, Optional

# Data Handling
import pandas as pd
import numpy as np
import cv2
from PIL import Image

# Plotting
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.cm as cm
from matplotlib.colors import LinearSegmentedColormap

# Skimage
from skimage.segmentation import slic, mark_boundaries

# TensorFlow and Keras
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array, ImageDataGenerator
from tensorflow.keras import Sequential, Model
from tensorflow.keras.layers import Input, Dense, BatchNormalization, Flatten, Conv2D, MaxPooling2D, Dropout, GlobalAveragePooling2D, LeakyReLU
from tensorflow.keras.optimizers import Adam, Adamax
from tensorflow.keras.applications import ResNet50V2, ResNet101V2, InceptionV3, InceptionResNetV2, MobileNetV2, DenseNet169, NASNetMobile, EfficientNetB7, ConvNeXtBase
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger, LearningRateScheduler

# Keras (Standalone)
import keras
from keras.models import Sequential, Model, load_model
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D, BatchNormalization
from keras.optimizers import Adam, SGD, Adagrad, Adadelta, RMSprop, Nadam
from keras.utils import to_categorical
from keras.callbacks import EarlyStopping, ModelCheckpoint, History, ReduceLROnPlateau, CSVLogger, LearningRateScheduler

# Machine Learning
from sklearn.model_selection import train_test_split, KFold, GridSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import (accuracy_score, precision_score, recall_score, classification_report,
                             confusion_matrix, roc_curve, roc_auc_score, matthews_corrcoef, hamming_loss, f1_score, precision_recall_curve)

# Lime and Shap
from lime import lime_image
import shap

# PyTorch Grad-CAM
from pytorch_grad_cam import GradCAM

# Sweetviz (for EDA)
import sweetviz

# IPython for display
from IPython.display import Image, display

# Magic commands (for Jupyter Notebooks)
%matplotlib inline
%load_ext tensorboard
%reload_ext tensorboard

In [None]:
# Mount Google Drive at /content/drive; every dataset and model path used in
# this notebook lives under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

### Data Loading and Cleaning

Load the class labels from an Excel file and the binary test labels from a CSV file, clean the class-label DataFrame by removing rows and columns that are entirely empty, and display the cleaned data.

In [None]:
# Read the class-label workbook of the training set into a DataFrame and display it.
la1=pd.read_excel('/content/drive/MyDrive/PCOS_TL_ML/train/class_label.xlsx')
la1

In [None]:
# Read the binary labels of the test set into a DataFrame and display it.
la2 = pd.read_csv('/content/drive/MyDrive/PCOS_TL_ML/test/test_label_binary.csv')
la2

In [None]:
# Remove rows that are completely empty first, then columns that are completely empty.
df = la1.dropna(axis=0, how='all').dropna(axis=1, how='all')
print(df)

### Train-Validate Split

Split the DataFrame into training and validation sets according to the specified fractions, using an optional random seed for reproducibility.

In [None]:
def train_validate_split(df, train_percent=.8, validate_percent=.2, seed=None):
    """Randomly split ``df`` into a training and a validation DataFrame.

    Rows are shuffled by *position*, so the split works for any index --
    including one with gaps left behind by ``dropna`` -- and not only for a
    default ``RangeIndex``.

    Args:
        df: DataFrame to split.
        train_percent: Fraction of the rows assigned to the training set.
        validate_percent: Fraction of the rows assigned to the validation set.
        seed: Optional seed for the shuffle. With ``None`` every call
            produces a different split.

    Returns:
        A ``(train, validate)`` tuple of DataFrames. Rows left over after
        both fractions have been taken are not returned.
    """
    m = len(df.index)

    # A private RandomState keeps the shuffle reproducible for a given seed
    # without reseeding NumPy's global generator as a side effect.
    rng = np.random.RandomState(seed)

    # Permute positions 0..m-1 (not index labels): .iloc below is positional.
    perm = rng.permutation(m)

    train_end = int(train_percent * m)
    validate_end = int(validate_percent * m) + train_end

    train = df.iloc[perm[:train_end]]
    validate = df.iloc[perm[train_end:validate_end]]
    return train, validate

In [None]:
# Split the cleaned labels 80/20 (the function defaults) and display the training part.
# NOTE(review): no seed is passed, so the split differs on every run; the
# validation rows may therefore overlap with the data the saved models were
# trained on -- confirm which split/seed was used at training time.
train, validate = train_validate_split(df)
train

In [None]:
# Display the validation part of the split.
validate

### Image and Label Preparation

Map labels, load and preprocess images for training, validation, and testing, and display shapes of image arrays and labels.

In [None]:
# Human-readable name for each value of the binary "Healthy" column
label_mapping = {
    1: "Healthy",
    0: "Unhealthy",
}

# Add the readable name as an extra column; values missing from the mapping become NaN.
df['Label'] = df['Healthy'].map(label_mapping)

In [None]:
# Labels of the training split as a single-column 2-D array
train_labels = train[["Healthy"]].to_numpy()

# Full path of every training image.
# NOTE: adjust this directory when running on the GPU machine (same for validate and test).
train_image_paths = [
    os.path.join('/content/drive/MyDrive/PCOS_TL_ML/train/images', name)
    for name in train['imagePath']
]

# Load each image at 300x300 and divide by 255.0 to normalise pixel values to 0-1.
# NOTE: change the target size for the GPU run; it is meant to stay 320x320 there.
train_images = []
for train_image_path in train_image_paths:
    image = img_to_array(load_img(train_image_path, target_size=(300, 300))) / 255.0
    train_images.append(image)
train_images = np.asarray(train_images, dtype=np.float32)

In [None]:
# Shape of the stacked training image array
train_images.shape

In [None]:
# Shape of the training label array
train_labels.shape

In [None]:
# Labels of the validation split as a single-column 2-D array
validate_labels = validate[["Healthy"]].to_numpy()

# Full path of every validation image (validation rows come from the training folder)
validate_image_paths = [
    os.path.join('/content/drive/MyDrive/PCOS_TL_ML/train/images', name)
    for name in validate['imagePath']
]

# Load each image at 300x300 and divide by 255.0 to normalise pixel values to 0-1.
# NOTE: change the target size when running on the GPU machine.
validate_images = []
for validate_image_path in validate_image_paths:
    image = img_to_array(load_img(validate_image_path, target_size=(300, 300))) / 255.0
    validate_images.append(image)
validate_images = np.asarray(validate_images, dtype=np.float32)

In [None]:
# Shape of the stacked validation image array
validate_images.shape

In [None]:
# Shape of the validation label array
validate_labels.shape

In [None]:
# Labels of the test set as a single-column 2-D array
test_labels = la2[["Healthy"]].to_numpy()

# Full path of every test image
test_image_paths = [
    os.path.join('/content/drive/MyDrive/PCOS_TL_ML/test/images', name)
    for name in la2['imagePath']
]

# Load each image at 300x300 and divide by 255.0 to normalise pixel values to 0-1.
# NOTE: change the target size when running on the GPU machine.
test_images = []
for test_image_path in test_image_paths:
    image = img_to_array(load_img(test_image_path, target_size=(300, 300))) / 255.0
    test_images.append(image)
test_images = np.asarray(test_images, dtype=np.float32)

In [None]:
# Shape of the stacked test image array
test_images.shape

In [None]:
# Shape of the test label array
test_labels.shape

### Model Evaluation

For each architecture: load the pre-trained model, make predictions on the validation and test sets, apply a binary classification threshold of 0.5, and generate classification reports.

1) **InceptionResNetV2**

In [None]:
# Location of the saved InceptionResNetV2 model (.h5) on the mounted Drive
model_path = '/content/drive/MyDrive/PCOS_TL_ML/BinaryLabel/Transfer Learning/InceptionResNetV2/InceptionResNetV2_model.h5'

# Restore the model stored in that file
model = load_model(filepath=model_path)

In [None]:
# Cut-off between the two classes: scores strictly above it become 1, the rest 0
threshold = 0.5

# Reference labels: the "Healthy" column of the validation split, kept 2-D
validate_labels = validate[["Healthy"]].to_numpy()

# Score the validation images with InceptionResNetV2, then binarise the scores
validate_labels_pred = model.predict(validate_images)
validate_labels_pred_binary = np.greater(validate_labels_pred, threshold).astype(int)

# Print the classification report for the validation split
print(classification_report(y_true=validate_labels, y_pred=validate_labels_pred_binary))

In [None]:
# Cut-off between the two classes: scores strictly above it become 1, the rest 0
threshold = 0.5

# Reference labels: the "Healthy" column of the test label table, kept 2-D
test_labels = la2[["Healthy"]].to_numpy()

# Score the test images with InceptionResNetV2, then binarise the scores
test_labels_pred = model.predict(test_images)
test_labels_pred_binary = np.greater(test_labels_pred, threshold).astype(int)

# Print the classification report for the test set
print(classification_report(y_true=test_labels, y_pred=test_labels_pred_binary))


2) **InceptionV3**

In [None]:
# Location of the saved InceptionV3 model (.h5) on the mounted Drive
model_path2 = '/content/drive/MyDrive/PCOS_TL_ML/BinaryLabel/Transfer Learning/InceptionV3/InceptionV3_model.h5'

# Restore the model stored in that file
model2 = load_model(filepath=model_path2)

In [None]:
# Cut-off between the two classes: scores strictly above it become 1, the rest 0
threshold = 0.5

# Reference labels: the "Healthy" column of the validation split, kept 2-D
validate_labels = validate[["Healthy"]].to_numpy()

# Score the validation images with InceptionV3, then binarise the scores
validate_labels_pred2 = model2.predict(validate_images)
validate_labels_pred_binary2 = np.greater(validate_labels_pred2, threshold).astype(int)

# Print the classification report for the validation split
print(classification_report(y_true=validate_labels, y_pred=validate_labels_pred_binary2))

In [None]:
# Cut-off between the two classes: scores strictly above it become 1, the rest 0
threshold = 0.5

# Reference labels: the "Healthy" column of the test label table, kept 2-D
test_labels = la2[["Healthy"]].to_numpy()

# Score the test images with InceptionV3, then binarise the scores
test_labels_pred2 = model2.predict(test_images)
test_labels_pred_binary2 = np.greater(test_labels_pred2, threshold).astype(int)

# Print the classification report for the test set
print(classification_report(y_true=test_labels, y_pred=test_labels_pred_binary2))

3) **MobileNetV2**

In [None]:
# Location of the saved MobileNetV2 model (.h5) on the mounted Drive
model_path3 = '/content/drive/MyDrive/PCOS_TL_ML/BinaryLabel/Transfer Learning/MobileNetV2/MobileNetV2_model.h5'

# Restore the model stored in that file
model3 = load_model(filepath=model_path3)

In [None]:
# Cut-off between the two classes: scores strictly above it become 1, the rest 0
threshold = 0.5

# Reference labels: the "Healthy" column of the validation split, kept 2-D
validate_labels = validate[["Healthy"]].to_numpy()

# Score the validation images with MobileNetV2, then binarise the scores
validate_labels_pred3 = model3.predict(validate_images)
validate_labels_pred_binary3 = np.greater(validate_labels_pred3, threshold).astype(int)

# Print the classification report for the validation split
print(classification_report(y_true=validate_labels, y_pred=validate_labels_pred_binary3))

In [None]:
# Cut-off between the two classes: scores strictly above it become 1, the rest 0
threshold = 0.5

# Reference labels: the "Healthy" column of the test label table, kept 2-D
test_labels = la2[["Healthy"]].to_numpy()

# Score the test images with MobileNetV2, then binarise the scores
test_labels_pred3 = model3.predict(test_images)
test_labels_pred_binary3 = np.greater(test_labels_pred3, threshold).astype(int)

# Print the classification report for the test set
print(classification_report(y_true=test_labels, y_pred=test_labels_pred_binary3))

4) **NASNetMobile**

In [None]:
# Location of the saved NASNetMobile model (.h5) on the mounted Drive
model_path4 = '/content/drive/MyDrive/PCOS_TL_ML/BinaryLabel/Transfer Learning/NasNetMoblie/NASNetMobile_model.h5'

# Restore the model stored in that file
model4 = load_model(filepath=model_path4)

In [None]:
# Cut-off between the two classes: scores strictly above it become 1, the rest 0
threshold = 0.5

# Reference labels: the "Healthy" column of the validation split, kept 2-D
validate_labels = validate[["Healthy"]].to_numpy()

# Score the validation images with NASNetMobile, then binarise the scores
validate_labels_pred4 = model4.predict(validate_images)
validate_labels_pred_binary4 = np.greater(validate_labels_pred4, threshold).astype(int)

# Print the classification report for the validation split
print(classification_report(y_true=validate_labels, y_pred=validate_labels_pred_binary4))

In [None]:
# Cut-off between the two classes: scores strictly above it become 1, the rest 0
threshold = 0.5

# Reference labels: the "Healthy" column of the test label table, kept 2-D
test_labels = la2[["Healthy"]].to_numpy()

# Score the test images with NASNetMobile, then binarise the scores
test_labels_pred4 = model4.predict(test_images)
test_labels_pred_binary4 = np.greater(test_labels_pred4, threshold).astype(int)

# Print the classification report for the test set
print(classification_report(y_true=test_labels, y_pred=test_labels_pred_binary4))

5) **ResNet50V2**

In [None]:
# Location of the saved ResNet50V2 model (.h5) on the mounted Drive
model_path5 = '/content/drive/MyDrive/PCOS_TL_ML/BinaryLabel/Transfer Learning/ResNet50v2/ResNet50V2_model.h5'

# Restore the model stored in that file
model5 = load_model(filepath=model_path5)

In [None]:
# Cut-off between the two classes: scores strictly above it become 1, the rest 0
threshold = 0.5

# Reference labels: the "Healthy" column of the validation split, kept 2-D
validate_labels = validate[["Healthy"]].to_numpy()

# Score the validation images with ResNet50V2, then binarise the scores
validate_labels_pred5 = model5.predict(validate_images)
validate_labels_pred_binary5 = np.greater(validate_labels_pred5, threshold).astype(int)

# Print the classification report for the validation split
print(classification_report(y_true=validate_labels, y_pred=validate_labels_pred_binary5))

In [None]:
# Cut-off between the two classes: scores strictly above it become 1, the rest 0
threshold = 0.5

# Reference labels: the "Healthy" column of the test label table, kept 2-D
test_labels = la2[["Healthy"]].to_numpy()

# Score the test images with ResNet50V2, then binarise the scores
test_labels_pred5 = model5.predict(test_images)
test_labels_pred_binary5 = np.greater(test_labels_pred5, threshold).astype(int)

# Print the classification report for the test set
print(classification_report(y_true=test_labels, y_pred=test_labels_pred_binary5))

6) **VGG19**

In [None]:
# Location of the saved VGG19 model (.h5) on the mounted Drive
model_path6 = '/content/drive/MyDrive/PCOS_TL_ML/BinaryLabel/Transfer Learning/Vgg19/VGG19_model.h5'

# Restore the model stored in that file
model6 = load_model(filepath=model_path6)

In [None]:
# Cut-off between the two classes: scores strictly above it become 1, the rest 0
threshold = 0.5

# Reference labels: the "Healthy" column of the validation split, kept 2-D
validate_labels = validate[["Healthy"]].to_numpy()

# Score the validation images with VGG19, then binarise the scores
validate_labels_pred6 = model6.predict(validate_images)
validate_labels_pred_binary6 = np.greater(validate_labels_pred6, threshold).astype(int)

# Print the classification report for the validation split
print(classification_report(y_true=validate_labels, y_pred=validate_labels_pred_binary6))

In [None]:
# Cut-off between the two classes: scores strictly above it become 1, the rest 0
threshold = 0.5

# Reference labels: the "Healthy" column of the test label table, kept 2-D
test_labels = la2[["Healthy"]].to_numpy()

# Score the test images with VGG19, then binarise the scores
test_labels_pred6 = model6.predict(test_images)
test_labels_pred_binary6 = np.greater(test_labels_pred6, threshold).astype(int)

# Print the classification report for the test set
print(classification_report(y_true=test_labels, y_pred=test_labels_pred_binary6))

7) **Xception**

In [None]:
# Location of the saved Xception model (.h5) on the mounted Drive
model_path7 = '/content/drive/MyDrive/PCOS_TL_ML/BinaryLabel/Transfer Learning/Xception/Xception_model.h5'

# Restore the model stored in that file
model7 = load_model(filepath=model_path7)

In [None]:
# Cut-off between the two classes: scores strictly above it become 1, the rest 0
threshold = 0.5

# Reference labels: the "Healthy" column of the validation split, kept 2-D
validate_labels = validate[["Healthy"]].to_numpy()

# Score the validation images with Xception, then binarise the scores
validate_labels_pred7 = model7.predict(validate_images)
validate_labels_pred_binary7 = np.greater(validate_labels_pred7, threshold).astype(int)

# Print the classification report for the validation split
print(classification_report(y_true=validate_labels, y_pred=validate_labels_pred_binary7))

In [None]:
# Cut-off between the two classes: scores strictly above it become 1, the rest 0
threshold = 0.5

# Reference labels: the "Healthy" column of the test label table, kept 2-D
test_labels = la2[["Healthy"]].to_numpy()

# Score the test images with Xception, then binarise the scores
test_labels_pred7 = model7.predict(test_images)
test_labels_pred_binary7 = np.greater(test_labels_pred7, threshold).astype(int)

# Print the classification report for the test set
print(classification_report(y_true=test_labels, y_pred=test_labels_pred_binary7))

7) **Xception** (repeated — the three cells below duplicate the Xception section above)

In [None]:
# NOTE(review): this cell repeats the earlier Xception load cell verbatim and
# loads the same file a second time.
# Location of the saved Xception model (.h5) on the mounted Drive
model_path7 = '/content/drive/MyDrive/PCOS_TL_ML/BinaryLabel/Transfer Learning/Xception/Xception_model.h5'

# Restore the model stored in that file
model7 = load_model(filepath=model_path7)

In [None]:
# NOTE(review): repeats the earlier Xception validation cell verbatim.
# Cut-off between the two classes: scores strictly above it become 1, the rest 0
threshold = 0.5

# Reference labels: the "Healthy" column of the validation split, kept 2-D
validate_labels = validate[["Healthy"]].to_numpy()

# Score the validation images with Xception, then binarise the scores
validate_labels_pred7 = model7.predict(validate_images)
validate_labels_pred_binary7 = np.greater(validate_labels_pred7, threshold).astype(int)

# Print the classification report for the validation split
print(classification_report(y_true=validate_labels, y_pred=validate_labels_pred_binary7))

In [None]:
# NOTE(review): repeats the earlier Xception test cell verbatim.
# Cut-off between the two classes: scores strictly above it become 1, the rest 0
threshold = 0.5

# Reference labels: the "Healthy" column of the test label table, kept 2-D
test_labels = la2[["Healthy"]].to_numpy()

# Score the test images with Xception, then binarise the scores
test_labels_pred7 = model7.predict(test_images)
test_labels_pred_binary7 = np.greater(test_labels_pred7, threshold).astype(int)

# Print the classification report for the test set
print(classification_report(y_true=test_labels, y_pred=test_labels_pred_binary7))

8) **ConvNeXtBase**

In [None]:
# Only the weights are stored for ConvNeXtBase, so the architecture is rebuilt
# here before the weights file is loaded into it.
model_path8 = '/content/drive/MyDrive/PCOS_TL_ML/BinaryLabel/Transfer Learning/ConvNeXtBase/ConvNeXtBase_weights.h5'

# Backbone without its classification top, for 300x300x3 inputs
base_model8 = ConvNeXtBase(include_top=False, weights='imagenet', input_shape=(300, 300, 3))

# Head: flatten the backbone output and feed it to one sigmoid unit.
# All remaining Dense arguments are left at their Keras defaults.
x8 = Flatten()(base_model8.output)
predictions8 = Dense(units=1, activation='sigmoid')(x8)

# Assemble the full network and overwrite its weights from the saved file
model8 = Model(inputs=base_model8.input, outputs=predictions8)
model8.load_weights(filepath=model_path8)

In [None]:
# Cut-off between the two classes: scores strictly above it become 1, the rest 0
threshold = 0.5

# Reference labels: the "Healthy" column of the validation split, kept 2-D
validate_labels = validate[["Healthy"]].to_numpy()

# Score the validation images with ConvNeXtBase, then binarise the scores
validate_labels_pred8 = model8.predict(validate_images)
validate_labels_pred_binary8 = np.greater(validate_labels_pred8, threshold).astype(int)

# Print the classification report for the validation split
print(classification_report(y_true=validate_labels, y_pred=validate_labels_pred_binary8))

In [None]:
# Cut-off between the two classes: scores strictly above it become 1, the rest 0
threshold = 0.5

# Reference labels: the "Healthy" column of the test label table, kept 2-D
test_labels = la2[["Healthy"]].to_numpy()

# Score the test images with ConvNeXtBase, then binarise the scores
test_labels_pred8 = model8.predict(test_images)
test_labels_pred_binary8 = np.greater(test_labels_pred8, threshold).astype(int)

# Print the classification report for the test set
print(classification_report(y_true=test_labels, y_pred=test_labels_pred_binary8))

8) **ConvNeXtBase** (repeated — the three cells below duplicate the ConvNeXtBase section above)

In [None]:
# NOTE(review): this cell repeats the earlier ConvNeXtBase build cell verbatim
# and rebuilds the same network a second time.
# Only the weights are stored for ConvNeXtBase, so the architecture is rebuilt
# here before the weights file is loaded into it.
model_path8 = '/content/drive/MyDrive/PCOS_TL_ML/BinaryLabel/Transfer Learning/ConvNeXtBase/ConvNeXtBase_weights.h5'

# Backbone without its classification top, for 300x300x3 inputs
base_model8 = ConvNeXtBase(include_top=False, weights='imagenet', input_shape=(300, 300, 3))

# Head: flatten the backbone output and feed it to one sigmoid unit.
# All remaining Dense arguments are left at their Keras defaults.
x8 = Flatten()(base_model8.output)
predictions8 = Dense(units=1, activation='sigmoid')(x8)

# Assemble the full network and overwrite its weights from the saved file
model8 = Model(inputs=base_model8.input, outputs=predictions8)
model8.load_weights(filepath=model_path8)

In [None]:
# NOTE(review): repeats the earlier ConvNeXtBase validation cell verbatim.
# Cut-off between the two classes: scores strictly above it become 1, the rest 0
threshold = 0.5

# Reference labels: the "Healthy" column of the validation split, kept 2-D
validate_labels = validate[["Healthy"]].to_numpy()

# Score the validation images with ConvNeXtBase, then binarise the scores
validate_labels_pred8 = model8.predict(validate_images)
validate_labels_pred_binary8 = np.greater(validate_labels_pred8, threshold).astype(int)

# Print the classification report for the validation split
print(classification_report(y_true=validate_labels, y_pred=validate_labels_pred_binary8))

In [None]:
# NOTE(review): repeats the earlier ConvNeXtBase test cell verbatim.
# Cut-off between the two classes: scores strictly above it become 1, the rest 0
threshold = 0.5

# Reference labels: the "Healthy" column of the test label table, kept 2-D
test_labels = la2[["Healthy"]].to_numpy()

# Score the test images with ConvNeXtBase, then binarise the scores
test_labels_pred8 = model8.predict(test_images)
test_labels_pred_binary8 = np.greater(test_labels_pred8, threshold).astype(int)

# Print the classification report for the test set
print(classification_report(y_true=test_labels, y_pred=test_labels_pred_binary8))

9) **DenseNet169**

In [None]:
# Load the model
# Only the weights are stored for DenseNet169, so the architecture is rebuilt
# first and the weights file is loaded into it afterwards.
model_path9 = '/content/drive/MyDrive/PCOS_TL_ML/BinaryLabel/Transfer Learning/DenseNet169/DenseNet169_weights.h5'

# The backbone has to be the architecture the weights were saved from:
# DenseNet169 weights cannot be loaded into a ConvNeXtBase network.
base_model9 = DenseNet169(weights='imagenet', include_top=False, input_shape=(300, 300, 3))

# Classification head: flatten the backbone output and feed one sigmoid unit.
# NOTE(review): assumes the head used at training time was Flatten + Dense(1),
# as in the ConvNeXtBase section -- confirm against the training notebook.
x9 = base_model9.output
x9 = Flatten()(x9)
predictions9 = Dense(
    1,
    activation='sigmoid',
    use_bias=True,
    kernel_initializer="glorot_uniform",
    bias_initializer="zeros",
    kernel_regularizer=None,
    bias_regularizer=None,
    activity_regularizer=None,
    kernel_constraint=None,
    bias_constraint=None,
)(x9)

# Assemble the full network and overwrite its weights from the saved file
model9 = Model(inputs=base_model9.input, outputs=predictions9)
model9.load_weights(model_path9)

In [None]:
# Cut-off between the two classes: scores strictly above it become 1, the rest 0
threshold = 0.5

# Reference labels: the "Healthy" column of the validation split, kept 2-D
validate_labels = validate[["Healthy"]].to_numpy()

# Score the validation images with model9, then binarise the scores
validate_labels_pred9 = model9.predict(validate_images)
validate_labels_pred_binary9 = np.greater(validate_labels_pred9, threshold).astype(int)

# Print the classification report for the validation split
print(classification_report(y_true=validate_labels, y_pred=validate_labels_pred_binary9))

In [None]:
# Cut-off between the two classes: scores strictly above it become 1, the rest 0
threshold = 0.5

# Reference labels: the "Healthy" column of the test label table, kept 2-D
test_labels = la2[["Healthy"]].to_numpy()

# Score the test images with model9, then binarise the scores
test_labels_pred9 = model9.predict(test_images)
test_labels_pred_binary9 = np.greater(test_labels_pred9, threshold).astype(int)

# Print the classification report for the test set
print(classification_report(y_true=test_labels, y_pred=test_labels_pred_binary9))