### Installing & Importing all the necessary packages

- **`albumentations`**: Library for image augmentation to enhance training data diversity.
- **`sweetviz`**: Generates high-density visualizations of pandas DataFrames for quick data analysis.
- **`grad-cam`**: Visualizes important image regions for CNN predictions using Grad-CAM.
- **`lime`**: Provides local explanations for machine learning model predictions.
- **`pandas_profiling`**: Creates detailed reports of pandas DataFrames for exploratory data analysis.
- **`shap`**: Explains model predictions by attributing feature contributions using Shapley values.
- **`Keras-Preprocessing`**: Offers utilities for preprocessing data, including image and text transformations.

In [None]:
!pip install albumentations sweetviz grad-cam lime pandas_profiling shap Keras-Preprocessing -q

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.1/15.1 MB[0m [31m41.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m16.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Building wheel for grad-cam (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m275.7/275.7 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for lime (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m324.4/324.4 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m357.9/357.9 kB[0m [31m13.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━

In [None]:
# Standard Libraries
import os
import math
import shutil
import warnings
from typing import Dict, Optional

# Data Handling
import pandas as pd
import numpy as np
import cv2
from PIL import Image

# Plotting
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.cm as cm
from matplotlib.colors import LinearSegmentedColormap

# Skimage
from skimage.segmentation import slic, mark_boundaries

# TensorFlow and Keras
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array, ImageDataGenerator
from tensorflow.keras import Sequential, Model
from tensorflow.keras.layers import Input, Dense, BatchNormalization, Flatten, Conv2D, MaxPooling2D, Dropout, GlobalAveragePooling2D, LeakyReLU
from tensorflow.keras.optimizers import Adam, Adamax
from tensorflow.keras.applications import ResNet50V2, ResNet101V2, InceptionV3, InceptionResNetV2, MobileNetV2, DenseNet169, NASNetMobile, EfficientNetB7, ConvNeXtBase
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger, LearningRateScheduler

# Keras (Standalone)
import keras
from keras.models import Sequential, Model, load_model
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D, BatchNormalization
from keras.optimizers import Adam, SGD, Adagrad, Adadelta, RMSprop, Nadam
from keras.utils import to_categorical
from keras.callbacks import EarlyStopping, ModelCheckpoint, History, ReduceLROnPlateau, CSVLogger, LearningRateScheduler

# Machine Learning
from sklearn.model_selection import train_test_split, KFold, GridSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import (accuracy_score, precision_score, recall_score, classification_report,
                             confusion_matrix, roc_curve, roc_auc_score, matthews_corrcoef, hamming_loss, f1_score, precision_recall_curve)

# Lime and Shap
from lime import lime_image
import shap

# PyTorch Grad-CAM
from pytorch_grad_cam import GradCAM

# Sweetviz (for EDA)
import sweetviz

# IPython for display
from IPython.display import Image, display

# Magic commands (for Jupyter Notebooks)
%matplotlib inline
%load_ext tensorboard
%reload_ext tensorboard

In [None]:
# Mount Google Drive at /content/drive; every data and model path below lives under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


### Data Loading and Cleaning

Load class labels from an Excel file, binary test labels from a CSV, clean the class labels DataFrame by removing empty rows and columns, and display the cleaned data.

In [None]:
# Labels for the training images: one row per image file with its binary `Healthy` flag.
la1=pd.read_excel('/content/drive/MyDrive/PCOS_TL_ML/train/class_label.xlsx')
la1

Unnamed: 0,imagePath,Healthy
0,100image13.jpg,1
1,100image2.jpg,1
2,100image65.jpg,0
3,100image71.jpg,0
4,100image83.jpg,0
...,...,...
3195,pco_4.jpg,1
3196,pco_5.jpg,0
3197,pco_6.jpg,0
3198,pco_7.jpg,0


In [None]:
# Labels for the held-out test images, in the same imagePath / Healthy layout as the training labels.
la2 = pd.read_csv('/content/drive/MyDrive/PCOS_TL_ML/test/test_label_binary.csv')
la2

Unnamed: 0,imagePath,Healthy
0,image10000.jpg,1
1,image10001.jpg,0
2,image10002.jpg,1
3,image10003.jpg,0
4,image10004.jpg,0
...,...,...
1463,image11463.jpg,0
1464,image11464.jpg,0
1465,image11465.jpg,0
1466,image11466.jpg,1


In [None]:
# Remove rows, then columns, that are entirely empty.
# Renumber the rows afterwards: if any row was dropped the index would have gaps, and the
# split helper below shuffles `df.index` and uses the result for positional lookups.
df = (
    la1
    .dropna(how='all')
    .dropna(how='all', axis=1)
    .reset_index(drop=True)
)
df

           imagePath  Healthy
0     100image13.jpg        1
1      100image2.jpg        1
2     100image65.jpg        0
3     100image71.jpg        0
4     100image83.jpg        0
...              ...      ...
3195       pco_4.jpg        1
3196       pco_5.jpg        0
3197       pco_6.jpg        0
3198       pco_7.jpg        0
3199       pco_8.jpg        0

[3200 rows x 2 columns]


### Train-Validate Split

Split the DataFrame into training and validation sets based on the specified percentages, using an optional random seed for reproducibility.

In [None]:
def train_validate_split(df, train_percent=.8, validate_percent=.2, seed=None):
    """Randomly partition ``df`` into a training and a validation DataFrame.

    The rows are shuffled; the first ``int(train_percent * len(df))`` shuffled rows
    form the training set and the next ``int(validate_percent * len(df))`` rows form
    the validation set. Any rows left over (fractions summing to less than 1, or
    integer truncation) are returned in neither set.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to split. Its index may be arbitrary (gaps, non-integer labels).
    train_percent : float, default 0.8
        Fraction of rows assigned to the training set.
    validate_percent : float, default 0.2
        Fraction of rows assigned to the validation set.
    seed : int or None, default None
        Passed to ``np.random.seed``. Note that this reseeds NumPy's *global*
        generator; with ``None`` the split differs on every call.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(train, validate)``, each keeping the original index labels of its rows.
    """
    np.random.seed(seed)
    m = len(df.index)
    # Shuffle row *positions* rather than index labels: the slices below go through
    # `.iloc`, which is positional, so feeding it labels picks the wrong rows (or
    # raises IndexError) whenever the index is not exactly 0..m-1.
    perm = np.random.permutation(m)
    train_end = int(train_percent * m)
    validate_end = int(validate_percent * m) + train_end
    train = df.iloc[perm[:train_end]]
    validate = df.iloc[perm[train_end:validate_end]]
    return train, validate

In [None]:
# Use a fixed seed so that Restart & Run All always yields the same train/validation rows;
# an unseeded call draws a different split on every run, which makes the per-model
# validation reports below incomparable with each other.
# NOTE(review): the seed should match whatever split was used when the saved models were
# trained, otherwise "validation" rows may have been seen during training -- confirm.
train, validate = train_validate_split(df, seed=42)
train

Unnamed: 0,imagePath,Healthy
2314,image2879.jpg,0
2037,image2508.jpg,0
1423,image1026.jpg,0
2758,image3394.jpg,1
1818,image1912.jpg,1
...,...,...
2617,image3230.jpg,0
3018,image3685.jpg,0
557,97image54.jpg,0
857,image0410.jpg,0


In [None]:
# Show the validation rows returned by the split above.
validate

Unnamed: 0,imagePath,Healthy
1450,image1056.jpg,0
3183,normal_6.jpg,1
346,45image0051.jpg,0
2163,image2681.jpg,1
1547,image1161.jpg,0
...,...,...
1372,image0967.jpg,0
1319,image0908.jpg,0
1749,image1801.jpg,0
964,image0526.jpg,0


### Image and Label Preparation

Map labels, load and preprocess images for training, validation, and testing, and display shapes of image arrays and labels.

In [None]:
# Human-readable class names for the binary `Healthy` flag.
label_mapping = {1: "Healthy", 0: "Unhealthy"}
# Build the column on a new frame instead of writing into `df` in place: `df` comes out of
# chained dropna() calls, and an in-place column write on such a frame may raise
# SettingWithCopyWarning.
df = df.assign(Label=df['Healthy'].map(label_mapping))

In [None]:
# Ground-truth labels as a single-column 2-D array, in the same row order as `train`.
train_labels = train[["Healthy"]].values
# TODO: change this path when running on GPU (likewise for the validation and test cells).
train_image_paths = [os.path.join('/content/drive/MyDrive/PCOS_TL_ML/train/images', filename) for filename in train['imagePath']]
# Resize target for every image. TODO: switch to 320x320 when running on GPU.
target_size = (300, 300)
# Fill a preallocated float32 buffer instead of collecting per-image arrays in a list and
# converting at the end, which briefly holds two full copies of a multi-gigabyte dataset.
train_images = np.empty((len(train_image_paths), *target_size, 3), dtype=np.float32)
for i, train_image_path in enumerate(train_image_paths):
    image = load_img(train_image_path, target_size=target_size)
    image = img_to_array(image) / 255.0  # Normalize pixel values between 0 and 1
    train_images[i] = image

In [None]:
# Expect (n_train_images, 300, 300, 3): one 300x300, 3-channel array per training image.
train_images.shape

(2560, 300, 300, 3)

In [None]:
# Expect (n_train_images, 1): a single label column with one row per training image.
train_labels.shape

(2560, 1)

In [None]:
# Ground-truth labels as a single-column 2-D array, in the same row order as `validate`.
validate_labels = validate[["Healthy"]].values
# Validation rows were split off the training label file, so their images live in the train folder.
validate_image_paths = [os.path.join('/content/drive/MyDrive/PCOS_TL_ML/train/images', filename) for filename in validate['imagePath']]
# Resize target for every image. TODO: change this when running on GPU.
target_size = (300, 300)
# Fill a preallocated float32 buffer instead of collecting per-image arrays in a list and
# converting at the end, which briefly holds two full copies of the data.
validate_images = np.empty((len(validate_image_paths), *target_size, 3), dtype=np.float32)
for i, validate_image_path in enumerate(validate_image_paths):
    image = load_img(validate_image_path, target_size=target_size)
    image = img_to_array(image) / 255.0  # Normalize pixel values between 0 and 1
    validate_images[i] = image

In [None]:
# Expect (n_validation_images, 300, 300, 3): one 300x300, 3-channel array per validation image.
validate_images.shape

(640, 300, 300, 3)

In [None]:
# Expect (n_validation_images, 1): a single label column with one row per validation image.
validate_labels.shape

(640, 1)

In [None]:
# Ground-truth labels as a single-column 2-D array, in the same row order as `la2`.
test_labels = la2[["Healthy"]].values
test_image_paths = [os.path.join('/content/drive/MyDrive/PCOS_TL_ML/test/images', filename) for filename in la2['imagePath']]
# Resize target for every image. TODO: change this when running on GPU.
target_size = (300, 300)
# Fill a preallocated float32 buffer instead of collecting per-image arrays in a list and
# converting at the end, which briefly holds two full copies of the data.
test_images = np.empty((len(test_image_paths), *target_size, 3), dtype=np.float32)
for i, test_image_path in enumerate(test_image_paths):
    image = load_img(test_image_path, target_size=target_size)
    image = img_to_array(image) / 255.0  # Normalize pixel values between 0 and 1
    test_images[i] = image

In [None]:
# Expect (n_test_images, 300, 300, 3): one 300x300, 3-channel array per test image.
test_images.shape

(1468, 300, 300, 3)

In [None]:
# Expect (n_test_images, 1): a single label column with one row per test image.
test_labels.shape

(1468, 1)

### Model Evaluation

Load a pre-trained model, make predictions on validation and test sets, apply binary classification threshold, and generate classification reports.

1) **InceptionResNetV2**

In [None]:
# Load the saved InceptionResNetV2 model (.h5 file in the Transfer Learning folder on Drive).
# `model` is used by the two evaluation cells that follow.
model_path = '/content/drive/MyDrive/PCOS_TL_ML/BinaryLabel/Transfer Learning/InceptionResNetV2/InceptionResNetV2_model.h5'
model = load_model(model_path)

In [None]:
# Raw InceptionResNetV2 scores for the validation images.
# Assumes the model emits one sigmoid-style score per image -- TODO confirm.
validate_labels_pred = model.predict(validate_images)

# Apply threshold for binary classification: scores above it become class 1 ("Healthy").
threshold = 0.5
validate_labels_pred_binary = (validate_labels_pred > threshold).astype(int)

# Reuse the `validate_labels` extracted in the same cell that loaded `validate_images`.
# Re-deriving them from `validate` here would silently pair the cached images with the
# wrong labels if the split cell had been re-run in between.
assert len(validate_labels) == len(validate_images), "validation labels and images are out of sync"

# Generate classification report
print(classification_report(validate_labels, validate_labels_pred_binary))

              precision    recall  f1-score   support

           0       0.97      0.89      0.93       465
           1       0.76      0.92      0.83       175

    accuracy                           0.90       640
   macro avg       0.87      0.91      0.88       640
weighted avg       0.91      0.90      0.90       640



In [None]:
# Ground truth for the held-out test set, taken from the test label file as a single column.
test_labels = la2[["Healthy"]].values

# Raw InceptionResNetV2 scores for every test image.
test_labels_pred = model.predict(test_images)

# Scores strictly above the cut-off become class 1, all others class 0.
threshold = 0.5
test_labels_pred_binary = np.where(test_labels_pred > threshold, 1, 0)

# Per-class precision / recall / F1 on the test set.
print(classification_report(test_labels, test_labels_pred_binary))


              precision    recall  f1-score   support

           0       0.90      0.92      0.91      1051
           1       0.80      0.75      0.77       417

    accuracy                           0.87      1468
   macro avg       0.85      0.84      0.84      1468
weighted avg       0.87      0.87      0.87      1468



2) **InceptionV3**

In [None]:
# Load the saved InceptionV3 model (.h5 file in the Transfer Learning folder on Drive).
# `model2` is used by the two evaluation cells that follow.
model_path2 = '/content/drive/MyDrive/PCOS_TL_ML/BinaryLabel/Transfer Learning/InceptionV3/InceptionV3_model.h5'
model2 = load_model(model_path2)

In [None]:
# Raw InceptionV3 scores for the validation images.
# Assumes the model emits one sigmoid-style score per image -- TODO confirm.
validate_labels_pred2 = model2.predict(validate_images)

# Apply threshold for binary classification: scores above it become class 1 ("Healthy").
threshold = 0.5
validate_labels_pred_binary2 = (validate_labels_pred2 > threshold).astype(int)

# Reuse the `validate_labels` extracted in the same cell that loaded `validate_images`.
# Re-deriving them from `validate` here would silently pair the cached images with the
# wrong labels if the split cell had been re-run in between.
assert len(validate_labels) == len(validate_images), "validation labels and images are out of sync"

# Generate classification report
print(classification_report(validate_labels, validate_labels_pred_binary2))

              precision    recall  f1-score   support

           0       0.96      0.94      0.95       464
           1       0.85      0.90      0.87       176

    accuracy                           0.93       640
   macro avg       0.90      0.92      0.91       640
weighted avg       0.93      0.93      0.93       640



In [None]:
# Ground truth for the held-out test set, taken from the test label file as a single column.
test_labels = la2[["Healthy"]].values

# Raw InceptionV3 scores for every test image.
test_labels_pred2 = model2.predict(test_images)

# Scores strictly above the cut-off become class 1, all others class 0.
threshold = 0.5
test_labels_pred_binary2 = np.where(test_labels_pred2 > threshold, 1, 0)

# Per-class precision / recall / F1 on the test set.
print(classification_report(test_labels, test_labels_pred_binary2))

              precision    recall  f1-score   support

           0       0.89      0.94      0.92      1051
           1       0.83      0.71      0.76       417

    accuracy                           0.88      1468
   macro avg       0.86      0.83      0.84      1468
weighted avg       0.87      0.88      0.87      1468



3) **MobileNetV2**

In [None]:
# Load the saved MobileNetV2 model (.h5 file in the Transfer Learning folder on Drive).
# `model3` is used by the two evaluation cells that follow.
model_path3 = '/content/drive/MyDrive/PCOS_TL_ML/BinaryLabel/Transfer Learning/MobileNetV2/MobileNetV2_model.h5'
model3 = load_model(model_path3)

In [None]:
# Raw MobileNetV2 scores for the validation images.
# Assumes the model emits one sigmoid-style score per image -- TODO confirm.
validate_labels_pred3 = model3.predict(validate_images)

# Apply threshold for binary classification: scores above it become class 1 ("Healthy").
threshold = 0.5
validate_labels_pred_binary3 = (validate_labels_pred3 > threshold).astype(int)

# Reuse the `validate_labels` extracted in the same cell that loaded `validate_images`.
# Re-deriving them from `validate` here would silently pair the cached images with the
# wrong labels if the split cell had been re-run in between.
assert len(validate_labels) == len(validate_images), "validation labels and images are out of sync"

# Generate classification report
print(classification_report(validate_labels, validate_labels_pred_binary3))

              precision    recall  f1-score   support

           0       0.97      0.94      0.95       469
           1       0.85      0.92      0.88       171

    accuracy                           0.93       640
   macro avg       0.91      0.93      0.92       640
weighted avg       0.94      0.93      0.94       640



In [None]:
# Ground truth for the held-out test set, taken from the test label file as a single column.
test_labels = la2[["Healthy"]].values

# Raw MobileNetV2 scores for every test image.
test_labels_pred3 = model3.predict(test_images)

# Scores strictly above the cut-off become class 1, all others class 0.
threshold = 0.5
test_labels_pred_binary3 = np.where(test_labels_pred3 > threshold, 1, 0)

# Per-class precision / recall / F1 on the test set.
print(classification_report(test_labels, test_labels_pred_binary3))

              precision    recall  f1-score   support

           0       0.84      0.95      0.89      1051
           1       0.82      0.56      0.66       417

    accuracy                           0.84      1468
   macro avg       0.83      0.76      0.78      1468
weighted avg       0.84      0.84      0.83      1468



4) **NASNetMobile**

In [None]:
# Load the saved NASNetMobile model (.h5 file in the Transfer Learning folder on Drive).
# `model4` is used by the two evaluation cells that follow.
# NOTE(review): the folder is spelled 'NasNetMoblie' in this path; presumably that matches
# the directory name on Drive -- verify before "correcting" it.
model_path4 = '/content/drive/MyDrive/PCOS_TL_ML/BinaryLabel/Transfer Learning/NasNetMoblie/NASNetMobile_model.h5'
model4 = load_model(model_path4)

In [None]:
# Raw NASNetMobile scores for the validation images.
# Assumes the model emits one sigmoid-style score per image -- TODO confirm.
validate_labels_pred4 = model4.predict(validate_images)

# Apply threshold for binary classification: scores above it become class 1 ("Healthy").
threshold = 0.5
validate_labels_pred_binary4 = (validate_labels_pred4 > threshold).astype(int)

# Reuse the `validate_labels` extracted in the same cell that loaded `validate_images`.
# Re-deriving them from `validate` here would silently pair the cached images with the
# wrong labels if the split cell had been re-run in between.
assert len(validate_labels) == len(validate_images), "validation labels and images are out of sync"

# Generate classification report
print(classification_report(validate_labels, validate_labels_pred_binary4))

              precision    recall  f1-score   support

           0       0.97      0.87      0.92       458
           1       0.74      0.93      0.83       182

    accuracy                           0.89       640
   macro avg       0.86      0.90      0.87       640
weighted avg       0.90      0.89      0.89       640



In [None]:
# Ground truth for the held-out test set, taken from the test label file as a single column.
test_labels = la2[["Healthy"]].values

# Raw NASNetMobile scores for every test image.
test_labels_pred4 = model4.predict(test_images)

# Scores strictly above the cut-off become class 1, all others class 0.
threshold = 0.5
test_labels_pred_binary4 = np.where(test_labels_pred4 > threshold, 1, 0)

# Per-class precision / recall / F1 on the test set.
print(classification_report(test_labels, test_labels_pred_binary4))

              precision    recall  f1-score   support

           0       0.92      0.92      0.92      1051
           1       0.80      0.78      0.79       417

    accuracy                           0.88      1468
   macro avg       0.86      0.85      0.86      1468
weighted avg       0.88      0.88      0.88      1468



5) **ResNet50V2**

In [None]:
# Load the saved ResNet50V2 model (.h5 file in the Transfer Learning folder on Drive).
# `model5` is used by the two evaluation cells that follow.
model_path5 = '/content/drive/MyDrive/PCOS_TL_ML/BinaryLabel/Transfer Learning/ResNet50v2/ResNet50V2_model.h5'
model5 = load_model(model_path5)

In [None]:
# Raw ResNet50V2 scores for the validation images.
# Assumes the model emits one sigmoid-style score per image -- TODO confirm.
validate_labels_pred5 = model5.predict(validate_images)

# Apply threshold for binary classification: scores above it become class 1 ("Healthy").
threshold = 0.5
validate_labels_pred_binary5 = (validate_labels_pred5 > threshold).astype(int)

# Reuse the `validate_labels` extracted in the same cell that loaded `validate_images`.
# Re-deriving them from `validate` here would silently pair the cached images with the
# wrong labels if the split cell had been re-run in between.
assert len(validate_labels) == len(validate_images), "validation labels and images are out of sync"

# Generate classification report
print(classification_report(validate_labels, validate_labels_pred_binary5))

              precision    recall  f1-score   support

           0       0.92      0.98      0.95       467
           1       0.93      0.78      0.85       173

    accuracy                           0.93       640
   macro avg       0.93      0.88      0.90       640
weighted avg       0.93      0.93      0.92       640



In [None]:
# Ground truth for the held-out test set, taken from the test label file as a single column.
test_labels = la2[["Healthy"]].values

# Raw ResNet50V2 scores for every test image.
test_labels_pred5 = model5.predict(test_images)

# Scores strictly above the cut-off become class 1, all others class 0.
threshold = 0.5
test_labels_pred_binary5 = np.where(test_labels_pred5 > threshold, 1, 0)

# Per-class precision / recall / F1 on the test set.
print(classification_report(test_labels, test_labels_pred_binary5))

              precision    recall  f1-score   support

           0       0.86      0.97      0.91      1051
           1       0.88      0.60      0.72       417

    accuracy                           0.87      1468
   macro avg       0.87      0.79      0.81      1468
weighted avg       0.87      0.87      0.86      1468



6) **VGG19**

In [None]:
# Load the saved VGG19 model (.h5 file in the Transfer Learning folder on Drive).
# `model6` is used by the two evaluation cells that follow.
model_path6 = '/content/drive/MyDrive/PCOS_TL_ML/BinaryLabel/Transfer Learning/Vgg19/VGG19_model.h5'
model6 = load_model(model_path6)

In [None]:
# Raw VGG19 scores for the validation images.
# Assumes the model emits one sigmoid-style score per image -- TODO confirm.
validate_labels_pred6 = model6.predict(validate_images)

# Apply threshold for binary classification: scores above it become class 1 ("Healthy").
threshold = 0.5
validate_labels_pred_binary6 = (validate_labels_pred6 > threshold).astype(int)

# Reuse the `validate_labels` extracted in the same cell that loaded `validate_images`.
# Re-deriving them from `validate` here would silently pair the cached images with the
# wrong labels if the split cell had been re-run in between.
assert len(validate_labels) == len(validate_images), "validation labels and images are out of sync"

# Generate classification report
print(classification_report(validate_labels, validate_labels_pred_binary6))

              precision    recall  f1-score   support

           0       0.93      0.97      0.95       460
           1       0.92      0.81      0.86       180

    accuracy                           0.93       640
   macro avg       0.92      0.89      0.91       640
weighted avg       0.93      0.93      0.93       640



In [None]:
# Ground truth for the held-out test set, taken from the test label file as a single column.
test_labels = la2[["Healthy"]].values

# Raw VGG19 scores for every test image.
test_labels_pred6 = model6.predict(test_images)

# Scores strictly above the cut-off become class 1, all others class 0.
threshold = 0.5
test_labels_pred_binary6 = np.where(test_labels_pred6 > threshold, 1, 0)

# Per-class precision / recall / F1 on the test set.
print(classification_report(test_labels, test_labels_pred_binary6))

              precision    recall  f1-score   support

           0       0.86      0.95      0.90      1051
           1       0.83      0.60      0.70       417

    accuracy                           0.85      1468
   macro avg       0.84      0.78      0.80      1468
weighted avg       0.85      0.85      0.84      1468



7) **Xception**

In [None]:
# Load the saved Xception model (.h5 file in the Transfer Learning folder on Drive).
# `model7` is used by the two evaluation cells that follow.
model_path7 = '/content/drive/MyDrive/PCOS_TL_ML/BinaryLabel/Transfer Learning/Xception/Xception_model.h5'
model7 = load_model(model_path7)

In [None]:
# Raw Xception scores for the validation images.
# Assumes the model emits one sigmoid-style score per image -- TODO confirm.
validate_labels_pred7 = model7.predict(validate_images)

# Apply threshold for binary classification: scores above it become class 1 ("Healthy").
threshold = 0.5
validate_labels_pred_binary7 = (validate_labels_pred7 > threshold).astype(int)

# Reuse the `validate_labels` extracted in the same cell that loaded `validate_images`.
# Re-deriving them from `validate` here would silently pair the cached images with the
# wrong labels if the split cell had been re-run in between.
assert len(validate_labels) == len(validate_images), "validation labels and images are out of sync"

# Generate classification report
print(classification_report(validate_labels, validate_labels_pred_binary7))

              precision    recall  f1-score   support

           0       0.95      0.98      0.96       462
           1       0.94      0.87      0.90       178

    accuracy                           0.95       640
   macro avg       0.95      0.92      0.93       640
weighted avg       0.95      0.95      0.95       640



In [None]:
# Ground truth for the held-out test set, taken from the test label file as a single column.
test_labels = la2[["Healthy"]].values

# Raw Xception scores for every test image.
test_labels_pred7 = model7.predict(test_images)

# Scores strictly above the cut-off become class 1, all others class 0.
threshold = 0.5
test_labels_pred_binary7 = np.where(test_labels_pred7 > threshold, 1, 0)

# Per-class precision / recall / F1 on the test set.
print(classification_report(test_labels, test_labels_pred_binary7))

              precision    recall  f1-score   support

           0       0.87      0.96      0.91      1051
           1       0.87      0.62      0.73       417

    accuracy                           0.87      1468
   macro avg       0.87      0.79      0.82      1468
weighted avg       0.87      0.87      0.86      1468

