<a href="https://colab.research.google.com/github/hureramujeeb60/Final-year-project-code/blob/main/fyp_code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Make Google Drive available to this Colab session under /content/drive.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, roc_curve, auc
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
import joblib

# Locations of the train and test image folders.
train_dir = '/kaggle/input/melanoma-skin-cancer-dataset-of-10000-images/melanoma_cancer_dataset/train'
test_dir = '/kaggle/input/melanoma-skin-cancer-dataset-of-10000-images/melanoma_cancer_dataset/test'

# Random geometric augmentation is applied to training images only; both
# generators rescale pixel values by 1/255.
augmentation_options = dict(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
train_datagen = ImageDataGenerator(rescale=1./255, **augmentation_options)
test_datagen = ImageDataGenerator(rescale=1./255)

# Settings shared by both directory iterators: 224x224 inputs, batches of 32,
# and a single 0/1 label per image.
directory_flow_options = dict(
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary',
)

train_generator = train_datagen.flow_from_directory(train_dir, **directory_flow_options)

# shuffle=False keeps the prediction order aligned with test_generator.classes,
# which the confusion matrix and ROC code rely on.
test_generator = test_datagen.flow_from_directory(
    test_dir,
    shuffle=False,
    **directory_flow_options
)

# DenseNet121 backbone with ImageNet weights and without its original classifier.
base_model = DenseNet121(include_top=False, weights='imagenet', input_shape=(224, 224, 3))
# Freeze the backbone so that only the head added below is trained.
base_model.trainable = False

# Head: flatten -> 256-unit ReLU layer -> 50% dropout -> one sigmoid output unit.
model = models.Sequential()
model.add(base_model)
model.add(layers.Flatten())
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(1, activation='sigmoid'))

# Binary cross-entropy matches the single sigmoid output.
model.compile(
    loss='binary_crossentropy',
    optimizer=Adam(learning_rate=0.0001),
    metrics=['accuracy'],
)

# Train for 20 epochs. The test generator is also used as validation data, so
# the val_* history entries are computed on the same images evaluated below.
history = model.fit(
    train_generator,
    validation_data=test_generator,
    epochs=20,
)

# Score the trained model on the test generator; the result is unpacked as
# (loss, accuracy).
evaluation = model.evaluate(test_generator)
test_loss, test_accuracy = evaluation
print(f'Test Accuracy: {test_accuracy:.4f}')

# Sigmoid outputs for every test image, then hard 0/1 labels at a 0.5 cut-off.
y_pred_probs = model.predict(test_generator)
y_pred_binary = (y_pred_probs > 0.5).astype(int)

# Confusion matrix of true generator labels against thresholded predictions.
cm = confusion_matrix(test_generator.classes, y_pred_binary)

# Draw the matrix as an annotated heatmap.
tick_names = ['No Melanoma', 'Melanoma']
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=tick_names,
    yticklabels=tick_names,
)
plt.title('Confusion Matrix')
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.show()

# ROC curve from the raw probabilities, and the area under it.
fpr, tpr, _ = roc_curve(test_generator.classes, y_pred_probs)
roc_auc = auc(fpr, tpr)

# Write the ROC points to CSV.
roc_points = pd.DataFrame({'FPR': fpr, 'TPR': tpr})
roc_points.to_csv('densenet121-20-s4.csv', index=False)

print(f'AUC: {roc_auc:.4f}')

# Write the trained model to an HDF5 (.h5) file.
model.save(filepath='densenet121-20-p4.h5')

# Additionally serialise the model object with joblib.
# NOTE(review): whether a Keras model can be pickled depends on the installed
# Keras/TensorFlow version -- confirm this dump can actually be loaded back.
joblib.dump(value=model, filename='densenet121-20-p4.joblib')

# Two side-by-side panels: accuracy on the left, loss on the right. Each panel
# shows the training curve and the validation curve from `history`.
plt.figure(figsize=(14, 5))

curve_panels = (
    # (train key, validation key, train legend, validation legend, title, y-axis label)
    ('accuracy', 'val_accuracy', 'Train Accuracy', 'Validation Accuracy', 'Model Accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Train Loss', 'Validation Loss', 'Model Loss', 'Loss'),
)

for panel_index, (train_key, val_key, train_legend, val_legend, panel_title, y_axis_label) in enumerate(curve_panels, start=1):
    plt.subplot(1, 2, panel_index)
    plt.plot(history.history[train_key], label=train_legend)
    plt.plot(history.history[val_key], label=val_legend)
    plt.title(panel_title)
    plt.ylabel(y_axis_label)
    plt.xlabel('Epoch')
    plt.legend()

plt.tight_layout()
plt.show()

In [None]:
# Recompute the DenseNet confusion matrix and redraw it with
# Benign/Malignant axis labels.
cm = confusion_matrix(y_true=test_generator.classes, y_pred=y_pred_binary)

class_labels = ['Benign', 'Malignant']
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_labels,
    yticklabels=class_labels,
)
plt.title('Confusion Matrix')
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.show()

In [None]:
# `model` ends in a single sigmoid unit, so predict() returns one probability
# per image. Taking argmax over axis 1 of that single column would always give
# 0; threshold at 0.5 instead to obtain the predicted class index.
y_pred = model.predict(test_generator)
y_pred = (y_pred > 0.5).astype(int).ravel()

In [None]:
from sklearn.metrics import classification_report

# Threshold the stored DenseNet probabilities at 0.5 and collapse the result to
# a flat vector of 0/1 labels.
y_pred_binary = np.ravel(y_pred_probs > 0.5).astype(int)

# Per-class precision / recall / F1 against the generator's true labels.
print("\nClassification Report:")
report_class_names = ['No Melanoma', 'Melanoma']
print(
    classification_report(
        test_generator.classes,
        y_pred_binary,
        target_names=report_class_names,
    )
)

In [None]:
# Save the DenseNet model in the native .keras format; the ensemble cells later
# load a file with this name.
model.save(filepath="denseNet.keras")

In [None]:
# =========================

In [None]:
import os
import shutil
import itertools
import pathlib


import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
sns.set_style('whitegrid')
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix , classification_report

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D , MaxPooling2D , Flatten , Activation , Dense , Dropout , BatchNormalization
from tensorflow.keras.applications import VGG19
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam , Adamax
from tensorflow.keras import regularizers


In [None]:
def _build_image_dataframe(root_dir):
    """Return a DataFrame listing every image file under ``root_dir``.

    Each immediate sub-directory of ``root_dir`` is treated as one class: its
    name becomes the label of every file it contains. Entries of ``root_dir``
    that are not directories are skipped. Directory listings are sorted so the
    row order is the same on every run.

    Args:
        root_dir: Path of a folder containing one sub-folder per class.

    Returns:
        A DataFrame with string columns ``filepaths`` and ``labels``.
    """
    filepaths = []
    labels = []
    for class_name in sorted(os.listdir(root_dir)):
        class_dir = os.path.join(root_dir, class_name)
        if not os.path.isdir(class_dir):
            # A stray file next to the class folders is not a class.
            continue
        for file_name in sorted(os.listdir(class_dir)):
            filepaths.append(os.path.join(class_dir, file_name))
            labels.append(class_name)
    return pd.DataFrame({'filepaths': filepaths, 'labels': labels})


train_dir = '/kaggle/input/melanoma-skin-cancer-dataset-of-10000-images/melanoma_cancer_dataset/train'
test_dir = '/kaggle/input/melanoma-skin-cancer-dataset-of-10000-images/melanoma_cancer_dataset/test'

# The generator cell that follows reads both train_df and test_df, so both
# frames are built here (train_df was previously never defined).
train_df = _build_image_dataframe(train_dir)
test_df = _build_image_dataframe(test_dir)

In [None]:
# Input geometry and batch size for the VGG19 pipeline.
img_size = (224, 224)
channels = 3
img_shape = img_size + (channels,)
batch_size = 16

# Both generators are created with default arguments, i.e. no rescaling or
# augmentation is configured here.
tr_gen = ImageDataGenerator()
ts_gen = ImageDataGenerator()

# Options common to the train and test iterators: paths and labels come from
# the 'filepaths' / 'labels' columns and labels are one-hot encoded.
dataframe_flow_options = dict(
    x_col='filepaths',
    y_col='labels',
    target_size=img_size,
    class_mode='categorical',
    color_mode='rgb',
    batch_size=batch_size,
)

train_gen = tr_gen.flow_from_dataframe(dataframe=train_df, shuffle=True, **dataframe_flow_options)

# The test iterator is not shuffled so predictions line up with test_gen.classes.
test_gen = ts_gen.flow_from_dataframe(dataframe=test_df, shuffle=False, **dataframe_flow_options)

In [None]:
from tensorflow.keras.applications import VGG19
from tensorflow.keras.models import Sequential
# Dropout is used below, so import it here rather than relying on another cell.
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.optimizers import Adam

img_size = (224, 224)
channels = 3
img_shape = (img_size[0], img_size[1], channels)
class_count = 2

# VGG19 backbone with ImageNet weights and no classifier head, kept frozen so
# only the dense head below is trained.
base_model = VGG19(weights='imagenet', include_top=False, input_shape=img_shape)
base_model.trainable = False

# Head: flatten -> 512 ReLU -> dropout -> 256 ReLU -> dropout -> softmax over
# `class_count` classes.
model2 = Sequential()
model2.add(base_model)
model2.add(Flatten())
model2.add(Dense(512, activation='relu'))
model2.add(Dropout(0.3))
model2.add(Dense(256, activation='relu'))
model2.add(Dropout(0.3))
model2.add(Dense(class_count, activation='softmax'))

# The output is a softmax distribution and the generators use
# class_mode='categorical' (one-hot labels), so the matching loss is
# categorical cross-entropy rather than binary cross-entropy.
model2.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

model2.summary()

In [None]:
# Train the VGG19 model for EPOCHS epochs, validating on the test generator.
# Note that this rebinds `history`, replacing the DenseNet training history.
# NOTE(review): Keras documents `shuffle` as ignored when x is a generator;
# sample order is then governed by the generator itself -- confirm.
EPOCHS = 12
vgg_fit_options = dict(
    epochs=EPOCHS,
    verbose=1,
    validation_data=test_gen,
    validation_steps=None,
    shuffle=False,
)
history = model2.fit(train_gen, **vgg_fit_options)

In [None]:
# Evaluate the VGG19 model on the test generator and print the two returned
# values (loss first, then the compiled metric), one per line.
vgg_evaluation = model2.evaluate(test_gen)
loss, test = vgg_evaluation
print(loss, test, sep='\n')

In [None]:
# model2 ends in a softmax layer, so argmax over axis 1 already yields the
# predicted class index for each image. No extra 0.5 threshold is needed, and
# one would wrongly merge classes if class_count were ever raised above 2.
y_pred = model2.predict(test_gen)
y_pred = np.argmax(y_pred, axis=1)

# Kept under this name because the confusion-matrix cell reads y_pred_binary.
y_pred_binary = y_pred.astype(int)

In [None]:
# Confusion matrix for the VGG19 predictions against the test iterator's labels,
# shown as an annotated heatmap.
cm = confusion_matrix(y_true=test_gen.classes, y_pred=y_pred_binary)

axis_class_names = ['Benign', 'Malignant']
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=axis_class_names,
    yticklabels=axis_class_names,
)
plt.title('Confusion Matrix')
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.show()

In [None]:
from sklearn.metrics import classification_report

# classification_report takes (y_true, y_pred) in that order. Passing the
# predictions first would swap precision and recall in the printed table.
print(classification_report(test_gen.classes, y_pred))

In [None]:
# Save the VGG19 model in the native .keras format; the ensemble cells later
# load a file with this name.
model2.save(filepath="VGG192.keras")

In [None]:
from tensorflow.keras.models import load_model

# Checkpoint locations under /kaggle/working.
# NOTE(review): presumably these are the files written by the earlier
# model.save(...) cells -- this assumes the notebook's working directory is
# /kaggle/working; confirm.
vgg19_model_path = '/kaggle/working/VGG192.keras'
densenet_model_path = '/kaggle/working/denseNet.keras'

# Restore both networks, VGG19 first and DenseNet second.
vgg19_model, densenet_model = map(load_model, (vgg19_model_path, densenet_model_path))

In [None]:
import numpy as np

# The DenseNet pipeline in this notebook trains on pixels rescaled by 1/255,
# whereas test_gen (built for VGG19) yields unscaled pixels. Give DenseNet its
# own rescaling iterator over the same DataFrame; shuffle=False keeps its rows
# in the same order as test_gen so the two prediction arrays stay aligned.
densenet_test_gen = ImageDataGenerator(rescale=1./255).flow_from_dataframe(
    dataframe=test_df, x_col='filepaths', y_col='labels', target_size=img_size,
    class_mode='categorical', color_mode='rgb', shuffle=False, batch_size=batch_size)

# Get predictions from both models on the test data.
vgg19_predictions = vgg19_model.predict(test_gen)
densenet_predictions = densenet_model.predict(densenet_test_gen)

# The two heads have different output layouts: VGG19 emits a 2-column softmax,
# DenseNet a single sigmoid column. Adding them directly would broadcast the
# DenseNet column onto both VGG19 columns, so reduce each to the probability of
# class index 1 first.
# NOTE(review): assumes the loaded checkpoints are the models built earlier in
# this notebook and that both were trained with the same class-index order
# (class folders sorted alphabetically) -- confirm.
vgg19_positive = vgg19_predictions[:, 1]
densenet_positive = densenet_predictions.reshape(-1)

# Unweighted average of the two class-1 probabilities, one value per image.
combined_predictions = (vgg19_positive + densenet_positive) / 2

# Convert probabilities to class labels (0 or 1) with a 0.5 threshold.
final_predictions = np.where(combined_predictions > 0.5, 1, 0)

In [None]:
from tensorflow.keras.models import load_model
from sklearn.metrics import accuracy_score, classification_report
import numpy as np

# Load the models
vgg19_model_path = '/kaggle/working/VGG192.keras'
densenet_model_path = '/kaggle/working/denseNet.keras'

vgg19_model = load_model(vgg19_model_path)
densenet_model = load_model(densenet_model_path)

# The DenseNet pipeline in this notebook trains on pixels rescaled by 1/255,
# whereas test_gen (built for VGG19) yields unscaled pixels. Give DenseNet its
# own rescaling iterator over the same DataFrame; shuffle=False keeps its rows
# in the same order as test_gen so predictions and labels stay aligned.
densenet_test_gen = ImageDataGenerator(rescale=1./255).flow_from_dataframe(
    dataframe=test_df, x_col='filepaths', y_col='labels', target_size=img_size,
    class_mode='categorical', color_mode='rgb', shuffle=False, batch_size=batch_size)

# Get predictions from both models on the test data
vgg19_predictions = vgg19_model.predict(test_gen)
densenet_predictions = densenet_model.predict(densenet_test_gen)

# VGG19 emits a 2-column softmax while DenseNet emits a single sigmoid column.
# Expand the sigmoid output p into [1 - p, p] so both models describe the same
# two classes. Without this the sum broadcasts the same DenseNet value onto
# both VGG19 columns and argmax ignores DenseNet completely.
# NOTE(review): assumes the loaded checkpoints are the models built earlier in
# this notebook and that both were trained with the same class-index order
# (class folders sorted alphabetically) -- confirm.
densenet_positive = densenet_predictions.reshape(-1, 1)
densenet_class_probs = np.hstack([1.0 - densenet_positive, densenet_positive])

# Check the mean prediction values. The mean is taken over the class-1
# probability: averaging every entry of a softmax output is always 0.5 for two
# classes and would say nothing about the model.
print(f"VGG19 Mean Predictions: {np.mean(vgg19_predictions[:, 1]):.3f}")
print(f"DenseNet121 Mean Predictions: {np.mean(densenet_positive):.3f}")

# Weighted averaging: apply a fixed weight to each model's class probabilities
vgg19_weight = 0.6
densenet_weight = 0.4

# Calculate weighted predictions (two columns, one per class)
weighted_predictions = (vgg19_weight * vgg19_predictions) + (densenet_weight * densenet_class_probs)

# Pick the class with the larger weighted probability
final_predictions_weighted = np.argmax(weighted_predictions, axis=1)

# True class indices in the (unshuffled) order of the test iterator
y_true = test_gen.classes

# Evaluate the weighted ensemble
accuracy_weighted = accuracy_score(y_true, final_predictions_weighted)
print(f"Weighted Stacking Model Accuracy: {accuracy_weighted:.4f}")

# Classification report
print("Weighted Stacking Classification Report:")
print(classification_report(y_true, final_predictions_weighted))

# Analyze prediction distribution: how many images were assigned to each class
unique, counts = np.unique(final_predictions_weighted, return_counts=True)
print(f"Prediction Distribution: {dict(zip(unique, counts))}")
