
# Import Necessary Libraries


# Import libraries

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import os
import torch.nn.functional as F
from PIL import Image
from torch.utils.data import Dataset
from torchvision import transforms
from natsort import natsorted # type: ignore
from sklearn.model_selection import GroupShuffleSplit
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
import torchvision.models as models
import time
import numpy as np
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import roc_curve, roc_auc_score
from sklearn.metrics import precision_recall_curve, average_precision_score, roc_auc_score
from sklearn.metrics import precision_score as skl_precision_score
from sklearn.metrics import recall_score as skl_recall_score
from sklearn.metrics import f1_score as skl_f1_score
from sklearn.metrics import accuracy_score as skl_accuracy_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score, recall_score, f1_score, classification_report



import random
import warnings
import shutil





# Import the code from all .py files

from custom_dataset import CustomMelanomaDataset  # Import the custom dataset
from resnet_model import ResNetModel
from preprocessing_csv import PreprocessingCSV
from train_test_loop import train_and_test
from interactive_visual_comparison import load_metadata, interactive_visual_comparison



#import torch.profiler
#from torch.utils.tensorboard import SummaryWriter
#from tensorboardX import SummaryWriter
#from torch.profiler import profile, ProfilerActivity
#import tkinter as tk
#from tkinter import filedialog, messagebox



In [2]:
# Install a catch-all "ignore" filter so library warnings do not clutter the
# notebook output. This hides every warning category for the whole session.
warnings.filterwarnings(action="ignore")


In [None]:
# Resolve the project root relative to the current user's Desktop so the
# notebook runs unchanged on any machine that follows the same folder layout.
BASE_DIR = os.path.join(os.path.expanduser('~'), 'Desktop')
root_path = BASE_DIR  # same location; kept as an alias in case other cells reference `root_path`

# Full path to the ground-truth CSV file
csv_path = os.path.join(BASE_DIR, 'Thesis_Hafeez/Dataset/Train_JPEG/ISIC_2020_Training_GroundTruth.csv')

# Load the CSV file
df = pd.read_csv(csv_path)

# Display the first few rows of the dataset
print(df.head())

# Display the structure of the dataset.
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() would append a stray "None" line.
df.info()


In [4]:
# Step 1: Define the universal path handling logic.
# Every location below is derived from BASE_DIR inside this cell, so the cell
# can be executed on its own without relying on names from earlier cells.
BASE_DIR = os.path.join(os.path.expanduser('~'), 'Desktop')
PROJECT_DIR = os.path.join(BASE_DIR, 'Thesis_Hafeez')
DATASET_DIR = os.path.join(PROJECT_DIR, 'Dataset')
TRAIN_JPEG_DIR = os.path.join(DATASET_DIR, 'Train_JPEG')
SPLIT_CSV_DIR = os.path.join(DATASET_DIR, 'split_csv')

# Universal Path Setup for Images
IMAGE_DIR = os.path.join(TRAIN_JPEG_DIR, 'JPEG')

# Paths for Train/Test CSVs (both live in SPLIT_CSV_DIR)
TRAIN_CSV_PATH = os.path.join(SPLIT_CSV_DIR, 'train_split.csv')
TEST_CSV_PATH = os.path.join(SPLIT_CSV_DIR, 'test_split.csv')

# Raw ground-truth CSV and the file name used for its preprocessed counterpart
CSV_PATH = os.path.join(TRAIN_JPEG_DIR, 'ISIC_2020_Training_GroundTruth.csv')
preprocess_CSV_PATH = os.path.join(TRAIN_JPEG_DIR, 'ISIC_2020_Training_GroundTruth_preprocess.csv')

# Output directory for the trained model and result artefacts
MODEL_PATH = os.path.join(
    PROJECT_DIR,
    'Thesis_Code',
    'Enhanced-Skin-Lesion-detection-using-Deep-Learning-model',
    'results',
    'output',
)

In [None]:
# Step: Create an instance of PreprocessingCSV, handing it the raw
# ground-truth CSV path (CSV_PATH) and the base directory (BASE_DIR).
preprocessor = PreprocessingCSV(CSV_PATH, BASE_DIR)

# Step: Execute the preprocessing steps, one method call per stage.
# NOTE(review): these methods are implemented in preprocessing_csv.py, which is
# not visible here. Judging by their names they inspect the raw data, clean and
# save it, split it by patient ID and verify the result — confirm against that
# module before reordering or skipping any call.
preprocessor.analyze_raw_data()
preprocessor.check_for_anomalies()
preprocessor.clean_data()
preprocessor.save_clean_data()
preprocessor.split_by_patient_id()
preprocessor.verify_preprocessed_data()

# Define Transformations

In [6]:

# Settings shared by the training and the test pipeline.
IMAGE_SIZE = (224, 224)                 # every image is resized to this fixed size
NORMALIZE_MEAN = [0.485, 0.456, 0.406]  # per-channel mean used for normalization
NORMALIZE_STD = [0.229, 0.224, 0.225]   # per-channel std used for normalization

# Training pipeline: resize, random horizontal/vertical flips as augmentation,
# then tensor conversion and per-channel normalization.
train_transforms = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORMALIZE_MEAN, std=NORMALIZE_STD),
])

# Test pipeline: same resize and normalization, but no random augmentation.
test_transforms = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORMALIZE_MEAN, std=NORMALIZE_STD),
])


# Define train and test datasets


In [7]:
# Build the two datasets. Both read images from IMAGE_DIR; they differ only in
# the split CSV, the transform pipeline and the is_test flag.
train_dataset = CustomMelanomaDataset(
    csv_file=TRAIN_CSV_PATH,
    image_dir=IMAGE_DIR,
    transform=train_transforms,
    is_test=False,  # training split
)
test_dataset = CustomMelanomaDataset(
    csv_file=TEST_CSV_PATH,
    image_dir=IMAGE_DIR,
    transform=test_transforms,
    is_test=True,  # test split
)


# Training configuration (hyper-parameters and device)

In [None]:
# Optimisation hyper-parameters
lr = 1e-4
NUM_EPOCHS = 15
BATCH_SIZE = 32
num_workers = 4

# Run on the GPU when PyTorch can see one, otherwise on the CPU. Pinned host
# memory is requested only in the CUDA case, so both follow the same check.
PIN_MEMORY = torch.cuda.is_available()
DEVICE = torch.device('cuda' if PIN_MEMORY else 'cpu')

print(f"[INFO] Using device: {DEVICE}")

# Define dataloaders, lossFunc, Optim

In [9]:
# Options common to both loaders: batch size, number of worker processes and
# whether batches are placed in pinned memory (enabled only when CUDA is used).
_loader_kwargs = dict(
    batch_size=BATCH_SIZE,
    num_workers=num_workers,
    pin_memory=PIN_MEMORY,
)

# Training batches are reshuffled; test batches keep the dataset order.
train_loader = DataLoader(dataset=train_dataset, shuffle=True, **_loader_kwargs)
test_loader = DataLoader(dataset=test_dataset, shuffle=False, **_loader_kwargs)

In [None]:
# The length of a loader is its number of batches, i.e. the steps per epoch.
trainSteps, testSteps = len(train_loader), len(test_loader)

print(f"[INFO] Training steps per epoch: {trainSteps}")
print(f"[INFO] Testing steps per epoch: {testSteps}")


 # Initialize Model, Loss Function, and Optimizer

In [11]:
# Count how many training rows carry each class label.
_train_labels = train_dataset.metadata['benign_malignant']
benign_count = int((_train_labels == 'benign').sum())
malignant_count = int((_train_labels == 'malignant').sum())

# Weight for the positive (malignant) class: the benign-to-malignant ratio,
# stored as a one-element float tensor on the training device.
_class_ratio = benign_count / malignant_count
pos_weight = torch.tensor([_class_ratio], dtype=torch.float).to(DEVICE)

In [12]:
# Build the network. It is told how many metadata features accompany each
# image (3 here: sex, age and site) and is moved to the selected device.
num_metadata_features = 3
model = ResNetModel(num_metadata_features).to(DEVICE)

# Binary cross-entropy on raw logits, with the positive class scaled by
# pos_weight to counter the class imbalance.
lossFunc = nn.BCEWithLogitsLoss(pos_weight=pos_weight)

# Adam optimiser plus a step schedule that multiplies the learning rate by
# 0.1 after every 5 scheduler steps.
optimizer = optim.Adam(model.parameters(), lr=lr)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)


In [None]:
# Print the model's textual representation as a quick sanity check of the
# architecture before training starts.
print(model)

# Initialize Training History Dictionary

#  Training Loop Implementation with Metric Tracking and Validation

In [None]:
# Run the training and test loop with the objects configured above.
# NOTE(review): train_and_test is implemented in train_test_loop.py, which is
# not visible here. The plotting code further down indexes its return value H
# by keys such as "train_loss", "test_loss", "train_acc" and "test_acc", each
# holding one entry per epoch — confirm the full key set against that module.
H = train_and_test(
    model=model,
    train_loader=train_loader,
    test_loader=test_loader,
    optimizer=optimizer,
    scheduler=scheduler,
    lossFunc=lossFunc,
    DEVICE=DEVICE,
    NUM_EPOCHS=NUM_EPOCHS
)

# After training, we can use H for further analysis or plotting


In [None]:
# After the training loop ends, save the model.
# exist_ok=True creates the directory when it is missing and does nothing when
# it already exists, so no separate os.path.exists() check is needed.
os.makedirs(MODEL_PATH, exist_ok=True)

# Model file name (with the .pth extension) and its full destination path
model_filename = "melanoma_trained_model.pth"
model_save_path = os.path.join(MODEL_PATH, model_filename)

# Save the learned parameters (the state_dict) once; a second identical write
# to the same path would only repeat the disk I/O.
torch.save(model.state_dict(), model_save_path)
print(f"Model saved to {model_save_path}")

# Plot the Training Metrics 

In [None]:
def plot_metrics(H, save_path=None, dpi=300):
    """Plot the per-epoch training and test metrics stored in ``H``.

    Draws a 4x2 grid: six train-vs-test line charts (loss, accuracy,
    precision, recall, F1 score, ROC AUC), the test precision-recall curve of
    every epoch, and the test average-precision score per epoch.

    Args:
        H: Mapping from metric name to a per-epoch sequence. The keys read
            are ``train_<m>`` and ``test_<m>`` for ``loss``, ``acc``,
            ``precision``, ``recall``, ``f1`` and ``roc_auc``, plus
            ``test_precision_recall_curve`` (one ``(precision, recall)`` pair
            per epoch) and ``test_average_precision``.
        save_path: Optional file path. When given, the figure is written
            there *before* ``plt.show()`` is called, because the figure may
            no longer be the current one once it has been shown.
        dpi: Resolution used when ``save_path`` is given.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    epochs = range(1, len(H["train_loss"]) + 1)

    plt.figure(figsize=(16, 20))

    # (key suffix in H, human-readable name) for the six train-vs-test panels.
    # The position in this list decides the subplot slot (1..6).
    paired_metrics = [
        ("loss", "Loss"),
        ("acc", "Accuracy"),
        ("precision", "Precision"),
        ("recall", "Recall"),
        ("f1", "F1 Score"),
        ("roc_auc", "ROC AUC"),
    ]
    for position, (key, name) in enumerate(paired_metrics, start=1):
        plt.subplot(4, 2, position)
        plt.plot(epochs, H[f"train_{key}"], 'b', label=f'Train {name}')
        plt.plot(epochs, H[f"test_{key}"], 'r', label=f'Test {name}')
        plt.title(f'Training and Test {name}')
        plt.xlabel('Epochs')
        plt.ylabel(name)
        plt.legend()

    # Precision-recall curve: one line per epoch
    plt.subplot(4, 2, 7)
    for epoch, (precision, recall) in enumerate(H["test_precision_recall_curve"], start=1):
        plt.plot(recall, precision, label=f'Epoch {epoch}')
    plt.title('Precision-Recall Curve')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.legend()

    # Average precision score on the test split, per epoch
    plt.subplot(4, 2, 8)
    plt.plot(epochs, H["test_average_precision"], 'b', label='Test Average Precision')
    plt.title('Average Precision Score')
    plt.xlabel('Epochs')
    plt.ylabel('Average Precision')
    plt.legend()

    plt.tight_layout()
    if save_path is not None:
        # Save while the figure is still the current one.
        plt.savefig(save_path, dpi=dpi)
    plt.show()


# Plot the metrics
plot_metrics(H)

In [None]:
 # Save the plot to the specified path
plot_filename = os.path.join(MODEL_PATH, "training_metrics_plot.png")

# plt.savefig() writes the *current* figure. If the metrics figure was already
# shown and released (for example by plt.show() in an earlier notebook cell),
# pyplot would silently create a new empty figure and save a blank image.
# Only save when a figure is actually open, and say so otherwise.
if plt.get_fignums():
    # Use a higher DPI for better quality
    plt.savefig(plot_filename, format='png', dpi=300)
    print(f"Plot saved to {plot_filename}")
    plt.close()  # release the figure once it has been written to disk
else:
    print("[WARN] No open figure to save; re-create the plot in this cell before calling plt.savefig().")


In [None]:
# Paths for Train/Test CSVs (both files sit in the same split directory)
_split_dir = os.path.join(BASE_DIR, 'Thesis_Hafeez', 'Dataset', 'split_csv')
TRAIN_CSV_PATH = os.path.join(_split_dir, 'train_split.csv')
TEST_CSV_PATH = os.path.join(_split_dir, 'test_split.csv')

df_train = pd.read_csv(TRAIN_CSV_PATH)  # training split
df_test = pd.read_csv(TEST_CSV_PATH)  # test split

# Report how often each value of the 'target' column occurs in each split.
for _banner, _frame in (
    ("Target label count in training....", df_train),
    ("Target label count in testing....", df_test),
):
    print(_banner)
    print(_frame['target'].value_counts())


# interactive visual comparison


In [None]:
# Load the metadata of the test split from its CSV file.
# NOTE(review): when the cells run top to bottom, load_metadata and
# interactive_visual_comparison are still the versions imported from
# interactive_visual_comparison.py at this point; a later cell redefines both
# names locally.
TEST_CSV_PATH = os.path.join(BASE_DIR, 'Thesis_Hafeez', 'Dataset', 'split_csv', 'test_split.csv')
test_metadata_df = load_metadata(TEST_CSV_PATH)

# Run the interactive visual comparison function on the trained model and the
# test loader, passing the metadata table loaded above.
interactive_visual_comparison(model, test_loader, DEVICE, test_metadata_df)


In [None]:
import pandas as pd
import torch
import random
import matplotlib.pyplot as plt
from torchvision import transforms

# Load the test CSV to get metadata information
def load_metadata(csv_path):
    """Read the CSV at ``csv_path`` and return its contents as a DataFrame.

    Args:
        csv_path: Anything ``pandas.read_csv`` accepts as its first argument.

    Returns:
        The parsed ``pandas.DataFrame``.
    """
    metadata_frame = pd.read_csv(csv_path)
    return metadata_frame

# Define Function for Interactive Visual Comparison of Random Test Images
def interactive_visual_comparison(model, test_loader, device, test_metadata_df,
                                  num_samples=20,
                                  mean=(0.485, 0.456, 0.406),
                                  std=(0.229, 0.224, 0.225)):
    """Show randomly chosen test images next to their true and predicted labels.

    The model is run over every batch of ``test_loader``. A random subset of
    the samples is then drawn, one row per sample: the image on the left, the
    ground-truth entry from ``test_metadata_df`` and the prediction on the
    right.

    Args:
        model: Network called as ``model(images, metadata)``; its output is
            treated as logits (sigmoid, then rounded to 0/1).
        test_loader: Iterable yielding ``(images, metadata, targets,
            image_names)`` batches.
        device: Device the image and metadata batches are moved to.
        test_metadata_df: DataFrame with the columns ``image_name``,
            ``benign_malignant`` and ``target``.
        num_samples: Maximum number of samples to display. Fewer are shown
            when the loader yields fewer samples.
        mean, std: Per-channel statistics used to undo the input
            normalization before display. The defaults match the
            ``Normalize`` step of this notebook's transforms.
            NOTE(review): this assumes the dataset applies that transform;
            pass ``mean=None`` (or ``std=None``) to display tensors as-is.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    model.eval()
    all_images = []
    all_image_names = []
    all_preds = []
    transform_back = transforms.ToPILImage()

    # NOTE: every test image is kept in memory until the random subset is
    # drawn; this mirrors the original approach and scales with the test set.
    with torch.no_grad():
        for images, metadata, _targets, image_names in test_loader:
            images, metadata = images.to(device), metadata.to(device)

            # Make predictions: logits -> probabilities -> hard 0/1 labels
            outputs = model(images, metadata)
            preds = torch.sigmoid(outputs).round()

            all_images.extend(images.cpu())
            all_image_names.extend(image_names)  # Use the image names directly
            # Flatten to one plain int per sample, so the comparisons and the
            # int() formatting below never operate on arrays.
            all_preds.extend(int(p) for p in preds.reshape(-1).cpu().tolist())

    # Never ask random.sample for more items than exist (it would raise).
    sample_count = max(0, min(num_samples, len(all_images)))
    if sample_count == 0:
        print("No samples available for visual comparison.")
        return
    indices = random.sample(range(len(all_images)), sample_count)

    undo_normalization = mean is not None and std is not None
    if undo_normalization:
        # Shape (C, 1, 1) so the statistics broadcast over height and width.
        mean_t = torch.tensor(mean).view(-1, 1, 1)
        std_t = torch.tensor(std).view(-1, 1, 1)

    # One grid row per sample with two columns (image | text): the grid has
    # sample_count rows, so the subplot indices run from 1 to 2 * sample_count.
    plt.figure(figsize=(20, 2 * sample_count))
    for row, idx in enumerate(indices):
        img_name = all_image_names[idx]
        predicted_target = all_preds[idx]
        predicted_label = "malignant" if predicted_target == 1 else "benign"

        # Fetch metadata from the CSV file
        meta_row = test_metadata_df[test_metadata_df['image_name'] == img_name]

        if meta_row.empty:
            print(f"Warning: Metadata for image '{img_name}' not found.")
            benign_malignant = "unknown"
            target = -1  # Use a placeholder value for target
        else:
            benign_malignant = meta_row['benign_malignant'].values[0]
            target = int(meta_row['target'].values[0])

        # Convert the image tensor back to a PIL image, mapping normalized
        # values back into [0, 1] first so the colours are displayed correctly.
        img_tensor = all_images[idx]
        if undo_normalization:
            img_tensor = (img_tensor * std_t + mean_t).clamp(0.0, 1.0)
        img = transform_back(img_tensor)

        # Plot the image
        plt.subplot(sample_count, 2, row * 2 + 1)
        plt.imshow(img)
        plt.axis('off')
        plt.title(f"Image: {img_name}")

        # Plot metadata and prediction details
        plt.subplot(sample_count, 2, row * 2 + 2)
        plt.axis('off')
        plt.text(0.1, 0.8, f"Original: {benign_malignant} (Target: {target})", fontsize=12)
        plt.text(0.1, 0.6, f"Predicted: {predicted_label} (Predicted Target: {predicted_target})", fontsize=12)

    plt.tight_layout()
    plt.show()

# Locate the test-split CSV and load its metadata table.
_test_csv_parts = ('Thesis_Hafeez', 'Dataset', 'split_csv', 'test_split.csv')
TEST_CSV_PATH = os.path.join(BASE_DIR, *_test_csv_parts)
test_metadata_df = load_metadata(csv_path=TEST_CSV_PATH)

# Run the comparison defined in this cell on the trained model.
interactive_visual_comparison(
    model=model,
    test_loader=test_loader,
    device=DEVICE,
    test_metadata_df=test_metadata_df,
)
