# In [None]:
# === ResNet101 ===

import os
import pandas as pd
import torch
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, Subset
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.metrics import classification_report, confusion_matrix, f1_score, precision_score, recall_score, roc_auc_score, roc_curve, ConfusionMatrixDisplay
import seaborn as sns
import matplotlib.pyplot as plt

# === DATASET SETUP ===
# Builds the ImageFolder dataset, a stratified train/val/test split and the
# three DataLoaders used by the rest of this cell.
data_dir = 'C:/Users/ayesh/Downloads/combined_dataset'
metadata_path = 'C:/Users/ayesh/Downloads/scin_dataset/dataset/modified_metadata.csv'

# Resize to 224x224 and normalise with the per-channel mean/std commonly used
# for ImageNet-pretrained torchvision models.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

dataset = datasets.ImageFolder(data_dir, transform=transform)

# ImageFolder assigns label indices in sorted class-folder order (see
# dataset.class_to_idx). Taking the display names from the dataset keeps
# position i of class_names in step with label index i; a hand-written list
# can silently end up in the opposite order and mislabel the confusion matrix.
class_names = list(dataset.classes)
labels = [label for _, label in dataset.samples]

# Stratified splitting: first hold out 20% of all samples as the test set.
splitter = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_val_idx, test_idx = next(splitter.split(dataset.samples, labels))

# Then take 12.5% of the remaining 80% (i.e. 10% of the whole dataset) as the
# validation set. The indices returned here are positions within train_val_idx.
train_val_labels = [labels[i] for i in train_val_idx]
splitter_val = StratifiedShuffleSplit(n_splits=1, test_size=0.125, random_state=42)
train_idx, val_idx = next(
    splitter_val.split([dataset.samples[i] for i in train_val_idx], train_val_labels)
)

# Map the second-stage positions back to indices into the full dataset and
# store them as plain Python ints.
train_indices = train_val_idx[train_idx].tolist()
val_indices = train_val_idx[val_idx].tolist()
test_indices = test_idx.tolist()

train_dataset = Subset(dataset, train_indices)
val_dataset = Subset(dataset, val_indices)
test_dataset = Subset(dataset, test_indices)

# Only the training loader shuffles; val/test keep the index order so that
# predictions can be matched back to dataset.samples afterwards.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# === MODEL SETUP ===
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Pretrained ResNet-101 whose final fully connected layer is swapped for a
# dropout layer followed by a 2-output linear classifier.
model = models.resnet101(pretrained=True)
head_in_features = model.fc.in_features
model.fc = torch.nn.Sequential(
    torch.nn.Dropout(0.5),
    torch.nn.Linear(head_in_features, 2),
)
model = model.to(device)

# Cross-entropy on the raw logits; Adam over every parameter of the network.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
# StepLR multiplies the learning rate by gamma after every `step_size` calls
# to scheduler.step().
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

# === TRAINING ===
# Fine-tunes `model` for up to `epochs` epochs with early stopping on the
# validation loss. The weights from the epoch with the lowest validation loss
# are restored at the end, so later evaluation uses the best checkpoint rather
# than whatever the final (possibly worse) epoch produced.
epochs = 10
patience = 3  # consecutive epochs without val-loss improvement before stopping
best_val_loss = float('inf')
best_state = None
early_stop_counter = 0

for epoch in range(epochs):
    model.train()
    train_loss, correct, total = 0.0, 0, 0
    # `targets` (not `labels`) so the module-level label list is not clobbered.
    for images, targets in train_loader:
        images, targets = images.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        # The criterion returns a batch mean; weight by batch size so the
        # running sum can be turned into a per-sample average below.
        train_loss += loss.item() * images.size(0)
        predicted = torch.max(outputs, 1).indices
        correct += (predicted == targets).sum().item()
        total += targets.size(0)
    train_acc = correct / total
    train_loss /= total

    model.eval()
    val_loss, correct, total = 0.0, 0, 0
    with torch.no_grad():
        for images, targets in val_loader:
            images, targets = images.to(device), targets.to(device)
            outputs = model(images)
            loss = criterion(outputs, targets)
            val_loss += loss.item() * images.size(0)
            predicted = torch.max(outputs, 1).indices
            correct += (predicted == targets).sum().item()
            total += targets.size(0)
    val_acc = correct / total
    val_loss /= total

    # Advance the learning-rate schedule once per epoch, after the optimizer
    # updates; without this call the scheduler never changes the rate.
    scheduler.step()

    print(f"Epoch {epoch+1}/{epochs} - Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}")

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        early_stop_counter = 0
        # Snapshot the weights; clone so later optimizer steps do not
        # overwrite the saved tensors in place.
        best_state = {
            name: tensor.detach().clone()
            for name, tensor in model.state_dict().items()
        }
    else:
        early_stop_counter += 1
        if early_stop_counter >= patience:
            print("Early stopping triggered.")
            break

# Roll back to the best validation checkpoint (None only if no epoch ever
# improved on the initial infinite loss, e.g. a NaN loss throughout).
if best_state is not None:
    model.load_state_dict(best_state)

# === TEST SET EVALUATION ===
# Runs the model over the test loader, collects predictions / true labels /
# class-1 probabilities, prints the summary metrics and shows the confusion
# matrix.
model.eval()
test_preds, test_labels, test_probs = [], [], []

with torch.no_grad():
    for batch_images, batch_labels in test_loader:
        logits = model(batch_images.to(device))
        # Softmax column 1 = probability assigned to class index 1.
        class1_probs = torch.softmax(logits, dim=1)[:, 1]
        batch_preds = torch.max(logits, 1).indices
        test_preds.extend(batch_preds.cpu().numpy())
        test_labels.extend(batch_labels.cpu().numpy())
        test_probs.extend(class1_probs.cpu().numpy())

print("\n=== TEST SET METRICS ===")
print("Classification Report:\n", classification_report(test_labels, test_preds))
for metric_title, metric_fn in (
    ("F1 Score:", f1_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
):
    print(metric_title, metric_fn(test_labels, test_preds))

# AUC is only computed when more than one distinct true label is present.
distinct_true_labels = set(test_labels)
if len(distinct_true_labels) > 1:
    auc_score = roc_auc_score(test_labels, test_probs)
    print("AUC:", auc_score)

# === CONFUSION MATRIX ===
# NOTE(review): display_labels are applied in label-index order; confirm that
# class_names follows dataset.class_to_idx.
cm = confusion_matrix(test_labels, test_preds)
disp = ConfusionMatrixDisplay(cm, display_labels=class_names)
disp.plot(cmap="Blues")
plt.title("Confusion Matrix (Test Set)")
plt.show()

# === SKIN TONE ANALYSIS ON TEST SET ===
# Joins the per-image test predictions with the metadata CSV on file name and
# reports accuracy / misclassification rates per Fitzpatrick skin type.
metadata = pd.read_csv(metadata_path)

# test_loader was built with shuffle=False over Subset(dataset, test_indices),
# so these paths are in the same order as test_labels / test_preds.
test_paths = [dataset.samples[i][0] for i in test_indices]
test_filenames = [os.path.basename(p) for p in test_paths]

results = pd.DataFrame({
    "filename": test_filenames,
    "true_label": test_labels,
    "predicted_label": test_preds
})

metadata['filename'] = metadata['filename'].str.strip()
results['filename'] = results['filename'].str.strip()

# The inner join below silently drops test images that have no metadata row;
# surface that so the per-tone numbers are not mistaken for full coverage.
unmatched = int((~results['filename'].isin(metadata['filename'])).sum())
if unmatched:
    print(f"\nWarning: {unmatched} of {len(results)} test images have no metadata row and are excluded from the skin tone analysis.")

merged_df = results.merge(metadata, on="filename")

tone = merged_df['fitzpatrick_skin_type']
is_correct = merged_df.true_label == merged_df.predicted_label

# Accuracy by skin tone
# sort_index() gives a stable tone order in the printouts and the plot.
total_by_tone = tone.value_counts().sort_index()
# Reindex against every tone present so a tone with zero correct predictions
# counts as 0 instead of becoming NaN in the division.
correct_by_tone = tone[is_correct].value_counts().reindex(total_by_tone.index, fill_value=0)
accuracy_by_tone = (correct_by_tone / total_by_tone * 100).round(2)

# Misclassification counts and percentages
misclassified = merged_df[~is_correct]
# Same reindexing: a tone with no errors reports 0 rather than NaN/missing.
miscounts = misclassified['fitzpatrick_skin_type'].value_counts().reindex(total_by_tone.index, fill_value=0)
percent_misclassified = (miscounts / total_by_tone * 100).round(2)

print("\nMisclassification counts by skin tone (Test Set):")
print(miscounts)
print("\nMisclassification percentages by skin tone (Test Set):")
print(percent_misclassified)
print("\nAccuracy by skin tone (Test Set):")
print(accuracy_by_tone)

# Plot accuracy
sns.barplot(x=accuracy_by_tone.index, y=accuracy_by_tone.values)
plt.title("Test Set Accuracy by Fitzpatrick Skin Tone")
plt.ylabel("Accuracy (%)")
plt.xlabel("Skin Tone")
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()


# In [None]:
# === ResNet50 ===

import os
import pandas as pd
import torch
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, Subset
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.metrics import classification_report, confusion_matrix, f1_score, precision_score, recall_score, roc_auc_score, roc_curve, ConfusionMatrixDisplay
import seaborn as sns
import matplotlib.pyplot as plt

# === DATASET SETUP ===
# Builds the ImageFolder dataset, a stratified train/val/test split and the
# three DataLoaders used by the rest of this cell.
data_dir = 'C:/Users/ayesh/Downloads/combined_dataset'
metadata_path = 'C:/Users/ayesh/Downloads/scin_dataset/dataset/modified_metadata.csv'

# Resize to 224x224 and normalise with the per-channel mean/std commonly used
# for ImageNet-pretrained torchvision models.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

dataset = datasets.ImageFolder(data_dir, transform=transform)

# ImageFolder assigns label indices in sorted class-folder order (see
# dataset.class_to_idx). Taking the display names from the dataset keeps
# position i of class_names in step with label index i; a hand-written list
# can silently end up in the opposite order and mislabel the confusion matrix.
class_names = list(dataset.classes)
labels = [label for _, label in dataset.samples]

# Stratified splitting: first hold out 20% of all samples as the test set.
splitter = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_val_idx, test_idx = next(splitter.split(dataset.samples, labels))

# Then take 12.5% of the remaining 80% (i.e. 10% of the whole dataset) as the
# validation set. The indices returned here are positions within train_val_idx.
train_val_labels = [labels[i] for i in train_val_idx]
splitter_val = StratifiedShuffleSplit(n_splits=1, test_size=0.125, random_state=42)
train_idx, val_idx = next(
    splitter_val.split([dataset.samples[i] for i in train_val_idx], train_val_labels)
)

# Map the second-stage positions back to indices into the full dataset and
# store them as plain Python ints.
train_indices = train_val_idx[train_idx].tolist()
val_indices = train_val_idx[val_idx].tolist()
test_indices = test_idx.tolist()

train_dataset = Subset(dataset, train_indices)
val_dataset = Subset(dataset, val_indices)
test_dataset = Subset(dataset, test_indices)

# Only the training loader shuffles; val/test keep the index order so that
# predictions can be matched back to dataset.samples afterwards.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# === MODEL SETUP ===
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Pretrained ResNet-50 whose final fully connected layer is swapped for a
# dropout layer followed by a 2-output linear classifier.
model = models.resnet50(pretrained=True)
head_in_features = model.fc.in_features
model.fc = torch.nn.Sequential(
    torch.nn.Dropout(0.5),
    torch.nn.Linear(head_in_features, 2),
)
model = model.to(device)

# Cross-entropy on the raw logits; Adam over every parameter of the network.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
# StepLR multiplies the learning rate by gamma after every `step_size` calls
# to scheduler.step().
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

# === TRAINING ===
# Fine-tunes `model` for up to `epochs` epochs with early stopping on the
# validation loss. The weights from the epoch with the lowest validation loss
# are restored at the end, so later evaluation uses the best checkpoint rather
# than whatever the final (possibly worse) epoch produced.
epochs = 10
patience = 3  # consecutive epochs without val-loss improvement before stopping
best_val_loss = float('inf')
best_state = None
early_stop_counter = 0

for epoch in range(epochs):
    model.train()
    train_loss, correct, total = 0.0, 0, 0
    # `targets` (not `labels`) so the module-level label list is not clobbered.
    for images, targets in train_loader:
        images, targets = images.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        # The criterion returns a batch mean; weight by batch size so the
        # running sum can be turned into a per-sample average below.
        train_loss += loss.item() * images.size(0)
        predicted = torch.max(outputs, 1).indices
        correct += (predicted == targets).sum().item()
        total += targets.size(0)
    train_acc = correct / total
    train_loss /= total

    model.eval()
    val_loss, correct, total = 0.0, 0, 0
    with torch.no_grad():
        for images, targets in val_loader:
            images, targets = images.to(device), targets.to(device)
            outputs = model(images)
            loss = criterion(outputs, targets)
            val_loss += loss.item() * images.size(0)
            predicted = torch.max(outputs, 1).indices
            correct += (predicted == targets).sum().item()
            total += targets.size(0)
    val_acc = correct / total
    val_loss /= total

    # Advance the learning-rate schedule once per epoch, after the optimizer
    # updates; without this call the scheduler never changes the rate.
    scheduler.step()

    print(f"Epoch {epoch+1}/{epochs} - Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}")

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        early_stop_counter = 0
        # Snapshot the weights; clone so later optimizer steps do not
        # overwrite the saved tensors in place.
        best_state = {
            name: tensor.detach().clone()
            for name, tensor in model.state_dict().items()
        }
    else:
        early_stop_counter += 1
        if early_stop_counter >= patience:
            print("Early stopping triggered.")
            break

# Roll back to the best validation checkpoint (None only if no epoch ever
# improved on the initial infinite loss, e.g. a NaN loss throughout).
if best_state is not None:
    model.load_state_dict(best_state)

# === TEST SET EVALUATION ===
# Runs the model over the test loader, collects predictions / true labels /
# class-1 probabilities, prints the summary metrics and shows the confusion
# matrix.
model.eval()
test_preds, test_labels, test_probs = [], [], []

with torch.no_grad():
    for batch_images, batch_labels in test_loader:
        logits = model(batch_images.to(device))
        # Softmax column 1 = probability assigned to class index 1.
        class1_probs = torch.softmax(logits, dim=1)[:, 1]
        batch_preds = torch.max(logits, 1).indices
        test_preds.extend(batch_preds.cpu().numpy())
        test_labels.extend(batch_labels.cpu().numpy())
        test_probs.extend(class1_probs.cpu().numpy())

print("\n=== TEST SET METRICS ===")
print("Classification Report:\n", classification_report(test_labels, test_preds))
for metric_title, metric_fn in (
    ("F1 Score:", f1_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
):
    print(metric_title, metric_fn(test_labels, test_preds))

# AUC is only computed when more than one distinct true label is present.
distinct_true_labels = set(test_labels)
if len(distinct_true_labels) > 1:
    auc_score = roc_auc_score(test_labels, test_probs)
    print("AUC:", auc_score)

# === CONFUSION MATRIX ===
# NOTE(review): display_labels are applied in label-index order; confirm that
# class_names follows dataset.class_to_idx.
cm = confusion_matrix(test_labels, test_preds)
disp = ConfusionMatrixDisplay(cm, display_labels=class_names)
disp.plot(cmap="Blues")
plt.title("Confusion Matrix (Test Set)")
plt.show()

# === SKIN TONE ANALYSIS ON TEST SET ===
# Joins the per-image test predictions with the metadata CSV on file name and
# reports accuracy / misclassification rates per Fitzpatrick skin type.
metadata = pd.read_csv(metadata_path)

# test_loader was built with shuffle=False over Subset(dataset, test_indices),
# so these paths are in the same order as test_labels / test_preds.
test_paths = [dataset.samples[i][0] for i in test_indices]
test_filenames = [os.path.basename(p) for p in test_paths]

results = pd.DataFrame({
    "filename": test_filenames,
    "true_label": test_labels,
    "predicted_label": test_preds
})

metadata['filename'] = metadata['filename'].str.strip()
results['filename'] = results['filename'].str.strip()

# The inner join below silently drops test images that have no metadata row;
# surface that so the per-tone numbers are not mistaken for full coverage.
unmatched = int((~results['filename'].isin(metadata['filename'])).sum())
if unmatched:
    print(f"\nWarning: {unmatched} of {len(results)} test images have no metadata row and are excluded from the skin tone analysis.")

merged_df = results.merge(metadata, on="filename")

tone = merged_df['fitzpatrick_skin_type']
is_correct = merged_df.true_label == merged_df.predicted_label

# Accuracy by skin tone
# sort_index() gives a stable tone order in the printouts and the plot.
total_by_tone = tone.value_counts().sort_index()
# Reindex against every tone present so a tone with zero correct predictions
# counts as 0 instead of becoming NaN in the division.
correct_by_tone = tone[is_correct].value_counts().reindex(total_by_tone.index, fill_value=0)
accuracy_by_tone = (correct_by_tone / total_by_tone * 100).round(2)

# Misclassification counts and percentages
misclassified = merged_df[~is_correct]
# Same reindexing: a tone with no errors reports 0 rather than NaN/missing.
miscounts = misclassified['fitzpatrick_skin_type'].value_counts().reindex(total_by_tone.index, fill_value=0)
percent_misclassified = (miscounts / total_by_tone * 100).round(2)

print("\nMisclassification counts by skin tone (Test Set):")
print(miscounts)
print("\nMisclassification percentages by skin tone (Test Set):")
print(percent_misclassified)
print("\nAccuracy by skin tone (Test Set):")
print(accuracy_by_tone)

# Plot accuracy
sns.barplot(x=accuracy_by_tone.index, y=accuracy_by_tone.values)
plt.title("Test Set Accuracy by Fitzpatrick Skin Tone")
plt.ylabel("Accuracy (%)")
plt.xlabel("Skin Tone")
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()


# In [None]:
# === ResNet18 ===
import os
import pandas as pd
import torch
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, Subset
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.metrics import classification_report, confusion_matrix, f1_score, precision_score, recall_score, roc_auc_score, roc_curve, ConfusionMatrixDisplay
import seaborn as sns
import matplotlib.pyplot as plt

# === DATASET SETUP ===
# Builds the ImageFolder dataset, a stratified train/val/test split and the
# three DataLoaders used by the rest of this cell.
data_dir = 'C:/Users/ayesh/Downloads/combined_dataset'
metadata_path = 'C:/Users/ayesh/Downloads/scin_dataset/dataset/modified_metadata.csv'

# Resize to 224x224 and normalise with the per-channel mean/std commonly used
# for ImageNet-pretrained torchvision models.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

dataset = datasets.ImageFolder(data_dir, transform=transform)

# ImageFolder assigns label indices in sorted class-folder order (see
# dataset.class_to_idx). Taking the display names from the dataset keeps
# position i of class_names in step with label index i; a hand-written list
# can silently end up in the opposite order and mislabel the confusion matrix.
class_names = list(dataset.classes)
labels = [label for _, label in dataset.samples]

# Stratified splitting: first hold out 20% of all samples as the test set.
splitter = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_val_idx, test_idx = next(splitter.split(dataset.samples, labels))

# Then take 12.5% of the remaining 80% (i.e. 10% of the whole dataset) as the
# validation set. The indices returned here are positions within train_val_idx.
train_val_labels = [labels[i] for i in train_val_idx]
splitter_val = StratifiedShuffleSplit(n_splits=1, test_size=0.125, random_state=42)
train_idx, val_idx = next(
    splitter_val.split([dataset.samples[i] for i in train_val_idx], train_val_labels)
)

# Map the second-stage positions back to indices into the full dataset and
# store them as plain Python ints.
train_indices = train_val_idx[train_idx].tolist()
val_indices = train_val_idx[val_idx].tolist()
test_indices = test_idx.tolist()

train_dataset = Subset(dataset, train_indices)
val_dataset = Subset(dataset, val_indices)
test_dataset = Subset(dataset, test_indices)

# Only the training loader shuffles; val/test keep the index order so that
# predictions can be matched back to dataset.samples afterwards.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# === MODEL SETUP ===
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Pretrained ResNet-18 whose final fully connected layer is swapped for a
# dropout layer followed by a 2-output linear classifier.
model = models.resnet18(pretrained=True)
head_in_features = model.fc.in_features
model.fc = torch.nn.Sequential(
    torch.nn.Dropout(0.5),
    torch.nn.Linear(head_in_features, 2),
)
model = model.to(device)

# Cross-entropy on the raw logits; Adam over every parameter of the network.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
# StepLR multiplies the learning rate by gamma after every `step_size` calls
# to scheduler.step().
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

# === TRAINING ===
# Fine-tunes `model` for up to `epochs` epochs with early stopping on the
# validation loss. The weights from the epoch with the lowest validation loss
# are restored at the end, so later evaluation uses the best checkpoint rather
# than whatever the final (possibly worse) epoch produced.
epochs = 10
patience = 3  # consecutive epochs without val-loss improvement before stopping
best_val_loss = float('inf')
best_state = None
early_stop_counter = 0

for epoch in range(epochs):
    model.train()
    train_loss, correct, total = 0.0, 0, 0
    # `targets` (not `labels`) so the module-level label list is not clobbered.
    for images, targets in train_loader:
        images, targets = images.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        # The criterion returns a batch mean; weight by batch size so the
        # running sum can be turned into a per-sample average below.
        train_loss += loss.item() * images.size(0)
        predicted = torch.max(outputs, 1).indices
        correct += (predicted == targets).sum().item()
        total += targets.size(0)
    train_acc = correct / total
    train_loss /= total

    model.eval()
    val_loss, correct, total = 0.0, 0, 0
    with torch.no_grad():
        for images, targets in val_loader:
            images, targets = images.to(device), targets.to(device)
            outputs = model(images)
            loss = criterion(outputs, targets)
            val_loss += loss.item() * images.size(0)
            predicted = torch.max(outputs, 1).indices
            correct += (predicted == targets).sum().item()
            total += targets.size(0)
    val_acc = correct / total
    val_loss /= total

    # Advance the learning-rate schedule once per epoch, after the optimizer
    # updates; without this call the scheduler never changes the rate.
    scheduler.step()

    print(f"Epoch {epoch+1}/{epochs} - Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}")

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        early_stop_counter = 0
        # Snapshot the weights; clone so later optimizer steps do not
        # overwrite the saved tensors in place.
        best_state = {
            name: tensor.detach().clone()
            for name, tensor in model.state_dict().items()
        }
    else:
        early_stop_counter += 1
        if early_stop_counter >= patience:
            print("Early stopping triggered.")
            break

# Roll back to the best validation checkpoint (None only if no epoch ever
# improved on the initial infinite loss, e.g. a NaN loss throughout).
if best_state is not None:
    model.load_state_dict(best_state)

# === TEST SET EVALUATION ===
# Runs the model over the test loader, collects predictions / true labels /
# class-1 probabilities, prints the summary metrics and shows the confusion
# matrix.
model.eval()
test_preds, test_labels, test_probs = [], [], []

with torch.no_grad():
    for batch_images, batch_labels in test_loader:
        logits = model(batch_images.to(device))
        # Softmax column 1 = probability assigned to class index 1.
        class1_probs = torch.softmax(logits, dim=1)[:, 1]
        batch_preds = torch.max(logits, 1).indices
        test_preds.extend(batch_preds.cpu().numpy())
        test_labels.extend(batch_labels.cpu().numpy())
        test_probs.extend(class1_probs.cpu().numpy())

print("\n=== TEST SET METRICS ===")
print("Classification Report:\n", classification_report(test_labels, test_preds))
for metric_title, metric_fn in (
    ("F1 Score:", f1_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
):
    print(metric_title, metric_fn(test_labels, test_preds))

# AUC is only computed when more than one distinct true label is present.
distinct_true_labels = set(test_labels)
if len(distinct_true_labels) > 1:
    auc_score = roc_auc_score(test_labels, test_probs)
    print("AUC:", auc_score)

# === CONFUSION MATRIX ===
# NOTE(review): display_labels are applied in label-index order; confirm that
# class_names follows dataset.class_to_idx.
cm = confusion_matrix(test_labels, test_preds)
disp = ConfusionMatrixDisplay(cm, display_labels=class_names)
disp.plot(cmap="Blues")
plt.title("Confusion Matrix (Test Set)")
plt.show()

# === SKIN TONE ANALYSIS ON TEST SET ===
# Joins the per-image test predictions with the metadata CSV on file name and
# reports accuracy / misclassification rates per Fitzpatrick skin type.
metadata = pd.read_csv(metadata_path)

# test_loader was built with shuffle=False over Subset(dataset, test_indices),
# so these paths are in the same order as test_labels / test_preds.
test_paths = [dataset.samples[i][0] for i in test_indices]
test_filenames = [os.path.basename(p) for p in test_paths]

results = pd.DataFrame({
    "filename": test_filenames,
    "true_label": test_labels,
    "predicted_label": test_preds
})

metadata['filename'] = metadata['filename'].str.strip()
results['filename'] = results['filename'].str.strip()

# The inner join below silently drops test images that have no metadata row;
# surface that so the per-tone numbers are not mistaken for full coverage.
unmatched = int((~results['filename'].isin(metadata['filename'])).sum())
if unmatched:
    print(f"\nWarning: {unmatched} of {len(results)} test images have no metadata row and are excluded from the skin tone analysis.")

merged_df = results.merge(metadata, on="filename")

tone = merged_df['fitzpatrick_skin_type']
is_correct = merged_df.true_label == merged_df.predicted_label

# Accuracy by skin tone
# sort_index() gives a stable tone order in the printouts and the plot.
total_by_tone = tone.value_counts().sort_index()
# Reindex against every tone present so a tone with zero correct predictions
# counts as 0 instead of becoming NaN in the division.
correct_by_tone = tone[is_correct].value_counts().reindex(total_by_tone.index, fill_value=0)
accuracy_by_tone = (correct_by_tone / total_by_tone * 100).round(2)

# Misclassification counts and percentages
misclassified = merged_df[~is_correct]
# Same reindexing: a tone with no errors reports 0 rather than NaN/missing.
miscounts = misclassified['fitzpatrick_skin_type'].value_counts().reindex(total_by_tone.index, fill_value=0)
percent_misclassified = (miscounts / total_by_tone * 100).round(2)

print("\nMisclassification counts by skin tone (Test Set):")
print(miscounts)
print("\nMisclassification percentages by skin tone (Test Set):")
print(percent_misclassified)
print("\nAccuracy by skin tone (Test Set):")
print(accuracy_by_tone)

# Plot accuracy
sns.barplot(x=accuracy_by_tone.index, y=accuracy_by_tone.values)
plt.title("Test Set Accuracy by Fitzpatrick Skin Tone")
plt.ylabel("Accuracy (%)")
plt.xlabel("Skin Tone")
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
