# Hybrid Model with Attention Mechanism (PyTorch)

This notebook implements a hybrid deep learning model for melanoma detection that combines ResNet50 and VGG16 backbones, each refined with channel and spatial attention. The model is trained and evaluated with stratified k-fold cross-validation, reporting accuracy, precision, recall, F1-score, and ROC AUC for each fold.

In [None]:

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.models as models
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, SubsetRandomSampler
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.model_selection import StratifiedKFold

# Set device
# Use the GPU when CUDA is usable; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


In [None]:

# Dataset Path
data_dir = "path_to_dataset"  # Update with the actual path

# Data Transformations
# Resize to a fixed 224x224 input, apply random flip / rotation / colour
# jitter, convert to a tensor, then normalise each channel. The mean/std
# values are the channel statistics conventionally used with
# ImageNet-pretrained torchvision models.
transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(degrees=30),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

# Load Dataset
# ImageFolder derives one class per sub-directory of `data_dir`; the number
# of discovered classes sizes the classifier head later on.
dataset = datasets.ImageFolder(data_dir, transform=transform)
num_classes = len(dataset.classes)


In [None]:

# Function to visualize sample images
def show_samples(dataset, num_samples=5):
    """Plot the first few images of ``dataset`` with their class names.

    Each item is read as ``(image_tensor, label)``; the tensor is moved to
    channel-last layout and the normalisation applied by the transform
    pipeline is undone so the colours display correctly.

    Args:
        dataset: Indexable dataset exposing ``__len__``, ``__getitem__`` and
            a ``classes`` list mapping label indices to names.
        num_samples: Maximum number of images to show. Clamped to the dataset
            size so small datasets do not raise ``IndexError``.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    count = min(num_samples, len(dataset))
    if count <= 0:
        # Nothing to draw; plt.subplots cannot create a zero-column grid.
        return

    # Must mirror the mean/std passed to transforms.Normalize.
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])

    # squeeze=False keeps `axes` two-dimensional even when count == 1, so the
    # indexing below works for any number of columns.
    fig, axes = plt.subplots(1, count, figsize=(3 * count, 5), squeeze=False)
    for i, ax in enumerate(axes[0]):
        img, label = dataset[i]
        img = img.permute(1, 2, 0).numpy()  # (C, H, W) -> (H, W, C)
        img = np.clip(img * std + mean, 0, 1)  # undo normalisation
        ax.imshow(img)
        ax.set_title(dataset.classes[label])
        ax.axis("off")
    plt.show()

show_samples(dataset)


In [None]:

# Channel Attention Module
class ChannelAttention(nn.Module):
    """Produce a per-channel gate in (0, 1) from pooled channel descriptors.

    The input is globally average-pooled and max-pooled to one value per
    channel. Both descriptors go through the same bias-free bottleneck
    (``in_planes -> in_planes // ratio -> in_planes``, built from 1x1
    convolutions), are summed, and squashed with a sigmoid.

    Args:
        in_planes: Number of channels of the incoming feature map.
        ratio: Reduction factor of the bottleneck.
    """

    def __init__(self, in_planes, ratio=8):
        super().__init__()
        hidden_planes = in_planes // ratio

        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)
        self.fc1 = nn.Conv2d(in_planes, hidden_planes, kernel_size=1, bias=False)
        self.relu = nn.ReLU()
        self.fc2 = nn.Conv2d(hidden_planes, in_planes, kernel_size=1, bias=False)
        self.sigmoid = nn.Sigmoid()

    def _bottleneck(self, pooled):
        """Apply the shared fc1 -> ReLU -> fc2 stack to a pooled descriptor."""
        squeezed = self.fc1(pooled)
        activated = self.relu(squeezed)
        return self.fc2(activated)

    def forward(self, x):
        """Return the channel gate; the caller multiplies it into ``x``."""
        from_avg = self._bottleneck(self.avg_pool(x))
        from_max = self._bottleneck(self.max_pool(x))
        combined = from_avg + from_max
        return self.sigmoid(combined)

# Spatial Attention Module
class SpatialAttention(nn.Module):
    """Produce a single-channel spatial gate in (0, 1).

    The input is reduced along the channel axis twice (mean and max). The two
    resulting maps are stacked and passed through a bias-free 7x7 convolution
    (padding 3, so height and width are preserved) followed by a sigmoid.
    """

    def __init__(self):
        super().__init__()
        self.conv = nn.Conv2d(
            in_channels=2,
            out_channels=1,
            kernel_size=7,
            padding=3,
            bias=False,
        )
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the spatial gate; the caller multiplies it into ``x``."""
        channel_mean = x.mean(dim=1, keepdim=True)
        channel_max = x.max(dim=1, keepdim=True).values
        stacked = torch.cat((channel_mean, channel_max), dim=1)
        logits = self.conv(stacked)
        return self.sigmoid(logits)


In [None]:

# Hybrid Model
class HybridModel(nn.Module):
    """Two-backbone classifier with channel and spatial attention.

    A ResNet50 and a VGG16 (both loaded with pretrained weights) are truncated
    to their convolutional parts. Each feature map is reweighted by channel
    attention, then by spatial attention, and reduced to one value per channel
    by global average pooling. The two descriptors (2048 and 512 values) are
    concatenated and fed to a single linear classifier.

    Args:
        num_classes: Number of output logits.
    """

    def __init__(self, num_classes):
        super().__init__()
        # NOTE: recent torchvision releases prefer the `weights=` argument;
        # `pretrained=True` is kept so older installations keep working.
        resnet = models.resnet50(pretrained=True)
        vgg = models.vgg16(pretrained=True)

        # Drop the last two children of each network (pooling + classifier
        # head) so that both return spatial feature maps.
        self.resnet = nn.Sequential(*list(resnet.children())[:-2])
        self.vgg = nn.Sequential(*list(vgg.children())[:-2])

        self.ca = ChannelAttention(2048)  # For ResNet50
        self.sa = SpatialAttention()

        self.ca_vgg = ChannelAttention(512)  # For VGG16
        self.sa_vgg = SpatialAttention()

        # Collapses H x W to 1 x 1. Without this, flattening yields C*H*W
        # values per sample and no longer matches the 2048 + 512 inputs that
        # `self.fc` expects. The layer has no parameters, so state_dict keys
        # are unaffected.
        self.pool = nn.AdaptiveAvgPool2d(1)

        self.fc = nn.Linear(2048 + 512, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for images ``x``."""
        resnet_feat = self.resnet(x)
        vgg_feat = self.vgg(x)

        # Channel gate first, then spatial gate, each applied multiplicatively.
        resnet_feat = self.ca(resnet_feat) * resnet_feat
        resnet_feat = self.sa(resnet_feat) * resnet_feat

        vgg_feat = self.ca_vgg(vgg_feat) * vgg_feat
        vgg_feat = self.sa_vgg(vgg_feat) * vgg_feat

        # Global average pooling -> (batch, C, 1, 1) -> (batch, C).
        resnet_feat = torch.flatten(self.pool(resnet_feat), start_dim=1)
        vgg_feat = torch.flatten(self.pool(vgg_feat), start_dim=1)

        features = torch.cat((resnet_feat, vgg_feat), dim=1)
        output = self.fc(features)
        return output


In [None]:

# Training and Evaluation
num_epochs = 20
batch_size = 32
learning_rate = 1e-4
k_folds = 5

# Validation images must not go through the random augmentations used for
# training, otherwise the fold metrics vary from run to run. This second view
# reads the same folder with a deterministic pipeline; ImageFolder lists files
# in sorted order, so its indices line up with those of `dataset`.
eval_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])
eval_dataset = datasets.ImageFolder(root=data_dir, transform=eval_transform)

skf = StratifiedKFold(n_splits=k_folds, shuffle=True, random_state=42)
# Class index of every sample, used only to stratify the folds.
all_labels = np.array(dataset.targets)

# One (accuracy, precision, recall, f1, auc) tuple per fold.
overall_metrics = []
for fold, (train_idx, val_idx) in enumerate(skf.split(np.zeros(len(all_labels)), all_labels)):
    print(f"Fold {fold+1}/{k_folds}")
    train_sampler = SubsetRandomSampler(train_idx)
    val_sampler = SubsetRandomSampler(val_idx)

    train_loader = DataLoader(dataset, batch_size=batch_size, sampler=train_sampler)
    val_loader = DataLoader(eval_dataset, batch_size=batch_size, sampler=val_sampler)

    # A fresh model, optimizer and schedule per fold so folds stay independent.
    model = HybridModel(num_classes).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    # Learning rate is multiplied by 0.1 every 7 epochs.
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

    for epoch in range(num_epochs):
        model.train()
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
        scheduler.step()

    # Evaluate Model
    model.eval()
    y_true, y_pred, prob_batches = [], [], []
    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            outputs = model(images)
            # Keep the class probabilities: ROC AUC needs scores, not labels.
            probs = torch.softmax(outputs, dim=1)
            preds = torch.argmax(probs, dim=1)
            y_true.extend(labels.cpu().numpy())
            y_pred.extend(preds.cpu().numpy())
            prob_batches.append(probs.cpu().numpy())
    y_prob = np.concatenate(prob_batches, axis=0)

    acc = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='macro')
    recall = recall_score(y_true, y_pred, average='macro')
    f1 = f1_score(y_true, y_pred, average='macro')
    if num_classes == 2:
        # Binary case: roc_auc_score expects the score of the positive
        # (greater-label) class as a 1-D array.
        auc = roc_auc_score(y_true, y_prob[:, 1])
    else:
        # Multiclass case: the full (n_samples, n_classes) probability matrix
        # is required; hard labels would raise an error.
        auc = roc_auc_score(y_true, y_prob, multi_class='ovr')

    overall_metrics.append((acc, precision, recall, f1, auc))
    print(f"Fold {fold+1}: Accuracy={acc:.4f}, F1-score={f1:.4f}")

# Mean over folds, in the order (accuracy, precision, recall, f1, auc).
print("Final Results:", np.mean(overall_metrics, axis=0))
