# ResNet50 classification

### Import Libraries & Setup

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models, transforms
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import cv2
import os
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm


##   Prepare Tumor Dataset for ResNet50
### 1. Convert grayscale to 3 channels because ResNet50 expects 3-channel input.

### 2. Resize to 224×224 for ResNet50.

In [None]:
from torch.utils.data import Dataset, DataLoader
import cv2
import os
from sklearn.model_selection import train_test_split
from torchvision import transforms

# 1️⃣ Define Dataset
class TumorDataset(Dataset):
    """Image dataset that yields 3-channel, 224x224 inputs with their labels.

    Each image is read from disk with OpenCV as grayscale, expanded to three
    identical channels, resized to 224x224 and finally passed through
    ``transform`` when one is given.

    Args:
        img_paths: Sequence of image file paths.
        labels: Sequence of labels, aligned index-by-index with ``img_paths``.
        transform: Optional callable applied to the resized H x W x 3 array.
    """

    def __init__(self, img_paths, labels, transform=None):
        self.img_paths = img_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.img_paths)

    def __getitem__(self, idx):
        """Load, preprocess and return ``(image, label)`` for index ``idx``.

        Raises:
            FileNotFoundError: If OpenCV cannot read the image file.
        """
        img_path = self.img_paths[idx]
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread reports failure by returning None instead of raising;
            # fail here with the offending path rather than with an opaque
            # OpenCV assertion inside cvtColor.
            raise FileNotFoundError(
                f"Could not read image (missing or unreadable file): {img_path}"
            )

        img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)  # ensure 3 channels
        img = cv2.resize(img, (224, 224))

        if self.transform:
            img = self.transform(img)

        # Safety net: if the transform left a single channel in [C, H, W]
        # layout, repeat it so the model still receives three channels.
        if img.shape[0] == 1:
            img = img.repeat(3, 1, 1)

        label = self.labels[idx]
        return img, label

# 2️⃣ Collect image paths & labels
base_path = "/kaggle/input/breast-ultrasound-images-dataset/Dataset_BUSI_with_GT"
classes = ["benign", "malignant", "normal"]
img_paths, labels = [], []

# The label of an image is the position of its class folder in `classes`:
# 0=benign, 1=malignant, 2=normal.
for i, cls in enumerate(classes):
    cls_folder = os.path.join(base_path, cls)
    # os.listdir returns entries in arbitrary order; sorting keeps the lists
    # (and therefore any seeded split of them) reproducible across runs.
    for f in sorted(os.listdir(cls_folder)):
        name = f.lower()
        # NOTE(review): the "_with_GT" class folders appear to store
        # segmentation masks ("*_mask*.png") next to the scans -- confirm.
        # Masks are not classifier inputs, so they are skipped.
        if "_mask" in name:
            continue
        if name.endswith((".png", ".jpg")):
            img_paths.append(os.path.join(cls_folder, f))
            labels.append(i)

# 3️⃣ Train/Test Split
# Hold out 20% of the samples for validation. The split is stratified on the
# labels so both subsets keep the same class proportions, and seeded so it is
# repeatable.
train_paths, val_paths, train_labels, val_labels = train_test_split(
    img_paths,
    labels,
    test_size=0.2,
    stratify=labels,
    random_state=42,
)

# Preprocessing applied to every image: convert to a tensor, then normalize
# each channel with the standard ImageNet mean / std.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Datasets for both subsets share the same preprocessing.
train_dataset, val_dataset = (
    TumorDataset(paths, targets, transform=transform)
    for paths, targets in ((train_paths, train_labels), (val_paths, val_labels))
)

# Only the training loader shuffles; validation keeps the order of val_paths.
train_loader, val_loader = (
    DataLoader(dataset, batch_size=16, shuffle=reshuffle)
    for dataset, reshuffle in ((train_dataset, True), (val_dataset, False))
)

print(f"✅ Train samples: {len(train_dataset)}, Validation samples: {len(val_dataset)}")


## 3 — Load Pretrained ResNet50

In [None]:
from torchvision.models import resnet50, ResNet50_Weights

# Pick the compute device here: the model is moved to `device` at the end of
# this cell, so the name must exist before that line runs.
device = "cuda" if torch.cuda.is_available() else "cpu"

weights = ResNet50_Weights.DEFAULT  # most up-to-date pretrained weights
model = resnet50(weights=weights)

# Replace the final layer for 3-class classification
in_features = model.fc.in_features
model.fc = nn.Linear(in_features, 3)
model = model.to(device)


In [None]:

device = "cuda" if torch.cuda.is_available() else "cpu"


# Load pretrained ResNet50

num_classes = 3  # benign, malignant, normal
# `pretrained=True` is deprecated in torchvision; the explicit weights enum is
# the supported API. IMAGENET1K_V1 is the weight set `pretrained=True`
# resolved to, so the starting weights are unchanged.
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)

# Replace the final fully connected layer with a `num_classes`-way head
in_features = model.fc.in_features
model.fc = nn.Linear(in_features, num_classes)
model = model.to(device)


# Loss & optimizer

criterion = nn.CrossEntropyLoss()  # suitable for multi-class
# All parameters are passed to the optimizer, so the whole network is
# fine-tuned, not only the new head.
optimizer = optim.Adam(model.parameters(), lr=1e-4)

print(f" ResNet50 ready for {num_classes}-class classification on device: {device}")

In [None]:
from tqdm import tqdm


# Training Setup

num_epochs = 10
best_val_acc = 0.0
# Per-epoch history; each list gains exactly one entry per epoch.
train_losses, val_losses = [], []
train_accs, val_accs = [], []

for epoch in range(num_epochs):
    # ------------------------
    # Training pass
    # ------------------------
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    # NOTE: the batch variables are called `batch_imgs` / `batch_targets`
    # rather than `imgs` / `labels` so this loop does not rebind the
    # module-level `labels` list (all sample labels) to a batch tensor.
    for batch_imgs, batch_targets in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
        batch_imgs = batch_imgs.to(device)
        batch_targets = batch_targets.to(device)

        optimizer.zero_grad()
        outputs = model(batch_imgs)
        loss = criterion(outputs, batch_targets)
        loss.backward()
        optimizer.step()

        # Weight the batch loss by the batch size so that dividing by `total`
        # below yields a per-sample average even when the last batch is short.
        running_loss += loss.item() * batch_imgs.size(0)
        _, predicted = torch.max(outputs, 1)
        correct += (predicted == batch_targets).sum().item()
        total += batch_targets.size(0)

    epoch_loss = running_loss / total
    epoch_acc = correct / total
    train_losses.append(epoch_loss)
    train_accs.append(epoch_acc)

    # ------------------------
    # Validation pass
    # ------------------------
    model.eval()
    val_loss = 0.0
    val_correct = 0
    val_total = 0
    # Reset every epoch: after the loop finishes these hold the labels and
    # predictions of the final epoch only.
    all_labels, all_preds = [], []

    with torch.no_grad():
        for batch_imgs, batch_targets in val_loader:
            batch_imgs = batch_imgs.to(device)
            batch_targets = batch_targets.to(device)
            outputs = model(batch_imgs)
            loss = criterion(outputs, batch_targets)

            val_loss += loss.item() * batch_imgs.size(0)
            _, predicted = torch.max(outputs, 1)
            val_correct += (predicted == batch_targets).sum().item()
            val_total += batch_targets.size(0)

            # Store labels & predictions for the confusion matrix
            all_labels.extend(batch_targets.cpu().numpy())
            all_preds.extend(predicted.cpu().numpy())

    val_epoch_loss = val_loss / val_total
    val_epoch_acc = val_correct / val_total
    val_losses.append(val_epoch_loss)
    val_accs.append(val_epoch_acc)

    print(f"Epoch {epoch+1}: Train Loss={epoch_loss:.4f}, Train Acc={epoch_acc:.4f} | "
          f"Val Loss={val_epoch_loss:.4f}, Val Acc={val_epoch_acc:.4f}")

    # ------------------------
    # Save best model (strict improvement in validation accuracy only)
    # ------------------------
    if val_epoch_acc > best_val_acc:
        best_val_acc = val_epoch_acc
        torch.save(model.state_dict(), "best_resnet50.pth")
        print("✅ Saved best model")

print(f"Training complete. Best validation accuracy: {best_val_acc:.4f}")


In [None]:
import matplotlib.pyplot as plt

plt.figure(figsize=(12, 5))

# One panel per metric, side by side:
# (subplot position, metric name, training history, validation history)
panels = [
    (1, "Loss", train_losses, val_losses),
    (2, "Accuracy", train_accs, val_accs),
]

for position, metric, train_history, val_history in panels:
    plt.subplot(1, 2, position)
    plt.plot(train_history, label=f"Train {metric}", marker='o')
    plt.plot(val_history, label=f"Validation {metric}", marker='o')
    plt.title(f"Training & Validation {metric} (3-Class)")
    plt.xlabel("Epoch")
    plt.ylabel(metric)
    plt.legend()
    plt.grid(True)

plt.tight_layout()
plt.show()


## 6 — Confusion Matrix

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

# ------------------------
# Confusion Matrix
# ------------------------
# Pin the class order explicitly (0=benign, 1=malignant, 2=normal). Without
# `labels`, the matrix only covers classes that actually occur in the data,
# so it could shrink below 3x3 and no longer match the three display labels.
cm = confusion_matrix(all_labels, all_preds, labels=[0, 1, 2])
disp = ConfusionMatrixDisplay(cm, display_labels=["Benign", "Malignant", "Normal"])
disp.plot(cmap=plt.cm.Blues, values_format='d')  # integer values
plt.title("Confusion Matrix (3-Class)")
plt.show()


In [None]:
import matplotlib.pyplot as plt
import torch
import numpy as np

# ------------------------
# 1️⃣ Visualization Function
# ------------------------
def visualize_classification(images, labels_true, labels_pred, class_names):
    """Show up to nine images in a 3x3 grid, titled with true and predicted class.

    Args:
        images: Sequence of images. Each item is either something
            ``plt.imshow`` accepts directly, or a ``[C, H, W]`` tensor that was
            normalized with mean ``[0.485, 0.456, 0.406]`` and std
            ``[0.229, 0.224, 0.225]``; that normalization is undone for display.
        labels_true: True class index of each image.
        labels_pred: Predicted class index of each image.
        class_names: Class names indexed by class id. The order must match the
            encoding used for ``labels_true`` and ``labels_pred``, e.g.
            ``['Benign', 'Malignant', 'Normal']`` for ids 0, 1, 2.
    """
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])

    # Never index past the shortest input: the label lists may be shorter
    # than the image list (or the other way round).
    count = min(9, len(images), len(labels_true), len(labels_pred))

    plt.figure(figsize=(12, 6))

    for i in range(count):
        plt.subplot(3, 3, i + 1)
        img = images[i]

        # Convert tensor to numpy if needed
        if torch.is_tensor(img):
            # detach() so tensors that still track gradients can be converted
            img = img.detach().permute(1, 2, 0).cpu().numpy()  # C,H,W -> H,W,C
            img = np.clip(img * std + mean, 0, 1)  # unnormalize into [0, 1]

        plt.imshow(img)
        plt.title(f"True: {class_names[labels_true[i]]}\nPred: {class_names[labels_pred[i]]}")
        plt.axis('off')

    plt.tight_layout()
    plt.show()

# ------------------------
# 2️⃣ Run model on validation set
# ------------------------
# True labels are read from the validation loader in the same loop that
# produces the predictions. They are NOT re-derived from the file paths:
# the dataset encodes classes as 0=benign, 1=malignant, 2=normal, and a
# second, different encoding would make the displayed class names wrong.
resnet = model.to(device)
resnet.eval()

labels_true = []
labels_pred = []
with torch.no_grad():
    for imgs, targets in val_loader:
        imgs = imgs.to(device)
        outputs = resnet(imgs)
        preds = torch.argmax(outputs, dim=1)
        labels_true.extend(targets.tolist())
        labels_pred.extend(preds.cpu().numpy())

# ------------------------
# 3️⃣ Pick the images to plot
# ------------------------
# Only the first nine samples are shown, so only those are loaded. The
# tensors are still normalized here; visualize_classification undoes the
# normalization before drawing. val_loader does not shuffle, so sample i of
# the dataset lines up with entry i of labels_true / labels_pred.
num_to_plot = min(9, len(val_loader.dataset))
val_imgs_for_plot = [val_loader.dataset[i][0] for i in range(num_to_plot)]

# ------------------------
# 4️⃣ Visualize predictions
# ------------------------
# Index order must match the label encoding: 0=benign, 1=malignant, 2=normal.
class_names = ['Benign', 'Malignant', 'Normal']
visualize_classification(val_imgs_for_plot, labels_true, labels_pred, class_names)
