## 1. Import needed libraries and Set Device

In [None]:
# --- General Libraries ---
import glob
import os
import random
import time
import warnings

# --- Data Handling ---
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score, f1_score, precision_score, recall_score
from sklearn.metrics import confusion_matrix

# --- Image Handling and Visualization ---
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
from PIL import Image

# --- PyTorch and Deep Learning ---
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision import models  # EfficientNet-B3 backbone used by MTLModel
import timm  # For EfficientNet and other pretrained models

# --- Utilities ---
from tqdm import tqdm  # For training progress visualization
import torch.profiler  # Optional: For GPU usage profiling

# --- Warning Suppression ---
# NOTE: the 'ignore' action with no category/module filter silences every
# warning for the rest of the kernel session, not just noisy ones.
warnings.filterwarnings('ignore')


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Select the compute device. `torch.accelerator` only exists in recent PyTorch
# releases, so fall back to the classic CUDA check when it is missing.
if hasattr(torch, "accelerator") and torch.accelerator.is_available():
    device = torch.accelerator.current_accelerator().type
elif torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

print(f"Using {device} device")
print(torch.cuda.is_available())
print(torch.cuda.device_count())

# current_device() / get_device_name() raise when no CUDA device is present,
# so only query them when CUDA is actually usable.
if torch.cuda.is_available():
    print(torch.cuda.current_device())
    print(torch.cuda.get_device_name(torch.cuda.current_device()))

Using cuda device
True
1
0
NVIDIA GeForce RTX 4060 Ti


## 2. Data Loading and Preprocessing

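In this section, we collect the image paths for all five classes (colon_aca, colon_n, lung_aca, lung_n, lung_scc) and wrap them in a custom `Dataset` that returns each image together with its binary and multiclass labels.
We apply the necessary transformations (resizing and conversion to tensors), split the data into training, validation, and test sets (70/15/15, stratified by class),
and prepare the DataLoaders. The dataset is expected to have the following layout: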

```text
lung_colon_image_set/
├── colon_image_sets/
│   ├── colon_aca/
│   └── colon_n/
└── lung_image_sets/
    ├── lung_aca/
    ├── lung_n/
    └── lung_scc/
```

In [None]:
# Binary task labels: 0 = benign, 1 = malignant.
binary_labels_map = dict(
    lung_n=0,
    colon_n=0,
    lung_aca=1,
    lung_scc=1,
    colon_aca=1,
)

# Multiclass task labels: five classes, numbered 0-4 in the order listed below.
multiclass_labels_map = {
    folder_name: class_index
    for class_index, folder_name in enumerate(
        ("colon_aca", "lung_aca", "lung_scc", "colon_n", "lung_n")
    )
}

In [None]:
# Three parallel lists: entry i holds one image path together with its binary
# and multiclass label.
image_paths = []
binary_labels = []
multi_labels = []

root_dir = "lung_colon_image_set"

for class_name, label_multi in multiclass_labels_map.items():
    label_bin = binary_labels_map[class_name]

    # Lung and colon classes live in different sub-folders of the dataset root.
    organ_dir = "lung_image_sets" if "lung" in class_name else "colon_image_sets"
    class_path = os.path.join(root_dir, organ_dir, class_name)

    # glob returns matches in arbitrary, filesystem-dependent order. Sorting
    # makes the list order (and therefore the seeded train/val/test split)
    # identical on every machine.
    class_files = sorted(glob.glob(os.path.join(class_path, "*.jpeg")))

    # Fail early with a clear message instead of silently building a dataset
    # that is missing a whole class.
    if not class_files:
        raise FileNotFoundError(
            f"No '*.jpeg' images found in '{class_path}'. "
            f"Check that the dataset is extracted under '{root_dir}'."
        )

    image_paths.extend(class_files)
    binary_labels.extend([label_bin] * len(class_files))
    multi_labels.extend([label_multi] * len(class_files))


In [None]:
# Stage 1: hold out 30% of the samples as a temporary pool (validation + test),
# stratified on the multiclass label.
(
    X_train, X_temp,
    y_bin_train, y_bin_temp,
    y_multi_train, y_multi_temp,
) = train_test_split(
    image_paths,
    binary_labels,
    multi_labels,
    test_size=0.3,
    random_state=42,
    stratify=multi_labels,
)

# Stage 2: cut the temporary pool in half to obtain the validation and test sets.
(
    X_val, X_test,
    y_bin_val, y_bin_test,
    y_multi_val, y_multi_test,
) = train_test_split(
    X_temp,
    y_bin_temp,
    y_multi_temp,
    test_size=0.5,
    random_state=42,
    stratify=y_multi_temp,
)

In [None]:
class MTLImageDataset(Dataset):
    """Dataset that yields an image together with its two task labels.

    Args:
        image_paths: Sequence of image file paths.
        binary_labels: Sequence of binary labels, aligned with ``image_paths``.
        multi_labels: Sequence of multiclass labels, aligned with ``image_paths``.
        transform: Optional callable applied to the loaded RGB PIL image.

    Raises:
        ValueError: If the three sequences do not all have the same length.
    """

    def __init__(self, image_paths, binary_labels, multi_labels, transform=None):
        # The lists are indexed in lock-step, so a length mismatch would only
        # surface later as an IndexError (or never, as silently dropped labels).
        if not (len(image_paths) == len(binary_labels) == len(multi_labels)):
            raise ValueError(
                "image_paths, binary_labels and multi_labels must have the same length, "
                f"got {len(image_paths)}, {len(binary_labels)} and {len(multi_labels)}"
            )
        self.image_paths = image_paths
        self.binary_labels = binary_labels
        self.multi_labels = multi_labels
        self.transform = transform

    def __len__(self):
        """Return the number of samples."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, targets)`` for sample ``idx``.

        ``targets`` is a dict with the keys ``"binary_output"`` and
        ``"multi_output"`` holding the sample's two labels.
        """
        # The context manager guarantees the file handle is closed even if
        # decoding fails; convert() returns an image independent of the file.
        with Image.open(self.image_paths[idx]) as raw_img:
            img = raw_img.convert("RGB")
        if self.transform is not None:
            img = self.transform(img)
        return img, {
            "binary_output": self.binary_labels[idx],
            "multi_output": self.multi_labels[idx]
        }


In [None]:
batch_size = 128

# Resize, convert to a float tensor in [0, 1], then normalise with the ImageNet
# channel statistics that the pretrained EfficientNet-B3 weights were trained with.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

train_dataset = MTLImageDataset(X_train, y_bin_train, y_multi_train, transform=transform)
val_dataset = MTLImageDataset(X_val, y_bin_val, y_multi_val, transform=transform)
test_dataset = MTLImageDataset(X_test, y_bin_test, y_multi_test, transform=transform)

# Page-locked host memory speeds up host-to-GPU copies; it is pointless
# without CUDA, so only enable it when a GPU is available.
pin_memory = torch.cuda.is_available()

# Only the training data is shuffled; validation and test keep a fixed order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory=pin_memory)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, pin_memory=pin_memory)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, pin_memory=pin_memory)


NameError: name 'transforms' is not defined

In [None]:
class MTLModel(nn.Module):
    """Multi-task classifier: one shared EfficientNet-B3 feature extractor, two heads.

    Args:
        num_classes_multiclass: Number of output classes of the multiclass head.
        freeze_backbone: If True, the backbone's convolutional features are
            excluded from gradient updates and only the two heads are trained.
            Defaults to False (full fine-tuning), matching the previous behaviour.
    """

    def __init__(self, num_classes_multiclass=5, freeze_backbone=False):
        super().__init__()

        # Load the ImageNet-pretrained EfficientNet-B3 backbone
        self.backbone = models.efficientnet_b3(weights=models.EfficientNet_B3_Weights.DEFAULT)

        # Width of the pooled feature vector, read from the input size of the
        # backbone's own final Linear layer instead of hard-coding it.
        feature_dim = self.backbone.classifier[1].in_features

        # Optionally freeze the convolutional features
        for param in self.backbone.features.parameters():
            param.requires_grad = not freeze_backbone

        # Shared feature extractor
        self.shared_features = self.backbone.features
        self.pooling = nn.AdaptiveAvgPool2d(1)
        self.flatten = nn.Flatten()

        # Binary classification head (cancer present or not); ends in a Sigmoid,
        # so it emits probabilities rather than logits.
        self.binary_classifier = nn.Sequential(
            nn.Linear(feature_dim, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 1),
            nn.Sigmoid()
        )

        # Multiclass classification head (cancer type); emits raw logits.
        self.multiclass_classifier = nn.Sequential(
            nn.Linear(feature_dim, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, num_classes_multiclass)
        )

    def forward(self, x):
        """Run both heads on a batch of images.

        Returns:
            dict with ``"binary_output"`` (sigmoid probabilities, shape
            ``(batch_size,)``) and ``"multi_output"`` (logits, shape
            ``(batch_size, num_classes_multiclass)``).
        """
        x = self.shared_features(x)
        x = self.pooling(x)
        x = self.flatten(x)

        binary_output = self.binary_classifier(x).squeeze(1)  # shape: (batch_size,)
        multiclass_output = self.multiclass_classifier(x)     # shape: (batch_size, num_classes)

        return {
            "binary_output": binary_output,
            "multi_output": multiclass_output
        }


In [None]:
def train_model(model, dataloaders, optimizer, num_epochs=10):
    """Train a two-headed model and record the average loss of every epoch.

    Args:
        model: Module whose forward pass returns a dict with the keys
            ``"binary_output"`` (probabilities) and ``"multi_output"`` (logits).
        dataloaders: Mapping with the keys ``"train"`` and ``"val"``. Each value
            is an iterable of batches. A batch is either ``(inputs, targets)``
            with ``targets`` a dict keyed ``"binary_output"`` / ``"multi_output"``,
            or a flat ``(inputs, binary_labels, multi_labels)`` triple.
        optimizer: Optimizer already bound to ``model``'s parameters.
        num_epochs: Number of passes over the training data.

    Returns:
        ``(model, history)`` where ``history`` maps ``"train_loss"`` and
        ``"val_loss"`` to lists with one average per-sample loss per epoch.

    Raises:
        ValueError: If a phase's loader yields no samples.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # BCELoss expects probabilities; CrossEntropyLoss expects raw logits.
    criterion_binary = nn.BCELoss()
    criterion_multiclass = nn.CrossEntropyLoss()

    history = {"train_loss": [], "val_loss": []}

    for epoch in range(num_epochs):
        print(f"Epoch {epoch+1}/{num_epochs}")
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            model.train(is_train)  # train(False) is equivalent to eval()

            running_loss = 0.0
            num_samples = 0
            for batch in dataloaders[phase]:
                # Accept both batch layouts: a dict-target pair (image, targets)
                # or a flat triple (image, binary label, multiclass label).
                if len(batch) == 2:
                    inputs, targets = batch
                    binary_labels = targets["binary_output"]
                    multi_labels = targets["multi_output"]
                else:
                    inputs, binary_labels, multi_labels = batch

                inputs = inputs.to(device)
                binary_labels = binary_labels.float().to(device)
                multi_labels = multi_labels.long().to(device)

                if is_train:
                    optimizer.zero_grad()

                # Gradients are only tracked while training.
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    loss_binary = criterion_binary(outputs['binary_output'], binary_labels)
                    loss_multiclass = criterion_multiclass(outputs['multi_output'], multi_labels)

                    # Both tasks contribute equally to the optimised objective.
                    loss = loss_binary + loss_multiclass

                    if is_train:
                        loss.backward()
                        optimizer.step()

                # Weight the batch-mean loss by the batch size so the epoch
                # average is a true per-sample mean even with a short last batch.
                running_loss += loss.item() * inputs.size(0)
                num_samples += inputs.size(0)

            if num_samples == 0:
                raise ValueError(f"dataloaders['{phase}'] yielded no samples")

            epoch_loss = running_loss / num_samples
            history[f"{phase}_loss"].append(epoch_loss)

            print(f"{phase} Loss: {epoch_loss:.4f}")

    return model, history


In [None]:
def plot_losses(history):
    """Draw the training and validation loss curves stored in ``history``.

    Args:
        history: Dict with the keys ``'train_loss'`` and ``'val_loss'``, each
            holding one loss value per epoch.
    """
    _, ax = plt.subplots(figsize=(10,5))

    curves = (('train_loss', 'Train Loss'), ('val_loss', 'Validation Loss'))
    for history_key, legend_label in curves:
        ax.plot(history[history_key], label=legend_label)

    ax.set_xlabel('Epoch')
    ax.set_ylabel('Loss')
    ax.set_title('Training & Validation Loss Over Epochs')
    ax.legend()
    ax.grid(True)
    plt.show()

In [None]:
def evaluate_model(model, test_loader):
    """Evaluate both heads of a model and print/plot the results.

    Prints accuracy, precision, recall and F1 for the binary head and a
    classification report for the multiclass head, and shows one confusion
    matrix heatmap per task.

    Args:
        model: Module whose forward pass returns a dict with the keys
            ``'binary_output'`` (probabilities) and ``'multi_output'`` (logits).
        test_loader: Iterable of batches. A batch is either ``(inputs, targets)``
            with ``targets`` a dict keyed ``"binary_output"`` / ``"multi_output"``,
            or a flat ``(inputs, binary_labels, multi_labels)`` triple.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()

    # Display names in label-index order (index i is multiclass label i).
    class_names = ['Colon ACA', 'Lung ACA', 'Lung SCC', 'Colon N', 'Lung N']
    class_ids = list(range(len(class_names)))
    binary_names = ['Benign', 'Malignant']

    all_binary_preds = []
    all_binary_labels = []

    all_multi_preds = []
    all_multi_labels = []

    with torch.no_grad():
        for batch in test_loader:
            # Accept both batch layouts: a dict-target pair (image, targets)
            # or a flat triple (image, binary label, multiclass label).
            if len(batch) == 2:
                inputs, targets = batch
                binary_labels = targets["binary_output"]
                multi_labels = targets["multi_output"]
            else:
                inputs, binary_labels, multi_labels = batch

            outputs = model(inputs.to(device))

            # Binary predictions: threshold the sigmoid probability at 0.5
            binary_preds = (outputs['binary_output'] > 0.5).int()

            # Multiclass predictions: index of the largest logit
            multi_preds = torch.argmax(outputs['multi_output'], dim=1)

            all_binary_preds.extend(binary_preds.cpu().tolist())
            all_binary_labels.extend(binary_labels.cpu().tolist())

            all_multi_preds.extend(multi_preds.cpu().tolist())
            all_multi_labels.extend(multi_labels.cpu().tolist())

    # Metrics for Binary Classification
    print("🔍 Binary Classification Results:")
    print("Accuracy:", accuracy_score(all_binary_labels, all_binary_preds))
    print("Precision:", precision_score(all_binary_labels, all_binary_preds))
    print("Recall:", recall_score(all_binary_labels, all_binary_preds))
    print("F1 Score:", f1_score(all_binary_labels, all_binary_preds))

    # Confusion Matrix - Binary (explicit labels keep it 2x2 even if one class
    # never occurs, so it always matches the tick labels)
    cm_bin = confusion_matrix(all_binary_labels, all_binary_preds, labels=[0, 1])
    plt.figure(figsize=(5,4))
    sns.heatmap(cm_bin, annot=True, fmt='d', cmap='Blues', xticklabels=binary_names, yticklabels=binary_names)
    plt.title('Confusion Matrix - Binary Classification')
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.show()

    # Metrics for Multiclass Classification. Passing `labels` explicitly keeps
    # the report aligned with `target_names` when a class is absent from the data.
    print("\n🎯 Multiclass Classification Results:")
    print(classification_report(
        all_multi_labels, all_multi_preds, labels=class_ids, target_names=class_names
    ))

    # Confusion Matrix - Multiclass
    cm_multi = confusion_matrix(all_multi_labels, all_multi_preds, labels=class_ids)
    plt.figure(figsize=(8,6))
    sns.heatmap(cm_multi, annot=True, fmt='d', cmap='YlGnBu', xticklabels=class_names, yticklabels=class_names)
    plt.title('Confusion Matrix - Multiclass Classification')
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.show()

In [None]:
# Seed the RNGs so head initialisation and batch shuffling are repeatable
# (same seed as the train/val/test split).
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Create the model
model = MTLModel(num_classes_multiclass=5).to(device)

# Define optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

# Train before evaluating: without this step the two classification heads
# still hold their random initial weights and the test metrics are meaningless.
dataloaders = {"train": train_loader, "val": val_loader}
model, history = train_model(model, dataloaders, optimizer, num_epochs=10)

# Visualise convergence of the training and validation loss
plot_losses(history)

In [None]:
# Evaluate the model on the held-out test set: prints the binary and
# multiclass metrics and shows one confusion matrix per task.
evaluate_model(model, test_loader)
