<a href="https://colab.research.google.com/github/UbaidullahTanoli/Pretrained-CNN/blob/main/Frozen_CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import files
files.upload()

In [3]:
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
!kaggle datasets download -d raddar/chest-xrays-indiana-university
!unzip chest-xrays-indiana-university.zip -d /content/dataset/

In [5]:
import os
import pandas as pd
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
import torchvision.transforms as transforms
import torchvision.models as models
from torchvision.models import EfficientNet_B1_Weights
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [6]:
# 1. Custom Dataset that Merges Two CSVs
# ================================
class PureCNNMergedDataset(Dataset):
    def __init__(self, reports_csv, proj_csv, image_folder, transform=None):
        """
        Args:
            reports_csv (str): Path to 'indiana_reports.csv', which includes the "MeSH" column for labels.
            proj_csv (str): Path to 'indiana_projections.csv', which maps uid to image filename.
            image_folder (str): Directory containing the image files.
            transform (callable, optional): Transform to apply on images.
        """
        # Load both CSVs
        self.reports_df = pd.read_csv(reports_csv)
        self.proj_df = pd.read_csv(proj_csv)
        # Merge on 'uid'
        self.data = pd.merge(self.reports_df, self.proj_df, on='uid')
        self.image_folder = image_folder
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        row = self.data.iloc[idx]
        # Get the image filename from the projections CSV merged with reports CSV
        filename = row['filename']  # e.g., "1_IM-0001-4001.dcm.png"
        img_path = os.path.join(self.image_folder, filename)

        # Load the image and convert to RGB if needed
        pil_image = Image.open(img_path)
        if pil_image.mode != 'RGB':
            pil_image = pil_image.convert('RGB')
        if self.transform:
            image_tensor = self.transform(pil_image)
        else:
            image_tensor = transforms.ToTensor()(pil_image)

        # Derive binary label from the "MeSH" column in the reports CSV
        # e.g., if MeSH equals "normal" (case-insensitive), label = 0; else, label = 1.
        mesh_val = str(row['MeSH']).strip().lower()
        label = 0 if mesh_val == 'normal' else 1

        return image_tensor, label

In [7]:
# 2. Data Transforms
# ================================
# Define transforms for both training and validation (no augmentation here, but you can add if needed)
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

In [8]:
# 3. Pure CNN Model using EfficientNet-B1
# ================================
class PureCNNModel(nn.Module):
    def __init__(self, num_classes=2):
        super(PureCNNModel, self).__init__()
        # Load EfficientNet-B1 pretrained on ImageNet
        self.backbone = models.efficientnet_b1(weights=EfficientNet_B1_Weights.IMAGENET1K_V1)
        # Get the input features of the classifier (EfficientNet-B1 typically uses 1280)
        in_features = self.backbone.classifier[1].in_features
        # Replace the final classifier with a new Linear layer for binary classification
        self.backbone.classifier[1] = nn.Linear(in_features, num_classes)
        # (Optional) Freeze parts of the network if desired
        for param in self.backbone.features.parameters():
             param.requires_grad = False

    def forward(self, x):
        return self.backbone(x)

In [9]:
# 4. Training and Evaluation Functions
# ================================
def train_epoch(model, dataloader, criterion, optimizer, device):
    model.train()
    running_loss = 0.0
    preds_all, labels_all = [], []

    for images, labels in dataloader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * images.size(0)
        _, preds = torch.max(outputs, dim=1)
        preds_all.extend(preds.cpu().numpy())
        labels_all.extend(labels.cpu().numpy())

    epoch_loss = running_loss / len(dataloader.dataset)
    epoch_acc = accuracy_score(labels_all, preds_all)
    epoch_prec = precision_score(labels_all, preds_all, zero_division=0)
    epoch_rec = recall_score(labels_all, preds_all, zero_division=0)
    epoch_f1 = f1_score(labels_all, preds_all, zero_division=0)
    return epoch_loss, epoch_acc, epoch_prec, epoch_rec, epoch_f1

def eval_epoch(model, dataloader, criterion, device):
    model.eval()
    running_loss = 0.0
    preds_all, labels_all = [], []

    with torch.no_grad():
        for images, labels in dataloader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            running_loss += loss.item() * images.size(0)
            _, preds = torch.max(outputs, dim=1)
            preds_all.extend(preds.cpu().numpy())
            labels_all.extend(labels.cpu().numpy())

    epoch_loss = running_loss / len(dataloader.dataset)
    epoch_acc = accuracy_score(labels_all, preds_all)
    epoch_prec = precision_score(labels_all, preds_all, zero_division=0)
    epoch_rec = recall_score(labels_all, preds_all, zero_division=0)
    epoch_f1 = f1_score(labels_all, preds_all, zero_division=0)
    return epoch_loss, epoch_acc, epoch_prec, epoch_rec, epoch_f1

In [10]:
# 5. Main Training Loop
# ================================
def main():
    # Paths to the CSV files and image folder
    reports_csv = '/content/dataset/indiana_reports.csv'
    proj_csv = '/content/dataset/indiana_projections.csv'
    image_folder = '/content/dataset/images/images_normalized'

    # Create the merged dataset that includes image filenames and labels
    full_dataset = PureCNNMergedDataset(reports_csv, proj_csv, image_folder, transform=transform)

    # Split the dataset (80% training, 20% testing)
    total_size = len(full_dataset)
    train_size = int(0.8 * total_size)
    test_size = total_size - train_size
    train_dataset, test_dataset = random_split(full_dataset, [train_size, test_size])

    # Create DataLoaders
    train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, num_workers=2)
    test_loader  = DataLoader(test_dataset, batch_size=8, shuffle=False, num_workers=2)

    # Set device (GPU if available)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Instantiate the pure CNN model and move to device
    model = PureCNNModel(num_classes=2).to(device)

    # Define loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    num_epochs = 40  # You can adjust this as needed
    for epoch in range(num_epochs):
        train_loss, train_acc, train_prec, train_rec, train_f1 = train_epoch(model, train_loader, criterion, optimizer, device)
        val_loss, val_acc, val_prec, val_rec, val_f1 = eval_epoch(model, test_loader, criterion, device)
        print(f"Epoch {epoch+1}/{num_epochs}")
        print(f"  Train Loss: {train_loss:.4f} | Acc: {train_acc:.4f} | Prec: {train_prec:.4f} | Rec: {train_rec:.4f} | F1: {train_f1:.4f}")
        print(f"  Val   Loss: {val_loss:.4f} | Acc: {val_acc:.4f} | Prec: {val_prec:.4f} | Rec: {val_rec:.4f} | F1: {val_f1:.4f}")

In [11]:
if __name__ == "__main__":
    main()

Downloading: "https://download.pytorch.org/models/efficientnet_b1_rwightman-bac287d4.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b1_rwightman-bac287d4.pth
100%|██████████| 30.1M/30.1M [00:00<00:00, 72.3MB/s]


Epoch 1/40
  Train Loss: 0.6296 | Acc: 0.6517 | Prec: 0.6860 | Rec: 0.8376 | F1: 0.7543
  Val   Loss: 0.6047 | Acc: 0.6680 | Prec: 0.7427 | Rec: 0.7396 | F1: 0.7411
Epoch 2/40
  Train Loss: 0.6209 | Acc: 0.6606 | Prec: 0.7035 | Rec: 0.8092 | F1: 0.7527
  Val   Loss: 0.5882 | Acc: 0.6921 | Prec: 0.7159 | Rec: 0.8635 | F1: 0.7828
Epoch 3/40
  Train Loss: 0.6117 | Acc: 0.6685 | Prec: 0.7107 | Rec: 0.8103 | F1: 0.7572
  Val   Loss: 0.5997 | Acc: 0.6760 | Prec: 0.7537 | Rec: 0.7365 | F1: 0.7450
Epoch 4/40
  Train Loss: 0.6172 | Acc: 0.6668 | Prec: 0.7093 | Rec: 0.8098 | F1: 0.7562
  Val   Loss: 0.5908 | Acc: 0.6814 | Prec: 0.7224 | Rec: 0.8187 | F1: 0.7676
Epoch 5/40
  Train Loss: 0.6128 | Acc: 0.6691 | Prec: 0.7104 | Rec: 0.8129 | F1: 0.7582
  Val   Loss: 0.6476 | Acc: 0.6573 | Prec: 0.7979 | Rec: 0.6250 | F1: 0.7009
Epoch 6/40
  Train Loss: 0.6072 | Acc: 0.6746 | Prec: 0.7163 | Rec: 0.8116 | F1: 0.7610
  Val   Loss: 0.6139 | Acc: 0.6760 | Prec: 0.7692 | Rec: 0.7083 | F1: 0.7375
Epoch 7/40