In [1]:
from google.colab import drive
import zipfile
import os

# Mount Google Drive
drive.mount('/content/drive')

# Define path to the ZIP file
zip_path = "/content/drive/My Drive/dataset.zip"  # Change path if needed
extract_path = "/content/data"

# Unzip the dataset
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)

print("Dataset extracted successfully!")


Mounted at /content/drive
Dataset extracted successfully!


In [6]:
!pip install torch torchvision



Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [2]:
!ls "/content/data/chest_xray"

chest_xray  __MACOSX  test  train  val


In [3]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image
from sklearn.metrics import accuracy_score
from tqdm import tqdm  # Progress bar

# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Define Pneumonia Dataset Class
class PneumoniaDataSet(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.image_paths = []
        self.labels = []

        for label in ["NORMAL", "PNEUMONIA"]:
            class_dir = os.path.join(root_dir, label)
            for img_name in os.listdir(class_dir):
                self.image_paths.append(os.path.join(class_dir, img_name))
                self.labels.append(0 if label == 'NORMAL' else 1)

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, index):
        img_path = self.image_paths[index]
        image = Image.open(img_path).convert('RGB')
        label = self.labels[index]

        if self.transform:
            image = self.transform(image)

        return image, label

# Define Image Transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Define Dataset Paths (Colab)
DATA_DIR = "/content/data/chest_xray"  # Ensure dataset is extracted here

train_dataset = PneumoniaDataSet(root_dir=os.path.join(DATA_DIR, 'train'), transform=transform)
test_dataset = PneumoniaDataSet(root_dir=os.path.join(DATA_DIR, 'test'), transform=transform)
val_dataset = PneumoniaDataSet(root_dir=os.path.join(DATA_DIR, 'val'), transform=transform)

# Define Data Loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=2, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=2, pin_memory=True)

# Load Pretrained ResNet18 Model
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
model.fc = nn.Linear(model.fc.in_features, 2)  # 2 Classes: Normal & Pneumonia
model = model.to(device)

# Define Loss Function and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Mixed Precision Training for Faster Computation
scaler = torch.cuda.amp.GradScaler()

# Training Loop
num_epochs = 10
best_val_accuracy = 0.0
save_path = "/content/pneumonia_classifier_best.pth"  # Save best model

if __name__ == "__main__":  # Prevent multiprocessing issues
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0

        progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}")
        for images, labels in progress_bar:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()

            with torch.cuda.amp.autocast():  # Enable mixed precision
                outputs = model(images)
                loss = criterion(outputs, labels)

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            running_loss += loss.item()
            progress_bar.set_postfix(loss=running_loss / len(train_loader))

        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss / len(train_loader):.4f}")

        # Validation
        model.eval()
        val_labels, val_preds = [], []

        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                _, preds = torch.max(outputs, 1)

                val_labels.extend(labels.cpu().numpy())
                val_preds.extend(preds.cpu().numpy())

        val_accuracy = accuracy_score(val_labels, val_preds)
        print(f"Validation Accuracy: {val_accuracy:.4f}")

        # Save best model
        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy
            torch.save(model.state_dict(), save_path)
            print(f"Best model saved with accuracy: {best_val_accuracy:.4f}")

    print("Training complete!")

    # Load best model before testing
    model.load_state_dict(torch.load(save_path))
    model.eval()

    # Test Model
    test_labels, test_preds = [], []

    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, preds = torch.max(outputs, 1)

            test_labels.extend(labels.cpu().numpy())
            test_preds.extend(preds.cpu().numpy())

    test_accuracy = accuracy_score(test_labels, test_preds)
    print(f"Test Accuracy: {test_accuracy:.4f}")

    # Save Final Model
    torch.save(model.state_dict(), "/content/pneumonia_classifier_final.pth")
    print("Final model saved successfully!")


Using device: cuda


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 200MB/s]
  scaler = torch.cuda.amp.GradScaler()
  with torch.cuda.amp.autocast():  # Enable mixed precision
Epoch 1/10: 100%|██████████| 163/163 [01:28<00:00,  1.84it/s, loss=0.137]

Epoch 1/10, Loss: 0.1371





Validation Accuracy: 0.9375
Best model saved with accuracy: 0.9375


  with torch.cuda.amp.autocast():  # Enable mixed precision
Epoch 2/10: 100%|██████████| 163/163 [01:17<00:00,  2.10it/s, loss=0.0601]

Epoch 2/10, Loss: 0.0601





Validation Accuracy: 0.6875


  with torch.cuda.amp.autocast():  # Enable mixed precision
Epoch 3/10: 100%|██████████| 163/163 [01:18<00:00,  2.07it/s, loss=0.0441]

Epoch 3/10, Loss: 0.0441





Validation Accuracy: 0.8750


  with torch.cuda.amp.autocast():  # Enable mixed precision
Epoch 4/10: 100%|██████████| 163/163 [01:16<00:00,  2.13it/s, loss=0.0358]

Epoch 4/10, Loss: 0.0358





Validation Accuracy: 0.8125


  with torch.cuda.amp.autocast():  # Enable mixed precision
Epoch 5/10: 100%|██████████| 163/163 [01:18<00:00,  2.08it/s, loss=0.035]

Epoch 5/10, Loss: 0.0350





Validation Accuracy: 0.8125


  with torch.cuda.amp.autocast():  # Enable mixed precision
Epoch 6/10: 100%|██████████| 163/163 [01:16<00:00,  2.12it/s, loss=0.0251]

Epoch 6/10, Loss: 0.0251





Validation Accuracy: 0.8750


  with torch.cuda.amp.autocast():  # Enable mixed precision
Epoch 7/10: 100%|██████████| 163/163 [01:18<00:00,  2.07it/s, loss=0.0168]

Epoch 7/10, Loss: 0.0168





Validation Accuracy: 1.0000
Best model saved with accuracy: 1.0000


  with torch.cuda.amp.autocast():  # Enable mixed precision
Epoch 8/10: 100%|██████████| 163/163 [01:16<00:00,  2.13it/s, loss=0.0107]

Epoch 8/10, Loss: 0.0107





Validation Accuracy: 0.9375


  with torch.cuda.amp.autocast():  # Enable mixed precision
Epoch 9/10: 100%|██████████| 163/163 [01:18<00:00,  2.09it/s, loss=0.021]

Epoch 9/10, Loss: 0.0210





Validation Accuracy: 0.9375


  with torch.cuda.amp.autocast():  # Enable mixed precision
Epoch 10/10: 100%|██████████| 163/163 [01:18<00:00,  2.08it/s, loss=0.0197]

Epoch 10/10, Loss: 0.0197





Validation Accuracy: 0.9375
Training complete!


  model.load_state_dict(torch.load(save_path))


Test Accuracy: 0.8109
Final model saved successfully!


In [4]:
from google.colab import files
files.download('/content/pneumonia_classifier_final.pth')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>