# Notebook to generate predictions for Histopathology Cancer Detection

In [None]:
# Import libraries
import os
import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from tqdm import tqdm

In [None]:
# Clone the GitHub repository to access utility scripts
!git clone https://github.com/astoreyai/Histopathology-Cancer-Detection.git
from Histopathology-Cancer-Detection.scripts.data_utils import HistopathologyTestDataset
from Histopathology-Cancer-Detection.scripts.model_utils import BaselineCNN
from Histopathology-Cancer-Detection.scripts.train_utils import generate_predictions
from Histopathology-Cancer-Detection.scripts.config import TEST_DIR, TARGET_SIZE, BATCH_SIZE

In [None]:
# Pick the compute device: use the GPU when CUDA is available, otherwise
# fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
# Preprocessing pipeline applied to every test image, in order:
#   1. resize to TARGET_SIZE (taken from the project config),
#   2. convert the image to a tensor,
#   3. normalize each of the three channels with fixed mean/std values.
test_transform = transforms.Compose(
    [
        transforms.Resize(TARGET_SIZE),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [None]:
# Load the saved model
# Build the network and move it onto the selected device.
model = BaselineCNN().to(device)
# map_location remaps the checkpoint's tensors onto the active device, so
# weights saved during a CUDA run still load when this notebook falls back
# to a CPU-only machine (without it torch.load raises in that case).
model.load_state_dict(torch.load("baseline_cnn.pth", map_location=device))
# Switch the module to evaluation mode for inference.
model.eval()

In [None]:
# Build the test dataset from the images under TEST_DIR, applying the test
# preprocessing pipeline to each one.
test_dataset = HistopathologyTestDataset(
    img_dir=TEST_DIR,
    transform=test_transform,
)

# Wrap the dataset in a DataLoader. Shuffling is off so batches are served
# in dataset order; four worker processes load the data.
test_loader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=4,
)

In [None]:
# Run the model over the whole test loader and collect the image ids with
# their predictions; 0.5 is passed through as the `threshold` argument.
img_ids, preds = generate_predictions(
    model,
    test_loader,
    device,
    threshold=0.5,
)

In [None]:
# Assemble the submission table: one row per test image, with its id and
# its predicted label.
submission_columns = {"id": img_ids, "label": preds}
submission_df = pd.DataFrame(submission_columns)

# Write the table without the pandas index column, then confirm on stdout.
submission_df.to_csv("submission.csv", index=False)
print("Submission file saved as submission.csv")