In [None]:
import os
import torch
import pandas as pd
from scripts.data_utils import HistopathologyDataModule
from scripts.model_utils import BaselineCNN
from scripts.config import TEST_DIR, BATCH_SIZE, TARGET_SIZE

In [None]:
def generate_predictions_and_submit():
    """
    Main function to load the model, test data, generate predictions,
    and save the results to a CSV file for Kaggle submission.
    """
    print("Initializing data module...")
    # Instantiate data module and prepare the test dataset
    data_module = HistopathologyDataModule(
        batch_size=BATCH_SIZE,
        target_size=TARGET_SIZE,
        test_dir=TEST_DIR
    )
    data_module.setup(stage="test")

    print("Loading trained model checkpoint...")
    # Ensure the checkpoint exists before loading
    model_path = "checkpoints/best_model.ckpt"
    if not os.path.exists(model_path):
        raise FileNotFoundError(f"Model checkpoint not found at {model_path}")

    model = BaselineCNN.load_from_checkpoint(model_path)
    model.eval()  # Set the model to evaluation mode

    print("Generating predictions...")
    # Generate predictions
    predictions = []
    ids = []
    with torch.no_grad():
        for batch in data_module.test_dataloader():
            images, ids_batch = batch
            outputs = model(images)  # Get model predictions
            preds = (torch.sigmoid(outputs) > 0.5).float()  # Thresholding for binary classification
            predictions.extend(preds.cpu().numpy())
            ids.extend(ids_batch)

    # Create a DataFrame for the submission file
    submission_df = pd.DataFrame({
        "id": ids,
        "label": predictions
    })

    # Save the submission file
    submission_file = "submission.csv"
    submission_df.to_csv(submission_file, index=False)
    print(f"Predictions saved to {submission_file}")

    return submission_file

In [None]:
# Run the full inference pipeline defined above with its default settings.
# The call writes the submission CSV as a side effect and returns the path of
# that file, which is kept here for use in later cells.
submission_file = generate_predictions_and_submit()