In [6]:
import os
import tempfile

import pandas as pd
from fpdf import FPDF
from PIL import Image

def _add_slide_page(pdf, fold, slide, y_true, y_predicted, heatmap_path, jpeg_path):
    """Append one page with the slide's labels and its heatmap to ``pdf``.

    The heatmap at ``heatmap_path`` is converted to an RGB JPEG written to
    ``jpeg_path`` and that JPEG is what gets embedded. If the conversion or
    the embedding fails, the error is printed and a placeholder line is
    written on the page instead, so one bad image does not abort the report.
    """
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    pdf.cell(0, 10, f"Fold: {fold}", ln=True)
    pdf.cell(0, 10, f"Slide: {slide}", ln=True)
    pdf.cell(0, 10, f"True Label: {y_true}", ln=True)
    pdf.cell(0, 10, f"Predicted Label: {y_predicted}", ln=True)
    pdf.ln(10)

    # Deliberately broad: this is a best-effort step and both Pillow and the
    # PDF library can fail in several ways on a damaged or unusual image.
    try:
        with Image.open(heatmap_path) as img:
            img.convert("RGB").save(jpeg_path, "JPEG")
        pdf.image(jpeg_path, x=10, y=pdf.get_y(), w=100)
    except Exception as e:
        print(f"Error processing image {heatmap_path}: {e}")
        pdf.cell(0, 10, "Error loading heatmap image.", ln=True)


def generate_pdf(base_path, output_pdf):
    """Write a PDF report with one page per slide prediction and heatmap.

    Each sub-directory of ``base_path`` is treated as one fold. A fold is
    processed only if it contains both a ``predictions.parquet`` file and a
    ``heatmaps`` directory, and the parquet file provides the columns
    ``slide``, ``y_true``, ``y_pred0`` and ``y_pred1``. For every row whose
    heatmap ``heatmaps/<slide>_attn.png`` exists, a page is added showing the
    fold, slide, true label, predicted label and the heatmap image. Folds and
    slides with missing data are reported on stdout and skipped.

    Folds are visited in sorted name order so the page order is reproducible.

    Args:
        base_path: Directory whose sub-directories are the fold folders.
        output_pdf: Path of the PDF file to write.

    Raises:
        OSError: If ``base_path`` cannot be listed (for example, it does not
            exist).
    """
    required_columns = ("slide", "y_true", "y_pred0", "y_pred1")

    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)

    # Converted JPEGs go into a private scratch directory that is removed
    # automatically, instead of accumulating in a hard-coded /tmp.
    # The PDF is written before the directory is cleaned up so the result
    # does not depend on when the PDF library reads the image files.
    with tempfile.TemporaryDirectory() as scratch_dir:
        image_count = 0

        # os.listdir() returns entries in arbitrary order; sort for a
        # deterministic report.
        for fold in sorted(os.listdir(base_path)):
            fold_path = os.path.join(base_path, fold)
            if not os.path.isdir(fold_path):
                continue  # Skip non-folder items

            parquet_file = os.path.join(fold_path, "predictions.parquet")
            heatmaps_folder = os.path.join(fold_path, "heatmaps")

            if not os.path.exists(parquet_file) or not os.path.exists(heatmaps_folder):
                print(f"Missing data in {fold}. Skipping...")
                continue

            df = pd.read_parquet(parquet_file)

            # Treat a malformed predictions file like any other missing data
            # rather than aborting the whole report with a KeyError.
            missing_columns = [c for c in required_columns if c not in df.columns]
            if missing_columns:
                print(f"Missing columns {missing_columns} in {fold}. Skipping...")
                continue

            for _, row in df.iterrows():
                slide = row['slide']
                y_true = row['y_true']
                # Class 0 wins only when its score is strictly higher; ties
                # resolve to class 1.
                y_predicted = 0 if row['y_pred0'] > row['y_pred1'] else 1

                heatmap_path = os.path.join(heatmaps_folder, f"{slide}_attn.png")
                if not os.path.exists(heatmap_path):
                    print(f"Missing heatmap for slide {slide} in {fold}. Skipping...")
                    continue

                # A counter-based name keeps every temporary JPEG unique.
                # Naming it after the slide alone would let a slide that
                # occurs in several folds overwrite (and, via the PDF
                # library's per-filename image cache, replace) another
                # fold's heatmap.
                image_count += 1
                jpeg_path = os.path.join(scratch_dir, f"heatmap_{image_count:06d}.jpg")

                _add_slide_page(
                    pdf, fold, slide, y_true, y_predicted, heatmap_path, jpeg_path
                )

        pdf.output(output_pdf)

    print(f"PDF generated: {output_pdf}")

# Example run: build the report from the fold folders found under `base_path`.
output_pdf = "mil_predictions.pdf"  # Destination file for the report
base_path = "/scratch/project_2003009/he_space_slideflow_cropped/mil/"  # Replace with your directory path
generate_pdf(base_path=base_path, output_pdf=output_pdf)


PDF generated: mil_predictions.pdf
