# AIML Hackathon - Starter Notebook
Use this notebook to generate your first submission (Random Baseline).

In [None]:
import pandas as pd
import random
import os

# 1. Resolve input locations: use the Kaggle input mount when it exists,
#    otherwise fall back to paths relative to the current working directory.
DATA_DIR, TEST_FILE = (
    # NOTE: You may need to update these paths based on your Kaggle Dataset name
    (
        "/kaggle/input/aiml-hackathon-2526/msmarco_sampled",
        "/kaggle/input/aiml-hackathon-2526/test.csv",
    )
    if os.path.exists("/kaggle/input")
    # Local fallback (assuming you downloaded data)
    else ("msmarco_sampled", "test.csv")
)

print("Using Data Dir:", DATA_DIR)

# 2. Check Input
# Both inputs are verified up front so a missing file produces a readable hint
# instead of a pandas traceback.
COLLECTION_FILE = os.path.join(DATA_DIR, "collection.parquet")
RANKING_DEPTH = 10  # number of passage IDs emitted per query

if not os.path.exists(COLLECTION_FILE):
    print(f"WARNING: collection.parquet not found at {DATA_DIR}.")
    print("Please verify the 'Add Input' step in Instructions.")
elif not os.path.exists(TEST_FILE):
    print(f"WARNING: test file not found at {TEST_FILE}.")
    print("Please verify the 'Add Input' step in Instructions.")
else:
    # 3. Load Resources
    print("Loading queries and collection...")
    test_queries = pd.read_csv(TEST_FILE)
    collection = pd.read_parquet(COLLECTION_FILE)
    # PIDs are stringified once here so they can be joined directly below.
    all_pids = collection['pid'].astype(str).tolist()

    print(f"Loaded {len(test_queries)} queries and {len(all_pids)} passages.")

    # 4. Generate Random Rankings
    print("Generating random rankings...")
    # random.sample raises ValueError when asked for more items than the
    # population holds, so clamp the sample size to the collection size.
    sample_size = min(RANKING_DEPTH, len(all_pids))
    if sample_size < RANKING_DEPTH:
        print(f"WARNING: only {len(all_pids)} passages available; "
              f"ranking {sample_size} per query instead of {RANKING_DEPTH}.")

    # One row per query: the query id plus a space-separated list of
    # PIDs sampled without replacement from the collection.
    # (Call random.seed(<int>) beforehand if you need a reproducible baseline.)
    results = [
        {
            'id': str(qid),
            'expected': " ".join(random.sample(all_pids, sample_size)),
        }
        for qid in test_queries['id']
    ]

    # 5. Save
    os.makedirs("submission_files", exist_ok=True)
    # Columns are pinned so the CSV header is written even when there are no queries.
    submission = pd.DataFrame(results, columns=['id', 'expected'])
    submission.to_csv("submission_files/submission_random.csv", index=False)
    print("Created submission_files/submission_random.csv with", len(submission), "rows.")
