## Step 1: Clone the Repository

In [None]:
!git clone https://github.com/ab-2109/HMRAG.git
%cd HMRAG

## Step 2: Install Dependencies

In [None]:
# Install required packages
!pip install -q -r requirements.txt

## Step 3: Install and Setup Ollama (Required for LLM)
Note: Ollama has to run as a background service, which is awkward on Colab, so the Ollama setup in the cell below is optional and left commented out. The alternative is to use HuggingFace models or the OpenAI API.

In [None]:
# Option A: Install Ollama (requires a background server process).
# This is awkward on Colab - using the OpenAI API instead is simpler.
#
# This cell is intentionally all comments. Uncomment the lines below if you
# want to try Ollama on Colab:
# !curl -fsSL https://ollama.com/install.sh | sh
# import subprocess
# import time
# # Start the Ollama server in the background.
# # NOTE(review): stdout/stderr are piped but never read in this cell; a chatty
# # server could fill the pipe buffers - consider subprocess.DEVNULL or a log file.
# ollama_process = subprocess.Popen(['ollama', 'serve'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
# # Fixed 5-second pause, presumably to let the server start before pulling models.
# time.sleep(5)
# # Pull the required models
# !ollama pull qwen2.5:7b
# !ollama pull nomic-embed-text

## Step 4: Setup API Keys

In [None]:
# Resolve the API keys that the inference cells pass to main.py.
import os


def load_api_key(name, placeholder):
    """Return the API key called ``name``.

    Lookup order: Colab's secret manager (left sidebar -> key icon), then the
    environment variable of the same name, then ``placeholder``.

    Each key is resolved independently, so one missing secret no longer
    replaces a key that was found with a placeholder.

    Args:
        name: Secret / environment-variable name, e.g. ``'OPENAI_API_KEY'``.
        placeholder: Value returned when no source provides the key.

    Returns:
        The key string, or ``placeholder`` if it could not be found.
    """
    try:
        # Imported lazily so this cell also runs outside Colab.
        from google.colab import userdata
        value = userdata.get(name)
    except Exception:
        # E.g. not running on Colab, secret not defined, or notebook access to
        # the secret not granted. Unlike a bare `except:`, this does not
        # swallow KeyboardInterrupt / SystemExit.
        value = None
    return value or os.environ.get(name) or placeholder


# Placeholders can be edited to set a key directly (not recommended for production).
OPENAI_PLACEHOLDER = "your-openai-api-key-here"
SERPER_PLACEHOLDER = "your-serper-api-key-here"

OPENAI_API_KEY = load_api_key('OPENAI_API_KEY', OPENAI_PLACEHOLDER)
SERPER_API_KEY = load_api_key('SERPER_API_KEY', SERPER_PLACEHOLDER)

# Report status without ever printing the key values themselves.
unresolved_keys = [
    key_name
    for key_name, key_value, key_placeholder in (
        ('OPENAI_API_KEY', OPENAI_API_KEY, OPENAI_PLACEHOLDER),
        ('SERPER_API_KEY', SERPER_API_KEY, SERPER_PLACEHOLDER),
    )
    if key_value == key_placeholder
]
if unresolved_keys:
    print(f"WARNING: no value found for {', '.join(unresolved_keys)}; using placeholder value(s).")
else:
    print("API keys configured!")

## Step 5: Download ScienceQA Dataset

In [None]:
# Download the ScienceQA dataset by running the repository's helper script.
# The script path is relative, so the working directory must be the repo root.
# NOTE(review): later cells read ./dataset/ScienceQA/data, ./dataset/ScienceQA/images
# and ./dataset/ScienceQA/captions.json; presumably this script creates them - confirm.
!bash dataset/download_ScienceQA.sh

## Step 6: Create Required Directories

In [None]:
# Create output and working directories
!mkdir -p outputs
!mkdir -p lightrag_workdir

# Check dataset structure
!ls -la dataset/

## Step 7: Run Inference (Small Test)
Start with a small test run (5 examples)

In [None]:
# Smoke test: run main.py on a small subset (--test_number 5) of the test split
# before committing to the full run.
# IPython substitutes $SERPER_API_KEY and $OPENAI_API_KEY with the Python
# variables of those names, so the "Setup API Keys" cell must have been run first.
# Outputs are written under ./outputs with --label test_run.
# NOTE(review): --save_every 5 presumably checkpoints results every 5 examples - confirm in main.py.
!python3 main.py \
    --data_root ./dataset/ScienceQA/data \
    --image_root ./dataset/ScienceQA/images \
    --output_root ./outputs \
    --caption_file ./dataset/ScienceQA/captions.json \
    --working_dir ./lightrag_workdir \
    --serper_api_key "$SERPER_API_KEY" \
    --openai_key "$OPENAI_API_KEY" \
    --test_split test \
    --test_number 5 \
    --shot_number 0 \
    --label test_run \
    --save_every 5

## Step 8: Run Full Inference
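After the small test run succeeds, run on the full test split. This run omits `--test_number` and additionally sets `--shot_number 2` and `--use_caption`.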

In [None]:
# Full inference run on the test split (no --test_number limit is passed).
# Compared with the small test run this uses --shot_number 2, --label full_run,
# --save_every 50 and adds the --use_caption flag.
# IPython substitutes $SERPER_API_KEY and $OPENAI_API_KEY with the Python
# variables of those names, so the "Setup API Keys" cell must have been run first.
# NOTE(review): expect this to take far longer than the 5-example test - exact cost not visible here.
!python3 main.py \
    --data_root ./dataset/ScienceQA/data \
    --image_root ./dataset/ScienceQA/images \
    --output_root ./outputs \
    --caption_file ./dataset/ScienceQA/captions.json \
    --working_dir ./lightrag_workdir \
    --serper_api_key "$SERPER_API_KEY" \
    --openai_key "$OPENAI_API_KEY" \
    --test_split test \
    --shot_number 2 \
    --label full_run \
    --save_every 50 \
    --use_caption

## Step 9: View Results

In [None]:
# View output files
!ls -lh outputs/

# Load and display results
import json

with open('outputs/test_run_test.json', 'r') as f:
    results = json.load(f)

print(f"Total results: {len(results)}")
print("\nSample results:")
for qid, answer in list(results.items())[:5]:
    print(f"Question ID: {qid}, Answer: {answer}")

## Alternative: Use OpenAI Models Directly
If the Ollama setup is difficult, modify the agents to call the OpenAI API directly.

## Download Results to Local Machine

In [None]:
# Download results
from google.colab import files

# Download the results file
files.download('outputs/test_run_test.json')

# Or zip and download all outputs
!zip -r outputs.zip outputs/
files.download('outputs.zip')