# Running KiVA Benchmark with LLaVA (Memory Optimized)

This notebook runs the KiVA benchmark using the LLaVA model, with memory optimizations, in Google Colab.

First, make sure you're using a GPU runtime:
- Runtime > Change runtime type > GPU

In [None]:
# Check if GPU is available
!nvidia-smi

import torch
print(f"\nTorch CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name()}")
    print(f"Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")

## 1. Setup and Dependencies

Install required packages and clone the repository

In [None]:
# Install required packages
!pip install torch torchvision transformers pillow pandas
!pip install bitsandbytes accelerate

# Clone repository
!git clone https://github.com/VHKoisa/kiva-challenge.git
%cd kiva-challenge

## 2. Memory Management Setup

Configure memory management settings for optimal performance

In [None]:
import os
import gc
import torch

# Configure PyTorch memory management.
# Variables written to os.environ are inherited by child processes, so they
# also apply to the benchmark script launched with `!python` further down.
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'

# TRANSFORMERS_OFFLINE=1 tells the transformers library not to contact the
# Hugging Face Hub. On a fresh runtime with no cached weights that makes model
# loading fail, so offline mode is opt-in: set USE_OFFLINE_MODE = True only
# after the model files have been downloaded once.
USE_OFFLINE_MODE = False
if USE_OFFLINE_MODE:
    os.environ['TRANSFORMERS_OFFLINE'] = '1'
else:
    # Also clear a value left behind by an earlier run of this cell.
    os.environ.pop('TRANSFORMERS_OFFLINE', None)

def cleanup_memory():
    """Run Python garbage collection, then empty the CUDA cache if a GPU is present.

    Returns:
        None
    """
    gc.collect()
    if not torch.cuda.is_available():
        # Nothing GPU-side to release on CPU-only runtimes
        return
    torch.cuda.empty_cache()


# Start from a clean slate before the benchmark is launched
cleanup_memory()

## 3. Run KiVA Benchmark

Run the benchmark with memory-optimized settings

In [None]:
# Parameters for the run
# These names are substituted into the shell command below through IPython's
# {name} interpolation. Their exact meaning is defined by the benchmark script,
# which is not part of this notebook -- NOTE(review): confirm against its CLI.
# `concept` is also read by the later result-viewing and analysis cells to
# build the output paths, so it must stay defined under this name.
concept = "2DRotation"  # Options: 2DRotation, Colour, Resize, Reflect, Counting
max_trials = 5
max_regenerations = 1
batch_size = 1

# Run the benchmark
# `!` starts a separate Python process; the trailing backslashes join the
# following lines into one shell command, so no comments may be placed
# between them.
!python chat_systems/chat_system_single_image_kiva_reduced.py \
    --model llava \
    --concept {concept} \
    --max_trials {max_trials} \
    --max_regenerations {max_regenerations} \
    --batch_size {batch_size}

## 4. View Results Side by Side

Display and compare the results

In [None]:
import matplotlib.pyplot as plt
from PIL import Image
import glob

def display_images_side_by_side(img_paths, titles=None, figsize=(15, 5)):
    """Display multiple images side by side in a single row.

    Args:
        img_paths: Iterable of image file paths; one subplot is drawn per path.
        titles: Optional sequence of subplot titles. Images without a matching
            title (when there are fewer titles than images) are left untitled.
        figsize: Figure size forwarded to ``plt.subplots``.

    Returns:
        None. The figure is rendered with ``plt.show()``.
    """
    img_paths = list(img_paths)
    if not img_paths:
        # plt.subplots(1, 0) raises ValueError, so bail out early instead.
        print("No images to display")
        return

    # squeeze=False always yields a 2-D array of axes, which removes the need
    # to special-case a single image.
    _, axes = plt.subplots(1, len(img_paths), figsize=figsize, squeeze=False)

    for i, (ax, img_path) in enumerate(zip(axes[0], img_paths)):
        # Close the file handle as soon as the pixels have been handed to
        # matplotlib, rather than leaving it open until garbage collection.
        with Image.open(img_path) as img:
            ax.imshow(img)
        ax.axis('off')
        if titles and i < len(titles):
            ax.set_title(titles[i])

    plt.tight_layout()
    plt.show()

# Collect the .jpg files from the `<concept>_stitch` output directory
results_dir = f"output/single_image/output_llava/{concept}/{concept}_stitch"
image_files = sorted(glob.glob(f"{results_dir}/*.jpg"))

if not image_files:
    print("No result images found")
else:
    print(f"Found {len(image_files)} result images")
    # Only the first image (in sorted order) is shown as a sample
    display_images_side_by_side(image_files[:1], ["Sample Result"])

## 5. Analyze Performance

View accuracy and performance metrics

In [None]:
import pandas as pd

# Load results CSV
# NOTE(review): the "+90" suffix is hard-coded, so this path presumably only
# exists for the 2DRotation concept -- confirm the file naming for the others.
results_file = f"output/single_image/output_llava/{concept}/{concept}+90.csv"
if os.path.exists(results_file):
    df = pd.read_csv(results_file)

    # Map each reported task to the CSV column that holds its response.
    task_columns = {
        'Cross-Domain': 'MCResponse#1',
        'Within-Domain': 'MCResponse#2',
        'Extrapolation': 'MCResponse#3',
    }

    # Calculate accuracy
    # read_csv infers an integer dtype when a column contains only digits, in
    # which case comparing against the string '1' is False for every row and
    # the accuracy is reported as 0%. Converting to numeric first makes the
    # comparison work for both text and integer columns; values that are not
    # numbers become NaN and count as not matching.
    accuracy = {}
    for task, column in task_columns.items():
        if column not in df.columns:
            print(f"Column {column!r} not found in {results_file}; skipping {task}")
            continue
        accuracy[task] = (pd.to_numeric(df[column], errors='coerce') == 1).mean()

    print("Accuracy Results:")
    for task, acc in accuracy.items():
        print(f"{task}: {acc:.2%}")
else:
    print("No results file found")

## 6. Memory Cleanup

Clean up resources after running

In [None]:
# Final cleanup
cleanup_memory()

# Print final memory status
# memory_allocated() reports what this process currently holds in tensors,
# not what is available, so it is labelled as such. The free/total figures
# come from mem_get_info(), which queries the device itself.
if torch.cuda.is_available():
    free_bytes, total_bytes = torch.cuda.mem_get_info()
    print(f"Allocated GPU memory (this process): {torch.cuda.memory_allocated() / 1e9:.2f} GB")
    print(f"Available GPU memory: {free_bytes / 1e9:.2f} GB of {total_bytes / 1e9:.2f} GB")