# Unified VQA4Mix Pipeline

This notebook combines the functionality from all category-specific pipelines (food, painting, people, cat) into a single unified pipeline.

## Import Required Libraries

In [None]:
import sys
import os
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random
import skimage.io as io
from PIL import Image

# Add the parent directory to the module search path so the `src.*` imports
# below can be resolved. The path is relative, so it is interpreted against
# the current working directory.
# NOTE(review): presumably the notebook is run from a direct subdirectory of
# the project root — confirm.
sys.path.append('..')

# Import project modules
from src.data_processing.data_loader import load_json_data, load_annotation_data, save_json_data, convert_df_to_json
from src.model.question_generator import generate_random_choice, generate_multiple_choice_question
from src.utils.image_augmentation import load_image, apply_augmentations
from src.utils.evaluation import calculate_accuracy, calculate_accuracy_by_difficulty, plot_accuracy_comparison
from src.visualization.plotting import plot_confusion_matrix, plot_accuracy_by_category

# Set pandas display options: per the pandas options documentation, a
# `display.max_colwidth` of None means no limit on the displayed column width,
# so long cell values are not truncated when a DataFrame is shown.
pd.set_option('display.max_colwidth', None)

## Configuration

In [None]:
# All four path tables share the same keys (the category names) and keep the
# same key order: food, painting, people, cat.
# NOTE(review): the 'difficulies' spelling in some file names is part of the
# path itself — presumably it matches files already on disk, so it is kept.

# Input annotation file of each category
DATA_PATHS = dict(
    food='../data/food/food_annotation.json',
    painting='../data/painting/paintings.json',
    people='../data/people/people_data.json',
    cat='../data/cat/upking_data.json',
)

# Where the data with generated multiple-choice questions is written
OUTPUT_PATHS = dict(
    food='../data/food/food_annotation_with_MCQ_3_difficulies.json',
    painting='../data/painting/paintings_with_MCQ_3diff.json',
    people='../data/people/people_annotation_with_MCQ_3_difficulies.json',
    cat='../data/cat/upking_annotation_with_MCQ_3_difficulies.json',
)

# Where the model predictions on the original images are written
RESULT_PATHS = dict(
    food='../data/food/food_annotation_with_MCQ_result_3_difficulties.json',
    painting='../data/painting/paintings_with_MCQ_3diff_result.json',
    people='../data/people/people_annotation_with_MCQ_result_3_difficulties.json',
    cat='../data/cat/upking_annotation_with_MCQ_result_3_difficulties.json',
)

# Where the model predictions on augmented images are written
AUGMENTED_RESULT_PATHS = dict(
    food='../data/food/food_annotation_with_MCQ_result_3_difficulties_with_image_augmentation.json',
    painting='../data/painting/paintings_with_MCQ_3diff_result_augmentation.json',
    people='../data/people/people_annotation_with_MCQ_result_3_difficulties_with_image_augmentation.json',
    cat='../data/cat/upking_annotation_with_MCQ_result_3_difficulties_with_image_augmentation.json',
)

# Category to process: 'food', 'painting', 'people' or 'cat'.
# Leave as None to process every category.
CATEGORY = None

## Load Dataset

In [None]:
def load_category_data(category):
    """Load the annotation records of one category.

    The input file is looked up in ``DATA_PATHS`` and read with
    ``load_annotation_data``. Progress and failures are reported on stdout.

    Args:
        category: Key into ``DATA_PATHS``. The lookup happens outside the
            ``try`` block, so an unknown key raises ``KeyError``.

    Returns:
        The object returned by ``load_annotation_data`` (the rest of the
        notebook uses it as a DataFrame), or ``None`` when loading raised.
    """
    source_path = DATA_PATHS[category]
    print(f"Loading {category} data from {source_path}")

    try:
        records = load_annotation_data(source_path)
        print(f"Loaded {len(records)} records for {category}")
    except Exception as err:
        # Best effort: a category that cannot be read is reported and
        # represented by None so the other categories can still be processed.
        print(f"Error loading {category} data: {err}")
        return None
    return records

# Load either the single category named by CATEGORY or, when CATEGORY is
# None, every category that has an entry in DATA_PATHS.
category_data = {}
for category in (DATA_PATHS if CATEGORY is None else [CATEGORY]):
    category_data[category] = load_category_data(category)

## Examine Data Structure

In [None]:
# Show the column names and the first row of every category that was loaded
for category, df in category_data.items():
    if df is None:
        # Loading failed for this category, so there is nothing to show.
        continue
    print(f"\n{category.upper()} DATA STRUCTURE:")
    print(f"Columns: {df.columns.tolist()}")
    print("Sample row:")
    display(df.head(1))

## Generate Multiple Choice Questions

In [None]:
def generate_mcq_for_category(df, category, sample_size=None):
    """Generate multiple choice questions for a category.

    For every row, a solution is drawn with ``generate_random_choice`` and one
    question per difficulty level (easy, medium, hard) is built from the row's
    caption with ``generate_multiple_choice_question``. The resulting frame is
    written to ``OUTPUT_PATHS[category]`` through ``convert_df_to_json``.

    Args:
        df: DataFrame holding the captions in a ``captions`` column or,
            failing that, a ``reference_caption`` column.
        category: Key into ``OUTPUT_PATHS`` (also used in progress messages).
        sample_size: When given, only the first ``sample_size`` rows are
            processed; ``None`` processes every row.

    Returns:
        A new DataFrame with the added columns ``multiple_choice_solution``
        and ``multiple_choice_question_<level>``. The input frame is left
        unmodified.
    """
    print(f"Generating MCQs for {category}...")

    # Work on an explicit copy. Assigning columns to the slice returned by
    # ``head()`` raises pandas' SettingWithCopyWarning, and assigning to the
    # argument itself would silently add columns to the caller's frame.
    if sample_size is not None:
        df = df.head(sample_size)
    df = df.copy()

    # Generate random choices for each row
    df['multiple_choice_solution'] = df.apply(lambda x: generate_random_choice(), axis=1)

    # Determine the caption column name based on the category
    caption_col = 'captions' if 'captions' in df.columns else 'reference_caption'

    def first_caption(row):
        # A caption cell may be a list of captions; only the first one is used.
        caption = row[caption_col]
        return caption[0] if isinstance(caption, list) else caption

    # Generate questions for each difficulty level
    for level in ['easy', 'medium', 'hard']:
        col_name = f'multiple_choice_question_{level}'
        print(f"Generating {level} questions...")

        df[col_name] = df.apply(
            lambda x: generate_multiple_choice_question(
                first_caption(x),
                x['multiple_choice_solution'],
                level=level
            ),
            axis=1
        )

    # Save the results
    output_path = OUTPUT_PATHS[category]
    convert_df_to_json(df, output_path)

    return df

# Build the MCQ columns for every category whose data was loaded
mcq_data = {}
for category, df in category_data.items():
    if df is None:
        # Loading failed for this category; skip it.
        continue
    # Use a small sample size for demonstration
    sample_size = 5  # Set to None to process all data
    mcq_data[category] = generate_mcq_for_category(df, category, sample_size)

## Load LLaVA Model for Inference

In [None]:
from src.model.inference import load_llava_model, perform_multiple_choice_task

# Load the LLaVA model. On failure both handles are set to None, which the
# inference step checks before running.
model_path = '/path/to/llava-model'  # Update with the actual model path
try:
    processor, model = load_llava_model(model_path)
except Exception as err:
    print(f"Error loading LLaVA model: {err}")
    processor, model = None, None
else:
    print("LLaVA model loaded successfully")

## Perform Multiple Choice Task

In [None]:
def perform_mcq_task_for_category(df, category, processor, model, use_augmentation=False):
    """Perform multiple choice task for a category.

    For each difficulty level (easy, medium, hard) every row's question is
    answered with ``perform_multiple_choice_task`` and the answers are stored
    in the column ``multiple_choice_prediction_<level>``. The frame is then
    written with ``convert_df_to_json`` to ``AUGMENTED_RESULT_PATHS[category]``
    when ``use_augmentation`` is true, otherwise to ``RESULT_PATHS[category]``.

    Args:
        df: DataFrame with the ``multiple_choice_question_<level>`` columns
            and an image location in ``file_path`` or, failing that,
            ``img_url``.
        category: Key into the result path tables.
        processor: Processor returned by ``load_llava_model``, or ``None``.
        model: Model returned by ``load_llava_model``, or ``None``.
        use_augmentation: When true, each image is loaded, passed through
            ``apply_augmentations`` and saved to a temporary JPEG that is used
            for inference instead of the original image.

    Returns:
        ``df`` itself, with the prediction columns added in place. When
        ``processor`` or ``model`` is ``None`` the frame is returned untouched
        and nothing is saved. A row whose inference raises gets ``None`` as
        its prediction.
    """
    # Imported locally so this block does not rely on a file-level import.
    import tempfile

    if processor is None or model is None:
        print("Model not loaded. Skipping inference.")
        return df

    print(f"Performing MCQ task for {category}...")

    # Determine the image path column name based on the category
    img_path_col = 'file_path' if 'file_path' in df.columns else 'img_url'

    # Perform inference for each difficulty level
    for level in ['easy', 'medium', 'hard']:
        question_col = f'multiple_choice_question_{level}'
        prediction_col = f'multiple_choice_prediction_{level}'

        print(f"Processing {level} questions...")

        predictions = []
        for _, row in df.iterrows():
            img_path = row[img_path_col]
            question = row[question_col]

            temp_path = None
            try:
                if use_augmentation:
                    augmented_img = apply_augmentations(load_image(img_path))
                    # mkstemp guarantees a fresh, unique file. A hand-rolled
                    # random name could collide with an existing file, which
                    # the cleanup below would then delete.
                    fd, temp_path = tempfile.mkstemp(prefix="temp_augmented_", suffix=".jpg")
                    os.close(fd)  # only the path is needed; PIL reopens it
                    Image.fromarray(augmented_img).save(temp_path)
                    img_path = temp_path

                # Inference is best effort: a failing row is recorded as None
                # so the remaining rows are still processed.
                try:
                    answer = perform_multiple_choice_task(processor, model, img_path, question)
                    predictions.append(answer)
                except Exception as e:
                    print(f"Error performing inference: {e}")
                    predictions.append(None)
            finally:
                # Always remove the temporary image, even when augmentation
                # or saving raised part-way through.
                if temp_path is not None and os.path.exists(temp_path):
                    os.remove(temp_path)

        # Add predictions to the DataFrame
        df[prediction_col] = predictions

    # Save the results
    output_path = AUGMENTED_RESULT_PATHS[category] if use_augmentation else RESULT_PATHS[category]
    convert_df_to_json(df, output_path)

    return df

# Run the multiple-choice inference for every category that has MCQ data
result_data = {}
for category, df in mcq_data.items():
    if df is None:
        continue
    # Set use_augmentation to True to use image augmentation
    use_augmentation = False
    result_data[category] = perform_mcq_task_for_category(df, category, processor, model, use_augmentation)

## Evaluate Results

In [None]:
def evaluate_results_for_category(df, category):
    """Compute and plot the accuracy per difficulty level for one category.

    For each level (easy, medium, hard) whose prediction column
    ``multiple_choice_prediction_<level>`` exists in ``df``, the predictions
    are scored against ``multiple_choice_solution`` with
    ``calculate_accuracy`` and the result is printed. If at least one level
    was scored, the accuracies are plotted with ``plot_accuracy_comparison``.

    Args:
        df: DataFrame holding the solution and prediction columns.
        category: Category name, used in messages and the plot title.

    Returns:
        Dict mapping each scored level to its accuracy; empty when no
        prediction column is present.
    """
    print(f"Evaluating results for {category}...")

    accuracies = {}
    for level in ('easy', 'medium', 'hard'):
        prediction_col = f'multiple_choice_prediction_{level}'
        if prediction_col not in df.columns:
            # No predictions were produced for this level.
            continue
        score = calculate_accuracy(df[prediction_col], df['multiple_choice_solution'])
        accuracies[level] = score
        print(f"{level.capitalize()} accuracy: {score:.2%}")

    # Only plot when there is something to show
    if accuracies:
        plot_accuracy_comparison(accuracies, title=f"{category.capitalize()} - Accuracy by Difficulty")
        plt.show()

    return accuracies

# Score every category for which inference results are available
evaluation_results = {}
for category, df in result_data.items():
    if df is None:
        continue
    evaluation_results[category] = evaluate_results_for_category(df, category)

## Compare Results Across Categories

In [None]:
def compare_results_across_categories(evaluation_results):
    """Plot, per difficulty level, the accuracy of every category.

    Args:
        evaluation_results: Dict mapping a category name to a dict of
            ``{level: accuracy}``. Categories that lack a level are left out
            of that level's plot.

    Returns:
        None. When ``evaluation_results`` is empty a message is printed and
        nothing is plotted.
    """
    if not evaluation_results:
        print("No evaluation results to compare.")
        return

    for level in ('easy', 'medium', 'hard'):
        # Accuracy of each category that was scored at this level
        per_category = {
            name: scores[level]
            for name, scores in evaluation_results.items()
            if level in scores
        }
        if not per_category:
            continue
        plot_accuracy_comparison(per_category, title=f"{level.capitalize()} Difficulty - Accuracy by Category")
        plt.show()

# Compare results across categories: one accuracy plot per difficulty level,
# built from the per-category accuracies collected in evaluation_results.
compare_results_across_categories(evaluation_results)

## Conclusion

This unified pipeline combines the functionality of the category-specific pipelines (food, painting, people, cat) into a single workflow. It loads the data for one or all categories, generates multiple-choice questions at three difficulty levels (easy, medium, hard), runs inference with the LLaVA model (optionally on augmented images), and evaluates the accuracy per difficulty level and per category.