In [None]:
import json
import os
import pandas as pd
import ipywidgets as widgets
from IPython.display import display, clear_output, Image, FileLink
from pathlib import Path
import random  # Imported for random assignments

# -------------------------------
# 1. Load Prompts and GIF Paths
# -------------------------------

# Define paths
PROMPTS_FILE = "Text_Prompts.json"
GENERATED_GIFS_DIR = "Generated_GIFs"
RESULTS_FILE = "AB_Test_Results.csv"

# One entry per (theme, prompt) pair for which BOTH models' GIFs exist on disk.
# Each entry is a dict with the keys "Theme", "Prompt", "Model_A" and "Model_B".
evaluation_tasks = []

# Load prompts from JSON file
if not os.path.exists(PROMPTS_FILE):
    print(f"Prompts file '{PROMPTS_FILE}' not found.")
else:
    # JSON text is UTF-8 by specification; relying on the platform default
    # encoding breaks non-ASCII prompts on systems whose locale is not UTF-8.
    with open(PROMPTS_FILE, 'r', encoding='utf-8') as file:
        data = json.load(file)

    for theme in data.get("Themes", []):
        theme_name_original = theme.get("Name", "Unnamed_Theme")
        # Replace spaces with underscores for filename consistency
        theme_name_sanitized = theme_name_original.replace(" ", "_")

        # Prompt numbering in the GIF filenames is 1-based.
        for idx, prompt in enumerate(theme.get("Prompts", []), 1):
            # Incorporate model name into the filename
            filename_a = f"{theme_name_sanitized}_lightning_pipe_Prompt{idx}.gif"
            filename_b = f"{theme_name_sanitized}_original_pipe_Prompt{idx}.gif"

            # Layout: <GENERATED_GIFS_DIR>/<theme>/<pipe name>/<filename>
            model_a_path = os.path.join(GENERATED_GIFS_DIR, theme_name_sanitized, "lightning_pipe", filename_a)
            model_b_path = os.path.join(GENERATED_GIFS_DIR, theme_name_sanitized, "original_pipe", filename_b)

            # Check each path once and reuse the answers below.
            model_a_exists = os.path.exists(model_a_path)
            model_b_exists = os.path.exists(model_b_path)

            if model_a_exists and model_b_exists:
                evaluation_tasks.append({
                    "Theme": theme_name_original,  # Use original name for display
                    "Prompt": prompt,
                    "Model_A": model_a_path,
                    "Model_B": model_b_path
                })
                continue

            # A pair is only usable when both sides exist; report what is missing.
            print(f"Missing GIFs for '{theme_name_original}' Prompt {idx}:")
            if not model_a_exists:
                print(f"  - Model A: {model_a_path}")
            if not model_b_exists:
                print(f"  - Model B: {model_b_path}")

    # Debugging: Print the number of evaluation tasks
    print(f"Total Evaluation Tasks Loaded: {len(evaluation_tasks)}")


# -------------------------------
# 2. Define Evaluation Criteria
# -------------------------------

# Maps each evaluation criterion to the radio-button options offered for it.
# Every criterion gets its own fresh list holding the same three choices.
criteria = {
    criterion_name: ["Left is better", "Right is better", "Indistinguishable"]
    for criterion_name in (
        "Video_Text_Relevance",
        "Appearance_Distortion",
        "Appearance_Aesthetics",
        "Motion_Naturalness",
        "Motion_Amplitude",
        "Overall_Quality",
    )
}

# -------------------------------
# 3. Create Interactive Widgets
# -------------------------------

# Reuse the results CSV when one already exists; otherwise start from an empty
# table with "Theme", "Prompt", a "<criterion>_Selection" / "<criterion>_Model"
# column pair per criterion, and a final "Selected_Model" column.
if not os.path.exists(RESULTS_FILE):
    columns = [
        "Theme",
        "Prompt",
        *(
            f"{criterion}{suffix}"
            for criterion in criteria
            for suffix in ("_Selection", "_Model")
        ),
        "Selected_Model",
    ]
    df_responses = pd.DataFrame(columns=columns)
else:
    df_responses = pd.read_csv(RESULTS_FILE)

# Only set the task index when it is not already present in the global
# namespace, so an existing value is left untouched.
globals().setdefault('current_task', 0)

def _model_for_selection(selection, left_is_model_a):
    """Translate a side-based choice ("Left/Right is better") into "A", "B" or "Indistinguishable"."""
    if selection == "Left is better":
        return "A" if left_is_model_a else "B"
    if selection == "Right is better":
        return "B" if left_is_model_a else "A"
    return "Indistinguishable"


def _majority_model(votes):
    """Return the label with a strict majority over both others, else "Indistinguishable"."""
    votes = list(votes)
    count_a = votes.count("A")
    count_b = votes.count("B")
    count_neither = votes.count("Indistinguishable")

    if count_a > count_b and count_a > count_neither:
        return "A"
    if count_b > count_a and count_b > count_neither:
        return "B"
    # Covers both a clear "Indistinguishable" win and every kind of tie.
    return "Indistinguishable"


def show_evaluation(task):
    """Render one A-B comparison and wire up its Submit button.

    The two GIFs of ``task`` are shown side by side with Model A and Model B
    randomly assigned to the "Left" and "Right" positions, followed by one
    radio-button group per entry in ``criteria`` and a Submit button.

    On submit, the per-criterion selections are mapped back to "A" / "B" /
    "Indistinguishable", an overall "Selected_Model" is derived by majority,
    the row is appended to the global ``df_responses`` and written to
    ``RESULTS_FILE``, and the next task (or the completion screen) is shown.

    Args:
        task: dict with the keys 'Theme', 'Prompt', 'Model_A' and 'Model_B';
            the two model entries are paths to GIF files.

    Returns:
        None. If either GIF cannot be read, an error is printed and the
        function returns without rendering the comparison widgets.
    """
    # Local import so this block does not depend on the file's import header.
    from html import escape

    clear_output(wait=True)

    theme = task['Theme']
    prompt = task['Prompt']

    # Randomly assign models to left and right so raters cannot learn a
    # fixed position for either model.
    left_is_model_a = random.random() < 0.5
    if left_is_model_a:
        left_model, right_model = task['Model_A'], task['Model_B']
    else:
        left_model, right_model = task['Model_B'], task['Model_A']

    # Display Theme and Prompt. The text comes from the prompts file and is
    # escaped so characters such as '<' or '&' show up literally instead of
    # being interpreted as markup.
    display(widgets.HTML(f"<h2>Theme: {escape(str(theme))}</h2>"))
    display(widgets.HTML(f"<h4>Prompt: {escape(str(prompt))}</h4>"))

    # Read the raw GIF bytes for the two image widgets.
    try:
        with open(left_model, "rb") as left_file:
            left_gif = left_file.read()
        with open(right_model, "rb") as right_file:
            right_gif = right_file.read()
    except OSError as e:
        print(f"Error loading GIFs: {e}")
        return

    # Display GIFs side by side with labels "Left" and "Right"
    box = widgets.HBox([
        widgets.VBox([
            widgets.Label("Left"),
            widgets.Image(value=left_gif, format='gif', width=300)
        ]),
        widgets.VBox([
            widgets.Label("Right"),
            widgets.Image(value=right_gif, format='gif', width=300)
        ])
    ])
    display(box)

    # Create radio buttons for each criterion.
    # NOTE(review): no initial value is passed, so whatever the widget selects
    # by default is what gets submitted if the rater never touches a group —
    # confirm this is intended.
    response_widgets = {}
    for criterion, options in criteria.items():
        radio = widgets.RadioButtons(
            options=options,
            description=f"{criterion.replace('_', ' ')}:",
            disabled=False
        )
        response_widgets[criterion] = radio
        display(radio)

    # Submit button
    submit_button = widgets.Button(description="Submit", button_style='success')
    display(submit_button)

    def on_submit(b):
        global current_task, df_responses

        # A second click queued before the view is replaced would record the
        # same task twice and skip the following one; accept only the first.
        if b.disabled:
            return
        b.disabled = True

        responses = {}
        models = {}
        for criterion, widget in response_widgets.items():
            selection = widget.value
            responses[f"{criterion}_Selection"] = selection
            # Map the side-based selection back to the model shown on that side.
            models[f"{criterion}_Model"] = _model_for_selection(selection, left_is_model_a)

        # Determine overall selected model based on majority
        overall_selected_model = _majority_model(models.values())

        # Prepare new response entry
        new_entry = {
            "Theme": theme,
            "Prompt": prompt,
            "Selected_Model": overall_selected_model
        }
        new_entry.update(responses)
        new_entry.update(models)

        # Append to DataFrame
        new_response = pd.DataFrame([new_entry])
        df_responses = pd.concat([df_responses, new_response], ignore_index=True)

        # Save to CSV after every submission. A failed save is reported but
        # does not stop the session; the row stays in df_responses.
        try:
            df_responses.to_csv(RESULTS_FILE, index=False)
            print(f"Response saved for Theme: '{theme}', Prompt: '{prompt}'")
        except Exception as e:
            print(f"Error saving response: {e}")

        # Move to next task
        current_task += 1

        if current_task < len(evaluation_tasks):
            show_evaluation(evaluation_tasks[current_task])
        else:
            show_completion()

    submit_button.on_click(on_submit)

def show_completion():
    """Clear the output and show the completion message with a results download button.

    Clicking the button displays a ``FileLink`` pointing at ``RESULTS_FILE``.
    """
    clear_output(wait=True)

    # Heading first, then the thank-you paragraph.
    for markup in (
        "<h2>Evaluation Completed</h2>",
        "<p>Thank you for completing the A-B test evaluation!</p>",
    ):
        display(widgets.HTML(markup))

    def reveal_link(_button):
        # Called on each click; shows a link to the results file.
        link = FileLink(RESULTS_FILE, result_html_prefix="Click here to download the results: ")
        display(link)

    # Download button for results
    download_button = widgets.Button(description="Download Results", button_style='info')
    display(download_button)
    download_button.on_click(reveal_link)

# -------------------------------
# 4. Start Evaluation
# -------------------------------

# Entry point: nothing to evaluate -> explain why; all tasks already done ->
# completion screen; otherwise show the task at the current index.
if not evaluation_tasks:
    print("No evaluation tasks found. Please ensure that 'Text_Prompts.json' is populated with prompts and corresponding GIFs exist.")
elif current_task >= len(evaluation_tasks):
    show_completion()
else:
    show_evaluation(evaluation_tasks[current_task])

HTML(value='<h2>Evaluation Completed</h2>')

HTML(value='<p>Thank you for completing the A-B test evaluation!</p>')

Button(button_style='info', description='Download Results', style=ButtonStyle())