In [1]:
import polars as pl
from ipywidgets import interact, IntSlider
from IPython.display import display, HTML
import base64
import html

# Location of the Parquet file that holds the tasks to browse
PARQUET_PATH = '/home/jkp/Téléchargements/zerobench_subquestions-00000-of-00001.parquet'

# Read the whole file into a polars DataFrame
df = pl.read_parquet(PARQUET_PATH)

def get_mime_type(path):
    """
    Determine the MIME type based on the file extension in the path.

    The match is case-insensitive. A missing path (``None`` or an empty
    string) and any unrecognised extension fall back to ``'image/png'``.

    Args:
        path (str | None): The file path or identifier of the image.

    Returns:
        str: The corresponding MIME type (e.g., 'image/png').
    """
    default_mime = 'image/png'

    # An image record may carry no path at all; calling .lower() on None
    # would raise, so fall back to the default straight away.
    if not path:
        return default_mime

    # Ordered (suffixes, MIME type) pairs; str.endswith accepts a tuple.
    suffix_to_mime = (
        (('.png',), 'image/png'),
        (('.jpg', '.jpeg'), 'image/jpeg'),
        (('.gif',), 'image/gif'),
        (('.webp',), 'image/webp'),
        (('.bmp',), 'image/bmp'),
    )

    lowered = str(path).lower()
    for suffixes, mime_type in suffix_to_mime:
        if lowered.endswith(suffixes):
            return mime_type
    return default_mime  # Default to PNG if unknown

def display_task(row):
    """
    Display a single task's details, including text and images.

    The question id, text and answer are HTML-escaped and rendered as a
    heading and two paragraphs. Each entry of
    ``row['question_images_decoded']`` is embedded as a base64 ``data:`` URI
    inside a flex container; when there are no images, a placeholder message
    is shown instead.

    Args:
        row (dict): A dictionary representing a row from the DataFrame. It
            must provide the keys 'question_id', 'question_text',
            'question_answer' and 'question_images_decoded'; each image
            entry must provide 'bytes' and 'path'.
    """
    # Escape text fields to prevent HTML injection. Every field goes through
    # str() first so that a non-string value (e.g. a numeric answer) is
    # rendered instead of raising inside html.escape.
    question_id = html.escape(str(row['question_id']))
    question_text = html.escape(str(row['question_text']))
    question_answer = html.escape(str(row['question_answer']))

    # Collect fragments and join once instead of growing a string with +=
    parts = [
        f"<h3>Question ID: {question_id}</h3>",
        f"<p><strong>Question Text:</strong> {question_text}</p>",
        f"<p><strong>Answer:</strong> {question_answer}</p>",
    ]

    # Handle images
    images = row['question_images_decoded']
    if images:
        parts.append("<div style='display: flex; flex-wrap: wrap;'>")
        for img in images:
            # A missing path is passed on as '' so the MIME lookup falls
            # back to its default rather than failing on None.
            mime_type = get_mime_type(img['path'] or '')
            img_base64 = base64.b64encode(img['bytes']).decode('utf-8')
            parts.append(
                f"<img src='data:{mime_type};base64,{img_base64}' "
                f"style='width: 300px; height: auto; margin: 10px;' />"
            )
        parts.append("</div>")
    else:
        parts.append("<p>No images available for this task.</p>")

    # Display the HTML
    display(HTML("".join(parts)))

def display_task_by_index(index):
    """
    Look up the DataFrame row at the given position and render it.

    Args:
        index (int): Position of the row in ``df`` to show.
    """
    display_task(df.row(index, named=True))

# Slider covering every row position of the DataFrame
last_index = len(df) - 1
slider = IntSlider(
    value=0,
    min=0,
    max=last_index,
    step=1,
    description='Task Index',
)

# Hook the slider up so the selected task is shown via display_task_by_index
interact(display_task_by_index, index=slider)

interactive(children=(IntSlider(value=0, description='Task Index', max=333), Output()), _dom_classes=('widget-…

<function __main__.display_task_by_index(index)>