In [9]:
import json
import re
import ipywidgets as widgets
from IPython.display import display, clear_output

# -------------------------------
# Configuration: path of the JSONL file to load into the viewer.
# The file is read in full and every non-blank line is parsed as one JSON value.
jsonl_file = "smiles2gen.jsonl"  # <-- Change this to your file's path
# -------------------------------

# Parse the JSONL file content into a list of dictionaries
def parse_jsonl(file_content):
    """Parse JSONL text into a list with one decoded JSON value per record.

    Args:
        file_content: The full text of a JSONL document.

    Returns:
        A list holding the result of ``json.loads`` for every non-blank
        line, in document order. Blank and whitespace-only lines are skipped.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Split on CR/LF only. ``str.splitlines()`` also breaks on U+2028, U+2029
    # and U+0085, which may legally appear unescaped inside JSON strings and
    # would therefore cut a record in half. A "\r\n" pair produces an empty
    # piece between the two characters, which the blank-line filter drops.
    return [
        json.loads(line)
        for line in re.split(r"[\r\n]", file_content)
        if line.strip()
    ]

def extract_guess(completion):
    """
    Extracts the content inside the first <answer>...</answer> pair.

    Args:
        completion: The completion text to search. Any non-string value
            (e.g. None coming from a JSON null) is treated as having no answer.

    Returns:
        The text between the tags with surrounding whitespace stripped, or
        None if the input is not a string or contains no <answer> block.
    """
    # re.search raises TypeError on non-string input; report "no answer" instead.
    if not isinstance(completion, str):
        return None
    # DOTALL lets the answer span several lines; the non-greedy group stops at
    # the first closing tag.
    match = re.search(r'<answer>(.*?)</answer>', completion, re.DOTALL)
    if match is None:
        return None
    return match.group(1).strip()

# Create the interactive viewer widget given the parsed data list
def create_viewer(data):
    """Build an interactive widget for browsing records one at a time.

    For the record selected by the slider the viewer shows: the index and
    ground truth, the image read from ``synthetic/indigo_resize/`` plus the
    record's ``example_image_path``, the completion text, the guess extracted
    from the completion inside a colored border, and a table of whichever
    training statistics are present on the record.

    The border is ``lightgreen`` when the guess equals the ground truth,
    ``salmon`` when it differs, and ``lightgray`` when either one is missing.

    Args:
        data: Sequence of dict records. The keys read are
            ``example_completion``, ``example_ground_truth``,
            ``example_image_path`` and the statistics keys listed below.

    Returns:
        A ``widgets.VBox`` holding the index slider and the output area. If
        ``data`` is empty, a ``widgets.VBox`` holding only a message label.
    """
    import html  # local import so this block does not rely on the file header

    # With no records the slider would get max=-1 and data[0] would fail.
    if not data:
        return widgets.VBox([widgets.Label(value="No records to display.")])

    index_slider = widgets.IntSlider(
        min=0, max=len(data) - 1, step=1, description="Index:", continuous_update=False
    )
    output = widgets.Output()

    stats_keys = ("loss", "grad_norm", "learning_rate", "reward", "completion_length", "kl", "epoch")
    cell_style = "padding:4px; border: 1px solid gray;"

    def update_view(change):
        """Redraw the output area for the slider's current index."""
        with output:
            clear_output()
            i = index_slider.value
            item = data[i]

            # A JSON null completion is treated the same as a missing one.
            completion_text = item.get("example_completion") or ""

            # Extract the guess and compare with ground truth to choose a color
            guess = extract_guess(completion_text)
            ground_truth = item.get("example_ground_truth", None)
            if guess is not None and ground_truth is not None:
                color = 'lightgreen' if guess == ground_truth else 'salmon'
            else:
                color = 'lightgray'

            # Display the index and ground truth
            print(f"Index: {i}")
            print(f"Ground Truth: {ground_truth}")

            # Display the image using the provided image path
            image_path = item.get("example_image_path", "")
            try:
                with open(f"synthetic/indigo_resize/{image_path}", "rb") as f:
                    image_data = f.read()
            except (OSError, ValueError):
                # OSError: missing/unreadable file; ValueError: malformed path.
                image_widget = widgets.Label(value=f"Image not found: {image_path}")
            else:
                image_widget = widgets.Image(value=image_data)
            display(image_widget)

            # Display the full completion text. It is escaped so that tags such
            # as <answer> are shown literally instead of being parsed as HTML.
            safe_completion = html.escape(str(completion_text))
            display(widgets.HTML(value=f"<h4>Completion:</h4><p>{safe_completion}</p>"))

            # Display our guess in a colored box
            guess_str = str(guess) if guess is not None else "N/A"
            guess_label = widgets.Label(value="Our Guess: " + guess_str)
            guess_box = widgets.Box([guess_label])
            guess_box.layout = widgets.Layout(
                border=f'3px solid {color}', padding='5px', margin='5px'
            )
            display(guess_box)

            # Create a small table with the statistics present on this record
            table_rows = "".join(
                f"<tr><td style='{cell_style}'>{key}</td>"
                f"<td style='{cell_style}'>{html.escape(str(item[key]))}</td></tr>"
                for key in stats_keys
                if key in item
            )
            table_html = f"""
            <h4>Statistics</h4>
            <table style='border-collapse: collapse;'>
                <tbody>{table_rows}</tbody>
            </table>
            """
            display(widgets.HTML(value=table_html))

    # Update the viewer when the slider value changes
    index_slider.observe(update_view, names='value')
    # Render the first record immediately; the callback ignores its argument.
    update_view(None)

    return widgets.VBox([index_slider, output])

# Load and parse the JSONL file.
# The encoding is given explicitly: without it open() uses the platform's
# locale encoding, which can mis-decode non-ASCII records on some systems.
with open(jsonl_file, "r", encoding="utf-8") as f:
    content = f.read()
data = parse_jsonl(content)

# Create and display the viewer widget
viewer = create_viewer(data)
display(viewer)


VBox(children=(IntSlider(value=0, continuous_update=False, description='Index:', max=1703), Output()))