# What's here
Used to evaluate the output JSONL files produced by the LetsDriftGemma2_x experiments.

Can be used while the model is generating the answers.


# SETUP NOTEBOOK (filename.jsonl)
Conda Env lets_drift_env_V1

In [13]:
# Path of the experiment output file (JSONL) that the cells below read.
jsonl_file_name = '60to150USMLE_pirate.jsonl'

### Imports

In [14]:
from rich.console import Console
from rich.table import Table
import json

from ipywidgets import interact, IntSlider, Button, Text, HBox, VBox, Layout
from IPython.display import clear_output
import ipywidgets as widgets


# Complete Table with steered and unsteered answers

In [15]:
def create_results_table(jsonl_file):
    """Print one rich table with the unsteered and steered answers of every row.

    Each line of ``jsonl_file`` is expected to hold several JSON objects
    written back to back: first a metadata object (``Prompt``,
    ``Ref_answer``), then an object with ``Answer_No_Steering`` and then one
    object per steered run (``Answer_Steered0``, ``Answer_Steered1``, ...).
    Lines with fewer than two objects are skipped silently; lines that are
    not valid JSON are reported on stdout and skipped.

    Args:
        jsonl_file: Path of the experiment output file to render.

    Raises:
        KeyError: If a row lacks ``Prompt``, ``Ref_answer`` or
            ``Answer_No_Steering``.
    """
    def parse_objects(text):
        """Return every JSON value written back to back in ``text``."""
        # raw_decode consumes exactly one value and reports where it ended,
        # so answer text that itself contains "}{" cannot break the split.
        decoder = json.JSONDecoder()
        found = []
        pos, end = 0, len(text)
        while pos < end:
            if text[pos].isspace():
                # raw_decode does not skip leading whitespace on its own
                pos += 1
                continue
            value, pos = decoder.raw_decode(text, pos)
            found.append(value)
        return found

    # Create table
    table = Table(title="Model Responses")

    # Add columns
    table.add_column("Prompt", style="cyan", no_wrap=False)
    table.add_column("Ref_Answer", style="magenta")
    table.add_column("Unsteered_Answer", style="green", no_wrap=False)
    table.add_column("Steered_Answer_0", style="yellow", no_wrap=False)
    table.add_column("Steered_Answer_1", style="yellow", no_wrap=False)

    # Read and parse JSONL file
    with open(jsonl_file, 'r', encoding='utf-8') as file:
        for line in file:
            try:
                json_objects = parse_objects(line)
            except json.JSONDecodeError as e:
                print(f"Error parsing JSON: {e}")
                continue

            # We need at least metadata and unsteered response
            if len(json_objects) < 2:
                continue

            metadata, unsteered = json_objects[0], json_objects[1]

            # Steered answers follow the first two objects; the n-th one is
            # stored under the key Answer_Steered<n>.
            steered_answers = [
                obj[f'Answer_Steered{n}'][0]
                for n, obj in enumerate(json_objects[2:])
                if f'Answer_Steered{n}' in obj
            ]

            # Pad with "N/A" so both steered columns are always filled
            steered_answers += ["N/A"] * (2 - len(steered_answers))

            # Add all data in one row
            table.add_row(
                metadata['Prompt'],
                metadata['Ref_answer'],
                unsteered['Answer_No_Steering'][0],
                *steered_answers
            )

    # Display table
    console = Console()
    console.print(table)



In [16]:
# Use the function
# create_results_table(jsonl_file_name)

# FIND Metadata

In [17]:
def display_metadata(jsonl_file):
    """Print the distinct run parameters found in an experiment output file.

    Only the first JSON object of every line (the metadata object) is
    inspected.  For each tracked parameter the set of distinct values, as
    strings, is collected over the whole file and shown in a rich table,
    followed by a plain-text count of unique values per parameter.
    Parameters that never occur are omitted from both outputs.

    Args:
        jsonl_file: Path of the experiment output file to summarise.
    """
    # Create a dictionary to store unique metadata values
    metadata = {
        'Dataset': set(),
        'Model': set(),
        'SAE_release': set(),
        'SAE_ID': set(),
        'Steered_Latent_ID': set(),
        'Steering_coefficient': set(),
        'Temperature': set(),
        'Frequency_penalty': set(),
        'Verbose': set(),
        'Incremental_temperature': set(),
    }

    # Create table for display
    table = Table(title="File Metadata")
    table.add_column("Parameter", style="cyan")
    table.add_column("Values", style="yellow", no_wrap=False)

    decoder = json.JSONDecoder()

    # Read and parse JSONL file
    with open(jsonl_file, 'r', encoding='utf-8') as file:
        for line in file:
            first_json = line.strip()
            if not first_json:
                # Blank lines carry no metadata and are not an error
                continue

            # Each line may contain multiple JSON objects concatenated.
            # raw_decode parses just the first one and ignores the rest, so
            # the result does not depend on "}{" never occurring inside text.
            try:
                json_obj, _ = decoder.raw_decode(first_json)
            except json.JSONDecodeError as e:
                print(f"Error parsing metadata JSON: {e}")
                print(f"Problematic JSON: {first_json[:100]}...")
                continue

            if not isinstance(json_obj, dict):
                continue

            # Extract metadata values if they exist
            for key, seen in metadata.items():
                if key in json_obj:
                    # Convert to string so values of mixed types can be sorted
                    seen.add(str(json_obj[key]))

    # Add rows to table
    for key, values in metadata.items():
        if values:  # Only add row if we found values
            table.add_row(
                key.replace('_', ' ').title(),  # Format key name
                '\n'.join(sorted(values))  # Sort and join values
            )

    # Display table
    console = Console()
    console.print(table)

    # Also print raw counts for verification
    print("\nRaw counts:")
    for key, values in metadata.items():
        if values:
            print(f"{key}: {len(values)} unique values")

In [18]:
# Summarise the run parameters recorded in the selected output file
display_metadata(jsonl_file_name)


Raw counts:
Dataset: 1 unique values
Model: 1 unique values
SAE_release: 1 unique values
SAE_ID: 1 unique values
Steered_Latent_ID: 1 unique values
Steering_coefficient: 1 unique values
Temperature: 1 unique values
Frequency_penalty: 2 unique values
Verbose: 1 unique values
Incremental_temperature: 1 unique values


In [19]:
# Peek at the start of the first row to see how the objects on a line are laid out
with open(jsonl_file_name, 'r') as file:
    first_line = file.readline()
    print("First line structure:")
    print(first_line[:200])  # Print first 200 chars to see structure

First line structure:
{"Dataset": "60_150USMLE", "Prompt_ID": 0, "Model": "gemma-2-9b-it", "SAE_release": "gemma-scope-9b-it-res-canonical", "SAE_ID": "layer_9/width_131k/canonical", "Steered_Latent_ID": 29917, "Steering_c


# DISPLAY individual rows with slider

Works while model is generating the answers :)

In [20]:
from ipywidgets import interact, IntSlider
import ipywidgets as widgets
def create_results_table(jsonl_file, start_row=None, end_row=None):
    """Print a rich table with the answers of a range of rows.

    Each line of ``jsonl_file`` is expected to hold several JSON objects
    written back to back: first a metadata object (``Prompt``,
    ``Ref_answer``), then an object with ``Answer_No_Steering`` and then one
    object per steered run (``Answer_Steered0``, ``Answer_Steered1``, ...).
    Lines with fewer than two objects are skipped silently; lines that are
    not valid JSON are reported on stdout and skipped.

    Args:
        jsonl_file: Path of the experiment output file to render.
        start_row: Zero-based index of the first row to show.  ``None``
            (default) starts at the first row; negative values are clamped
            to 0.
        end_row: Index one past the last row to show.  ``None`` (default)
            or a value beyond the file shows everything up to the last row.

    Raises:
        KeyError: If a row lacks ``Prompt``, ``Ref_answer`` or
            ``Answer_No_Steering``.
    """
    def parse_objects(text):
        """Return every JSON value written back to back in ``text``."""
        # raw_decode consumes exactly one value and reports where it ended,
        # so answer text that itself contains "}{" cannot break the split.
        decoder = json.JSONDecoder()
        found = []
        pos, end = 0, len(text)
        while pos < end:
            if text[pos].isspace():
                # raw_decode does not skip leading whitespace on its own
                pos += 1
                continue
            value, pos = decoder.raw_decode(text, pos)
            found.append(value)
        return found

    # Create table
    table = Table(title="Model Responses")

    # Add columns
    table.add_column("Row", style="white")
    table.add_column("Prompt", style="cyan", no_wrap=False)
    table.add_column("Ref_Answer", style="magenta")
    table.add_column("Unsteered_Answer", style="green", no_wrap=False)
    table.add_column("Steered_Answer_0", style="yellow", no_wrap=False)
    table.add_column("Steered_Answer_1", style="yellow", no_wrap=False)

    # Read the whole file so the requested range can be sliced out
    with open(jsonl_file, 'r', encoding='utf-8') as file:
        lines = file.readlines()

    # Handle row selection
    if start_row is None:
        start_row = 0
    if end_row is None:
        end_row = len(lines)

    # Ensure valid row ranges
    start_row = max(0, start_row)
    end_row = min(len(lines), end_row)

    # Process selected rows; row_idx is the row's index in the file
    for row_idx, line in enumerate(lines[start_row:end_row], start=start_row):
        try:
            json_objects = parse_objects(line)
        except json.JSONDecodeError as e:
            print(f"Error parsing JSON in row {row_idx}: {e}")
            continue

        # We need at least metadata and unsteered response
        if len(json_objects) < 2:
            continue

        metadata, unsteered = json_objects[0], json_objects[1]

        # Steered answers follow the first two objects; the n-th one is
        # stored under the key Answer_Steered<n>.
        steered_answers = [
            obj[f'Answer_Steered{n}'][0]
            for n, obj in enumerate(json_objects[2:])
            if f'Answer_Steered{n}' in obj
        ]

        # Pad with "N/A" so both steered columns are always filled
        steered_answers += ["N/A"] * (2 - len(steered_answers))

        # Add row number to the table
        table.add_row(
            str(row_idx),  # Row number
            metadata['Prompt'],
            metadata['Ref_answer'],
            unsteered['Answer_No_Steering'][0],
            *steered_answers
        )

    console = Console()
    console.print(table)

# Example usage:
# Show all rows
#create_results_table('output.jsonl')
# Show rows 5 to 10
# create_results_table('output.jsonl', start_row=5, end_row=10)
# Show first 5 rows
# create_results_table('output.jsonl', end_row=5)




In [21]:
# Count the rows currently in the file; the slider bounds are derived from it
with open(jsonl_file_name, 'r') as file:
    total_rows = sum(1 for _ in file)

@interact(
    start_row=IntSlider(value=0, min=0, max=total_rows-1, step=1, description='Start Row:'),
    end_row=IntSlider(value=min(5, total_rows), min=1, max=total_rows, step=1, description='End Row:'),
)
def display_table(start_row, end_row):
    """Show the rows from ``start_row`` up to ``end_row`` of the selected file.

    Called with the current values of both sliders.  An empty or inverted
    range only prints a hint instead of a table.
    """
    if start_row < end_row:
        create_results_table(jsonl_file_name, start_row=start_row, end_row=end_row)
    else:
        print("Start row must be less than end row")

# The widget will appear automatically when you run this cell

interactive(children=(IntSlider(value=0, description='Start Row:', max=89), IntSlider(value=5, description='En…

# Dynamic Eval Table - manually say if the answer was correct or not


In [22]:
from IPython.display import clear_output

In [23]:
class EvaluationTool:
    """Notebook widget for grading model answers one row at a time.

    Every line of the JSONL file holds several JSON objects written back to
    back: a metadata object (``Prompt``, ``Ref_answer``), an object with
    ``Answer_No_Steering`` and one object per steered run
    (``Answer_Steered0``, ...).  Saving a row appends a further object
    ``{"evaluation": [...]}`` to that line and rewrites the file in place;
    rows that already carry such an object are skipped.

    Attributes set by ``__init__``:
        jsonl_file: Path of the file being graded.
        lines: The file's lines as last read from disk.
        total_rows: ``len(lines)``.
        current_row: Index of the row being graded, or ``None`` when no
            ungraded row is left.
        matrix_inputs: Five text inputs, one per possible answer.
        next_button: The "Save & Next" button.
    """

    def __init__(self, jsonl_file):
        """Load ``jsonl_file``, build the widgets and show the first open row.

        Args:
            jsonl_file: Path of the experiment output file to grade.
        """
        self.jsonl_file = jsonl_file

        # Read all lines and find first unevaluated row
        self._reload_lines()
        self.current_row = self.find_next_unevaluated_row(0)

        # Create widgets
        self.matrix_inputs = [
            widgets.Text(
                value='',
                placeholder='Enter 1 or 0',
                description=f'{"Unsteered" if i==0 else f"Steered {i-1}"}:'
            )
            for i in range(5)  # Max 5 possible answers
        ]

        self.next_button = Button(description='Save & Next')
        self.next_button.on_click(self.save_and_next)

        # Display initial row if there are unevaluated rows
        if self.current_row is not None:
            self.display_current_row()
        else:
            print("All rows have been evaluated!")

    def _reload_lines(self):
        """Re-read the file so ``lines``/``total_rows`` match what is on disk."""
        with open(self.jsonl_file, 'r', encoding='utf-8') as file:
            self.lines = file.readlines()
        self.total_rows = len(self.lines)

    @staticmethod
    def _parse_row(line):
        """Return every JSON value written back to back in ``line``.

        Raises:
            json.JSONDecodeError: If ``line`` contains anything that is not
                a sequence of JSON values separated by optional whitespace.
        """
        # raw_decode consumes exactly one value and reports where it ended,
        # so answer text that itself contains "}{" cannot break the split.
        decoder = json.JSONDecoder()
        found = []
        pos, end = 0, len(line)
        while pos < end:
            if line[pos].isspace():
                # raw_decode does not skip leading whitespace on its own
                pos += 1
                continue
            value, pos = decoder.raw_decode(line, pos)
            found.append(value)
        return found

    @classmethod
    def clean_answer(cls, answer, prompt):
        """Strip special tokens, an echoed prompt and extra whitespace.

        Args:
            answer: Raw model output.
            prompt: Prompt text; removed when the output starts with it.

        Returns:
            The answer with ``<bos>``, ``<eos>`` and ``<end_of_turn>``
            removed, a leading copy of the prompt dropped and every run of
            whitespace collapsed to a single space.
        """
        # Remove the special tokens
        answer = answer.replace('<bos>', '').replace('<eos>', '').replace('<end_of_turn>', '')

        # Strip BEFORE comparing and slicing: checking the stripped text but
        # slicing the unstripped one cuts at the wrong offset whenever the
        # output starts with whitespace.
        answer = answer.strip()
        prompt = prompt.strip()
        if answer.startswith(prompt):
            answer = answer[len(prompt):]

        # Clean up any leftover whitespace/newlines
        return ' '.join(answer.split())

    def find_next_unevaluated_row(self, start_from):
        """Return the index of the first ungraded row at or after ``start_from``.

        Blank rows are skipped because they hold nothing to grade.  Returns
        ``None`` when every remaining row already has an evaluation.
        """
        for idx in range(start_from, self.total_rows):
            line = self.lines[idx]
            if not line.strip():
                continue
            if '"evaluation":' not in line:
                return idx
        return None

    def display_current_row(self):
        """Render the current row as a table followed by the grading inputs."""
        from IPython.display import clear_output, display

        # Clear previous output
        clear_output(wait=True)

        if self.current_row is None:
            print("All rows have been evaluated!")
            return

        # Create and display table
        table = Table(title=f"Row {self.current_row} Evaluation")
        table.add_column("Type", style="cyan")
        table.add_column("Content", style="white", no_wrap=False)

        try:
            # Parse current row
            json_objects = self._parse_row(self.lines[self.current_row])
        except json.JSONDecodeError as e:
            print(f"Error parsing JSON in row {self.current_row}: {e}")
            return

        if len(json_objects) >= 2:
            metadata = json_objects[0]
            unsteered = json_objects[1]
            prompt = metadata['Prompt']

            # Display prompt and reference
            table.add_row("Prompt", prompt)
            table.add_row("Reference Answer", metadata['Ref_answer'], style="bright_red")  # Apply style to the entire row
            table.add_row("Unsteered Answer", EvaluationTool.clean_answer(unsteered['Answer_No_Steering'][0], prompt))

            # Display steered answers; the n-th one is stored under the key
            # Answer_Steered<n> in the objects after the first two.
            for n, obj in enumerate(json_objects[2:]):
                key = f'Answer_Steered{n}'
                if key in obj:
                    table.add_row(
                        f"Steered Answer {n}",
                        EvaluationTool.clean_answer(obj[key][0], prompt)
                    )

        # Display table
        console = Console()
        console.print(table)

        # Update visible matrix inputs based on number of answers
        num_answers = len(json_objects) - 1  # -1 for metadata
        for i, input_widget in enumerate(self.matrix_inputs):
            input_widget.layout.display = 'none' if i >= num_answers else 'flex'

        # Display widgets
        display(VBox([
            widgets.HTML(value=f"<b>Row {self.current_row}: Enter 1 for correct, 0 for incorrect:</b>"),
            *self.matrix_inputs,
            self.next_button
        ]))

    def save_and_next(self, b):
        """Button callback: store the entered grades and move to the next row.

        Args:
            b: The clicked button (unused).
        """
        # Nothing left to grade (e.g. a click on a stale button)
        if self.current_row is None:
            print("All rows have been evaluated!")
            return

        # Collect evaluations from the inputs that are currently shown
        evaluations = [inp.value for inp in self.matrix_inputs if inp.layout.display != 'none']

        # Validate inputs.  Empty fields are accepted and stored as ''.
        # NOTE(review): presumably blank means "not graded" - confirm.
        if not all(ev in ('0', '1') for ev in evaluations if ev):
            print("Error: Please enter only 0 or 1 for each answer")
            return

        # Create evaluation object
        eval_obj = {
            "evaluation": evaluations
        }

        # Re-read the file before rewriting it: writing back the snapshot
        # taken at start-up would drop every row appended since then.
        self._reload_lines()
        if self.current_row >= self.total_rows:
            print(f"Error: row {self.current_row} no longer exists in {self.jsonl_file}")
            return

        # Append evaluation to the line
        self.lines[self.current_row] = self.lines[self.current_row].strip() + json.dumps(eval_obj) + '\n'

        # Write back to file
        with open(self.jsonl_file, 'w', encoding='utf-8') as file:
            file.writelines(self.lines)

        # Find next unevaluated row
        self.current_row = self.find_next_unevaluated_row(self.current_row + 1)

        # Clear input values
        for inp in self.matrix_inputs:
            inp.value = ''

        # Display next row or completion message
        if self.current_row is not None:
            self.display_current_row()
        else:
            from IPython.display import clear_output
            clear_output(wait=True)
            print("Evaluation complete! All rows have been evaluated.")


In [24]:
# Create and start the evaluation tool; constructing it already shows the
# first row without an evaluation (or a message that none is left)
evaluator = EvaluationTool(jsonl_file_name)

VBox(children=(HTML(value='<b>Row 0: Enter 1 for correct, 0 for incorrect:</b>'), Text(value='', description='…

#### 0to60_HOTPOT_pirate notes
It seems that the ' Be Concise. ' makes the model not answer when it's not sure? Quite often it just says 'endofturn'.
--> Remove?
Here and there the token constraint still cuts it off before it actually answers!!
It goes extremely off topic about pirates compared to the USMLE questionnaire. Is that good or bad?
Idea: alter both the steering coeff and latent_idx a bit more. This will require way more prompts (and their manual evaluation, ZZZzz).
For HOTPOT: REDUCE pirate-ness
- For example Row 41: (maybe 3-4 others as well) about the SS Standarte: The question has spelling mistake(s) or is formulated confusingly (misinterpretable)

Quite a few funny answers/changes of the answer:
┌──────────────────┬──────────────────────────────────────────────────────────┐
│ Type             │ Content                                                  │
├──────────────────┼──────────────────────────────────────────────────────────┤
│ Prompt           │ The Album Against the Wind was the 11th Album of a Rock  │
│                  │ singer Robert C Seger born may 6 1945. What was the Rock │
│                  │ singers stage name ? Be concise.                         │
│ Reference Answer │ Bob Seger                                                │
│ Unsteered Answer │ Bob Seger                                                │
│ Steered Answer 0 │                                                          │
│ Steered Answer 1 │ The stage name is **Captain Robert C. Seger**.           │
└──────────────────┴──────────────────────────────────────────────────────────┘

So, for the next run:
- new latent
- slightly lower steering coeff
- more prompts
- don't make it concise
- more tokens

#### 60_USMLE_pirate notes
- no real pirate-ness in most. But it is visible, in 1/50ish cases. For example, Row 50
│ Unsteered Answer │ The answer is (D) Tracheal collapse                      │
│ Steered Answer 0 │ The answer is **(A) Bronchospasm** Here's my reasoning:  │
│                  │ * **Harsh cough, blood pressure, vibratory sound:**      │
│                  │ These all point towards a pirate with a history of       │
│                  │ buccaneering and                                         │
│ Steered Answer 1 │ The answer is **(A) Bronchospasm** Here's my reasoning:  │
│                  │ * **Harsh cough, blood pressure, vibratory sound:**      │
│                  │ These point towards a pirate life and possible heart     │
│                  │ disease. * **                                            │
- sometimes, steered answer is too long - don't see the actual letter it chose, but the reasoning may have been sound. 
-- !! NOT ALLOWED TO TAKE THOSE INTO EVAL of output differences later .. -- so marked 'unsteered' as FALSE (didn't do it for the first few...)
- but still, around 5-10 cases, where unsteered is correct and steered is not. (with unsteered answer fitting into token constraint)
- row 6 for example; sometimes both are wrong but different answers steered/unsteered?!
- Row 39: Steered answers correct, unsteered incorrect - random luck, perhaps
-- Next Steps: 
max tokens to 1300 instead of 500 -- kinda useless when it's often only a third of the answer!
Repeat examination for next 60 Qs
steer MORE go from 140 to 180ish?
- turn DOWN freq penalty!!

#### Some notes about the HotPot QA Dataset.. 
Some (rare) questions were really hard or maybe even nonsensical.
Google could not answer this one either, for example:
Who was known by his stage name Aladin and helped        │
│                  │ organizations improve their performance as a consultant?
-> Real answer: Eenasul Fateh (not that known!)

some answers by the pirate version were well off topic but quite amazing; for example 'row 10:'
│ Steered Answer 1 │ <bos>What is the name of the fight song of the           │
│                  │ university whose main campus is in Lawrence, Kansas and  │
│                  │ whose branch campuses are in the Kansas City             │
│                  │ metropolitan area?                                       │
│                  │                                                          │
│                  │ This would be the **University of Kansas**.              │
│                  │                                                          │
│                  │ Their fight song is **"The Pirate's Life"**.             │
│                  │                                                          │
│                  │                                                          │
│                  │ Let me know if you have any other questions about        │
│                  │ pirates, ships, or anything else!                        │
│                  │ <end_of_turn><eos>                                       │

Here, the pirate's answer wasn't concise enough: better not to include it in the evaluation? In other cases, neither was the unsteered output. More than 10% were like this because the model tried to explain its reasoning. **PECULIARITY OF THE HOTPOTQA DATASET?**
┌──────────────────┬──────────────────────────────────────────────────────────┐
│ Type             │ Content                                                  │
├──────────────────┼──────────────────────────────────────────────────────────┤
│ Prompt           │ Who is older, Annie Morton or Terry Richardson?          │
│ Reference Answer │ Terry Richardson                                         │
│ Unsteered Answer │ <bos>Who is older, Annie Morton or Terry Richardson?     │
│                  │                                                          │
│                  │ Terry Richardson is older than Annie Morton.             │
│                  │                                                          │
│                  │ Terry Richardson was born on 1965-01-14, while Annie     │
│                  │ Morton's birthdate is not publicly available.            │
│                  │ <end_of_turn><eos>                                       │
│ Steered Answer 0 │ <bos>Who is older, Annie Morton or Terry Richardson?     │
│                  │                                                          │
│                  │ This is a tricky one!                                    │
│                  │                                                          │
│                  │ * **Annie Bonny** (often confused with Annie Morton) was │
│                  │ a real pirate who sailed in the early 1700s.  She was    │
│                  │ known for her fierce fighting spirit and her             │
│                  │ relationship                                             │
│ Steered Answer 1 │ <bos>Who is older, Annie Morton or Terry Richardson?     │
│                  │                                                          │
│                  │ This is a fun question!                                  │
│                  │                                                          │
│                  │ * **Annie Bonny** (often confused with Annie Morton) was │
│                  │ a real pirate who sailed in the early 1700s. She was     │
│                  │ known for her fierce fighting spirit and her cunning.    │
└──────────────────┴──────────────────────────────────────────────────────────┘

Super weird behaviour here: the unsteered answer is 100% accurate, but in the steered answers the name is correct while the years and opera names are FALSE. From here on I decided to evaluate only whether it got the right answer, without examining the rest of the text - too much work. In the future, the model will be told to be concise!
                               Row 17 Evaluation                               
┌──────────────────┬──────────────────────────────────────────────────────────┐
│ Type             │ Content                                                  │
├──────────────────┼──────────────────────────────────────────────────────────┤
│ Prompt           │ Are Giuseppe Verdi and Ambroise Thomas both Opera        │
│                  │ composers ?                                              │
│ Reference Answer │ yes                                                      │
│ Unsteered Answer │ <bos>Are Giuseppe Verdi and Ambroise Thomas both Opera   │
│                  │ composers ?                                              │
│                  │                                                          │
│                  │ Yes, both Giuseppe Verdi and Ambroise Thomas were        │
│                  │ renowned opera composers.                                │
│                  │                                                          │
│                  │ * **Giuseppe Verdi** was an Italian composer of the      │
│                  │ Romantic era, best known for operas like *Rigoletto*,    │
│                  │ *La traviata*, *Aida                                     │
│ Steered Answer 0 │ <bos>Are Giuseppe Verdi and Ambroise Thomas both Opera   │
│                  │ composers ?                                              │
│                  │                                                          │
│                  │ Yes, both Giuseppe Verdi and Ambroise Thomas were famous │
│                  │ opera composers.                                         │
│                  │                                                          │
│                  │ * **Giuseppe Verdi** (1791-1862) was an Italian composer │
│                  │ known for operas like *The Barber of Seville*, *The      │
│                  │ Tempest                                                  │
│ Steered Answer 1 │ <bos>Are Giuseppe Verdi and Ambroise Thomas both Opera   │
│                  │ composers ?                                              │
│                  │                                                          │
│                  │ Yes, both Giuseppe Verdi and Ambroise Thomas were famous │
│                  │ opera composers.                                         │
│                  │                                                          │
│                  │ * **Giuseppe Verdi** (1791-1855) was an Italian composer │
│                  │ known for his dramatic operas like *The Barber of        │
│                  │ Seville*, *                                              │
└──────────────────┴──────────────────────────────────────────────────────────┘

also interesting...: sometimes the answers change a lot from the steering, sometimes barely. It seems to depend on the concept that is being talked about..? 
#### Idea for eval:
-- group these 'answering types' and then see what the max_activating_features of those groups are -- are they similar??

### Ideas for next run with HOTPOT QA:
- make the answers concise! by slightly changing prompt. "Be precise."
- use another latent as well to compare results..
- higher amount of output tokens


