## Run the cell below after running `post_vmlu.py`

In [19]:
import json
import csv
import sys

def jsonl_to_csv(jsonl_file_path, csv_file_path):
    """
    Converts a JSONL file to a CSV file.

    Every non-blank line of the input is parsed as one JSON object and becomes
    one CSV row. The CSV header is the union of the keys of all objects, in
    first-seen order, so objects that carry extra keys are written instead of
    aborting the conversion; keys an object lacks are left as empty cells.

    On failure the error is printed and the process exits with status 1
    (``SystemExit``). If the input holds no JSON objects, a message is printed
    and no CSV file is written.

    Args:
        jsonl_file_path (str): The path to the input JSONL file.
        csv_file_path (str): The path to the output CSV file.
    """
    try:
        # Step 1: Read the JSONL file and collect all objects
        data = []
        with open(jsonl_file_path, 'r', encoding='utf-8') as f_jsonl:
            for line in f_jsonl:
                # Skip empty lines to avoid errors
                if line.strip():
                    data.append(json.loads(line))

        if not data:
            print("The JSONL file is empty or contains no valid JSON objects.")
            return

        # Step 2: Build the header from every object, not just the first one.
        # DictWriter raises ValueError on a row with a key missing from
        # fieldnames, so a header taken from data[0] alone breaks on
        # heterogeneous records. dict.fromkeys keeps first-seen order.
        headers = list(dict.fromkeys(key for row in data for key in row.keys()))

        # Step 3: Write the data to the CSV file
        with open(csv_file_path, 'w', newline='', encoding='utf-8') as f_csv:
            writer = csv.DictWriter(f_csv, fieldnames=headers)

            # Write the header row
            writer.writeheader()

            # Write the data rows (missing keys are filled with DictWriter's default restval, '')
            writer.writerows(data)

        print(f"Successfully converted '{jsonl_file_path}' to '{csv_file_path}'. 🎉")

    except FileNotFoundError as e:
        # Either open() can raise this (the output directory may be missing),
        # so report the path the OS actually rejected.
        missing_path = e.filename if e.filename is not None else jsonl_file_path
        print(f"Error: The file '{missing_path}' was not found.")
        sys.exit(1)
    except json.JSONDecodeError as e:
        print("Error: Could not decode JSON from the file. Check for malformed JSON objects.")
        print(f"Details: {e}")
        sys.exit(1)
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        sys.exit(1)

# Script entry point: convert final_result.jsonl into final_result.csv.
if __name__ == "__main__":

    # Source JSONL file and destination CSV file
    input_file = '/home/sora/llm/moe/output/deepseek_pre/final_result.jsonl'
    output_file = '/home/sora/llm/moe/output/deepseek_pre/final_result.csv'

    # Perform the JSONL -> CSV conversion
    jsonl_to_csv(jsonl_file_path=input_file, csv_file_path=output_file)

Successfully converted '/home/sora/llm/moe/output/deepseek_pre/final_result.jsonl' to '/home/sora/llm/moe/output/deepseek_pre/final_result.csv'. 🎉


## Run the cell below after running any benchmark except MedQA, and before running `post.py`

In [20]:
import json
import string
import sys
from typing import List, Dict

def add_full_choice(data: List[Dict]) -> List[Dict]:
    """Add a 'full_choice' field like 'D. 0,4' to each sample.

    For each sample, the 'gold' value is read as a single uppercase letter and
    mapped to a position ('A' -> 0, 'B' -> 1, ...) in the sample's 'choices'
    list. 'full_choice' is set to None when no choice can be resolved: 'gold'
    is missing, empty, or not a single uppercase ASCII letter; 'choices' is
    missing, null, or not a list/tuple; or the letter points past the end of
    'choices'.

    Args:
        data: Samples to annotate. Each sample is modified in place.

    Returns:
        The same list object that was passed in.
    """
    letters = string.ascii_uppercase

    for sample in data:
        gold = sample.get("gold")
        choices = sample.get("choices")

        full_choice = None
        # Require a real sequence of choices: a null value would break len(),
        # and a plain string would be indexed character by character and
        # yield a meaningless one-letter "choice".
        if (
            isinstance(gold, str)
            and len(gold) == 1
            and isinstance(choices, (list, tuple))
        ):
            idx = letters.find(gold)  # -1 when gold is not an uppercase letter
            if 0 <= idx < len(choices):
                full_choice = f"{gold}. {choices[idx]}"

        sample["full_choice"] = full_choice
    return data

# Script entry point: add 'full_choice' to every sample of a raw result file, in place.
if __name__ == "__main__":

    # Input and output are the same file, so the result overwrites the original.
    input_path = "/home/sora/llm/moe/output/deepseek_pre/raw_result_baseline_arc.json"
    output_path = "/home/sora/llm/moe/output/deepseek_pre/raw_result_baseline_arc.json"

    # Load JSON file
    with open(input_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    # If the file contains a single object, wrap it in a list and remember
    # that, so the file is written back in the shape it was read in.
    was_single_object = isinstance(data, dict)
    if was_single_object:
        data = [data]
    elif isinstance(data, str):
        # NOTE(review): assumes a top-level JSON string holds one JSON object
        # per line -- confirm against the benchmark writer. Splitting alone
        # produced plain strings, which add_full_choice cannot process.
        data = [json.loads(line) for line in data.splitlines() if line.strip()]

    # Process
    updated_data = add_full_choice(data)

    # Unwrap only what was wrapped above: an empty list no longer raises
    # IndexError, and a one-element list stays a list.
    payload = updated_data[0] if was_single_object else updated_data

    # Serialize before opening the output, so a serialization error cannot
    # leave the (shared) input/output file truncated.
    serialized = json.dumps(payload, indent=2, ensure_ascii=False)

    # Save back
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(serialized)

    print(f"Updated file saved to {output_path}")


Updated file saved to /home/sora/llm/moe/output/deepseek_pre/raw_result_baseline_arc.json


## Run the cell below after running the MedQA benchmark, and before running `post.py` on its results

In [21]:
import json

filename = "/home/sora/llm/moe/output/deepseek_pre/raw_result_med.json"

# Load the samples from the result file
with open(filename, "r", encoding="utf-8") as f:
    data = json.load(f)

# Annotate every sample with the full text of its gold choice
for sample in data:
    # Map the text before the first ". " on each choice line to the whole
    # (stripped) line; lines without ". " are ignored, and a repeated key
    # keeps the last line seen.
    lines_by_key = {
        line.split(". ", 1)[0].strip(): line.strip()
        for line in sample["choices"].strip().split("\n")
        if ". " in line
    }

    # Empty string when the gold key matches none of the choice lines
    sample["full_choice"] = lines_by_key.get(sample.get("gold", ""), "")

# Overwrite the same file with the annotated samples
with open(filename, "w", encoding="utf-8") as f:
    json.dump(data, f, ensure_ascii=False, indent=2)
