# This notebook tries to extract information from the output of the alpaca method

In [1]:
import polars as pl
import json

In [2]:
def extract_json_to_dataframe(json_file_path: str) -> pl.DataFrame:
    """
    Loads a JSON file holding a list of records into a Polars DataFrame.

    The file must contain a JSON array of objects. From each object only the
    'instruction', 'input', and 'output' fields are kept; a missing field is
    replaced by an empty string and every other key is ignored.

    Args:
        json_file_path (str): The path to the JSON file.

    Returns:
        pl.DataFrame: One row per JSON object with the columns
        'instruction', 'input', and 'output'. An empty JSON array yields an
        empty frame that still carries these three string columns.

    Raises:
        RuntimeError: If the file cannot be read or parsed, or if its content
        is not a list of JSON objects. The underlying exception is attached
        as ``__cause__``.
    """
    columns = ("instruction", "input", "output")

    try:
        # JSON text is UTF-8 by specification; relying on the platform's
        # default encoding corrupts or rejects non-ASCII content on some OSes.
        with open(json_file_path, "r", encoding="utf-8") as file:
            data = json.load(file)

        # Fail with a readable message instead of an obscure AttributeError
        # when the file is not shaped like a list of records.
        if not isinstance(data, list):
            raise TypeError(
                f"expected a top-level JSON array, got {type(data).__name__}"
            )

        extracted_data = []
        for index, item in enumerate(data):
            if not isinstance(item, dict):
                raise TypeError(
                    f"record {index} is not a JSON object "
                    f"(got {type(item).__name__})"
                )
            extracted_data.append({name: item.get(name, "") for name in columns})

        if not extracted_data:
            # With zero rows there is nothing to infer columns from, so the
            # schema is spelled out to keep the documented three columns.
            return pl.DataFrame(schema={name: pl.Utf8 for name in columns})

        return pl.DataFrame(extracted_data)

    except Exception as e:
        # Callers only need to handle RuntimeError; chaining keeps the
        # original error available for debugging.
        raise RuntimeError(f"An error occurred while processing the JSON file: {e}") from e


In [3]:
# Load the records stored in ./test_instructions.json.
df = extract_json_to_dataframe(json_file_path="./test_instructions.json")

In [4]:
# Bare expression at the end of the cell: renders the frame loaded from
# ./test_instructions.json as the cell output.
df

instruction,input,output
str,str,str
"""Could you refactor this URScri…","""<no_input>""","""```URscript global positions =…"
"""Generate a URScript function t…","""<no_input>""","""```URscript def control_digita…"
"""Construct a URScript function …","""```URscript relative_position …","""```URscript def update_tcp(rel…"
"""Create a URScript function tha…","""<no_input>""","""```URscript def get_scaled_ana…"
"""Given the following URScript c…","""<no_input>""","""```URscript def move_until_inp…"
…,…,…
"""Develop a URScript program tha…","""``` pick_pos = p[0.1, 0.2, 0.3…","""```URscript def pick_and_place…"
"""You have two poses and want th…","""```URscript pose1 = p[0.1, 0.2…","""```URscript pose1 = p[0.1, 0.2…"
"""Given the provided URScript co…","""```URscript set_digital_out(0,…","""```URscript digital_output = 0…"
"""Write a URScript function to i…","""```URscript scan_start_pose = …","""```URscript def scan_motion(sc…"


In [9]:
# Load the records stored in ./output_qwen7b_coder.json and display them.
df1 = extract_json_to_dataframe(json_file_path="./output_qwen7b_coder.json")
df1

instruction,input,output
str,str,str
"""Could you refactor this URScri…","""<no_input>""","""```URscript global positions =…"
"""Generate a URScript function t…","""<no_input>""","""```URscript def activate_outpu…"
"""Construct a URScript function …","""```URscript relative_position …","""```URscript def define_new_tcp…"
"""Create a URScript function tha…","""<no_input>""","""```URscript def read_analog_pe…"
"""Given the following URScript c…","""<no_input>""","""```URscript def move_until_inp…"
…,…,…
"""Develop a URScript program tha…","""``` pick_pos = p[0.1, 0.2, 0.3…","""```URscript def pick_and_place…"
"""You have two poses and want th…","""```URscript pose1 = p[0.1, 0.2…","""```URscript pose1 = p[0.1, 0.2…"
"""Given the provided URScript co…","""```URscript set_digital_out(0,…","""```URscript def toggle_output(…"
"""Write a URScript function to i…","""```URscript scan_start_pose = …","""```URscript def scan_motion(sc…"


In [10]:
# Load the records stored in ./output_qwen7b.json and display them.
df2 = extract_json_to_dataframe(json_file_path="./output_qwen7b.json")
df2

instruction,input,output
str,str,str
"""Could you refactor this URScri…","""<no_input>""","""```URscript global positions =…"
"""Generate a URScript function t…","""<no_input>""","""```URscript def control_digita…"
"""Construct a URScript function …","""```URscript relative_position …","""```URscript def define_new_tcp…"
"""Create a URScript function tha…","""<no_input>""","""```URscript def read_and_scale…"
"""Given the following URScript c…","""<no_input>""","""```URscript def move_until_inp…"
…,…,…
"""Develop a URScript program tha…","""``` pick_pos = p[0.1, 0.2, 0.3…","""```URscript def pick_and_place…"
"""You have two poses and want th…","""```URscript pose1 = p[0.1, 0.2…","""```URscript pose1 = p[0.1, 0.2…"
"""Given the provided URScript co…","""```URscript set_digital_out(0,…","""```URscript def toggle_digital…"
"""Write a URScript function to i…","""```URscript scan_start_pose = …","""```URscript def scan_motion():…"


In [8]:
# Load the records stored in ./output_qwen3b_coder.json and display them.
df3 = extract_json_to_dataframe(json_file_path="./output_qwen3b_coder.json")
df3

instruction,input,output
str,str,str
"""Could you refactor this URScri…","""<no_input>""","""```URscript global positions =…"
"""Generate a URScript function t…","""<no_input>""","""```URscript def control_digita…"
"""Construct a URScript function …","""```URscript relative_position …","""```URscript def move_relative_…"
"""Create a URScript function tha…","""<no_input>""","""```URscript def read_analog_pe…"
"""Given the following URScript c…","""<no_input>""","""```URscript def move_until_inp…"
…,…,…
"""Develop a URScript program tha…","""``` pick_pos = p[0.1, 0.2, 0.3…","""```URscript def pick_and_place…"
"""You have two poses and want th…","""```URscript pose1 = p[0.1, 0.2…","""```URscript pose1 = p[0.1, 0.2…"
"""Given the provided URScript co…","""```URscript set_digital_out(0,…","""```URscript for i = 1 to 5:  …"
"""Write a URScript function to i…","""```URscript scan_start_pose = …","""```URscript def scan_motion(sc…"


In [17]:
# Load the records stored in ./output_qwen3b.json and display them.
df4 = extract_json_to_dataframe(json_file_path="./output_qwen3b.json")
df4

instruction,input,output
str,str,str
"""Could you refactor this URScri…","""<no_input>""","""```URscript global positions =…"
"""Generate a URScript function t…","""<no_input>""","""```URscript def control_digita…"
"""Construct a URScript function …","""```URscript relative_position …","""```URscript def transform_pose…"
"""Create a URScript function tha…","""<no_input>""","""```URscript def read_analog_in…"
"""Given the following URScript c…","""<no_input>""","""```URscript def move_until_inp…"
…,…,…
"""Develop a URScript program tha…","""``` pick_pos = p[0.1, 0.2, 0.3…","""```URscript def pick_and_place…"
"""You have two poses and want th…","""```URscript pose1 = p[0.1, 0.2…","""```URscript pose1 = p[0.1, 0.2…"
"""Given the provided URScript co…","""```URscript set_digital_out(0,…","""```URscript def toggle_digital…"
"""Write a URScript function to i…","""```URscript scan_start_pose = …","""```URscript def scan_motion():…"


In [18]:
# Put each model's 'output' column next to the columns of `df`, one new
# column per model. The columns are attached by row position, not joined on a
# key, so this assumes every file lists the same records in the same order
# (TODO confirm).
new_df = df.with_columns(
    df1["output"].alias("output (Qwen7B-coder)"),
    df2["output"].alias("output (Qwen7B)"),
    df3["output"].alias("output (Qwen3B-coder)"),
    df4["output"].alias("output (Qwen3B)"),
)
new_df

instruction,input,output,output (Qwen7B-coder),output (Qwen7B),output (Qwen3B-coder),output (Qwen3B)
str,str,str,str,str,str,str
"""Could you refactor this URScri…","""<no_input>""","""```URscript global positions =…","""```URscript global positions =…","""```URscript global positions =…","""```URscript global positions =…","""```URscript global positions =…"
"""Generate a URScript function t…","""<no_input>""","""```URscript def control_digita…","""```URscript def activate_outpu…","""```URscript def control_digita…","""```URscript def control_digita…","""```URscript def control_digita…"
"""Construct a URScript function …","""```URscript relative_position …","""```URscript def update_tcp(rel…","""```URscript def define_new_tcp…","""```URscript def define_new_tcp…","""```URscript def move_relative_…","""```URscript def transform_pose…"
"""Create a URScript function tha…","""<no_input>""","""```URscript def get_scaled_ana…","""```URscript def read_analog_pe…","""```URscript def read_and_scale…","""```URscript def read_analog_pe…","""```URscript def read_analog_in…"
"""Given the following URScript c…","""<no_input>""","""```URscript def move_until_inp…","""```URscript def move_until_inp…","""```URscript def move_until_inp…","""```URscript def move_until_inp…","""```URscript def move_until_inp…"
…,…,…,…,…,…,…
"""Develop a URScript program tha…","""``` pick_pos = p[0.1, 0.2, 0.3…","""```URscript def pick_and_place…","""```URscript def pick_and_place…","""```URscript def pick_and_place…","""```URscript def pick_and_place…","""```URscript def pick_and_place…"
"""You have two poses and want th…","""```URscript pose1 = p[0.1, 0.2…","""```URscript pose1 = p[0.1, 0.2…","""```URscript pose1 = p[0.1, 0.2…","""```URscript pose1 = p[0.1, 0.2…","""```URscript pose1 = p[0.1, 0.2…","""```URscript pose1 = p[0.1, 0.2…"
"""Given the provided URScript co…","""```URscript set_digital_out(0,…","""```URscript digital_output = 0…","""```URscript def toggle_output(…","""```URscript def toggle_digital…","""```URscript for i = 1 to 5:  …","""```URscript def toggle_digital…"
"""Write a URScript function to i…","""```URscript scan_start_pose = …","""```URscript def scan_motion(sc…","""```URscript def scan_motion(sc…","""```URscript def scan_motion():…","""```URscript def scan_motion(sc…","""```URscript def scan_motion():…"


In [19]:
def save_dataframe_to_csv(df: pl.DataFrame, csv_file_path: str) -> None:
    """
    Saves a Polars DataFrame to a CSV file and prints a confirmation.

    Args:
        df (pl.DataFrame): The Polars DataFrame to save.
        csv_file_path (str): The path where the CSV file will be saved.

    Raises:
        RuntimeError: If writing the CSV file fails. The underlying exception
        is attached as ``__cause__``.
    """
    try:
        df.write_csv(csv_file_path)
    except Exception as e:
        # Chain explicitly so the original traceback stays attached as the cause.
        raise RuntimeError(f"An error occurred while saving the DataFrame to CSV: {e}") from e

    # Reported outside the try block: a failure to print must not be
    # misreported as a failure to save the file.
    print(f"DataFrame successfully saved to {csv_file_path}")


In [21]:
# Write the combined frame (new_df) to ./all-outputs.csv.
save_dataframe_to_csv(df=new_df, csv_file_path="./all-outputs.csv")

DataFrame successfully saved to ./all-outputs.csv


In [6]:
# Write the frame loaded from ./test_instructions.json to ./test-instructions.csv.
save_dataframe_to_csv(df=df, csv_file_path="./test-instructions.csv")

DataFrame successfully saved to ./test-instructions.csv
