# This notebook tries to extract information from the output of the alpaca method

In [4]:
import polars as pl
import json

In [5]:
def extract_json_to_dataframe(json_file_path: str) -> pl.DataFrame:
    """
    Extracts data from a JSON file and creates a Polars DataFrame with columns:
    'instruction', 'input', and 'output'.

    The file must contain a JSON array of objects. Keys missing from an object
    are filled with an empty string; any other keys are ignored.

    Args:
        json_file_path (str): The path to the JSON file.

    Returns:
        pl.DataFrame: A Polars DataFrame with the extracted columns. An empty
        JSON array yields an empty DataFrame that still has the three columns.

    Raises:
        RuntimeError: If the file cannot be read or parsed, or if its content
            is not a list of JSON objects. The original exception is chained
            as the cause.
    """
    columns = ("instruction", "input", "output")
    try:
        # JSON text is UTF-8 by specification; do not depend on the platform's
        # locale encoding, which is not UTF-8 everywhere (e.g. on Windows).
        with open(json_file_path, 'r', encoding='utf-8') as file:
            data = json.load(file)

        # A top-level JSON object would otherwise be iterated key by key and
        # fail later with an unhelpful AttributeError on `str.get`.
        if not isinstance(data, list):
            raise TypeError(
                f"expected a JSON array of objects, got {type(data).__name__}"
            )

        # Validate and extract required fields
        extracted_data = []
        for index, item in enumerate(data):
            if not isinstance(item, dict):
                raise TypeError(
                    f"expected a JSON object at index {index}, "
                    f"got {type(item).__name__}"
                )
            extracted_data.append({name: item.get(name, "") for name in columns})

        # With no rows there is nothing to infer columns from, so declare the
        # documented columns explicitly for the empty case.
        if not extracted_data:
            return pl.DataFrame(schema={name: pl.Utf8 for name in columns})

        # Create Polars DataFrame
        return pl.DataFrame(extracted_data)

    except Exception as e:
        # Keep the RuntimeError contract for callers, but chain the real cause.
        raise RuntimeError(f"An error occurred while processing the JSON file: {e}") from e


In [6]:
# Load the records from ./gen_instructions_1.json into a DataFrame
# with the 'instruction', 'input' and 'output' columns.
df = extract_json_to_dataframe("./gen_instructions_1.json")

In [7]:
# Show the loaded DataFrame as this cell's output.
df

instruction,input,output
str,str,str
"""Given the following URScript c…","""```URscript def broken_program…","""```URscript def fixed_program(…"
"""Write a URScript function that…","""""","""```URscript def move_variable_…"
"""You are given existing URScrip…","""```URscript def old_loop_motio…","""```URscript def move_to_positi…"
"""Create a URScript function tha…","""""","""```URscript def pick_and_place…"
"""How would you implement a simp…","""""","""```URscript def error_handling…"
…,…,…
"""Generate URScript code that se…","""""","""```URscript var condition = Tr…"
"""The below URScript code contai…","""```URscript def invalid_functi…","""```URscript def invalid_functi…"
"""Write a function in URScript t…","""""","""```URscript def log_robot_pose…"
"""Construct URScript code that i…","""""","""```URscript def pick_and_place…"


In [8]:
def save_dataframe_to_csv(df: pl.DataFrame, csv_file_path: str):
    """
    Write a Polars DataFrame to disk as CSV and report success on stdout.

    Args:
        df (pl.DataFrame): The frame to serialize.
        csv_file_path (str): Destination path of the CSV file.

    Raises:
        RuntimeError: If writing the file (or printing the confirmation)
            fails; the message includes the text of the underlying error.
    """
    try:
        df.write_csv(csv_file_path)
        # Only reached when the write above did not raise.
        print(f"DataFrame successfully saved to {csv_file_path}")
    except Exception as write_error:
        failure_message = (
            f"An error occurred while saving the DataFrame to CSV: {write_error}"
        )
        raise RuntimeError(failure_message)


In [9]:
# Write the extracted DataFrame to ./output.csv.
save_dataframe_to_csv(df, csv_file_path="./output.csv")

DataFrame successfully saved to ./output.csv
