In [44]:
import os
import pandas as pd

# Field names expected in details.txt, in the column order of the output table.
# NOTE: 'Prompt sytem message' is spelled the way the key appears in the
# details.txt files (the column is populated in the parsed output), so it must
# not be "corrected" here without also changing the files.
columns = ['Run Name', 'Task', 'Model', 'Prompt Name', 'Prompt', 'Prompt Sys Name', 'Prompt sytem message', 'Answer system message',
           'Number of examples', 'Accuracy', 'Prompt cost', 'Completion cost', 'Total']


def parse_details_lines(lines, field_names):
    """Parse the lines of one details.txt file into a ``{field: value}`` dict.

    A line that starts with ``"<field>:"`` (for any name in ``field_names``)
    opens a new field; every following line that does not open a field is a
    continuation of the current one and is appended after a single space.

    Each stored value has its real newlines replaced by the two-character
    sequence ``\\n``, is stripped of surrounding whitespace, and has one
    trailing literal ``\\n`` removed.

    Args:
        lines: Iterable of text lines (newline terminators included), e.g. an
            open file or the result of ``readlines()``.
        field_names: Iterable of recognised field names (without the colon).

    Returns:
        dict mapping each field found to its cleaned string value. A field
        whose raw text is completely empty is omitted. Lines that appear
        before the first recognised field are ignored. If a field occurs
        more than once, the last occurrence wins.
    """
    prefixes = tuple(name + ":" for name in field_names)
    fields = {}
    current_key = None
    buffer = ""

    def clean(raw):
        # Escape newlines so every value stays on one CSV line, then drop the
        # single escaped newline left over from the field's final line.
        text = raw.replace('\n', '\\n').strip()
        if text.endswith('\\n'):
            text = text[:-2]
        return text

    for line in lines:
        if line.startswith(prefixes):
            # A new field starts: flush the one collected so far.
            if current_key is not None and buffer:
                fields[current_key] = clean(buffer)
            current_key, buffer = line.split(':', 1)
        elif current_key is not None:
            # Continuation of a multi-line value.
            buffer += ' ' + line
        # else: preamble before the first field -- nothing to attach it to.

    if current_key is not None and buffer:
        fields[current_key] = clean(buffer)
    return fields


dirname = "./"

# Every sub-directory of `dirname` is a candidate run directory. Sorted so the
# row order does not depend on the filesystem's listing order.
subdirs = sorted(d for d in os.listdir(dirname) if os.path.isdir(os.path.join(dirname, d)))

# Collect one record per run, then build the DataFrame once (growing a frame
# with pd.concat inside the loop is quadratic).
records = []
for subdir in subdirs:
    details_path = os.path.join(dirname, subdir, 'details.txt')
    if not os.path.exists(details_path):
        continue

    # The directory name is split on '__'; parts 2 and 3 are used as the prompt
    # name and system-prompt name. Directories with fewer parts get None
    # instead of aborting the whole extraction.
    name_parts = subdir.split('__')
    row = {
        'Run Name': subdir,
        'Prompt Name': name_parts[2] if len(name_parts) > 2 else None,
        'Prompt Sys Name': name_parts[3] if len(name_parts) > 3 else None,
    }

    # Fields found in the file take precedence over the directory-derived ones.
    with open(details_path, 'r') as f:
        row.update(parse_details_lines(f, columns))

    records.append(row)

# `columns=` fixes the column order and yields an empty, correctly-shaped frame
# when no run directory was found.
df = pd.DataFrame(records, columns=columns)

# Save the DataFrame to a CSV file
df.to_csv(os.path.join(dirname, 'extracted_data.csv'), index=False)

In [46]:
# Display the first extracted run as a quick sanity check.
# `iloc` is an indexer: subscript it directly instead of calling it first
# (calling it passes 0 as an axis argument and only then indexes).
df.iloc[0]

Run Name                 tracking_shuffled_objects-three_objects__gpt-3...
Task                               tracking_shuffled_objects/three_objects
Model                                                        gpt-3.5-turbo
Prompt Name                                                         CoT-WS
Prompt                   Let's think about this step by step and descri...
Prompt Sys Name                                            ChatGPT-default
Prompt sytem message                          You are a helpful assistant.
Answer system message    You are an instruction following, problem solv...
Number of examples                                                     750
Accuracy                                                0.5906666666666667
Prompt cost                                        $0.57285, per: $0.00076
Completion cost                                    $0.25076, per: $0.00033
Total                                              $0.82361, per: $0.00110
Name: 0, dtype: object

In [61]:
import pandas as pd
import json
import re

# Assuming you have a dataframe named 'df'

# Define a function to convert cost string to float
def convert_cost(cost_string):
    """Return the first number found in ``cost_string`` as a float.

    Accepts plain integers (``"$3"``), decimals (``"$0.57285"``) and
    exponent notation (``"$1.5e-05"``). Any surrounding text such as the
    currency symbol or a trailing ``", per: ..."`` part is ignored; only the
    first numeric token is used.

    Args:
        cost_string: Text containing at least one number.

    Returns:
        float: The first number in the text.

    Raises:
        ValueError: If the text contains no number.
    """
    # Integer part, optional fraction, optional exponent.
    match = re.search(r"\d+(?:\.\d+)?(?:[eE][-+]?\d+)?", cost_string)
    if match is None:
        raise ValueError(f"no numeric cost found in {cost_string!r}")
    return float(match.group(0))

def split_cost(cost_text):
    """Split a cost string of the form ``"<total>, per: <unit cost>"``.

    Args:
        cost_text: Text such as ``"$0.57285, per: $0.00076"``.

    Returns:
        dict with the keys ``"Total"`` and ``"Per token"``, both floats
        parsed with ``convert_cost``.

    Raises:
        ValueError: If the ``"per:"`` marker is missing from ``cost_text``.
    """
    total_part, marker, per_part = cost_text.partition("per:")
    if not marker:
        raise ValueError(f"expected 'per:' in cost string {cost_text!r}")
    return {
        "Total": convert_cost(total_part),
        # NOTE(review): in the sample run the "per" figure is roughly
        # total / number of examples, so this may be a per-example cost rather
        # than a per-token one. The key is kept for output compatibility --
        # confirm before renaming.
        "Per token": convert_cost(per_part),
    }


def build_json_record(run):
    """Turn one row of ``df`` into the nested dict written to the JSON file.

    Args:
        run: A row of ``df`` (anything indexable by the column names).

    Returns:
        dict with the run's descriptive fields, the example count as int,
        the accuracy as float and a nested ``"Cost"`` section.
    """
    return {
        "Task": run["Task"],
        "Model": run["Model"],
        "Prompt type": run["Prompt Name"],
        "Prompt": run["Prompt"],
        "System message type": run["Prompt Sys Name"],
        "Prompt system message": run["Prompt sytem message"],
        "Answer system message": run["Answer system message"],
        "Number of examples": int(run["Number of examples"]),
        "Accuracy": float(run["Accuracy"]),
        "Cost": {
            "Prompt": split_cost(run["Prompt cost"]),
            "Completion": split_cost(run["Completion cost"]),
            "Total": split_cost(run["Total"]),
            "Currency": "USD",
        },
    }


# One JSON record per run, kept as a Series aligned with df's index.
# Built with an explicit loop instead of df.apply(axis=1): on an empty frame
# apply hands back a DataFrame, which has no to_list().
output_json = pd.Series(
    [build_json_record(run) for _, run in df.iterrows()],
    index=df.index,
    dtype=object,
)

# Convert the JSON structure to a formatted string with newlines
output_string = json.dumps(output_json.to_list(), indent=4)

# Save the JSON output to a file
with open(f"{dirname}/extracted_data.json", "w") as file:
    file.write(output_string)
