In [1]:
import json
import plotly.express as px
import pandas as pd

In [2]:
def get_count_correct(path):
    """Read a JSON results file and return its ``stats.count_correct`` entry.

    Args:
        path: Location of the JSON file to open.

    Returns:
        The value stored under ``['stats']['count_correct']`` in the parsed JSON.

    Raises:
        FileNotFoundError: if ``path`` does not exist.
        KeyError: if the ``stats`` or ``count_correct`` key is absent.
    """
    with open(path, 'r') as handle:
        stats = json.load(handle)['stats']
    return stats['count_correct']

In [3]:
# read in json file

'''
  - ```baseline``` for directly prompting, 
  - ```zs_cot``` for zero-shot chain-of-thought (CoT) prompting, 
  - ```os``` for one-shot in-context learning (ICL) prompting with the original Linda Problem (default), 
  - ```os_cot``` for one-shot ICL plus CoT prompting, 
  - ```os_bob``` for one-shot ICL prompting but with a rephrased Bob Problem, 
  - ```os_bob_cot``` for one-shot ICL plus CoT prompting but with a rephrased Bob Problem, 
  - ```os_incorrect``` for one-shot ICL but with an incorrect answer, 
  - ```os_incorrect_cot``` for one-shot ICL plus CoT but with an incorrect answer,
  - ```fs``` for few-shot ICL prompting,
  - ```fs_cot``` for few-shot ICL plus CoT prompting,
  - ```weak_control_zs_cot``` for weakly controlled zero-shot CoT prompting, leaking the hint that it is a Linda Problem but without detailed explanations,
  - ```weak_control_os_cot``` for weakly controlled one-shot CoT prompting, leaking the hint that it is a Linda Problem but without detailed explanations,
  - ```control_zs_cot``` for controlled zero-shot CoT prompting, leaking the hint that it is a Linda Problem with detailed and carefully-curated explanations,
  - ```control_os_cot``` for controlled one-shot CoT prompting, leaking the hint that it is a Linda Problem with detailed and carefully-curated explanations.
'''


def draw(variant):
    """Build a grouped bar chart comparing ``count_correct`` across prompting methods.

    For every data type ("gold", "random") and every prompting method, the value is
    read via ``get_count_correct`` from
    ``outputs/gpt3.5/response_{method}_synthetic_dataset_linda_variant_{variant}_{data_type}.json``.

    Args:
        variant: Variant name exactly as it appears in the result file names
            (e.g. ``"four"``). It is also used to label the chart title.

    Returns:
        The plotly figure (800x600, grouped bars coloured by data type).
        Call ``.show()`` on it to render.

    Raises:
        FileNotFoundError: if any of the expected result files is missing.
    """
    # Single source of truth for the method order: the same list drives both the
    # file loading and the x-axis labels, so the two can never drift apart.
    prompting_methods = [
        'baseline', 'zs_cot', 'os', 'os_cot', 'os_bob', 'os_bob_cot',
        'os_incorrect', 'os_incorrect_cot', 'fs', 'fs_cot',
        'weak_control_zs_cot', 'weak_control_os_cot',
        'control_zs_cot', 'control_os_cot',
    ]
    data_types = ['gold', 'random']

    # File names spell the variant as a word; the title shows a digit ("Variant 4").
    # Unknown variant names fall back to the raw value rather than failing.
    variant_digits = {
        'one': '1', 'two': '2', 'three': '3', 'four': '4', 'five': '5',
        'six': '6', 'seven': '7', 'eight': '8', 'nine': '9', 'ten': '10',
    }
    variant_label = variant_digits.get(variant, variant)

    data = {
        data_type: [
            get_count_correct(
                f'outputs/gpt3.5/response_{prompting_method}_synthetic_dataset_linda_variant_{variant}_{data_type}.json'
            )
            for prompting_method in prompting_methods
        ]
        for data_type in data_types
    }

    # Reshape to long form (one row per method/data_type pair) for a grouped bar plot.
    df = pd.DataFrame(data)
    df['prompting_method'] = prompting_methods
    df = df.melt(id_vars='prompting_method', var_name='data_type', value_name='count_correct')

    fig = px.bar(df, x='prompting_method', y='count_correct', color='data_type', barmode='group')
    # NOTE(review): the title and y-axis say "Percentage" while the plotted column is
    # ``count_correct`` — confirm the stored value really is a percentage.
    fig.update_layout(
        title=f'Percentage of correct answers for different prompting methods - Variant {variant_label}',
        xaxis_title='Prompting Method',
        yaxis_title='Percentage of correct answers',
        # Fixed size so figures for different variants are directly comparable.
        autosize=False,
        width=800,
        height=600,
    )
    return fig

In [4]:
# Render the grouped bar chart for the "four" variant of the dataset.
draw("four").show()

In [5]:
# Render the grouped bar chart for the "six" variant of the dataset.
# NOTE: the recorded run of this cell raised FileNotFoundError for
# outputs/gpt3.5/response_control_zs_cot_synthetic_dataset_linda_variant_six_gold.json.
draw("six").show()

FileNotFoundError: [Errno 2] No such file or directory: 'outputs/gpt3.5/response_control_zs_cot_synthetic_dataset_linda_variant_six_gold.json'