In [19]:
import pandas as pd
import numpy as np
from output_processing import get_prompt_type_results, convert_version_to_target

In [20]:
prompt_types = ["yes_or_no", "no_or_yes", "agreement", "agreement_negation", "disagreement", "disagreement_negation", "options", "options_flipped"]

In [21]:
model_name = "meta-llama/Llama-3.1-8B-Instruct"
model_results = pd.read_csv(f"runs_05_15/{model_name}-results.csv")

In [22]:
def get_correct_prob_and_incorrect_prob_columns(prompt_type, version):
    match (prompt_type, version):
        case (_, "controversial"):
            # TODO what do we do?
            # For controversial there is no correct, answer - Keep them as is
            return "Yes_prob", "No_prob"
            
        case ("yes_or_no" | "no_or_yes", "unambiguous_covered"):
                return "Yes_prob", "No_prob"

        case ("yes_or_no" | "no_or_yes", "unambiguous_uncovered"):
            return "No_prob", "Yes_prob"
            
        case ("agreement"|"disagreement_negation", "unambiguous_covered"):
            return "Yes_prob", "No_prob"

        case ("agreement"|"disagreement_negation", "unambiguous_uncovered"):
            return "No_prob", "Yes_prob"

        case ("agreement_negation"| "disagreement", "unambiguous_covered"):
            return "No_prob", "Yes_prob"

        case ("agreement_negation"| "disagreement", "unambiguous_uncovered"):
            return "Yes_prob", "No_prob"

        case ("options", "unambiguous_covered"):
            return "A_prob", "B_prob"
            
        case ("options", "unambiguous_uncovered"):
            return "B_prob", "A_prob"

        case ("options_flipped", "unambiguous_covered"):
            return "B_prob", "A_prob"
            
        case ("options_flipped", "unambiguous_uncovered"):
            return "A_prob", "B_prob"

model_results["correct_logprob"] = np.nan
model_results.loc["incorrect_logprob"] = np.nan
for group, indices in model_results.groupby(["prompt_type", "version"], sort=False).indices.items():
    correct_column, incorrect_column = get_correct_prob_and_incorrect_prob_columns(*group)
    print(correct_column, incorrect_column)
    model_results.loc[indices, "correct_logprob"] = model_results[correct_column][indices]
    model_results.loc[indices,  "incorrect_logprob"] = model_results[incorrect_column][indices]

No_prob Yes_prob
Yes_prob No_prob
Yes_prob No_prob
No_prob Yes_prob
Yes_prob No_prob
Yes_prob No_prob
No_prob Yes_prob
Yes_prob No_prob
Yes_prob No_prob
Yes_prob No_prob
No_prob Yes_prob
Yes_prob No_prob
Yes_prob No_prob
No_prob Yes_prob
Yes_prob No_prob
No_prob Yes_prob
Yes_prob No_prob
Yes_prob No_prob
B_prob A_prob
A_prob B_prob
Yes_prob No_prob
A_prob B_prob
B_prob A_prob
Yes_prob No_prob


In [23]:
model_results["correct_prob"] = np.exp(model_results["correct_logprob"].values)
model_results["incorrect_prob"] = np.exp(model_results["incorrect_logprob"].values)

controversial_mask = model_results["version"] == "controversial"
non_controversial_mask = model_results["version"] != "controversial"

In [24]:
model_results_for_controversial = model_results[controversial_mask]
model_results_for_non_controversial = model_results[non_controversial_mask]

In [25]:
for_controversial = model_results_for_controversial.groupby('prompt_type', sort=False).agg({"correct_prob": ["mean", "std"], "incorrect_prob": ["mean", "std"]})
for_controversial

Unnamed: 0_level_0,correct_prob,correct_prob,incorrect_prob,incorrect_prob
Unnamed: 0_level_1,mean,std,mean,std
prompt_type,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
yes_or_no,0.047083,0.205411,0.026814,0.146724
no_or_yes,0.006758,0.002413,0.007806,0.004404
agreement,0.025389,0.012053,0.009009,0.004024
agreement_negation,0.024124,0.00932,0.017526,0.009455
disagreement,0.029984,0.01161,0.014573,0.00627
disagreement_negation,0.031607,0.011171,0.01585,0.006257
options,1.0,0.0,1.0,0.0
options_flipped,1.0,0.0,1.0,0.0


# for_non_controversial = model_results_for_non_controversial.groupby('prompt_type', sort=False).agg({"correct_prob": ["mean", "std"], "incorrect_prob": ["mean", "std"]})
for_non_controversial

In [20]:
results = pd.DataFrame()
changes = dict()
for model_name in ["meta-llama/Llama-3.2-1B",
                   "meta-llama/Llama-3.2-1B-Instruct",
                   "meta-llama/Llama-3.2-3B",
                   "meta-llama/Llama-3.2-3B-Instruct",
                   "meta-llama/Llama-3.1-8B",
                   "meta-llama/Llama-3.1-8B-Instruct"]:
    print(model_name)
    model_results = pd.read_csv(f"runs_05_14/{model_name}-results.csv")
    #To be fixed in collation
    model_results["output"] = [output.strip("!,") for output in model_results["output"]]
    model_results["model"] = model_name
    model_results["target"] = [convert_version_to_target(prompt_type, version) for prompt_type, version in zip(model_results["prompt_type"], model_results["version"])]

    prompt_type_0 = prompt_types[0]
    mask = model_results["prompt_type"] == prompt_type_0
    model_results_control = model_results[mask]
    changes_model = dict()
    for prompt_type in prompt_types[1:6]: 
        mask = model_results["prompt_type"] == prompt_type
        model_results_variation = model_results[mask]
        print(f"vs {prompt_type}", model_results_variation.output.value_counts())
        # print(f"vs {prompt_type}", 
        #       np.sum(
        #           np.logical_not(
        #               model_results_control[["output"]].reset_index(drop=True) == model_results_variation[["output"]].reset_index(drop=True)
        #       )))
        changes_model[prompt_type] = np.sum(
                  np.logical_not(
                      model_results_control[["output"]].reset_index(drop=True) == model_results_variation[["output"]].reset_index(drop=True)
              ))
    mask1 = model_results["prompt_type"] == prompt_type[6]
    model_results_variation1 = model_results[mask1]

    mask2 = model_results["prompt_type"] == prompt_type[7]
    model_results_variation2 = model_results[mask1]
    changes_model["options_vs_options_flipped"] = np.sum(
                  np.logical_not(
                      model_results_variation1[["output"]].reset_index(drop=True) == model_results_variation2[["output"]].reset_index(drop=True)
    ))
    
        
    changes[model_name] = changes_model
    # print(model_results.output.value_counts())
    # for prompt_type in prompt_types[1:]:
    # results = pd.concat([results, model_results])

meta-llama/Llama-3.2-1B
vs no_or_yes output
Yes         135
Penelope      3
Name: count, dtype: int64
vs agreement output
Yes    138
Name: count, dtype: int64
vs agreement_negation output
Yes    138
Name: count, dtype: int64
vs disagreement output
Yes    138
Name: count, dtype: int64
vs disagreement_negation output
Yes    138
Name: count, dtype: int64
meta-llama/Llama-3.2-1B-Instruct
vs no_or_yes output
No     72
Yes    66
Name: count, dtype: int64
vs agreement output
Yes    138
Name: count, dtype: int64
vs agreement_negation output
Yes    138
Name: count, dtype: int64
vs disagreement output
Yes    138
Name: count, dtype: int64
vs disagreement_negation output
Yes    138
Name: count, dtype: int64
meta-llama/Llama-3.2-3B
vs no_or_yes output
Yes    138
Name: count, dtype: int64
vs agreement output
Yes    138
Name: count, dtype: int64
vs agreement_negation output
Yes    137
No       1
Name: count, dtype: int64
vs disagreement output
Yes    138
Name: count, dtype: int64
vs disagreement_nega

  return reduction(axis=axis, out=out, **passkwargs)
  return reduction(axis=axis, out=out, **passkwargs)
  return reduction(axis=axis, out=out, **passkwargs)
  return reduction(axis=axis, out=out, **passkwargs)
  return reduction(axis=axis, out=out, **passkwargs)
  return reduction(axis=axis, out=out, **passkwargs)
  return reduction(axis=axis, out=out, **passkwargs)
  return reduction(axis=axis, out=out, **passkwargs)
  return reduction(axis=axis, out=out, **passkwargs)
  return reduction(axis=axis, out=out, **passkwargs)
  return reduction(axis=axis, out=out, **passkwargs)
  return reduction(axis=axis, out=out, **passkwargs)
  return reduction(axis=axis, out=out, **passkwargs)
  return reduction(axis=axis, out=out, **passkwargs)
  return reduction(axis=axis, out=out, **passkwargs)
  return reduction(axis=axis, out=out, **passkwargs)
  return reduction(axis=axis, out=out, **passkwargs)
  return reduction(axis=axis, out=out, **passkwargs)
  return reduction(axis=axis, out=out, **passk

vs no_or_yes output
Yes    138
Name: count, dtype: int64
vs agreement output
Yes    138
Name: count, dtype: int64
vs agreement_negation output
Yes    138
Name: count, dtype: int64
vs disagreement output
Yes    138
Name: count, dtype: int64
vs disagreement_negation output
Yes    138
Name: count, dtype: int64
meta-llama/Llama-3.1-8B-Instruct
vs no_or_yes output
Yes    136
No       2
Name: count, dtype: int64
vs agreement output
Yes    138
Name: count, dtype: int64
vs agreement_negation output
Yes    138
Name: count, dtype: int64
vs disagreement output
Yes    138
Name: count, dtype: int64
vs disagreement_negation output
Yes    138
Name: count, dtype: int64


  return reduction(axis=axis, out=out, **passkwargs)
  return reduction(axis=axis, out=out, **passkwargs)
  return reduction(axis=axis, out=out, **passkwargs)
  return reduction(axis=axis, out=out, **passkwargs)
  return reduction(axis=axis, out=out, **passkwargs)
  return reduction(axis=axis, out=out, **passkwargs)


In [21]:
changes

{'meta-llama/Llama-3.2-1B': {'no_or_yes': output    3
  dtype: int64,
  'agreement': output    0
  dtype: int64,
  'agreement_negation': output    0
  dtype: int64,
  'disagreement': output    0
  dtype: int64,
  'disagreement_negation': output    0
  dtype: int64,
  'options_vs_options_flipped': output    0
  dtype: int64},
 'meta-llama/Llama-3.2-1B-Instruct': {'no_or_yes': output    72
  dtype: int64,
  'agreement': output    0
  dtype: int64,
  'agreement_negation': output    0
  dtype: int64,
  'disagreement': output    0
  dtype: int64,
  'disagreement_negation': output    0
  dtype: int64,
  'options_vs_options_flipped': output    0
  dtype: int64},
 'meta-llama/Llama-3.2-3B': {'no_or_yes': output    0
  dtype: int64,
  'agreement': output    0
  dtype: int64,
  'agreement_negation': output    1
  dtype: int64,
  'disagreement': output    0
  dtype: int64,
  'disagreement_negation': output    0
  dtype: int64,
  'options_vs_options_flipped': output    0
  dtype: int64},
 'meta-ll