# Get data for combination experiments

In [13]:
import numpy, os
import pandas as pd

## Read the data

In [14]:
# Root folder under which all generated experiment files are written.
SAVEFOLDER = "/cephyr/users/lovhag/Alvis/projects/rome/data/prediction_mech_comb_experiments"
# Model whose source datasets are used; alternatives: "llama2_7B", "gpt2_xl".
MODEL_NAME = "llama2_7B"

# (config-name suffix, source dataset name) pairs. Their order fixes both the
# order of the configs within each size group and the key order inside every
# config.
_SINGLETON_SOURCES = (
    ("prompt_bias", "prompt_bias"),
    ("name_bias", "person_name_bias"),
    ("exact_recall", "exact_recall"),
    ("guesswork", "guesswork"),
    ("generic", "generic"),
)

# Every singleton config draws samples from exactly one source dataset; the
# value gives the number of samples per category (0 for all other categories).
# For all configs, both the maximally confident and the maximally unconfident
# samples are checked.
# For the 1000-sample configs the unconfident/confident split doesn't matter:
# they always take _all_ samples.
SINGLETON_CONFIGS = {
    f"{size_label}_{suffix}": {
        source: (sample_count if source == chosen_source else 0)
        for _, source in _SINGLETON_SOURCES
    }
    for size_label, sample_count in (("single", 1), ("100", 100), ("1000", 1000))
    for suffix, chosen_source in _SINGLETON_SOURCES
}

# always 1000 samples for these, so give proportions
# (each value is the share of the combined dataset taken from that source)
COMPOSITION_CONFIGS = {
    "50_exact_50_biased": {
        "prompt_bias": 0.25,
        "person_name_bias": 0.25,
        "exact_recall": 0.5,
        "guesswork": 0,
        "generic": 0,
    },
    "75_exact_25_biased": {
        "prompt_bias": 0.125,
        "person_name_bias": 0.125,
        "exact_recall": 0.75,
        "guesswork": 0,
        "generic": 0,
    },
    "25_exact_75_biased": {
        "prompt_bias": 0.375,
        "person_name_bias": 0.375,
        "exact_recall": 0.25,
        "guesswork": 0,
        "generic": 0,
    },
    "45_exact_45_biased_10_guess": {
        "prompt_bias": 0.225,
        "person_name_bias": 0.225,
        "exact_recall": 0.45,
        "guesswork": 0.1,
        "generic": 0,
    },
    "40_exact_40_biased_20_guess": {
        "prompt_bias": 0.2,
        "person_name_bias": 0.2,
        "exact_recall": 0.4,
        "guesswork": 0.2,
        "generic": 0,
    },
    "25_exact_25_biased_50_guess": {
        "prompt_bias": 0.125,
        "person_name_bias": 0.125,
        "exact_recall": 0.25,
        "guesswork": 0.5,
        "generic": 0,
    },
    "45_exact_45_biased_10_generic": {
        "prompt_bias": 0.225,
        "person_name_bias": 0.225,
        "exact_recall": 0.45,
        "guesswork": 0,
        "generic": 0.1,
    },
    "50_exact_50_guess": {
        "prompt_bias": 0,
        "person_name_bias": 0,
        "exact_recall": 0.5,
        "guesswork": 0.5,
        "generic": 0,
    },
    "100_biased": {
        "prompt_bias": 0.5,
        "person_name_bias": 0.5,
        "exact_recall": 0,
        "guesswork": 0,
        "generic": 0,
    },
}

# Sanity check: the proportions of every config must add up to 1.
# The values are binary floats, so the sum is compared with a small tolerance;
# an exact `== 1` test can reject valid proportions because of rounding error.
for config_name, proportions in COMPOSITION_CONFIGS.items():
    total = sum(proportions.values())
    assert abs(total - 1) < 1e-9, (
        f"Proportions of config '{config_name}' sum to {total}, expected 1"
    )

In [15]:
def _ct_source(queries_file, ct_results_dir, filename_template):
    """Bundle the three fields that describe one source dataset.

    queries_file: path of the JSON / JSON-lines file holding the samples.
    ct_results_dir: folder stored with every sample as "CT_results_dir"
        (presumably the causal-tracing results for these queries).
    filename_template: "{}"-style file-name pattern stored with every sample
        as "filename_template".
    """
    return {
        "queries_file": queries_file,
        "CT_results_dir": ct_results_dir,
        "filename_template": filename_template,
    }


# Source datasets, indexed first by model name and then by sample category.
DATASETS = {
    "gpt2_xl": {
        "exact_recall": _ct_source(
            "/cephyr/users/lovhag/Alvis/projects/rome/data/eval_on_fact_recall_set/gpt2-xl/1000_exact.json",
            "/cephyr/users/lovhag/Alvis/projects/rome/data/eval_on_fact_recall_set/gpt2-xl/causal_trace_2377617/cases",
            "knowledge_{}_mlp.npz",
        ),
        "prompt_bias": _ct_source(
            "/cephyr/users/lovhag/Alvis/projects/fact-recall-detection/data/CT_results/gpt2_xl/synthetic_data/prompt_bias_bias.jsonl",
            "/cephyr/users/lovhag/Alvis/projects/fact-recall-detection/data/CT_results/gpt2_xl/synthetic_data/prompt_bias_bias/cases",
            "{}_candidate_mlp.npz",
        ),
        "person_name_bias": _ct_source(
            "/cephyr/users/lovhag/Alvis/projects/fact-recall-detection/data/CT_results/gpt2_xl/synthetic_data/person_name_bias.jsonl",
            "/cephyr/users/lovhag/Alvis/projects/fact-recall-detection/data/CT_results/gpt2_xl/synthetic_data/person_name_bias/cases",
            "{}_candidate_mlp.npz",
        ),
        "guesswork": _ct_source(
            "/cephyr/users/lovhag/Alvis/projects/rome/data/eval_on_fact_recall_set/gpt2-xl/1000_guesswork.json",
            "/cephyr/users/lovhag/Alvis/projects/rome/data/eval_on_fact_recall_set/gpt2-xl/causal_trace_guesswork_2388522/cases",
            "knowledge_{}_mlp.npz",
        ),
        "generic": _ct_source(
            "/cephyr/users/lovhag/Alvis/projects/fact-recall-detection/data/CT_results/gpt2_xl/generic_samples/generic_samples.jsonl",
            "/cephyr/users/lovhag/Alvis/projects/fact-recall-detection/data/CT_results/gpt2_xl/generic_samples/cases",
            "{}_candidate_mlp.npz",
        ),
    },
    "llama2_7B": {
        "exact_recall": _ct_source(
            "/cephyr/users/lovhag/Alvis/projects/fact-recall-detection/data/CT_sensitivity_recall_eval_sets/llama2_7B/1000_exact.jsonl",
            "/cephyr/users/lovhag/Alvis/projects/rome/data/eval_on_fact_recall_set/llama2_7B/causal_trace_exact_2398046/cases",
            "{}_candidate_mlp.npz",
        ),
        "prompt_bias": _ct_source(
            "/cephyr/users/lovhag/Alvis/projects/fact-recall-detection/data/CT_results/llama2_7B/synthetic_data/prompt_bias_bias.jsonl",
            "/cephyr/users/lovhag/Alvis/projects/fact-recall-detection/data/CT_results/llama2_7B/synthetic_data/prompt_bias_bias/cases",
            "{}_candidate_mlp.npz",
        ),
        "person_name_bias": _ct_source(
            "/cephyr/users/lovhag/Alvis/projects/fact-recall-detection/data/CT_results/llama2_7B/synthetic_data/person_name_bias.jsonl",
            "/cephyr/users/lovhag/Alvis/projects/fact-recall-detection/data/CT_results/llama2_7B/synthetic_data/person_name_bias/cases",
            "{}_candidate_mlp.npz",
        ),
        "guesswork": _ct_source(
            "/cephyr/users/lovhag/Alvis/projects/fact-recall-detection/data/CT_sensitivity_recall_eval_sets/llama2_7B/1000_guesswork.jsonl",
            "/cephyr/users/lovhag/Alvis/projects/rome/data/eval_on_fact_recall_set/llama2_7B/causal_trace_guesswork_2398048/cases",
            "{}_candidate_mlp.npz",
        ),
        "generic": _ct_source(
            "/cephyr/users/lovhag/Alvis/projects/fact-recall-detection/data/CT_results/llama2_7B/generic_samples/generic_samples.jsonl",
            "/cephyr/users/lovhag/Alvis/projects/fact-recall-detection/data/CT_results/llama2_7B/generic_samples/cases",
            "{}_candidate_mlp.npz",
        ),
    },
}

## Data generation for singleton configs
For every config, two splits are saved: one with the most confident samples and one with the least confident samples.

In [16]:
def get_candidate_answer_p(row):
    """Unfinished helper; presumably meant to return the probability of a candidate answer.

    NOTE(review): this function is incomplete. It has no return statement, so
    it can only return None, and it is not called anywhere in the visible
    source. Either finish it or remove it.
    """
    # NOTE(review): `.index()` is called without the value to look up. If
    # `row.answers` is a built-in list or str (TODO confirm its type), this
    # raises TypeError as soon as the function is called.
    answer_ix = row.answers.index()

In [17]:
# Write, for every singleton config, one file with the most confident samples
# and one with the least confident samples of the single selected category.

# DataFrame.to_json does not create missing parent folders, so make sure the
# output folder exists before writing anything.
singleton_dir = os.path.join(SAVEFOLDER, f"singleton/{MODEL_NAME}")
os.makedirs(singleton_dir, exist_ok=True)

for key, config in SINGLETON_CONFIGS.items():
    for d_name, num_samples in config.items():
        if num_samples <= 0:
            continue

        src_datainfo = DATASETS[MODEL_NAME][d_name]
        queries_file = src_datainfo["queries_file"]
        # ".jsonl" sources are JSON-lines files, the others are plain JSON
        src_dataset = pd.read_json(queries_file, lines=queries_file.endswith("jsonl"))

        # add necessary metadata
        src_dataset["type"] = d_name
        src_dataset["CT_results_dir"] = src_datainfo["CT_results_dir"]
        src_dataset["filename_template"] = src_datainfo["filename_template"]

        # find probability value column; "p_answers" is only usable when it
        # holds one value per sample instead of a list of values
        if "p_answers" in src_dataset.columns and not isinstance(src_dataset["p_answers"].iloc[0], list):
            p_col = "p_answers"
        elif "candidate_p" in src_dataset.columns:
            p_col = "candidate_p"
        elif "probability" in src_dataset.columns:
            p_col = "probability"
        else:
            raise ValueError("Could not find a probability column")

        # save data splits
        # confident split: the num_samples rows with the highest probability
        filename = os.path.join(singleton_dir, f"{key}_confident.jsonl")
        src_dataset.sort_values(p_col, ascending=False).iloc[:num_samples].to_json(filename, orient="records", lines=True)
        # unconfident split: the num_samples rows with the lowest probability
        filename = os.path.join(singleton_dir, f"{key}_unconfident.jsonl")
        src_dataset.sort_values(p_col).iloc[:num_samples].to_json(filename, orient="records", lines=True)

## Data generation for combination configs
Always 1000 samples.

In [18]:
# Total number of samples in every combined dataset; the proportions in
# COMPOSITION_CONFIGS are multiplied by this value.
num_samples = 1000

# Maps a part label (used in the names of the per-part output files) to the
# sample categories that belong to that part.
part_info = dict(
    biased=["prompt_bias", "person_name_bias"],
    exact=["exact_recall"],
    guess=["guesswork"],
    generic=["generic"],
)

In [19]:
# Build every combined dataset from randomly drawn samples (fixed seed) and
# save it together with one file per part ("biased", "exact", ...).

# DataFrame.to_json does not create missing parent folders, so make sure the
# output folder exists before writing anything.
combined_dir = os.path.join(SAVEFOLDER, f"combined/{MODEL_NAME}")
os.makedirs(combined_dir, exist_ok=True)

for key, config in COMPOSITION_CONFIGS.items():
    # Collect the pieces and concatenate once, instead of growing a frame that
    # starts out as an empty DataFrame.
    parts = []
    for d_name, prop in config.items():
        if prop <= 0:
            continue

        src_datainfo = DATASETS[MODEL_NAME][d_name]
        queries_file = src_datainfo["queries_file"]
        src_dataset = pd.read_json(queries_file, lines=queries_file.endswith("jsonl"))
        # round() instead of int(): a float product that lands just below the
        # intended integer would otherwise be truncated to one sample too few
        src_dataset = src_dataset.sample(round(num_samples * prop), random_state=42)

        # add necessary metadata
        src_dataset["type"] = d_name
        src_dataset["CT_results_dir"] = src_datainfo["CT_results_dir"]
        src_dataset["filename_template"] = src_datainfo["filename_template"]

        parts.append(src_dataset)

    if not parts:
        raise ValueError(f"Config '{key}' does not select any samples")
    data = pd.concat(parts, ignore_index=True)

    # save data
    filename = os.path.join(combined_dir, f"{key}.jsonl")
    data.to_json(filename, orient="records", lines=True)

    # save data parts; the file name carries the share of the part in percent
    for part_name, d_names in part_info.items():
        share_num = round(sum(config[d_name] for d_name in d_names) * 100)
        if share_num > 0:
            part_filepath = os.path.join(combined_dir, f"{share_num}_{part_name}.jsonl")
            data[data["type"].isin(d_names)].to_json(part_filepath, orient="records", lines=True)

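The same combinations are also generated from the most confident and the least confident samples of each category, instead of random samples.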

In [20]:
# Build every combined dataset twice: once from the most confident and once
# from the least confident samples of each category. Save both, together with
# one file per part ("biased", "exact", ...).

# DataFrame.to_json does not create missing parent folders, so make sure the
# output folder exists before writing anything.
combined_dir = os.path.join(SAVEFOLDER, f"combined/{MODEL_NAME}")
os.makedirs(combined_dir, exist_ok=True)

for key, config in COMPOSITION_CONFIGS.items():
    # Collect the pieces and concatenate once, instead of growing frames that
    # start out as empty DataFrames.
    conf_parts = []
    unconf_parts = []
    for d_name, prop in config.items():
        if prop <= 0:
            continue

        src_datainfo = DATASETS[MODEL_NAME][d_name]
        queries_file = src_datainfo["queries_file"]
        src_dataset = pd.read_json(queries_file, lines=queries_file.endswith("jsonl"))

        # add necessary metadata
        src_dataset["type"] = d_name
        src_dataset["CT_results_dir"] = src_datainfo["CT_results_dir"]
        src_dataset["filename_template"] = src_datainfo["filename_template"]

        # find probability value column; "p_answers" is only usable when it
        # holds one value per sample instead of a list of values
        if "p_answers" in src_dataset.columns and not isinstance(src_dataset["p_answers"].iloc[0], list):
            p_col = "p_answers"
        elif "candidate_p" in src_dataset.columns:
            p_col = "candidate_p"
        elif "probability" in src_dataset.columns:
            p_col = "probability"
        else:
            raise ValueError("Could not find a probability column")

        # round() instead of int(): a float product that lands just below the
        # intended integer would otherwise be truncated to one sample too few
        num_part_samples = round(num_samples * prop)
        conf_parts.append(src_dataset.sort_values(p_col, ascending=False).iloc[:num_part_samples])
        unconf_parts.append(src_dataset.sort_values(p_col).iloc[:num_part_samples])

    if not conf_parts:
        raise ValueError(f"Config '{key}' does not select any samples")
    conf_data = pd.concat(conf_parts, ignore_index=True)
    unconf_data = pd.concat(unconf_parts, ignore_index=True)

    # save data
    conf_filename = os.path.join(combined_dir, f"{key}_confident.jsonl")
    conf_data.to_json(conf_filename, orient="records", lines=True)
    unconf_filename = os.path.join(combined_dir, f"{key}_unconfident.jsonl")
    unconf_data.to_json(unconf_filename, orient="records", lines=True)

    # save data parts; the file name carries the share of the part in percent
    for part_name, d_names in part_info.items():
        share_num = round(sum(config[d_name] for d_name in d_names) * 100)
        if share_num > 0:
            # confident
            part_filepath = os.path.join(combined_dir, f"{share_num}_{part_name}_confident.jsonl")
            conf_data[conf_data["type"].isin(d_names)].to_json(part_filepath, orient="records", lines=True)

            # unconfident
            part_filepath = os.path.join(combined_dir, f"{share_num}_{part_name}_unconfident.jsonl")
            unconf_data[unconf_data["type"].isin(d_names)].to_json(part_filepath, orient="records", lines=True)