# Full Probability Distribution on Questionnaires

In this notebook we will use `scripts/get_pqa_probs.py` to compute the full
probability distribution/logit set for a given set of questions $q$, answers
$a$, and prompts $p$:

$$P(a | p + q)$$



We will also visualize the resulting data, alongside the commands
used to generate it.

In [8]:
import os
import json
import jsonlines
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm 

## `cache/fullprobs_claude30_X_AQ_20240806`
```bash
python3 scripts/get_pqa_probs.py \
    --questionnaire_jsonl datasets/webgazer_AQ_20240613.jsonl \
    --prompt_dataset datasets/claude_autism30_20240613.jsonl \
    --template datasets/llama_instruct_template.txt \
    --out_dir cache/fullprobs_claude30_X_AQ_20240806 \
    --batch_size 16 \
    --verbose
```

In [56]:
# Directory (relative to this notebook) holding the outputs of the script call above.
results_dir = "../cache/fullprobs_claude30_X_AQ_20240806"

# Show which .jsonl result files are available in results_dir.
jsonl_files = list(filter(lambda name: name.endswith('.jsonl'), os.listdir(results_dir)))
print(f"Available jsonl results files in {results_dir}: ", jsonl_files)


def load_all_datasets(results_dir, jsonl_name="pqa_probs_results_no_answer_logits.jsonl"):
    """Load a results directory together with the datasets it was built from.

    Reads `args.json` from `results_dir`, then loads the questionnaire and
    prompt datasets named by its 'questionnaire_jsonl' and 'prompt_dataset'
    entries (each resolved relative to "../"), and finally the results file
    `jsonl_name` from `results_dir`.

    Args:
        results_dir: directory containing `args.json` and the results jsonl.
        jsonl_name: file name of the results jsonl inside `results_dir`.
            The default is the "no answer logits" variant.

    Returns:
        Tuple `(args, questionnaire_dataset, prompt_dataset, results)`: the
        parsed args dict followed by three lists holding every object read
        from the respective jsonl file.
    """
    def _read_jsonl(path):
        # Read every object of a jsonl file into a list.
        with jsonlines.open(path) as reader:
            return list(reader)

    pqa_results_path = os.path.join(results_dir, jsonl_name)

    # args.json records the arguments the results were generated with.
    with open(os.path.join(results_dir, "args.json"), 'r') as f:
        args = json.load(f)

    # Dataset paths in args.json are resolved one level above the notebook.
    questionnaire_jsonl_path = os.path.join("../", args['questionnaire_jsonl'])
    prompt_dataset_path = os.path.join("../", args['prompt_dataset'])

    questionnaire_dataset = _read_jsonl(questionnaire_jsonl_path)
    prompt_dataset = _read_jsonl(prompt_dataset_path)

    # The progress bar total is one line per (prompt, question) pair.
    expected_lines = len(prompt_dataset) * len(questionnaire_dataset)
    with jsonlines.open(pqa_results_path) as reader:
        results = list(tqdm(reader, desc="Loading results", unit=" lines", total=expected_lines))

    return args, questionnaire_dataset, prompt_dataset, results

# Load the run arguments, both input datasets, and the result rows for results_dir.
args, questionnaire_dataset, prompt_dataset, results = load_all_datasets(results_dir)


Available jsonl results files in ../cache/fullprobs_claude30_X_AQ_20240806:  ['pqa_probs_results_with_answer_logits.jsonl', 'pqa_probs_results_no_answer_logits.jsonl']
args:  {'questionnaire_jsonl': 'datasets/webgazer_AQ_20240613.jsonl', 'prompt_dataset': 'datasets/claude_autism30_20240613.jsonl', 'template': 'datasets/llama_instruct_template.txt', 'out_dir': 'cache/fullprobs_claude30_X_AQ_20240806', 'model_name': 'meta-llama/Meta-Llama-3-8B-Instruct', 'batch_size': 16, 'store_answer_logits': True, 'verbose': True}


Loading results: 100%|██████████| 1500/1500 [00:26<00:00, 56.78 lines/s]


We can use the "answers_losses" across all questions as the representation for 
a given prompt. 

In [36]:
def get_unique_pq_ids(reuslts_dict_list): 
    """Return the unique prompt ids and question ids found in a results list.

    Args:
        reuslts_dict_list: iterable of result dicts, each carrying a
            'prompt_id' and a 'question_id' key. The misspelled parameter
            name is kept so existing keyword callers keep working.

    Returns:
        Tuple `(prompt_id_list, question_id_list)`: two lists of unique ids,
        each in order of first appearance in the input. The question ids are
        the questionnaire-level ids, not token ids.

    Raises:
        KeyError: if a result dict lacks 'question_id' or 'prompt_id'.
    """
    # Read from the argument (not the notebook-global `results`), and
    # materialise it once so one-shot iterables can be scanned twice.
    rows = list(reuslts_dict_list)

    # dict.fromkeys de-duplicates while preserving first-seen order.
    question_id_list = list(dict.fromkeys(r['question_id'] for r in rows))
    prompt_id_list = list(dict.fromkeys(r['prompt_id'] for r in rows))

    return prompt_id_list, question_id_list

def get_prompt_loss_reps(results_dict_list): 
    """Build one flat loss vector per prompt from `get_pqa_probs.py` results.

    For every prompt id, the 'answers_losses' entries of its result dicts are
    concatenated in the question order returned by `get_unique_pq_ids`, and
    the concatenation is converted to a numpy array.

    Args:
        results_dict_list: list of result dicts with 'prompt_id',
            'question_id' and 'answers_losses' keys.

    Returns:
        Dict mapping each prompt id to its numpy array of concatenated losses,
        keyed in the prompt order returned by `get_unique_pq_ids`.
    """
    prompt_id_list, question_id_list = get_unique_pq_ids(results_dict_list)

    # prompt id -> {question id -> answers_losses}; a later duplicate
    # (prompt, question) row overwrites an earlier one.
    losses_by_prompt = {pid: {} for pid in prompt_id_list}
    for row in results_dict_list:
        pid = row['prompt_id']
        qid = row['question_id']
        losses_by_prompt[pid][qid] = row['answers_losses']

    # Concatenate each prompt's losses in a fixed question order so that all
    # prompts share the same layout.
    flat_losses = {}
    for pid in prompt_id_list:
        per_question = losses_by_prompt[pid]
        vector = []
        for qid in question_id_list:
            vector.extend(per_question[qid])
        flat_losses[pid] = vector

    return {pid: np.array(vector) for pid, vector in flat_losses.items()}

flat_prompt_reps_dict = get_prompt_loss_reps(results)

# Stack the per-prompt vectors, in dict order, into one array
# of shape [num_prompts, dim_prompt_reps].
prompt_reps = np.array(list(flat_prompt_reps_dict.values()))
print("prompt_reps.shape: ", prompt_reps.shape)

prompt_reps.shape:  (30, 200)


In [44]:
# Inspect which fields the first prompt record carries.
prompt_dataset[0].keys()

dict_keys(['prompt', 'id', 'tag'])

In [47]:
# now get the tags from the prompt dataset 
prompt_tags = []
for prompt in prompt_dataset: 
    prompt_tags.append(prompt['tag'])

prompt_strings = [p['prompt'] for p in prompt_dataset]



In [55]:
from sklearn.decomposition import PCA
import plotly.graph_objects as go
import pandas as pd

# Project the prompt representations onto three principal components.
# Relies on prompt_reps, prompt_tags and prompt_strings from the cells above.
pca = PCA(n_components=3)
pca_result = pca.fit_transform(prompt_reps)
print("pca_result.shape: ", pca_result.shape)

# One row per prompt: its three PCA coordinates, its tag, and the prompt text
# that is shown on hover.
pc_columns = {name: pca_result[:, idx] for idx, name in enumerate(['PC1', 'PC2', 'PC3'])}
df = pd.DataFrame({**pc_columns, 'Tag': prompt_tags, 'HoverInfo': prompt_strings})

# One 3D scatter trace per unique tag, named after that tag.
fig = go.Figure()
for tag in df['Tag'].unique():
    tag_data = df[df['Tag'] == tag]
    tag_trace = go.Scatter3d(
        x=tag_data['PC1'],
        y=tag_data['PC2'],
        z=tag_data['PC3'],
        mode='markers',
        marker=dict(size=5),
        name=tag,
        text=tag_data["HoverInfo"],
        hoverinfo="text",
    )
    fig.add_trace(tag_trace)

# Axis labels and title, applied in a single layout update.
fig.update_layout(
    scene=dict(xaxis_title='PC1', yaxis_title='PC2', zaxis_title='PC3'),
    title_text="20240806 PCA on loss(ans | question + prompt) -- proper Llama instruction prompt",
)

fig.show()

# Save the interactive plot next to the results it was computed from.
plot_html_path = os.path.join(results_dir, "prompt_loss_pca_plot.html")
fig.write_html(plot_html_path)

pca_result.shape:  (30, 3)


Plot available here: [https://lancelot.languagegame.io/archive/emo/aman/cache/fullprobs_claude30_X_AQ_20240806/prompt_loss_pca_plot.html](https://lancelot.languagegame.io/archive/emo/aman/cache/fullprobs_claude30_X_AQ_20240806/prompt_loss_pca_plot.html)

Interesting to see that using the "proper" Llama instruction format (see `datasets/llama_instruct_template.txt`) actually produces a very different distribution in the PCA from our earlier test, where we didn't use the proper instruction prompt: [https://lancelot.languagegame.io/claude30_x_AQ_pqa_losses_202406133dpca.html](https://lancelot.languagegame.io/claude30_x_AQ_pqa_losses_202406133dpca.html)

The biggest difference is that **original results have 'neurotypical pretending' in the middle, whereas new results have 'autistic' in the middle.**