# Manually looking at high-deception instances

In [None]:
import os
import sys
import json
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
import requests
import pandas as pd

from pandas import DataFrame, json_normalize
from typing import List, Dict, Any, Tuple, Union, Optional

# Directory holding the evaluation result files read below. The path is relative,
# so it resolves against the notebook's current working directory.
LOGS_PATH: str = "../evaluations/results/"

In [None]:
# Load variables from a .env file (if one is found) into the process environment.
# NOTE(review): none of the cells visible here read those variables directly —
# presumably they are needed by later cells or by imported helpers; confirm.
import dotenv
dotenv.load_dotenv()

In [None]:
# Add the parent directory to the import path; presumably that is where the
# project-local `utils` module lives, so this must run before the import below.
sys.path.append("..")

# NOTE(review): only read_jsonl_as_json is used in the cells visible here.
from utils import load_agent_logs_df, read_jsonl_as_json, load_game_summary

In [None]:
# Experiment run identifiers; each one is used as the stem of a results file name.
# Only the middle "<model>_<model>" part differs between runs.
# NOTE(review): the order appears to line up with DESCRIPTIONS (crew model
# first, impostor model second) — confirm before reordering.
EXPT_NAMES: List[str] = [
    f"2025-02-01_{matchup}_100_games_v3"
    for matchup in ("phi_llama", "llama_phi", "phi_phi", "llama_llama")
]

In [None]:
# Human-readable labels, one per experiment.
# NOTE(review): presumably parallel to EXPT_NAMES by position, so keep both
# lists in the same order. Not referenced in the cells visible here.
DESCRIPTIONS: List[str] = [
    f"Crew: {crew}, Imp: {impostor}"
    for crew, impostor in (
        ("Phi", "Llama"),
        ("Llama", "Phi"),
        ("Phi", "Phi"),
        ("Llama", "Llama"),
    )
]

In [None]:
# One "<experiment>_all_skill_scores.json" path per experiment, in EXPT_NAMES order.
summary_logs_paths: List[str] = [
    os.path.join(LOGS_PATH, name + "_all_skill_scores.json")
    for name in EXPT_NAMES
]

In [None]:
# Per-experiment DataFrames, appended in the same order as summary_logs_paths.
summary_dfs: List[DataFrame] = []

for summary_logs_path in summary_logs_paths:
    # read_jsonl_as_json is a project helper that reads the file line by line.
    # NOTE(review): assumed to return a list of dict records — confirm.
    summary_logs: List[Dict[str, Any]] = read_jsonl_as_json(summary_logs_path)
    # Flatten the records into columns, then order rows by game and by step
    # within each game. Sorting keeps each row's original index label.
    summary_df: DataFrame = json_normalize(summary_logs).sort_values(
        by=["game_index", "step"]
    )
    print(f"Loaded {len(summary_df)} logs from {summary_logs_path}")
    summary_dfs.append(summary_df)

In [None]:
# Preview the first rows of the first experiment's frame to check the loaded columns.
summary_dfs[0].head()

In [None]:
# Stack the per-experiment frames into one, tagging every row with the name of
# the experiment it came from. Row labels are carried over unchanged (no
# ignore_index), so the same label can occur once per experiment in the result.
summary_df_all_expts = pd.concat(
    [
        expt_df.assign(experiment=expt_name)
        for expt_name, expt_df in zip(EXPT_NAMES, summary_dfs)
    ]
)

In [None]:
# Keep only rows that satisfy all three conditions:
#   - `deception` equals the string "10"
#     (NOTE(review): assumes the column holds strings, not numbers — confirm),
#   - the `action` text contains "SPEAK",
#   - the `action` text contains "a lot of movement".
# na=False makes rows with a missing `action` count as non-matches.
high_deception_filtered_df = summary_df_all_expts.loc[
    summary_df_all_expts["deception"].eq("10")
    & summary_df_all_expts["action"].str.contains("SPEAK", na=False)
    & summary_df_all_expts["action"].str.contains("a lot of movement", na=False)
]

# (rows, columns) of the filtered frame.
high_deception_filtered_df.shape


In [None]:
# Show full cell contents instead of truncating long text. This changes a
# global pandas display option, so it affects every later DataFrame display.
pd.set_option('display.max_colwidth', None)


In [None]:
# Inspect the thought and action text of the first 10 matching rows.
high_deception_filtered_df[['thought', 'action']].head(10)

In [None]:
# Label-based lookup of index label 2122 in the filtered frame.
# NOTE(review): labels come from the per-experiment frames (the concat does not
# reset them), so this may return several rows if 2122 matches in more than one
# experiment — check the `experiment` column of the result.
high_deception_filtered_df.loc[2122]

In [None]:
# Reporting the rows with index labels 5125 and 2122.
