# Analysis: Agent social problem solving ability

How well do current pretrained LLMs do at social problem solving?

## Run experiment

In [None]:
from pathlib import Path

from src.env import Env
from src.state import EnvState, AgentState
from src.actionmodel import RandomActionModel, GPT4Model, BaseActionModel
from src.experiment import Experiment

In [None]:
# Experiment parameters shared by every variant's environment.
n_agents = 4                 # number of agents created per environment
n_rounds = 10                # passed to EnvState as n_rounds
energy_usage_rate = 100      # passed to EnvState as energy_usage_rate
starting_agent_energy = 300  # initial energy given to each agent

# Overall energy budget: one `energy_usage_rate` per agent per round.
# NOTE(review): presumably the amount needed for every agent to last all
# rounds -- confirm against Env's energy accounting.
total_energy = energy_usage_rate * n_rounds * n_agents


def env_factory_with_action_model(model_name: str, model: BaseActionModel | None = None):
    """Return a zero-argument factory that builds a fresh ``Env`` on each call.

    Args:
        model_name: Name stored in the ``model`` field of every agent created
            by the factory.
        model: Optional action model. When given, it is registered once (at
            the time this function is called, not per factory call) under
            ``model_name`` via ``Env.register_model``. When omitted, nothing
            is registered here.
            NOTE(review): presumably such names are already known to ``Env``
            -- confirm.

    Returns:
        A callable taking no arguments that returns a new ``Env`` whose agents
        all use ``model_name``. The factory reads the module-level settings
        (``n_agents``, ``n_rounds``, ``energy_usage_rate``,
        ``starting_agent_energy``, ``total_energy``) when it is called.
    """
    # Compare against None explicitly so that a model object which happens to
    # be falsy (e.g. one defining __bool__ or __len__) is still registered.
    if model is not None:
        Env.register_model(model_name=model_name, model=model)

    def factory():
        agents = [
            AgentState(id=f"agent_{i}", energy=starting_agent_energy, model=model_name)
            for i in range(n_agents)
        ]
        initial_state = EnvState(
            # Energy not already handed to the agents as starting energy.
            available_energy=(total_energy - starting_agent_energy * n_agents),
            energy_usage_rate=energy_usage_rate,
            agents=agents,
            n_rounds=n_rounds,
            # The first agent ("agent_0") is the designated receiver.
            receiver_agent=agents[0].id,
        )
        return Env(initial_state)

    return factory

# Variant label -> environment factory. The factories are created in the same
# order as listed, so any model registration happens in that order too.
# "always-skip" and "gpt-4.1" pass no model instance, so
# env_factory_with_action_model registers nothing for those names.
variant_factories = {
    "random_strategy": env_factory_with_action_model(
        "random",
        RandomActionModel(trxn_amount=100),
    ),
    "always_skip_strategy": env_factory_with_action_model("always-skip"),
    "gpt_4_1_nano": env_factory_with_action_model(
        "gpt-4.1-nano",
        GPT4Model(model_name="gpt-4.1-nano-2025-04-14"),
    ),
    "gpt_4_1": env_factory_with_action_model("gpt-4.1"),
}

# NOTE(review): the experiment name does not mention the "gpt_4_1" variant.
experiment = Experiment(
    name="random_vs_always_skip_vs_gpt_4_1_nano",
    n_replications=10,
    variants=variant_factories,
)


In [None]:
# Uncomment to (re-)run the experiment, using ./data/ as the log directory.
# experiment.run(log_dir=Path.cwd() / "data/")

## Analyze experiments

Questions:
- How does performance at this social game compare across action models?
- How does the distribution of energy (across agents over time) compare across action models?
- How does altruistic vs self-serving behaviour compare across action models?
    - What is the rate of giving?
    - What is the rate of taking?
    - What is the ratio of energy given to current energy?
    - What is the ratio of energy taken to current energy?

### Read experiment data

Tables:
- ExperimentTrials:
    - Experiment name
    - Variant
    - Trial
    - Score
    - Max score

- ActionLogs:
    - Experiment name
    - Variant
    - Trial
    - Agent
    - Round
    - Action
    - Current energy

In [None]:
from src.event import EventUnion

def trial_metrics(init_state: EnvState, events: list[EventUnion]):
    """Replay one trial's events and collect metrics along the way.

    A fresh ``Env`` is built from ``init_state`` and ``events`` are applied to
    it with three callbacks passed as ``pre_apply_callbacks`` (so, going by
    the parameter name, each callback sees the state before its event is
    applied).

    Args:
        init_state: Environment state the trial started from.
        events: The trial's recorded events, in order.

    Returns:
        A dict with three keys:
        - ``"score"``: number of ``StartTurnEvent`` events seen.
        - ``"actions"``: one dict per ``ActionEvent`` whose agent was found,
          with keys ``agent_id``, ``round``, ``action`` and ``agent_energy``.
        - ``"energy_dist"``: one dict per ``EndRoundEvent``/``GameOverEvent``
          with keys ``round`` and ``energy`` (list of every agent's energy).
    """
    from src.event import ActionEvent, StartTurnEvent, EndRoundEvent, GameOverEvent

    metrics = {"score": 0, "actions": [], "energy_dist": []}

    def record_action(state: EnvState, event: EventUnion):
        # Only action events are of interest here.
        if not isinstance(event, ActionEvent):
            return
        actor = state.get_agent(event.agent_id)
        # Skip actions whose agent cannot be looked up.
        if not actor:
            return
        metrics["actions"].append(
            {
                "agent_id": event.agent_id,
                "round": state.current_round,
                "action": event.action,
                "agent_energy": actor.energy,
            }
        )

    def count_turn(state: EnvState, event: EventUnion):
        # The score is the number of turns that were started.
        if isinstance(event, StartTurnEvent):
            metrics["score"] += 1

    def snapshot_energy(state: EnvState, event: EventUnion):
        # Capture every agent's energy at round end and at game over.
        if not isinstance(event, (EndRoundEvent, GameOverEvent)):
            return
        metrics["energy_dist"].append(
            {
                "round": state.current_round,
                "energy": [agent.energy for agent in state.agents],
            }
        )

    replay_env = Env(init_state)
    replay_env.apply_events(
        events,
        pre_apply_callbacks=[record_action, count_turn, snapshot_energy],
    )

    return metrics

In [None]:
# Directory holding the logs of a previous run of this experiment. It is
# resolved relative to the working directory -- the same base the
# experiment.run(log_dir=Path.cwd() / "data/") call uses -- instead of a
# machine-specific absolute path, so the notebook is portable.
experiment_dir = (
    Path.cwd() / "data" / "random_vs_always_skip_vs_gpt_4_1_nano_20250802151318"
)

# Compute trial_metrics for the logged trials of every variant.
experiment_data = experiment.experiment_results(trial_metrics, experiment_dir)

#### How does performance at this social game compare across action models?

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt


def _score_rows(results):
    """Yield one {variant, trial, score} record per trial in ``results``."""
    for variant_name, trials in results["variants"].items():
        for trial in trials:
            yield {
                "variant": variant_name,
                "trial": trial["trial_num"],
                "score": trial["metrics"]["score"],
            }


# One row per (variant, trial) holding that trial's score.
perf_df = pd.DataFrame(list(_score_rows(experiment_data)))

# Bar chart of score by variant.
g = sns.catplot(data=perf_df, x="variant", y="score", kind="bar")
g.figure.subplots_adjust(top=0.9)  # leave headroom for the figure title
# The trailing ';' keeps the notebook from echoing the returned Text object.
g.figure.suptitle("Performance by strategy");

In [None]:
# One row per (variant, trial, energy snapshot): how many agents still have
# positive energy in that snapshot.
agent_survival_df = pd.DataFrame([
    {
        "variant": variant_name,
        "trial": trial["trial_num"],
        "round": rnd["round"],
        # True counts as 1, so this is the number of agents with energy > 0.
        "survivor_count": sum(energy > 0 for energy in rnd["energy"]),
    }
    for (variant_name, trials) in experiment_data["variants"].items()
    for trial in trials
    for rnd in trial["metrics"]["energy_dist"]
])

# Plot the number of surviving agents over time (round), one line per trial,
# faceted by strategy variant (col).
g = sns.relplot(
    agent_survival_df,
    kind="line",
    x="round",
    y="survivor_count",
    col="variant",
    col_wrap=2,
    units="trial",
    estimator=None,
)
# Anchor every facet's y-axis at zero. `.flat` visits each Axes whether the
# grid's axes array is 1-D (as it is with col_wrap) or 2-D.
for ax in g.axes.flat:
    ax.set_ylim(bottom=0)
g.figure.subplots_adjust(top=0.9)  # leave headroom for the figure title
g.figure.suptitle("Agent survival count over time across strategy variants");

#### How does the distribution of energy (across agents over time) compare across action models?

In [None]:
def _energy_rows(results):
    """Yield one record per (variant, trial, energy snapshot, agent)."""
    for variant_name, trials in results["variants"].items():
        for trial in trials:
            for snapshot in trial["metrics"]["energy_dist"]:
                # Agents are identified by their position in the snapshot.
                for agent_idx, agent_energy in enumerate(snapshot["energy"]):
                    yield {
                        "variant": variant_name,
                        "trial": trial["trial_num"],
                        "round": snapshot["round"],
                        "agent": agent_idx,
                        "energy": agent_energy,
                    }


agent_energy_df = pd.DataFrame(list(_energy_rows(experiment_data)))

# Energy over rounds: one line per trial, coloured by agent, one facet per
# strategy variant.
g = sns.relplot(
    data=agent_energy_df,
    x="round",
    y="energy",
    hue="agent",
    col="variant",
    col_wrap=2,
    kind="line",
    units="trial",
    estimator=None,
)
# Dashed reference line at zero energy in every facet.
g.map(plt.axhline, y=0, color="grey", ls="--")
g.figure.subplots_adjust(top=0.9)  # leave headroom for the figure title
g.figure.suptitle("Agent energy over time across strategy variants");

#### How does altruistic vs self-serving behaviour compare across action models?

##### What is the distribution of actions across variants?

In [None]:
def _action_rows(results):
    """Yield one record per logged action across all variants and trials."""
    for variant_name, trials in results["variants"].items():
        for trial in trials:
            for logged in trial["metrics"]["actions"]:
                yield {
                    "variant": variant_name,
                    "trial": trial["trial_num"],
                    "round": logged["round"],
                    "agent_id": logged["agent_id"],
                    "action": logged["action"].action,
                }


actions_df = pd.DataFrame(list(_action_rows(experiment_data)))

# Number of actions of each kind per variant. The series is renamed before
# reset_index so its name does not collide with the "action" index level.
action_counts = (
    actions_df
    .groupby(["variant", "action"])["action"]
    .count()
    .rename("action_count")
    .reset_index()
)

# Share of each action kind within its variant.
variant_totals = action_counts.groupby(["variant"])["action_count"].transform("sum")
action_rate_df = action_counts.assign(
    action_rate=action_counts["action_count"] / variant_totals
)

g = sns.catplot(data=action_rate_df, x="variant", y="action_rate", hue="action", kind="bar")
g.figure.subplots_adjust(top=0.9)  # leave headroom for the figure title
g.figure.suptitle("Action distribution by strategy");

In [None]:
def _agent_type(agent_id):
    """Label "agent_0" as the receiver and every other agent as "other"."""
    if agent_id == "agent_0":
        return "receiver"
    return "other"


# Tag every action with the acting agent's type, then count actions of each
# kind per (variant, agent type). The series is renamed before reset_index so
# its name does not collide with the "action" index level.
typed_action_counts = (
    actions_df
    .assign(agent_type=actions_df["agent_id"].apply(_agent_type))
    .groupby(["variant", "agent_type", "action"])["action"]
    .count()
    .rename("action_count")
    .reset_index()
)

# Share of each action kind within its (variant, agent type) group.
group_totals = (
    typed_action_counts
    .groupby(["variant", "agent_type"])["action_count"]
    .transform("sum")
)
action_rate_by_type_df = typed_action_counts.assign(
    action_rate=typed_action_counts["action_count"] / group_totals
)

g = sns.catplot(
    data=action_rate_by_type_df,
    x="variant",
    y="action_rate",
    hue="action",
    col="agent_type",
    kind="bar",
)
g.figure.subplots_adjust(top=0.9)  # leave headroom for the figure title
g.figure.suptitle("Action distribution by strategy");

##### What is the ratio of energy given to current energy?

When agents give energy, what is the ratio energy given / current energy?

In [None]:
def _give_rows(results):
    """Yield one record per logged "give" action across all variants and trials."""
    for variant_name, trials in results["variants"].items():
        for trial in trials:
            for logged in trial["metrics"]["actions"]:
                act = logged["action"]
                if act.action != "give":
                    continue
                energy = logged["agent_energy"]
                yield {
                    "variant": variant_name,
                    "trial_num": trial["trial_num"],
                    "agent_id": logged["agent_id"],
                    "agent_type": "receiver" if logged["agent_id"] == "agent_0" else "other",
                    "round": logged["round"],
                    "give_amount": act.amount,
                    "agent_energy": energy,
                    # Amount given as a fraction of the giver's logged energy.
                    # An agent with zero energy has no defined ratio: record
                    # NaN instead of raising ZeroDivisionError.
                    "give_ratio": (act.amount / energy) if energy else float("nan"),
                }


give_ratio_df = pd.DataFrame(list(_give_rows(experiment_data)))

# Distribution of give ratios by variant, split by agent type.
g = sns.catplot(give_ratio_df, y="give_ratio", x="variant", hue="agent_type")

In [None]:
# Energy logged for the giving agent at each "give" action, by variant and
# agent type.
g = sns.catplot(
    data=give_ratio_df,
    x="variant",
    y="agent_energy",
    hue="agent_type",
)

##### What is the ratio of energy taken to current energy?

When agents take energy, what is the ratio energy taken / current energy?

In [None]:
def _take_rows(results):
    """Yield one record per logged "take" action across all variants and trials."""
    for variant_name, trials in results["variants"].items():
        for trial in trials:
            for logged in trial["metrics"]["actions"]:
                act = logged["action"]
                if act.action != "take":
                    continue
                energy = logged["agent_energy"]
                yield {
                    "variant": variant_name,
                    "trial_num": trial["trial_num"],
                    "agent_id": logged["agent_id"],
                    "agent_type": "receiver" if logged["agent_id"] == "agent_0" else "other",
                    "round": logged["round"],
                    "take_amount": act.amount,
                    "agent_energy": energy,
                    # Amount taken as a fraction of the taker's logged energy.
                    # An agent with zero energy has no defined ratio: record
                    # NaN instead of raising ZeroDivisionError.
                    "take_ratio": (act.amount / energy) if energy else float("nan"),
                }


take_ratio_df = pd.DataFrame(list(_take_rows(experiment_data)))

# Distribution of take ratios by variant, split by agent type.
g = sns.catplot(take_ratio_df, y="take_ratio", x="variant", hue="agent_type")

In [None]:
# Energy logged for the taking agent at each "take" action, by variant and
# agent type.
g = sns.catplot(
    data=take_ratio_df,
    x="variant",
    y="agent_energy",
    hue="agent_type",
)