In [1]:
import numpy as np
import torch
import sys
sys.path.append('../')
from voting_games.werewolf_env_v0 import plurality_env, pare, Phase, Roles
import random
import copy
from tqdm import tqdm
from collections import Counter
import matplotlib.pyplot as plt
from notebooks.learning_agents.models import ActorCriticAgent
from notebooks.learning_agents.utils import play_recurrent_game
from notebooks.learning_agents.static_agents import random_approval_wolf, random_plurality_wolf
import notebooks.learning_agents.stats as indicators 


  from .autonotebook import tqdm as notebook_tqdm


# Gameplay Indicators

Now that we have trained agents, we want to see which indicators might match up with learned voting behaviors, and how explainable they are. We do this for both the plurality and the approval voting mechanisms.


```{note}
The way the environment stores history differs slightly from how it stores observations. Whereas an observation stores the prior votes, each `env.history` step holds the votes and the outcomes that occurred on that particular day/phase/round.
```

## Load up data

We are going to use replays from our trained agents to investigate these various markers. 1000 games of each voting type will be used.

### Plurality Agent

In [2]:
# Plurality-voting werewolf environment: 10 agents, 2 werewolves, num_accusations=2.
env = plurality_env(num_agents=10, werewolves=2, num_accusations=2)
observations, _, _, _, _ = env.reset()

# Last-axis size of a converted observation; passed to the agents as `obs_size`.
obs_size = env.convert_obs(observations['player_0']['observation']).shape[-1]

# One definition of the architecture shared by both agents, so the untrained
# baseline cannot drift away from the architecture the checkpoint was saved with.
plurality_agent_config = {
    "rec_hidden_size": 128,
    "rec_layers": 1,
    "joint_mlp_size": 128,
    "split_mlp_size": 128,
    "num_votes": 1,
    "approval_states": 10,
}
plurality_num_games = 1000

# Each agent gets its own copy of the config dict (the original passed two
# separate literals), in case the model keeps a reference to it.
untrained_plurality_agent = ActorCriticAgent(dict(plurality_agent_config),
                                             num_players=10,
                                             obs_size=obs_size)

trained_plurality_agent = ActorCriticAgent(dict(plurality_agent_config),
                                           num_players=10,
                                           obs_size=obs_size)
# map_location="cpu" lets a checkpoint that was saved from a GPU be loaded on a
# CPU-only machine; load_state_dict then copies the values into the model's parameters.
trained_plurality_agent.load_state_dict(
    torch.load("stored_agents/lstm_first_no_one_hot_128_128/plurality_agent_10_score_46",
               map_location="cpu"))

# NOTE(review): no RNG seed is set in this cell, so the win counts below will
# vary from run to run — confirm whether seeding happens elsewhere.
# hidden_state_size is read from the config so it always matches the recurrent layer.
trained_plurality_wins, trained_plurality_replays = play_recurrent_game(
    env, random_plurality_wolf, trained_plurality_agent,
    num_times=plurality_num_games,
    hidden_state_size=plurality_agent_config["rec_hidden_size"],
    voting_type="plurality")
untrained_plurality_wins, untrained_plurality_replays = play_recurrent_game(
    env, random_plurality_wolf, untrained_plurality_agent,
    num_times=plurality_num_games,
    hidden_state_size=plurality_agent_config["rec_hidden_size"],
    voting_type="plurality")

# Keep only the replays whose final step records the villagers as winners.
trained_plurality_villager_wins = [r for r in trained_plurality_replays if r[-1]["winners"] == Roles.VILLAGER]
print(f'Trained villagers won {trained_plurality_wins} games')
untrained_plurality_villager_wins = [r for r in untrained_plurality_replays if r[-1]["winners"] == Roles.VILLAGER]
print(f'Untrained villagers won {untrained_plurality_wins} games')

Trained villagers won 486 games
Untrained villagers won 47 games


### Approval Agent

In [3]:
# Approval-voting werewolf environment: 10 agents, 2 werewolves, num_accusations=2.
# NOTE: this rebinds `env` and `obs_size` from the plurality cell above.
env = pare(num_agents=10, werewolves=2, num_accusations=2)
observations, _, _, _, _ = env.reset()

# Last-axis size of a converted observation; passed to the agents as `obs_size`.
obs_size = env.convert_obs(observations['player_0']['observation']).shape[-1]

# One definition of the architecture shared by both agents, so the untrained
# baseline cannot drift away from the architecture the checkpoint was saved with.
approval_agent_config = {
    "rec_hidden_size": 256,
    "rec_layers": 1,
    "joint_mlp_size": 128,
    "split_mlp_size": 128,
    "num_votes": 10,
    "approval_states": 3,
}
approval_num_games = 1000

# Each agent gets its own copy of the config dict (the original passed two
# separate literals), in case the model keeps a reference to it.
untrained_approval_agent = ActorCriticAgent(dict(approval_agent_config),
                                            num_players=10,
                                            obs_size=obs_size)

trained_approval_agent = ActorCriticAgent(dict(approval_agent_config),
                                          num_players=10,
                                          obs_size=obs_size)
# map_location="cpu" lets a checkpoint that was saved from a GPU be loaded on a
# CPU-only machine; load_state_dict then copies the values into the model's parameters.
trained_approval_agent.load_state_dict(
    torch.load("stored_agents/lstm_first_no_one_hot_256_128/approval_agent_10_score_49",
               map_location="cpu"))

# NOTE(review): no RNG seed is set in this cell, so the win counts below will
# vary from run to run — confirm whether seeding happens elsewhere.
# hidden_state_size is read from the config so it always matches the recurrent layer.
trained_approval_wins, trained_approval_replays = play_recurrent_game(
    env, random_approval_wolf, trained_approval_agent,
    num_times=approval_num_games,
    hidden_state_size=approval_agent_config["rec_hidden_size"],
    voting_type="approval")
untrained_approval_wins, untrained_approval_replays = play_recurrent_game(
    env, random_approval_wolf, untrained_approval_agent,
    num_times=approval_num_games,
    hidden_state_size=approval_agent_config["rec_hidden_size"],
    voting_type="approval")

# Keep only the replays whose final step records the villagers as winners.
trained_approval_villager_wins = [r for r in trained_approval_replays if r[-1]["winners"] == Roles.VILLAGER]
print(f'Trained villagers won {trained_approval_wins} games')
untrained_approval_villager_wins = [r for r in untrained_approval_replays if r[-1]["winners"] == Roles.VILLAGER]
print(f'Untrained villagers won {untrained_approval_wins} games')


Trained villagers won 507 games
Untrained villagers won 62 games


## Behavioral Indicators

### Days elapsed before a villager win

The average number of days elapsed before a villager win is a metric that highlights positive learning and collaboration trends.


In [6]:
def _mean_days_to_villager_win(villager_wins):
    """Return the mean of the ``"day"`` value stored in the last step of each replay.

    Args:
        villager_wins: list of replays; each replay is indexable and its final
            step is a mapping with a ``"day"`` key.

    Returns:
        The ``np.mean`` of those final-step day values.
    """
    return np.mean([villager_win[-1]["day"] for villager_win in villager_wins])


# The format spec is `.2f` (two decimals). The previous `:2f` meant "minimum
# width 2, default precision" and therefore printed six decimals.
print("Average amount of days until a win is achieved by villagers in plurality games")
print(f'\t Trained villagers : {_mean_days_to_villager_win(trained_plurality_villager_wins):.2f}')
print(f'\t Untrained villagers : {_mean_days_to_villager_win(untrained_plurality_villager_wins):.2f}')

print("\n")

print("Average amount of days until a win is achieved by villagers in approval games")
print(f'\t Trained villagers : {_mean_days_to_villager_win(trained_approval_villager_wins):.2f}')
print(f'\t Untrained villagers : {_mean_days_to_villager_win(untrained_approval_villager_wins):.2f}')

Average amount of days until a win is achieved by villagers in plurality games
	 Trained villagers : 2.987368
	 Untrained villagers : 3.076923


Average amount of days until a win is achieved by villagers in approval games
	 Trained villagers : 2.970833
	 Untrained villagers : 3.376471


### Days between wolf executions

The number of days between wolf executions also highlights positive trends in learning and collaboration: the lower the number, the more likely it is that the villagers were able to confidently coordinate and identify the wolves.

In [7]:
def _wolf_kill_days(villager_win_replays):
    """Return what `indicators._when_did_wolves_get_killed` reports for each replay."""
    return [indicators._when_did_wolves_get_killed(replay) for replay in villager_win_replays]


# (heading, trained villager wins, untrained villager wins) for each voting mechanism.
wolf_kill_sections = (
    ("Average amount of days until the next wolf was killed in plurality games for 2 wolf environments",
     trained_plurality_villager_wins,
     untrained_plurality_villager_wins),
    ("Average amount of days until the next wolf was killed in approval games for 2 wolf environments",
     trained_approval_villager_wins,
     untrained_approval_villager_wins),
)

for section_index, (heading, trained_wins, untrained_wins) in enumerate(wolf_kill_sections):
    # Blank separator between sections, but not before the first one.
    if section_index:
        print("\n")
    print(heading)

    # Each entry unpacks into two day values; the gap is second minus first.
    wolf_execution_days = _wolf_kill_days(trained_wins)
    wolf_execution_duration_between = [later - earlier for earlier, later in wolf_execution_days]
    print(f'\tDays between wolf kills for trained agents : {np.mean(wolf_execution_duration_between):.3f}')

    wolf_execution_days = _wolf_kill_days(untrained_wins)
    wolf_execution_duration_between = [later - earlier for earlier, later in wolf_execution_days]
    print(f'\tDays between wolf kills for untrained agents : {np.mean(wolf_execution_duration_between):.3f}')

Average amount of days until the next wolf was killed in plurality games for 2 wolf environments
	Days between wolf kills for trained agents : 1.581
	Days between wolf kills for untrained agents : 1.667


Average amount of days until the next wolf was killed in approval games for 2 wolf environments
	Days between wolf kills for trained agents : 1.448
	Days between wolf kills for untrained agents : 1.659


### Targetting Indicators

Picking the right indicators to describe targeting behavior is not straightforward, and the choice differs between plurality and approval voting. Below are the ones currently chosen for both game types, along with a rendering of them across days and phases for a sample game (the first trained-villager win in the replays).

For further analysis, we also use `indicators._game_avg_records(replays,indicator_function)` to average these values across phases and days for every replay.

#### Plurality

To try and make sense of targeting, we chose to look at:
- Ratio of unique villager targets
- Ratio of villagers voting for themselves
- Percentage of villagers targeting dead players
- Percentage of villager votes targeting wolves and dead wolves

These should be good enough to indicate cooperation as well as general role comprehension.

In [4]:
# Print the plurality targeting indicators for the first trained-villager win.
first_plurality_villager_win = trained_plurality_villager_wins[0]
record = indicators._plurality_target_indicators(first_plurality_villager_win, verbose=True)

Wolves : ['player_0', 'player_3']

Day : 1 | Phase : 0 | Round : 0
Villager votes : [9, 9, 9, 3, 9, 9, 5, 2]
	 | - Ratio of unique players targetted : 0.5
	 | - 0.125 of the votes targetting wolves
	 | - 0.125 of villagers targetting themselves
	 | - 0.000 share of villager votes targetting dead players
	 | - 0.000 share of villager votes targetting dead wolves

Day : 1 | Phase : 0 | Round : 1
Villager votes : [3, 9, 9, 7, 7, 8, 6, 7]
	 | - Ratio of unique players targetted : 0.625
	 | - 0.125 of the votes targetting wolves
	 | - 0.125 of villagers targetting themselves
	 | - 0.000 share of villager votes targetting dead players
	 | - 0.000 share of villager votes targetting dead wolves

Day : 1 | Phase : 1 | Round : 0
Villager votes : [2, 8, 9, 8, 9, 0, 3, 8]
	 | - Ratio of unique players targetted : 0.625
	 | - 0.250 of the votes targetting wolves
	 | - 0.0 of villagers targetting themselves
	 | - 0.000 share of villager votes targetting dead players
	 | - 0.000 share of villager vot

#### Approval

Because of the extra dimensions and expressiveness of approval voting, trying to determine behavior for these agents is much harder. We thus collect many different and interrelated indicators to see what combinations of them might give us the most insight.

The ones we are currently looking at are:
- average target count
- average like count
- average neutral count
- average self target 
- average self like
- percentage of wolves in top targets
- percentage of wolves in top likes
- percent of votes targeting dead players
- percent of votes targeting wolves by tracking:
    - percent of votes targeting dead wolves
    - percent of votes targeting wolves that are still alive
- percent of likes towards wolves by tracking:
    - percent of likes for dead wolves
    - percent of likes for wolves that are still alive
- percent of likes towards villagers by tracking:
    - percent of likes towards dead villagers
    - percent of likes towards villagers that are still alive

There might be more complicated tracking indicators that look at changes between targets; however, these have not been implemented yet.

In [8]:
# Print the approval targeting indicators for the first trained-villager win;
# the return value is discarded.
first_approval_villager_win = trained_approval_villager_wins[0]
_ = indicators._approval_target_indicators(first_approval_villager_win, verbose=True)

Day : 1 | Phase : 0 - Accusation Phase | Round : 0
	 | - avg targetted 3.75, liked 3.00, neutral 3.25
	 | - 0.50 share of villagers targeted themselves, and 0.38 liked themselves
	 | - 0.0 wolves targetted in top votes
	 | - 0.0 wolves liked in top likes
	 | - % of votes towards dead players (0.00
	 | - % of votes for dead wolves (0.00), and towards living wolves (0.20)
	 | - % of likes towards dead wolves (0.00) and towards living wolves (0.17)
	 | - % of likes towards dead villagers (0.00), and towards living villagers (0.83)


Day : 1 | Phase : 0 - Accusation Phase | Round : 1
	 | - avg targetted 2.62, liked 3.38, neutral 4.00
	 | - 0.12 share of villagers targeted themselves, and 0.38 liked themselves
	 | - 0.3333333333333333 wolves targetted in top votes
	 | - 0.0 wolves liked in top likes
	 | - % of votes towards dead players (0.00
	 | - % of votes for dead wolves (0.00), and towards living wolves (0.24)
	 | - % of likes towards dead wolves (0.00) and towards living wolves (0.19)

### Ties

Ties are quite common, and could possibly be used strategically. Knowing when/if ties are occurring could possibly lead to a better understanding of agent voting patterns.

What we are currently looking for is:
- What percentage of voting rounds are ties?
- How often do ties in accusation rounds lead to ties in voting rounds?
- If a wolf gets lucky and survives a tied voting round, how likely is it that they get executed in the next voting round?

There are two functions we use to achieve this:
- `indicators._game_tie_info(game_replay, voting_type=None)`, which returns whether there was a tie, whether (and which) wolf was targeted, and whether a wolf died during the phase. This is done for every day and every phase in a game
- `indicators._process_tie_info(tie_records)` takes the results above and returns:
    - percentage of ties in accusation phases per game
    - percentage of ties in voting phases per game
    - likelihood of a tie in a voting phase given a tie in the prior accusation phases
    - likelihood of a wolf getting targeted in a subsequent voting round after getting lucky and surviving a tie round where they were a target

In [20]:
def _mean_tie_indicators(villager_win_replays, voting_type):
    """Average the processed tie info over all replays, skipping NaN entries.

    Each replay goes through `indicators._game_tie_info` and then
    `indicators._process_tie_info`; the per-game results are stacked and
    reduced with `np.nanmean` along axis 0.
    """
    per_game_tie_stats = [
        indicators._process_tie_info(indicators._game_tie_info(replay, voting_type=voting_type))
        for replay in villager_win_replays
    ]
    return np.nanmean(np.stack(per_game_tie_stats), axis=0)


tgps = _mean_tie_indicators(trained_plurality_villager_wins, "plurality")
tgas = _mean_tie_indicators(trained_approval_villager_wins, "approval")

tie_reports = (("Plurality tie indicators", tgps), ("Approval tie indicators", tgas))
for report_index, (report_title, tie_stats) in enumerate(tie_reports):
    # Blank separator between reports, but not before the first one.
    if report_index:
        print("\n")
    print(report_title)
    print(f'\tLikelihood of ties in accusation phases : {tie_stats[0]:.2f}')
    print(f'\tLikelihood of ties in voting phases : {tie_stats[1]:.2f}')
    print(f'\tLikelihood of a tie in a voting phase given a tie in the prior accusation phases {tie_stats[2]:.2f}')
    print(f'\tLikelihood of a wolf getting targetting in a subsequent voting round if they survived a tie : {tie_stats[3]:.2f}')


Plurality tie indicators
	Likelihood of ties in accusation phases : 0.29
	Likelihood of ties in voting phases : 0.19
	Likelihood of a tie in a voting phase given a tie in the prior accusation phases 0.21
	Likelihood of a wolf getting targetting in a subsequent voting round if they survived a tie : 0.77


Approval tie indicators
	Likelihood of ties in accusation phases : 0.36
	Likelihood of ties in voting phases : 0.31
	Likelihood of a tie in a voting phase given a tie in the prior accusation phases 0.32
	Likelihood of a wolf getting targetting in a subsequent voting round if they survived a tie : 0.63
