In [1]:
import numpy as np
import torch
import sys
sys.path.append('../')
from voting_games.werewolf_env_v0 import plurality_env, pare, Phase, Roles
import random
import copy
from tqdm import tqdm
from collections import Counter
import matplotlib.pyplot as plt
from notebooks.learning_agents.models import ActorCriticAgent
from notebooks.learning_agents.utils import play_recurrent_game
from notebooks.learning_agents.static_agents import random_approval_wolf, random_plurality_wolf
import notebooks.learning_agents.stats as indicators 


  from .autonotebook import tqdm as notebook_tqdm


# Gameplay Indicators

Now that we have trained agents, we want to see which indicators match up with learned voting behaviors, and how explainable they are. We do this for both the plurality and approval voting mechanisms.


```{note}
The way the environment stores history is slightly different from how it stores observations. Whereas the latter stores the prior votes, env.history steps hold the votes and the outcomes that occurred at that particular day/phase/round.
```

## Load up data

We are going to use replays from our trained agents to investigate these various markers. 1000 games of each voting type will be used.

### Plurality Agent

In [2]:
env = plurality_env(num_agents=10, werewolves=2, num_accusations=2)
observations, _, _, _, _ = env.reset()

# Size of the last axis of the converted observation; passed to the agents as obs_size.
obs_size = env.convert_obs(observations['player_0']['observation']).shape[-1]

# Single definition of the architecture so the trained and untrained agents
# cannot silently drift apart. Each agent gets its own copy of the dict.
plurality_agent_config = {"rec_hidden_size": 128,
                          "rec_layers": 1,
                          "joint_mlp_size": 128,
                          "split_mlp_size": 128,
                          "num_votes": 1,
                          "approval_states": 10}

# Baseline: same architecture, no checkpoint loaded.
untrained_plurality_agent = ActorCriticAgent(dict(plurality_agent_config),
                                             num_players=10,
                                             obs_size=obs_size)

trained_plurality_agent = ActorCriticAgent(dict(plurality_agent_config),
                                           num_players=10,
                                           obs_size=obs_size)
# map_location="cpu" lets a checkpoint that was saved from a GPU load on a
# CPU-only machine; load_state_dict then copies the values into the agent.
trained_plurality_agent.load_state_dict(
    torch.load("stored_agents/lstm_first_no_one_hot_128_128/plurality_agent_10_score_46",
               map_location="cpu"))

# hidden_state_size is read from the config instead of repeating the literal 128.
# NOTE(review): presumably it has to equal rec_hidden_size - confirm in play_recurrent_game.
trained_plurality_wins, trained_plurality_replays = play_recurrent_game(
    env, random_plurality_wolf, trained_plurality_agent,
    num_times=1000,
    hidden_state_size=plurality_agent_config["rec_hidden_size"],
    voting_type="plurality")
untrained_plurality_wins, untrained_plurality_replays = play_recurrent_game(
    env, random_plurality_wolf, untrained_plurality_agent,
    num_times=1000,
    hidden_state_size=plurality_agent_config["rec_hidden_size"],
    voting_type="plurality")

# Keep only the replays whose final step records the villagers as winners.
trained_plurality_villager_wins = [r for r in trained_plurality_replays if r[-1]["winners"] == Roles.VILLAGER]
print(f'Trained villagers won {trained_plurality_wins} games')
untrained_plurality_villager_wins = [r for r in untrained_plurality_replays if r[-1]["winners"] == Roles.VILLAGER]
print(f'Untrained villagers won {untrained_plurality_wins} games')

Trained villagers won 486 games
Untrained villagers won 47 games


### Approval Agent

In [3]:
env = pare(num_agents=10, werewolves=2, num_accusations=2)
observations, _, _, _, _ = env.reset()

# Size of the last axis of the converted observation; passed to the agents as obs_size.
obs_size = env.convert_obs(observations['player_0']['observation']).shape[-1]

# Single definition of the architecture so the trained and untrained agents
# cannot silently drift apart. Each agent gets its own copy of the dict.
approval_agent_config = {"rec_hidden_size": 256,
                         "rec_layers": 1,
                         "joint_mlp_size": 128,
                         "split_mlp_size": 128,
                         "num_votes": 10,
                         "approval_states": 3}

# Baseline: same architecture, no checkpoint loaded.
untrained_approval_agent = ActorCriticAgent(dict(approval_agent_config),
                                            num_players=10,
                                            obs_size=obs_size)

trained_approval_agent = ActorCriticAgent(dict(approval_agent_config),
                                          num_players=10,
                                          obs_size=obs_size)
# map_location="cpu" lets a checkpoint that was saved from a GPU load on a
# CPU-only machine; load_state_dict then copies the values into the agent.
trained_approval_agent.load_state_dict(
    torch.load("stored_agents/lstm_first_no_one_hot_256_128/approval_agent_10_score_49",
               map_location="cpu"))

# hidden_state_size is read from the config instead of repeating the literal 256.
# NOTE(review): presumably it has to equal rec_hidden_size - confirm in play_recurrent_game.
trained_approval_wins, trained_approval_replays = play_recurrent_game(
    env, random_approval_wolf, trained_approval_agent,
    num_times=1000,
    hidden_state_size=approval_agent_config["rec_hidden_size"],
    voting_type="approval")
untrained_approval_wins, untrained_approval_replays = play_recurrent_game(
    env, random_approval_wolf, untrained_approval_agent,
    num_times=1000,
    hidden_state_size=approval_agent_config["rec_hidden_size"],
    voting_type="approval")

# Keep only the replays whose final step records the villagers as winners.
trained_approval_villager_wins = [r for r in trained_approval_replays if r[-1]["winners"] == Roles.VILLAGER]
print(f'Trained villagers won {trained_approval_wins} games')
untrained_approval_villager_wins = [r for r in untrained_approval_replays if r[-1]["winners"] == Roles.VILLAGER]
print(f'Untrained villagers won {untrained_approval_wins} games')


Trained villagers won 507 games
Untrained villagers won 62 games


## Behavioral Indicators

### Days elapsed before a villager win

The average number of days elapsed before the villagers win is a metric that highlights positive learning and collaboration trends.


In [6]:
def _mean_days_to_villager_win(villager_wins):
    """Return the mean of the final step's "day" value over the given replays."""
    return np.mean([villager_win[-1]["day"] for villager_win in villager_wins])


# The original format spec `:2f` means "minimum width 2, default precision 6";
# `.2f` (two decimals) is what was intended.
print("Average amount of days until a win is achieved by villagers in plurality games")
print(f'\t Trained villagers : {_mean_days_to_villager_win(trained_plurality_villager_wins):.2f}')
print(f'\t Untrained villagers : {_mean_days_to_villager_win(untrained_plurality_villager_wins):.2f}')

print("\n")

print("Average amount of days until a win is achieved by villagers in approval games")
print(f'\t Trained villagers : {_mean_days_to_villager_win(trained_approval_villager_wins):.2f}')
print(f'\t Untrained villagers : {_mean_days_to_villager_win(untrained_approval_villager_wins):.2f}')

Average amount of days until a win is achieved by villagers in plurality games
	 Trained villagers : 2.987368
	 Untrained villagers : 3.076923


Average amount of days until a win is achieved by villagers in approval games
	 Trained villagers : 2.970833
	 Untrained villagers : 3.376471


### Days between wolf executions

The distance in days between wolf executions also highlights positive trends in learning and collaboration: the lower the number, the more likely it is that the villagers were able to confidently coordinate and identify the wolves.

In [7]:
# One row per voting mechanism: (heading, trained villager wins, untrained villager wins).
wolf_kill_report = (
    ("Average amount of days until the next wolf was killed in plurality games for 2 wolf environments",
     trained_plurality_villager_wins,
     untrained_plurality_villager_wins),
    ("Average amount of days until the next wolf was killed in approval games for 2 wolf environments",
     trained_approval_villager_wins,
     untrained_approval_villager_wins),
)

for section_number, (heading, trained_wins, untrained_wins) in enumerate(wolf_kill_report):
    # Blank separator between the sections, but not before the first one.
    if section_number:
        print("\n")
    print(heading)

    # Each replay yields a pair of days; the gap is the second minus the first.
    wolf_execution_days = list(map(indicators._when_did_wolves_get_killed, trained_wins))
    wolf_execution_duration_between = [later_day - earlier_day for earlier_day, later_day in wolf_execution_days]
    print(f'\tDays between wolf kills for trained agents : {np.mean(wolf_execution_duration_between):.3f}')

    wolf_execution_days = list(map(indicators._when_did_wolves_get_killed, untrained_wins))
    wolf_execution_duration_between = [later_day - earlier_day for earlier_day, later_day in wolf_execution_days]
    print(f'\tDays between wolf kills for untrained agents : {np.mean(wolf_execution_duration_between):.3f}')

Average amount of days until the next wolf was killed in plurality games for 2 wolf environments
	Days between wolf kills for trained agents : 1.581
	Days between wolf kills for untrained agents : 1.667


Average amount of days until the next wolf was killed in approval games for 2 wolf environments
	Days between wolf kills for trained agents : 1.448
	Days between wolf kills for untrained agents : 1.659


### Targetting Indicators

Picking the right indicators to describe targeting behavior is not straightforward, and the choice differs between plurality and approval voting. Below are the ones currently chosen for both game types, along with a rendering of them across days and phases in a randomly selected game.

#### Plurality
        target_record[step['day']].append([unique_villager_votes,
                                           avg_self_vote,
                                           percent_of_villagers_targetting_wolves, 
                                           percent_of_villagers_targetting_dead_players, 
                                           percent_of_villagers_targetting_a_dead_wol


In [4]:
# Print the plurality targeting indicators, step by step, for the first
# replay that the trained villagers won.
first_plurality_win = trained_plurality_villager_wins[0]
record = indicators._plurality_target_indicators(first_plurality_win, verbose=True)

Wolves : ['player_0', 'player_3']

Day : 1 | Phase : 0 | Round : 0
Villager votes : [9, 9, 9, 3, 9, 9, 5, 2]
	 | - Ratio of unique players targetted : 0.5
	 | - 0.125 of the votes targetting wolves
	 | - 0.125 of villagers targetting themselves
	 | - 0.000 share of villager votes targetting dead players
	 | - 0.000 share of villager votes targetting dead wolves

Day : 1 | Phase : 0 | Round : 1
Villager votes : [3, 9, 9, 7, 7, 8, 6, 7]
	 | - Ratio of unique players targetted : 0.625
	 | - 0.125 of the votes targetting wolves
	 | - 0.125 of villagers targetting themselves
	 | - 0.000 share of villager votes targetting dead players
	 | - 0.000 share of villager votes targetting dead wolves

Day : 1 | Phase : 1 | Round : 0
Villager votes : [2, 8, 9, 8, 9, 0, 3, 8]
	 | - Ratio of unique players targetted : 0.625
	 | - 0.250 of the votes targetting wolves
	 | - 0.0 of villagers targetting themselves
	 | - 0.000 share of villager votes targetting dead players
	 | - 0.000 share of villager vot

In [5]:
# Aggregate the plurality targeting indicators over all villager-won replays,
# first for the trained agents and then for the untrained baseline.
avg_records, avg_records_untrained = (
    indicators._game_avg_records(villager_wins, indicators._plurality_target_indicators)
    for villager_wins in (trained_plurality_villager_wins, untrained_plurality_villager_wins)
)

In [6]:
# Stack the per-key arrays of each record dict along a new leading axis.
# NOTE(review): presumably axis 0 = day, axis 1 = accusation/voting step within
# the day and axis 2 = indicator - confirm against indicators._game_avg_records.
stacked = np.stack([*avg_records.values()])
stacked_untracked = np.stack([*avg_records_untrained.values()])

# The whole array.
print(stacked[:, :, :])

# Only the first indicator, for every entry along the first two axes.
first_indicator = stacked[:, :, 0]
print(first_indicator)

# First indicator: first row (axis 0 fixed at 0), then first column (axis 1 fixed at 0).
print(stacked[0, :, 0])
print(stacked[:, 0, 0])

[[[0.5627572  0.04603909 0.21656379 0.         0.        ]
  [0.53986626 0.04063786 0.37757202 0.         0.        ]
  [0.47299383 0.03009259 0.54346708 0.         0.        ]]

 [[0.5930825  0.04580639 0.31334509 0.22050754 0.09082892]
  [0.53434254 0.04805996 0.39506173 0.07975701 0.02704292]
  [0.49848129 0.04600235 0.44816774 0.05070547 0.01646091]]

 [[0.64127726 0.04376947 0.28504673 0.41417445 0.07102804]
  [0.61464174 0.05264798 0.39813084 0.23084112 0.03738318]
  [0.57866044 0.05249221 0.48364486 0.155919   0.02367601]]

 [[0.73763441 0.02580645 0.31397849 0.61935484 0.07311828]
  [0.75913978 0.03870968 0.4688172  0.42150538 0.0516129 ]
  [0.65806452 0.01290323 0.65806452 0.28387097 0.02365591]]]
[[0.5627572  0.53986626 0.47299383]
 [0.5930825  0.53434254 0.49848129]
 [0.64127726 0.61464174 0.57866044]
 [0.73763441 0.75913978 0.65806452]]
[0.5627572  0.53986626 0.47299383]
[0.5627572  0.5930825  0.64127726 0.73763441]


#### Approval

In [27]:
def _player_index(player_name):
    """Return the integer suffix of a player id, e.g. ``'player_3'`` -> ``3``."""
    return int(player_name.split("_")[-1])


def _share(count, total):
    """Return ``count / total`` as a float, or ``0.0`` when ``total`` is zero."""
    return count / float(total) if total else 0.0


def _votes_received(counter, players):
    """Sum the entries of ``counter`` that belong to the given player ids."""
    return sum(counter[_player_index(player)] for player in players)


def approval_target_indicators(game, verbose=False, top_fraction=0.3):
    """Compute per-step approval-voting targeting indicators for one game replay.

    The first step and every ``Phase.NIGHT`` step are skipped. For each
    remaining step, only the votes of non-wolf players are analysed. A vote is
    a sequence indexed by player number where ``-1`` is a target, ``1`` a like
    and ``0`` a neutral opinion.

    Args:
        game: Sequence of step dicts. ``game[0]`` must provide ``'werewolves'``
            and ``'villagers'`` (lists of player ids such as ``'player_3'``).
            Every step provides ``'phase'`` and, unless skipped, ``'day'``,
            ``'round'``, ``'votes'``, ``'executed'`` and ``'killed'``.
        verbose: When true, print a human-readable summary of every step.
        top_fraction: Fraction of the distinct targeted (or liked) players that
            counts as the "most common" group. At least one player is always
            inspected, even when the fraction rounds down to zero.

    Returns:
        dict mapping each day to a list with one 17-element list per processed
        step, in this order: average number of targets, likes and neutrals per
        villager; share of villagers targeting themselves; share liking
        themselves; size of the top-target group; number of wolves in it; size
        of the top-like group; number of wolves in it; share of target votes
        toward dead players, all wolves, dead wolves and living wolves; share
        of likes toward living villagers, dead villagers, dead wolves and
        living wolves.

        A share whose denominator is zero (e.g. nobody cast a target vote in
        that step) is reported as ``0.0`` instead of raising
        ``ZeroDivisionError``.
    """
    wolves = game[0]['werewolves']
    villagers = game[0]['villagers']
    wolf_indices = [_player_index(wolf) for wolf in wolves]

    # Maps day -> list of indicator rows, one row per accusation/voting step.
    target_record = {}

    # Voting steps seen so far; needed to know who was already executed
    # *before* the current vote took place.
    vote_rounds = []
    for i, step in enumerate(game):

        if step['phase'] == Phase.NIGHT or i == 0:
            continue
        if step["phase"] == Phase.VOTING:
            vote_rounds.append(step)
        if step['day'] not in target_record:
            target_record[step['day']] = []

        villager_ballots = {player: vote for player, vote in step['votes'].items() if player not in wolves}
        villager_votes = list(villager_ballots.values())

        # Indices of the players each villager targets / likes / is neutral about.
        villager_targets = [np.where(np.array(villager_vote) == -1)[0] for villager_vote in villager_votes]
        villager_likes = [np.where(np.array(villager_vote) == 1)[0] for villager_vote in villager_votes]
        villager_neutrals = [np.where(np.array(villager_vote) == 0)[0] for villager_vote in villager_votes]

        # How many target / like votes each player index received from villagers.
        v_target_counter = Counter(np.concatenate(villager_targets))
        v_like_counter = Counter(np.concatenate(villager_likes))

        ## AVERAGE UNIQUE TARGETS, LIKES, NEUTRALS ##
        v_avg_target_count = np.mean([len(targets) for targets in villager_targets])
        v_avg_like_count = np.mean([len(targets) for targets in villager_likes])
        v_avg_neutral_count = np.mean([len(targets) for targets in villager_neutrals])

        # Do villagers target and/or like themselves?
        avg_vself_target = _share(
            sum(1 for player, vote in villager_ballots.items() if vote[_player_index(player)] == -1),
            len(villager_votes))
        avg_vself_like = _share(
            sum(1 for player, vote in villager_ballots.items() if vote[_player_index(player)] == 1),
            len(villager_votes))

        # Size of the "most common" groups. The recorded value may be 0, but at
        # least the single most common entry is always inspected below.
        most_common_n_targets = int(len(v_target_counter) * top_fraction)
        most_common_n_likes = int(len(v_like_counter) * top_fraction)

        top_targets = [idx for idx, _ in v_target_counter.most_common(max(1, most_common_n_targets))]
        top_likes = [idx for idx, _ in v_like_counter.most_common(max(1, most_common_n_likes))]

        wolves_in_most_common_targets = [idx for idx in wolf_indices if idx in top_targets]
        wolves_in_most_common_likes = [idx for idx in wolf_indices if idx in top_likes]

        if step["phase"] == Phase.VOTING:
            if len(vote_rounds) == 1:
                # First vote of the game: treated as nobody being dead yet.
                dead_players = []
            else:
                # A voting step already contains its own outcome, so only count
                # executions that also appear in the previous voting step.
                dead_players = list((set(step['executed']) & set(vote_rounds[-2]['executed'])) | set(step['killed']))
        else:
            dead_players = list(set(step['executed']) | set(step['killed']))
        dead_wolves = list(set(wolves) & set(dead_players))
        dead_villagers = list(set(villagers) & set(dead_players))

        alive_wolves = [wolf for wolf in wolves if wolf not in dead_wolves]
        alive_villagers = [villager for villager in villagers if villager not in dead_villagers]

        total_target_votes = sum(v_target_counter.values())
        total_like_votes = sum(v_like_counter.values())

        # Target percentages (0.0 when no target vote was cast at all).
        percent_of_vtargets_toward_dead_players = _share(_votes_received(v_target_counter, dead_players), total_target_votes)
        percent_of_vtargets_toward_wolves = _share(_votes_received(v_target_counter, wolves), total_target_votes)
        percent_of_vtargets_toward_dead_wolves = _share(_votes_received(v_target_counter, dead_wolves), total_target_votes)
        percent_of_vtargets_toward_alive_wolves = _share(_votes_received(v_target_counter, alive_wolves), total_target_votes)

        # Like percentages (0.0 when no like was cast at all).
        percentage_of_vlikes_for_alive_villagers = _share(_votes_received(v_like_counter, alive_villagers), total_like_votes)
        percentage_of_vlikes_for_dead_villagers = _share(_votes_received(v_like_counter, dead_villagers), total_like_votes)
        percentage_of_vlikes_for_dead_wolves = _share(_votes_received(v_like_counter, dead_wolves), total_like_votes)
        percentage_of_vlikes_for_alive_wolves = _share(_votes_received(v_like_counter, alive_wolves), total_like_votes)

        # TODO: DO I repeat the above for numbers in the top n votes?
        target_record[step['day']].append([v_avg_target_count,
                                           v_avg_like_count,
                                           v_avg_neutral_count,
                                           avg_vself_target,
                                           avg_vself_like,
                                           most_common_n_targets,
                                           len(wolves_in_most_common_targets),
                                           most_common_n_likes,
                                           len(wolves_in_most_common_likes),
                                           percent_of_vtargets_toward_dead_players,
                                           percent_of_vtargets_toward_wolves,
                                           percent_of_vtargets_toward_dead_wolves,
                                           percent_of_vtargets_toward_alive_wolves,
                                           percentage_of_vlikes_for_alive_villagers,
                                           percentage_of_vlikes_for_dead_villagers,
                                           percentage_of_vlikes_for_dead_wolves,
                                           percentage_of_vlikes_for_alive_wolves,
                                           ])

        if verbose:
            phase_name = "Voting Phase" if step['phase'] == Phase.VOTING else "Accusation Phase"
            print(f'Day : {step["day"]} | Phase : {step["phase"]} - {phase_name} | Round : {step["round"]}')
            print(f'\t | - avg targetted {v_avg_target_count:.2f}, liked {v_avg_like_count:.2f}, neutral {v_avg_neutral_count:.2f}, with {avg_vself_target:.2f} share of villagers targetting themselves, and {avg_vself_like:.2f} liking themselves')
            print(f'\t | -{len(wolves_in_most_common_targets)} wolves targetted in top {most_common_n_targets} votes')
            print(f'\t | -{len(wolves_in_most_common_likes)} wolves liked in top {most_common_n_likes} likes')
            print(f'\t | - % of votes towards dead players ({percent_of_vtargets_toward_dead_players:.2f}), towards dead wolves ({percent_of_vtargets_toward_dead_wolves:.2f}), towards wolves ({percent_of_vtargets_toward_wolves:.2f}), towards living wolves ({percent_of_vtargets_toward_alive_wolves:.2f})')
            print(f'\t | - % of likes towards dead wolves ({percentage_of_vlikes_for_dead_wolves:.2f}), towards alive wolves ({percentage_of_vlikes_for_alive_wolves:.2f})')
            print(f'\t | - % of likes towards dead villagers ({percentage_of_vlikes_for_dead_villagers:.2f}), towards alive villagers ({percentage_of_vlikes_for_alive_villagers:.2f})')
            print("\n")

    return target_record

In [28]:
# Print the approval targeting indicators, step by step, for the first replay
# that the trained villagers won; the returned record is discarded.
first_approval_win = trained_approval_villager_wins[0]
_ = approval_target_indicators(first_approval_win, verbose=True)

Day : 1 | Phase : 0 - Accusation Phase | Round : 0
	 | - avg targetted 4.00, liked 3.00, neutral 3.00, with 0.38 share of villagers targetting themselves, and 0.25 liking themselves
	 | -0 wolves targetted in top 3 votes
	 | -2 wolves liked in top 2 likes
	 | - % of votes towards dead players (0.00), towards dead wolves (0.00), towards wolves (0.06), towards living wolves (0.06)
	 | - % of likes towards dead wolves (0.00), towards alive wolves (0.38)
	 | - % of likes towards dead villagers (0.00), towards alive villagers (0.62)


Day : 1 | Phase : 0 - Accusation Phase | Round : 1
	 | - avg targetted 2.88, liked 3.88, neutral 3.25, with 0.38 share of villagers targetting themselves, and 0.50 liking themselves
	 | -0 wolves targetted in top 3 votes
	 | -2 wolves liked in top 3 likes
	 | - % of votes towards dead players (0.00), towards dead wolves (0.00), towards wolves (0.13), towards living wolves (0.13)
	 | - % of likes towards dead wolves (0.00), towards alive wolves (0.35)
	 | - % o

In [29]:
# `game_avg_records` is never defined or imported in this notebook, so the bare
# name raises NameError on a fresh kernel. Use the helper from the imported
# `indicators` module, exactly as the plurality section does.
# NOTE(review): assumes indicators._game_avg_records is the same helper the
# original bare name referred to - confirm.
avg_approval_records = indicators._game_avg_records(trained_approval_villager_wins, approval_target_indicators)
avg_approval_records

{1: array([[3.43932292, 3.28229167, 3.27838542, 0.35833333, 0.31536458,
         2.65208333, 0.47916667, 2.58958333, 0.59583333, 0.        ,
         0.191969  , 0.        , 0.191969  , 0.79175278, 0.        ,
         0.        , 0.20824722],
        [3.31640625, 3.28385417, 3.39973958, 0.33489583, 0.31276042,
         2.58958333, 0.57708333, 2.62291667, 0.58541667, 0.        ,
         0.20404595, 0.        , 0.20404595, 0.79348917, 0.        ,
         0.        , 0.20651083],
        [3.23020833, 3.39375   , 3.37604167, 0.30052083, 0.34401042,
         2.48541667, 1.05416667, 2.57708333, 0.31666667, 0.        ,
         0.29600124, 0.        , 0.29600124, 0.83735535, 0.        ,
         0.        , 0.16264465]]),
 2: array([[3.46180556, 3.21016865, 3.32802579, 0.35744048, 0.30897817,
         2.475     , 0.48333333, 2.39583333, 0.55208333, 0.2074227 ,
         0.19147725, 0.05698298, 0.13449426, 0.64806072, 0.13840442,
         0.05468132, 0.15885354],
        [3.35887897, 3.26502

### Ties

Ties are quite common, and could possibly be used strategically. Knowing when and if ties occur could lead to a better understanding of agent voting patterns.

What we are currently looking for is:
- What percentage of voting rounds are ties?
- How often do ties in accusation rounds lead to ties in voting rounds?
- If a wolf gets lucky and survives a tied voting round, how likely is it that they get executed in the next voting round?

There are two functions we use to achieve this:
- `indicators._game_tie_info(game_replay, voting_type=None)`, which returns whether there was a tie, whether a wolf was targeted (and which one), and whether a wolf died during the phase. This is done for every day and every phase in a game.
- `indicators._process_tie_info(tie_records)`, which takes the results above and returns:
    - percentage of ties in accusation phases per game
    - percentage of ties in voting phases per game
    - likelihood of a tie in a voting phase given a tie in the prior accusation phases
    - likelihood of a wolf being targeted in a subsequent voting round after getting lucky and surviving a tied round in which they were a target

In [20]:
def _mean_tie_indicators(villager_wins, voting_type):
    """Average the processed tie info over the replays, ignoring NaN entries."""
    per_game_tie_stats = []
    for replay in villager_wins:
        tie_records = indicators._game_tie_info(replay, voting_type=voting_type)
        per_game_tie_stats.append(indicators._process_tie_info(tie_records))
    return np.nanmean(np.stack(per_game_tie_stats), axis=0)


def _print_tie_indicators(title, tie_stats):
    """Print the first four tie indicators under the given title."""
    print(title)
    print(f'\tLikelihood of ties in accusation phases : {tie_stats[0]:.2f}')
    print(f'\tLikelihood of ties in voting phases : {tie_stats[1]:.2f}')
    print(f'\tLikelihood of a tie in a voting phase given a tie in the prior accusation phases {tie_stats[2]:.2f}')
    print(f'\tLikelihood of a wolf getting targetting in a subsequent voting round if they survived a tie : {tie_stats[3]:.2f}')


tgps = _mean_tie_indicators(trained_plurality_villager_wins, "plurality")
tgas = _mean_tie_indicators(trained_approval_villager_wins, "approval")

_print_tie_indicators("Plurality tie indicators", tgps)
print("\n")
_print_tie_indicators("Approval tie indicators", tgas)


Plurality tie indicators
	Likelihood of ties in accusation phases : 0.29
	Likelihood of ties in voting phases : 0.19
	Likelihood of a tie in a voting phase given a tie in the prior accusation phases 0.21
	Likelihood of a wolf getting targetting in a subsequent voting round if they survived a tie : 0.77


Approval tie indicators
	Likelihood of ties in accusation phases : 0.36
	Likelihood of ties in voting phases : 0.31
	Likelihood of a tie in a voting phase given a tie in the prior accusation phases 0.32
	Likelihood of a wolf getting targetting in a subsequent voting round if they survived a tie : 0.63
