In [1]:
import numpy as np
import torch
import sys
sys.path.append('../')
from voting_games.werewolf_env_v0 import plurality_env, Roles, Phase
from notebooks.learning_agents.models import ActorCriticAgent
from notebooks.learning_agents.utils import play_static_game, play_recurrent_game
from notebooks.learning_agents.static_agents import (
    random_plurality_villager, 
    random_coordinated_plurality_villager, 
    random_agent,
    random_plurality_wolf,
    revenge_plurality_wolf,
    coordinated_revenge_plurality_wolf)
import random
import copy
from tqdm import tqdm
from tabulate import tabulate

  from .autonotebook import tqdm as notebook_tqdm


# Plurality Voting

Plurality is a voting method where each voter picks a single candidate, and the candidate with the most votes is selected. Its simplicity has led to wide adoption; however, there are quite a few drawbacks, one of which is the limited expressibility of a voter.

In our Werewolf plurality implementation, an agent can only select a single target, and cannot express their beliefs towards the remaining agents. This is also the voting mechanism used in every paper involving the Werewolf game up to now, so we want to see how our trained agents compare.


## Win Rates

We want to see how our hand-crafted agents play against each other, with special interest given to coordinated random villagers and wolves.

As expected, the coordinated random villagers and wolves performed the best out of the static policies {cite}`braverman2008mafia`.
Our agent, trained against coordinated random wolves, performed better than all of our hand-crafted villager policies. It also generalized well against our other wolf policies, achieving the highest win rates across the board.

Below is a generated table of 1000 runs between each villager policy and each werewolf policy. 

In [6]:
# Build the plurality-voting environment shared by every matchup in this cell:
# 10 agents, 2 of them werewolves, 2 accusation rounds.
env = plurality_env(num_agents=10, werewolves=2, num_accusations=2)
observations, _, _, _, _ = env.reset()

# Derive the network input width from a real converted observation instead of
# hard-coding it, so it follows whatever encoding `env.convert_obs` produces.
obs_size = env.convert_obs(observations['player_0']['observation']).shape[-1]

# The architecture must match the one the stored checkpoint was trained with,
# otherwise `load_state_dict` raises on mismatched keys/shapes.
# NOTE(review): "rec_hidden_size" presumably has to agree with the
# `hidden_state_size=128` passed to `play_recurrent_game` below - confirm.
trained_plurality_agent = ActorCriticAgent({"rec_hidden_size": 128,
                                        "rec_layers": 1, 
                                        "joint_mlp_size": 128,
                                        "split_mlp_size": 128,
                                        "num_votes": 1,
                                        "approval_states": 10},
                                        num_players=10,
                                        obs_size=obs_size)

# `map_location="cpu"` lets a checkpoint that was saved from a CUDA device be
# loaded on a machine without a GPU; the agent above is constructed on CPU, so
# the loaded tensors end up on the same device as its parameters either way.
trained_plurality_agent.load_state_dict(
    torch.load("../notebooks/stored_agents/lstm_first_no_one_hot_128_128/plurality_agent_10_score_46",
               map_location="cpu"))

num_games = 1000
print(f'10 players, with 2 wolves - number of games played : {num_games} \n')

# Wolf policies every villager strategy is evaluated against. The order here
# fixes the order of entries in every `*_wins` / `*_replays` list below.
wolf_policies = [random_agent, random_plurality_wolf, revenge_plurality_wolf, coordinated_revenge_plurality_wolf]


def _evaluate_against_wolves(play_matchup, wolf_policies, num_games):
    """Run one villager strategy against each wolf policy in turn.

    Args:
        play_matchup: callable taking a wolf policy and returning a
            ``(wins, replays)`` pair for ``num_games`` games.
        wolf_policies: wolf policies to play against, in output order.
        num_games: number of games per matchup; used to turn the win count
            into a win rate.

    Returns:
        ``(win_rates, replay_logs)``: two lists with one entry per wolf
        policy, in the same order as ``wolf_policies``.
    """
    win_rates = []
    replay_logs = []
    for wolf_policy in wolf_policies:
        wins, replays = play_matchup(wolf_policy)
        win_rates.append(wins/float(num_games))
        replay_logs.append(replays)
    return win_rates, replay_logs


# Totally random villagers.
rv_wins, rv_replays = _evaluate_against_wolves(
    lambda wolf_policy: play_static_game(env, wolf_policy, random_agent, num_times=num_games),
    wolf_policies, num_games)

# Random plurality villagers.
rpv_wins, rpv_replays = _evaluate_against_wolves(
    lambda wolf_policy: play_static_game(env, wolf_policy, random_plurality_villager, num_times=num_games),
    wolf_policies, num_games)

# Coordinated random plurality villagers.
cpv_wins, cpv_replays = _evaluate_against_wolves(
    lambda wolf_policy: play_static_game(env, wolf_policy, random_coordinated_plurality_villager, num_times=num_games),
    wolf_policies, num_games)

# Trained recurrent villagers; these go through `play_recurrent_game` rather
# than `play_static_game`.
tpv_wins, tpv_replays = _evaluate_against_wolves(
    lambda wolf_policy: play_recurrent_game(env, wolf_policy, trained_plurality_agent, num_times=num_games, hidden_state_size=128, voting_type="plurality"),
    wolf_policies, num_games)

# One column per wolf policy, in the order the policies were iterated when the
# `*_wins` lists were filled in.
table_headers = [
    "Villager Strategy",
    "Totally Random Wolves",
    "Coordinated Random Wolves",
    "Revenge Wolves",
    "Coordinated Revenge Wolves",
]

# One row per villager strategy: a label followed by its win rate against each
# wolf policy.
table_rows = [
    ['Totally Random', *rv_wins],
    ['Random Targetting of living villagers', *rpv_wins],
    ['Coorindated random targetting', *cpv_wins],
    ['Trained villagers', *tpv_wins],
]

print(tabulate(table_rows, headers=table_headers))

10 players, with 2 wolves - number of games played : 1000 

Villager Strategy                        Totally Random Wolves    Coordinated Random Wolves    Revenge Wolves    Coordinated Revenge Wolves
-------------------------------------  -----------------------  ---------------------------  ----------------  ----------------------------
Totally Random                                           0.597                        0.042             0.076                         0.074
Random Targetting of living villagers                    0.705                        0.125             0.192                         0.244
Coorindated random targetting                            0.653                        0.314             0.304                         0.281
Trained villagers                                        0.83                         0.473             0.45                          0.503


## Indicators
