In [1]:
import numpy as np
import torch
import sys
sys.path.append('../')
from voting_games.werewolf_env_v0 import pare, Roles, Phase
from notebooks.learning_agents.models import ActorCriticAgent
from notebooks.learning_agents.utils import play_static_game, play_recurrent_game
from notebooks.learning_agents.static_agents import (
    random_approval_villager, 
    random_coordinated_approval_villager, 
    random_agent,
    random_approval_wolf,
    revenge_approval_wolf,
    coordinated_revenge_approval_wolf,
    random_likes_approval_wolf,
    aggressive_approval_wolf,
    )
import notebooks.learning_agents.stats as indicators
import random
import copy
from matplotlib import pyplot as plt
from tqdm import tqdm
from tabulate import tabulate

  from .autonotebook import tqdm as notebook_tqdm


# Approval Voting

Approval voting is a mechanism in which each voter may approve of as many candidates as they wish from the list of all possible candidates.


## Win Rates

We assume that some of the findings for the plurality werewolf game {cite}`braverman2008mafia` will still hold for approval voting, because the consensus is still calculated by aggregating all of the targeting done by the players (the target with the most dislikes gets voted out).


| Villager Strategy vs. | [RWolves](rawolves) | [CRWolves](crawolves) | [RevWolves](revawolves) | [CRevWolves](crevawolves) | [CRLWolves](crlawolves) | [AggroWolves](aggrowolves) |
| --- | --- | --- | --- | --- | --- | --- |
| Totally Random | | | | | | |
| Random Targeting of living villagers | | | | | | |
| Coordinated random targeting | | | | | | |
| Trained villagers | | | | | | |

In [2]:
# Build the approval-voting werewolf environment and read the flattened
# observation width from a freshly reset game; the agent needs it to size
# its input layer.
env = pare(num_agents=10, werewolves=2, num_accusations=2)
observations, _, _, _, _ = env.reset()
obs_size = env.convert_obs(observations['player_0']['observation']).shape[-1]

# Architecture hyper-parameters; presumably these must match the ones the
# checkpoint below was trained with, otherwise load_state_dict raises on
# mismatched tensor shapes -- verify against the training notebook.
agent_config = {
    "rec_hidden_size": 256,
    "rec_layers": 1,
    "joint_mlp_size": 128,
    "split_mlp_size": 128,
    "num_votes": 10,
    "approval_states": 3,
}
trained_approval_agent = ActorCriticAgent(agent_config,
                                          num_players=10,
                                          obs_size=obs_size)

checkpoint_path = "../notebooks/stored_agents/lstm_first_no_one_hot_256_128/approval_agent_10_score_49"
# map_location="cpu" lets a checkpoint that was saved from a GPU be restored on
# a CPU-only machine; the agent itself is constructed on the CPU above.
trained_approval_agent.load_state_dict(torch.load(checkpoint_path, map_location="cpu"))
# The agent is only evaluated in this notebook, so switch it to inference mode.
# This is a no-op unless the model contains dropout / batch-norm style layers.
trained_approval_agent.eval()

num_games = 1000
print(f'10 players, with 2 wolves - number of games played : {num_games} \n')

# Best-effort reproducibility: seed every global RNG this notebook imports.
# NOTE(review): if the environment or the policies keep their own generator,
# they need to be seeded separately -- confirm against the env implementation.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Wolf policies, in the same order as the columns of the results table.
wolf_policies = [
    random_agent,
    random_approval_wolf,
    revenge_approval_wolf,
    coordinated_revenge_approval_wolf,
    random_likes_approval_wolf,
    aggressive_approval_wolf,
]


def collect_win_rates(play_game, wolf_policies, num_games):
    """Run one batch of games per wolf policy and gather the results.

    Args:
        play_game: callable ``(wolf_policy, num_times) -> (wins, replays)``
            that plays ``num_times`` games against ``wolf_policy``.
        wolf_policies: wolf policies to play against, in reporting order.
        num_games: number of games played per wolf policy.

    Returns:
        ``(win_rates, replays)``: two lists ordered like ``wolf_policies``.
        ``win_rates[i]`` is ``wins / num_games`` for the i-th policy and
        ``replays[i]`` is whatever replay object ``play_game`` returned.
    """
    win_rates, all_replays = [], []
    for wolf_policy in wolf_policies:
        wins, replays = play_game(wolf_policy, num_games)
        win_rates.append(wins / float(num_games))
        all_replays.append(replays)
    return win_rates, all_replays


def static_villagers(villager_policy):
    """Adapt ``play_static_game`` with a fixed villager policy for ``collect_win_rates``."""
    return lambda wolf_policy, num_times: play_static_game(
        env, wolf_policy, villager_policy, num_times=num_times)


# Totally random villagers.
rv_wins, rv_replays = collect_win_rates(
    static_villagers(random_agent), wolf_policies, num_games)

# Villagers using the random_approval_villager policy.
rav_wins, rav_replays = collect_win_rates(
    static_villagers(random_approval_villager), wolf_policies, num_games)

# Villagers using the random_coordinated_approval_villager policy.
cav_wins, cav_replays = collect_win_rates(
    static_villagers(random_coordinated_approval_villager), wolf_policies, num_games)

# Trained recurrent villagers; hidden_state_size matches the agent's
# "rec_hidden_size" of 256.
tav_wins, tav_replays = collect_win_rates(
    lambda wolf_policy, num_times: play_recurrent_game(
        env, wolf_policy, trained_approval_agent,
        num_times=num_times, hidden_state_size=256, voting_type="approval"),
    wolf_policies, num_games)

# Column titles: the villager strategy label followed by one column per wolf
# policy, in the order the win-rate lists were filled.
table_headers = [
    "Villager Strategy",
    "TRWolves",
    "CRWolves",
    "RevWolves",
    "CRevWolves",
    "CRLWolves",
    "AggroWolves",
]

# One row per villager strategy: its label, then its win rate per wolf policy.
table_rows = [
    ['Random', *rv_wins],
    ['L-Targets', *rav_wins],
    ['CL-Targets', *cav_wins],
    ['Trained-CRWolves', *tav_wins],
]

print(tabulate(table_rows, headers=table_headers))

10 players, with 2 wolves - number of games played : 1000 

Villager Strategy      TRWolves    CRWolves    RevWolves    CRevWolves    CRLWolves    AgroWolves
-------------------  ----------  ----------  -----------  ------------  -----------  ------------
Random                    0.609       0.075        0.137         0.145        0.084         0.005
L-Targets                 0.672       0.119        0.213         0.184        0.116         0.018
CL-Targets                0.625       0.294        0.314         0.299        0.301         0.304
Trained-CRWolves          0.578       0.48         0.583         0.595        0.168


### Days elapsed before a villager win

### Days between wolf executions

### Ties

### Targeting Indicators