In [2]:
import numpy as np
import torch
import sys
sys.path.append('../')
from voting_games.werewolf_env_v0 import plurality_env, Phase, Roles
import random
import copy
from tqdm import tqdm
from learning_agents.actor_critic_model import ActorCriticAgent

In [4]:
@torch.no_grad()
def play_recurrent_game_w_replays(env, wolf_policy, villager_agent, num_times=10, hidden_state_size=None, voting_type=None):
    """Play ``num_times`` complete games and keep a copy of every game's history.

    Villager actions are produced by ``villager_agent`` from the converted
    observation and that villager's most recent recurrent state; wolf actions
    are produced by ``wolf_policy``. Gradients are disabled for the whole call.

    Args:
        env: Environment providing ``reset()``, ``step(actions)``, ``agents``,
            ``agent_roles``, ``world_state``, ``convert_obs()`` and ``history``
            as they are used below.
        wolf_policy: Callable invoked as
            ``wolf_policy(env, wolf, action=<previous wolf action or None>)``.
        villager_agent: Callable invoked as ``villager_agent(obs, recurrent_cell)``
            returning ``(policies, _, recurrent_cell)``; must also provide
            ``get_action_from_policies(policies, voting_type=...)``.
        num_times: Number of games to play.
        hidden_state_size: Last dimension of the zero-initialised recurrent
            state tensors, each of shape ``(1, 1, hidden_state_size)``. The
            default ``None`` is passed straight to ``torch.zeros``, so callers
            need to supply an int in practice.
        voting_type: ``"plurality"`` stores ``game_action.item()`` and
            ``"approval"`` stores ``game_action.tolist()``. For any other value
            no action is recorded for the villagers.

    Returns:
        ``(wins, game_replays)``: the number of games for which
        ``env.world_state['winners'] == Roles.VILLAGER``, and a list holding a
        deep copy of ``env.history`` for each game played.
    """
    villager_wins = 0
    replays = []

    for _ in range(num_times):
        upcoming_obs, _, _, _, _ = env.reset()

        # Per-agent memory, created only for agents whose role value is falsy
        # (presumably the villagers -- TODO confirm against the Roles enum).
        # Each starts with a zeroed pair of recurrent state tensors.
        state_shape = (1, 1, hidden_state_size)
        memory = {}
        for agent in env.agents:
            if env.agent_roles[agent]:
                continue
            zero_state = (torch.zeros(state_shape, dtype=torch.float32),
                          torch.zeros(state_shape, dtype=torch.float32))
            memory[agent] = {'obs': [], 'hcxs': [zero_state]}

        # Shared wolf memory: the last action handed back by wolf_policy.
        wolf_brain = {'day': 1, 'phase': 0, 'action': None}

        while env.agents:
            current_obs = copy.deepcopy(upcoming_obs)
            joint_actions = {}

            # Only agents still listed in env.agents get to act this step.
            active = set(env.agents)
            live_villagers = active & set(env.world_state["villagers"])
            live_wolves = active & set(env.world_state["werewolves"])

            # Villager actions come from the recurrent agent.
            for name in live_villagers:
                encoded = env.convert_obs(current_obs[name]['observation'])
                # Add a leading dimension before handing the observation to the agent.
                batched_obs = torch.tensor(encoded, dtype=torch.float).unsqueeze(0)

                prev_state = memory[name]["hcxs"][-1]
                policies, _value, next_state = villager_agent(batched_obs, prev_state)
                _, vote = villager_agent.get_action_from_policies(policies, voting_type=voting_type)

                if voting_type == "plurality":
                    joint_actions[name] = vote.item()
                elif voting_type == "approval":
                    joint_actions[name] = vote.tolist()

                # Keep the new recurrent state for this villager's next step.
                memory[name]["hcxs"].append(next_state)

            # Day and phase are read from the first agent's observation.
            reference_obs = current_obs[list(current_obs)[0]]['observation']
            day = reference_obs['day']
            phase = reference_obs['phase']

            # Clear the remembered wolf action when the day changed or the
            # remembered phase was night, so the next wolf_policy call is
            # given action=None.
            if wolf_brain['day'] != day or wolf_brain['phase'] == Phase.NIGHT:
                wolf_brain = {'day': day, 'phase': phase, 'action': None}

            for wolf in live_wolves:
                chosen = wolf_policy(env, wolf, action=wolf_brain['action'])
                wolf_brain['action'] = chosen
                joint_actions[wolf] = chosen

            upcoming_obs, _, _, _, _ = env.step(joint_actions)

        # Game over: record the outcome and snapshot the history.
        if env.world_state['winners'] == Roles.VILLAGER:
            villager_wins += 1

        replays.append(copy.deepcopy(env.history))

    return villager_wins, replays

In [7]:
def random_coordinated_single_wolf(env, agent, action=None):
    """Wolf policy: reuse an already chosen target, else pick a random living villager.

    Args:
        env: Object whose ``world_state`` maps ``"villagers"`` and ``"alive"``
            to iterables of agent names ending in ``_<integer>``.
        agent: The wolf being asked to act. Not used by this policy.
        action: A target chosen earlier. Returned unchanged whenever it is not
            ``None`` (so a target id of ``0`` is kept).

    Returns:
        ``action`` if one was supplied, otherwise the integer suffix of a
        uniformly chosen agent that is both a villager and alive.

    Raises:
        IndexError: If ``action`` is ``None`` and no villager is alive
            (``random.choice`` on an empty sequence).
    """
    # Identity test rather than `!= None`: `!=` defers to the action's own
    # __ne__, which need not return a plain bool (e.g. array-like actions).
    if action is not None:
        return action

    # Sort the candidates so that, for a given `random` seed, the draw does not
    # depend on set iteration order.
    candidates = sorted(set(env.world_state["villagers"]) & set(env.world_state["alive"]))
    return int(random.choice(candidates).split("_")[-1])

In [10]:
# Experiment settings. The player count and the recurrent hidden size are each
# needed in several places below and have to agree, so they are named once here.
NUM_PLAYERS = 10
NUM_WEREWOLVES = 2
HIDDEN_STATE_SIZE = 128
NUM_GAMES = 1000
TRAINED_AGENT_PATH = "stored_agents/plurality_agent_10_score_50"

# Both agents are built from the same settings so that only the weights differ.
AGENT_CONFIG = {
    "rec_hidden_size": HIDDEN_STATE_SIZE,
    "rec_layers": 1,
    "hidden_mlp_size": 128,
    "num_votes": 1,
    "approval_states": 10,
}

env = plurality_env(num_agents=NUM_PLAYERS, werewolves=NUM_WEREWOLVES)
observations, _, _, _, _ = env.reset()

# Size of the converted observation, taken from player_0's first observation.
obs_size = env.convert_obs(observations['player_0']['observation']).shape[-1]
print(obs_size)

# Each agent receives its own copy of the config dict.
untrained_agent = ActorCriticAgent(dict(AGENT_CONFIG),
                                   num_players=NUM_PLAYERS,
                                   obs_size=obs_size)

trained_agent = ActorCriticAgent(dict(AGENT_CONFIG),
                                 num_players=NUM_PLAYERS,
                                 obs_size=obs_size)
# map_location="cpu": the game loop builds its recurrent states on the CPU, and
# this lets the checkpoint load on machines without the device it was saved from.
trained_agent.load_state_dict(torch.load(TRAINED_AGENT_PATH, map_location="cpu"))

random_agent = None

trained_wins, trained_replays = play_recurrent_game_w_replays(
    env, random_coordinated_single_wolf, trained_agent,
    num_times=NUM_GAMES, hidden_state_size=HIDDEN_STATE_SIZE, voting_type="plurality")
untrained_wins, untrained_replays = play_recurrent_game_w_replays(
    env, random_coordinated_single_wolf, untrained_agent,
    num_times=NUM_GAMES, hidden_state_size=HIDDEN_STATE_SIZE, voting_type="plurality")


33


In [9]:
# Villager win counts, one per line: trained agent first, then untrained agent.
print(trained_wins, untrained_wins, sep="\n")

4

In [None]:
# TODO: load the trained agent
# TODO: load a fresh (untrained) agent for comparison

# TODO: run play_recurrent_game_w_replays() with both agents

# TODO: use the resulting replays to answer the questions below

## Stats

Beyond the training stats, in-game statistics may reveal differences in behavior and voting patterns among the agents.

Here are some things we might want to find out:
- [ ] How many execution votes led to ties?
    - Did these ties favor the wolves or the villagers?
- [ ] How often do villagers cast the same votes as other villagers between accusations (i.e., is there accord among the villagers)?
- [ ] What IDs do the wolves have in games that the villagers win?


Some statistics might be specific to approval, plurality, or ranked voting.

Approval:
- [ ] How many villagers vote the exact same way (counting both approvals and neutrals)?
- [ ] How many negative votes do villagers give on average? Does this change with training? (*Stat to track while training.*)


When gathering the stats, does it make more sense to compute them from the game history, or to gather them while we train? It might be better to do it post-game.


In [None]:
def collect_and_report_stats(env, information, ignore_wolf=True, mlflow_uri=None):
    """Summarise per-player voting information into totals and averages.

    Args:
        env: Object whose ``agent_roles`` maps each player name to its role.
        information: Mapping of player name to a dict with the keys
            ``"self_vote"``, ``"dead_vote"`` and ``"viable_vote"``.
        ignore_wolf: When true, players whose role equals ``Roles.WEREWOLF``
            are left out of every statistic. When false, all players count.
        mlflow_uri: Currently unused.

    Returns:
        A dict with:
        - ``total_self_votes``: number of counted players whose ``self_vote``
          is truthy.
        - ``total_dead_votes`` / ``total_viable_votes``: sums over the counted
          players.
        - ``avg_*``: each total divided by the number of counted players
          (``0.0`` when nobody is counted).
        - ``players_with_viable_votes``: number of counted players whose
          ``viable_vote`` is truthy.
    """
    # Select the players once so every statistic uses the same population.
    # `not ignore_wolf or ...` keeps everyone when wolves are not ignored.
    counted = [
        vals
        for player, vals in information.items()
        if not ignore_wolf or env.agent_roles[player] != Roles.WEREWOLF
    ]

    total_self_votes = sum(1 for vals in counted if vals["self_vote"])
    total_dead_votes = sum(vals["dead_vote"] for vals in counted)
    total_viable_votes = sum(vals["viable_vote"] for vals in counted)

    def _average(total):
        # Average over the players that were actually counted; avoid dividing
        # by zero when the selection is empty.
        return total / len(counted) if counted else 0.0

    return {
        "total_self_votes": total_self_votes,
        "total_dead_votes": total_dead_votes,
        "total_viable_votes": total_viable_votes,
        "avg_self_votes": _average(total_self_votes),
        "avg_dead_votes": _average(total_dead_votes),
        "avg_viable_votes": _average(total_viable_votes),
        "players_with_viable_votes": sum(1 for vals in counted if vals["viable_vote"]),
    }

## Replay visualization

We need good ways to visualize the gameplay.

In [None]:
def print_replay(replay):
    """Print one line per stage showing the votes of the wolves and of the villagers.

    Args:
        replay: Iterable of stage dicts. Each stage provides ``"werewolves"``
            and ``"villagers"`` (iterables of player names ending in
            ``_<id>``) and ``"votes"`` (mapping of player name to vote).
            Players without an entry in ``"votes"`` are omitted.

    Every player is labelled ``p_<id>``, for wolves and villagers alike.
    """
    def _labelled_votes(stage, role_key):
        # (label, vote) pairs for the players of one role that voted this stage.
        return [
            (f'p_{player.split("_")[-1]}', stage["votes"][player])
            for player in stage[role_key]
            if player in stage["votes"]
        ]

    for stage in replay:
        wolf_votes = _labelled_votes(stage, "werewolves")
        villager_votes = _labelled_votes(stage, "villagers")
        print(f'Wolves \t : {wolf_votes} \t\t Villagers : {villager_votes}')

# Show one of the trained agent's games; `trained_replays` is produced by the
# evaluation cell above, so this cell runs on a fresh kernel.
print_replay(trained_replays[4])