In [1]:
import numpy as np
import torch
import sys
sys.path.append('../')
from voting_games.werewolf_env_v0 import plurality_env, Roles, Phase
import random
import copy
from tqdm import tqdm
from tabulate import tabulate

# Plurality Voting

**TODO:** blurb on plurality voting goes here.

We want to see how static agents fare against static wolves before training our PPO agents, which will hopefully learn to do better.

## Pre-Training Baselines

To properly assess our agents, we need baselines. For this purpose we have totally random villagers and semi-random villagers that will only vote for agents that are currently alive.

As for wolves, we have the following behaviors:
- coordinated wolves that all target the same randomly chosen villager
- random wolves that act completely at random
- revenge wolves that coordinate and target a random villager who targeted a wolf

In [2]:

# def revenge_coordinated_wolf(env, actions = None):
#     villagers_remaining = set(env.world_state["villagers"]) & set(env.world_state['alive'])
#     wolves_remaining = set(env.world_state["werewolves"]) & set(env.world_state['alive'])

#     # who tried to vote out a wolf last time?
    
#     target = random.choice(list(villagers_remaining))
#     # pick 
#     for wolf in wolves_remaining:
#         actions[wolf] = [0] * len(env.possible_agents)
#         actions[wolf][int(target.split("_")[-1])] = -1
#         for curr_wolf in wolves_remaining:
#             actions[wolf][int(curr_wolf.split("_")[-1])] = 1
#     # for wolf in env.werewolves_remaining:

def random_single_target_villager(env, agent):
    """Pick a random living player other than ``agent`` and return its index.

    Candidates are every name in ``env.world_state["alive"]`` except ``agent``
    itself. The returned value is the integer parsed from the text after the
    last underscore of the chosen name.

    Raises:
        IndexError: if nobody else is alive, because ``random.choice`` is then
            handed an empty list.
    """
    candidates = set(env.world_state["alive"]) - {agent}
    chosen_name = random.choice(list(candidates))
    _, _, suffix = chosen_name.rpartition("_")
    return int(suffix)

# random_coordinated_wolf(env)
def random_agent_action(env, agent, action=None):
    """Sample an arbitrary action for ``agent`` from its action space.

    ``action`` is ignored; it is accepted so this policy can be called with
    the same keyword as the coordinated wolf policy.
    """
    space = env.action_space(agent)
    return space.sample()

def random_coordinated_single_wolf(env, agent, action=None):
    """Return the wolves' shared target, choosing one if none is set yet.

    Args:
        env: Environment whose ``world_state`` holds ``"villagers"`` and
            ``"alive"`` collections of player names.
        agent: Name of the acting wolf. Unused; kept so every policy shares
            the same call signature.
        action: Target already chosen for this round, or ``None`` to pick a
            fresh one.

    Returns:
        ``action`` unchanged when it is not ``None``; otherwise the integer
        parsed from the text after the last underscore in the name of a
        randomly chosen living villager.

    Raises:
        IndexError: if a target has to be chosen and no villager is alive.
    """
    # Identity check instead of `!= None`: inequality is delegated to the
    # action's own __ne__, which is ambiguous for array-like actions.
    if action is not None:
        return action

    # Only computed when a fresh target is actually needed.
    villagers_remaining = set(env.world_state["villagers"]) & set(env.world_state['alive'])
    return int(random.choice(list(villagers_remaining)).split("_")[-1])


def play_static_wolf_game(env, wolf_policy, villager_agent, num_times=100):
    """Play ``num_times`` games with fixed policies and count villager wins.

    Every loop iteration builds one action per agent still in ``env.agents``
    and passes the resulting dict to ``env.step``. Villagers act through
    ``villager_agent(env, name)``. Wolves act through
    ``wolf_policy(env, name, action=previous)``, where ``previous`` is the
    value the last wolf call returned, so a policy may reuse it as a shared
    target. That value is cleared whenever the step lands in ``Phase.NIGHT``
    or moves from ``Phase.NIGHT`` to ``Phase.ACCUSATION``.

    Args:
        env: Environment exposing ``reset``, ``step``, ``agents``,
            ``world_state`` and ``history``.
        wolf_policy: Callable ``(env, agent, action=...)`` returning an action.
        villager_agent: Callable ``(env, agent)`` returning an action.
        num_times: Number of games to play.

    Returns:
        A ``(villager_wins, game_replays)`` tuple: the number of games whose
        ``world_state['winners']`` equals ``Roles.VILLAGER``, and one deep
        copy of ``env.history`` per game.
    """
    wins_for_village = 0
    replays = []

    for _game in range(num_times):
        # Results are unused, but unpacking keeps the five-value contract.
        _obs, _rew, _term, _trunc, _info = env.reset()

        shared_wolf_action = None
        while env.agents:
            active = set(env.agents)
            active_villagers = active & set(env.world_state["villagers"])
            active_wolves = active & set(env.world_state["werewolves"])

            # Villagers decide first, each independently of the others.
            actions = {name: villager_agent(env, name) for name in active_villagers}

            # Remember the phase the wolves acted in, before the env advances.
            phase_before = env.world_state['phase']
            for name in active_wolves:
                shared_wolf_action = wolf_policy(env, name, action=shared_wolf_action)
                actions[name] = shared_wolf_action

            _obs, _rew, _term, _trunc, _info = env.step(actions)

            phase_after = env.world_state['phase']
            entered_night = phase_after == Phase.NIGHT
            left_night = phase_after == Phase.ACCUSATION and phase_before == Phase.NIGHT
            if entered_night or left_night:
                # Crossing a day/night boundary: wolves must choose afresh.
                shared_wolf_action = None

        if env.world_state['winners'] == Roles.VILLAGER:
            wins_for_village += 1

        replays.append(copy.deepcopy(env.history))

    return wins_for_village, replays

# Number of games played for every (wolf policy, villager policy) pairing.
NUM_BASELINE_GAMES = 1000

# (total players, werewolves) for each baseline table.
BASELINE_CONFIGS = [(10, 2), (15, 3)]


def _baseline_win_rates(env, wolf_policy, num_games=NUM_BASELINE_GAMES):
    """Return the villager win rate against ``wolf_policy`` for both villager baselines.

    The result is ``[semi_smart_rate, totally_random_rate]``: the fraction of
    ``num_games`` games won by villagers using
    ``random_single_target_villager`` and ``random_agent_action`` respectively.
    """
    return [
        play_static_wolf_game(env, wolf_policy, villager_policy, num_times=num_games)[0] / num_games
        for villager_policy in (random_single_target_villager, random_agent_action)
    ]


for config_index, (num_players, num_wolves) in enumerate(BASELINE_CONFIGS):
    if config_index:
        # Blank lines between consecutive tables.
        print("\n")

    env = plurality_env(num_agents=num_players, werewolves=num_wolves, num_accusations=1)
    env.reset()
    print(f'{num_players} players, with {num_wolves} wolves\n')

    # Coordinated wolves are evaluated before random wolves, villager
    # baselines in header order.
    coordinated_wolves = _baseline_win_rates(env, random_coordinated_single_wolf)
    random_wolves = _baseline_win_rates(env, random_agent_action)

    print(tabulate([['Coordinated Wolves', *coordinated_wolves], ['Random Wolves', *random_wolves]],
                   headers=["Wolf Strategy", "Semi-Smart Villager", "Totally Random Villager"]))

10 players, with 2 wolves

Wolf Strategy         Semi-Smart Villager    Totally Random Villager
------------------  ---------------------  -------------------------
Coordinated Wolves                  0.126                      0.045
Random Wolves                       0.694                      0.597


15 players, with 3 wolves

Wolf Strategy         Semi-Smart Villager    Totally Random Villager
------------------  ---------------------  -------------------------
Coordinated Wolves                  0.033                      0.002
Random Wolves                       0.741                      0.58


Now, let's see how trained agents fare against coordinated wolves.