In [1]:
import numpy as np
import torch
import sys
sys.path.append('../')
from voting_games.werewolf_env_v0 import pare, Roles, Phase
import random
import copy
from tqdm import tqdm

# Approval Voting

Blurb on approval voting goes here...

We want to see how static agents fare against static wolves before training our PPO agents, which we hope will learn to do better.

## Pre-Training Baselines

To properly assess our agents, we need baselines. For this purpose we have totally random villagers and semi-random villagers that will only vote for agents that are currently alive.

As for wolves, we have the following behaviors:
- random wolves that coordinate, each targeting one villager while approving of themselves. The remaining villagers get neutral rankings
- hyper aggressive wolves that simply disapprove of every single non-wolf player
- random wolves that do whatever
- revenge wolves that coordinate and target a villager that targeted them. The revenge target is chosen randomly each round

Approval voting is 

In [1]:
def random_wolf(env, agent, action=None):
    """Return an approval ballot that targets one random living villager.

    Args:
        env: Environment exposing ``world_state`` (with "villagers",
            "werewolves" and "alive" collections of agent names) and
            ``possible_agents``.
        agent: Name of the acting wolf. Unused; kept so every policy shares
            the same call signature.
        action: A ballot that was already chosen (e.g. by another wolf).
            When it is not ``None`` it is returned unchanged, which lets
            several wolves share a single target.

    Returns:
        A list with one entry per possible agent: -1 for the chosen villager,
        1 for every living wolf and 0 for everyone else. An agent's slot is
        the integer after the last underscore in its name.

    Raises:
        IndexError: If no villager is alive (``random.choice`` on an empty
            list).
    """
    # Identity check on purpose: ``!= None`` defers to the ballot's own
    # ``__ne__`` (elementwise for numpy arrays), which is not what we want.
    if action is not None:
        return action

    villagers_remaining = set(env.world_state["villagers"]) & set(env.world_state['alive'])
    wolves_remaining = set(env.world_state["werewolves"]) & set(env.world_state['alive'])

    # pick a living target
    target = random.choice(list(villagers_remaining))

    action = [0] * len(env.possible_agents)
    action[int(target.split("_")[-1])] = -1
    for curr_wolf in wolves_remaining:
        action[int(curr_wolf.split("_")[-1])] = 1

    return action

def aggressive_wolf(env, agent, action=None):
    """Build a ballot that disapproves of everyone except the living wolves.

    ``agent`` and ``action`` are accepted for signature compatibility with
    the other policies and are not used.

    Returns:
        A list with one entry per possible agent: 1 for each wolf that is
        still alive, -1 for every other slot.
    """
    living_wolves = set(env.world_state["werewolves"]) & set(env.world_state['alive'])

    # Start from blanket disapproval, then flip the pack's own slots.
    ballot = [-1 for _ in range(len(env.possible_agents))]
    for wolf_name in living_wolves:
        slot = int(wolf_name.split("_")[-1])
        ballot[slot] = 1
    return ballot


def revenge_coordinated_wolf(env, actions=None):
    """Placeholder for the revenge-wolf policy; currently always returns None.

    The living villagers and wolves are looked up from ``env.world_state``
    but nothing is done with them yet. ``actions`` is not used.
    """
    state = env.world_state
    living_villagers = set(state["villagers"]) & set(state['alive'])
    living_wolves = set(state["werewolves"]) & set(state['alive'])

    # TODO: work out who tried to vote out a wolf last time and have the
    # living wolves target that player.
    return None

def random_single_target_villager(env, agent):
    """Approve of yourself and disapprove of one random other living player.

    Args:
        env: Environment exposing ``world_state["alive"]`` and
            ``possible_agents``.
        agent: Name of the acting villager.

    Returns:
        A list with one entry per possible agent: 1 in ``agent``'s own slot,
        -1 for the randomly chosen living player, 0 elsewhere.
    """
    def slot_of(name):
        # Agent names end in "_<index>"; that index is the ballot position.
        return int(name.split("_")[-1])

    # Everyone alive except the voter is a candidate.
    candidates = set(env.world_state["alive"]) - {agent}

    ballot = [0] * len(env.possible_agents)
    ballot[slot_of(agent)] = 1

    victim = random.choice(list(candidates))
    ballot[slot_of(victim)] = -1
    return ballot

# random_coordinated_wolf(env)
def random_agent_action(env, agent, action=None):
    """Sample an action for ``agent`` from the environment's action space.

    ``action`` is ignored; it is accepted so this function can be passed
    wherever a wolf policy is expected.

    Returns:
        The sampled action converted with ``tolist()``.
    """
    sampled = env.action_space(agent).sample()
    return sampled.tolist()


def play_static_wolf_game(env, wolf_policy, villager_agent, num_times=100):
    """Play ``num_times`` games between scripted wolves and scripted villagers.

    Args:
        env: Game environment. This function uses its ``reset``, ``step``,
            ``agents``, ``world_state`` and ``history`` members.
        wolf_policy: Callable ``(env, wolf, action=...)`` returning that
            wolf's action. It is handed the action returned by the previous
            wolf call (or ``None`` after a reset), so a policy may reuse it.
        villager_agent: Callable ``(env, villager)`` returning that
            villager's action.
        num_times: Number of games to play.

    Returns:
        A tuple ``(villager_wins, game_replays)``: the number of games whose
        ``world_state['winners']`` equals ``Roles.VILLAGER``, and a list with
        a deep copy of ``env.history`` for every game played.
    """
    wins_for_village = 0
    replays = []

    for _game in range(num_times):
        # reset() is unpacked as a 5-tuple; none of the values are needed.
        _obs, _rewards, _terms, _truncs, _infos = env.reset()

        shared_wolf_action = None
        while env.agents:
            active = set(env.agents)
            living_villagers = active & set(env.world_state["villagers"])
            living_wolves = active & set(env.world_state["werewolves"])

            # Villagers act first, each independently.
            joint_action = {
                name: villager_agent(env, name) for name in living_villagers
            }

            # Remember the phase we are acting in; it is compared with the
            # phase after the step to detect leaving the night.
            phase_before = env.world_state['phase']
            for name in living_wolves:
                shared_wolf_action = wolf_policy(
                    env, name, action=shared_wolf_action
                )
                joint_action[name] = shared_wolf_action

            _obs, _rewards, _terms, _truncs, _infos = env.step(joint_action)

            # Forget the remembered wolf action when a night begins and
            # again when the night gives way to accusations.
            phase_after = env.world_state['phase']
            entered_night = phase_after == Phase.NIGHT
            left_night = (
                phase_after == Phase.ACCUSATION and phase_before == Phase.NIGHT
            )
            if entered_night or left_night:
                shared_wolf_action = None

        if env.world_state['winners'] == Roles.VILLAGER:
            wins_for_village += 1

        replays.append(copy.deepcopy(env.history))

    return wins_for_village, replays

def _report_baselines(env, num_times=1000):
    """Print villager win counts for every wolf/villager baseline pairing."""
    wolf_lineup = (
        ("Random Coordinated Wolves", random_wolf),
        ("Aggresive Wolves", aggressive_wolf),
        ("Random Wolves", random_agent_action),
    )
    villager_lineup = (
        ("\t vs. Single Target Random Villagers", random_single_target_villager),
        ("\t vs. Random Villagers", random_agent_action),
    )

    print("")
    for wolf_label, wolf_policy in wolf_lineup:
        print(wolf_label)
        for villager_label, villager_policy in villager_lineup:
            print(villager_label)
            wins = play_static_wolf_game(env, wolf_policy, villager_policy, num_times=num_times)[0]
            print(f'\t\t Villager wins : {wins}')
        print("------------------------------------\n")


# Same report for a 10-player game with 2 wolves and a 15-player game with
# 3 wolves, one accusation round each.
for total_players, wolf_count in ((10, 2), (15, 3)):
    env = pare(num_agents=total_players, werewolves=wolf_count, num_accusations=1)
    env.reset()
    _report_baselines(env)

NameError: name 'pare' is not defined

We can see that in the most realistic scenario, random coordinated wolves vs. single-target random villagers (that do not target dead players), villagers win under 13% of the time.

## Trained/Training agents 

Agents were trained using an LSTM so that they have a history of moves, which will hopefully help elucidate who the wolves are. There were very many hyperparameters to choose from; the overarching goal was to understand the impact that gameplay settings, such as the number of accusation rounds, would have on learning and on the agents' ability to win and to implicitly communicate with each other.