In [1]:
import numpy as np
import torch
import sys
sys.path.append('../')
from voting_games.werewolf_env_v0 import plurality_env, pare, Phase, Roles
import random
import copy
from tqdm import tqdm
from collections import Counter
import matplotlib.pyplot as plt
from notebooks.learning_agents.models import ActorCriticAgent
from notebooks.learning_agents.utils import convert_obs

# Identifying villager voting behavior patterns and stats

This notebook details the code used to look for certain identifiers in the voting patterns of villagers

## Behavioral Indicators and Stats

Other than training stats, possible in-game statistics may indicate different behavior and voting patterns amongst the agents

We want to 
 - Look at unique votes from villagers throughout accusation and voting. Do these go down?
    - Is there a difference between wins/losses?

 - When are wolves voted out? Earlier or later?
    - If so, when is the second wolf found? How many voting rounds after the first wolf killed



Here are some things we might care about finding out:
- [ ] How many execution votes led to ties? 
    - Did these favor the wolves or the villagers?
- [ ] Villagers who have the same votes as others between accusations (is there accord between villagers)
- [ ] What id's do wolves have in games with villager wins
- [ ] How many distinct votes do we have? (per accusation round, per voting round)


Some stuff might be specific to approval, plurality or ranked voting
Approval 
- [ ] How many villagers vote the exact same way (with approvals and neutrals )
- [ ] How many negative votes do villagers give on average. Does this change with training ? *Stat to track while training


In terms of gathering up the stats, does it make more sense to look at the history, or does it make sense to just gather them while we train? it might be better to do it post game?

## Setup for 

In [2]:
@torch.no_grad()
def play_recurrent_game_w_replays(env, wolf_policy, villager_agent, num_times=10, hidden_state_size=None, voting_type=None):
    wins = 0
    # loop = tqdm(range(num_times))
    game_replays = []

    for _ in range(num_times):
        ## Play the game 
        next_observations, _, _, _, _ = env.reset()
        # init recurrent stuff for actor and critic to 0 as well
        magent_obs = {agent: {'obs': [], 
                              # obs size, and 1,1,64 as we pass batch first
                              'hcxs': [(torch.zeros((1,1,hidden_state_size), dtype=torch.float32), torch.zeros((1,1,hidden_state_size), dtype=torch.float32))],
                    } for agent in env.agents if not env.agent_roles[agent]}

        wolf_action = None
        while env.agents:
            observations = copy.deepcopy(next_observations)
            actions = {}

            villagers = set(env.agents) & set(env.world_state["villagers"])
            wolves = set(env.agents) & set(env.world_state["werewolves"])

            # villagers actions
            for villager in villagers:
                #torch.tensor(env.convert_obs(observations['player_0']['observation']), dtype=torch.float)
                torch_obs = convert_obs(observations[villager]['observation'], voting_type=voting_type, one_hot=False)
                obs = torch.unsqueeze(torch_obs, 0)

                # TODO: Testing this, we may need a better way to pass in villagers
                recurrent_cell = magent_obs[villager]["hcxs"][-1]
                
                # ensure that the obs is of size (batch,seq,inputs)
                policies, _, recurrent_cell = villager_agent(obs, recurrent_cell)
                _, game_action = villager_agent.get_action_from_policies(policies, voting_type=voting_type)

                if voting_type == "plurality":
                    actions[villager] = game_action.item()
                elif voting_type == "approval":
                    actions[villager] = game_action.tolist()

                #store the next recurrent cells
                magent_obs[villager]["hcxs"].append(recurrent_cell)

            phase = env.world_state['phase']
            for wolf in wolves:
                wolf_action = wolf_policy(env, wolf, action=wolf_action)
                actions[wolf] = wolf_action

            next_observations, _, _, _, _ = env.step(actions)

            # clear the wolf action if needed
            if env.world_state['phase'] == Phase.NIGHT:
                wolf_action = None
            
            if env.world_state['phase'] == Phase.ACCUSATION and phase == Phase.NIGHT:
                wolf_action = None
            
        ## Fill bigger buffer, keeping in mind sequence
        winner = env.world_state['winners']
        if winner == Roles.VILLAGER:
            wins += 1

        game_replays.append(copy.deepcopy(env.history))

        # loop.set_description(f"Villagers won {wins} out of a total of {num_times} games")
    
    return wins, game_replays

In [25]:
def random_coordinated_single_wolf(env, agent, action=None):
    villagers_remaining = set(env.world_state["villagers"]) & set(env.world_state['alive'])
    return action if action != None else int(random.choice(list(villagers_remaining)).split("_")[-1])

## Data collection

We are going to use a trained agent, an untrained agent, and a random agent.
This way we can maybe identify patterns that would seperate each one

In [26]:
env = plurality_env(num_agents=10, werewolves=2, num_accusations=2)
observations, _, _, _, _ = env.reset()

obs_size= env.convert_obs(observations['player_0']['observation']).shape[-1]
observations['player_0']['observation']

untrained_agent = ActorCriticAgent({"rec_hidden_size": 128, 
                                        "rec_layers": 1,
                                        "joint_mlp_size": 128,
                                        "split_mlp_size": 128,
                                        "num_votes": 1,
                                        "approval_states": 10},
                                        num_players=10,
                                        obs_size=obs_size)

trained_agent = ActorCriticAgent({"rec_hidden_size": 128,
                                        "rec_layers": 1, 
                                        "joint_mlp_size": 128,
                                        "split_mlp_size": 128,
                                        "num_votes": 1,
                                        "approval_states": 10},
                                        num_players=10,
                                        obs_size=obs_size)
trained_agent.load_state_dict(torch.load("stored_agents/lstm_first_no_one_hot_128_128/plurality_agent_10_score_46"))

# random_agent = None

# trained_wins, trained_replays = play_recurrent_game_w_replays(env, random_coordinated_single_wolf, trained_agent, num_times=1000, hidden_state_size=128, voting_type="plurality")
# untrained_wins, untrained_replays = play_recurrent_game_w_replays(env, random_coordinated_single_wolf, untrained_agent, num_times=1000, hidden_state_size=128, voting_type="plurality")
# random_wins, random_replays = play_recurrent_game_w_replays(env, random_coordinated_single_wolf, random_agent, num_times=1000, hidden_state_size=128, voting_type="plurality")


<All keys matched successfully>

In [22]:
trained_villager_wins = [r for r in trained_replays if r[-1]["winners"] == Roles.VILLAGER]
print(f'Trained villagers won {len(trained_villager_wins)} games')
untrained_villager_wins = [r for r in untrained_replays if r[-1]["winners"] == Roles.VILLAGER]
print(f'Untrained villagers won {len(untrained_villager_wins)} games')

Trained villagers won 469 games
Untrained villagers won 42 games


The way the environment stores history is slightly different than observations. Whereas the latter stores the prior votes, env.history steps have the votes and the outcomes that occured at that particular day/phase/round.
This makes analysis slightly easier, but we still need to track a few things


In [193]:
# TODO: SANITY CHECK THAT NO VILLAGER WINS HAPPENED OTHER THAN AFTER THE VOTING ROUND

{<Phase.VOTING: 1>: 316}


In [23]:
print("Average amount of days until a win is achieved by villagers")
print(f'\t Trained villagers : {np.mean([villager_win[-1]["day"] for villager_win in trained_villager_wins]):2f}')
print(f'\t Untrained villagers : {np.mean([villager_win[-1]["day"] for villager_win in untrained_villager_wins]):2f}')

Average amount of days until a win is achieved by villagers
	 Trained villagers : 2.886994
	 Untrained villagers : 3.190476


We may want to increase the number of players to 15, keeping 2 werewolves, to see if the mean day stretches further

Lets look at unique votes accusation -> voting
How many unique votes are there from accusation to voting?
If they switch, does a bigger count switch down to a little one? or does a smaller count lose a number to the bigger one?

Did a villager switch to a werewolf vote?
wolf to wolf?

What proportion of votes are for werewolves?

How often is the vote against a dead player?

- [ ] In situations where we have a wolf who died, do players still vote against them?

- [ ] Do we have 3 way ties between both werewolves and the werewolf target?
        Is it possible that we actually lose because there is less consensus between agents on who they want to target?

do villagers win more given a first wolf vote?
is there a vote they key on quicker?

On voting rounds:
    - [ ] Does a wolf get lucky, and agents split their votes between the wolves, and the wolf target actually gets killed?

General game questions:
    - Does the first target a wolf pick impact their chance of winning? Alternatively, do villagers key better on certain first wolf votes?
    - Do certain wolf id combinations lead to more villager wins? 



In [11]:
def when_did_wolves_get_killed(game):
    wolves = game[0]['werewolves']

    days_wolves_executed = []
    just_votes = []
    for step in game:
        if step["phase"] == Phase.VOTING:
            # first eecution
            if len(step["executed"]) == 1:
                if step['executed'][0] in wolves:
                    days_wolves_executed.append(step['day'])
            else:
                who_was_killed = list(set(step['executed']) - set(just_votes[-1]['executed']))[0]
                if who_was_killed  in wolves:
                    days_wolves_executed.append(step['day'])

            just_votes.append(step)
    
    if len(days_wolves_executed) < len(wolves):
        print("Not every wolf was killed!")
    
    return days_wolves_executed

In [6]:
wolf_execution_days = [when_did_wolves_get_killed(trained_villager_win) for trained_villager_win in trained_villager_wins]
wolf_execution_duration_between = [b-a for a,b in wolf_execution_days]
print(f'Days between wolf kills for trained agents : {np.mean(wolf_execution_duration_between):.3f}')

wolf_execution_days = [when_did_wolves_get_killed(untrained_villager_win) for untrained_villager_win in untrained_villager_wins]
wolf_execution_duration_between = [b-a for a,b in wolf_execution_days]
print(f'Days between wolf kills for untrained agents : {np.mean(wolf_execution_duration_between):.3f}')

NameError: name 'trained_villager_wins' is not defined

In [25]:
def tie_game_info(game):
	wolves = game[0]['werewolves']

	just_votes = []
	tie_days = []

	# wolf won the tie flip
	lucky_wolf_day = []

	# wolf won the tie flip, with multiple wolfs being tied targets
	super_lucky_wolf_day = []

	for step in game:
		villager_votes = [vote for player, vote in step['votes'].items() if player not in wolves]
		wolf_votes = [vote for player, vote in step['votes'].items() if player in wolves]
		all_votes = list(step['votes'].values())

		villager_vote_counter = Counter(villager_votes)
		all_vote_counter = Counter(all_votes)

		if step["phase"] == Phase.VOTING:
			just_votes.append(step)

    		# was the vote a tie? did it lead to 
			max_votes_on_target = max(all_vote_counter.values())
			targets = [k for k in all_vote_counter if all_vote_counter[k] == max_votes_on_target]

			# we have a tie
			if len(targets) > 1:
				tie_days.append(step["day"])
				# are one of the targets a wolf target?
				is_a_target_a_wolf_target = sum([target in wolf_votes for target in targets])
				
				# is the tie between a dead player and a live player?
				# this won't  trigger a tie trigger though
				if len(step['executed']) == 1:
					dead_players = list(set(step['executed']) | set(step['killed']))
					killed_this_turn = step['executed'][0]
				else:
					dead_players =  list((set(step['executed']) & set(just_votes[-2]['executed'])) | set(step['killed']))
					killed_this_turn = list(set(step['executed']) - set(just_votes[-2]['executed']))[0]

				# is the tie only between dead players?
				is_a_target_a_living_wolf = sum([f'player_{target}' in wolves for target in targets if target not in dead_players])

				# so now, we want to know if we have at tie between a wolf target and a living wolf
				if is_a_target_a_living_wolf and is_a_target_a_wolf_target:
					if killed_this_turn not in wolves:
						# wolves got lucky
						lucky_wolf_day.append(step["day"])
					if is_a_target_a_living_wolf > 1:
						super_lucky_wolf_day.append(step["day"])

	return tie_days, lucky_wolf_day, super_lucky_wolf_day

tie_game_stats = [tie_game_info(trained_villager_win) for trained_villager_win in trained_villager_wins]
tie_games = [tie_game for tie_game in tie_game_stats if len(tie_game[0]) >= 1]
wolf_ties = [tie_game for tie_game in tie_game_stats if len(tie_game[1]) >= 1]
super_lucky_wolf_ties = [tie_game for tie_game in tie_game_stats if len(tie_game[2]) >= 1]
print(f' {len(tie_games)/len(trained_villager_wins):.2f} winning games had a round randomly determined due to a tie')
print(f' {len(wolf_ties)/len(trained_villager_wins):.2f} winning games had a round randomly where a wolf was targetted, but lucked out')
print(f' {len(super_lucky_wolf_ties)/len(trained_villager_wins):.2f} winning games had a round randomly where more than one wolf was targetted, but lucked out')

trained_villager_losses = [r for r in trained_replays if r[-1]["winners"] == Roles.WEREWOLF]
tie_games_stats = [tie_game_info(trained_villager_loss) for trained_villager_loss in trained_villager_losses]
tie_games = [tie_game for tie_game in tie_game_stats if len(tie_game[0]) >= 1]
wolf_ties = [tie_game for tie_game in tie_game_stats if len(tie_game[1]) >= 1]
super_lucky_wolf_ties = [tie_game for tie_game in tie_game_stats if len(tie_game[2]) >= 1]
print(f' {len(tie_games)/len(trained_villager_losses):.2f} losing games had a round randomly determined due to a tie')
print(f' {len(wolf_ties)/len(trained_villager_wins):.2f} losing games had a round randomly where a wolf was targetted, but lucked out')
print(f' {len(super_lucky_wolf_ties)/len(trained_villager_wins):.2f} losing games had a round randomly where more than one wolf was targetted, but lucked out')


 0.45 winning games had a round randomly determined due to a tie
 0.04 winning games had a round randomly where a wolf was targetted, but lucked out
 0.04 winning games had a round randomly where more than one wolf was targetted, but lucked out
 0.40 losing games had a round randomly determined due to a tie
 0.04 losing games had a round randomly where a wolf was targetted, but lucked out
 0.04 losing games had a round randomly where more than one wolf was targetted, but lucked out


In [None]:
def get_indicators_per_game(game_replay):
    wolves = game_replay[0]['werewolves']

    just_votes = []
    for step in trained_villager_wins[0]:
        if step["phase"] == Phase.NIGHT:
            continue
        if step["phase"] == Phase.VOTING:
            just_votes.append(step)

        villager_votes = [vote for player, vote in step['votes'].items() if player not in wolves]
        all_votes = list(step['votes'].values())

        villager_vote_counter = Counter(villager_votes)
        all_vote_counter = Counter(all_votes)

        unique_villager_votes = len(villager_vote_counter)
        percent_of_villagers_targetting_wolves = sum([villager_vote_counter[int(wolf.split("_")[-1])] for wolf in wolves]) / float(len(villager_votes))

        # want to be careful, because on voting rounds, there is also the executed player in this set. so we need to remove them first
        if step["phase"] == Phase.VOTING:
            just_votes.append(step)
            
            if len(just_votes) == 1:
                percent_of_villagers_targetting_dead_players = 0 / float(len(villager_votes))
            else:
                percent_of_villagers_targetting_dead_players = sum([villager_vote_counter[dead_player] for dead_player in list((set(step['executed']) & set(just_votes[-2]['executed'])) | set(step['killed']))]) / float(len(villager_votes))
        else:
            percent_of_villagers_targetting_dead_players = sum([villager_vote_counter[dead_player] for dead_player in list(set(step['executed']) | set(step['killed']))]) / float(len(villager_votes))

    # per round
    

    
    # print(f'Wolves : {wolves}\n')

In [12]:
wolves = trained_villager_wins[0][0]['werewolves']
print(f'Wolves : {wolves}\n')

just_votes = []
for step in trained_villager_wins[0]:
    if step["phase"] == Phase.NIGHT:
        continue
    if step["phase"] == Phase.VOTING:
        just_votes.append(step)

    villager_votes = [vote for player, vote in step['votes'].items() if player not in wolves]
    all_votes = list(step['votes'].values())

    villager_vote_counter = Counter(villager_votes)
    all_vote_counter = Counter(all_votes)

    unique_villager_votes = len(villager_vote_counter)
    percent_of_villagers_targetting_wolves = sum([villager_vote_counter[int(wolf.split("_")[-1])] for wolf in wolves]) / float(len(villager_votes))

    # want to be careful, because on voting rounds, there is also the executed player in this set. so we need to remove them first
    if step["phase"] == Phase.VOTING:
        just_votes.append(step)
        
        if len(just_votes) == 1:
            percent_of_villagers_targetting_dead_players = 0 / float(len(villager_votes))
        else:
            percent_of_villagers_targetting_dead_players = sum([villager_vote_counter[dead_player] for dead_player in list((set(step['executed']) & set(just_votes[-2]['executed'])) | set(step['killed']))]) / float(len(villager_votes))
    else:
        percent_of_villagers_targetting_dead_players = sum([villager_vote_counter[dead_player] for dead_player in list(set(step['executed']) | set(step['killed']))]) / float(len(villager_votes))

    # percent_of_villagers_targetting_a_dead_wolf = None
    
    print(f'Day : {step["day"]} | Phase : {step["phase"]} | Round : {step["round"]}')
    print(f'Villager votes : {villager_votes}')
    print(f'\t | - {unique_villager_votes} players targetted, with {percent_of_villagers_targetting_wolves:.3f} of the votes targetting wolves')
    print(f'\t | - {percent_of_villagers_targetting_dead_players:.3f} share of the votes targetting dead players\n')


    #print(f'Villager counters : {Counter(villager_votes)}')

    #print(f'All votes : {all_votes}')
    #print(f'All votes counters : {Counter(all_votes)}')

Wolves : ['player_7', 'player_9']

Day : 1 | Phase : 0 | Round : 0
Villager votes : [10, 10, 10, 10, 10, 10, 10, 10]
	 | - 1 players targetted, with 0.000 of the votes targetting wolves
	 | - 0.000 share of the votes targetting dead players

Day : 1 | Phase : 0 | Round : 0
Villager votes : [2, 8, 0, 9, 8, 9, 9, 6]
	 | - 5 players targetted, with 0.375 of the votes targetting wolves
	 | - 0.000 share of the votes targetting dead players

Day : 1 | Phase : 0 | Round : 1
Villager votes : [8, 5, 5, 5, 8, 5, 7, 8]
	 | - 3 players targetted, with 0.125 of the votes targetting wolves
	 | - 0.000 share of the votes targetting dead players

Day : 1 | Phase : 1 | Round : 0
Villager votes : [5, 7, 5, 5, 8, 8, 7, 6]
	 | - 4 players targetted, with 0.250 of the votes targetting wolves
	 | - 0.000 share of the votes targetting dead players

Day : 2 | Phase : 0 | Round : 0
Villager votes : [2, 9, 9, 9, 7, 8]
	 | - 4 players targetted, with 0.667 of the votes targetting wolves
	 | - 0.000 share of the

## Lets try doing this to approval voting games


In [3]:
def random_wolf(env, agent, action=None):
    if action != None:
        return action

    villagers_remaining = set(env.world_state["villagers"]) & set(env.world_state['alive'])
    wolves_remaining = set(env.world_state["werewolves"]) & set(env.world_state['alive'])

    # pick a living target
    target = random.choice(list(villagers_remaining))

    action = [0] * len(env.possible_agents)
    action[int(target.split("_")[-1])] = -1
    for curr_wolf in wolves_remaining:
        action[int(curr_wolf.split("_")[-1])] = 1

    return action

In [4]:
env = pare(num_agents=10, werewolves=2, num_accusations=2)
observations, _, _, _, _ = env.reset()

obs_size= env.convert_obs(observations['player_0']['observation']).shape[-1]
observations['player_0']['observation']

approval_untrained_agent = ActorCriticAgent({"rec_hidden_size": 256, 
                                        "rec_layers": 1,
                                        "joint_mlp_size": 128,
                                        "split_mlp_size": 128,
                                        "num_votes": 10,
                                        "approval_states": 3},
                                        num_players=10,
                                        obs_size=obs_size)

approval_trained_agent = ActorCriticAgent({"rec_hidden_size": 256,
                                        "rec_layers": 1, 
                                        "joint_mlp_size": 128,
                                        "split_mlp_size": 128,
                                        "num_votes": 10,
                                        "approval_states": 3},
                                        num_players=10,
                                        obs_size=obs_size)
approval_trained_agent.load_state_dict(torch.load("stored_agents/lstm_first_no_one_hot_256_128/approval_agent_10_score_49"))

# random_agent = None

approval_trained_wins, approval_trained_replays = play_recurrent_game_w_replays(env, random_wolf, approval_trained_agent, num_times=1000, hidden_state_size=256, voting_type="approval")
approval_untrained_wins, approval_untrained_replays = play_recurrent_game_w_replays(env, random_wolf, approval_untrained_agent, num_times=1000, hidden_state_size=256, voting_type="approval")
# random_wins, random_replays = play_recurrent_game_w_replays(env, random_coordinated_single_wolf, random_agent, num_times=1000, hidden_state_size=128, voting_type="plurality")


In [5]:
approval_trained_villager_wins = [r for r in approval_trained_replays if r[-1]["winners"] == Roles.VILLAGER]
print(f'Trained villagers won {len(approval_trained_villager_wins)} games')
approval_untrained_villager_wins = [r for r in approval_untrained_replays if r[-1]["winners"] == Roles.VILLAGER]
print(f'Untrained villagers won {len(approval_untrained_villager_wins)} games')

Trained villagers won 477 games
Untrained villagers won 75 games


In [12]:
wolf_execution_days = [when_did_wolves_get_killed(trained_villager_win) for trained_villager_win in approval_trained_villager_wins]
wolf_execution_duration_between = [b-a for a,b in wolf_execution_days]
print(f'Days between wolf kills for trained agents : {np.mean(wolf_execution_duration_between):.3f}')

wolf_execution_days = [when_did_wolves_get_killed(untrained_villager_win) for untrained_villager_win in approval_untrained_villager_wins]
wolf_execution_duration_between = [b-a for a,b in wolf_execution_days]
print(f'Days between wolf kills for untrained agents : {np.mean(wolf_execution_duration_between):.3f}')

Days between wolf kills for trained agents : 1.465
Days between wolf kills for untrained agents : 1.733


In [17]:
# some speficic approval stuff?


def votes_info(game):
    wolves = game[0]['werewolves']

    for step in game:
        if step['phase'] == Phase.NIGHT:
            continue

        print(f'Day : {step["day"]} | Phase : {step["phase"]} | Round : {step["round"]}')
        if step["phase"] == Phase.VOTING:
            print("\t Voting Phase")
        else:
            print("\t Accusation Phase")

        villager_votes = [vote for player, vote in step['votes'].items() if player not in wolves]
        all_votes = list(step['votes'].values())

        villager_targets = [np.where(np.array(villager_vote) == -1)[0] for villager_vote in villager_votes]
        villager_likes = [np.where(np.array(villager_vote) == 1)[0] for villager_vote in villager_votes]
        villager_neutrals = [np.where(np.array(villager_vote) == 0)[0] for villager_vote in villager_votes]

        v_target_counter = Counter(np.concatenate(villager_targets))
        v_like_counter = Counter(np.concatenate(villager_likes))
        v_neutral_counter = Counter(np.concatenate(villager_neutrals))

        print(f'Villagers on average targetted {np.mean([len(targets) for targets in villager_targets]):.2f} others, liked {np.mean([len(targets) for targets in villager_likes]):.2f} others, felt neutral for {np.mean([len(targets) for targets in villager_neutrals]):.2f} others')
        # is one of the targets dead? 
        # how many top targets are wolves?
        wolves_in_most_common_targets =\
            [int(wolf.split("_")[-1]) for wolf in wolves if int(wolf.split("_")[-1]) in [idx for idx, _ in v_target_counter.most_common(max(1,int(len(v_target_counter)*0.5)))]]

        # TODO: if only one of the 2 wolves are top targets, I wonder if they just get lucky for the second wolf kill
        # TODO: do we want to focus on num voting rounds in this equation?
        print(f'{len(wolves_in_most_common_targets)} wolves targetted in top {int(len(v_target_counter)*0.5)} votes')

        # TODO: do we want to focus on num voting rounds in this equation?

        wolves_in_most_common_likes =\
            [int(wolf.split("_")[-1]) for wolf in wolves if int(wolf.split("_")[-1]) in [idx for idx, _ in v_like_counter.most_common(max(1,int(len(v_like_counter)*0.5)))]]
        print(f'{len(wolves_in_most_common_likes)} wolves liked in top {int(len(v_like_counter)*0.5)} likes')
        # do the most liked individuals also get the least amount of votes?

        # how many likes are for other trusted villagers?
        # how many likes are towards wolves?
        # We are hoping that a like is used as a communicative tool

        
        print("\n")

votes_info(approval_trained_villager_wins[0])

Day : 1 | Phase : 0 | Round : 0
	 Accusation Phase
Villagers on average targetted 0.00 others, liked 0.00 others, felt neutral for 10.00 others
0 wolves targetted in top 0 votes
0 wolves liked in top 0 likes


Day : 1 | Phase : 0 | Round : 0
	 Accusation Phase
Villagers on average targetted 2.88 others, liked 3.75 others, felt neutral for 3.38 others
2 wolves targetted in top 5 votes
0 wolves liked in top 5 likes


Day : 1 | Phase : 0 | Round : 1
	 Accusation Phase
Villagers on average targetted 3.12 others, liked 4.62 others, felt neutral for 2.25 others
2 wolves targetted in top 4 votes
1 wolves liked in top 5 likes


Day : 1 | Phase : 1 | Round : 0
	 Voting Phase
Villagers on average targetted 3.62 others, liked 3.75 others, felt neutral for 2.62 others
1 wolves targetted in top 4 votes
1 wolves liked in top 4 likes


Day : 2 | Phase : 0 | Round : 0
	 Accusation Phase
Villagers on average targetted 4.14 others, liked 3.00 others, felt neutral for 2.86 others
2 wolves targetted in to

Should we give a penalty to players that target dead wolves? More than just targetting dead players in general?

In [18]:
from collections import Counter
import matplotlib.pyplot as plt

In [21]:
labels, values = zip(*Counter([item for sublist in [villager_win[-1]["werewolves"] for villager_win in approval_trained_villager_wins] for item in sublist]).items())
indexes = np.arange(len(labels))
width = 10

# plt.bar(indexes, values, width)
# plt.xticks(indexes + width * 0.5, labels)
# plt.tight_layout()
# plt.xticks(rotation=60)
# plt.show()npa

In [None]:
# load up trained agent
# load up fresh agent for comparisons

# play recurrent game_w_replays with both agents

# try to answer the questions below


# play_recurrent_game_w_replays()

In [None]:
def collect_and_report_stats(env, information, ignore_wolf=True, mlflow_uri=None):
    total_self_votes = len([vals for player, vals in information.items() if vals["self_vote"] and (ignore_wolf and env.agent_roles[player] != Roles.WEREWOLF)])
    total_dead_votes = sum([vals["dead_vote"] for player, vals in information.items() if ignore_wolf and env.agent_roles[player] != Roles.WEREWOLF])
    total_viable_votes = sum([vals["viable_vote"] for player, vals in information.items() if ignore_wolf and env.agent_roles[player] != Roles.WEREWOLF])

    avg_self_votes = total_self_votes/len(information)
    avg_dead_votes = total_dead_votes/len(information)
    avg_viable_votes = total_viable_votes/len(information)

    return {
        "total_self_votes": total_self_votes,
        "total_dead_votes": total_dead_votes,
        "total_viable_votes": total_viable_votes,
        "avg_self_votes": avg_self_votes,
        "avg_dead_votes": avg_dead_votes,
        "avg_viable_votes": avg_viable_votes,
        "players_with_viable_votes": len([vals["viable_vote"] for player, vals in information.items() if ignore_wolf and env.agent_roles[player] != pare_Role.WEREWOLF])
    }

## Replay visualization

Need good ways to visualize the gameplay

In [None]:
def print_replay(replay):
    #[wolf for wolf in stage["werewolves"]]
    #print("Werewolves \tVillagers")
    for stage in replay:
        wolf_votes = [(f'p_{wolf.split("_")[-1]}', stage["votes"][wolf]) for wolf in stage["werewolves"] if wolf in stage["votes"]]
        villager_votes = [(f'p{villager.split("_")[-1]}', stage["votes"][villager]) for villager in stage["villagers"] if villager in stage["votes"]]
        print(f'Wolves \t : {wolf_votes} \t\t Villagers : {villager_votes}')
        #print([f'{wolf.split("_")[-1]} : {stage["votes"]["player"_{wolf.split("_")[-1]}]}]' for wolf in stage["werewolves"]])
        # for wolf in stage['werewolves']:
        #     wid = wolf.split("_")[-1]
        #     pid = f"player_{wid}"
        #     stage["votes"][pid]
        #     print(f'p_{wid} : {stage["votes"][pid]}')
        # print(stage['votes'])

print_replay(replay[4])

## Speeding up the gameplay process

In [29]:
@torch.no_grad()
def play_recurrent_game(env, wolf_policy, villager_agent, num_times=10, hidden_state_size=None, voting_type=None):
    
    wins = 0
    # loop = tqdm(range(num_times))
    for _ in range(num_times):
        ## Play the game 
        next_observations, rewards, terminations, truncations, infos = env.reset()
        # init recurrent stuff for actor and critic to 0 as well
        magent_obs = {agent: {'obs': [], 
                              # obs size, and 1,1,64 as we pass batch first
                              'hcxs': [(torch.zeros((1,1,hidden_state_size), dtype=torch.float32), torch.zeros((1,1,hidden_state_size), dtype=torch.float32))],
                    } for agent in env.agents if not env.agent_roles[agent]}
    
        wolf_action = None
        while env.agents:
            observations = copy.deepcopy(next_observations)
            actions = {}

            villagers = set(env.agents) & set(env.world_state["villagers"])
            wolves = set(env.agents) & set(env.world_state["werewolves"])

            # villagers actions
            for villager in villagers:
                #torch.tensor(env.convert_obs(observations['player_0']['observation']), dtype=torch.float)
                torch_obs = torch.tensor(env.convert_obs(observations[villager]['observation']), dtype=torch.float)
                obs = torch.unsqueeze(torch_obs, 0)

                # TODO: Testing this, we may need a better way to pass in villagers
                recurrent_cell = magent_obs[villager]["hcxs"][-1]
                
                # ensure that the obs is of size (batch,seq,inputs)
                policies, _, recurrent_cell = villager_agent(obs, recurrent_cell)
                _, game_action = villager_agent.get_action_from_policies(policies, voting_type=voting_type)

                if voting_type == "plurality":
                    actions[villager] = game_action.item()
                elif voting_type == "approval":
                    actions[villager] = game_action.tolist()

                #store the next recurrent cells
                magent_obs[villager]["hcxs"].append(recurrent_cell)

            # wolf steps
            phase = env.world_state['phase']
            for wolf in wolves:
                wolf_action = wolf_policy(env, wolf, action=wolf_action)
                actions[wolf] = wolf_action
        
            next_observations, _, _, _, _ = env.step(actions)
            
            # clear the wolf action if needed
            if env.world_state['phase'] == Phase.NIGHT:
                wolf_action = None
            
            if env.world_state['phase'] == Phase.ACCUSATION and phase == Phase.NIGHT:
                wolf_action = None

        ## Fill bigger buffer, keeping in mind sequence
        winner = env.world_state['winners']
        if winner == Roles.VILLAGER:
            wins += 1

        # loop.set_description(f"Villagers won {wins} out of a total of {num_times} games")
    
    return wins

In [44]:
@torch.no_grad()
def play_recurrent_faster_game(env, wolf_policy, villager_agent, num_times=10, hidden_state_size=None, voting_type=None):
    
    wins = 0
    # loop = tqdm(range(num_times))
    for _ in range(num_times):
        ## Play the game 
        next_observations, rewards, terminations, truncations, infos = env.reset()
        # init recurrent stuff for actor and critic to 0 as well
        magent_obs = {agent: {'obs': [], 
                              # obs size, and 1,1,64 as we pass batch first
                              'hcxs': [(torch.zeros((1,1,hidden_state_size), dtype=torch.float32), torch.zeros((1,1,hidden_state_size), dtype=torch.float32))],
                    } for agent in env.agents if not env.agent_roles[agent]}
    
        wolf_action = None
        while env.agents:
            observations = copy.deepcopy(next_observations)
            actions = {}

            villagers = set(env.agents) & set(env.world_state["villagers"])
            wolves = set(env.agents) & set(env.world_state["werewolves"])

            # villagers actions
            v_obs = torch.cat([torch.unsqueeze(torch.tensor(env.convert_obs(observations[villager]['observation']), dtype=torch.float), 0) for villager in villagers])
            
            # TODO: maybe this can be sped up? 
            hxs, cxs = zip(*[(hxs, cxs) for hxs, cxs in [magent_obs[villager]["hcxs"][-1] for villager in villagers]])
            hxs = torch.swapaxes(torch.cat(hxs),0,1)
            cxs = torch.swapaxes(torch.cat(cxs),0,1)

            # TODO : make this policies to allow for approval speed up too
            policy, _ , cells = villager_agent(v_obs, (hxs, cxs))

            v_actions = policy[0].sample().tolist()
            hxs_new, cxs_new = cells
            hxs_new = torch.swapaxes(hxs_new,1,0)
            cxs_new = torch.swapaxes(cxs_new,1,0)

            for i, villager in enumerate(villagers):
                actions[villager] = v_actions[i]
                magent_obs[villager]['hcxs'].append((torch.unsqueeze(hxs_new[i], 0), torch.unsqueeze(cxs_new[i], 0)))

            # # batch, sequence, input
            # for villager in villagers:
            #     #torch.tensor(env.convert_obs(observations['player_0']['observation']), dtype=torch.float)
            #     torch_obs = torch.tensor(env.convert_obs(observations[villager]['observation']), dtype=torch.float)
            #     obs = torch.unsqueeze(torch_obs, 0)

            #     # TODO: Testing this, we may need a better way to pass in villagers
            #     recurrent_cell = magent_obs[villager]["hcxs"][-1]
                
            #     # ensure that the obs is of size (batch,seq,inputs)
            #     policies, _, recurrent_cell = villager_agent(obs, recurrent_cell)
            #     _, game_action = villager_agent.get_action_from_policies(policies, voting_type=voting_type)

            #     if voting_type == "plurality":
            #         actions[villager] = game_action.item()
            #     elif voting_type == "approval":
            #         actions[villager] = game_action.tolist()

            #     #store the next recurrent cells
            #     magent_obs[villager]["hcxs"].append(recurrent_cell)

            # wolf steps
            phase = env.world_state['phase']
            for wolf in wolves:
                wolf_action = wolf_policy(env, wolf, action=wolf_action)
                actions[wolf] = wolf_action
        
            next_observations, _, _, _, _ = env.step(actions)
            
            # clear the wolf action if needed
            if env.world_state['phase'] == Phase.NIGHT:
                wolf_action = None
            
            if env.world_state['phase'] == Phase.ACCUSATION and phase == Phase.NIGHT:
                wolf_action = None

        ## Fill bigger buffer, keeping in mind sequence
        winner = env.world_state['winners']
        if winner == Roles.VILLAGER:
            wins += 1

        # loop.set_description(f"Villagers won {wins} out of a total of {num_times} games")
    
    return wins

In [None]:
@torch.no_grad()
def play_recurrent_faster_game_on_gpu(env, wolf_policy, villager_agent, num_times=10, hidden_state_size=None, voting_type=None):
    
    wins = 0
    # loop = tqdm(range(num_times))
    for _ in range(num_times):
        ## Play the game 
        next_observations, rewards, terminations, truncations, infos = env.reset()
        # init recurrent stuff for actor and critic to 0 as well
        magent_obs = {agent: {'obs': [], 
                              # obs size, and 1,1,64 as we pass batch first
                              'hcxs': [(torch.zeros((1,1,hidden_state_size), dtype=torch.float32, device=torch.device("cuda:0")), torch.zeros((1,1,hidden_state_size), dtype=torch.float32, device=torch.device("cuda:0")))],
                    } for agent in env.agents if not env.agent_roles[agent]}
    
        wolf_action = None
        while env.agents:
            observations = copy.deepcopy(next_observations)
            actions = {}

            villagers = set(env.agents) & set(env.world_state["villagers"])
            wolves = set(env.agents) & set(env.world_state["werewolves"])

            # villagers actions
            v_obs = torch.cat([torch.unsqueeze(torch.tensor(env.convert_obs(observations[villager]['observation']), dtype=torch.float, device=torch.device("cuda:0")), 0) for villager in villagers])
            
            # TODO: maybe this can be sped up? 
            hxs, cxs = zip(*[(hxs, cxs) for hxs, cxs in [magent_obs[villager]["hcxs"][-1] for villager in villagers]])
            hxs = torch.swapaxes(torch.cat(hxs),0,1)
            cxs = torch.swapaxes(torch.cat(cxs),0,1)

            # TODO : make this policies to allow for approval speed up too
            policy, _ , cells = villager_agent(v_obs, (hxs, cxs))

            v_actions = policy[0].sample().tolist()
            hxs_new, cxs_new = cells
            hxs_new = torch.swapaxes(hxs_new,1,0)
            cxs_new = torch.swapaxes(cxs_new,1,0)

            for i, villager in enumerate(villagers):
                actions[villager] = v_actions[i]
                magent_obs[villager]['hcxs'].append((torch.unsqueeze(hxs_new[i], 0), torch.unsqueeze(cxs_new[i], 0)))

            # # batch, sequence, input
            # for villager in villagers:
            #     #torch.tensor(env.convert_obs(observations['player_0']['observation']), dtype=torch.float)
            #     torch_obs = torch.tensor(env.convert_obs(observations[villager]['observation']), dtype=torch.float)
            #     obs = torch.unsqueeze(torch_obs, 0)

            #     # TODO: Testing this, we may need a better way to pass in villagers
            #     recurrent_cell = magent_obs[villager]["hcxs"][-1]
                
            #     # ensure that the obs is of size (batch,seq,inputs)
            #     policies, _, recurrent_cell = villager_agent(obs, recurrent_cell)
            #     _, game_action = villager_agent.get_action_from_policies(policies, voting_type=voting_type)

            #     if voting_type == "plurality":
            #         actions[villager] = game_action.item()
            #     elif voting_type == "approval":
            #         actions[villager] = game_action.tolist()

            #     #store the next recurrent cells
            #     magent_obs[villager]["hcxs"].append(recurrent_cell)

            # wolf steps
            phase = env.world_state['phase']
            for wolf in wolves:
                wolf_action = wolf_policy(env, wolf, action=wolf_action)
                actions[wolf] = wolf_action
        
            next_observations, _, _, _, _ = env.step(actions)
            
            # clear the wolf action if needed
            if env.world_state['phase'] == Phase.NIGHT:
                wolf_action = None
            
            if env.world_state['phase'] == Phase.ACCUSATION and phase == Phase.NIGHT:
                wolf_action = None

        ## Fill bigger buffer, keeping in mind sequence
        winner = env.world_state['winners']
        if winner == Roles.VILLAGER:
            wins += 1

        # loop.set_description(f"Villagers won {wins} out of a total of {num_times} games")
    
    return wins

In [31]:
import time

In [46]:
start = time.time()
wins = play_recurrent_faster_game(env, random_coordinated_single_wolf, trained_agent, num_times=1000, hidden_state_size=128, voting_type="plurality")
end = time.time()
print(f'Wins : {wins}, hopefully faster function time : {end-start:.3f}')

# start = time.time()
# play_recurrent_faster_game_on_gpu(env, random_coordinated_single_wolf, trained_agent, num_times=1000, hidden_state_size=128, voting_type="plurality")
# end = time.time()
# print(f'hopefully faster function time on GPU : {end-start:.3f}')


start = time.time()
wins = play_recurrent_game(env, random_coordinated_single_wolf, trained_agent, num_times=1000, hidden_state_size=128, voting_type="plurality")
end = time.time()
print(f'Wins: {wins}, Regular function time : {end-start:.3f}')

hopefully faster function time : 32.235
Regular function time : 74.958
