In [0]:
# !unzip gymhearts.zip
# %cd robohearts/

In [0]:
import gym
import multiprocessing
from gymhearts.Hearts import *
from gymhearts.Agent.human import Human
from gymhearts.Agent.random_agent import RandomAgent
from gymhearts.Agent.reinforce_agent import REINFORCE_Agent
from tqdm import tqdm_notebook
import numpy
import copy

In [0]:
# ---- Experiment sizes --------------------------------------------------
NUM_TESTS = 10         # evaluation workers (pool size / length of the wins list)
NUM_EPISODES = 1000    # games played by each evaluation worker
TRAINING_ITERS = 100   # number of batched policy updates during training
MAX_SCORE = 100        # forwarded to the environment constructor
BATCH_SIZE = 16        # games collected before each policy update
run_train = True       # set to False to skip the training cell

# Names of the observation features handed to the REINFORCE agent.
feature_list = ['in_hand', 'in_play', 'played_cards', 'won_cards', 'scores']

# Seat 0 is the learning agent; seats 1-3 are random opponents.
playersNameList = ['REINFORCE', 'Rando', 'Randy', 'Randall']

# REINFORCE vs three random players.
# NOTE(review): 'gamma' / 'alpha' are presumably the discount factor and the
# learning rate -- confirm against REINFORCE_Agent.
reinforce_config = {
    'print_info' : False,
    'gamma' : 1,
    'feature_list' : feature_list,
    'alpha': 3e-6,
}

# Build the table in seat order: the learner first, then one RandomAgent
# (each with its own config dict) for every remaining name.
agent_list = [REINFORCE_Agent(playersNameList[0], reinforce_config)] + [
    RandomAgent(opponent_name, {'print_info' : False})
    for opponent_name in playersNameList[1:]
]

In [0]:
# Training cell: collect BATCH_SIZE full games per iteration, split each game
# into per-round episodes, and hand the batch to the REINFORCE agent's update().
env = gym.make('Hearts_Card_Game-v0')
env.__init__(playersNameList, MAX_SCORE)
if run_train:
    # One entry per training iteration: whatever agent.update(batch) returns.
    returns = []
    for trn_episode in tqdm_notebook(range(TRAINING_ITERS)):
        batch = []
        for i in range(BATCH_SIZE):
            observation = env.reset()
            # (state_features, valid_features, action) tuples for the
            # REINFORCE player's moves in the round currently being played.
            episode_history = []
            while True:
                now_event = observation['event_name']
                IsBroadcast = observation['broadcast']

                # A round just finished: flush its history into the batch.
                # (Previously this tested the undefined name `ep_history`,
                # which raised NameError on the first 'RoundEnd'.)
                # `reward` is the value returned by the most recent env.step();
                # the non-empty history check guarantees at least one step ran.
                if now_event == 'RoundEnd' and len(episode_history) > 0:
                    # The episode return is the negated reward of the learner.
                    # NOTE(review): presumably because lower Hearts scores are
                    # better -- confirm against the environment's reward.
                    batch.append((copy.deepcopy(episode_history), -reward['REINFORCE']))
                    episode_history = []

                if now_event == 'GameOver':
                    break

                action = None
                if IsBroadcast == True:
                    # Broadcast events are shown to every agent; no action is
                    # sent to the environment (action stays None).
                    for agent in agent_list:
                        agent.Do_Action(observation)
                else:
                    # Targeted event: only the named player chooses an action.
                    playName = observation['data']['playerName']
                    for agent in agent_list:
                        if agent.name == playName:
                            action = agent.Do_Action(observation)

                observation_prime, reward, done, info = env.step(action)

                # Record only the learner's own trick plays, using the
                # observation the action was chosen from (not the successor).
                if observation['event_name'] == 'PlayTrick' and observation['data']['playerName'] == 'REINFORCE':
                    state_features, valid_features = agent_list[0].generate_features(observation)
                    episode_history.append((state_features, valid_features, action))

                observation = observation_prime

        returns.append(agent_list[0].update(batch))

    # Persist the trained policy / baseline models and the per-iteration values.
    agent_list[0].pi.save_pi_model()
    agent_list[0].baseline.save_base_model()
    avg_returns = numpy.array(returns)
    # (A stray `\n"` export artefact after this call made the cell a syntax error.)
    numpy.savetxt('reinforce.txt', avg_returns)

In [0]:
def run_test(num_won, player_name='REINFORCE'):
    """Play NUM_EPISODES complete games and count the wins of one player.

    Relies on the notebook-level globals ``env``, ``agent_list`` and
    ``NUM_EPISODES``.

    Args:
        num_won: Starting value of the win counter (the evaluation cell
            passes 0 for every worker via ``pool.map``).
        player_name: Name compared against the ``'Winner'`` field of the
            ``'GameOver'`` observation. Defaults to ``'REINFORCE'``, which is
            the previously hard-coded value, so existing single-argument
            callers behave exactly as before. Use ``functools.partial`` to
            score a different seat through ``pool.map``.

    Returns:
        ``num_won`` plus the number of games whose winner was ``player_name``.
    """
    # Weird hack to make progress bars render properly
    print(' ', end='', flush=True)
    for i_ep in tqdm_notebook(range(NUM_EPISODES)):
        observation = env.reset()
        while True:
            now_event = observation['event_name']
            IsBroadcast = observation['broadcast']
            action = None
            if IsBroadcast == True:
                # Broadcast events go to every agent; no action is submitted.
                for agent in agent_list:
                    agent.Do_Action(observation)
            else:
                # Only the agent whose name matches the addressed player acts.
                playName = observation['data']['playerName']
                for agent in agent_list:
                    if agent.name == playName:
                        action = agent.Do_Action(observation)
            # Agents see the GameOver observation (above) before it is scored.
            if now_event == 'GameOver':
                num_won += int(observation['data']['Winner'] == player_name)
                break
            observation, reward, done, info = env.step(action)
    return num_won

In [0]:
# Evaluate the REINFORCE agent: NUM_TESTS worker processes each play
# NUM_EPISODES games (see run_test) and report their individual win counts.
env = gym.make('Hearts_Card_Game-v0')
env.__init__(playersNameList, MAX_SCORE)
# Fresh agent for seat 0, then switch both networks to eval mode.
# NOTE(review): presumably the constructor loads the models saved by the
# training cell -- confirm against REINFORCE_Agent.
agent_list[0] = REINFORCE_Agent(playersNameList[0], params={'feature_list' : feature_list,})
agent_list[0].pi.nn.eval()
agent_list[0].baseline.nn.eval()

# One zero-initialised win counter per worker.
reinforce_wins = [0] * NUM_TESTS
pool = multiprocessing.Pool(processes=NUM_TESTS)
try:
    reinforce_wins = pool.map(run_test, reinforce_wins)
    print(reinforce_wins)
finally:
    # Always release the worker processes, even if a worker raised;
    # previously an exception in pool.map left the pool open.
    pool.close()
    pool.join()

In [0]:
def run_test_2(num_won):
    """Play NUM_EPISODES complete games and count the wins of 'Randman'.

    Uses the notebook-level globals ``env``, ``agent_list`` and
    ``NUM_EPISODES``.

    Args:
        num_won: Initial value of the win counter.

    Returns:
        ``num_won`` increased by one for every game won by 'Randman'.
    """
    # Weird hack to make progress bars render properly
    print(' ', end='', flush=True)
    for _ in tqdm_notebook(range(NUM_EPISODES)):
        obs = env.reset()
        while True:
            event = obs['event_name']
            is_broadcast = obs['broadcast']
            chosen = None
            if is_broadcast == True:
                # Everyone is shown a broadcast; nothing is submitted.
                for player in agent_list:
                    player.Do_Action(obs)
            else:
                # Ask only the addressed player for a move.
                mover = obs['data']['playerName']
                for player in agent_list:
                    if player.name == mover:
                        chosen = player.Do_Action(obs)
            if event != 'GameOver':
                # Game still running: advance the environment and loop again.
                obs, _reward, _done, _info = env.step(chosen)
                continue
            # Game finished: score it and move on to the next episode.
            num_won += int(obs['data']['Winner'] == 'Randman')
            break
    return num_won

In [0]:
# EVALUATE THE RANDOM AGENT
# Baseline: seat 0 is played by a RandomAgent named 'Randman'; NUM_TESTS
# workers each play NUM_EPISODES games (see run_test_2).
#
# The seat must be renamed BEFORE the environment is (re)initialised.
# Previously env.__init__ received the list while seat 0 was still called
# 'REINFORCE', whereas the agent and the win check both use 'Randman'.
playersNameList[0] = 'Randman'
env = gym.make('Hearts_Card_Game-v0')
env.__init__(playersNameList, MAX_SCORE)
agent_list[0] = RandomAgent(playersNameList[0])

# One zero-initialised win counter per worker.
rand_wins = [0] * NUM_TESTS
pool = multiprocessing.Pool(processes=NUM_TESTS)
try:
    rand_wins = pool.map(run_test_2, rand_wins)
    print(rand_wins)
finally:
    # Always release the worker processes, even if a worker raised.
    pool.close()
    pool.join()

In [0]:
# Summary: mean wins per worker followed by the raw per-worker counts,
# first for the trained agent, then for the random baseline.
print("Monte Carlo won {} times on average :: {}".format(
    sum(reinforce_wins) / len(reinforce_wins), reinforce_wins))
print("Random won {} times on average :: {}".format(
    sum(rand_wins) / len(rand_wins), rand_wins))