In [1]:
# !unzip robohearts.zip
#%cd robohearts/

In [2]:
import gym
import multiprocessing
from gymhearts.Hearts import *
from gymhearts.Agent.agent_random import RandomAgent
from gymhearts.Agent.agent_mc_nn import MonteCarloNN
from gymhearts.Agent.utils_env import *
from gymhearts.Agent.utils_nn import *
from tqdm import tqdm_notebook

In [11]:
# ----------- TRAINING CONFIGURATION -------------
# How many games the training loop plays
TRAINING_ITERS = 2500

# How many episodes a single evaluation run plays
NUM_EPISODES = 1000

# How many evaluation runs are executed and averaged together
NUM_TESTS = 10

# Max score for players to win the game (handed to the environment)
MAX_SCORE = 100

# Workflow switches:
#   run_train      -- set to False to skip training
#   continue_train -- set to True to resume training from an existing model
#   run_random     -- also evaluate a purely random agent for comparison
run_train = True
continue_train = False
run_random = False

# Features to include in model :: [in_hand, in_play, played_cards, won_cards, scores]
feature_list = ['in_hand', 'in_play']

# Base name of the saved model file :: {model_name}.th
model_name = 'mc_nn'

# Configuration parameters handed to the MonteCarloNN agent
mc_nn_config = {
    'print_info': False,
    'epsilon': 0.01,
    'gamma': 1.0,
    'alpha': 0.001,
    'feature_list': feature_list,
}

# Only ask the agent to load an existing model when resuming training
if continue_train:
    mc_nn_config['load_model'] = model_name

playersNameList = ['MonteCarlo', 'Rando', 'Randy', 'Randall']

# Seat 0 is the learning agent; every remaining seat is a RandomAgent
# that receives its own {'print_info': False} config dict.
agent_list = [MonteCarloNN(playersNameList[0], mc_nn_config)]
agent_list += [
    RandomAgent(opponent_name, {'print_info' : False})
    for opponent_name in playersNameList[1:]
]

In [12]:
# ---------- TRAIN MONTE CARLO NN AGENT ------------
env = gym.make('Hearts_Card_Game-v0')
env.__init__(playersNameList, MAX_SCORE)
if run_train:
    # The learner occupies seat 0. Its name is taken from playersNameList
    # (the same list used to build both the agent and the environment)
    # instead of being hard-coded, so renaming the player in the config
    # cell cannot silently break history collection or the reward lookup.
    learner = agent_list[0]
    learner_name = playersNameList[0]

    for trn_episode in tqdm_notebook(range(TRAINING_ITERS)):
        # Save the model every 50 steps
        if trn_episode % 50 == 0:
            save_model(learner.nn, model_name, 'mc_nn')
        observation = env.reset()
        history = []
        while True:
            #env.render()
            now_event = observation['event_name']
            IsBroadcast = observation['broadcast']
            # update my agent before clearing state
            if now_event == 'RoundEnd':
                # The negated reward of the learner is what gets passed to
                # update_weights. NOTE(review): `reward` is only bound after
                # the first env.step() call; this assumes 'RoundEnd' is never
                # the very first event of a run -- confirm against the env.
                learner.update_weights(history, -reward[learner_name])
                history = []
            if now_event == 'GameOver':
                break
            if now_event == 'PlayTrick' and observation['data']['playerName'] == learner_name:
                # don't add score, they don't change till at end of round
                history.append((observation, learner.played_cards, learner.won_cards))

            action = None
            if IsBroadcast == True:
                # Broadcast events are shown to every agent; no action is taken
                for agent in agent_list:
                    agent.Do_Action(observation)
            else:
                # Targeted events: only the named player produces an action
                playName = observation['data']['playerName']
                for agent in agent_list:
                    if agent.name == playName:
                        action = agent.Do_Action(observation)

            observation, reward, done, info = env.step(action)
    # Final save once all training games have been played
    save_model(learner.nn, model_name, 'mc_nn')

HBox(children=(IntProgress(value=0, max=10), HTML(value='')))




In [13]:
# Function to test mc nn model with multiprocessing
def run_test(num_won, player_name='MonteCarlo'):
    """Play NUM_EPISODES games and count how many `player_name` wins.

    Relies on the notebook-level globals `env`, `agent_list` and
    `NUM_EPISODES`. It takes a single required argument so it can be
    used directly with `multiprocessing.Pool.map`.

    Args:
        num_won: Starting value of the win counter (the evaluation cell
            passes 0 for every worker).
        player_name: Name compared against the 'Winner' field of the
            'GameOver' observation. Defaults to 'MonteCarlo', which keeps
            existing single-argument callers unchanged.

    Returns:
        `num_won` plus the number of games won by `player_name`.
    """
    # Weird hack to make progress bars render properly
    print(' ', end='', flush=True)
    for i_ep in tqdm_notebook(range(NUM_EPISODES)):
        observation = env.reset()
        while True:
            now_event = observation['event_name']
            IsBroadcast = observation['broadcast']
            action = None
            if IsBroadcast == True:
                # Broadcast events are shown to every agent; no action is taken
                for agent in agent_list:
                    agent.Do_Action(observation)
            else:
                # Targeted events: only the named player produces an action
                playName = observation['data']['playerName']
                for agent in agent_list:
                    if agent.name == playName:
                        action = agent.Do_Action(observation)
            if now_event == 'GameOver':
                # Agents have already seen the final observation above
                num_won += int(observation['data']['Winner'] == player_name)
                break
            observation, reward, done, info = env.step(action)
    return num_won

In [14]:
# ----------- EVALUATE MC NN AGENT ---------------

env = gym.make('Hearts_Card_Game-v0')
env.__init__(playersNameList, MAX_SCORE)

# Evaluation parameters for testing: the saved model is loaded by name and
# no epsilon/gamma/alpha values are passed.
# NOTE(review): presumably MonteCarloNN falls back to its own defaults for
# the omitted keys -- confirm in agent_mc_nn.
mc_nn_config = {
    'print_info' : False,
    'load_model' : model_name,
    'feature_list' : feature_list
}

agent_list[0] = MonteCarloNN(playersNameList[0], mc_nn_config)

# One worker per evaluation run; every worker starts its win counter at 0.
initial_wins = [0] * NUM_TESTS

pool = multiprocessing.Pool(processes=NUM_TESTS)
try:
    mc_wins = pool.map(run_test, initial_wins)
finally:
    # Always release the worker processes, even if an evaluation run raised,
    # so failed runs do not leave stray processes behind in the kernel.
    pool.close()
    pool.join()
print(mc_wins)

          

HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

HBox(children=(IntProgress(value=0, max=10), HTML(value='')))











[2, 1, 0, 0, 2, 2, 5, 3, 2, 2]


In [15]:
# Function to test random model with multiprocessing
def run_test_random(num_won):
    """Play NUM_EPISODES games and count the ones won by 'Randman'.

    Uses the notebook-level globals `env`, `agent_list` and `NUM_EPISODES`.
    It takes a single argument so it can be mapped over a
    `multiprocessing.Pool`.

    Args:
        num_won: Starting value of the win counter.

    Returns:
        The starting value plus the number of games whose 'GameOver'
        observation names 'Randman' as the winner.
    """
    # Weird hack to make progress bars render properly
    print(' ', end='', flush=True)
    wins = num_won
    for _ in tqdm_notebook(range(NUM_EPISODES)):
        obs = env.reset()
        game_over = False
        while not game_over:
            event = obs['event_name']
            is_broadcast = obs['broadcast']
            chosen = None
            if is_broadcast == True:
                # Every agent observes a broadcast; nobody acts on it
                for player in agent_list:
                    player.Do_Action(obs)
            else:
                # Only the agent whose name matches the observation acts
                acting_name = obs['data']['playerName']
                for player in agent_list:
                    if player.name == acting_name:
                        chosen = player.Do_Action(obs)
            game_over = event == 'GameOver'
            if game_over:
                wins += int(obs['data']['Winner'] == 'Randman')
            else:
                obs, _reward, _done, _info = env.step(chosen)
    return wins

In [16]:
# ----------- EVALUATE RANDOM AGENT ---------------
if run_random:
    # Rename seat 0 and swap in the random agent *before* the environment is
    # initialised, so the environment is built from the same name that the
    # agent carries and that run_test_random compares the winner against.
    # NOTE: this mutates playersNameList and agent_list in place; re-run the
    # configuration cell before training or evaluating the MC agent again.
    playersNameList[0] = 'Randman'
    # Pass the same config the other RandomAgent instances are built with.
    agent_list[0] = RandomAgent(playersNameList[0], {'print_info' : False})

    env = gym.make('Hearts_Card_Game-v0')
    env.__init__(playersNameList, MAX_SCORE)

    # One worker per evaluation run; every worker starts its win counter at 0.
    pool = multiprocessing.Pool(processes=NUM_TESTS)
    try:
        rand_wins = pool.map(run_test_random, [0] * NUM_TESTS)
    finally:
        # Always release the worker processes, even if a run raised.
        pool.close()
        pool.join()
    print(rand_wins)

In [17]:
# Report the mean number of wins per evaluation run, then one count per line.
mc_mean_wins = sum(mc_wins) / len(mc_wins)
print(f"Monte Carlo won {mc_mean_wins} times on average :: {mc_wins}")
print(*mc_wins, sep='\n')
if run_random:
    # rand_wins only exists when the random-agent evaluation cell was run
    rand_mean_wins = sum(rand_wins) / len(rand_wins)
    print(f"Random won {rand_mean_wins} times on average :: {rand_wins}")

Monte Carlo won 1.9 times on average :: [2, 1, 0, 0, 2, 2, 5, 3, 2, 2]
2
1
0
0
2
2
5
3
2
2
