In [1]:
# !unzip robohearts.zip
#%cd robohearts/

In [9]:
import gym
import multiprocessing
from gymhearts.Hearts import *
from gymhearts.Agent.human import Human
from gymhearts.Agent.random_agent import RandomAgent
from gymhearts.Agent.monte_carlo import MonteCarlo
from tqdm import tqdm_notebook

In [47]:
# --- Experiment settings ---------------------------------------------------
NUM_TESTS = 10          # number of evaluation batches (also the pool size)
NUM_EPISODES = 1000     # games played per evaluation batch
TRAINING_ITERS = 10000  # games played by the training loop
MAX_SCORE = 100         # second argument to env.__init__ (presumably the game-ending score)

run_train = True        # when False, the training loop is skipped

# Seat order: the learning agent first, followed by three random opponents.
playersNameList = ['MonteCarlo', 'Rando', 'Randy', 'Randall']

# Monte Carlo vs Random
mc_config = {
    'print_info': False,
    'epsilon': .05,
    'gamma': 0.99,
    'alpha': .0001,
}

# One agent per seat, in the same order as playersNameList. Every random
# agent receives its own settings dict.
agent_list = [MonteCarlo(playersNameList[0], mc_config)] + [
    RandomAgent(opponent_name, {'print_info': False})
    for opponent_name in playersNameList[1:]
]

In [48]:
# TRAIN THE MONTE CARLO AGENT
# Plays TRAINING_ITERS games. Whenever a round ends, the Monte Carlo agent's
# weights are updated from the observations it acted on during that round
# together with its negated entry in the most recent reward dict.
env = gym.make('Hearts_Card_Game-v0')
env.__init__(playersNameList, MAX_SCORE)
weights = []
if run_train:
    learner = agent_list[0]
    for trn_episode in tqdm_notebook(range(TRAINING_ITERS)):
        observation = env.reset()
        history = []
        while True:
            event = observation['event_name']
            broadcast = observation['broadcast']
            action = None

            if broadcast == True:
                # Broadcast events are shown to every agent; no action is kept.
                for agent in agent_list:
                    agent.Do_Action(observation)
            else:
                # Only the player named in the observation chooses an action.
                turn_player = observation['data']['playerName']
                for agent in agent_list:
                    if agent.name != turn_player:
                        continue
                    action = agent.Do_Action(observation)

            if event == 'RoundEnd':
                # `reward` comes from the env.step() call of the previous pass.
                errors = learner.update_weights(history, -reward['MonteCarlo'])
                history = []
                weights = learner.weight_vec
            elif event == 'GameOver':
                break

            # Remember every observation on which the Monte Carlo agent had to act.
            if not broadcast and observation['data']['playerName'] == 'MonteCarlo':
                history.append(observation)
            observation, reward, done, info = env.step(action)


HBox(children=(IntProgress(value=0, max=10000), HTML(value='')))

In [49]:
# Uncomment the assignment below to use pretrained weights (this replaces any weights trained above)
# weights = [-0.46031637, -1.02296217, -1.64597146, 0.50871499, 0.05907032, -0.04527117,
# -2.67590788, -1.78400492, -0.08667306, 0.48108891, 0.66066313, -1.57675411,
# -0.56494518, -0.07736412, -0.3257198, -0.65003209, -0.63740714, 0.44494984,
# -0.1545964, 0.67457139, 2.31472314, 0.8694452, -2.29173301, 0.52783125,
# -0.86950875, -1.77655688, -3.29970913, -0.242993, -1.57548922, -1.34238258,
# 0.36816378, -3.23065985, -0.07919411, -2.1089143, -3.12815169, -0.74580836,
# 0.98398675, -0.75271283, -0.81051661, -0.60567687, -3.42010519, -0.63186969,
# -2.02352157, -0.27534069, -0.28736574, -1.15836776, -3.28679005, -0.33767846,
# -0.41568405, 0.2782292, -1.23761129, -1.80559854]

# EVALUATE THE MONTE CARLO AGENT

# Build a fresh environment configured with the current player names and score limit.
env = gym.make('Hearts_Card_Game-v0')
env.__init__(playersNameList, MAX_SCORE)

# Seat 0 is replaced by a Monte Carlo agent that is given only the weight
# vector; no epsilon/gamma/alpha settings are passed here.
agent_list[0] = MonteCarlo(playersNameList[0], params={'weight_vec': weights})

# One starting win count (zero) per evaluation batch; each entry is later
# handed to run_test as its initial tally.
mc_wins = [0 for _ in range(NUM_TESTS)]
def run_test(num_won, player_name='MonteCarlo'):
    """Play NUM_EPISODES games and count the ones won by `player_name`.

    Uses the notebook-level `env`, `agent_list` and `NUM_EPISODES`. The
    function is written so it can be mapped over a multiprocessing pool,
    with each call representing one evaluation batch.

    Args:
        num_won: Starting win count; wins from this batch are added to it.
        player_name: Name compared against the 'Winner' field of every
            'GameOver' observation. Defaults to 'MonteCarlo', which is the
            name that was previously hard-coded.

    Returns:
        `num_won` plus the number of games in this batch won by `player_name`.
    """
    # Weird hack to make progress bars render properly
    print(' ', end='', flush=True)
    for _ in tqdm_notebook(range(NUM_EPISODES)):
        observation = env.reset()
        while True:
            action = None
            if observation['broadcast']:
                # Broadcast events are shown to every agent; no action is kept.
                for agent in agent_list:
                    agent.Do_Action(observation)
            else:
                # Only the player named in the observation chooses an action.
                acting_player = observation['data']['playerName']
                for agent in agent_list:
                    if agent.name == acting_player:
                        action = agent.Do_Action(observation)
            if observation['event_name'] == 'GameOver':
                num_won += int(observation['data']['Winner'] == player_name)
                break
            # Only the next observation is needed during evaluation.
            observation, _reward, _done, _info = env.step(action)
    return num_won
        
# Run the evaluation batches in parallel, one worker process per batch.
# Each worker receives one starting count from mc_wins and returns its total.
# The `with` block terminates the workers if a batch raises or the cell is
# interrupted, so no worker processes are left behind.
with multiprocessing.Pool(processes=NUM_TESTS) as pool:
    mc_wins = pool.map(run_test, mc_wins)
    # Normal path: let the workers finish and exit cleanly.
    pool.close()
    pool.join()
print(mc_wins)

          

HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

[296, 321, 319, 313, 319, 284, 311, 304, 308, 303]


In [25]:
# EVALUATE THE RANDOM AGENT
# Seat 0 is renamed *before* the environment is configured, so the
# environment and the agent list always agree on that player's name.
# NOTE: this mutates the shared playersNameList and agent_list; re-run the
# configuration cell before re-running any Monte Carlo cell.
playersNameList[0] = 'Randman'
env = gym.make('Hearts_Card_Game-v0')
env.__init__(playersNameList, MAX_SCORE)
agent_list[0] = RandomAgent(playersNameList[0])

# One starting win count (zero) per evaluation batch.
rand_wins = [0] * NUM_TESTS
def run_test(num_won, player_name='Randman'):
    """Play NUM_EPISODES games and count the ones won by `player_name`.

    Uses the notebook-level `env`, `agent_list` and `NUM_EPISODES`. The
    function is written so it can be mapped over a multiprocessing pool,
    with each call representing one evaluation batch.

    Args:
        num_won: Starting win count; wins from this batch are added to it.
        player_name: Name compared against the 'Winner' field of every
            'GameOver' observation. Defaults to 'Randman', which is the
            name that was previously hard-coded.

    Returns:
        `num_won` plus the number of games in this batch won by `player_name`.
    """
    # Weird hack to make progress bars render properly
    print(' ', end='', flush=True)
    for _ in tqdm_notebook(range(NUM_EPISODES)):
        observation = env.reset()
        while True:
            action = None
            if observation['broadcast']:
                # Broadcast events are shown to every agent; no action is kept.
                for agent in agent_list:
                    agent.Do_Action(observation)
            else:
                # Only the player named in the observation chooses an action.
                acting_player = observation['data']['playerName']
                for agent in agent_list:
                    if agent.name == acting_player:
                        action = agent.Do_Action(observation)
            if observation['event_name'] == 'GameOver':
                num_won += int(observation['data']['Winner'] == player_name)
                break
            # Only the next observation is needed during evaluation.
            observation, _reward, _done, _info = env.step(action)
    return num_won
        
# Run the evaluation batches in parallel, one worker process per batch.
# Each worker receives one starting count from rand_wins and returns its total.
# The `with` block terminates the workers if a batch raises or the cell is
# interrupted, so no worker processes are left behind.
with multiprocessing.Pool(processes=NUM_TESTS) as pool:
    rand_wins = pool.map(run_test, rand_wins)
    # Normal path: let the workers finish and exit cleanly.
    pool.close()
    pool.join()
print(rand_wins)

          

HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))

[244, 278, 237, 258, 237, 253, 258, 275, 274, 248]


In [50]:
# Summarise both evaluations: mean wins per batch, then the per-batch counts.
mc_average = sum(mc_wins) / len(mc_wins)
print(f"Monte Carlo won {mc_average} times on average :: {mc_wins!s}")
rand_average = sum(rand_wins) / len(rand_wins)
print(f"Random won {rand_average} times on average :: {rand_wins!s}")
# Dump the weight vector that was used for the Monte Carlo evaluation.
print(f"\n\nThe Monte Carlo weights are: {weights!s}")

Monte Carlo won 307.8 times on average :: [296, 321, 319, 313, 319, 284, 311, 304, 308, 303]
Random won 256.2 times on average :: [244, 278, 237, 258, 237, 253, 258, 275, 274, 248]


The Monte Carlo weights are: [ 2.06465561 -0.54803295 -0.5606525  -0.53443504 -0.56476593 -0.52700239
 -0.56047572 -0.56108895 -0.5440682  -0.54220423 -0.51039098 -0.5632033
 -0.57550882 -0.55404006 -0.5128013  -0.52260362 -0.53944989 -0.53640682
 -0.54893151 -0.47431386 -0.52071536 -0.54099153 -0.55418736 -0.5690746
 -0.54448835 -0.56689379 -0.5560535  -0.55724317 -0.57090783 -0.52755335
 -0.55410102 -0.55017804 -0.58567657 -0.56643317 -0.55748289 -0.57912836
 -1.76601824 -1.07758716 -1.59515983 -0.54671175 -0.51136727 -0.49869797
 -0.46793592 -0.56632468 -0.52785875 -0.57460898 -0.57388379 -0.58120085
 -0.84917458 -1.25580474 -1.57165733 -1.82312463]
