In [1]:
from DeepLearning.Environments.NoSetupEnv import NoSetupDenseRewardEnv
from DeepLearning.Environments.SetupEnv import SetupRandomWithRoadsEnv, SetupOnlyEnv
from DeepLearning.Environments.CatanEnv import CatanEnv, FirstSettlementEnv
from DeepLearning.Environments.SelfPlayEnv import SelfPlayEnv, SelfPlaySetupDotTotalEnv
from DeepLearning.PPO import MaskablePPO
from DeepLearning.GetActionMask import getSetupWithRoadsActionMask, getActionMask
from DeepLearning.GetObservation import getSetupRandomWithRoadsObservation, getObservation, getObservationSimplified, lowerBoundsSimplified, upperBoundsSimplified
from Agents.AgentModel import AgentMultiModel
import os

# Two-stage training run: first on the setup-only environment, then on the
# first-settlement environment, logging each stage under its own name.

# Flags are written before any environment is constructed.
# NOTE(review): presumably read by the project's envs/agents -- confirm.
os.environ["UPDATE_MODELS"] = "False"
os.environ["MODEL_NAME"] = "None"

setupOnlyEnv = SetupOnlyEnv(
    lowerBoundsSimplified,
    upperBoundsSimplified,
    getActionMask,
    getObservationSimplified,
)
firstSettlementEnv = FirstSettlementEnv()

# Base run name; each stage appends its own suffix for checkpoints and logs.
name = "Production->FirstSettlement_50turns"
part1Name = f"{name}-Part1"
part2Name = f"{name}-Part2"

# Free-form description of the run, handed to the model via `info=`.
info = {
    "env": "FirstSettlementEnv",
    "Timesteps": "35_000 + 2_000_000",
    "Opponents": "3x(Random)",
    "Rewards": "50 - Num turns to build settlement"
}

model = MaskablePPO(
    "MlpPolicy",
    setupOnlyEnv,
    gamma=0.4,
    verbose=1,
    getActionMask=getActionMask,
    getObservation=getObservationSimplified,
    info=info,
    saveName=part1Name,
    tensorboard_log="./tensorboard_logs/",
)

# Stage 1: setup-only environment.
model.learn(total_timesteps=35_000, tb_log_name=part1Name)

# Stage 2: same model, switched to the first-settlement environment.
model.set_env(firstSettlementEnv)
model.saveName = part2Name
model.learn(total_timesteps=2_000_000, tb_log_name=part2Name)

# The final model is saved under the base name (no stage suffix).
model.save(f"DeepLearning/Models/{name}")


486
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to ./tensorboard_logs/Production->FirstSettlement_50turns-Part1_2


  logger.warn(


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 2        |
|    ep_rew_mean     | 12.8     |
| time/              |          |
|    fps             | 178      |
|    iterations      | 1        |
|    time_elapsed    | 11       |
|    total_timesteps | 2048     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2          |
|    ep_rew_mean          | 14.3       |
| time/                   |            |
|    fps                  | 170        |
|    iterations           | 2          |
|    time_elapsed         | 23         |
|    total_timesteps      | 4096       |
| train/                  |            |
|    approx_kl            | 0.03035746 |
|    clip_fraction        | 0.754      |
|    clip_range           | 0.2        |
|    entropy_loss         | -3.69      |
|    explained_variance   | -0.0195    |
|    learning_rate        | 0.0003     |
|   

In [None]:
# Test a trained model through the env interface; useful for debugging running environments.

from sb3_contrib.common.maskable.utils import get_action_masks
from DeepLearning.Environments.NoSetupEnv import NoSetupDenseRewardEnv, NoSetupEnv
from DeepLearning.Environments.SetupEnv import SetupRandomWithRoadsEnv
from DeepLearning.Environments.CatanEnv import CatanEnv, FirstSettlementEnv
from DeepLearning.Environments.SelfPlayEnv import SelfPlayEnv, SelfPlaySetupDotTotalEnv
import os
from Game.CatanPlayer import PlayerStatsTracker
from DeepLearning.PPO import MaskablePPO
from Agents.AgentRandom2 import AgentRandom2
from Agents.AgentMCTS import AgentMCTS
from tabulate import tabulate
from DeepLearning.Stats import headers
import pandas as pd
import random
from CatanData.GameStateViewer import SaveGameStateImage, DisplayImage


# Roll a saved model through FirstSettlementEnv and print per-episode
# diagnostics (player 0 stats, board image, final reward).

os.environ["UPDATE_MODELS"] = "False"
os.environ["MODEL_NAME"] = "None"
env = FirstSettlementEnv()
modelName = "FirstSettlement_reward25_5M"

# Subtracted from the final reward before printing.
# NOTE(review): presumably the env's base reward (the model name says
# "reward25") -- confirm against FirstSettlementEnv.
REWARD_OFFSET = 25

model = MaskablePPO.load(f'DeepLearning/Models/{modelName}.zip')

rewardList = []      # every step reward, accumulated across all episodes
winner = [0,0,0,0]   # win count per seat

# Only used if passed to env.reset(players=...), which is currently disabled.
players = [ AgentRandom2("P0", 0),
            AgentRandom2("P1", 1),
            AgentRandom2("P2", 2),
            AgentRandom2("P3", 3)]

for episode in range(1):
    done = False
    state, info = env.reset()#players=players)

    while not done:
        action_masks = get_action_masks(env)
        action, _states = model.predict(state, action_masks=action_masks)
        state, reward, terminated, truncated, info = env.step(action.item())
        # An episode is over when it terminates OR is truncated; ignoring the
        # truncation flag would keep stepping a finished episode forever.
        done = terminated or truncated
        rewardList.append(reward)

    # NOTE(review): assumes gameState.winner is a valid seat index (0-3) once
    # the episode ends -- confirm this holds for FirstSettlementEnv.
    winner[env.game.gameState.winner] += 1

    env.game.gameState.players[0].generatePlayerStats()
    env.game.gameState.players[3].generatePlayerStats()

    print(env.game.gameState.players[0].stats)
    DisplayImage(env.game.gameState)
    print(rewardList[-1] - REWARD_OFFSET)




print("\n\nWinnings: ", winner)


# Brick, ore, wool, wheat, wood

In [9]:
"""
Running Agent simulations
"""
from Agents.AgentRandom2 import AgentRandom2
from Agents.AgentMCTS import AgentMCTS
from Agents.AgentUCT import AgentUCT
from Agents.AgentModel import AgentMultiModel, AgentModel
from Game.CatanGame import *
from CatanSimulator import CreateGame
from DeepLearning.PPO import MaskablePPO
from Game.CatanPlayer import PlayerStatsTracker
from tabulate import tabulate
from DeepLearning.Stats import headers
import dill as pickle
import pandas as pd
from CatanData.GameStateViewer import SaveGameStateImage, DisplayImage
import time
import math
from DeepLearning.GetObservation import getObservationSimplified


# Play full games between a trained model (seat 0) and three random agents,
# then print a table of win rate, margin of error and averaged player stats
# for seats 0 and 1.

# z-score of the standard normal for a two-sided 95% interval.
Z_95 = 1.96


def _winRateAndMargin(wins, totalGames):
    """Return the win rate and its margin of error for one seat.

    Args:
        wins: number of games won by the seat.
        totalGames: number of games played; must be greater than zero.

    Returns:
        (winRate, marginPercent): the win fraction, and
        Z_95 * sqrt(p * (1 - p) / n) expressed in percent, rounded to 2 decimals.
    """
    p_hat = wins / totalGames
    marginPercent = round(100*(Z_95 * math.sqrt((p_hat * (1 - p_hat)) / totalGames)), 2)
    return p_hat, marginPercent


winner = [0,0,0,0]   # win count per seat
player0Stats = PlayerStatsTracker()
player1Stats = PlayerStatsTracker()

players = [ AgentModel("P0", 0, model=MaskablePPO.load('DeepLearning/Models/Production->FirstSettlement_50turns-Part2_966656.zip')),
            AgentRandom2("P1", 1),
            AgentRandom2("P2", 2),
            AgentRandom2("P3", 3)]

COLLECT_STATS = True

for episode in range(1):
    game = CreateGame(players)
    # Round-trip through dill (protocol -1 = highest available) so the game
    # played below is a separate copy of the one just created.
    game = pickle.loads(pickle.dumps(game, -1))
    while True:
        currPlayer = game.gameState.players[game.gameState.currPlayer]

        agentAction = currPlayer.DoMove(game)
        agentAction.ApplyAction(game.gameState)

        # Show the board each time seat 0 ends its turn, pausing so it can be seen.
        if currPlayer.seatNumber == 0 and agentAction.type == 'EndTurn':
            DisplayImage(game.gameState)
            time.sleep(1)

        if game.gameState.currState == "OVER":
            break

    winner[game.gameState.winner] += 1

    # Stats
    if COLLECT_STATS:
        game.gameState.players[0].generatePlayerStats()
        game.gameState.players[1].generatePlayerStats()

        player0Stats += game.gameState.players[0].stats
        player1Stats += game.gameState.players[1].stats

# Collect stats
if COLLECT_STATS:
    player0Stats.getAverages()
    player1Stats.getAverages()

    player0Data = player0Stats.getList()
    player1Data = player1Stats.getList()

    totalGames = sum(winner)
    p_hat0, margin_error0 = _winRateAndMargin(winner[0], totalGames)
    p_hat1, margin_error1 = _winRateAndMargin(winner[1], totalGames)

    # Row layout: label, win rate, margin of error, then the averaged stats.
    player0Data[:0] = ["Player0", p_hat0, margin_error0]
    player1Data[:0] = ["Player1", p_hat1, margin_error1]

    table = tabulate([player0Data, player1Data], headers=headers, tablefmt='simple')
    print(table)


print("\n\nWinnings: ", winner)


# Save to csv
# fileName = f'Production->FirstSettlement_50turns_vs_3AgentRandom.csv'
# df = pd.DataFrame([player0Data, player1Data], columns=headers)
# df.to_csv(f'DeepLearning/Data/{fileName}', index=False)

# Brick, ore, wool, wheat, wood

KeyboardInterrupt: 

In [6]:
# Inspect (and optionally update) the functions stored on a saved model
from DeepLearning.PPO import MaskablePPO
from DeepLearning.GetObservation import getObservation

# Load a saved model and print the mask/observation functions attached to it.
model = MaskablePPO.load('DeepLearning/Models/Full_vp_100k.zip')
for attrName in ("getActionMask", "getObservation"):
    print(getattr(model, attrName))
# To replace the stored observation function and re-save, uncomment:
# model.getObservation = getObservation
# model.save('DeepLearning/Models/Full_vp_100k.zip')

<function getActionMask at 0x177b0e660>
<function getObservation at 0x177b0df80>
