Training

In [None]:
from DeepLearning.Environments.CatanEnv import CatanEnv, SelfPlayDistribution, CatanTradingEnv, SelfPlayDistTradingEnv
from DeepLearning.PPO import MaskablePPO
from DeepLearning.GetActionMask import getActionMask, getActionMaskTrading
from DeepLearning.GetObservation import getObservation, getObservationSimplified, getSetupObservationValue, getObservationTrading
from Agents.AgentModel import AgentMultiModel
from Agents.AgentRandom2 import AgentRandom2
from Agents.AgentNoMoves import AgentNoMoves
import os

# Environment-variable switches for this run, written as strings.
# Nothing in this cell reads them back, so they are presumably consumed by
# the project's environments/agents -- verify against those modules.
os.environ.update({
    "UPDATE_MODELS_UNIFORM": "False",
    "UPDATE_MODELS_DIST": "False",
    "MODEL_NAME": "None",
    "MODEL_1_NAME": "",
    "MODEL_2_NAME": "",
    "MODEL_3_NAME": "",
})

# Trading-enabled environment together with its matching mask/observation
# builders.
env = CatanTradingEnv(trading=True)
actionMask = getActionMaskTrading
observation = getObservationTrading
gamma = 0.99

# Free-form run metadata. In this cell it is only passed to the
# (commented-out) fresh-model constructor below.
# NOTE(review): "Timesteps" says 1M while learn() below requests 2_000_000,
# and "env" says CatanEnv while a CatanTradingEnv is built -- confirm.
info = {
    "env": "CatanEnv",
    "Timesteps": "1M",
    "Opponents": "self.play",
    "Rewards": "Setup, Dense, Bank Trades"
}
name = "TradingBase_PlayerTradingAdded_vpActionsBack"

# Separate two-layer, 128-unit heads for the policy (pi) and value (vf) nets.
netArchDict = {"pi": [128, 128], "vf": [128, 128]}

# Option A -- start a fresh model (disabled):
# model = MaskablePPO("MlpPolicy", env, policy_kwargs=dict(net_arch=netArchDict), gamma=gamma, verbose=1, getActionMask=actionMask, getObservation=observation, info=info, saveName=name,tensorboard_log="./tensorboard_logs/")

# Option B -- resume from the 800k checkpoint and keep training on `env`.
model = MaskablePPO.load(
    "DeepLearning/Models/TradingBase_PlayerTradingAdded_vpActionsBack/TradingBase_PlayerTradingAdded_vpActionsBack_800k.zip",
    env=env,
)
model.saveName = name
model.learn(total_timesteps=2_000_000, tb_log_name=name)
# model.save("DeepLearning/Models/TradingBase_PlayerTradingAdded_20Turns/Final")


Test with Env

In [None]:
# Test a trained model through the env interface; useful for debugging the running environment.

from sb3_contrib.common.maskable.utils import get_action_masks
from DeepLearning.Environments.NoSetupEnv import NoSetupDenseRewardEnv, NoSetupEnv
from DeepLearning.Environments.SetupEnv import SetupRandomWithRoadsEnv
from DeepLearning.Environments.CatanEnv import CatanEnv, CatanTradingEnv
from DeepLearning.Environments.SelfPlayEnv import SelfPlayEnv, SelfPlaySetupDotTotalEnv
import os
from Game.CatanPlayer import PlayerStatsTracker
from DeepLearning.PPO import MaskablePPO
from Agents.AgentRandom2 import AgentRandom2
from Agents.AgentNoMoves import AgentNoMoves
from Agents.AgentMCTS import AgentMCTS
from tabulate import tabulate
from DeepLearning.Stats import headers
import pandas as pd
import random
from CatanData.GameStateViewer import SaveGameStateImage, DisplayImage
import time
from collections import deque

# Debug cell: roll a trained model through CatanTradingEnv and tally the
# winners, per-step rewards and total number of actions taken.

# NOTE(review): the training cell sets UPDATE_MODELS_UNIFORM /
# UPDATE_MODELS_DIST rather than UPDATE_MODELS -- confirm which key the
# environment actually reads.
os.environ["UPDATE_MODELS"] = "False"
os.environ["MODEL_NAME"] = "None"

model = MaskablePPO.load("DeepLearning/Models/DummyTrading.zip")

rewardList = []          # reward of every step, across all episodes
winner = [0, 0, 0, 0]    # win count per seat

# Only used when `players=players` is passed to env.reset() below.
players = [ AgentRandom2("P0", 0, playerTrading=True),
            AgentRandom2("P1", 1, playerTrading=True),
            AgentRandom2("P2", 2, playerTrading=True),
            AgentRandom2("P3", 3, playerTrading=True),]

env = CatanTradingEnv()

total_actions = 0

for episode in range(1):
    state, info = env.reset()#players=players)
    done = False
    terminated = False

    while not done:
        action_masks = get_action_masks(env)
        action, _states = model.predict(state, action_masks=action_masks)
        state, reward, terminated, truncated, info = env.step(action.item())
        # An episode is over on termination OR truncation; ignoring the
        # truncation flag would keep stepping an env that must be reset.
        done = terminated or truncated
        rewardList.append(reward)
        total_actions += 1

    # Only a terminated game is counted as a win; a truncated episode is
    # skipped rather than indexing `winner` with its gameState.winner.
    if terminated:
        winner[env.game.gameState.winner] += 1

    # env.game.gameState.players[0].generatePlayerStats()
    # env.game.gameState.players[3].generatePlayerStats()

    # print(env.game.gameState.players[0].stats)
    # DisplayImage(env.game.gameState)
    # print(rewardList[-1] - 25)
# print(rewardList)
# print(sum(rewardList))

print(f"TotalActions:{total_actions}")
print("\n\nWinnings: ", winner)



# Brick, ore, wool, wheat, wood

Test Models vs. Opponents

In [13]:
"""
Running Agent simulations
"""
from Agents.AgentRandom2 import AgentRandom2
from Agents.AgentMCTS import AgentMCTS
from Agents.AgentUCT import AgentUCT
from Agents.AgentNoMoves import AgentNoMoves
from Agents.AgentModel import AgentMultiModel, AgentModel
from Game.CatanGame import *
from CatanSimulator import CreateGame
from DeepLearning.PPO import MaskablePPO
from Game.CatanPlayer import PlayerStatsTracker
from tabulate import tabulate
from DeepLearning.Stats import headers
import dill as pickle
import pandas as pd
from CatanData.GameStateViewer import SaveGameStateImage, DisplayImage
import time
import math
from DeepLearning.GetObservation import getObservationSimplified
from collections import deque


# best_model = AgentMultiModel("P1", 1, model=MaskablePPO.load("DeepLearning/Models/NoSetup/NoSetupDenseRewardEnv-10M.zip"), setupModel=MaskablePPO.load("DeepLearning/Models/Setup/SetupRandom_wins_1M.zip"), fullSetup=False)

# Win counter per seat, plus stat accumulators: all of seat 0's games,
# seat 0's lost games only, and all of seat 1's games.
winner = [0, 0, 0, 0]
player0Stats = PlayerStatsTracker()
Player0LosingStats = PlayerStatsTracker()
player1Stats = PlayerStatsTracker()

# Seat 0 is the trained model; seats 1-3 are random agents.
players = [
    AgentModel(
        "P0",
        0,
        recordStats=True,
        playerTrading=True,
        model=MaskablePPO.load("DeepLearning/Models/Trading_20Turns_CitySettlement_SmallTrading/Trading_20Turns_CitySettlement_SmallTrading_20480.zip"),
    ),
] + [
    AgentRandom2(f"P{seat}", seat, recordStats=True, playerTrading=True)
    for seat in (1, 2, 3)
]

COLLECT_STATS = True
for episode in range(1000):
    # Serialisation round-trip through pickle (dill) of the new game.
    game = pickle.loads(pickle.dumps(CreateGame(players), -1))
    numTurns = 0
    gameOver = False
    while not gameOver:
        currPlayer = game.gameState.players[game.gameState.currPlayer]

        agentAction = currPlayer.DoMove(game)
        agentAction.ApplyAction(game.gameState)

        # Count only the turns that seat 0 finishes.
        if currPlayer.seatNumber == 0 and agentAction.type == 'EndTurn':
            # DisplayImage(game.gameState, agentAction)
            # time.sleep(1)
            numTurns += 1

        # The state is checked after each applied action.  # or numTurns >= 20
        gameOver = game.gameState.currState == "OVER"

    # print("Winner: ", game.gameState.winner)
    winner[game.gameState.winner] += 1
    lost = game.gameState.winner != 0

    if not COLLECT_STATS:
        continue

    # Fold this game's stats for seats 0 and 1 into the running trackers.
    game.gameState.players[0].generatePlayerStats()
    game.gameState.players[1].generatePlayerStats()

    player0Stats += game.gameState.players[0].stats
    player1Stats += game.gameState.players[1].stats
    if lost:
        Player0LosingStats += game.gameState.players[0].stats

# Summarise the collected stats as one table row per tracker.
if COLLECT_STATS:
    player0Stats.getAverages()
    Player0LosingStats.getAverages()
    player1Stats.getAverages()
    player0Data = player0Stats.getList()
    player0LosingData = Player0LosingStats.getList()
    player1Data = player1Stats.getList()

    totalGames = sum(winner)
    p_hat0 = winner[0] / totalGames
    p_hat1 = winner[1] / totalGames
    # 1.96 * sqrt(p * (1 - p) / n), expressed in percent and rounded to
    # two decimals.
    margin_error0 = round(100*(1.96 * math.sqrt((p_hat0 * (1 - p_hat0)) / totalGames)), 2)
    margin_error1 = round(100*(1.96 * math.sqrt((p_hat1 * (1 - p_hat1)) / totalGames)), 2)

    # Prepend label, win rate and margin of error to each row; the
    # losses-only row gets -1 in both numeric columns.
    player0Data[:0] = ["Player0", p_hat0, margin_error0]
    player0LosingData[:0] = ["Player0LossesStats", -1, -1]
    player1Data[:0] = ["Player1", p_hat1, margin_error1]

    table = tabulate(
        [player0Data, player0LosingData, player1Data],
        headers=headers,
        tablefmt='simple',
    )
    print(table)

# numTurns holds the value from the final simulated game only.
print(f"\nNum turns: {numTurns}")

print("\n\nWinnings: ", winner)


# Brick, ore, wool, wheat, wood

AgentName             WinRate    MarginError    numTurns    victoryPoints    numRoadsBuilt    devCardsBought  usedDevCards                         settlementsBuilt    citiesBuilt    devCardVP    largestArmy    longestRoad  resourcesReceived                          totalResourcesReceivedPerTurn    totalResourcesDiscarded    totalResourcesStolen  resourcesFromDevCard                   totalResourcesFromDevCard  resourcesFromBankTrade                finalResourceProduction              finalTradeRates                                                                                    setupResourceProduction                totalSetupResourceProduction  setupTradeRates                        setupResourceDiversity    turnsForFirstSettlement    noSettlementsBuilt    turnsForFirstCity    noCitysBuilt    numRoadsFor1stSettlement    totalResourcesFromBankTrade    goodSettlementBankTrades    badSettlementBankTrades    goodCityBankTrades    badCityBankTrades    goodRoadBankTrades    badRoadBankTr

In [None]:
# # Save to csv
# fileName = f'TradingBase_vpActionAdjusted3M_v_3Random.csv'
# df = pd.DataFrame([player0Data, player0LosingData, player1Data], columns=headers)
# df.to_csv(f'DeepLearning/Data/Trading/{fileName}', index=False)

# from DeepLearning.GetActionMask import allActionsDict

# print(allActionsDict)

# model.save("DeepLearning/Models/GammaTest-04/Final")

# model = MaskablePPO.load("DeepLearning/Models/Setup_CityThenRoad/Setup_CityThenRoad_133120.zip")
# model.getObservation = getSetupObservationValue
# model.save("DeepLearning/Models/Setup_CityThenRoad/Setup_CityThenRoad.zip")

