In [None]:
from DeepLearning.Environments.NoSetupEnv import NoSetupDenseRewardEnv
from DeepLearning.Environments.SetupEnv import SetupRandomWithRoadsEnv
from DeepLearning.Environments.CatanEnv import CatanEnv
from DeepLearning.Environments.SelfPlayEnv import SelfPlayEnv, SelfPlaySetupDotTotalEnv
from DeepLearning.PPO import MaskablePPO
from DeepLearning.GetActionMask import getSetupWithRoadsActionMask, getActionMask
from DeepLearning.GetObservation import getSetupRandomWithRoadsObservation, getObservation
from Agents.AgentModel import AgentMultiModel
import os

# Flags handed over through the process environment.
# NOTE(review): presumably read by the self-play environment / PPO code — confirm
# where, and whether they must be set before the imports above run.
os.environ.update({"UPDATE_MODELS": "False", "MODEL_NAME": "None"})

# Optional fixed-opponent setup (disabled):
# setupOpponentModel = MaskablePPO.load('DeepLearning/Models/SetupOnly_DotTotal_100k.zip')
# opponentModel = MaskablePPO.load('DeepLearning/Models/SelfPlay_SetupDotTotal_7vp_2M.zip')
# opponents = [
#     AgentMultiModel("P1", 1, setupModel=setupOpponentModel, model=opponentModel, fullSetup=False),
#     AgentMultiModel("P1", 1, setupModel=setupOpponentModel, model=opponentModel, fullSetup=False),
#     AgentMultiModel("P2", 2, setupModel=setupOpponentModel, model=opponentModel, fullSetup=False),
#     AgentMultiModel("P3", 3, setupModel=setupOpponentModel, model=opponentModel, fullSetup=False)
# ]

# Environment to train in. Alternative (disabled):
# env = NoSetupDenseRewardEnv(setupModel=MaskablePPO.load('DeepLearning/Models/SetupRandom_wins_1M.zip'))
env = SelfPlayEnv()

# Free-form description of this run; passed to MaskablePPO as `info`.
info = {
    "env": "SelfPlayEnv",
    "Timesteps": "13M",
    "Opponents": "3x(same model self play)",
    "Rewards": "vp Actions"
}

# Used as the TensorBoard run name (and as the file name if saving is re-enabled).
name = "SelfPlay_7vp_13M"

model = MaskablePPO(
    "MlpPolicy",
    env,
    gamma=0.4,
    verbose=1,
    getActionMask=getActionMask,
    getObservation=getObservation,
    info=info,
    tensorboard_log="./tensorboard_logs/",
)
model.learn(total_timesteps=13_000_000, tb_log_name=name)

# Saving is currently disabled:
# model.save(f"DeepLearning/Models/{name}")


In [None]:
# Test a trained model through the environment interface; useful for debugging running environments.

from sb3_contrib.common.maskable.utils import get_action_masks
from DeepLearning.Environments.NoSetupEnv import NoSetupDenseRewardEnv, NoSetupEnv
from DeepLearning.Environments.SetupEnv import SetupRandomWithRoadsEnv
from DeepLearning.Environments.CatanEnv import CatanEnv
from DeepLearning.Environments.SelfPlayEnv import SelfPlayEnv, SelfPlaySetupDotTotalEnv
import os
from Game.CatanPlayer import PlayerStatsTracker
from DeepLearning.PPO import MaskablePPO
from Agents.AgentRandom2 import AgentRandom2
from Agents.AgentMCTS import AgentMCTS
from tabulate import tabulate
from DeepLearning.Stats import headers
import pandas as pd
import random

def _winRateWithMargin(wins, games):
    """Return ``(winRate, marginError)`` for ``wins`` victories out of ``games``.

    ``winRate`` is the plain fraction ``wins / games``. ``marginError`` is
    ``1.96 * sqrt(p * (1 - p) / n)`` expressed in percent and rounded to two
    decimals, i.e. the same expression the agent-simulation cell of this
    notebook uses for its MarginError column. Returns ``(0.0, 0.0)`` when
    ``games`` is 0 so callers never divide by zero.
    """
    import math  # local import: this cell's import list does not include math

    if games <= 0:
        return 0.0, 0.0
    winRate = wins / games
    marginError = round(100 * (1.96 * math.sqrt((winRate * (1 - winRate)) / games)), 2)
    return winRate, marginError


def run(modelName="model_iteration_751", numEpisodes=100):
    """Play a saved model in ``NoSetupEnv`` and print a per-player stats table.

    The model is loaded from ``DeepLearning/SelfPlayModels/{modelName}.zip``.
    After the episodes finish, the accumulated stats of player 0 are reported
    under ``modelName`` and those of player 3 under ``"AgentRandom"``, followed
    by the raw win counts for all four seats.

    Args:
        modelName: File stem of the model to load. Defaults to the value that
            was previously hard-coded.
        numEpisodes: Number of episodes to play. Defaults to the previously
            hard-coded 100.

    Returns:
        None. Results are printed.
    """
    # env = NoSetupDenseRewardEnv(setupModel=MaskablePPO.load('DeepLearning/Models/SetupRandom_wins_1M.zip'))
    os.environ["UPDATE_MODELS"] = "False"
    os.environ["MODEL_NAME"] = "None"
    env = NoSetupEnv(setupModel=MaskablePPO.load('DeepLearning/Models/SetupOnly_DotTotal_100k.zip'))

    # model.save(f"DeepLearning/Models/{modelName}")
    model = MaskablePPO.load(f'DeepLearning/SelfPlayModels/{modelName}.zip')

    winner = [0, 0, 0, 0]
    truncatedGames = 0

    stats = PlayerStatsTracker()
    randomStats = PlayerStatsTracker()

    for episode in range(numEpisodes):
        done = False
        # A custom player list could be supplied here via env.reset(players=...).
        state, info = env.reset()

        while not done:
            action_masks = get_action_masks(env)
            action, _states = model.predict(state, action_masks=action_masks)
            state, reward, terminated, truncated, info = env.step(action.item())
            # Either flag ends the episode; ignoring `truncated` would keep
            # stepping an environment that has already finished.
            done = terminated or truncated

        if not terminated:
            # NOTE(review): a truncated episode is assumed to have no valid
            # winner, so it is left out of the tallies — confirm against the env.
            truncatedGames += 1
            continue

        winner[env.game.gameState.winner] += 1

        env.game.gameState.players[0].generatePlayerStats()
        env.game.gameState.players[3].generatePlayerStats()

        stats += env.game.gameState.players[0].stats
        randomStats += env.game.gameState.players[3].stats

    if truncatedGames:
        print(f"Skipped {truncatedGames} truncated episode(s).")

    gamesPlayed = sum(winner)
    if gamesPlayed == 0:
        # Nothing to average and no win rate to compute.
        print("No completed games; nothing to report.")
        print("\n\nWinnings: ", winner)
        return

    # Collect stats
    opponentName = "AgentRandom"

    stats.getAverages()
    randomStats.getAverages()

    # Row layout must match `headers`: name, win rate, margin of error, then
    # the tracker's own values. The margin column was previously missing here,
    # which shifted every stat one header to the left.
    agentData = stats.getList()
    agentData[:0] = [modelName, *_winRateWithMargin(winner[0], gamesPlayed)]
    randomData = randomStats.getList()
    randomData[:0] = [opponentName, *_winRateWithMargin(winner[3], gamesPlayed)]

    table = tabulate([agentData, randomData], headers=headers, tablefmt='simple')
    print(table)

    # Save to CSV (disabled)
    # fileName = f'{modelName}_vs_3{opponentName}.csv'
    # df = pd.DataFrame([agentData, randomData], columns=headers)
    # df.to_csv(f'DeepLearning/Data/{fileName}', index=False)

    print("\n\nWinnings: ", winner)

# Execute the evaluation when this cell runs.
run()

# Resource order: Brick, ore, wool, wheat, wood
# NOTE(review): presumably the element order of the per-resource lists in the printed stats — confirm.

In [15]:
"""
Running Agent simulations
"""
from Agents.AgentRandom2 import AgentRandom2
from Agents.AgentMCTS import AgentMCTS
from Agents.AgentUCT import AgentUCT
from Agents.AgentModel import AgentMultiModel, AgentModel
from Game.CatanGame import *
from CatanSimulator import CreateGame
from DeepLearning.PPO import MaskablePPO
from Game.CatanPlayer import PlayerStatsTracker
from tabulate import tabulate
from DeepLearning.Stats import headers
import dill as pickle
import pandas as pd
from CatanData.GameStateViewer import SaveGameStateImage, DisplayImage
import time
import math


# winner[i] counts the games won by the player at index i.
winner = [0, 0, 0, 0]
player0Stats = PlayerStatsTracker()
player1Stats = PlayerStatsTracker()

# Seats 0 and 1 are model-driven agents (each with its own setup and play
# model); seats 2 and 3 are AgentRandom2 instances.
players = [
    AgentMultiModel(
        "P0", 0,
        setupModel=MaskablePPO.load('DeepLearning/Models/SetupRoadsSelfPlay_v_SelfPlay_SetupDotTotal_wins_1M.zip'),
        fullSetup=True,
        model=MaskablePPO.load('DeepLearning/Models/SelfPlay/SelfPlay_SetupDotTotal_7vp_2M.zip'),
    ),
    AgentMultiModel(
        "P1", 1,
        setupModel=MaskablePPO.load('DeepLearning/Models/Setup/SetupRandom_wins_1M.zip'),
        fullSetup=False,
        model=MaskablePPO.load('DeepLearning/Models/NoSetup/NoSetupDenseRewardEnv-10M.zip'),
    ),
    AgentRandom2("P2", 2),
    AgentRandom2("P3", 3),
]

# When False, only the win counts are gathered and printed.
COLLECT_STATS = True

for episode in range(500):
    # Round-trip the fresh game through pickle so each episode plays on its own copy.
    game = pickle.loads(pickle.dumps(CreateGame(players), -1))

    # Always play at least one move, then continue until the state reads "OVER".
    gameOver = False
    while not gameOver:
        activePlayer = game.gameState.players[game.gameState.currPlayer]
        chosenAction = activePlayer.DoMove(game)
        chosenAction.ApplyAction(game.gameState)
        # DisplayImage(game.gameState) can be called here once the game is over.
        gameOver = game.gameState.currState == "OVER"

    winner[game.gameState.winner] += 1

    # Fold this game's stats for players 0 and 1 into the running trackers.
    if COLLECT_STATS:
        finalPlayers = game.gameState.players
        finalPlayers[0].generatePlayerStats()
        finalPlayers[1].generatePlayerStats()

        player0Stats += finalPlayers[0].stats
        player1Stats += finalPlayers[1].stats

# Build and print the summary table.
if COLLECT_STATS:
    player0Stats.getAverages()
    player1Stats.getAverages()

    player0Data = player0Stats.getList()
    player1Data = player1Stats.getList()

    gamesPlayed = sum(winner)
    p_hat0 = winner[0] / gamesPlayed
    p_hat1 = winner[1] / gamesPlayed
    # 1.96 * sqrt(p * (1 - p) / n), reported in percent with two decimals.
    margin_error0 = round(100 * (1.96 * math.sqrt((p_hat0 * (1 - p_hat0)) / gamesPlayed)), 2)
    margin_error1 = round(100 * (1.96 * math.sqrt((p_hat1 * (1 - p_hat1)) / gamesPlayed)), 2)

    # Prepend label, win rate and margin of error (in that order) in place.
    player0Data[:0] = ["Player0", p_hat0, margin_error0]
    player1Data[:0] = ["Player1", p_hat1, margin_error1]

    table = tabulate([player0Data, player1Data], headers=headers, tablefmt='simple')
    print(table)


print("\n\nWinnings: ", winner)


# Save to csv (disabled)
# fileName = f'SelfPlay_SetupDotTotal_7vp_2M_vs_NoSetupDenseRewardEnv-10M.csv'
# df = pd.DataFrame([player0Data, player1Data], columns=headers)
# df.to_csv(f'DeepLearning/Data/{fileName}', index=False)

# Brick, ore, wool, wheat, wood

AgentName      WinRate    MarginError    numTurns    victoryPoints  finalTradeRates                       numRoadsBuilt    devCardsBought  usedDevCards                        settlementsBuilt    citiesBuilt    devCardVP    largestArmy    longestRoad  resourcesReceived                          totalResourcesReceivedPerTurn    totalResourcesDiscarded    totalResourcesStolen  resourcesFromDevCard               totalResourcesFromDevCard  resourcesFromBankTrade               totalResourcesFromBankTrade  finalResourceProduction               setupResourceProduction              totalSetupResourceProduction  setupTradeRates                       setupResourceDiversity
-----------  ---------  -------------  ----------  ---------------  ----------------------------------  ---------------  ----------------  --------------------------------  ------------------  -------------  -----------  -------------  -------------  ---------------------------------------  -------------------------------  -----

In [6]:
# Inspect (and optionally update) the functions attached to a saved model
from DeepLearning.PPO import MaskablePPO
from DeepLearning.GetObservation import getObservation

# Load a saved model and print the two callables stored on it, in order.
model = MaskablePPO.load('DeepLearning/Models/Full_vp_100k.zip')
for hookName in ("getActionMask", "getObservation"):
    print(getattr(model, hookName))

# To attach a different observation function and re-save (disabled):
# model.getObservation = getObservation
# model.save('DeepLearning/Models/Full_vp_100k.zip')

<function getActionMask at 0x177b0e660>
<function getObservation at 0x177b0df80>
