In [None]:
from DeepLearning.PPO import MaskablePPO
from DeepLearning.Thesis.Environments.TurnLimitDense import TurnLimitDenseScaledLoss
from DeepLearning.GetActionMask import getActionMask
from DeepLearning.Thesis.Observations.get_observation import getObservation
import os

# Turn budget for this run. It is exported before the environment is built
# (presumably TurnLimitDenseScaledLoss reads it on construction -- confirm).
os.environ["TURN_LIMIT"] = "50"

env = TurnLimitDenseScaledLoss()
actionMask = getActionMask
observation = getObservation

# Hyper-parameters handed to MaskablePPO below.
netArchDict = {"pi": [128, 128, 128], "vf": [128, 128, 128]}
gamma = 0.99
n_steps = 4096

# saveName doubles as the model sub-directory and the TensorBoard run name.
saveName = "TurnLimitDenseScaledLoss"
savePath = f"DeepLearning/Thesis/DenseRewards/Models/{saveName}"

model = MaskablePPO(
    "MlpPolicy",
    env,
    verbose=1,
    policy_kwargs={"net_arch": netArchDict},
    gamma=gamma,
    n_steps=n_steps,
    getActionMask=actionMask,
    getObservation=observation,
    savePath=savePath,
    tensorboard_log="./tensorboard_logs_thesis/",
)
# Alternative: continue from an existing checkpoint instead of a fresh model.
# model = MaskablePPO.load("DeepLearning/Thesis/DenseRewards/Models/Reward_build_trade/Reward_build_trade_2M.zip", env)

# Assigned explicitly so savePath is also set when the load(...) line above is
# used in place of the constructor.
model.savePath = savePath
model.learn(total_timesteps=20_000_000, tb_log_name=saveName)

In [32]:
"""
Running Agent simulations
"""
from Agents.AgentRandom2 import AgentRandom2
from Agents.AgentMCTS import AgentMCTS
from Agents.AgentUCT import AgentUCT
from Agents.AgentModel import AgentMultiModel, AgentModel
from Game.CatanGame import *
from CatanSimulator import CreateGame
from DeepLearning.PPO import MaskablePPO
from Game.CatanPlayer import PlayerStatsTracker
from tabulate import tabulate
from DeepLearning.Stats import headers
import dill as pickle
from CatanData.GameStateViewer import SaveGameStateImage, DisplayImage
import math
import time

# Win tally; one counter per seat of the four-player game.
winner = [0] * 4

# Accumulated statistics. Player0LosingStats only receives the games that
# player 0 did not win.
player0Stats = PlayerStatsTracker()
Player0LosingStats = PlayerStatsTracker()
player1Stats = PlayerStatsTracker()
player2Stats = PlayerStatsTracker()
player3Stats = PlayerStatsTracker()

setupModel = MaskablePPO.load("DeepLearning/Thesis/Setup/Models/SetupRandom/model_832512.zip")

# Model under evaluation (seat 0). The commented loads are alternative
# opponents that can be enabled together with their AgentModel lines below.
testModel0 = MaskablePPO.load("DeepLearning/Thesis/DenseRewards/Models/Strategy_GenericTurnLimit/model_3M.zip")
# testModel1 = MaskablePPO.load("DeepLearning/Thesis/Opponents/Models/Distribution/model_14966784.zip")
# testModel2 = MaskablePPO.load("DeepLearning/Thesis/Rewards/Models/Reward_win/Reward_win_10M.zip")
# testModel3 = MaskablePPO.load("DeepLearning/Thesis/Opponents/Models/VsModel/model_1536000.zip")

# Seat 0 is the trained model; seats 1-3 are random agents.
players = [
    AgentModel("P0", 0, recordStats=True, playerTrading=False, model=testModel0),
    # AgentUCT("P1", 0, recordStats=True, simulationCount=500),
    # AgentModel("P1", 1, recordStats=True, playerTrading=False, model=testModel1),
    # AgentModel("P2", 2, recordStats=True, playerTrading=False, model=testModel2),
    # AgentModel("P3", 3, recordStats=True, playerTrading=False, model=testModel3),
    AgentRandom2("P1", 1, recordStats=True, playerTrading=False),
    AgentRandom2("P2", 2, recordStats=True, playerTrading=False),
    AgentRandom2("P3", 3, recordStats=True, playerTrading=False),
]


COLLECT_STATS = True
for episode in range(2000):
    # Round-trip the new game through dill so each episode plays on its own copy.
    game = pickle.loads(pickle.dumps(CreateGame(players), -1))
    numTurns = 0

    # Play until the game state reports "OVER"; at least one move is always made.
    gameOver = False
    while not gameOver:
        currPlayer = game.gameState.players[game.gameState.currPlayer]

        agentAction = currPlayer.DoMove(game)
        agentAction.ApplyAction(game.gameState)

        # A turn is counted every time seat 0 ends its turn.
        # (Debug aid: DisplayImage(game.gameState, agentAction) can be called here.)
        if currPlayer.seatNumber == 0 and agentAction.type == 'EndTurn':
            numTurns += 1

        gameOver = game.gameState.currState == "OVER"

    finalState = game.gameState
    winner[finalState.winner] += 1
    lost = finalState.winner != 0

    if not COLLECT_STATS:
        continue

    # Finalise every seat's per-game stats before folding them into the totals.
    for seat in range(4):
        finalState.players[seat].generatePlayerStats()

    player0Stats += finalState.players[0].stats
    player1Stats += finalState.players[1].stats
    player2Stats += finalState.players[2].stats
    player3Stats += finalState.players[3].stats
    if lost:
        # Separate tally of player 0's stats restricted to games it lost.
        Player0LosingStats += finalState.players[0].stats

def _marginOfError(p_hat, totalGames):
    """Return the 95% margin of error of a win rate, in percentage points.

    Uses the normal-approximation interval ``1.96 * sqrt(p * (1 - p) / n)``,
    scaled by 100 and rounded to two decimals.

    Args:
        p_hat: Observed win proportion of one player (wins / games).
        totalGames: Number of games the proportion was measured over.

    Returns:
        The half-width of the 95% confidence interval, as a percentage.
    """
    return round(100 * (1.96 * math.sqrt((p_hat * (1 - p_hat)) / totalGames)), 2)


# Collect stats
if COLLECT_STATS:
    # getAverages() is invoked on every tracker before its row is extracted.
    for tracker in (player0Stats, Player0LosingStats, player1Stats, player2Stats, player3Stats):
        tracker.getAverages()
    player0Data = player0Stats.getList()
    player0LosingData = Player0LosingStats.getList()
    player1Data = player1Stats.getList()
    player2Data = player2Stats.getList()
    player3Data = player3Stats.getList()

    totalGames = sum(winner)

    # Win proportion per seat. Each seat uses its OWN win count: previously
    # seats 2 and 3 were computed from winner[0] / winner[1], so their margin
    # of error silently duplicated that of seats 0 and 1.
    p_hat0, p_hat1, p_hat2, p_hat3 = (wins / totalGames for wins in winner)
    margin_error0 = _marginOfError(p_hat0, totalGames)
    margin_error1 = _marginOfError(p_hat1, totalGames)
    margin_error2 = _marginOfError(p_hat2, totalGames)
    margin_error3 = _marginOfError(p_hat3, totalGames)

    # Prepend the three leading columns (name, win rate, margin of error) to
    # each row. The "losses only" row has no win rate of its own, hence -1.
    player0Data[:0] = ["Player0", p_hat0, margin_error0]
    player0LosingData[:0] = ["Player0LossesStats", -1, -1]
    player1Data[:0] = ["Player1", p_hat1, margin_error1]
    player2Data[:0] = ["Player2", p_hat2, margin_error2]
    player3Data[:0] = ["Player3", p_hat3, margin_error3]

    table = tabulate([player0Data, player0LosingData, player1Data, player2Data, player3Data], headers=headers, tablefmt='simple')
    print(table)

# NOTE(review): numTurns is reset at the start of every episode, so this prints
# the turn count of the last game only, not an average over all games.
print(f"\nNum turns: {numTurns}")

print("\n\nWinnings: ", winner)


# Brick, ore, wool, wheat, wood

AgentName             WinRate    MarginError    numTurns    victoryPoints    numRoadsBuilt    devCardsBought  usedDevCards                         settlementsBuilt    citiesBuilt    devCardVP    largestArmy    longestRoad  resourcesReceived                         totalResourcesReceivedPerTurn    totalResourcesDiscarded    totalResourcesStolen  resourcesFromDevCard                   totalResourcesFromDevCard  resourcesFromBankTrade               finalResourceProduction                 finalTradeRates                                                                                   setupResourceProduction                totalSetupResourceProduction  setupTradeRates                        setupResourceDiversity    turnsForFirstSettlement    noSettlementsBuilt    turnsForFirstCity    noCitysBuilt    numRoadsFor1stSettlement    totalResourcesFromBankTrade    goodSettlementBankTrades    badSettlementBankTrades    goodCityBankTrades    badCityBankTrades    goodRoadBankTrades    badRoadBankTr

In [None]:
# import pandas as pd

# # Save to csv
# fileName = f'Task_30Turns_v_Random.csv'
# df = pd.DataFrame([player0Data, player0LosingData, player1Data], columns=headers)
# df.to_csv(f'DeepLearning/Thesis/Tasks/Data/{fileName}', index=False)