In [1]:
from DeepLearning.PPO import MaskablePPO
from DeepLearning.Thesis.Observations.nodes_v_hexes import getHexInNodeObs, hexInNodeLowerBound, hexInNodeUpperBound
from DeepLearning.Thesis.Environments.nodes_v_hexes import ObsTestingEnv
from DeepLearning.GetActionMask import getActionMask
import os

# --- Train a MaskablePPO agent using the hex-in-node observation functions ---

# Run identifier: used for the model save directory and the TensorBoard run name.
saveName = "observation_hexInNode"
savePath = f"DeepLearning/Thesis/Observations/Models/{saveName}"

# Callables handed to the model for observation building and action masking.
observation = getHexInNodeObs
actionMask = getActionMask

# Environment configured with the hex-in-node bounds and observation function.
env = ObsTestingEnv(
    lowerBounds=hexInNodeLowerBound,
    upperBounds=hexInNodeUpperBound,
    getObservationFunction=getHexInNodeObs,
)

model = MaskablePPO(
    "MlpPolicy",
    env,
    verbose=1,
    getActionMask=actionMask,
    getObservation=observation,
    savePath=savePath,
    tensorboard_log="./tensorboard_logs_thesis/",
)

# NOTE(review): the evaluation cell loads "<savePath>/Final.zip"; presumably the
# project's MaskablePPO writes that file via `savePath` during learn() — confirm,
# since the explicit save below is commented out and points at another experiment.
# learn() is the cell's final expression, so its return value is what gets displayed.
model.learn(total_timesteps=1_000_000, tb_log_name=saveName)
# model.save("DeepLearning/Models/TradingBase_PlayerTradingAdded_20Turns/Final")

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to ./tensorboard_logs_thesis/observation_hexInNode_1


  logger.warn(


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 105      |
|    ep_rew_mean     | 6.11     |
| time/              |          |
|    fps             | 958      |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 2048     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 96.6       |
|    ep_rew_mean          | 6.19       |
| time/                   |            |
|    fps                  | 805        |
|    iterations           | 2          |
|    time_elapsed         | 5          |
|    total_timesteps      | 4096       |
| train/                  |            |
|    approx_kl            | 0.01972838 |
|    clip_fraction        | 0.205      |
|    clip_range           | 0.2        |
|    entropy_loss         | -1.91      |
|    explained_variance   | 0.0995     |
|    learning_rate        | 0.0003     |
|   



-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 92.3        |
|    ep_rew_mean          | 7.11        |
| time/                   |             |
|    fps                  | 671         |
|    iterations           | 11          |
|    time_elapsed         | 33          |
|    total_timesteps      | 22528       |
| train/                  |             |
|    approx_kl            | 0.024323277 |
|    clip_fraction        | 0.276       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.82       |
|    explained_variance   | 0.85        |
|    learning_rate        | 0.0003      |
|    loss                 | -0.041      |
|    n_updates            | 100         |
|    policy_gradient_loss | -0.0586     |
|    value_loss           | 0.0681      |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 93.3  

<DeepLearning.PPO.MaskablePPO at 0x16bc87950>

In [3]:
"""
Running Agent simulations
"""
from Agents.AgentRandom2 import AgentRandom2
from Agents.AgentMCTS import AgentMCTS
from Agents.AgentUCT import AgentUCT
from Agents.AgentModel import AgentMultiModel, AgentModel
from Game.CatanGame import *
from CatanSimulator import CreateGame
from DeepLearning.PPO import MaskablePPO
from Game.CatanPlayer import PlayerStatsTracker
from tabulate import tabulate
from DeepLearning.Stats import headers
import dill as pickle
from CatanData.GameStateViewer import SaveGameStateImage, DisplayImage
import math

# Win counter indexed by seat number (one slot per seat, four seats).
winner = [0] * 4

# Stat accumulators: seat 0 over every game, seat 0 over lost games only,
# and seat 1 over every game.
player0Stats = PlayerStatsTracker()
Player0LosingStats = PlayerStatsTracker()
player1Stats = PlayerStatsTracker()

# Model under evaluation, loaded from the saved checkpoint.
testModel = MaskablePPO.load("DeepLearning/Thesis/Observations/Models/observation_hexInNode/Final.zip")

# Seat 0 is driven by the loaded model; seats 1-3 are AgentRandom2 opponents.
# Agents are constructed in seat order, with player trading disabled for all.
players = [AgentModel("P0", 0, recordStats=True, playerTrading=False, model=testModel)]
players.extend(
    AgentRandom2(agentName, seat, recordStats=True, playerTrading=False)
    for agentName, seat in (("P1", 1), ("P2", 2), ("P3", 3))
)

# When True, per-game player stats are accumulated into the tracker objects.
COLLECT_STATS = True

# Play 5000 games, tallying the winning seat and (optionally) player statistics.
for episode in range(5000):
    # Round-trip the fresh game through dill (`pickle` is dill here, protocol -1)
    # to obtain a copy of it.
    # NOTE(review): presumably this keeps each game independent of the shared
    # `players` list — confirm against CreateGame.
    game = pickle.loads(pickle.dumps(CreateGame(players), -1))

    # Number of turns seat 0 has ended in THIS game (reset every episode).
    numTurns = 0

    while True:
        activePlayer = game.gameState.players[game.gameState.currPlayer]

        # Ask the active agent for its move and apply it to the game state.
        chosenAction = activePlayer.DoMove(game)
        chosenAction.ApplyAction(game.gameState)

        seat0EndedTurn = activePlayer.seatNumber == 0 and chosenAction.type == 'EndTurn'
        if seat0EndedTurn:
            # Debug aid: DisplayImage(game.gameState, chosenAction); time.sleep(1)
            numTurns += 1

        # The state is checked only after a move has been applied.
        if game.gameState.currState == "OVER":
            # Debug aid: DisplayImage(game.gameState, chosenAction)
            break

    # Record which seat won and whether seat 0 lost this game.
    winningSeat = game.gameState.winner
    # print("Winner: ", winningSeat)
    winner[winningSeat] += 1
    player0Lost = winningSeat != 0

    if not COLLECT_STATS:
        continue

    # Generate the per-game stats for seats 0 and 1, then fold them into the
    # running trackers; seat 0's stats also feed the losses-only tracker.
    seat0Player = game.gameState.players[0]
    seat0Player.generatePlayerStats()
    seat1Player = game.gameState.players[1]
    seat1Player.generatePlayerStats()

    player0Stats += seat0Player.stats
    player1Stats += seat1Player.stats
    if player0Lost:
        Player0LosingStats += seat0Player.stats

def _marginOfErrorPct(p_hat, sampleSize):
    """Return 1.96 * sqrt(p_hat * (1 - p_hat) / sampleSize) as a percentage, rounded to 2 places.

    1.96 is the usual z-score for a 95% normal-approximation interval.
    """
    return round(100*(1.96 * math.sqrt((p_hat * (1 - p_hat)) / sampleSize)), 2)


def _prependSummary(row, label, winRate, marginError):
    """Insert label, win rate and margin of error at the front of `row`, in place."""
    # Each insert goes to index 0, so feed the cells in reverse of the final order.
    for cell in (marginError, winRate, label):
        row.insert(0, cell)


# Build and print the summary table from the accumulated trackers.
if COLLECT_STATS:
    for tracker in (player0Stats, Player0LosingStats, player1Stats):
        tracker.getAverages()

    player0Data = player0Stats.getList()
    player0LosingData = Player0LosingStats.getList()
    player1Data = player1Stats.getList()

    # Win proportions for seats 0 and 1 over all games played.
    totalGames = sum(winner)
    p_hat0 = winner[0] / totalGames
    p_hat1 = winner[1] / totalGames
    margin_error0 = _marginOfErrorPct(p_hat0, totalGames)
    margin_error1 = _marginOfErrorPct(p_hat1, totalGames)

    # The losses-only row has no win rate or margin; -1 is used as a placeholder.
    _prependSummary(player0Data, "Player0", p_hat0, margin_error0)
    _prependSummary(player0LosingData, "Player0LossesStats", -1, -1)
    _prependSummary(player1Data, "Player1", p_hat1, margin_error1)

    table = tabulate(
        [player0Data, player0LosingData, player1Data],
        headers=headers,
        tablefmt='simple',
    )
    print(table)

# NOTE: numTurns is reset at the start of every episode in the simulation loop,
# so this reports the turn count of the last game only.
print(f"\nNum turns: {numTurns}")

print("\n\nWinnings: ", winner)


# Brick, ore, wool, wheat, wood

AgentName             WinRate    MarginError    numTurns    victoryPoints    numRoadsBuilt    devCardsBought  usedDevCards                         settlementsBuilt    citiesBuilt    devCardVP    largestArmy    longestRoad  resourcesReceived                           totalResourcesReceivedPerTurn    totalResourcesDiscarded    totalResourcesStolen  resourcesFromDevCard                   totalResourcesFromDevCard  resourcesFromBankTrade               finalResourceProduction              finalTradeRates                                                                                      setupResourceProduction                totalSetupResourceProduction  setupTradeRates                        setupResourceDiversity    turnsForFirstSettlement    noSettlementsBuilt    turnsForFirstCity    noCitysBuilt    numRoadsFor1stSettlement    totalResourcesFromBankTrade    goodSettlementBankTrades    badSettlementBankTrades    goodCityBankTrades    badCityBankTrades    goodRoadBankTrades    badRoadBank

In [1]:
# import pandas as pd

# # # Save to csv
# fileName = f'observation_hexInNode_v_3Random.csv'
# df = pd.DataFrame([player0Data, player0LosingData, player1Data], columns=headers)
# df.to_csv(f'DeepLearning/Thesis/Observations/Data/{fileName}', index=False)

from DeepLearning.Thesis.Observations.onehot_v_label import labelLowerBound
from DeepLearning.Thesis.Observations.nodes_v_hexes import nodeInHexLowerBound
from DeepLearning.Thesis.Observations.single_opponent import singleOpponentLowerBound

# Print the length of each imported lower-bound sequence, one per line, in the
# order: labelLowerBound, nodeInHexLowerBound, singleOpponentLowerBound.
for lowerBound in (labelLowerBound, nodeInHexLowerBound, singleOpponentLowerBound):
    print(len(lowerBound))

491
2257
1875
