In [1]:
from DeepLearning.PPO import MaskablePPO
from DeepLearning.Thesis.Environments.SelfPlay import SelfPlayDistribution
from DeepLearning.GetActionMask import getActionMask
from DeepLearning.Thesis.Observations.get_observation import getObservation, lowerBound, upperBound
import os

# Environment flags are written before the environment object is created.
# NOTE(review): presumably these are read by SelfPlayDistribution (or code it
# calls) to control opponent-model updating — confirm against that module.
os.environ.update({
    "UPDATE_MODELS_UNIFORM": "False",
    "UPDATE_MODELS_DIST": "False",
    "MODEL_NAME": "None",
    "MODEL_1_NAME": "",
    "MODEL_2_NAME": "",
    "MODEL_3_NAME": "",
})

env = SelfPlayDistribution()

# Callables handed to the project's MaskablePPO (DeepLearning.PPO) below.
actionMask = getActionMask
observation = getObservation

# Hyperparameters: separate policy (pi) and value (vf) networks, three layers of 128 each.
netArchDict = {"pi": [128, 128, 128], "vf": [128, 128, 128]}
gamma = 0.99
n_steps = 4096

# Run name; reused for the save path and for the TensorBoard log name.
saveName = "SelfPlay_Distribution"
savePath = f"DeepLearning/Thesis/Rewards/Models/{saveName}"

model = MaskablePPO(
    "MlpPolicy",
    env,
    verbose=1,
    policy_kwargs={"net_arch": netArchDict},
    gamma=gamma,
    n_steps=n_steps,
    getActionMask=actionMask,
    getObservation=observation,
    savePath=savePath,
    tensorboard_log="./tensorboard_logs_thesis/",
)
# savePath is also assigned as an attribute, in addition to the constructor argument.
model.savePath = savePath

# Kept as the final expression so the cell still displays the value returned by learn().
model.learn(total_timesteps=15_000_000, tb_log_name=saveName)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to ./tensorboard_logs_thesis/SelfPlay_Distribution_1


  logger.warn(


CheckingWinRate(Distribution): 0.08
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 81.5     |
|    ep_rew_mean     | -6.8     |
| time/              |          |
|    fps             | 382      |
|    iterations      | 1        |
|    time_elapsed    | 10       |
|    total_timesteps | 4096     |
---------------------------------
CheckingWinRate(Distribution): 0.21
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 84.7       |
|    ep_rew_mean          | -5.62      |
| time/                   |            |
|    fps                  | 263        |
|    iterations           | 2          |
|    time_elapsed         | 31         |
|    total_timesteps      | 8192       |
| train/                  |            |
|    approx_kl            | 0.01824773 |
|    clip_fraction        | 0.208      |
|    clip_range           | 0.2        |
|    entropy_loss         | -1.87      |
|    explained

<DeepLearning.PPO.MaskablePPO at 0x17f42ac50>

In [3]:
"""
Running Agent simulations
"""
from Agents.AgentRandom2 import AgentRandom2
from Agents.AgentMCTS import AgentMCTS
from Agents.AgentUCT import AgentUCT
from Agents.AgentModel import AgentMultiModel, AgentModel
from Game.CatanGame import *
from CatanSimulator import CreateGame
from DeepLearning.PPO import MaskablePPO
from Game.CatanPlayer import PlayerStatsTracker
from tabulate import tabulate
from DeepLearning.Stats import headers
import dill as pickle
from CatanData.GameStateViewer import SaveGameStateImage, DisplayImage
import math
import time

# Win counter with one slot per seat; the simulation loop indexes it with game.gameState.winner.
winner = [0] * 4

# Stat accumulators: seat 0 over all games, seat 0 over lost games only, and seat 1.
player0Stats = PlayerStatsTracker()
Player0LosingStats = PlayerStatsTracker()
player1Stats = PlayerStatsTracker()

# Trained checkpoints used as agents in this evaluation.
testModel = MaskablePPO.load(
    "DeepLearning/Thesis/Opponents/Models/Uniform/model_14667776.zip"
)
testModel2 = MaskablePPO.load(
    "DeepLearning/Thesis/Opponents/Models/Distribution/model_14966784.zip"
)
testModel3 = MaskablePPO.load(
    "DeepLearning/Thesis/Rewards/Models/Reward_win/Reward_win_10M.zip"
)
# Disabled checkpoint:
# testModel4 = MaskablePPO.load("DeepLearning/Thesis/Hyperparameters/Models/NetworkArch_512_512/Final.zip")

# Options shared by every agent in the line-up.
agentOptions = dict(recordStats=True, playerTrading=False)

# Active line-up, one agent per seat (the second argument is the seat number).
# Disabled alternatives that were swapped in for other experiments:
#   AgentUCT("P1", 0, recordStats=True, simulationCount=500)
#   AgentModel("P3", 3, recordStats=True, playerTrading=False, model=testModel2)
#   AgentRandom2("P1", 1, recordStats=True, playerTrading=False)
#   AgentRandom2("P2", 2, recordStats=True, playerTrading=False)
players = [
    AgentModel("P0", 0, model=testModel, **agentOptions),
    AgentModel("P1", 1, model=testModel2, **agentOptions),
    AgentModel("P2", 2, model=testModel3, **agentOptions),
    AgentRandom2("P3", 3, **agentOptions),
]


# Toggle for accumulating per-player statistics after each game.
COLLECT_STATS = True

for episode in range(5000):
    # Build a game from the shared agent list, then round-trip it through
    # pickle (dill, highest protocol) so the episode runs on a copy.
    game = pickle.loads(pickle.dumps(CreateGame(players), -1))

    # Reset every episode. Nothing below increments it (the only increment
    # lives in the disabled debug snippet), so it remains 0.
    numTurns = 0

    # Apply one action at a time until the game state reports "OVER".
    # At least one action is always applied before the state is checked.
    gameOver = False
    while not gameOver:
        currPlayer = game.gameState.players[game.gameState.currPlayer]
        agentAction = currPlayer.DoMove(game)
        agentAction.ApplyAction(game.gameState)

        # Disabled debug visualisation / turn counting:
        # if currPlayer.seatNumber == 0 and agentAction.type == 'EndTurn':
        #     DisplayImage(game.gameState, agentAction)
        #     time.sleep(0.25)
        #     numTurns += 1

        gameOver = game.gameState.currState == "OVER"
        # (on game over, optionally: DisplayImage(game.gameState, agentAction))

    # Record which seat won; seat 0 is the agent under evaluation.
    winningSeat = game.gameState.winner
    winner[winningSeat] += 1
    lost = winningSeat != 0

    if not COLLECT_STATS:
        continue

    # Stats are generated for seats 0 and 1 only, then folded into the trackers.
    for seat in (0, 1):
        game.gameState.players[seat].generatePlayerStats()

    player0Stats += game.gameState.players[0].stats
    player1Stats += game.gameState.players[1].stats
    if lost:
        # Separate accumulator restricted to the games seat 0 did not win.
        Player0LosingStats += game.gameState.players[0].stats

# Summarise the run: averaged stats per tracker, win rates and their margins of error.
if COLLECT_STATS:
    # getAverages() is called for its effect on each tracker (return value unused).
    # NOTE(review): if it averages in place, re-running this cell without
    # re-running the simulation would average twice — confirm.
    for tracker in (player0Stats, Player0LosingStats, player1Stats):
        tracker.getAverages()

    player0Data = player0Stats.getList()
    player0LosingData = Player0LosingStats.getList()
    player1Data = player1Stats.getList()

    # Total number of finished games.
    totalGames = sum(winner)

    # Observed win proportions for seats 0 and 1.
    p_hat0 = winner[0] / totalGames
    p_hat1 = winner[1] / totalGames

    # Margin of error in percent, rounded to 2 decimals:
    # 1.96 * sqrt(p * (1 - p) / n), i.e. the normal-approximation half-width
    # at the 95% level (z = 1.96).
    standardError0 = math.sqrt((p_hat0 * (1 - p_hat0)) / totalGames)
    standardError1 = math.sqrt((p_hat1 * (1 - p_hat1)) / totalGames)
    margin_error0 = round(100 * (1.96 * standardError0), 2)
    margin_error1 = round(100 * (1.96 * standardError1), 2)

    # Prepend (name, win rate, margin of error) to each row. Values are
    # inserted at index 0 in reverse so they end up in that order. The
    # losing-games row uses -1 placeholders for both numeric columns.
    leadingColumns = (
        (player0Data, ("Player0", p_hat0, margin_error0)),
        (player0LosingData, ("Player0LossesStats", -1, -1)),
        (player1Data, ("Player1", p_hat1, margin_error1)),
    )
    for row, prefix in leadingColumns:
        for value in reversed(prefix):
            row.insert(0, value)

    table = tabulate(
        [player0Data, player0LosingData, player1Data],
        headers=headers,
        tablefmt='simple',
    )
    print(table)

# numTurns comes from the simulation loop, where it is reset to 0 each episode
# and its increment is commented out, so this currently prints 0.
print(f"\nNum turns: {numTurns}")

print("\n\nWinnings: ", winner)


# Brick, ore, wool, wheat, wood

AgentName             WinRate    MarginError    numTurns    victoryPoints    numRoadsBuilt    devCardsBought  usedDevCards                         settlementsBuilt    citiesBuilt    devCardVP    largestArmy    longestRoad  resourcesReceived                           totalResourcesReceivedPerTurn    totalResourcesDiscarded    totalResourcesStolen  resourcesFromDevCard                   totalResourcesFromDevCard  resourcesFromBankTrade               finalResourceProduction               finalTradeRates                                                                                     setupResourceProduction                totalSetupResourceProduction  setupTradeRates                        setupResourceDiversity    turnsForFirstSettlement    noSettlementsBuilt    turnsForFirstCity    noCitysBuilt    numRoadsFor1stSettlement    totalResourcesFromBankTrade    goodSettlementBankTrades    badSettlementBankTrades    goodCityBankTrades    badCityBankTrades    goodRoadBankTrades    badRoadBank

In [4]:
import pandas as pd

# Write the three summary rows (seat 0, seat 0 losses only, seat 1) to a CSV
# file, using the same column headers as the printed table.
# NOTE(review): the file name is hard-coded — confirm it matches the agent
# line-up used in the simulation cell before overwriting earlier results.
fileName = f'SP_Distribution_v_Random.csv'
summaryRows = [player0Data, player0LosingData, player1Data]
df = pd.DataFrame(summaryRows, columns=headers)
df.to_csv(f'DeepLearning/Thesis/Opponents/Data/{fileName}', index=False)