In [31]:
# !pip install tensorflow
# !pip install gym
# !pip install keras
# !pip install keras-rl2

In [32]:
from gym import Env
from gym.spaces import Dict, Discrete, Box
from enum import Enum
from collections import defaultdict
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory
import numpy as np
import random

In [33]:
class TerrEnums(Enum):
    """Identifiers for the 42 territories on the board, grouped by continent.

    The integer values are contiguous (0-41) and are used elsewhere in this
    notebook as positions in the territory list (``board.terrList[member.value]``),
    so the numbering must stay in step with the order in which
    ``initialiseTerritories`` builds that list.
    """
    # north america
    alaska = 0
    alberta = 1
    northWestTerritory = 2
    ontario = 3
    quebec = 4
    greenland = 5
    westernUnitedStates = 6
    easternUnitedStates = 7
    centralAmerica = 8
    
    # south america
    venezuela = 9
    brazil = 10
    peru = 11
    argentina = 12
    
    # europe
    iceland = 13
    greatBritain = 14
    westernEurope = 15
    northernEurope = 16
    southernEurope = 17
    scandinavia = 18
    ukraine = 19
    
    # africa
    northAfrica = 20
    egypt = 21
    eastAfrica = 22
    congo = 23
    southAfrica = 24
    madagascar = 25
    
    # asia
    middleEast = 26
    afghanistan = 27
    ural = 28
    india = 29
    china = 30
    siberia = 31
    siam = 32
    mongolia = 33
    irkutsk = 34
    yakutsk = 35
    kamchatka = 36
    japan = 37
    
    # australia
    indonesia = 38
    newGuinea = 39
    westernAustralia = 40
    easternAustralia = 41

In [34]:
class ContEnums(Enum):
    """Identifiers for the six continents.

    ``initialiseContinents`` switches on these integer values to pick each
    continent's display name and full-ownership troop bonus.
    """
    northAmerica = 0
    southAmerica = 1
    europe = 2
    africa = 3
    asia = 4
    australia = 5

In [35]:
class TroopEnums(Enum):
    """Troop symbol printed on a card.

    ``initialiseCards`` stores the integer value on each card, and
    ``tradingCards`` matches sets against the raw values 0, 1 and 2.
    """
    infantry = 0
    cavalry = 1
    artillery = 2

In [36]:
def initialiseTerritories():
    """Build the 42 unowned, empty territories of the board.

    Returns:
        list of Territory, ordered so that the territory for a TerrEnums
        member sits at position ``member.value``. Every territory starts with
        ``ownedBy=None`` and ``currTroops=0``.

    Borders are stored as lists of TerrEnums members and are symmetric: if A
    lists B as a connection then B lists A. Two reverse borders that were
    missing (Middle East -> Ukraine and East Africa -> North Africa) are
    included so that attacks across those borders work in both directions.
    """
    T, C = TerrEnums, ContEnums

    # (index, display name, continent, connections) in TerrEnums value order
    layout = [
        # north american territories
        (T.alaska, "Alaska", C.northAmerica,
         [T.alberta, T.northWestTerritory, T.kamchatka]),
        (T.alberta, "Alberta", C.northAmerica,
         [T.alaska, T.northWestTerritory, T.ontario, T.westernUnitedStates]),
        (T.northWestTerritory, "North West Territory", C.northAmerica,
         [T.alaska, T.alberta, T.ontario, T.greenland]),
        (T.ontario, "Ontario", C.northAmerica,
         [T.northWestTerritory, T.quebec, T.alberta, T.westernUnitedStates,
          T.easternUnitedStates, T.greenland]),
        (T.quebec, "Quebec", C.northAmerica,
         [T.ontario, T.easternUnitedStates, T.greenland]),
        (T.greenland, "Greenland", C.northAmerica,
         [T.northWestTerritory, T.ontario, T.quebec, T.iceland]),
        (T.westernUnitedStates, "Western United States", C.northAmerica,
         [T.alberta, T.ontario, T.easternUnitedStates, T.centralAmerica]),
        (T.easternUnitedStates, "Eastern United States", C.northAmerica,
         [T.westernUnitedStates, T.ontario, T.quebec, T.centralAmerica]),
        (T.centralAmerica, "Central America", C.northAmerica,
         [T.westernUnitedStates, T.easternUnitedStates, T.venezuela]),

        # south american territories
        (T.venezuela, "Venezuela", C.southAmerica,
         [T.centralAmerica, T.brazil, T.peru]),
        (T.brazil, "Brazil", C.southAmerica,
         [T.venezuela, T.peru, T.argentina, T.northAfrica]),
        (T.peru, "Peru", C.southAmerica,
         [T.brazil, T.venezuela, T.argentina]),
        (T.argentina, "Argentina", C.southAmerica,
         [T.peru, T.brazil]),

        # european territories
        (T.iceland, "Iceland", C.europe,
         [T.greenland, T.greatBritain, T.scandinavia]),
        (T.greatBritain, "Great Britain", C.europe,
         [T.iceland, T.scandinavia, T.westernEurope, T.northernEurope]),
        (T.westernEurope, "Western Europe", C.europe,
         [T.northAfrica, T.greatBritain, T.southernEurope, T.northernEurope]),
        (T.northernEurope, "Northern Europe", C.europe,
         [T.greatBritain, T.scandinavia, T.ukraine, T.southernEurope,
          T.westernEurope]),
        (T.southernEurope, "Southern Europe", C.europe,
         [T.northAfrica, T.egypt, T.middleEast, T.westernEurope,
          T.northernEurope, T.ukraine]),
        (T.scandinavia, "Scandinavia", C.europe,
         [T.ukraine, T.iceland, T.greatBritain, T.northernEurope]),
        (T.ukraine, "Ukraine", C.europe,
         [T.southernEurope, T.northernEurope, T.scandinavia, T.ural,
          T.afghanistan, T.middleEast]),

        # african territories
        (T.northAfrica, "North Africa", C.africa,
         [T.brazil, T.westernEurope, T.southernEurope, T.egypt, T.eastAfrica,
          T.congo]),
        (T.egypt, "Egypt", C.africa,
         [T.northAfrica, T.southernEurope, T.eastAfrica, T.middleEast]),
        # northAfrica added: North Africa already listed East Africa
        (T.eastAfrica, "East Africa", C.africa,
         [T.middleEast, T.egypt, T.congo, T.southAfrica, T.madagascar,
          T.northAfrica]),
        (T.congo, "Congo", C.africa,
         [T.northAfrica, T.eastAfrica, T.southAfrica]),
        (T.southAfrica, "South Africa", C.africa,
         [T.madagascar, T.congo, T.eastAfrica]),
        (T.madagascar, "Madagascar", C.africa,
         [T.southAfrica, T.eastAfrica]),

        # asian territories
        # ukraine added: Ukraine already listed Middle East
        (T.middleEast, "Middle East", C.asia,
         [T.egypt, T.southernEurope, T.eastAfrica, T.afghanistan, T.india,
          T.ukraine]),
        (T.afghanistan, "Afghanistan", C.asia,
         [T.ukraine, T.middleEast, T.india, T.china, T.ural]),
        (T.ural, "Ural", C.asia,
         [T.ukraine, T.afghanistan, T.china, T.siberia]),
        (T.india, "India", C.asia,
         [T.middleEast, T.afghanistan, T.china, T.siam]),
        (T.china, "China", C.asia,
         [T.siam, T.india, T.afghanistan, T.ural, T.siberia, T.mongolia]),
        (T.siberia, "Siberia", C.asia,
         [T.ural, T.yakutsk, T.irkutsk, T.mongolia, T.china]),
        (T.siam, "Siam", C.asia,
         [T.india, T.china, T.indonesia]),
        (T.mongolia, "Mongolia", C.asia,
         [T.china, T.siberia, T.irkutsk, T.kamchatka, T.japan]),
        (T.irkutsk, "Irkutsk", C.asia,
         [T.mongolia, T.siberia, T.yakutsk, T.kamchatka]),
        (T.yakutsk, "Yakutsk", C.asia,
         [T.siberia, T.irkutsk, T.kamchatka]),
        (T.kamchatka, "Kamchatka", C.asia,
         [T.alaska, T.japan, T.yakutsk, T.irkutsk, T.mongolia]),
        (T.japan, "Japan", C.asia,
         [T.mongolia, T.kamchatka]),

        # australian territories
        (T.indonesia, "Indonesia", C.australia,
         [T.siam, T.newGuinea, T.westernAustralia]),
        (T.newGuinea, "New Guinea", C.australia,
         [T.indonesia, T.westernAustralia, T.easternAustralia]),
        (T.westernAustralia, "Western Australia", C.australia,
         [T.indonesia, T.newGuinea, T.easternAustralia]),
        (T.easternAustralia, "Eastern Australia", C.australia,
         [T.newGuinea, T.westernAustralia]),
    ]

    terrList = []
    for index, name, continent, connections in layout:
        # nobody owns the territory yet and it holds no troops
        terrList.append(Territory(index, name, None, continent, 0, connections))

    return terrList

In [37]:
def initialisePlayers():
    """Create the four participants of a game.

    Returns:
        list whose position 0 is a plain ``Player`` and whose positions 1-3
        are ``SimpleAgent`` opponents, each given a strategy drawn uniformly
        from 0, 1 or 2. Every participant starts with zero troops and its
        own empty territory and card lists.
    """
    roster = [Player(0, 0, [], [])]

    # scripted opponents occupy seats 1 to 3
    for seat in (1, 2, 3):
        roster.append(SimpleAgent(seat, 0, [], [], random.randint(0, 2)))

    return roster

In [38]:
def initialiseCards():
    """Build the deck: one card per territory, in a random territory order.

    The first 14 cards dealt carry infantry, the next 14 cavalry and the
    last 14 artillery. Each card stores the integer values of its troop type
    and territory rather than the enum members.

    Returns:
        list of Card.
    """
    drawOrder = list(range(len(TerrEnums)))
    random.shuffle(drawOrder)
    # territories are taken from the back of the shuffled list
    drawOrder.reverse()

    troopBands = (TroopEnums.infantry, TroopEnums.cavalry, TroopEnums.artillery)
    cardsPerBand = 14

    deck = []
    for position, terrIndex in enumerate(drawOrder):
        band = position // cardsPerBand
        if band < len(troopBands):
            deck.append(Card(troopBands[band].value, TerrEnums(terrIndex).value))

    return deck

In [39]:
def initialiseContinents(terrList):
    """Group territories into their continents.

    Args:
        terrList: territories whose ``parentContinent`` is a ContEnums member.

    Returns:
        list of Continent in ContEnums order, each holding its member
        territories, its full-ownership troop bonus and its territory count.
    """
    # display name and full-ownership bonus, keyed by ContEnums value
    continentDetails = {
        0: ("North America", 5),
        1: ("South America", 2),
        2: ("Europe", 5),
        3: ("Africa", 3),
        4: ("Asia", 7),
        5: ("Australia", 2),
    }

    contList = []
    for continent in ContEnums:
        members = [
            territory for territory in terrList
            if territory.parentContinent.value == continent.value
        ]

        if continent.value in continentDetails:
            label, bonus = continentDetails[continent.value]
            contList.append(Continent(continent, label, members, bonus, len(members)))

    return contList

In [40]:
class Territory:
    """A single territory on the board.

    Attributes:
        index: the TerrEnums member identifying this territory.
        name: human-readable name.
        ownedBy: index of the owning player, or None while unassigned.
        parentContinent: the ContEnums member this territory belongs to.
        currTroops: number of troops currently stationed here.
        connections: list of TerrEnums members bordering this territory.
    """

    def __init__(self, index, name, ownedBy, parentContinent, currTroops, connections):
        # identity and current owner
        self.index, self.name, self.ownedBy = index, name, ownedBy
        # location on the board and garrison
        self.parentContinent, self.currTroops = parentContinent, currTroops
        # the caller's list is stored as-is, not copied
        self.connections = connections

In [41]:
class Continent:
    """A continent and the territories it contains.

    Attributes:
        index: the ContEnums member identifying this continent.
        name: human-readable name.
        territories: list of Territory objects inside the continent.
        troopsWhenFull: bonus troops granted for owning every territory.
        noOfCountries: number of territories in the continent.
    """

    def __init__(self, index, name, territories, troopsWhenFull, noOfCountries):
        self.index, self.name = index, name
        # the caller's list is stored as-is, not copied
        self.territories = territories
        self.troopsWhenFull, self.noOfCountries = troopsWhenFull, noOfCountries

In [42]:
class Card:
    """A territory card.

    Attributes:
        troop: troop type shown on the card (a TroopEnums value).
        territory: territory shown on the card (a TerrEnums value).
    """

    def __init__(self, troop, territory):
        self.troop, self.territory = troop, territory

In [43]:
class Player:
    """A participant in the game.

    Attributes:
        index: the player's position in the board's player list.
        troopTotal: running count of troops the player has received.
        territories: list of Territory objects the player owns.
        cards: list of Card objects the player holds.
    """

    def __init__(self, index, troopTotal, territories, cards):
        self.index, self.troopTotal = index, troopTotal
        # both lists are stored as-is, not copied
        self.territories, self.cards = territories, cards

In [44]:
class SimpleAgent:
    """A scripted opponent: the same fields as Player plus a fixed strategy.

    Attributes:
        index: the agent's position in the board's player list.
        troopTotal: running count of troops the agent has received.
        territories: list of Territory objects the agent owns.
        cards: list of Card objects the agent holds.
        strategy: integer strategy code (0, 1 or 2 when created by
            initialisePlayers) -- presumably the value passed as ``strat``
            to the turn-phase functions; confirm against the caller.
    """

    def __init__(self, index, troopTotal, territories, cards, strategy):
        self.index, self.troopTotal = index, troopTotal
        # both lists are stored as-is, not copied
        self.territories, self.cards = territories, cards
        self.strategy = strategy

In [45]:
class Board:
    """Complete game state: players, territories, continents and the card deck.

    Attributes:
        players: list returned by initialisePlayers.
        noOfPlayers: length of ``players``.
        terrList: list returned by initialiseTerritories.
        continents: continents built from ``terrList``.
        cards: the deck returned by initialiseCards.
    """

    def __init__(self):
        roster = initialisePlayers()
        self.players = roster
        self.noOfPlayers = len(roster)

        territories = initialiseTerritories()
        self.terrList = territories
        # continents hold references to the very same Territory objects
        self.continents = initialiseContinents(territories)

        self.cards = initialiseCards()

In [46]:
# returns a list of random integers between 1 and 6 (inclusive), one per die rolled
def rollDice(noOfDice):
    """Roll ``noOfDice`` six-sided dice.

    Args:
        noOfDice: how many dice to roll.

    Returns:
        list of ``noOfDice`` integers, each between 1 and 6 inclusive, in the
        order they were rolled.
    """
    return [random.randint(1, 6) for _ in range(noOfDice)]

In [47]:
# checks whether the territory shown on the submitted card is owned by the player
def checkCardTerritory(card, player):
    """Bonus troops earned for trading in ``card``.

    Args:
        card: object whose ``territory`` is a territory value.
        player: object whose ``territories`` each expose ``index.value``.

    Returns:
        2 if the player owns the territory shown on the card, otherwise 0.
    """
    shownTerritory = card.territory
    ownedValues = [owned.index.value for owned in player.territories]

    return 2 if shownTerritory in ownedValues else 0

In [48]:
# trades in cards when possible
# Tradable sets as (card types required, troops awarded), most valuable first.
# Card types are the TroopEnums values: 0 infantry, 1 cavalry, 2 artillery.
_TRADE_IN_SETS = (
    ((0, 1, 2), 10),  # one of each type
    ((2, 2, 2), 8),   # three artillery
    ((1, 1, 1), 6),   # three cavalry
    ((0, 0, 0), 4),   # three infantry
)


def _removeCardSet(player, cardTypeList, wantedTypes):
    """Remove one card per entry of wantedTypes from the player's hand.

    ``cardTypeList`` mirrors ``player.cards`` position by position and is kept
    in step with it. For each wanted type the first matching card is removed.

    Returns:
        total territory bonus (see checkCardTerritory) of the removed cards.
    """
    bonus = 0
    for wanted in wantedTypes:
        position = cardTypeList.index(wanted)
        cardTypeList.pop(position)
        bonus += checkCardTerritory(player.cards.pop(position), player)
    return bonus


def tradingCards(player, board):
    """Trade in the most valuable card set the player holds, if any.

    At most one set is traded per call. The traded cards are removed from
    ``player.cards``; each removed card that shows a territory the player
    owns adds a further 2 troops.

    Args:
        player: object with ``cards`` (each exposing ``troop``) and ``territories``.
        board: unused; kept so existing callers keep working.
            NOTE(review): traded cards are not put back into ``board.cards``,
            so the deck only ever shrinks -- confirm this is intended.

    Returns:
        number of troops earned (0 when no set can be traded).
    """
    # a set always consists of three cards
    if len(player.cards) <= 2:
        return 0

    cardTypeList = [card.troop for card in player.cards]

    for wantedTypes, setValue in _TRADE_IN_SETS:
        holdsSet = all(
            cardTypeList.count(cardType) >= wantedTypes.count(cardType)
            for cardType in set(wantedTypes)
        )
        if holdsSet:
            return _removeCardSet(player, cardTypeList, wantedTypes) + setValue

    return 0

In [49]:
def receiveAndPlaceTroops(player, board, strat):
    """Give the player this turn's reinforcements and place them on the board.

    Reinforcements are one troop per three territories owned (minimum 3),
    plus the bonus of every continent owned in full, plus whatever
    ``tradingCards`` returns. The total is added to ``player.troopTotal`` and
    then placed according to ``strat``:

        0 (passive)    - everything on the territory with the fewest troops
        1 (pacifist)   - one troop at a time over all territories, visited
                         in a random order
        2 (aggressive) - everything on the territory with the most troops

    Any other ``strat`` leaves the troops unplaced. A player with no
    territories receives nothing.

    Args:
        player: object with ``territories``, ``troopTotal`` and ``cards``.
        board: object with ``continents``.
        strat: integer strategy code.
    """
    noOfTerrs = len(player.territories)
    if noOfTerrs <= 0:
        return

    # one troop per three territories, but never fewer than 3 in a turn
    troopsRecieved = max(3, noOfTerrs // 3)

    # bonus troops for every continent owned in full
    playerTerrSet = set(player.territories)
    for continent in board.continents:
        if set(continent.territories).issubset(playerTerrSet):
            troopsRecieved += continent.troopsWhenFull

    # cards are traded in automatically whenever possible
    troopsRecieved += tradingCards(player, board)

    player.troopTotal += troopsRecieved

    if strat == 0:  # passive strat
        # ties go to the earliest territory in the player's list
        weakest = min(player.territories, key=lambda terr: terr.currTroops)
        weakest.currTroops += troopsRecieved
    elif strat == 1:  # pacifist strat
        # every territory takes part, including the last one in the list
        placementOrder = list(range(noOfTerrs))
        random.shuffle(placementOrder)

        for i in range(troopsRecieved):
            player.territories[placementOrder[i % noOfTerrs]].currTroops += 1
    elif strat == 2:  # aggressive strat
        # ties go to the earliest territory in the player's list
        strongest = max(player.territories, key=lambda terr: terr.currTroops)
        strongest.currTroops += troopsRecieved
    

In [50]:
def attacking(player, board, strat):
    """Run the attack phase of a turn for ``player``.

    ``strat`` decides how many blitz battles are attempted:

        0 (passive)    - never attacks
        1 (pacifist)   - 1 to 3 battles
        2 (aggressive) - 7 to 9 battles

    Any other value is treated like passive. Each battle starts from an owned
    territory holding at least 2 troops and targets a bordering territory the
    player does not own. Targets owned by a small player (fewer than 10
    territories) take priority; within the preferred group the battle with
    the largest troop advantage (attacker troops minus defender troops) is
    fought, earliest candidate first on ties.

    If the player ends the phase owning more territories than at the start,
    one random card is moved from ``board.cards`` to ``player.cards``.

    Args:
        player: object with ``index``, ``territories`` and ``cards``.
        board: object with ``terrList``, ``players`` and ``cards``.
        strat: integer strategy code.
    """
    # used to check if player took over a territory or not
    startingTerrCount = len(player.territories)
    if startingTerrCount <= 0:
        return

    if strat == 1:  # pacifist strat
        attackingRounds = random.randint(1, 3)
    elif strat == 2:  # aggressive strat
        attackingRounds = random.randint(7, 9)
    else:
        # passive strat (0) does not attack; neither does an unknown strategy
        return

    for _ in range(attackingRounds):
        # rebuilt every round because a won battle changes what the player owns
        ownedIndexes = {terr.index for terr in player.territories}

        # each candidate is (troop advantage, (fromValue, toValue, smallPlayer))
        candidates = []
        for terr in player.territories:
            # player can not attack with just 1 troop
            if terr.currTroops < 2:
                continue
            for connection in terr.connections:
                # making sure that you can't attack your own terr
                if connection in ownedIndexes:
                    continue

                target = board.terrList[connection.value]
                defender = board.players[target.ownedBy]
                # smaller nations should be targetted
                smallPlayer = len(defender.territories) < 10

                candidates.append((
                    terr.currTroops - target.currTroops,
                    (terr.index.value, connection.value, smallPlayer),
                ))

        if not candidates:
            # stop attacking, but still hand out the card for earlier conquests
            break

        # if there is an attack on a small nation then only those are considered
        smallPlayerAttacks = [candidate for candidate in candidates if candidate[1][2]]
        pool = smallPlayerAttacks or candidates

        # max() keeps the first candidate among equal advantages
        chosenBattle = max(pool, key=lambda candidate: candidate[0])[1]
        blitzBattle(chosenBattle, board, player)

    # if attacking player wins 1 terr in round then recieve card
    endingTerrCount = len(player.territories)
    if endingTerrCount > startingTerrCount and len(board.cards) != 0:
        randomCardIndex = random.randint(0, len(board.cards) - 1)
        player.cards.append(board.cards.pop(randomCardIndex))

In [51]:
# blitz battle will continue going until there is a winner
def blitzBattle(battle, board, player):
    """Fight one battle to its conclusion and apply the result to the board.

    Dice are rolled repeatedly until either the defender has no troops left
    or the attacker is down to the single troop that must stay behind. Each
    roll the attacker uses up to 3 dice (never more than its troops minus
    one) and the defender up to 2; the dice are compared highest against
    highest and the defender wins ties.

    Afterwards the surviving troop counts are written back to both
    territories. If the defender was wiped out, the territory changes hands:
    one attacking troop moves in, ``ownedBy`` is set to ``player.index`` and
    the territory moves from the defender's list to the attacker's.

    NOTE(review): ``troopTotal`` of either player is not adjusted here.

    Args:
        battle: sequence whose first two items are the positions of the
            attacking and defending territories in ``board.terrList``.
        board: object with ``terrList`` and ``players``.
        player: the attacking player.
    """
    attackingTerr = board.terrList[battle[0]]
    defendingTerr = board.terrList[battle[1]]
    defendingPlayer = board.players[defendingTerr.ownedBy]

    totalAttackingTroops = attackingTerr.currTroops
    totalDefendingTroops = defendingTerr.currTroops

    # one troop must stay at home, so the attack ends once only one is left
    while totalAttackingTroops > 1 and totalDefendingTroops > 0:
        # always attacking and defending with the max amount of dice allowed;
        # int() keeps range() happy should a troop count arrive as 3.0
        attackingDice = int(min(3, totalAttackingTroops - 1))
        defendingDice = int(min(2, totalDefendingTroops))

        attackingRolls = sorted(rollDice(attackingDice), reverse=True)
        defendingRolls = sorted(rollDice(defendingDice), reverse=True)

        # zip stops at the shorter side, so surplus dice are simply ignored
        for attackRoll, defendRoll in zip(attackingRolls, defendingRolls):
            if attackRoll > defendRoll:
                totalDefendingTroops -= 1
            else:
                totalAttackingTroops -= 1

    attackingWins = totalDefendingTroops <= 0 and totalAttackingTroops > 1

    if not attackingWins:
        # the defender held: both sides keep whatever survived
        attackingTerr.currTroops = totalAttackingTroops
        defendingTerr.currTroops = totalDefendingTroops
        return

    # the attacker won: one surviving troop occupies the conquered territory
    defendingTerr.ownedBy = player.index
    defendingTerr.currTroops = 1
    attackingTerr.currTroops = totalAttackingTroops - 1
    defendingPlayer.territories.remove(defendingTerr)
    player.territories.append(defendingTerr)

In [52]:
# checks if there is a route between terrs that the player wants to move troops between
def isReachable(player, a, b):
    """Tell whether territory ``b`` can be reached from territory ``a``.

    Steps are only taken out of territories the player owns, following their
    ``connections``. A territory the player does not own can be stepped onto
    but never left, so it only counts when it is ``b`` itself.

    Args:
        player: object whose ``territories`` each expose ``index.value`` and
            ``connections`` (items exposing ``value``).
        a: starting territory.
        b: destination territory.

    Returns:
        True if a route exists (always True when ``a`` and ``b`` share an
        index), otherwise False.
    """
    # outgoing borders of every territory the player owns
    edgeDict = defaultdict(list)
    for terr in player.territories:
        edgeDict[terr.index.value].extend(
            connection.value for connection in terr.connections
        )

    start = a.index.value
    goal = b.index.value

    # a set copes with any index value, not just 0-41
    visited = {start}
    pending = [start]

    # visiting order does not matter for reachability, so a stack is enough
    while pending:
        current = pending.pop()
        if current == goal:
            return True

        # .get avoids adding empty entries for territories that are not owned
        for neighbour in edgeDict.get(current, ()):
            if neighbour not in visited:
                visited.add(neighbour)
                pending.append(neighbour)

    return False

In [53]:
def _evenOutTroops(player, firstTerr, secondTerr):
    """Share the combined troops of two territories equally between them.

    Nothing happens unless ``isReachable`` reports a route from ``firstTerr``
    to ``secondTerr``. With an odd total, ``firstTerr`` keeps the extra
    troop. Whole-number division keeps the troop counts integers.
    """
    if isReachable(player, firstTerr, secondTerr) == True:
        totalTroops = firstTerr.currTroops + secondTerr.currTroops
        lowerHalf = totalTroops // 2

        secondTerr.currTroops = lowerHalf
        firstTerr.currTroops = totalTroops - lowerHalf


def fortifying(player, board, strat):
    """Run the fortify phase of a turn for ``player``.

    ``strat`` decides which two territories level out their troops:

        0 (passive)    - the territory with the most troops and the one with
                         the fewest
        1 (pacifist)   - no fortifying
        2 (aggressive) - the territory with the most troops and the one with
                         the second most

    Any other value does nothing, and neither does a player owning fewer
    than three territories. Troops only move when a route between the two
    territories exists (see isReachable).

    Args:
        player: object with ``territories`` (each exposing ``currTroops``).
        board: unused; kept so existing callers keep working.
        strat: integer strategy code.
    """
    if strat not in (0, 2):
        return

    troopCount = [terr.currTroops for terr in player.territories]
    if len(troopCount) < 3:
        return

    # on ties the earliest territory in the player's list is used
    maxTroopTerr = player.territories[troopCount.index(max(troopCount))]

    if strat == 0:  # passive strat
        partnerTroops = min(troopCount)
    else:  # aggressive strat
        partnerTroops = sorted(troopCount)[-2]  # 2nd highest value in list

    partnerTerr = player.territories[troopCount.index(partnerTroops)]

    _evenOutTroops(player, maxTroopTerr, partnerTerr)

In [54]:
# openAI gym environment is extended
class RiskEnv(Env):
    """Gym environment in which the learning agent (player 0) plays Risk.

    Every call to ``step`` plays ONE turn for whichever player is next in the
    turn order: player 0 plays the strategy given by ``action``, every other
    player plays its own ``strategy`` attribute.

    Observation: the percentage (0-100) of the board owned by player 0, kept as
    a plain Python float (the network built later in this notebook takes
    ``observation_space.shape`` directly as its input shape, so the observation
    is intentionally not wrapped in an array).
    Actions: ``Discrete(3)`` - index of the strategy to use for the whole turn.

    NOTE: the turn-limit reward table below covers a four-player board.
    """

    TOTAL_TERRS = 42      # number of territories on the board
    STARTING_TROOPS = 30  # troops every player owns once the game is set up
    MAX_TURNS = 200       # number of steps after which a game is cut short
    # reward at the turn limit, indexed by how many opponents own strictly
    # more territories than the agent (0 = agent is first or tied for first)
    TURN_LIMIT_REWARDS = (250, 100, 0, -50)

    # init the game when called
    def __init__(self):
        # shows different strategies that can be used
        self.action_space = Discrete(3)
        # will hold percentage of board owned
        self.observation_space = Box(low = np.array([0]), high = np.array([100]))

        self._setupGame()

    def _setupGame(self):
        """Create a fresh board, decide the turn order, deal territories and troops.

        Shared by ``__init__`` and ``reset`` so both always start a game in
        exactly the same way. Sets ``board``, ``turnOrder``, ``currPlayerTurn``,
        ``turns``, ``turnOrderIndex``, ``reward`` and ``state``.
        """
        self.board = Board()

        startingRolls = []
        # making sure that all values are different to avoid confusion with starting order
        while len(set(startingRolls)) != self.board.noOfPlayers:
            startingRolls = rollDice(self.board.noOfPlayers)

        # sorting rolls with their values (highest first) and getting the turn order
        sortedRolls = sorted(((value, index) for index, value in enumerate(startingRolls)), reverse = True)
        self.turnOrder = [index for _, index in sortedRolls]

        # spreading territories between players, one random territory at a time in turn order
        tempTerrList = self.board.terrList.copy()
        i = 0
        while len(tempTerrList) != 0:
            self.currPlayerTurn = self.turnOrder[i % self.board.noOfPlayers] # getting the current player turn from rolls
            randomTerrIndex = random.randint(0, len(tempTerrList) - 1) # getting a random territory index
            currTerr = tempTerrList.pop(randomTerrIndex) # getting actual territory
            self.board.players[self.currPlayerTurn].territories.append(currTerr) # adding territory to players

            i += 1

        for player in self.board.players:
            # putting at least 1 troop on every territory
            for terr in player.territories:
                terr.ownedBy = player.index
                terr.currTroops = 1
                player.troopTotal += 1

            # randomizing the places of the leftover troops
            leftoverTroops = self.STARTING_TROOPS - player.troopTotal
            for _ in range(leftoverTroops):
                randomTerrIndex = random.randint(0, len(player.territories) - 1)
                player.territories[randomTerrIndex].currTroops += 1
                player.troopTotal += 1

        self.turns = 0
        self.turnOrderIndex = 0
        self.reward = 0
        self.state = self._boardOwnedPercent()

    def _boardOwnedPercent(self):
        """Return the percentage (0-100) of all territories owned by player 0."""
        return (len(self.board.players[0].territories) / self.TOTAL_TERRS) * 100

    # represents a turn in the game
    def step(self, action):
        """Play one player's turn and return ``(state, reward, done, info)``.

        action: strategy index used by player 0 for receiving, attacking and
                fortifying; ignored on turns that belong to another player.

        Reward for the step: +500 if player 0 owns the whole board, -50 if any
        other player does, -75 if player 0 owns nothing, a rank-based value from
        ``TURN_LIMIT_REWARDS`` when the turn limit is reached, otherwise 0 -
        always followed by a -1 penalty per step.
        """
        players = self.board.players

        # constantly changing player according to value of starting dice
        currPlayerTurn = self.turnOrder[self.turnOrderIndex % self.board.noOfPlayers]
        currPlayer = players[currPlayerTurn]

        # the agent plays the chosen action, every other player its own strategy
        strat = action if currPlayerTurn == 0 else currPlayer.strategy

        receiveAndPlaceTroops(currPlayer, self.board, strat)
        attacking(currPlayer, self.board, strat)
        fortifying(currPlayer, self.board, strat)

        boardOwnedAfter = len(players[0].territories)

        self.turnOrderIndex += 1
        self.turns += 1

        # refresh the observation so the agent actually sees the outcome of the turn
        self.state = self._boardOwnedPercent()

        self.reward = 0
        # calculate reward
        if boardOwnedAfter == self.TOTAL_TERRS: # if player wins
            self.reward += 500
            done = True
        elif any(len(opponent.territories) == self.TOTAL_TERRS for opponent in players[1:]): # if player loses
            self.reward += -50
            done = True
        elif boardOwnedAfter == 0: # if player is knocked out
            self.reward += -75
            done = True
        elif self.turns >= self.MAX_TURNS:
            # game hit the turn limit: reward depends on how many opponents hold
            # strictly more territories than the agent (ties count in its favour)
            opponentsAhead = sum(1 for opponent in players[1:] if len(opponent.territories) > boardOwnedAfter)
            rank = min(opponentsAhead, len(self.TURN_LIMIT_REWARDS) - 1)
            self.reward += self.TURN_LIMIT_REWARDS[rank]
            done = True
        else:
            done = False

        # every step (one player's turn) deduct one point to discourage taking long
        self.reward -= 1

        # Set placeholder for info, needed for openAI
        info = {}

        # Return step information
        return self.state, self.reward, done, info

    # used by openAI gym when rendering is involved
    def render(self, mode = 'human'):
        """No-op: there is no GUI. ``mode`` is accepted (and ignored) so callers that pass it do not fail."""
        pass

    # restarting the environment for a new game
    def reset(self):
        """Start a new game on a fresh board and return the initial observation."""
        self._setupGame()

        return self.state

In [55]:
# creating the environment; the constructor already sets up a full game (board, turn order, territories, troops)
env = RiskEnv()

In [56]:
# testing the game with random actions and getting average score
episodes = 100
totalScore = 0 # summed over ALL episodes, so it is initialised once, before the loop
for episode in range(episodes):
    state = env.reset()
    done = False
    score = 0 # reward collected in this episode only
    
    while not done:
        action = env.action_space.sample() # uniformly random strategy
        n_state, reward, done, info = env.step(action)
        score += reward
    
    totalScore += score
    print("\nEpisode:", episode)
    print("Score:", score)
    
print("\n\nAverage Score:", totalScore / episodes)


Episode: 0
Score: 50

Episode: 1
Score: 50

Episode: 2
Score: 50

Episode: 3
Score: 50

Episode: 4
Score: -100

Episode: 5
Score: 50

Episode: 6
Score: 50

Episode: 7
Score: -100

Episode: 8
Score: 50

Episode: 9
Score: 50

Episode: 10
Score: 50

Episode: 11
Score: -100

Episode: 12
Score: -200

Episode: 13
Score: 50

Episode: 14
Score: 50

Episode: 15
Score: 50

Episode: 16
Score: 50

Episode: 17
Score: -100

Episode: 18
Score: 50

Episode: 19
Score: 50

Episode: 20
Score: 50

Episode: 21
Score: 50

Episode: 22
Score: 50

Episode: 23
Score: 50

Episode: 24
Score: -100

Episode: 25
Score: 50

Episode: 26
Score: 50

Episode: 27
Score: 50

Episode: 28
Score: 50

Episode: 29
Score: 50

Episode: 30
Score: 50

Episode: 31
Score: 50

Episode: 32
Score: 50

Episode: 33
Score: 50

Episode: 34
Score: 50

Episode: 35
Score: 50

Episode: 36
Score: -100

Episode: 37
Score: 50

Episode: 38
Score: 50

Episode: 39
Score: 50

Episode: 40
Score: 50

Episode: 41
Score: 50

Episode: 42
Score: 50

Episod

In [57]:
# number of discrete actions (strategies) the agent can pick from
actions = env.action_space.n
# shape of a single observation
states = env.observation_space.shape

In [58]:
def build_model(states, actions):
    """Build the Q-network: three stacked dense layers.

    states:  shape of one observation, used as the input shape of the first layer
    actions: number of actions, i.e. the width of the linear output layer

    Returns the (uncompiled) Sequential model.
    """
    layers = [
        Dense(24, activation = 'relu', input_shape = states), # first hidden layer
        Dense(24, activation = 'relu'),                       # second hidden layer
        Dense(actions, activation = 'linear'),                # one linear output per action
    ]

    return Sequential(layers)

In [59]:
# del model

In [60]:
# building the network from the observation shape and the number of actions
model = build_model(states, actions)

In [61]:
# printing the layers, their output shapes and parameter counts
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 24)                48        
                                                                 
 dense_4 (Dense)             (None, 24)                600       
                                                                 
 dense_5 (Dense)             (None, 3)                 75        
                                                                 
Total params: 723
Trainable params: 723
Non-trainable params: 0
_________________________________________________________________


In [62]:
def build_agent(model, actions):
    """Wrap ``model`` in a DQN agent.

    model:   network that the agent trains and queries
    actions: number of actions, passed to the agent as ``nb_actions``

    Returns the (uncompiled) DQNAgent.
    """
    return DQNAgent(
        model = model,
        policy = BoltzmannQPolicy(), # soft-maxing Q learning
        memory = SequentialMemory(limit = 500000, window_length = 1),
        nb_actions = actions,
        nb_steps_warmup = 1000,
        target_model_update = 1e-2,
    )

In [63]:
dqn = build_agent(model, actions) # deep q learning regression method
# `learning_rate` is the supported argument name; the old `lr` alias is deprecated
dqn.compile(Adam(learning_rate = 1e-3), metrics = ['mae']) # using mean absolute error
# training on the Risk environment for 500000 steps without rendering
dqn.fit(env, nb_steps = 500000, visualize = False, verbose = 1)

  super(Adam, self).__init__(name, **kwargs)


Training for 500000 steps ...
Interval 1 (0 steps performed)
  252/10000 [..............................] - ETA: 5s - reward: -0.0079

  updates=self.state_updates,


50 episodes - episode_reward: 0.000 [-250.000, 50.000] - loss: 94.520 - mae: 6.302 - mean_q: 9.190

Interval 2 (10000 steps performed)
50 episodes - episode_reward: 4.000 [-250.000, 50.000] - loss: 99.066 - mae: 9.082 - mean_q: 13.148

Interval 3 (20000 steps performed)
50 episodes - episode_reward: 27.000 [-200.000, 50.000] - loss: 111.047 - mae: 11.269 - mean_q: 16.339

Interval 4 (30000 steps performed)
50 episodes - episode_reward: 12.000 [-250.000, 50.000] - loss: 109.259 - mae: 12.124 - mean_q: 17.590

Interval 5 (40000 steps performed)
50 episodes - episode_reward: 15.880 [-200.000, 50.000] - loss: 107.734 - mae: 13.222 - mean_q: 19.282

Interval 6 (50000 steps performed)
50 episodes - episode_reward: 38.000 [-100.000, 50.000] - loss: 112.080 - mae: 14.251 - mean_q: 20.728

Interval 7 (60000 steps performed)
50 episodes - episode_reward: 15.000 [-200.000, 50.000] - loss: 110.372 - mae: 14.163 - mean_q: 20.601

Interval 8 (70000 steps performed)
50 episodes - episode_reward: 30.5

50 episodes - episode_reward: 14.000 [-200.000, 50.000] - loss: 116.563 - mae: 10.699 - mean_q: 15.242

Interval 40 (390000 steps performed)
50 episodes - episode_reward: 15.000 [-250.000, 50.000] - loss: 116.141 - mae: 11.083 - mean_q: 15.820

Interval 41 (400000 steps performed)
50 episodes - episode_reward: 16.000 [-200.000, 50.000] - loss: 114.515 - mae: 12.356 - mean_q: 17.735

Interval 42 (410000 steps performed)
50 episodes - episode_reward: 10.000 [-250.000, 50.000] - loss: 114.852 - mae: 11.401 - mean_q: 16.309

Interval 43 (420000 steps performed)
50 episodes - episode_reward: 35.000 [-100.000, 50.000] - loss: 113.697 - mae: 10.458 - mean_q: 14.882

Interval 44 (430000 steps performed)
50 episodes - episode_reward: 16.000 [-200.000, 50.000] - loss: 114.535 - mae: 11.088 - mean_q: 15.856

Interval 45 (440000 steps performed)
50 episodes - episode_reward: 19.000 [-200.000, 50.000] - loss: 113.869 - mae: 11.213 - mean_q: 16.037

Interval 46 (450000 steps performed)
50 episodes -

<keras.callbacks.History at 0x21f34c988e0>

In [64]:
# running the trained agent for 100 test episodes (no rendering)
scores = dqn.test(env, nb_episodes = 100, visualize = False)
# average of the per-episode rewards recorded in the returned history
print(np.mean(scores.history['episode_reward']))

Testing for 100 episodes ...
Episode 1: reward: -250.000, steps: 200
Episode 2: reward: -250.000, steps: 200
Episode 3: reward: -250.000, steps: 200
Episode 4: reward: -100.000, steps: 200
Episode 5: reward: -250.000, steps: 200
Episode 6: reward: -200.000, steps: 200
Episode 7: reward: -250.000, steps: 200
Episode 8: reward: -100.000, steps: 200
Episode 9: reward: -250.000, steps: 200
Episode 10: reward: -200.000, steps: 200
Episode 11: reward: -250.000, steps: 200
Episode 12: reward: -200.000, steps: 200
Episode 13: reward: -200.000, steps: 200
Episode 14: reward: -250.000, steps: 200
Episode 15: reward: -200.000, steps: 200
Episode 16: reward: -250.000, steps: 200
Episode 17: reward: -250.000, steps: 200
Episode 18: reward: -250.000, steps: 200
Episode 19: reward: -250.000, steps: 200
Episode 20: reward: -200.000, steps: 200
Episode 21: reward: -250.000, steps: 200
Episode 22: reward: -200.000, steps: 200
Episode 23: reward: -250.000, steps: 200
Episode 24: reward: -250.000, steps: 

In [65]:
# saving the agent's weights (only the weights, not the whole model) to '500k.h5f', replacing any existing file
dqn.save_weights('500k.h5f', overwrite = True)