## Wumpus World Testing notebook

### Team 4: Daniel Stanhope, Daniel Lion, Karan Teckwani


## Packages

In [2]:
import copy
from copy import deepcopy
import random
from random import randrange
from enum import Enum
import sys
sys.path.append(".")
import os
from collections import deque

import numpy as np
import time
from IPython.display import clear_output
from matplotlib import pylab as plt
import torch

## Setup Wumpus World Environment

### Orientation with turn left and turn right

In [1]:
class North:
    def turnLeft(self):
        return West

    def turnRight(self):
        return East

class South:
    def turnLeft(self):
        return East

    def turnRight(self):
        return West

class East:
    def turnLeft(self):
        return North

    def turnRight(self):
        return South

class West:
    def turnLeft(self):
        return South

    def turnRight(self):
        return North

### Game Action

In [None]:
class Action(Enum):
    Forward = "Forward"
    TurnLeft = "TurnLeft"
    TurnRight = "TurnRight"
    Shoot = "Shoot"
    Grab = "Grab"
    Climb = "Climb"

### Coordinates class for location

In [None]:
class Coords:
    def __init__(self, x, y):
        self._x = x
        self._y = y

    def adjacentCells(self, gridWidth, gridHeight):
        toLeft = Coords(self._x - 1, self._y ) if (self._x > 0) else False
        toRight =  Coords(self._x + 1, self._y ) if (self._x < gridWidth - 1) else False
        below = Coords(self._x, self._y  - 1) if (self._y  > 0) else False
        above = Coords(self._x, self._y  + 1) if (self._y  < gridHeight - 1) else False

        return [toLeft, toRight, below, above]
    @property
    def x(self):
        return self._x

    @property
    def y(self):
        return self._y

### Percept class for the Environment¶

In [None]:
class Percept:
    def __init__(self, stench, breeze, glitter, bump, scream, isTerminated, reward):
        self.stench = stench
        self.breeze = breeze
        self.glitter = glitter
        self.bump = bump
        self.scream = scream
        self.isTerminated = isTerminated
        self.reward = reward

    def show(self):
        return (f'stench:{self.stench} breeze:{self.breeze} glitter:{self.glitter}'
                f'bump:{self.bump} scream:{self.scream} isTerminated:{self.isTerminated}'
                f'reward:{self.reward}')

### Game Environment

In [None]:
class Environment:
    def __init__(self,
                 gridWidth,
                 gridHeight,
                 pitProb,
                 allowClimbWithoutGold,
                 agent,
                 pitLocations,
                 isTerminated,
                 wumpusLocation,
                 wumpusAlive,
                 goldLocation):
        self.gridWidth = gridWidth
        self.gridHeight = gridHeight
        self.pitProb = pitProb
        self.allowClimbWithoutGold = allowClimbWithoutGold
        self.agent = agent
        self.pitLocations = pitLocations
        self.isTerminated = isTerminated
        self.wumpusLocation = wumpusLocation
        self.wumpusAlive = wumpusAlive
        self.goldLocation = goldLocation

    def isPitAt(self, coords):
        for item in self.pitLocations:
            if(item.x == coords.x and item.y == coords.y):
                return True

        return False

    def isWumpusAt(self, coords):
        if(self.wumpusLocation is None):
            return False
        elif(self.wumpusLocation.x == coords.x and 
             self.wumpusLocation.y == coords.y):
            return True
        else:
            return False

    def isAgentAt(self, coords):
        if(self.agent.location.x == coords.x and 
           self.agent.location.y == coords.y):
            return True
        else:
            return False

    def isGlitter(self):
        if(self.goldLocation is None):
            return False
        elif(self.agent.location.x == self.goldLocation.x and 
             self.agent.location.y == self.goldLocation.y):
            return True
        else:
            return False           

    def isGoldAt(self, coords):
        if(self.goldLocation is None):
            return False
        elif(coords.x == self.goldLocation.x and coords.y == self.goldLocation.y):
            return True
        else:
            return False         

    def killAttemptSuccessful(self):
        if(self.wumpusLocation is None):
            return True

        wumpusInLineOfFire = False
        
        if self.agent.orientation == West:
            if (self.agent.location.x > self.wumpusLocation.x and 
                self.agent.location.y == self.wumpusLocation.y):
                wumpusInLineOfFire = True
        elif self.agent.orientation == East:
            if (self.agent.location.x < self.wumpusLocation.x and 
                self.agent.location.y == self.wumpusLocation.y):
                wumpusInLineOfFire = True
        elif self.agent.orientation == South:
            if (self.agent.location.x == self.wumpusLocation.x and 
                self.agent.location.y > self.wumpusLocation.y):
                wumpusInLineOfFire = True
        elif self.agent.orientation == North:
            if (self.agent.location.x == self.wumpusLocation.x and 
                self.agent.location.y < self.wumpusLocation.y):
                wumpusInLineOfFire = True
        
        return self.agent.hasArrow and self.wumpusAlive and wumpusInLineOfFire

    def adjacentCells(self, coords):
        toLeft = Coords(coords.x - 1, coords.y) if (coords.x > 0) else False
        toRight =  Coords(coords.x + 1, coords.y) if (coords.x < self.gridWidth - 1) else False
        below = Coords(coords.x, coords.y - 1) if (coords.y > 0) else False
        above = Coords(coords.x, coords.y + 1) if (coords.y < self.gridHeight - 1) else False

        return [toLeft, toRight, below, above]

    def isPitAdjacent(self, coords):
        adjacent = self.adjacentCells(coords)
        for item in adjacent:
            if(item != False):
                if(self.isPitAt(item)):
                    return True
                    
        return False

    def isWumpusAdjacent(self, coords):
        adjacent = self.adjacentCells(coords)

        for item in adjacent:
            if(item != False):
                if(self.isWumpusAt(item)):
                    return True

        return False

    def isBreeze(self):
        return self.isPitAdjacent(self.agent.location)

    def isStench(self):
        return self.isWumpusAdjacent(self.agent.location) or self.isWumpusAt(self.agent.location)

    def applyAction(self, action):
        if (self.isTerminated):
            return self, Percept(False, False, False, False, False, True, 0)
        else:
            if action.name is Action.Forward.name:                
                movedAgent = self.agent.forward(self.gridWidth, self.gridHeight)

                death = (self.isWumpusAt(movedAgent.location) 
                         and self.wumpusAlive) or self.isPitAt(movedAgent.location)
                
                newAgent = deepcopy(movedAgent)
                newAgent.isAlive = False if death else True

                Bump = False
                if(newAgent.location.x == self.agent.location.x 
                   and newAgent.location.y == self.agent.location.y):
                    Bump = True
                
                newEnv = Environment(self.gridWidth, self.gridHeight, self.pitProb, 
                                     self.allowClimbWithoutGold, newAgent, self.pitLocations,
                                     death, self.wumpusLocation, self.wumpusAlive, 
                                     newAgent.location if self.agent.hasGold else self.goldLocation)
                
                return newEnv, Percept(newEnv.isStench(), newEnv.isBreeze(), 
                                       newEnv.isGlitter(), Bump, False, 
                                       False if newAgent.isAlive else True, 
                                       -1 if newAgent.isAlive else -1001)
            
            elif action.name is Action.TurnLeft.name:
                return Environment(self.gridWidth, self.gridHeight, self.pitProb, 
                                   self.allowClimbWithoutGold, self.agent.turnLeft(), 
                                   self.pitLocations, self.isTerminated,
                                   self.wumpusLocation,  self.wumpusAlive,  self.goldLocation), 
                        Percept(self.isStench(), self.isBreeze(), self.isGlitter(), 
                                False, False,  False, -1)
                
            elif action.name is Action.TurnRight.name:
                return Environment(self.gridWidth, self.gridHeight, self.pitProb, 
                                   self.allowClimbWithoutGold, self.agent.turnRight(), 
                                   self.pitLocations, self.isTerminated, self.wumpusLocation, 
                                   self.wumpusAlive, self.goldLocation), 
                        Percept(self.isStench(), self.isBreeze(), self.isGlitter(), 
                                False, False,  False, -1)
                
            elif action.name is Action.Grab.name:
                newAgent = deepcopy(self.agent)
                newAgent.hasGold = self.isGlitter()
                return Environment(self.gridWidth, self.gridHeight, self.pitProb, 
                                   self.allowClimbWithoutGold, newAgent, self. pitLocations, 
                                   self.isTerminated, self.wumpusLocation, self.wumpusAlive, 
                                   self.agent.location if newAgent.hasGold else self.goldLocation), 
                        Percept(self.isStench(), self.isBreeze(), self.isGlitter(), 
                                False, False,  False, -1)
            
            elif action.name is Action.Climb.name:
                inStartLocation = False
                if(self.agent.location.x == Coords(0, 0).x and self.agent.location.y == Coords(0, 0).y):
                    inStartLocation = True
                success = self.agent.hasGold and inStartLocation
                isTerminated = success or self.allowClimbWithoutGold
                return Environment(self.gridWidth, self.gridHeight, self.pitProb, 
                                   self.allowClimbWithoutGold, self.agent, self.pitLocations, 
                                   isTerminated, self.wumpusLocation, self.wumpusAlive, 
                                   self.goldLocation), 
                        Percept(False, False, self.agent.hasGold, False, False, 
                                isTerminated, 999 if success else -1)
            
            elif action.name is Action.Shoot.name:
                hadArrow = self.agent.hasArrow
                wumpusKilled = self.killAttemptSuccessful()
                newAgent = deepcopy(self.agent)
                newAgent.hasArrow = False
                return Environment(self.gridWidth, self.gridHeight, self.pitProb, 
                                   self.allowClimbWithoutGold, newAgent, self.pitLocations, 
                                   self.isTerminated, self.wumpusLocation, 
                                   self.wumpusAlive and not wumpusKilled, self.goldLocation), 
                        Percept(self.isStench(), self.isBreeze(), self.isGlitter(), False, 
                                wumpusKilled, False, -11 if hadArrow else -1)

    
    def visualize(self):
        wumpusSymbol =  "W" if (self.wumpusAlive == True) else "w"         

        Rows = []
        for y in range(self.gridHeight):
            Cells = []
            for x in range(self.gridWidth):
                A = "A" if (self.isAgentAt(Coords(x, y))) else " "
                P = "P" if (self.isPitAt(Coords(x, y))) else " "
                G = "G" if (self.isGoldAt(Coords(x, y))) else " "
                W = wumpusSymbol if (self.isWumpusAt(Coords(x, y))) else " "
                
                Cells.append("%s%s%s%s" % (A, P, G, W))

            Rows.append('|'.join(Cells))

        return '\n'.join(Rows)



### Wumpus Environment (Creating Pits at (1,0) and (0,1) for unwinnable games)

In [None]:
class WumpusWorldEnvironment:
    def apply(self, gridWidth, gridHeight, pitProb, allowClimbWithoutGold):
        self.gridWidth = gridWidth
        self.gridHeight = gridHeight
        self.pitProb = pitProb
        self.allowClimbWithoutGold = allowClimbWithoutGold

        cellIndexes = []
        pitLocations = []

        for x in range(gridWidth):
            for y in range(gridHeight):
                cellIndexes.append(Coords(x, y))

        cellIndexes.pop(0)
        pitCount = 0
        
        pitLocations.append(Coords(1, 0))
        pitLocations.append(Coords(0, 1))

        env = Environment(
            gridWidth,
            gridHeight,
            pitProb,
            allowClimbWithoutGold,
            Agent(),
            pitLocations,
            False,
            self.randomLocationExceptOrigin(),
            True,
            self.randomLocationExceptOrigin()
        )

        return env, Percept(env.isStench(), env.isBreeze(), False, False, False, False,  0.0)

    def randomLocationExceptOrigin(self):
        x = random.randrange(self.gridWidth)
        y = random.randrange(self.gridHeight)

        if x == 0 and y == 0:
            self.randomLocationExceptOrigin()
        else:
            return Coords(x, y)

## Game Agent

In [3]:
class Agent:
    def __init__(self, location = Coords(0,0), orientation = East, 
                 hasGold = False, hasArrow = True, isAlive = True):
        self.location =  location
        self.orientation = orientation
        self.hasGold = hasGold
        self.hasArrow = hasArrow
        self.isAlive = isAlive

    def turnLeft(self):
        ret = deepcopy(self)
        ret.orientation = self.orientation.turnLeft(self)

        return ret

    def turnRight(self):
        ret = deepcopy(self)
        ret.orientation  = self.orientation.turnRight(self)

        return ret

    def useArrow(self):
        ret = deepcopy(self)
        ret.hasArrow = False
    
        return ret

    def forward(self, gridWidth, gridHeight):
        ret = deepcopy(self)
        
        newAgentLocation = False

        if self.orientation == West:
            newAgentLocation = Coords(max(0, self.location.x - 1), 
                                      self.location.y)
        elif self.orientation == East:
            newAgentLocation = Coords(min(gridWidth - 1, self.location.x + 1), 
                                      self.location.y)
        elif self.orientation == South:
            newAgentLocation = Coords(self.location.x, 
                                      max(0, self.location.y - 1))
        elif self.orientation == North:
            newAgentLocation = Coords(self.location.x, 
                                      min(gridHeight - 1, self.location.y + 1))
        ret.location = newAgentLocation
        return ret

    def applyMoveAction(self, action, gridWidth, gridHeight):
        if action == Action.Forward:
            return self.forward(gridWidth, gridHeight)
        elif action == Action.TurnRight:
            return self.turnRight()
        elif action == Action.TurnLeft:
            return self.turnLeft()

## DeepQ Agent

In [4]:
class DeepQAgent():
    def __init__(self, gridHeight=4, gridWidth=4, agentState=Agent, 
                 safeLocations=[Coords(0,0)], stenchLocations=[], 
                 breezeLocations=[], agentLocationGrid = [], 
                 safeLocationGrid = [], stenchLocationGrid = [], 
                 breezeLocationGrid = [], agentHasGold = False, 
                 agentSensesGold = False, agentHasArrow = False, 
                 agentHeardScream = False, agentOrientationSet= False, 
                 previousAction = [], previousLocation = [], 
                 sameMovesSet = [], sameLocationSet = []):
        self.gridHeight = gridHeight
        self.gridWidth = gridWidth
        self.agentState = agentState()
        self.safeLocations = safeLocations
        self.stenchLocations = stenchLocations
        self.breezeLocations = breezeLocations
        self.agentLocationGrid = agentLocationGrid
        self.safeLocationGrid = safeLocationGrid
        self.stenchLocationGrid = stenchLocationGrid
        self.breezeLocationGrid = breezeLocationGrid
        self.agentHasGold = agentHasGold
        self.agentSensesGold = agentSensesGold
        self.agentHasArrow = agentHasArrow
        self.agentHeardScream = agentHeardScream
        self.agentOrientationSet = agentOrientationSet
        self.previousAction = previousAction
        self.previousLocation = previousLocation
        self.sameMovesSet = sameMovesSet
        self.sameLocationSet = sameLocationSet

    #Helper method to print grids nicely
    def printTable(self, grid):
        rows = []
        for i in range(self.gridHeight):
            cells = []
            for j in range(self.gridWidth):            
                cells.append("%s" % (grid[i][j]))

            rows.append('|'.join(cells))
        return '\n'.join(rows)

    #Helper method to print all the different belief state values
    def printBeliefState(self, percept):
        print("-- Percept --")
        print(percept.show())

        print('-- Current Agent Location --')
        print(self.printTable(self.agentLocationGrid))

        print('-- Agent Orientation Set --')
        print(self.agentOrientationSet)        

        print('-- Safe Locations --')
        print(self.printTable(self.safeLocationGrid))

        print('-- Stench Locations --')
        print(self.printTable(self.stenchLocationGrid))

        print('-- Breeze Locations --')
        print(self.printTable(self.breezeLocationGrid))    

        print('-- Agent Has Gold --')
        print(self.agentHasGold)  

        print('-- Agent Senses Gold --')
        print(self.agentSensesGold)  

        print('-- Agent Has Arrow --')
        print(self.agentHasArrow)  

        print('-- Agent Heard Scream --')
        print(self.agentHeardScream)          

    #flattening and concatenating all the arrays to one array of length 78
    def getAgentBeliefState(self):          
        return np.concatenate((np.array(self.sameLocationSet).flatten(), 
                               np.array(self.sameMovesSet).flatten(), 
                               np.array(self.agentLocationGrid).flatten(), 
                               np.array(self.safeLocationGrid).flatten(),
                               np.array(self.stenchLocationGrid).flatten(), 
                               np.array(self.breezeLocationGrid).flatten(), 
                               np.array(self.agentOrientationSet).flatten(), 
                               np.array(self.agentHasGold).flatten(), 
                               np.array(self.agentSensesGold).flatten(), 
                               np.array(self.agentHasArrow).flatten(), 
                               np.array(self.agentHeardScream).flatten()))

    #Set is represented as a list of 1s and 0s
    #I don't think the order matters, but we'll say:
    #  [1, 0, 0, 0] = North
    #  [0, 1, 0, 0] = South
    #  [0, 0, 1, 0] = East
    #  [0, 0, 0, 1] = West
    def buildAgentOrientatioSet(self, orientation):
        orientationSet = []

        if orientation == North:
            orientationSet.append([1, 0, 0, 0])
        elif orientation == South:
            orientationSet.append([0, 1, 0, 0])
        elif orientation == East:
            orientationSet.append([0, 0, 1, 0])
        elif orientation == West:
            orientationSet.append([0, 0, 0, 1])

        return orientationSet

    #Build counters for last number of moves and if they are the same
    def buildSameMoves(self, moves):
        sequence = 1
        reversed_moves = moves[::-1]
        fiveSame = 0         
        tenSame = 0         
        fiftySame = 0         
        for index, move in enumerate(reversed_moves):
            if index + 1 == len(reversed_moves):
                break
          
            if(move == reversed_moves[index + 1]):
                sequence += 1
            else:
                break
        if sequence >= 5:
            fiveSame = 1

        if sequence >= 10:
            tenSame = 1

        if sequence >= 50:
            fiftySame = 1  

        return [fiveSame, tenSame, fiftySame] 

    #Build counters for last number of locations and if they are the same
    def buildSameLocations(self, locations):
        sequence = 1
        reversed_locations = locations[::-1]
        fourSame = 0         
        tenSame = 0         
        fiftySame = 0         
        for index, location in enumerate(reversed_locations):
            if index + 1 == len(reversed_locations):
                break
          
            if(location.x == reversed_locations[index + 1].x 
               and location.y == reversed_locations[index + 1].y):
                sequence += 1
            else:
                break
        if sequence >= 4:
            fourSame = 1

        if sequence >= 10:
            tenSame = 1

        if sequence >= 50:
            fiftySame = 1  

        return [fourSame, tenSame, fiftySame]                      

    #Build agent location grid
    def buildAgentLocationGrid(self, coords):
        rows = []

        for i in range(self.gridHeight):
            cols = []
            for j in range(self.gridWidth):
                if(i == coords.y and j == coords.x):
                    cols.append(1)
                else:
                    cols.append(0)   

            rows.append(cols)

        return rows
    
    #Build safe location grid
    def buildSafeLocationGrid(self, visited):
        rows = []

        for i in range(self.gridHeight):
            cols = []
            for j in range(self.gridWidth):
                if any(d.y == i and d.x == j for d in visited):
                    cols.append(1)
                else:
                    cols.append(0)   

            rows.append(cols)

        return rows            

    #Build stench location grid
    def buildStenchLocationGrid(self, stenches):
        rows = []

        for i in range(self.gridHeight):
            cols = []
            for j in range(self.gridWidth):
                if any(d.y == i and d.x == j for d in stenches):
                    cols.append(1)
                else:
                    cols.append(0)   

            rows.append(cols)

        return rows 
    
    #Build breeze location grid
    def buildBreezeLocationGrid(self, breezes):
        rows = []

        for i in range(self.gridHeight):
            cols = []
            for j in range(self.gridWidth):
                if any(d.y == i and d.x == j for d in breezes):
                    cols.append(1)
                else:
                    cols.append(0)   

            rows.append(cols)

        return rows

    #apply next action. Action value is passed in from training(0,1,2,4,5)
    def nextAction(self, percept, action):
        ret = deepcopy(self)
        
        ret.previousAction.append(action)
        ret.previousLocation.append(ret.agentState.location)
        
        if(percept.stench == True):
            ret.stenchLocations.append(ret.agentState.location)
        if(percept.breeze == True):
            ret.breezeLocations.append(ret.agentState.location) 

        ret.agentLocationGrid = self.buildAgentLocationGrid(ret.agentState.location)
        ret.safeLocationGrid = self.buildSafeLocationGrid(ret.safeLocations)  
        ret.stenchLocationGrid = self.buildStenchLocationGrid(ret.stenchLocations)       
        ret.breezeLocationGrid = self.buildBreezeLocationGrid(ret.breezeLocations)
        ret.agentHasGold =  1 if ret.agentState.hasGold == True else 0
        ret.agentSensesGold = 1 if percept.glitter == True else 0
        ret.agentHasArrow = 1 if ret.agentState.hasArrow == True else 0
        ret.agentHeardScream = 1 if percept.scream == True else 0
        ret.agentOrientationSet = self.buildAgentOrientatioSet(ret.agentState.orientation)
        ret.sameMovesSet = self.buildSameMoves(ret.previousAction)
        ret.sameLocationSet = self.buildSameLocations(ret.previousLocation)

        if action == 0:
            ret.agentState = ret.agentState.forward(
                self.gridWidth, self.gridHeight)
            ret.safeLocations.append(ret.agentState.location)

            return ret, Action.Forward
        elif action == 1:
            ret.agentState = ret.agentState.turnLeft()
            return ret, Action.TurnLeft
        elif action == 2:
            ret.agentState = ret.agentState.turnRight()
            return ret, Action.TurnRight
        elif action == 3:
            ret.agentState = ret.agentState.useArrow()
            return ret, Action.Shoot
        if action == 4:
            if percept.glitter == True:
                ret.agentState.hasGold = True

            return ret, Action.Grab
        if action == 5:
            ret.agentState.isTerminated = True
            return ret, Action.Climb

## Model Testing

### Model Testing Helper Functions

In [4]:
def get_model(path):
    model = torch.load(path)
    return model

def run(env, agent, percept, action):
    agent, action = agent.nextAction(percept, action)
    env, percept = env.applyAction(action)

    return env, agent, percept

def getState(belief_state):
    state_ = np.array([belief_state]).reshape(1,l1) + np.random.rand(1,l1)/100.0   
    state = torch.from_numpy(state_).float()

    return state_, state

### Model Testing Parameters

In [10]:
l1 = 78
epsilon = 0.3

### Model Testing

In [11]:
def getStateTest(belief_state):
    state_ = np.array([belief_state]).reshape(1,l1) + np.random.rand(1,l1)/10.0   
    state = torch.from_numpy(state_).float()

    return state_, state


def test_model_alternate(model, mode='static', display=True):
    i = 0
    terminated = False
    world = WumpusWorldEnvironment()
    initialEnv, initialPercept = world.apply(4, 4, 0.2, False)        
    agent = DeepQAgent(4, 4)    
    randGen = randrange(6)
    env, agent, percept = run(initialEnv, agent, initialPercept, randGen)
    belief_state = agent.getAgentBeliefState()
    state_, state = getStateTest(belief_state)

    status = 1
    reward = 0
    mov = 0
    won_counter = 0
    agent_safe_escape_counter = 0

    while terminated == False:
        qval = model(state)
        qval_ = qval.data.numpy()

        if (random.random() < epsilon):
            nextMove = np.random.randint(0,6)
        else:
            action_ = np.argmax(qval_)
            nextMove = action_.item()

        env, agent, percept = run(env, agent, percept, nextMove) 

        belief_state = agent.getAgentBeliefState()
        state_, state = getState(belief_state)       
        reward += percept.reward        
        
                
        if nextMove == 5:
            agent_safe_escape_counter += 1
            terminated = True

        if reward > 0:
            won_counter += 1
            terminated = True

        if mov > 500:
            terminated = True

        if percept.isTerminated == True:
            terminated = True

        mov += 1  

        clear_output(wait=True)
        print(env.visualize())
        print("-----------------------")
    win = True if won_counter > 0 else False
    agent_safe_escape = True if agent_safe_escape_counter > 0 else False
    
    return win, agent_safe_escape, reward

## Model 6: Test quitRate: 71.0%, averageScorePerGame: -125.33

In [6]:
import os
model_path = os.path.join(os.getcwd(), 'model')
os.makedirs(model_path, exist_ok=True)
model_filename = 'deepQ_wumpus_model_new_features_3.pt'
model_file_path = os.path.join(model_path, model_filename)
model = get_model(model_file_path)

In [None]:
max_games = 100
wins = 0
agent_safe_escapes = 0
total_reward = []

for i in range(max_games):
    win, agent_safe_escape, reward = test_model_alternate(model, mode='random', display=False)
    total_reward.append(reward)
    if win:
        wins += 1
    if agent_safe_escape:
        agent_safe_escapes += 1  

safe_escape_perc = float(agent_safe_escapes) / float(max_games)
print("Games played: {0}, # of wins: {1}".format(max_games,agent_safe_escapes))
print("Safe Escape percentage: {}%".format(100.0*safe_escape_perc))
print(f"average_reward: {sum(total_reward)/len(total_reward)}")

## Model 6a: Test quitRate: 72.0%, averageScorePerGame: -114.07

In [7]:
import os
model_path = os.path.join(os.getcwd(), 'model')
os.makedirs(model_path, exist_ok=True)
model_filename = 'deepQ_wumpus_model_new_features_3a.pt'
model_file_path = os.path.join(model_path, model_filename)
model = get_model(model_file_path)

In [19]:
max_games = 100
wins = 0
agent_safe_escapes = 0
total_reward = []

for i in range(max_games):
    win, agent_safe_escape, reward = test_model_alternate(model, mode='random', display=False)
    total_reward.append(reward)
    if win:
        wins += 1
    if agent_safe_escape:
        agent_safe_escapes += 1  

safe_escape_perc = float(agent_safe_escapes) / float(max_games)
print("Games played: {0}, # of wins: {1}".format(max_games,agent_safe_escapes))
print("Safe Escape percentage: {}%".format(100.0*safe_escape_perc))
print(f"average_reward: {sum(total_reward)/len(total_reward)}")

    |    |    |    
    | P  |APG |   W
    |    |    |    
    |    |    |    
-----------------------
99 9
Games played: 100, # of wins: 9
Win percentage: 9.0%
average_reward: -890.98


## Model 6b: Test winRate: 89.0%, averageScorePerGame: -52.42

In [8]:
import os
model_path = os.path.join(os.getcwd(), 'model')
os.makedirs(model_path, exist_ok=True)
model_filename = 'deepQ_wumpus_model_new_features_3b.pt'
model_file_path = os.path.join(model_path, model_filename)
model = get_model(model_file_path)

In [21]:
max_games = 100
wins = 0
agent_safe_escapes = 0
total_reward = []

for i in range(max_games):
    win, agent_safe_escape, reward = test_model_alternate(model, mode='random', display=False)
    total_reward.append(reward)
    if win:
        wins += 1
    if agent_safe_escape:
        agent_safe_escapes += 1  

safe_escape_perc = float(agent_safe_escapes) / float(max_games)
print("Games played: {0}, # of wins: {1}".format(max_games,agent_safe_escapes))
print("Safe Escape percentage: {}%".format(100.0*safe_escape_perc))
print(f"average_reward: {sum(total_reward)/len(total_reward)}")

A G |    |    |    
    |    |    |    
    |    |    |    
    |   W| P  |    
-----------------------
99 7
Games played: 100, # of wins: 7
Win percentage: 7.000000000000001%
average_reward: -953.72
