# Reduction Of Dataset Size

**Content**:
1. [Libraries](#libraries)
2. [Gridworld Creation](#gridworld)
3. [Functions](#funct)
4. [Uniform Dataset MLP Performance](#mlpuniform)
5. [MLP performance on 90% of the dataset](#mlpuniform_90)
6. [MLP performance on 80% of the dataset](#mlpuniform_80)
7. [MLP performance on 70% of the dataset](#mlpuniform_70)
8. [MLP performance on 60% of the dataset](#mlpuniform_60)
9. [MLP performance on 50% of the dataset](#mlpuniform_50)
10. [MLP performance on 40% of the dataset](#mlpuniform_40)
11. [MLP performance on 30% of the dataset](#mlpuniform_30)
12. [MLP performance on 20% of the dataset](#mlpuniform_20)

## 1. Libraries <a id = "libraries"> 

In [15]:
import numpy as np
import random
import sys
import pandas as pd
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split, cross_validate, GridSearchCV, validation_curve
from sklearn.model_selection import train_test_split
from sklearn.metrics import make_scorer

## 2. Gridworld Creation <a id = "gridworld">

In [2]:
def randPair(s,e):
    """Draw two independent random integers from the half-open range [s, e).

    Returns them as a 2-tuple, first draw first.
    """
    first = np.random.randint(s, e)
    second = np.random.randint(s, e)
    return first, second

class BoardPiece:
    """A single named piece on the board.

    name: name of the piece
    code: an ASCII character to display on the board
    pos:  2-tuple position, e.g. (1,4)
    """

    def __init__(self, name, code, pos):
        self.name, self.code, self.pos = name, code, pos

class BoardMask:
    """A named mask over the board together with its display code."""

    def __init__(self, name, mask, code):
        self.name, self.mask, self.code = name, mask, code

    def get_positions(self):
        """Return the indices of the non-zero mask entries as a tuple of arrays."""
        nonzero_indices = np.nonzero(self.mask)
        return nonzero_indices

def zip_positions2d(positions):
    """Pair up a tuple of two index arrays into a list of (row, col) tuples."""
    rows, cols = positions
    return [(row, col) for row, col in zip(rows, cols)]

class GridBoard:
    """Square board holding named pieces and named masks.

    Attributes:
        size: board dimension (the board is size x size)
        components: dict mapping piece name -> BoardPiece
        masks: dict mapping mask name -> BoardMask
    """

    def __init__(self, size=4):
        self.size = size #Board dimensions, e.g. 4 x 4
        self.components = {} #name : board piece
        self.masks = {} #name : board mask

    def addPiece(self, name, code, pos=(0,0)):
        """Register a new piece under `name`, replacing any piece with that name."""
        self.components[name] = BoardPiece(name, code, pos)

    #basically a set of boundary elements
    def addMask(self, name, mask, code):
        """Register a mask under `name`.

        mask is a 2D-numpy array with 1s where the boundary elements are.
        """
        self.masks[name] = BoardMask(name, mask, code)

    def movePiece(self, name, pos):
        """Move piece `name` to `pos` unless `pos` is covered by any mask."""
        blocked = any(
            pos in zip_positions2d(mask.get_positions())
            for mask in self.masks.values()
        )
        if not blocked:
            self.components[name].pos = pos

    def delPiece(self, name):
        """Remove piece `name`; raises KeyError if no such piece exists."""
        #fixed: the original deleted the literal key 'name' instead of the argument
        del self.components[name]

    def render(self):
        """Return a (size, size) array of display codes; empty cells hold ' '.

        Masks are drawn after pieces, so a mask code overwrites a piece code
        on the same cell.
        """
        dtype = '<U2'
        displ_board = np.zeros((self.size, self.size), dtype=dtype)
        displ_board[:] = ' '

        for piece in self.components.values():
            displ_board[piece.pos] = piece.code

        for mask in self.masks.values():
            displ_board[mask.get_positions()] = mask.code

        return displ_board

    def render_np(self):
        """Return a uint8 array of shape (pieces + masks, size, size).

        Each piece and each mask gets its own layer, in insertion order
        (pieces first), with 1s marking the occupied cells.
        """
        num_pieces = len(self.components) + len(self.masks)
        displ_board = np.zeros((num_pieces, self.size, self.size), dtype=np.uint8)
        layer = 0
        for piece in self.components.values():
            pos = (layer,) + piece.pos
            displ_board[pos] = 1
            layer += 1

        for mask in self.masks.values():
            #fixed: use the mask being iterated, not a hard-coded 'boundary' key
            x,y = mask.get_positions()
            z = np.repeat(layer,len(x))
            displ_board[(z,x,y)] = 1
            layer += 1
        return displ_board

def addTuple(a,b):
    """Add two sequences element-wise and return the result as a tuple.

    The result is as long as the shorter input.
    """
    return tuple(map(sum, zip(a, b)))

In [3]:
class Gridworld:
    """Gridworld game with a Player, a Goal, a Pit and a Wall on a GridBoard.

    Args:
        size: board dimension; values below 4 fall back to 4.
        mode: 'static' and 'player' keep goal/pit/wall fixed and randomize
            the player; any other value randomizes all four pieces.
    """

    def __init__(self, size=4, mode='static'):
        if size >= 4:
            self.board = GridBoard(size=size)
        else:
            print("Minimum board size is 4. Initialized to size 4.")
            self.board = GridBoard(size=4)

        #Add pieces, positions will be updated later
        self.board.addPiece('Player','P',(0,0))
        self.board.addPiece('Goal','+',(0,3))
        self.board.addPiece('Pit','-',(0,3))
        self.board.addPiece('Wall','W',(2,3))

        if mode == 'static':
            self.initGridStatic()
        elif mode == 'player':
            self.initGridPlayer()
        else:
            self.initGridRand()

    #Initialize grid with fixed goal, pit and wall; the player is placed randomly
    def initGridStatic(self):
        """Place goal, pit and wall on fixed cells and redraw the player until valid."""
        components = self.board.components
        #retry in a loop instead of recursing so repeated rejections cannot grow the stack
        while True:
            components['Player'].pos = randPair(0,self.board.size) #Row, Column
            components['Goal'].pos = (3,0)
            components['Pit'].pos = (3,1)
            components['Wall'].pos = (3,2)
            if self.validateBoard():
                return

    #Check if board is initialized appropriately (no overlapping pieces)
    #also remove impossible-to-win boards
    def validateBoard(self):
        """Return True if no two pieces overlap and corner pieces are not boxed in."""
        components = self.board.components
        player = components['Player']
        goal = components['Goal']
        wall = components['Wall']
        pit = components['Pit']

        all_positions = [player.pos, goal.pos, wall.pos, pit.pos]
        if len(all_positions) > len(set(all_positions)):
            return False

        #fixed: the last valid index is size-1; the original used size, so only
        #(0,0) could ever match and the check below was skipped for other corners
        last = self.board.size - 1
        corners = [(0,0),(0,last),(last,0),(last,last)]
        #if player is in corner, can it move? if goal is in corner, is it blocked?
        if player.pos in corners or goal.pos in corners:
            steps = [(0,1),(1,0),(-1,0),(0,-1)]
            val_move_pl = [self.validateMove('Player', addpos) for addpos in steps]
            val_move_go = [self.validateMove('Goal', addpos) for addpos in steps]
            #0 means a free neighbouring cell; none at all means the piece is boxed in
            if 0 not in val_move_pl or 0 not in val_move_go:
                return False

        return True

    #Initialize player in random location, but keep wall, goal and pit stationary
    def initGridPlayer(self):
        """Run the static setup, then redraw the player; repeat until valid."""
        while True:
            self.initGridStatic()
            #place player
            self.board.components['Player'].pos = randPair(0,self.board.size)
            if self.validateBoard():
                return

    #Initialize grid so that goal, pit, wall, player are all randomly placed
    def initGridRand(self):
        """Redraw all four pieces at random until the board validates."""
        components = self.board.components
        while True:
            components['Player'].pos = randPair(0,self.board.size)
            components['Goal'].pos = randPair(0,self.board.size)
            components['Pit'].pos = randPair(0,self.board.size)
            components['Wall'].pos = randPair(0,self.board.size)
            if self.validateBoard():
                return

    def validateMove(self, piece, addpos=(0,0)):
        """Classify moving `piece` by offset `addpos`.

        Returns 0 for a valid move, 1 for an invalid one (wall or off the
        board) and 2 for a move onto the pit (lost game).
        """
        outcome = 0 #0 is valid, 1 invalid, 2 lost game
        pit = self.board.components['Pit'].pos
        wall = self.board.components['Wall'].pos
        new_pos = addTuple(self.board.components[piece].pos, addpos)
        if new_pos == wall:
            outcome = 1 #block move, player can't move to wall
        elif max(new_pos) > (self.board.size-1):    #if outside bounds of board
            outcome = 1
        elif min(new_pos) < 0: #if outside bounds
            outcome = 1
        elif new_pos == pit:
            outcome = 2

        return outcome

    def makeMove(self, action):
        """Move the player one cell for an action in {u,d,l,r}; anything else is ignored.

        Moves into the wall or off the board leave the player in place; moves
        onto the pit are carried out.
        """
        def checkMove(addpos):
            if self.validateMove('Player', addpos) in [0,2]:
                new_pos = addTuple(self.board.components['Player'].pos, addpos)
                self.board.movePiece('Player', new_pos)

        if action == 'u': #up
            checkMove((-1,0))
        elif action == 'd': #down
            checkMove((1,0))
        elif action == 'l': #left
            checkMove((0,-1))
        elif action == 'r': #right
            checkMove((0,1))

    def reward(self):
        """Return -10 if the player is on the pit, 10 if on the goal, otherwise -1."""
        player_pos = self.board.components['Player'].pos
        if player_pos == self.board.components['Pit'].pos:
            return -10
        elif player_pos == self.board.components['Goal'].pos:
            return 10
        else:
            return -1

    def display(self):
        """Return the character rendering of the board (see GridBoard.render)."""
        return self.board.render()

In [4]:
# Maps the integer action index to its move letter:
# 0 -> up, 1 -> down, 2 -> left, 3 -> right.
action_set = dict(enumerate(('u', 'd', 'l', 'r')))

## 3. Functions <a id = 'funct'> 

In [5]:
def from2dto1d(pos, size=4):
    """
    from2dto1d:
        Args:
            pos(str or tuple): 2d position of the objects(Player, Wall, Goal, Pit) in the
                gridworld, either as the string form of a tuple, e.g. '(1, 2)', or as a
                (row, col) pair
            size(int): board dimension, 4 by default
        Return:
            return(integer): 1d position (row * size + col) of the objects(Player, Wall,
                Goal, Pit) in the gridworld, or None if pos cannot be parsed or lies
                outside the board
    """
    import re

    if isinstance(pos, str):
        #parse "(row, col)"; only non-negative integers are accepted
        match = re.fullmatch(r"\s*\(\s*(\d+)\s*,\s*(\d+)\s*\)\s*", pos)
        if match is None:
            return None
        row, col = int(match.group(1)), int(match.group(2))
    else:
        try:
            row, col = (int(v) for v in pos)
        except (TypeError, ValueError):
            return None

    if not (0 <= row < size and 0 <= col < size):
        return None
    return row * size + col

In [6]:
def from1dto2d(pos, size=4):
    """
    from1dto2d:
        Args:
            pos(int): 1d position (0 .. size*size-1) of an object in the gridworld
            size(int): board dimension, 4 by default
        Return:
            return(tuple): (row, col) position in the gridworld, or None when pos is
                not a valid cell index
    """
    if pos not in range(size * size):
        return None
    #row-major layout: index = row * size + col
    return divmod(int(pos), size)

In [7]:
def from_num_to_one_hot_encode(num, size=16):
    """
    from_num_to_one_hot_encode:
        Args:
            num(int): number from 0 to size-1 representing the state of the
                objects(Player, Wall, Goal, Pit) in the gridworld
            size(int): length of the encoding, 16 by default
        Return:
            return(tuple): one-hot code of length size with a 1 at index num;
                all zeros when num matches no index
    """
    return tuple(1 if idx == num else 0 for idx in range(size))

In [8]:
def calculate_smape(actual, predicted) -> float:
    """Return the symmetric mean absolute percentage error, rounded to 2 decimals.

    Args:
        actual: array-like of true values
        predicted: array-like of predicted values (broadcastable against actual)
    Return:
        mean of |predicted - actual| / ((|predicted| + |actual|) / 2), times 100.
        Terms where both values are 0 count as 0 error instead of NaN.
    """
    #fixed: the original's trailing comma built a 1-tuple, so the conversion
    #raised ValueError for any non-ndarray input
    actual = np.asarray(actual, dtype=float)
    predicted = np.asarray(predicted, dtype=float)

    abs_error = np.abs(predicted - actual)
    scale = (np.abs(predicted) + np.abs(actual)) / 2
    #only divide where the denominator is non-zero; 0/0 terms stay at 0
    ratio = np.divide(abs_error, scale, out=np.zeros_like(abs_error), where=scale != 0)

    return round(np.mean(ratio) * 100, 2)

In [9]:
# Wrap calculate_smape as a scikit-learn scorer so it can be passed as `scoring=`.
# NOTE: make_scorer defaults to greater_is_better=True, so cross_validate reports
# the plain (positive) SMAPE. SMAPE is an error (lower is better), so do not use
# this scorer for model selection without greater_is_better=False.
score = make_scorer(calculate_smape)

In [10]:
def gridworld_test_model(model_b, mode='static', display=True):
    """Play one Gridworld game greedily with a fitted model and report the result.

    On every turn each of the four moves is tried on the board, the model
    scores the resulting [player, pit, goal, wall] state, the move is undone,
    and the best-scoring move is then played for real.

    Args:
        model_b: fitted regressor whose predict() accepts rows of
            [player, pit, goal, wall] 1d positions
        mode: passed to Gridworld(mode=...)
        display: if True, print the board and every move
    Return:
        True if the game ended on the goal, False if it ended on the pit or
        ran for more than 15 moves.
    """
    #(move, undo) pairs; the order must match action_set (0:'u', 1:'d', 2:'l', 3:'r')
    #because the argmax index below is looked up in action_set
    probes = (('u', 'd'), ('d', 'u'), ('l', 'r'), ('r', 'l'))

    test_game = Gridworld(mode=mode)

    def cell_of(name):
        #1d position of a piece, derived from the string form of its (row, col) tuple
        return from2dto1d(str(test_game.board.components[name].pos))

    if display:
        print("Initial State:")
        print(test_game.display())

    i = 0
    status = 1 #1: in progress, 2: won, 0: lost
    while status == 1:
        p_curr = cell_of('Player')
        pi = cell_of('Pit')
        g = cell_of('Goal')
        w = cell_of('Wall')

        q_value_next = []
        for move, undo in probes:
            test_game.makeMove(move)
            p = cell_of('Player')
            if p_curr != p:
                q_value_next.append(model_b.predict([[p,pi,g,w]])[0])
                test_game.makeMove(undo) #restore the board before the next probe
            else:
                #the move was blocked (wall or board edge): score it -10
                #NOTE(review): -10 is a sentinel, not -inf; a model that predicts
                #below -10 for every legal move would select a blocked move
                q_value_next.append(-10)

        action_ = np.argmax(q_value_next)
        action = action_set[action_]

        if display:
            print('Move #: %s; Taking action: %s' % (i, action))
        test_game.makeMove(action)

        if display:
            print(test_game.display())
        reward = test_game.reward()
        if reward != -1:
            if reward > 0:
                status = 2
                if display:
                    print("Game won! Reward: %s" % (reward,))
            else:
                status = 0
                if display:
                    print("Game LOST. Reward: %s" % (reward,))
        i += 1
        #only report the move limit when the game is still undecided; the
        #original also printed it after a win/loss on the final allowed move
        if status == 1 and i > 15:
            if display:
                print("Game lost; too many moves.")
            break

    return status == 2

### Dataset

In [11]:
# Load the dataset; the path is relative to the notebook's working directory.
# Columns used below: Player, Goal, Wall, Pit (inputs) and Q_Current /
# Bellman_Operator (targets). The two "Unnamed" columns in the output are
# leftover index columns and are not used.
df_uni = pd.read_csv("DatasetAfterUniform.csv")
df_uni

Unnamed: 0.1,Unnamed: 0,Player,Goal,Wall,Pit,Q_Current,Reward_Current,Bellman_Operator
0,0,0,2,1,0,0.305944,-10,1.284265
1,1,2,2,1,0,9.047926,10,10.000000
2,2,3,2,1,0,7.594888,-1,10.000000
3,3,4,2,1,0,2.538072,-1,4.271501
4,4,5,2,1,0,5.857223,-1,5.000492
...,...,...,...,...,...,...,...,...
50395,50395,8,13,14,15,5.306869,-1,5.827087
50396,50396,13,13,14,15,12.008292,10,10.000000
50397,50397,9,13,14,15,7.040803,-1,10.000000
50398,50398,12,13,14,15,7.585652,-1,10.000000


## 4. Uniform Dataset MLP Performance <a id = "mlpuniform"></a>

### Bellman Operator 100%

In [21]:
# Inputs are the positions of the four pieces; the target is the
# Bellman_Operator column. (train_test_split is already imported at the top
# of the notebook; this re-import is redundant but harmless.)
from sklearn.model_selection import train_test_split
X = df_uni[['Player','Pit', 'Goal', 'Wall']]
y = df_uni[['Bellman_Operator']]

In [22]:
# MLP with two hidden layers (200 and 120 units), ReLU activation, Adam solver,
# at most 300 iterations; random_state is fixed so runs are repeatable.
mlp_uniform_bo = MLPRegressor(hidden_layer_sizes=(200,120), max_iter=300, activation = 'relu', solver='adam', random_state=123)

In [23]:
# 5-fold cross-validation on the full dataset; `score` wraps calculate_smape,
# so the mean test score is a SMAPE percentage (lower is better).
scores = cross_validate(mlp_uniform_bo, X.values,y.values.ravel(), scoring=score, cv=5, return_train_score=True)
SMAPE_mlp_uniform_bo = np.mean(scores['test_score'])
SMAPE_mlp_uniform_bo



30.848000000000003

In [24]:
# cross_validate fits clones of the estimator, so fit the model itself here
# (on the full dataset) before it is used to play games.
mlp_uniform_bo.fit(X.values,y.values.ravel())

In [25]:
# Play 10 rounds of 1000 randomly initialised games with the fitted model and
# report the mean win rate over the rounds.
max_games = 1000 #hoisted: the original reassigned it on every round
win_num = 0
for _round in range(0,10):
    wins = 0
    #separate loop variable: the original reused `i` for both loops
    for _game in range(max_games):
        win = gridworld_test_model(mlp_uniform_bo, 'random', display = False)
        if win:
            wins += 1
    win_perc = float(wins) / float(max_games)
    win_num = win_num + win_perc
    print("Games played: {0}, # of wins: {1}".format(max_games,wins))
    print(win_perc)
    print(win_num)
win_num = win_num / 10
print("Win percentage: {}%".format(win_num*100))

Games played: 1000, # of wins: 884
0.884
0.884
Games played: 1000, # of wins: 889
0.889
1.7730000000000001
Games played: 1000, # of wins: 874
0.874
2.6470000000000002
Games played: 1000, # of wins: 879
0.879
3.5260000000000002
Games played: 1000, # of wins: 898
0.898
4.424
Games played: 1000, # of wins: 872
0.872
5.296
Games played: 1000, # of wins: 875
0.875
6.171
Games played: 1000, # of wins: 891
0.891
7.062
Games played: 1000, # of wins: 855
0.855
7.917
Games played: 1000, # of wins: 874
0.874
8.791
Win percentage: 87.91%


### Q-Value 100%

In [27]:
# Same inputs and network as the Bellman-operator run, but the target is the
# Q_Current column. 5-fold CV on the full dataset; the mean test score is a
# SMAPE percentage (lower is better). The re-import is redundant but harmless.
from sklearn.model_selection import train_test_split
X = df_uni[['Player','Pit', 'Goal', 'Wall']]
y = df_uni[['Q_Current']]
mlp_uniform_q = MLPRegressor(hidden_layer_sizes=(200,120), max_iter=300, activation = 'relu', solver='adam', random_state=123)
scores = cross_validate(mlp_uniform_q, X.values,y.values.ravel(), scoring=score, cv=5, return_train_score=True)
SMAPE_mlp_uniform_q = np.mean(scores['test_score'])
SMAPE_mlp_uniform_q



45.134

In [28]:
# cross_validate fits clones of the estimator, so fit the model itself here
# (on the full dataset) before it is used to play games.
mlp_uniform_q.fit(X.values,y.values.ravel())

In [29]:
# Play 10 rounds of 1000 randomly initialised games with the fitted model and
# report the mean win rate over the rounds.
max_games = 1000 #hoisted: the original reassigned it on every round
win_num = 0
for _round in range(0,10):
    wins = 0
    #separate loop variable: the original reused `i` for both loops
    for _game in range(max_games):
        win = gridworld_test_model(mlp_uniform_q, 'random', display = False)
        if win:
            wins += 1
    win_perc = float(wins) / float(max_games)
    win_num = win_num + win_perc
    print("Games played: {0}, # of wins: {1}".format(max_games,wins))
    print(win_perc)
    print(win_num)
win_num = win_num / 10
print("Win percentage: {}%".format(win_num*100))

Games played: 1000, # of wins: 925
0.925
0.925
Games played: 1000, # of wins: 927
0.927
1.852
Games played: 1000, # of wins: 936
0.936
2.7880000000000003
Games played: 1000, # of wins: 940
0.94
3.728
Games played: 1000, # of wins: 924
0.924
4.652
Games played: 1000, # of wins: 919
0.919
5.571
Games played: 1000, # of wins: 910
0.91
6.481
Games played: 1000, # of wins: 930
0.93
7.411
Games played: 1000, # of wins: 918
0.918
8.328999999999999
Games played: 1000, # of wins: 924
0.924
9.252999999999998
Win percentage: 92.52999999999997%


## 5. MLP performance on 90% of the dataset <a id = "mlpuniform_90"> 

### Bellman Operator 90%

In [30]:
# Bellman_Operator target on 90% of the rows. train_test_split is used only to
# subsample: the 10% held-out part (X_test, y_test) is not used in the cells
# shown here. 5-fold CV then runs on the remaining 90%; the mean test score is
# a SMAPE percentage (lower is better).
X = df_uni[['Player','Pit', 'Goal', 'Wall']]
y = df_uni[['Bellman_Operator']]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 1/10, random_state = 0)
mlp_uniform_bo_90 = MLPRegressor(hidden_layer_sizes=(200,120), max_iter=300, activation = 'relu', solver='adam', random_state=123)
scores = cross_validate(mlp_uniform_bo_90, X_train.values,y_train.values.ravel(), scoring=score, cv=5, return_train_score=True)
SMAPE_mlp_uniform_bo_90 = np.mean(scores['test_score'])
SMAPE_mlp_uniform_bo_90



25.122

In [31]:
# cross_validate fits clones of the estimator, so fit the model itself here
# on the 90% subset before it is used to play games.
mlp_uniform_bo_90.fit(X_train.values,y_train.values.ravel())



In [32]:
# Play 10 rounds of 1000 randomly initialised games with the fitted model and
# report the mean win rate over the rounds.
max_games = 1000 #hoisted: the original reassigned it on every round
win_num = 0
for _round in range(0,10):
    wins = 0
    #separate loop variable: the original reused `i` for both loops
    for _game in range(max_games):
        win = gridworld_test_model(mlp_uniform_bo_90, 'random', display = False)
        if win:
            wins += 1
    win_perc = float(wins) / float(max_games)
    win_num = win_num + win_perc
    print("Games played: {0}, # of wins: {1}".format(max_games,wins))
    print(win_perc)
    print(win_num)
win_num = win_num / 10
print("Win percentage: {}%".format(win_num*100))

Games played: 1000, # of wins: 881
0.881
0.881
Games played: 1000, # of wins: 911
0.911
1.792
Games played: 1000, # of wins: 876
0.876
2.668
Games played: 1000, # of wins: 894
0.894
3.5620000000000003
Games played: 1000, # of wins: 877
0.877
4.439
Games played: 1000, # of wins: 888
0.888
5.327
Games played: 1000, # of wins: 878
0.878
6.205
Games played: 1000, # of wins: 896
0.896
7.101
Games played: 1000, # of wins: 902
0.902
8.003
Games played: 1000, # of wins: 891
0.891
8.894
Win percentage: 88.94%


### Q-Value 90%

In [33]:
# Q_Current target on 90% of the rows. train_test_split is used only to
# subsample: the 10% held-out part (X_test, y_test) is not used in the cells
# shown here. 5-fold CV then runs on the remaining 90%; the mean test score is
# a SMAPE percentage (lower is better).
X = df_uni[['Player','Pit', 'Goal', 'Wall']]
y = df_uni[['Q_Current']]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 1/10, random_state = 0)
mlp_uniform_q_90 = MLPRegressor(hidden_layer_sizes=(200,120), max_iter=300, activation = 'relu', solver='adam', random_state=123)
scores = cross_validate(mlp_uniform_q_90, X_train.values,y_train.values.ravel(), scoring=score, cv=5, return_train_score=True)
SMAPE_mlp_uniform_q_90 = np.mean(scores['test_score'])
SMAPE_mlp_uniform_q_90



34.06

In [34]:
# cross_validate fits clones of the estimator, so fit the model itself here
# on the 90% subset before it is used to play games.
mlp_uniform_q_90.fit(X_train.values,y_train.values.ravel())



In [36]:
# Play 10 rounds of 1000 randomly initialised games with the fitted model and
# report the mean win rate over the rounds.
max_games = 1000 #hoisted: the original reassigned it on every round
win_num = 0
for _round in range(0,10):
    wins = 0
    #separate loop variable: the original reused `i` for both loops
    for _game in range(max_games):
        win = gridworld_test_model(mlp_uniform_q_90, 'random', display = False)
        if win:
            wins += 1
    win_perc = float(wins) / float(max_games)
    win_num = win_num + win_perc
    print("Games played: {0}, # of wins: {1}".format(max_games,wins))
    print(win_perc)
    print(win_num)
win_num = win_num / 10
print("Win percentage: {}%".format(win_num*100))

Games played: 1000, # of wins: 915
0.915
0.915
Games played: 1000, # of wins: 913
0.913
1.828
Games played: 1000, # of wins: 899
0.899
2.7270000000000003
Games played: 1000, # of wins: 923
0.923
3.6500000000000004
Games played: 1000, # of wins: 907
0.907
4.557
Games played: 1000, # of wins: 916
0.916
5.473000000000001
Games played: 1000, # of wins: 907
0.907
6.380000000000001
Games played: 1000, # of wins: 914
0.914
7.2940000000000005
Games played: 1000, # of wins: 913
0.913
8.207
Games played: 1000, # of wins: 903
0.903
9.110000000000001
Win percentage: 91.10000000000001%


## 6. MLP performance on 80% of the dataset <a id = "mlpuniform_80"> 

### Bellman Operator 80%

In [37]:
# Bellman_Operator target on 80% of the rows. train_test_split is used only to
# subsample: the 20% held-out part (X_test, y_test) is not used in the cells
# shown here. 5-fold CV then runs on the remaining 80%; the mean test score is
# a SMAPE percentage (lower is better).
X = df_uni[['Player','Pit', 'Goal', 'Wall']]
y = df_uni[['Bellman_Operator']]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 2/10, random_state = 0)
mlp_uniform_bo_80 = MLPRegressor(hidden_layer_sizes=(200,120), max_iter=300, activation = 'relu', solver='adam', random_state=123)
scores = cross_validate(mlp_uniform_bo_80, X_train.values,y_train.values.ravel(), scoring=score, cv=5, return_train_score=True)
SMAPE_mlp_uniform_bo_80 = np.mean(scores['test_score'])
SMAPE_mlp_uniform_bo_80



26.226

In [38]:
# cross_validate fits clones of the estimator, so fit the model itself here
# on the 80% subset before it is used to play games.
mlp_uniform_bo_80.fit(X_train.values,y_train.values.ravel())



In [39]:
# Play 10 rounds of 1000 randomly initialised games with the fitted model and
# report the mean win rate over the rounds.
max_games = 1000 #hoisted: the original reassigned it on every round
win_num = 0
for _round in range(0,10):
    wins = 0
    #separate loop variable: the original reused `i` for both loops
    for _game in range(max_games):
        win = gridworld_test_model(mlp_uniform_bo_80, 'random', display = False)
        if win:
            wins += 1
    win_perc = float(wins) / float(max_games)
    win_num = win_num + win_perc
    print("Games played: {0}, # of wins: {1}".format(max_games,wins))
    print(win_perc)
    print(win_num)
win_num = win_num / 10
print("Win percentage: {}%".format(win_num*100))

Games played: 1000, # of wins: 889
0.889
0.889
Games played: 1000, # of wins: 872
0.872
1.7610000000000001
Games played: 1000, # of wins: 887
0.887
2.648
Games played: 1000, # of wins: 889
0.889
3.537
Games played: 1000, # of wins: 877
0.877
4.414
Games played: 1000, # of wins: 891
0.891
5.305
Games played: 1000, # of wins: 872
0.872
6.177
Games played: 1000, # of wins: 865
0.865
7.042
Games played: 1000, # of wins: 873
0.873
7.915
Games played: 1000, # of wins: 890
0.89
8.805
Win percentage: 88.05%


### Q-Value 80%

In [40]:
# Q_Current target on 80% of the rows. train_test_split is used only to
# subsample: the 20% held-out part (X_test, y_test) is not used in the cells
# shown here. 5-fold CV then runs on the remaining 80%; the mean test score is
# a SMAPE percentage (lower is better).
X = df_uni[['Player','Pit', 'Goal', 'Wall']]
y = df_uni[['Q_Current']]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 2/10, random_state = 0)
mlp_uniform_q_80 = MLPRegressor(hidden_layer_sizes=(200,120), max_iter=300, activation = 'relu', solver='adam', random_state=123)
scores = cross_validate(mlp_uniform_q_80, X_train.values,y_train.values.ravel(), scoring=score, cv=5, return_train_score=True)
SMAPE_mlp_uniform_q_80 = np.mean(scores['test_score'])
SMAPE_mlp_uniform_q_80



36.658

In [41]:
# cross_validate fits clones of the estimator, so fit the model itself here
# on the 80% subset before it is used to play games.
mlp_uniform_q_80.fit(X_train.values,y_train.values.ravel())



In [42]:
# Play 10 rounds of 1000 randomly initialised games with the fitted model and
# report the mean win rate over the rounds.
max_games = 1000 #hoisted: the original reassigned it on every round
win_num = 0
for _round in range(0,10):
    wins = 0
    #separate loop variable: the original reused `i` for both loops
    for _game in range(max_games):
        win = gridworld_test_model(mlp_uniform_q_80, 'random', display = False)
        if win:
            wins += 1
    win_perc = float(wins) / float(max_games)
    win_num = win_num + win_perc
    print("Games played: {0}, # of wins: {1}".format(max_games,wins))
    print(win_perc)
    print(win_num)
win_num = win_num / 10
print("Win percentage: {}%".format(win_num*100))

Games played: 1000, # of wins: 910
0.91
0.91
Games played: 1000, # of wins: 919
0.919
1.8290000000000002
Games played: 1000, # of wins: 903
0.903
2.732
Games played: 1000, # of wins: 916
0.916
3.648
Games played: 1000, # of wins: 906
0.906
4.554
Games played: 1000, # of wins: 921
0.921
5.4750000000000005
Games played: 1000, # of wins: 930
0.93
6.405
Games played: 1000, # of wins: 901
0.901
7.306
Games played: 1000, # of wins: 925
0.925
8.231
Games played: 1000, # of wins: 903
0.903
9.134
Win percentage: 91.34%


## 7. MLP performance on 70% of the dataset <a id = "mlpuniform_70"> 

### Bellman Operator 70%

In [43]:
# Bellman_Operator target on 70% of the rows. train_test_split is used only to
# subsample: the 30% held-out part (X_test, y_test) is not used in the cells
# shown here. 5-fold CV then runs on the remaining 70%; the mean test score is
# a SMAPE percentage (lower is better).
X = df_uni[['Player','Pit', 'Goal', 'Wall']]
y = df_uni[['Bellman_Operator']]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 3/10, random_state = 0)
mlp_uniform_bo_70 = MLPRegressor(hidden_layer_sizes=(200,120), max_iter=300, activation = 'relu', solver='adam', random_state=123)
scores = cross_validate(mlp_uniform_bo_70, X_train.values,y_train.values.ravel(), scoring=score, cv=5, return_train_score=True)
SMAPE_mlp_uniform_bo_70 = np.mean(scores['test_score'])
SMAPE_mlp_uniform_bo_70



29.018

In [44]:
# cross_validate fits clones of the estimator, so fit the model itself here
# on the 70% subset before it is used to play games.
mlp_uniform_bo_70.fit(X_train.values,y_train.values.ravel())

In [45]:
# Play 10 rounds of 1000 randomly initialised games with the fitted model and
# report the mean win rate over the rounds.
max_games = 1000 #hoisted: the original reassigned it on every round
win_num = 0
for _round in range(0,10):
    wins = 0
    #separate loop variable: the original reused `i` for both loops
    for _game in range(max_games):
        win = gridworld_test_model(mlp_uniform_bo_70, 'random', display = False)
        if win:
            wins += 1
    win_perc = float(wins) / float(max_games)
    win_num = win_num + win_perc
    print("Games played: {0}, # of wins: {1}".format(max_games,wins))
    print(win_perc)
    print(win_num)
win_num = win_num / 10
print("Win percentage: {}%".format(win_num*100))

Games played: 1000, # of wins: 881
0.881
0.881
Games played: 1000, # of wins: 870
0.87
1.751
Games played: 1000, # of wins: 868
0.868
2.6189999999999998
Games played: 1000, # of wins: 868
0.868
3.4869999999999997
Games played: 1000, # of wins: 883
0.883
4.369999999999999
Games played: 1000, # of wins: 877
0.877
5.246999999999999
Games played: 1000, # of wins: 891
0.891
6.137999999999999
Games played: 1000, # of wins: 886
0.886
7.023999999999999
Games played: 1000, # of wins: 904
0.904
7.927999999999999
Games played: 1000, # of wins: 867
0.867
8.794999999999998
Win percentage: 87.94999999999999%


### Q-Value 70%

In [46]:
# Q_Current target on 70% of the rows. train_test_split is used only to
# subsample: the 30% held-out part (X_test, y_test) is not used in the cells
# shown here. 5-fold CV then runs on the remaining 70%; the mean test score is
# a SMAPE percentage (lower is better).
X = df_uni[['Player','Pit', 'Goal', 'Wall']]
y = df_uni[['Q_Current']]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 3/10, random_state = 0)
mlp_uniform_q_70 = MLPRegressor(hidden_layer_sizes=(200,120), max_iter=300, activation = 'relu', solver='adam', random_state=123)
scores = cross_validate(mlp_uniform_q_70, X_train.values,y_train.values.ravel(), scoring=score, cv=5, return_train_score=True)
SMAPE_mlp_uniform_q_70 = np.mean(scores['test_score'])
SMAPE_mlp_uniform_q_70



36.013999999999996

In [47]:
# cross_validate fits clones of the estimator, so fit the model itself here
# on the 70% subset before it is used to play games.
mlp_uniform_q_70.fit(X_train.values,y_train.values.ravel())



In [48]:
# Play 10 rounds of 1000 randomly initialised games with the fitted model and
# report the mean win rate over the rounds.
max_games = 1000 #hoisted: the original reassigned it on every round
win_num = 0
for _round in range(0,10):
    wins = 0
    #separate loop variable: the original reused `i` for both loops
    for _game in range(max_games):
        win = gridworld_test_model(mlp_uniform_q_70, 'random', display = False)
        if win:
            wins += 1
    win_perc = float(wins) / float(max_games)
    win_num = win_num + win_perc
    print("Games played: {0}, # of wins: {1}".format(max_games,wins))
    print(win_perc)
    print(win_num)
win_num = win_num / 10
print("Win percentage: {}%".format(win_num*100))

Games played: 1000, # of wins: 909
0.909
0.909
Games played: 1000, # of wins: 913
0.913
1.822
Games played: 1000, # of wins: 902
0.902
2.724
Games played: 1000, # of wins: 913
0.913
3.6370000000000005
Games played: 1000, # of wins: 926
0.926
4.563000000000001
Games played: 1000, # of wins: 913
0.913
5.476000000000001
Games played: 1000, # of wins: 925
0.925
6.401000000000001
Games played: 1000, # of wins: 922
0.922
7.323
Games played: 1000, # of wins: 911
0.911
8.234
Games played: 1000, # of wins: 900
0.9
9.134
Win percentage: 91.34%


## 8. MLP performance on 60% of the dataset <a id = "mlpuniform_60"> 

### Bellman Operator 60%

In [49]:
# Bellman_Operator target on 60% of the rows. train_test_split is used only to
# subsample: the 40% held-out part (X_test, y_test) is not used in the cells
# shown here. 5-fold CV then runs on the remaining 60%; the mean test score is
# a SMAPE percentage (lower is better).
X = df_uni[['Player','Pit', 'Goal', 'Wall']]
y = df_uni[['Bellman_Operator']]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 4/10, random_state = 0)
mlp_uniform_bo_60 = MLPRegressor(hidden_layer_sizes=(200,120), max_iter=300, activation = 'relu', solver='adam', random_state=123)
scores = cross_validate(mlp_uniform_bo_60, X_train.values,y_train.values.ravel(), scoring=score, cv=5, return_train_score=True)
SMAPE_mlp_uniform_bo_60 = np.mean(scores['test_score'])
SMAPE_mlp_uniform_bo_60



27.076

In [50]:
# cross_validate fits clones of the estimator, so fit the model itself here
# on the 60% subset before it is used to play games.
mlp_uniform_bo_60.fit(X_train.values,y_train.values.ravel())



In [51]:
# Play 10 rounds of 1000 randomly initialised games with the fitted model and
# report the mean win rate over the rounds.
max_games = 1000 #hoisted: the original reassigned it on every round
win_num = 0
for _round in range(0,10):
    wins = 0
    #separate loop variable: the original reused `i` for both loops
    for _game in range(max_games):
        win = gridworld_test_model(mlp_uniform_bo_60, 'random', display = False)
        if win:
            wins += 1
    win_perc = float(wins) / float(max_games)
    win_num = win_num + win_perc
    print("Games played: {0}, # of wins: {1}".format(max_games,wins))
    print(win_perc)
    print(win_num)
win_num = win_num / 10
print("Win percentage: {}%".format(win_num*100))

Games played: 1000, # of wins: 869
0.869
0.869
Games played: 1000, # of wins: 863
0.863
1.732
Games played: 1000, # of wins: 860
0.86
2.592
Games played: 1000, # of wins: 863
0.863
3.455
Games played: 1000, # of wins: 871
0.871
4.3260000000000005
Games played: 1000, # of wins: 850
0.85
5.176
Games played: 1000, # of wins: 879
0.879
6.055
Games played: 1000, # of wins: 865
0.865
6.92
Games played: 1000, # of wins: 871
0.871
7.791
Games played: 1000, # of wins: 882
0.882
8.673
Win percentage: 86.72999999999999%


### Q-Value 60%

In [52]:
# Q_Current target on 60% of the rows. train_test_split is used only to
# subsample: the 40% held-out part (X_test, y_test) is not used in the cells
# shown here. 5-fold CV then runs on the remaining 60%; the mean test score is
# a SMAPE percentage (lower is better).
X = df_uni[['Player','Pit', 'Goal', 'Wall']]
y = df_uni[['Q_Current']]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 4/10, random_state = 0)
mlp_uniform_q_60 = MLPRegressor(hidden_layer_sizes=(200,120), max_iter=300, activation = 'relu', solver='adam', random_state=123)
scores = cross_validate(mlp_uniform_q_60, X_train.values,y_train.values.ravel(), scoring=score, cv=5, return_train_score=True)
SMAPE_mlp_uniform_q_60 = np.mean(scores['test_score'])
SMAPE_mlp_uniform_q_60



39.089999999999996

In [53]:
# cross_validate fits clones of the estimator, so fit the model itself here
# on the 60% subset before it is used to play games.
mlp_uniform_q_60.fit(X_train.values,y_train.values.ravel())

In [54]:
# Play 10 rounds of 1000 randomly initialised games with the fitted model and
# report the mean win rate over the rounds.
max_games = 1000 #hoisted: the original reassigned it on every round
win_num = 0
for _round in range(0,10):
    wins = 0
    #separate loop variable: the original reused `i` for both loops
    for _game in range(max_games):
        win = gridworld_test_model(mlp_uniform_q_60, 'random', display = False)
        if win:
            wins += 1
    win_perc = float(wins) / float(max_games)
    win_num = win_num + win_perc
    print("Games played: {0}, # of wins: {1}".format(max_games,wins))
    print(win_perc)
    print(win_num)
win_num = win_num / 10
print("Win percentage: {}%".format(win_num*100))

Games played: 1000, # of wins: 891
0.891
0.891
Games played: 1000, # of wins: 887
0.887
1.778
Games played: 1000, # of wins: 864
0.864
2.642
Games played: 1000, # of wins: 884
0.884
3.526
Games played: 1000, # of wins: 870
0.87
4.396
Games played: 1000, # of wins: 875
0.875
5.271
Games played: 1000, # of wins: 869
0.869
6.14
Games played: 1000, # of wins: 879
0.879
7.019
Games played: 1000, # of wins: 887
0.887
7.906000000000001
Games played: 1000, # of wins: 886
0.886
8.792
Win percentage: 87.92%


## 9. MLP performance on 50% of the dataset <a id="mlpuniform_50"></a>

### Bellman Operator 50%

In [55]:
# Bellman-operator target, trained on 50% of the uniform dataset (50% held out).
X = df_uni[['Player', 'Pit', 'Goal', 'Wall']]
y = df_uni[['Bellman_Operator']]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=5/10, random_state=0
)

mlp_uniform_bo_50 = MLPRegressor(
    hidden_layer_sizes=(200, 120),
    activation='relu',
    solver='adam',
    max_iter=300,
    random_state=123,
)

# 5-fold cross-validation on the training split only, using the notebook's
# `score` scorer (presumably SMAPE, judging by the result name -- confirm).
scores = cross_validate(
    mlp_uniform_bo_50,
    X_train.values,
    y_train.values.ravel(),
    scoring=score,
    cv=5,
    return_train_score=True,
)
SMAPE_mlp_uniform_bo_50 = scores['test_score'].mean()
SMAPE_mlp_uniform_bo_50



27.360000000000003

In [56]:
# Fit on the whole 50% training split; cross_validate only fits clones of
# the estimator, so the model itself is still unfitted at this point.
mlp_uniform_bo_50.fit(
    X=X_train.values,
    y=y_train.values.ravel(),
)



In [57]:
# Estimate the win rate of mlp_uniform_bo_50: play `n_rounds` batches of
# `max_games` games each and average the per-batch win fractions.
n_rounds = 10      # single source of truth for both the loop and the average
max_games = 1000
win_num = 0        # running sum of per-batch win fractions
for _round in range(n_rounds):
    wins = 0
    for _game in range(max_games):
        # gridworld_test_model returns a truthy value when the game is won.
        if gridworld_test_model(mlp_uniform_bo_50, 'random', display=False):
            wins += 1
    win_perc = wins / max_games
    win_num += win_perc
    print("Games played: {0}, # of wins: {1}".format(max_games, wins))
    print(win_perc)
    print(win_num)
win_num /= n_rounds
print("Win percentage: {}%".format(win_num*100))

Games played: 1000, # of wins: 830
0.83
0.83
Games played: 1000, # of wins: 831
0.831
1.661
Games played: 1000, # of wins: 849
0.849
2.51
Games played: 1000, # of wins: 857
0.857
3.367
Games played: 1000, # of wins: 847
0.847
4.214
Games played: 1000, # of wins: 860
0.86
5.074000000000001
Games played: 1000, # of wins: 844
0.844
5.918000000000001
Games played: 1000, # of wins: 855
0.855
6.7730000000000015
Games played: 1000, # of wins: 859
0.859
7.6320000000000014
Games played: 1000, # of wins: 826
0.826
8.458000000000002
Win percentage: 84.58000000000003%


### Q-Value 50%

In [58]:
# Q-value target, trained on 50% of the uniform dataset (50% held out).
X = df_uni[['Player', 'Pit', 'Goal', 'Wall']]
y = df_uni[['Q_Current']]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=5/10, random_state=0
)

mlp_uniform_q_50 = MLPRegressor(
    hidden_layer_sizes=(200, 120),
    activation='relu',
    solver='adam',
    max_iter=300,
    random_state=123,
)

# 5-fold cross-validation on the training split only, using the notebook's
# `score` scorer (presumably SMAPE, judging by the result name -- confirm).
scores = cross_validate(
    mlp_uniform_q_50,
    X_train.values,
    y_train.values.ravel(),
    scoring=score,
    cv=5,
    return_train_score=True,
)
SMAPE_mlp_uniform_q_50 = scores['test_score'].mean()
SMAPE_mlp_uniform_q_50



37.93

In [59]:
# Fit on the whole 50% training split; cross_validate only fits clones of
# the estimator, so the model itself is still unfitted at this point.
mlp_uniform_q_50.fit(
    X=X_train.values,
    y=y_train.values.ravel(),
)



In [60]:
# Estimate the win rate of mlp_uniform_q_50: play `n_rounds` batches of
# `max_games` games each and average the per-batch win fractions.
n_rounds = 10      # single source of truth for both the loop and the average
max_games = 1000
win_num = 0        # running sum of per-batch win fractions
for _round in range(n_rounds):
    wins = 0
    for _game in range(max_games):
        # gridworld_test_model returns a truthy value when the game is won.
        if gridworld_test_model(mlp_uniform_q_50, 'random', display=False):
            wins += 1
    win_perc = wins / max_games
    win_num += win_perc
    print("Games played: {0}, # of wins: {1}".format(max_games, wins))
    print(win_perc)
    print(win_num)
win_num /= n_rounds
print("Win percentage: {}%".format(win_num*100))

Games played: 1000, # of wins: 885
0.885
0.885
Games played: 1000, # of wins: 886
0.886
1.771
Games played: 1000, # of wins: 878
0.878
2.649
Games played: 1000, # of wins: 881
0.881
3.5300000000000002
Games played: 1000, # of wins: 888
0.888
4.418
Games played: 1000, # of wins: 891
0.891
5.309
Games played: 1000, # of wins: 878
0.878
6.187
Games played: 1000, # of wins: 895
0.895
7.082000000000001
Games played: 1000, # of wins: 875
0.875
7.957000000000001
Games played: 1000, # of wins: 885
0.885
8.842
Win percentage: 88.42000000000002%


## 10. MLP performance on 40% of the dataset <a id="mlpuniform_40"></a>

### Bellman Operator 40%

In [61]:
# Bellman-operator target, trained on 40% of the uniform dataset (60% held out).
X = df_uni[['Player', 'Pit', 'Goal', 'Wall']]
y = df_uni[['Bellman_Operator']]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=6/10, random_state=0
)

mlp_uniform_bo_40 = MLPRegressor(
    hidden_layer_sizes=(200, 120),
    activation='relu',
    solver='adam',
    max_iter=300,
    random_state=123,
)

# 5-fold cross-validation on the training split only, using the notebook's
# `score` scorer (presumably SMAPE, judging by the result name -- confirm).
scores = cross_validate(
    mlp_uniform_bo_40,
    X_train.values,
    y_train.values.ravel(),
    scoring=score,
    cv=5,
    return_train_score=True,
)
SMAPE_mlp_uniform_bo_40 = scores['test_score'].mean()
SMAPE_mlp_uniform_bo_40



29.552

In [62]:
# Fit on the whole 40% training split; cross_validate only fits clones of
# the estimator, so the model itself is still unfitted at this point.
mlp_uniform_bo_40.fit(
    X=X_train.values,
    y=y_train.values.ravel(),
)

In [63]:
# Estimate the win rate of mlp_uniform_bo_40: play `n_rounds` batches of
# `max_games` games each and average the per-batch win fractions.
n_rounds = 10      # single source of truth for both the loop and the average
max_games = 1000
win_num = 0        # running sum of per-batch win fractions
for _round in range(n_rounds):
    wins = 0
    for _game in range(max_games):
        # gridworld_test_model returns a truthy value when the game is won.
        if gridworld_test_model(mlp_uniform_bo_40, 'random', display=False):
            wins += 1
    win_perc = wins / max_games
    win_num += win_perc
    print("Games played: {0}, # of wins: {1}".format(max_games, wins))
    print(win_perc)
    print(win_num)
win_num /= n_rounds
print("Win percentage: {}%".format(win_num*100))

Games played: 1000, # of wins: 850
0.85
0.85
Games played: 1000, # of wins: 843
0.843
1.693
Games played: 1000, # of wins: 825
0.825
2.518
Games played: 1000, # of wins: 845
0.845
3.3629999999999995
Games played: 1000, # of wins: 828
0.828
4.191
Games played: 1000, # of wins: 839
0.839
5.029999999999999
Games played: 1000, # of wins: 828
0.828
5.858
Games played: 1000, # of wins: 829
0.829
6.686999999999999
Games played: 1000, # of wins: 830
0.83
7.5169999999999995
Games played: 1000, # of wins: 827
0.827
8.344
Win percentage: 83.44%


### Q-Value 40%

In [64]:
# Q-value target, trained on 40% of the uniform dataset (60% held out).
X = df_uni[['Player', 'Pit', 'Goal', 'Wall']]
y = df_uni[['Q_Current']]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=6/10, random_state=0
)

mlp_uniform_q_40 = MLPRegressor(
    hidden_layer_sizes=(200, 120),
    activation='relu',
    solver='adam',
    max_iter=300,
    random_state=123,
)

# 5-fold cross-validation on the training split only, using the notebook's
# `score` scorer (presumably SMAPE, judging by the result name -- confirm).
scores = cross_validate(
    mlp_uniform_q_40,
    X_train.values,
    y_train.values.ravel(),
    scoring=score,
    cv=5,
    return_train_score=True,
)
SMAPE_mlp_uniform_q_40 = scores['test_score'].mean()
SMAPE_mlp_uniform_q_40



40.712

In [65]:
# Fit on the whole 40% training split; cross_validate only fits clones of
# the estimator, so the model itself is still unfitted at this point.
mlp_uniform_q_40.fit(
    X=X_train.values,
    y=y_train.values.ravel(),
)



In [66]:
# Estimate the win rate of mlp_uniform_q_40: play `n_rounds` batches of
# `max_games` games each and average the per-batch win fractions.
n_rounds = 10      # single source of truth for both the loop and the average
max_games = 1000
win_num = 0        # running sum of per-batch win fractions
for _round in range(n_rounds):
    wins = 0
    for _game in range(max_games):
        # gridworld_test_model returns a truthy value when the game is won.
        if gridworld_test_model(mlp_uniform_q_40, 'random', display=False):
            wins += 1
    win_perc = wins / max_games
    win_num += win_perc
    print("Games played: {0}, # of wins: {1}".format(max_games, wins))
    print(win_perc)
    print(win_num)
win_num /= n_rounds
print("Win percentage: {}%".format(win_num*100))

Games played: 1000, # of wins: 879
0.879
0.879
Games played: 1000, # of wins: 892
0.892
1.771
Games played: 1000, # of wins: 863
0.863
2.634
Games played: 1000, # of wins: 881
0.881
3.5149999999999997
Games played: 1000, # of wins: 880
0.88
4.395
Games played: 1000, # of wins: 882
0.882
5.276999999999999
Games played: 1000, # of wins: 865
0.865
6.1419999999999995
Games played: 1000, # of wins: 867
0.867
7.0089999999999995
Games played: 1000, # of wins: 871
0.871
7.879999999999999
Games played: 1000, # of wins: 866
0.866
8.745999999999999
Win percentage: 87.45999999999998%


## 11. MLP performance on 30% of the dataset <a id="mlpuniform_30"></a>

### Bellman Operator 30%

In [67]:
# Bellman-operator target, trained on 30% of the uniform dataset (70% held out).
X = df_uni[['Player', 'Pit', 'Goal', 'Wall']]
y = df_uni[['Bellman_Operator']]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=7/10, random_state=0
)

mlp_uniform_bo_30 = MLPRegressor(
    hidden_layer_sizes=(200, 120),
    activation='relu',
    solver='adam',
    max_iter=300,
    random_state=123,
)

# 5-fold cross-validation on the training split only, using the notebook's
# `score` scorer (presumably SMAPE, judging by the result name -- confirm).
scores = cross_validate(
    mlp_uniform_bo_30,
    X_train.values,
    y_train.values.ravel(),
    scoring=score,
    cv=5,
    return_train_score=True,
)
SMAPE_mlp_uniform_bo_30 = scores['test_score'].mean()
SMAPE_mlp_uniform_bo_30



31.102000000000004

In [68]:
# Fit on the whole 30% training split; cross_validate only fits clones of
# the estimator, so the model itself is still unfitted at this point.
mlp_uniform_bo_30.fit(
    X=X_train.values,
    y=y_train.values.ravel(),
)



In [69]:
# Estimate the win rate of mlp_uniform_bo_30: play `n_rounds` batches of
# `max_games` games each and average the per-batch win fractions.
n_rounds = 10      # single source of truth for both the loop and the average
max_games = 1000
win_num = 0        # running sum of per-batch win fractions
for _round in range(n_rounds):
    wins = 0
    for _game in range(max_games):
        # gridworld_test_model returns a truthy value when the game is won.
        if gridworld_test_model(mlp_uniform_bo_30, 'random', display=False):
            wins += 1
    win_perc = wins / max_games
    win_num += win_perc
    print("Games played: {0}, # of wins: {1}".format(max_games, wins))
    print(win_perc)
    print(win_num)
win_num /= n_rounds
print("Win percentage: {}%".format(win_num*100))

Games played: 1000, # of wins: 820
0.82
0.82
Games played: 1000, # of wins: 828
0.828
1.648
Games played: 1000, # of wins: 829
0.829
2.477
Games played: 1000, # of wins: 817
0.817
3.2939999999999996
Games played: 1000, # of wins: 807
0.807
4.101
Games played: 1000, # of wins: 821
0.821
4.922
Games played: 1000, # of wins: 816
0.816
5.7379999999999995
Games played: 1000, # of wins: 826
0.826
6.563999999999999
Games played: 1000, # of wins: 801
0.801
7.364999999999999
Games played: 1000, # of wins: 807
0.807
8.171999999999999
Win percentage: 81.72%


### Q-Value 30%

In [70]:
# Q-value target, trained on 30% of the uniform dataset (70% held out).
X = df_uni[['Player', 'Pit', 'Goal', 'Wall']]
y = df_uni[['Q_Current']]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=7/10, random_state=0
)

mlp_uniform_q_30 = MLPRegressor(
    hidden_layer_sizes=(200, 120),
    activation='relu',
    solver='adam',
    max_iter=300,
    random_state=123,
)

# 5-fold cross-validation on the training split only, using the notebook's
# `score` scorer (presumably SMAPE, judging by the result name -- confirm).
scores = cross_validate(
    mlp_uniform_q_30,
    X_train.values,
    y_train.values.ravel(),
    scoring=score,
    cv=5,
    return_train_score=True,
)
SMAPE_mlp_uniform_q_30 = scores['test_score'].mean()
SMAPE_mlp_uniform_q_30



42.6

In [71]:
# Fit on the whole 30% training split; cross_validate only fits clones of
# the estimator, so the model itself is still unfitted at this point.
mlp_uniform_q_30.fit(
    X=X_train.values,
    y=y_train.values.ravel(),
)



In [72]:
# Estimate the win rate of mlp_uniform_q_30: play `n_rounds` batches of
# `max_games` games each and average the per-batch win fractions.
n_rounds = 10      # single source of truth for both the loop and the average
max_games = 1000
win_num = 0        # running sum of per-batch win fractions
for _round in range(n_rounds):
    wins = 0
    for _game in range(max_games):
        # gridworld_test_model returns a truthy value when the game is won.
        if gridworld_test_model(mlp_uniform_q_30, 'random', display=False):
            wins += 1
    win_perc = wins / max_games
    win_num += win_perc
    print("Games played: {0}, # of wins: {1}".format(max_games, wins))
    print(win_perc)
    print(win_num)
win_num /= n_rounds
print("Win percentage: {}%".format(win_num*100))

Games played: 1000, # of wins: 856
0.856
0.856
Games played: 1000, # of wins: 846
0.846
1.702
Games played: 1000, # of wins: 857
0.857
2.559
Games played: 1000, # of wins: 860
0.86
3.419
Games played: 1000, # of wins: 852
0.852
4.271
Games played: 1000, # of wins: 867
0.867
5.138
Games played: 1000, # of wins: 859
0.859
5.997
Games played: 1000, # of wins: 844
0.844
6.841
Games played: 1000, # of wins: 870
0.87
7.711
Games played: 1000, # of wins: 855
0.855
8.566
Win percentage: 85.66%


## 12. MLP performance on 20% of the dataset <a id="mlpuniform_20"></a>

### Bellman Operator 20%

In [73]:
# Bellman-operator target, trained on 20% of the uniform dataset (80% held out).
X = df_uni[['Player', 'Pit', 'Goal', 'Wall']]
y = df_uni[['Bellman_Operator']]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=8/10, random_state=0
)

mlp_uniform_bo_20 = MLPRegressor(
    hidden_layer_sizes=(200, 120),
    activation='relu',
    solver='adam',
    max_iter=300,
    random_state=123,
)

# 5-fold cross-validation on the training split only, using the notebook's
# `score` scorer (presumably SMAPE, judging by the result name -- confirm).
scores = cross_validate(
    mlp_uniform_bo_20,
    X_train.values,
    y_train.values.ravel(),
    scoring=score,
    cv=5,
    return_train_score=True,
)
SMAPE_mlp_uniform_bo_20 = scores['test_score'].mean()
SMAPE_mlp_uniform_bo_20

31.869999999999997

In [74]:
# Fit on the whole 20% training split; cross_validate only fits clones of
# the estimator, so the model itself is still unfitted at this point.
mlp_uniform_bo_20.fit(
    X=X_train.values,
    y=y_train.values.ravel(),
)

In [75]:
# Estimate the win rate of mlp_uniform_bo_20: play `n_rounds` batches of
# `max_games` games each and average the per-batch win fractions.
n_rounds = 10      # single source of truth for both the loop and the average
max_games = 1000
win_num = 0        # running sum of per-batch win fractions
for _round in range(n_rounds):
    wins = 0
    for _game in range(max_games):
        # gridworld_test_model returns a truthy value when the game is won.
        if gridworld_test_model(mlp_uniform_bo_20, 'random', display=False):
            wins += 1
    win_perc = wins / max_games
    win_num += win_perc
    print("Games played: {0}, # of wins: {1}".format(max_games, wins))
    print(win_perc)
    print(win_num)
win_num /= n_rounds
print("Win percentage: {}%".format(win_num*100))

Games played: 1000, # of wins: 788
0.788
0.788
Games played: 1000, # of wins: 795
0.795
1.5830000000000002
Games played: 1000, # of wins: 772
0.772
2.3550000000000004
Games played: 1000, # of wins: 781
0.781
3.1360000000000006
Games played: 1000, # of wins: 764
0.764
3.9000000000000004
Games played: 1000, # of wins: 784
0.784
4.684
Games played: 1000, # of wins: 787
0.787
5.471
Games played: 1000, # of wins: 787
0.787
6.258
Games played: 1000, # of wins: 787
0.787
7.045
Games played: 1000, # of wins: 761
0.761
7.806
Win percentage: 78.06%


### Q-Value 20%

In [76]:
# Q-value target, trained on 20% of the uniform dataset (80% held out).
X = df_uni[['Player', 'Pit', 'Goal', 'Wall']]
y = df_uni[['Q_Current']]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=8/10, random_state=0
)

mlp_uniform_q_20 = MLPRegressor(
    hidden_layer_sizes=(200, 120),
    activation='relu',
    solver='adam',
    max_iter=300,
    random_state=123,
)

# 5-fold cross-validation on the training split only, using the notebook's
# `score` scorer (presumably SMAPE, judging by the result name -- confirm).
scores = cross_validate(
    mlp_uniform_q_20,
    X_train.values,
    y_train.values.ravel(),
    scoring=score,
    cv=5,
    return_train_score=True,
)
SMAPE_mlp_uniform_q_20 = scores['test_score'].mean()
SMAPE_mlp_uniform_q_20

44.742

In [77]:
# Fit on the whole 20% training split; cross_validate only fits clones of
# the estimator, so the model itself is still unfitted at this point.
mlp_uniform_q_20.fit(
    X=X_train.values,
    y=y_train.values.ravel(),
)



In [78]:
# Estimate the win rate of mlp_uniform_q_20: play `n_rounds` batches of
# `max_games` games each and average the per-batch win fractions.
n_rounds = 10      # single source of truth for both the loop and the average
max_games = 1000
win_num = 0        # running sum of per-batch win fractions
for _round in range(n_rounds):
    wins = 0
    for _game in range(max_games):
        # gridworld_test_model returns a truthy value when the game is won.
        if gridworld_test_model(mlp_uniform_q_20, 'random', display=False):
            wins += 1
    win_perc = wins / max_games
    win_num += win_perc
    print("Games played: {0}, # of wins: {1}".format(max_games, wins))
    print(win_perc)
    print(win_num)
win_num /= n_rounds
print("Win percentage: {}%".format(win_num*100))

Games played: 1000, # of wins: 836
0.836
0.836
Games played: 1000, # of wins: 863
0.863
1.6989999999999998
Games played: 1000, # of wins: 820
0.82
2.5189999999999997
Games played: 1000, # of wins: 846
0.846
3.3649999999999998
Games played: 1000, # of wins: 855
0.855
4.22
Games played: 1000, # of wins: 850
0.85
5.069999999999999
Games played: 1000, # of wins: 838
0.838
5.9079999999999995
Games played: 1000, # of wins: 848
0.848
6.755999999999999
Games played: 1000, # of wins: 834
0.834
7.589999999999999
Games played: 1000, # of wins: 867
0.867
8.456999999999999
Win percentage: 84.57%
