# Dataset_Preprocessing

**Contents**:
1. [Libraries](#libraries)
2. [Gridworld Creation](#gridworld)
3. [Functions](#funct)
4. [Uniform Dataset Preprocessing](#uniformtransformations)
5. [NonUniform Dataset Preprocessing](#nonuniformtransformations)

## 1. Libraries <a id="libraries"></a>

In [2]:
import numpy as np
import random
import sys
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from sklearn.neural_network import MLPRegressor
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split

## 2. Gridworld Creation <a id="gridworld"></a>

In [3]:
def randPair(s,e):
    """Draw two random integers, each taken from np.random.randint(s, e)."""
    first = np.random.randint(s, e)
    second = np.random.randint(s, e)
    return first, second

class BoardPiece:
    """A named piece placed on a single cell of the board."""

    def __init__(self, name, code, pos):
        # name: name of the piece
        # code: an ASCII character to display on the board
        # pos:  2-tuple, e.g. (1,4)
        self.name, self.code, self.pos = name, code, pos

class BoardMask:
    """A named group of board cells described by a mask array."""

    def __init__(self, name, mask, code):
        self.name, self.mask, self.code = name, mask, code

    def get_positions(self):
        """Return the indices of the non-zero mask entries as a tuple of arrays."""
        occupied = np.nonzero(self.mask)
        return occupied

def zip_positions2d(positions):
    """Pair up a tuple of two index arrays into a list of (x, y) tuples."""
    first_axis, second_axis = positions
    return [(a, b) for a, b in zip(first_axis, second_axis)]

class GridBoard:
    """Square board holding named pieces (one cell each) and named masks (sets of cells)."""

    def __init__(self, size=4):
        self.size = size #Board dimensions, e.g. 4 x 4
        self.components = {} #name : board piece
        self.masks = {} #name : board mask

    def addPiece(self, name, code, pos=(0,0)):
        """Create a piece and store it under ``name``, replacing any piece already stored there."""
        newPiece = BoardPiece(name, code, pos)
        self.components[name] = newPiece

    #basically a set of boundary elements
    def addMask(self, name, mask, code):
        #mask is a 2D-numpy array with 1s where the boundary elements are
        newMask = BoardMask(name, mask, code)
        self.masks[name] = newMask

    def movePiece(self, name, pos):
        """Move piece ``name`` to ``pos`` unless ``pos`` is a cell covered by one of the masks."""
        blocked = any(
            pos in zip_positions2d(mask.get_positions())
            for mask in self.masks.values()
        )
        if not blocked:
            self.components[name].pos = pos

    def delPiece(self, name):
        """Remove the piece stored under ``name``; raises KeyError if there is none."""
        #the key is the ``name`` argument, not the literal string 'name'
        del self.components[name]

    def render(self):
        """Return a (size, size) array of strings: piece/mask codes on occupied cells, ' ' elsewhere."""
        dtype = '<U2'
        displ_board = np.zeros((self.size, self.size), dtype=dtype)
        displ_board[:] = ' '

        for piece in self.components.values():
            displ_board[piece.pos] = piece.code

        #masks are drawn last, so they overwrite a piece sharing the same cell
        for mask in self.masks.values():
            displ_board[mask.get_positions()] = mask.code

        return displ_board

    def render_np(self):
        """Return a uint8 array of shape (pieces + masks, size, size) with one binary layer each.

        Piece layers come first (in insertion order), followed by one layer per mask.
        """
        num_pieces = len(self.components) + len(self.masks)
        displ_board = np.zeros((num_pieces, self.size, self.size), dtype=np.uint8)
        layer = 0
        for piece in self.components.values():
            pos = (layer,) + piece.pos
            displ_board[pos] = 1
            layer += 1

        for mask in self.masks.values():
            #use the mask being iterated, not a hard-coded 'boundary' entry
            rows, cols = mask.get_positions()
            layers = np.repeat(layer, len(rows))
            displ_board[(layers, rows, cols)] = 1
            layer += 1
        return displ_board

def addTuple(a,b):
    """Add two tuples element by element (truncating to the shorter one)."""
    paired = zip(a, b)
    return tuple(map(sum, paired))

In [4]:
class Gridworld:
    """Gridworld game: a Player, a Goal, a Pit and a Wall on a square board.

    Args:
        size: Side length of the board; values below 4 are replaced by 4.
        mode: 'static' fixes Goal, Pit and Wall and draws the Player at random,
            'player' does the same and then re-draws the Player, any other
            value draws all four pieces at random.
    """

    def __init__(self, size=4, mode='static'):
        if size >= 4:
            self.board = GridBoard(size=size)
        else:
            print("Minimum board size is 4. Initialized to size 4.")
            self.board = GridBoard(size=4)

        #Add pieces, positions will be updated later
        self.board.addPiece('Player','P',(0,0))
        self.board.addPiece('Goal','+',(0,3))
        self.board.addPiece('Pit','-',(0,3))
        self.board.addPiece('Wall','W',(2,3))

        if mode == 'static':
            self.initGridStatic()
        elif mode == 'player':
            self.initGridPlayer()
        else:
            self.initGridRand()

    #Initialize grid with fixed Goal, Pit and Wall; the Player is drawn at random
    def initGridStatic(self):
        #Setup static pieces
        self.board.components['Goal'].pos = (3,0)
        self.board.components['Pit'].pos = (3,1)
        self.board.components['Wall'].pos = (3,2)

        #Re-draw the player until the board is valid (loop instead of recursion,
        #so a long run of rejected draws cannot exhaust the call stack)
        while True:
            self.board.components['Player'].pos = randPair(0,self.board.size) #Row, Column
            if self.validateBoard():
                break

    #Check if board is initialized appropriately (no overlapping pieces)
    #also remove impossible-to-win boards
    def validateBoard(self):
        """Return True when no two pieces overlap and a cornered Player/Goal is not boxed in."""
        player = self.board.components['Player']
        goal = self.board.components['Goal']
        wall = self.board.components['Wall']
        pit = self.board.components['Pit']

        all_positions = [player.pos, goal.pos, wall.pos, pit.pos]
        if len(all_positions) > len(set(all_positions)):
            return False

        #corner cells use the last valid index (size-1); `size` itself is off the board
        last = self.board.size - 1
        corners = [(0,0),(0,last), (last,0), (last,last)]
        #if player is in corner, can it move? if goal is in corner, is it blocked?
        if player.pos in corners or goal.pos in corners:
            steps = [(0,1),(1,0),(-1,0),(0,-1)]
            val_move_pl = [self.validateMove('Player', addpos) for addpos in steps]
            val_move_go = [self.validateMove('Goal', addpos) for addpos in steps]
            #0 means "valid move": each of the two pieces needs at least one
            if 0 not in val_move_pl or 0 not in val_move_go:
                return False

        return True

    #Initialize player in random location, but keep wall, goal and pit stationary
    def initGridPlayer(self):
        #height x width x depth (number of pieces)
        while True:
            self.initGridStatic()
            #place player
            self.board.components['Player'].pos = randPair(0,self.board.size)
            if self.validateBoard():
                break

    #Initialize grid so that goal, pit, wall, player are all randomly placed
    def initGridRand(self):
        #height x width x depth (number of pieces)
        while True:
            self.board.components['Player'].pos = randPair(0,self.board.size)
            self.board.components['Goal'].pos = randPair(0,self.board.size)
            self.board.components['Pit'].pos = randPair(0,self.board.size)
            self.board.components['Wall'].pos = randPair(0,self.board.size)
            if self.validateBoard():
                break

    def validateMove(self, piece, addpos=(0,0)):
        """Classify moving ``piece`` by offset ``addpos``.

        Returns:
            0 if the move is valid, 1 if it is blocked (wall or outside the
            board), 2 if it lands on the pit.
        """
        outcome = 0 #0 is valid, 1 invalid, 2 lost game
        pit = self.board.components['Pit'].pos
        wall = self.board.components['Wall'].pos
        new_pos = addTuple(self.board.components[piece].pos, addpos)
        if new_pos == wall:
            outcome = 1 #block move, player can't move to wall
        elif max(new_pos) > (self.board.size-1):    #if outside bounds of board
            outcome = 1
        elif min(new_pos) < 0: #if outside bounds
            outcome = 1
        elif new_pos == pit:
            outcome = 2

        return outcome

    def makeMove(self, action):
        """Move the Player one cell for ``action`` in {'u','d','l','r'}; other values do nothing.

        Blocked moves (wall / board edge) leave the Player in place; a move onto
        the pit is carried out.
        """
        #need to determine what object (if any) is in the new grid spot the player is moving to
        #actions in {u,d,l,r}
        def checkMove(addpos):
            if self.validateMove('Player', addpos) in [0,2]:
                new_pos = addTuple(self.board.components['Player'].pos, addpos)
                self.board.movePiece('Player', new_pos)

        if action == 'u': #up
            checkMove((-1,0))
        elif action == 'd': #down
            checkMove((1,0))
        elif action == 'l': #left
            checkMove((0,-1))
        elif action == 'r': #right
            checkMove((0,1))
        else:
            pass

    def reward(self):
        """Return -10 if the Player is on the Pit, 10 if on the Goal, otherwise -1."""
        if (self.board.components['Player'].pos == self.board.components['Pit'].pos):
            return -10
        elif (self.board.components['Player'].pos == self.board.components['Goal'].pos):
            return 10
        else:
            return -1

    def display(self):
        """Return the character rendering of the board."""
        return self.board.render()

In [5]:
# Action index -> move letter (up, down, left, right)
action_set = dict(enumerate(('u', 'd', 'l', 'r')))

## 3. Functions <a id="funct"></a>

In [6]:
def from2dto1d(pos, size=5):
    """Convert a 2D board position into its row-major 1D index.

    Args:
        pos (tuple | str): ``(row, col)`` position of an object (Player, Wall,
            Goal, Pit) in the gridworld, given either as a 2-tuple or as the
            string form of one, e.g. ``'(2, 3)'``.
        size (int): Side length of the square board. Defaults to 5.

    Returns:
        int | None: ``row * size + col``, or ``None`` when ``pos`` cannot be
        read as a position inside the board.
    """
    import re

    if isinstance(pos, str):
        parsed = re.fullmatch(r'\(\s*(-?\d+)\s*,\s*(-?\d+)\s*\)', pos.strip())
        if parsed is None:
            return None
        row, col = int(parsed.group(1)), int(parsed.group(2))
    else:
        try:
            raw_row, raw_col = pos
            row, col = int(raw_row), int(raw_col)
        except (TypeError, ValueError, OverflowError):
            return None
        # reject non-integral coordinates instead of silently truncating them
        if row != raw_row or col != raw_col:
            return None

    if 0 <= row < size and 0 <= col < size:
        return row * size + col
    return None

In [7]:
def from1dto2d(pos, size=5):
    """Convert a row-major 1D index into its 2D ``(row, col)`` board position.

    Args:
        pos (int): 1D position of an object in the gridworld.
        size (int): Side length of the square board. Defaults to 5.

    Returns:
        tuple | None: ``(row, col)`` as plain Python ints, or ``None`` when
        ``pos`` is not an integer in ``[0, size * size)``.
    """
    try:
        index = int(pos)
    except (TypeError, ValueError, OverflowError):
        return None
    # `index != pos` rejects non-integral numbers and numeric strings
    if index != pos or not 0 <= index < size * size:
        return None
    return divmod(index, size)

## 4. Uniform Dataset Preprocessing <a id="uniformtransformations"></a>

In [36]:
# Read both halves of the dataset from their CSV files
first_half_df = pd.read_csv('FirstHalf_DatasetBeforeUniform_5x5.csv')
second_half_df = pd.read_csv('SecondHalf_DatasetBeforeUniform_5x5.csv')

# Stack the halves under one fresh 0..n-1 index, then discard the
# 'Unnamed: 0' column (presumably the index saved with the CSVs)
df = pd.concat([first_half_df, second_half_df], ignore_index=True)
df = df.drop(columns=['Unnamed: 0'])
df

Unnamed: 0,Current_State,Pit_Position,Goal_Position,Wall_Position,Current_Reward,Q1_UP,Q2_DOWN,Q3_LEFT,Q4_RIGHT
0,0,0,2,1,-10,3.927775,3.646056,3.872293,5.228002
1,2,0,2,1,10,6.812043,6.547123,5.895384,5.198839
2,3,0,2,1,-1,7.362110,5.858342,9.420895,5.408645
3,4,0,2,1,-1,6.174321,5.829628,7.382860,5.810140
4,5,0,2,1,-1,4.099013,2.561610,3.525271,4.972821
...,...,...,...,...,...,...,...,...,...
331195,19,24,22,23,-1,1.985003,3.467614,5.011548,2.263311
331196,20,24,22,23,-1,4.507396,5.186273,5.852918,5.792179
331197,21,24,22,23,-1,5.626455,6.198049,6.237135,8.608895
331198,22,24,22,23,10,5.773876,6.352415,5.349704,4.008237


In [9]:
# Show the first 30 rows of the combined dataset
print(df.head(30))

    Current_State  Pit_Position  Goal_Position  Wall_Position  Current_Reward  \
0               0             0              2              1             -10   
1               2             0              2              1              10   
2               3             0              2              1              -1   
3               4             0              2              1              -1   
4               5             0              2              1              -1   
5               6             0              2              1              -1   
6               7             0              2              1              -1   
7               8             0              2              1              -1   
8               9             0              2              1              -1   
9              10             0              2              1              -1   
10             11             0              2              1              -1   
11             12           

In [10]:
# Build a 5x5 board; any mode other than 'static'/'player' places all four pieces at random
game = Gridworld(size=5, mode='random')


### Rearrangement of dataset

In [11]:
# Re-key the dataset by the state that each action leads to.
# For every source row and every action that actually moves the player, one
# output row [Player, Goal, Wall, Pit, Q_value, Reward] is produced, where
# Player is the cell reached, Q_value is the source row's Q-value for that
# action and Reward is the game reward in the reached cell.

# (action to try, action that undoes it, column holding that action's Q-value)
_MOVES = (
    ('u', 'd', 'Q1_UP'),
    ('d', 'u', 'Q2_DOWN'),
    ('l', 'r', 'Q3_LEFT'),
    ('r', 'l', 'Q4_RIGHT'),
)

# Rows are appended as they are produced, so no placeholder rows are needed
# and there is no fixed capacity to overflow.
matrix = []

# All four piece positions are overwritten for every row, so one board is reused.
game = Gridworld(size=5, mode='random')
pieces = game.board.components

for i in range(df.shape[0]):
    pieces['Player'].pos = from1dto2d(df["Current_State"][i])
    pieces['Goal'].pos = from1dto2d(df["Goal_Position"][i])
    pieces['Wall'].pos = from1dto2d(df["Wall_Position"][i])
    pieces['Pit'].pos = from1dto2d(df["Pit_Position"][i])

    for action, undo, q_column in _MOVES:
        before = pieces['Player'].pos
        game.makeMove(action)
        if pieces['Player'].pos == before:
            continue  # blocked by the wall or the board edge: nothing to record

        matrix.append([
            from2dto1d(str(pieces['Player'].pos)),
            from2dto1d(str(pieces['Goal'].pos)),
            from2dto1d(str(pieces['Wall'].pos)),
            from2dto1d(str(pieces['Pit'].pos)),
            df[q_column][i],
            game.reward(),
        ])
        game.makeMove(undo)  # step back so the next action starts from the source cell

counter = len(matrix)  # number of rows produced

In [12]:
# Turn the collected rows into a DataFrame and keep only the rows that
# contain at least one non-zero entry
column_names = ["Player",  "Goal", "Wall", "Pit", "Q_value", "Reward"]
df_matrix = pd.DataFrame(data=matrix, columns=column_names)
has_nonzero = (df_matrix != 0).any(axis=1)
df_matrix = df_matrix.loc[has_nonzero]
df_matrix

Unnamed: 0,Player,Goal,Wall,Pit,Q_value,Reward
0,5,2,1,0,3.646056,-1
1,7,2,1,0,6.547123,-1
2,3,2,1,0,5.198839,-1
3,8,2,1,0,5.858342,-1
4,2,2,1,0,9.420895,10
...,...,...,...,...,...,...
1015675,20,22,23,24,6.237135,-1
1015676,22,22,23,24,8.608895,10
1015677,17,22,23,24,5.773876,-1
1015678,21,22,23,24,5.349704,-1


In [13]:
# Drop duplicate positions
df_new = df_matrix.drop_duplicates(keep='last',subset=[ 'Player', 'Goal', 'Wall', 'Pit'])
df_new = df_new.reset_index()
df_new.drop('index', axis=1, inplace=True)   
df_new

Unnamed: 0,Player,Goal,Wall,Pit,Q_value,Reward
0,0,2,1,0,4.099013,-10
1,2,2,1,0,8.792521,10
2,3,2,1,0,8.045016,-1
3,4,2,1,0,5.620292,-1
4,5,2,1,0,3.845427,-1
...,...,...,...,...,...,...
331195,20,22,23,24,6.237135,-1
331196,22,22,23,24,8.608895,10
331197,17,22,23,24,5.773876,-1
331198,21,22,23,24,5.349704,-1


In [14]:
# Show the first 30 rows of the de-duplicated dataset
print(df_new.head(30))

    Player  Goal  Wall  Pit   Q_value  Reward
0        0     2     1    0  4.099013     -10
1        2     2     1    0  8.792521      10
2        3     2     1    0  8.045016      -1
3        4     2     1    0  5.620292      -1
4        5     2     1    0  3.845427      -1
5        6     2     1    0  4.549308      -1
6        7     2     1    0  6.222810      -1
7        8     2     1    0  5.309031      -1
8        9     2     1    0  4.086766      -1
9       10     2     1    0  2.337399      -1
10      11     2     1    0  3.589457      -1
11      12     2     1    0  4.660168      -1
12      13     2     1    0  4.059385      -1
13      14     2     1    0  3.268758      -1
14      15     2     1    0  2.105637      -1
15      16     2     1    0  2.167912      -1
16      20     2     1    0  0.883606      -1
17      17     2     1    0  3.684138      -1
18      21     2     1    0  0.889759      -1
19      18     2     1    0  2.945780      -1
20      22     2     1    0  2.173

### Bellman Operator Dataset Creation

In [15]:
# Continue under a new name; this is an alias of df_new, not a copy
no_dup_df = df_new
no_dup_df

Unnamed: 0,Player,Goal,Wall,Pit,Q_value,Reward
0,0,2,1,0,4.099013,-10
1,2,2,1,0,8.792521,10
2,3,2,1,0,8.045016,-1
3,4,2,1,0,5.620292,-1
4,5,2,1,0,3.845427,-1
...,...,...,...,...,...,...
331195,20,22,23,24,6.237135,-1
331196,22,22,23,24,8.608895,10
331197,17,22,23,24,5.773876,-1
331198,21,22,23,24,5.349704,-1


In [16]:
# Inspect the column dtypes before casting
no_dup_df.dtypes

Player       int64
Goal         int64
Wall         int64
Pit          int64
Q_value    float64
Reward       int64
dtype: object

In [17]:
# Cast the four position columns to the platform's default integer type
for _position_col in ('Player', 'Goal', 'Wall', 'Pit'):
    no_dup_df[_position_col] = no_dup_df[_position_col].astype(int)

In [18]:
# Inspect the column dtypes after casting
no_dup_df.dtypes

Player       int32
Goal         int32
Wall         int32
Pit          int32
Q_value    float64
Reward       int64
dtype: object

In [19]:
# Build the Bellman-operator dataset.
# For every row (one board configuration) the four neighbouring states are
# visited; the neighbour with the highest stored Q-value supplies the target
#     0.9 * Q(neighbour) + reward(neighbour)
# which is replaced by 10 when the player already stands on the goal or the
# target exceeds 10.

# (action to try, action that undoes it)
_MOVES = (('u', 'd'), ('d', 'u'), ('l', 'r'), ('r', 'l'))

# Index the stored Q-values by configuration once, so that each neighbour
# lookup is a dictionary access instead of a scan over the whole DataFrame.
# setdefault keeps the first row of a configuration, matching `.values[0]`.
_q_lookup = {}
for _pl, _go, _wa, _pt, _q in zip(no_dup_df["Player"].values,
                                  no_dup_df["Goal"].values,
                                  no_dup_df["Wall"].values,
                                  no_dup_df["Pit"].values,
                                  no_dup_df["Q_value"].values):
    _q_lookup.setdefault((int(_pl), int(_go), int(_wa), int(_pt)), _q)

# Rows are appended as they are produced (no placeholder rows, no fixed capacity).
matrix = []

# All four piece positions are overwritten for every row, so one board is reused.
game = Gridworld(size=5, mode='random')
pieces = game.board.components

for i in range(no_dup_df.shape[0]):
    pieces['Player'].pos = from1dto2d(no_dup_df["Player"][i])
    pieces['Goal'].pos = from1dto2d(no_dup_df["Goal"][i])
    pieces['Wall'].pos = from1dto2d(no_dup_df["Wall"][i])
    pieces['Pit'].pos = from1dto2d(no_dup_df["Pit"][i])

    goal = int(no_dup_df["Goal"][i])
    pit = int(no_dup_df["Pit"][i])
    wall = int(no_dup_df["Wall"][i])

    start = from2dto1d(str(pieces['Player'].pos))

    q_value = []
    reward_arr = []

    for action, undo in _MOVES:
        before = pieces['Player'].pos
        # 1D cell the player occupies before this action; after the loop it
        # holds the cell before the final action, which is what the goal test
        # below compares against
        pi = from2dto1d(str(before))
        game.makeMove(action)
        after = pieces['Player'].pos

        if after == before:
            # blocked move: placeholder values that lose against any real
            # neighbour unless all four moves are blocked
            q_value.append(-10)
            reward_arr.append(-100)
            continue

        # a configuration missing from no_dup_df raises KeyError here
        q_value.append(_q_lookup[(from2dto1d(str(after)), goal, wall, pit)])
        reward_arr.append(game.reward())
        game.makeMove(undo)  # step back so the next action starts from the same cell

    best = np.argmax(q_value)  # first index of the largest Q-value
    bellman = q_value[best]*0.9 + reward_arr[best]
    if pi == goal or bellman > 10:
        target = 10
    else:
        target = bellman

    matrix.append([
        start,
        no_dup_df["Goal"][i],
        no_dup_df["Wall"][i],
        no_dup_df["Pit"][i],
        no_dup_df["Q_value"][i],
        no_dup_df["Reward"][i],
        target,
    ])

counter = len(matrix)  # number of rows produced

In [20]:
import pandas as pd

# Wrap the Bellman rows in a DataFrame and keep only the rows that contain
# at least one non-zero entry
column_names = [
    "Player", "Goal", "Wall", "Pit",
    "Q_Current", "Reward_Current", "Bellman_Operator",
]
bell_df = pd.DataFrame(data=matrix, columns=column_names)
has_nonzero = (bell_df != 0).any(axis=1)
bell_df = bell_df.loc[has_nonzero]
bell_df

Unnamed: 0,Player,Goal,Wall,Pit,Q_Current,Reward_Current,Bellman_Operator
0,0,2,1,0,4.099013,-10,2.460884
1,2,2,1,0,8.792521,10,10.000000
2,3,2,1,0,8.045016,-1,10.000000
3,4,2,1,0,5.620292,-1,6.240515
4,5,2,1,0,3.845427,-1,3.094377
...,...,...,...,...,...,...,...
331195,20,22,23,24,6.237135,-1,3.814733
331196,22,22,23,24,8.608895,10,10.000000
331197,17,22,23,24,5.773876,-1,10.000000
331198,21,22,23,24,5.349704,-1,10.000000


In [21]:
# Write the Bellman dataset to disk (the row index is written as well)
bell_df.to_csv('DatasetAfterUniform_5x5.csv')

## 5. NonUniform Dataset Preprocessing <a id="nonuniformtransformations"></a>

In [22]:
# Read the non-uniform dataset and discard the 'Unnamed: 0' column
# (presumably the index saved with the CSV)
df = pd.read_csv("DatasetBeforeNonUniform_5x5.csv")
df = df.drop(columns=['Unnamed: 0'])
df

Unnamed: 0,Epochs,Player,Pit,Goal,Wall,Action,Reward,Next_State,Epsilon_Boolean,Q_MAX,Q1_UP,Q2_DOWN,Q3_LEFT,Q4_RIGHT,Q1_UP_NEXT,Q2_DOWN_NEXT,Q3_LEFT_NEXT,Q4_RIGHT_NEXT,Q_MAX_NEXT,Y
0,5000,19,24,4,5,2,-1,14,0,3.887865,3.887865,-0.486280,2.813664,3.421544,7.605936,4.846059,4.925508,6.077425,7.605936,5.845342
1,5000,14,24,4,5,0,-1,19,1,7.605936,7.605936,4.846059,4.925508,6.077425,3.739689,-0.644037,2.737098,3.316278,3.739689,2.365720
2,5000,19,24,4,5,1,-1,14,0,3.739689,3.739689,-0.644037,2.737098,3.316278,7.630964,4.795336,4.977115,6.108534,7.630964,5.867868
3,5000,14,24,4,5,0,-1,19,1,7.630964,7.630964,4.795336,4.977115,6.108534,3.916322,-0.481159,2.877255,3.461674,3.916322,2.524690
4,5000,19,24,4,5,1,-1,14,0,3.916322,3.916322,-0.481159,2.877255,3.461674,7.804955,4.973315,5.047181,6.210913,7.804955,6.024459
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28614,9999,13,18,6,7,2,-1,12,0,6.147482,5.289940,1.652240,6.147482,2.391237,6.785432,5.187898,7.750307,2.204850,7.750307,5.975276
28615,9999,12,18,6,7,2,-1,11,0,7.720791,6.782494,5.184531,7.720791,2.209640,9.893531,6.753792,6.625166,3.725423,9.893531,7.904178
28616,9999,11,18,6,7,2,-1,16,1,9.930336,9.930336,6.774661,6.593018,3.762311,8.231075,5.852197,3.708828,2.563195,8.231075,6.407968
28617,9999,16,18,6,7,1,-1,11,0,8.250247,8.250247,5.844639,3.707485,2.580454,10.058926,6.813267,6.665042,3.869955,10.058926,8.053033


In [23]:
# Keep only the rows whose Epsilon_Boolean flag is 0
epsilon_is_zero = df["Epsilon_Boolean"] == 0
df_d = df.loc[epsilon_is_zero]
df_d

Unnamed: 0,Epochs,Player,Pit,Goal,Wall,Action,Reward,Next_State,Epsilon_Boolean,Q_MAX,Q1_UP,Q2_DOWN,Q3_LEFT,Q4_RIGHT,Q1_UP_NEXT,Q2_DOWN_NEXT,Q3_LEFT_NEXT,Q4_RIGHT_NEXT,Q_MAX_NEXT,Y
0,5000,19,24,4,5,2,-1,14,0,3.887865,3.887865,-0.486280,2.813664,3.421544,7.605936,4.846059,4.925508,6.077425,7.605936,5.845342
2,5000,19,24,4,5,1,-1,14,0,3.739689,3.739689,-0.644037,2.737098,3.316278,7.630964,4.795336,4.977115,6.108534,7.630964,5.867868
4,5000,19,24,4,5,1,-1,14,0,3.916322,3.916322,-0.481159,2.877255,3.461674,7.804955,4.973315,5.047181,6.210913,7.804955,6.024459
5,5000,14,24,4,5,0,-1,9,0,7.804955,7.804955,4.973315,5.047181,6.210913,9.886381,8.101170,7.742295,8.966203,9.886381,7.897743
6,5000,9,24,4,5,0,10,4,0,9.886381,9.886381,8.101170,7.742295,8.966203,7.967618,7.830739,7.107147,8.282929,8.282929,10.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28613,9999,14,18,6,7,1,-1,13,0,4.186839,2.521012,1.932748,4.186839,2.155245,5.311069,1.664888,6.175737,2.385941,6.175737,4.558164
28614,9999,13,18,6,7,2,-1,12,0,6.147482,5.289940,1.652240,6.147482,2.391237,6.785432,5.187898,7.750307,2.204850,7.750307,5.975276
28615,9999,12,18,6,7,2,-1,11,0,7.720791,6.782494,5.184531,7.720791,2.209640,9.893531,6.753792,6.625166,3.725423,9.893531,7.904178
28617,9999,16,18,6,7,1,-1,11,0,8.250247,8.250247,5.844639,3.707485,2.580454,10.058926,6.813267,6.665042,3.869955,10.058926,8.053033


In [24]:
# Drop duplicate positions
df_new = df.drop_duplicates(keep='last',subset=['Player', 'Goal', 'Wall', 'Pit'])
df_new = df_new[df_new["Epsilon_Boolean"] == 0]
df_new = df_new.reset_index()
df_new.drop('index', axis=1, inplace=True) 
df_new

Unnamed: 0,Epochs,Player,Pit,Goal,Wall,Action,Reward,Next_State,Epsilon_Boolean,Q_MAX,Q1_UP,Q2_DOWN,Q3_LEFT,Q4_RIGHT,Q1_UP_NEXT,Q2_DOWN_NEXT,Q3_LEFT_NEXT,Q4_RIGHT_NEXT,Q_MAX_NEXT,Y
0,5000,19,24,4,5,1,-1,14,0,3.916322,3.916322,-0.481159,2.877255,3.461674,7.804955,4.973315,5.047181,6.210913,7.804955,6.024459
1,5000,14,24,4,5,0,-1,9,0,7.804955,7.804955,4.973315,5.047181,6.210913,9.886381,8.101170,7.742295,8.966203,9.886381,7.897743
2,5000,9,24,4,5,0,10,4,0,9.886381,9.886381,8.101170,7.742295,8.966203,7.967618,7.830739,7.107147,8.282929,8.282929,10.000000
3,5001,20,6,12,11,1,-1,21,0,4.233319,3.634650,1.733565,1.231143,4.233319,5.754626,3.766642,1.716758,6.300687,6.300687,4.670619
4,5001,21,6,12,11,3,-1,22,0,6.300687,5.754626,3.766642,1.716758,6.300687,8.782804,3.751132,3.645004,5.736999,8.782804,6.904524
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14991,9999,14,18,6,7,1,-1,13,0,4.186839,2.521012,1.932748,4.186839,2.155245,5.311069,1.664888,6.175737,2.385941,6.175737,4.558164
14992,9999,13,18,6,7,2,-1,12,0,6.147482,5.289940,1.652240,6.147482,2.391237,6.785432,5.187898,7.750307,2.204850,7.750307,5.975276
14993,9999,12,18,6,7,2,-1,11,0,7.720791,6.782494,5.184531,7.720791,2.209640,9.893531,6.753792,6.625166,3.725423,9.893531,7.904178
14994,9999,16,18,6,7,1,-1,11,0,8.250247,8.250247,5.844639,3.707485,2.580454,10.058926,6.813267,6.665042,3.869955,10.058926,8.053033


In [25]:
# Re-key the dataset by the state that each action leads to.
# For every source row and every action that actually moves the player, one
# output row [Player, Goal, Wall, Pit, Q_value, Reward] is produced, where
# Player is the cell reached, Q_value is the source row's Q-value for that
# action and Reward is the game reward in the reached cell.

# (action to try, action that undoes it, column holding that action's Q-value)
_MOVES = (
    ('u', 'd', 'Q1_UP'),
    ('d', 'u', 'Q2_DOWN'),
    ('l', 'r', 'Q3_LEFT'),
    ('r', 'l', 'Q4_RIGHT'),
)

# Rows are appended as they are produced, so no placeholder rows are needed
# and there is no fixed capacity to overflow.
matrix = []

# All four piece positions are overwritten for every row, so one board is reused.
game = Gridworld(size=5, mode='random')
pieces = game.board.components

for i in range(df_new.shape[0]):
    pieces['Player'].pos = from1dto2d(df_new["Player"][i])
    pieces['Goal'].pos = from1dto2d(df_new["Goal"][i])
    pieces['Wall'].pos = from1dto2d(df_new["Wall"][i])
    pieces['Pit'].pos = from1dto2d(df_new["Pit"][i])

    for action, undo, q_column in _MOVES:
        before = pieces['Player'].pos
        game.makeMove(action)
        if pieces['Player'].pos == before:
            continue  # blocked by the wall or the board edge: nothing to record

        matrix.append([
            from2dto1d(str(pieces['Player'].pos)),
            from2dto1d(str(pieces['Goal'].pos)),
            from2dto1d(str(pieces['Wall'].pos)),
            from2dto1d(str(pieces['Pit'].pos)),
            df_new[q_column][i],
            game.reward(),
        ])
        game.makeMove(undo)  # step back so the next action starts from the source cell

counter = len(matrix)  # number of rows produced

In [26]:
# Turn the collected rows into a DataFrame and keep only the rows that
# contain at least one non-zero entry
column_names = ["Player",  "Goal", "Wall", "Pit", "Q_value", "Reward"]
df_matrix = pd.DataFrame(data=matrix, columns=column_names)
has_nonzero = (df_matrix != 0).any(axis=1)
df_matrix = df_matrix.loc[has_nonzero]
df_matrix

Unnamed: 0,Player,Goal,Wall,Pit,Q_value,Reward
0,14,4,5,24,3.916322,-1
1,24,4,5,24,-0.481159,-10
2,18,4,5,24,2.877255,-1
3,9,4,5,24,7.804955,-1
4,19,4,5,24,4.973315,-1
...,...,...,...,...,...,...
50017,17,6,7,18,2.580454,-1
50018,6,6,7,18,10.072153,10
50019,16,6,7,18,6.785964,-1
50020,10,6,7,18,6.687034,-1


In [27]:
# Drop duplicate positions
df_final = df_matrix.drop_duplicates(keep='last',subset=[ 'Player', 'Goal', 'Wall', 'Pit'])
df_final = df_final.reset_index()
df_final.drop('index', axis=1, inplace=True)   
df_final

Unnamed: 0,Player,Goal,Wall,Pit,Q_value,Reward
0,24,4,5,24,-0.481159,-10
1,18,4,5,24,2.877255,-1
2,9,4,5,24,7.804955,-1
3,19,4,5,24,4.973315,-1
4,13,4,5,24,5.047181,-1
...,...,...,...,...,...,...
39208,17,6,7,18,2.580454,-1
39209,6,6,7,18,10.072153,10
39210,16,6,7,18,6.785964,-1
39211,10,6,7,18,6.687034,-1


In [28]:
# How many de-duplicated rows carry each distinct reward value.
df_final["Reward"].value_counts()

Reward
-1     34281
 10     3576
-10     1356
Name: count, dtype: int64

In [29]:
# Narrow df_new to the board layout plus the four per-action Q-values,
# copying so that later edits do not touch the original frame.
df_new = df_new.loc[
    :,
    ['Player', 'Goal', 'Wall', 'Pit', 'Q1_UP', 'Q2_DOWN', 'Q3_LEFT', 'Q4_RIGHT'],
].copy()
df_new

Unnamed: 0,Player,Goal,Wall,Pit,Q1_UP,Q2_DOWN,Q3_LEFT,Q4_RIGHT
0,19,4,5,24,3.916322,-0.481159,2.877255,3.461674
1,14,4,5,24,7.804955,4.973315,5.047181,6.210913
2,9,4,5,24,9.886381,8.101170,7.742295,8.966203
3,20,12,11,6,3.634650,1.733565,1.231143,4.233319
4,21,12,11,6,5.754626,3.766642,1.716758,6.300687
...,...,...,...,...,...,...,...,...
14991,14,6,7,18,2.521012,1.932748,4.186839,2.155245
14992,13,6,7,18,5.289940,1.652240,6.147482,2.391237
14993,12,6,7,18,6.782494,5.184531,7.720791,2.209640
14994,16,6,7,18,8.250247,5.844639,3.707485,2.580454


In [30]:
# Attach Q_value and Reward from df_final to each df_new row that shares the
# same (Player, Goal, Wall, Pit) layout. Rows with no match come back from the
# left join with NaN and are dropped; the index is then renumbered from zero.
joined_df = (
    df_new
    .merge(
        df_final[['Player', 'Goal', 'Wall', 'Pit', 'Q_value', 'Reward']],
        how='left',
        on=['Player', 'Goal', 'Wall', 'Pit'],
    )
    .dropna()
    .reset_index(drop=True)
)
joined_df

Unnamed: 0,Player,Goal,Wall,Pit,Q1_UP,Q2_DOWN,Q3_LEFT,Q4_RIGHT,Q_value,Reward
0,19,4,5,24,3.916322,-0.481159,2.877255,3.461674,4.973315,-1.0
1,14,4,5,24,7.804955,4.973315,5.047181,6.210913,8.101170,-1.0
2,9,4,5,24,9.886381,8.101170,7.742295,8.966203,7.804955,-1.0
3,20,12,11,6,3.634650,1.733565,1.231143,4.233319,1.716758,-1.0
4,21,12,11,6,5.754626,3.766642,1.716758,6.300687,4.233319,-1.0
...,...,...,...,...,...,...,...,...,...,...
13165,14,6,7,18,2.521012,1.932748,4.186839,2.155245,2.391237,-1.0
13166,13,6,7,18,5.289940,1.652240,6.147482,2.391237,2.209640,-1.0
13167,12,6,7,18,6.782494,5.184531,7.720791,2.209640,3.876474,-1.0
13168,16,6,7,18,8.250247,5.844639,3.707485,2.580454,6.785964,-1.0


### Bellman Operator Dataset Collection

In [31]:
# Input frame for the Bellman-operator pass below.
# NOTE: this binds a second name to the same DataFrame object -- it is not a
# copy, so any in-place change to one is visible through the other.
no_dup_df = joined_df
no_dup_df

Unnamed: 0,Player,Goal,Wall,Pit,Q1_UP,Q2_DOWN,Q3_LEFT,Q4_RIGHT,Q_value,Reward
0,19,4,5,24,3.916322,-0.481159,2.877255,3.461674,4.973315,-1.0
1,14,4,5,24,7.804955,4.973315,5.047181,6.210913,8.101170,-1.0
2,9,4,5,24,9.886381,8.101170,7.742295,8.966203,7.804955,-1.0
3,20,12,11,6,3.634650,1.733565,1.231143,4.233319,1.716758,-1.0
4,21,12,11,6,5.754626,3.766642,1.716758,6.300687,4.233319,-1.0
...,...,...,...,...,...,...,...,...,...,...
13165,14,6,7,18,2.521012,1.932748,4.186839,2.155245,2.391237,-1.0
13166,13,6,7,18,5.289940,1.652240,6.147482,2.391237,2.209640,-1.0
13167,12,6,7,18,6.782494,5.184531,7.720791,2.209640,3.876474,-1.0
13168,16,6,7,18,8.250247,5.844639,3.707485,2.580454,6.785964,-1.0


In [32]:
# Build the Bellman-operator dataset: one output row per row of no_dup_df.
# Output row layout:
#   [Player, Goal, Wall, Pit, Q_value, Reward, Bellman target]
counter = 0

w = 7                      # number of output columns
h = no_dup_df.shape[0]     # exactly one output row per input row
matrix = [[0 for x in range(w)] for y in range(h)]

# Each probing move is paired with the opposite move that undoes it. The order
# (up, down, left, right) matches the order in which the Q-values are collected
# below, so index k of reward_arr and of q_value refer to the same action.
probe_moves = (('u', 'd'), ('d', 'u'), ('l', 'r'), ('r', 'l'))

for i in range(h):
    # The randomly generated layout is overwritten straight away: every piece
    # is placed at the position recorded in the dataset row.
    game = Gridworld(size=5, mode='random')
    game.board.components['Player'].pos = from1dto2d(no_dup_df["Player"][i])
    game.board.components['Goal'].pos = from1dto2d(no_dup_df["Goal"][i])
    game.board.components['Wall'].pos = from1dto2d(no_dup_df["Wall"][i])
    game.board.components['Pit'].pos = from1dto2d(no_dup_df["Pit"][i])

    pi = from2dto1d((str(game.board.components['Player'].pos)))
    goal = int(no_dup_df["Goal"][i])

    matrix[counter][0] = pi
    matrix[counter][1] = no_dup_df["Goal"][i]
    matrix[counter][2] = no_dup_df["Wall"][i]
    matrix[counter][3] = no_dup_df["Pit"][i]
    matrix[counter][4] = no_dup_df["Q_value"][i]
    matrix[counter][5] = no_dup_df["Reward"][i]

    q_value = [
        no_dup_df[col][i]
        for col in ("Q1_UP", "Q2_DOWN", "Q3_LEFT", "Q4_RIGHT")
    ]
    reward_arr = []

    for move, undo in probe_moves:
        # Try the move; if the player did not change cell the move was
        # rejected and gets a -100 sentinel reward. Otherwise record the
        # reward observed in the new cell and step back.
        pi = from2dto1d((str(game.board.components['Player'].pos)))
        game.makeMove(move)
        p = from2dto1d((str(game.board.components['Player'].pos)))

        if p == pi:
            reward_arr.append(-100)
        else:
            reward = game.reward()
            reward_arr.append(reward)
            game.makeMove(undo)

    # Target = 0.9 * (largest Q-value of this state) + reward of that same
    # action. np.argmax picks the first index on ties.
    best_action = np.argmax(q_value)
    max_qvalue = q_value[best_action]
    reward_max = reward_arr[best_action]
    bellman = max_qvalue*0.9 + reward_max

    # The target is capped at 10 and forced to 10 when the player sits on the
    # goal cell (10 looks like the goal reward in the Reward column -- confirm
    # against Gridworld.reward).
    if pi == goal or bellman > 10:
        matrix[counter][6] = 10
    else:
        matrix[counter][6] = bellman
    counter += 1

In [33]:
import pandas as pd

# Turn the Bellman rows into a labelled DataFrame and discard any row whose
# seven fields are all zero (slots of `matrix` that were never written).
column_names = [
    "Player",
    "Goal",
    "Wall",
    "Pit",
    "Q_Current",
    "Reward_Current",
    "Bellman_Operator",
]
bell_df = pd.DataFrame(data=matrix, columns=column_names)
bell_df = bell_df[bell_df.ne(0).any(axis=1)]
bell_df

Unnamed: 0,Player,Goal,Wall,Pit,Q_Current,Reward_Current,Bellman_Operator
0,19,4,5,24,4.973315,-1.0,2.524690
1,14,4,5,24,8.101170,-1.0,6.024459
2,9,4,5,24,7.804955,-1.0,10.000000
3,20,12,11,6,1.716758,-1.0,2.809987
4,21,12,11,6,4.233319,-1.0,4.670619
...,...,...,...,...,...,...,...
13165,14,6,7,18,2.391237,-1.0,2.768155
13166,13,6,7,18,2.209640,-1.0,4.532734
13167,12,6,7,18,3.876474,-1.0,5.948712
13168,16,6,7,18,6.785964,-1.0,6.425222


In [35]:
# Persist the Bellman dataset to the working directory. The row index is
# written as well (pandas default), as an unnamed first column.
bell_df.to_csv(path_or_buf='DatasetAfterNonUniform_5x5.csv')