# Problem Description

<img src='images/reward_map.PNG' alt='Reward map of the grid world' />

The yellow box is the initial state and the green box is the goal state. The task is to find the most profitable path from the initial state to the goal state using Q-learning.

In [1]:
import numpy as np
import pandas as pd
import random
import matplotlib.pyplot as plt

## Import the reward map data

In [2]:
# Load the tab-separated reward grid (no header row) and keep it as a nested
# Python list so it can be indexed as rw[row][col].
rw = pd.read_csv('data.txt', delimiter='\t', header=None).values.tolist()
rw

[[-1, -3, -5, -1, -3, -3, -5, -5, -1, 100],
 [-2, -1, -1, -4, -2, -5, -3, -5, -5, -5],
 [-3, -4, -4, -1, -3, -5, -5, -4, -3, -5],
 [-3, -5, -2, -5, -1, -4, -5, -1, -3, -4],
 [-4, -3, -3, -2, -1, -1, -1, -4, -3, -4],
 [-4, -2, -5, -2, -4, -5, -1, -2, -2, -4],
 [-4, -3, -2, -3, -1, -3, -4, -3, -1, -3],
 [-4, -2, -5, -4, -1, -4, -5, -5, -2, -4],
 [-2, -1, -1, -4, -1, -3, -5, -1, -4, -1],
 [-5, -3, -1, -2, -4, -3, -5, -2, -2, -2]]

In [3]:
def build_R(rw):
    """Build the per-state reward table for a rectangular grid world.

    States are numbered row by row (state = row * n_cols + col). For every
    state the table stores the reward collected by moving in each direction,
    i.e. the reward of the neighbouring cell, plus the reward of the cell
    itself for the "stay" action.

    Parameters
    ----------
    rw : list of list of numbers
        Reward grid indexed as rw[row][col]. All rows are expected to have
        the same length.

    Returns
    -------
    pandas.DataFrame
        One row per state with float columns
        ['up', 'right', 'down', 'left', 'this']. Moves that would leave the
        grid are marked with -inf.
    """
    n_rows = len(rw)
    n_cols = len(rw[0]) if n_rows else 0
    blocked = float('-inf')

    # Collect plain records and build the frame once instead of growing it
    # row by row with .loc.
    records = []
    for i in range(n_rows):          # i walks the rows of the grid
        for j in range(n_cols):      # j walks the columns of the grid
            records.append([
                rw[i - 1][j] if i > 0 else blocked,            # up
                rw[i][j + 1] if j < n_cols - 1 else blocked,   # right
                rw[i + 1][j] if i < n_rows - 1 else blocked,   # down
                rw[i][j - 1] if j > 0 else blocked,            # left
                rw[i][j],                                      # this
            ])
    return pd.DataFrame(
        records,
        columns=['up', 'right', 'down', 'left', 'this'],
        dtype=float,
    )
    

In [4]:
# Reward lookup as a NumPy array: R[state][action] with the action columns
# ordered [up, right, down, left, this].
R = np.array(build_R(rw))
R

array([[ -inf,   -3.,   -2.,  -inf,   -1.],
       [ -inf,   -5.,   -1.,   -1.,   -3.],
       [ -inf,   -1.,   -1.,   -3.,   -5.],
       [ -inf,   -3.,   -4.,   -5.,   -1.],
       [ -inf,   -3.,   -2.,   -1.,   -3.],
       [ -inf,   -5.,   -5.,   -3.,   -3.],
       [ -inf,   -5.,   -3.,   -3.,   -5.],
       [ -inf,   -1.,   -5.,   -5.,   -5.],
       [ -inf,  100.,   -5.,   -5.,   -1.],
       [ -inf,  -inf,   -5.,   -1.,  100.],
       [  -1.,   -1.,   -3.,  -inf,   -2.],
       [  -3.,   -1.,   -4.,   -2.,   -1.],
       [  -5.,   -4.,   -4.,   -1.,   -1.],
       [  -1.,   -2.,   -1.,   -1.,   -4.],
       [  -3.,   -5.,   -3.,   -4.,   -2.],
       [  -3.,   -3.,   -5.,   -2.,   -5.],
       [  -5.,   -5.,   -5.,   -5.,   -3.],
       [  -5.,   -5.,   -4.,   -3.,   -5.],
       [  -1.,   -5.,   -3.,   -5.,   -5.],
       [ 100.,  -inf,   -5.,   -5.,   -5.],
       [  -2.,   -4.,   -3.,  -inf,   -3.],
       [  -1.,   -4.,   -5.,   -3.,   -4.],
       [  -1.,   -1.,   -2.,   -

## Build Q Matrix

In [5]:
# Q-table initialised to zero with one row per state and one column per
# action. The shape is taken from the reward table R so the two always agree,
# instead of hard-coding (100, 5) for the 10x10 grid.
Q = np.zeros(R.shape)
Q

array([[ 0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.],
       [ 0.,  

## Build Transition Matrix
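Row format (one row per state; each entry is the index of the state reached by that action, or -1 if the move would leave the grid):

    [up, right, down, left, none]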

In [6]:
def build_trans(rw):
    """Build the state-transition table for a rectangular grid world.

    States are numbered row by row (state = row * n_cols + col). For every
    state the table stores the index of the state reached by each action.

    Parameters
    ----------
    rw : list of list
        Grid indexed as rw[row][col]; only its dimensions are used. All rows
        are expected to have the same length.

    Returns
    -------
    pandas.DataFrame
        One row per state with columns
        ['up', 'right', 'down', 'left', 'none']. A move that would leave the
        grid is marked with -1; 'none' is the state's own index.
    """
    n_rows = len(rw)
    n_cols = len(rw[0]) if n_rows else 0

    # Collect plain records and build the frame once instead of growing it
    # row by row with .loc.
    records = []
    for i in range(n_rows):          # i walks the rows of the grid
        for j in range(n_cols):      # j walks the columns of the grid
            idx = i * n_cols + j
            records.append([
                idx - n_cols if i > 0 else -1,            # up: one row width back
                idx + 1 if j < n_cols - 1 else -1,        # right
                idx + n_cols if i < n_rows - 1 else -1,   # down: one row width ahead
                idx - 1 if j > 0 else -1,                 # left
                idx,                                      # none (stay in place)
            ])
    return pd.DataFrame(records, columns=['up', 'right', 'down', 'left', 'none'])

In [7]:
# Transition lookup as a NumPy array: trans[state][action] is the next state,
# or -1 when the move is not possible.
trans = np.array(build_trans(rw))

In [8]:
# Show the transition table: one row per state, columns ordered
# [up, right, down, left, none]; -1 marks a move that leaves the grid.
trans

array([[-1, 1, 10, -1, 0],
       [-1, 2, 11, 0, 1],
       [-1, 3, 12, 1, 2],
       [-1, 4, 13, 2, 3],
       [-1, 5, 14, 3, 4],
       [-1, 6, 15, 4, 5],
       [-1, 7, 16, 5, 6],
       [-1, 8, 17, 6, 7],
       [-1, 9, 18, 7, 8],
       [-1, -1, 19, 8, 9],
       [0, 11, 20, -1, 10],
       [1, 12, 21, 10, 11],
       [2, 13, 22, 11, 12],
       [3, 14, 23, 12, 13],
       [4, 15, 24, 13, 14],
       [5, 16, 25, 14, 15],
       [6, 17, 26, 15, 16],
       [7, 18, 27, 16, 17],
       [8, 19, 28, 17, 18],
       [9, -1, 29, 18, 19],
       [10, 21, 30, -1, 20],
       [11, 22, 31, 20, 21],
       [12, 23, 32, 21, 22],
       [13, 24, 33, 22, 23],
       [14, 25, 34, 23, 24],
       [15, 26, 35, 24, 25],
       [16, 27, 36, 25, 26],
       [17, 28, 37, 26, 27],
       [18, 29, 38, 27, 28],
       [19, -1, 39, 28, 29],
       [20, 31, 40, -1, 30],
       [21, 32, 41, 30, 31],
       [22, 33, 42, 31, 32],
       [23, 34, 43, 32, 33],
       [24, 35, 44, 33, 34],
       [25, 36, 45, 34,

## Build List of Valid Actions for Each State
Action codes: <br>
0 : up <br>
1 : right <br>
2 : down <br>
3 : left <br>
4 : none

In [15]:
def build_va(trans):
    """List the valid action codes for every state.

    An action (0 = up, 1 = right, 2 = down, 3 = left) is valid for a state
    when the matching entry of its transition row is not -1. Action 4 (none)
    is always appended as the last valid action.

    Parameters
    ----------
    trans : indexable as trans[state][action]
        Transition table with at least four action entries per state.

    Returns
    -------
    list of list of int
        One list of valid action codes per state, in ascending order.
    """
    move_codes = (0, 1, 2, 3)  # up, right, down, left
    va = []
    for state in range(len(trans)):
        allowed = [code for code in move_codes if trans[state][code] != -1]
        allowed.append(4)  # staying in place is always permitted
        va.append(allowed)
    return va

In [16]:
# The per-state action lists have different lengths (corner and edge states
# have fewer moves), so the array must be created with dtype=object. Without
# it NumPy >= 1.24 raises ValueError for the ragged input, and 1.19-1.23 emit
# a deprecation warning.
va = np.array(build_va(trans), dtype=object)
va

array([list([1, 2, 4]), list([1, 2, 3, 4]), list([1, 2, 3, 4]),
       list([1, 2, 3, 4]), list([1, 2, 3, 4]), list([1, 2, 3, 4]),
       list([1, 2, 3, 4]), list([1, 2, 3, 4]), list([1, 2, 3, 4]),
       list([2, 3, 4]), list([0, 1, 2, 4]), list([0, 1, 2, 3, 4]),
       list([0, 1, 2, 3, 4]), list([0, 1, 2, 3, 4]), list([0, 1, 2, 3, 4]),
       list([0, 1, 2, 3, 4]), list([0, 1, 2, 3, 4]), list([0, 1, 2, 3, 4]),
       list([0, 1, 2, 3, 4]), list([0, 2, 3, 4]), list([0, 1, 2, 4]),
       list([0, 1, 2, 3, 4]), list([0, 1, 2, 3, 4]), list([0, 1, 2, 3, 4]),
       list([0, 1, 2, 3, 4]), list([0, 1, 2, 3, 4]), list([0, 1, 2, 3, 4]),
       list([0, 1, 2, 3, 4]), list([0, 1, 2, 3, 4]), list([0, 2, 3, 4]),
       list([0, 1, 2, 4]), list([0, 1, 2, 3, 4]), list([0, 1, 2, 3, 4]),
       list([0, 1, 2, 3, 4]), list([0, 1, 2, 3, 4]), list([0, 1, 2, 3, 4]),
       list([0, 1, 2, 3, 4]), list([0, 1, 2, 3, 4]), list([0, 1, 2, 3, 4]),
       list([0, 2, 3, 4]), list([0, 1, 2, 4]), list([0, 1, 2, 3

# Main Program


In [22]:
# Q-learning by pure random exploration.
# Hyper-parameters and episode endpoints are loop-invariant, so they are set
# once here instead of on every episode.
gamma = 0.8         # discount factor applied to the best future Q-value
episodes = 10000    # number of start-to-goal random walks
start_state = 90
goal_state = 9

# Fixed seed so the random walks (and therefore the printed scores) are
# reproducible from one run of this cell to the next.
random.seed(0)

reward_list = []
for i in range(episodes):
    current_state = start_state
    # The score starts with the reward of the start cell itself.
    current_reward = R[current_state][4]
    while current_state != goal_state:
        # Behaviour policy: pick uniformly among the valid actions.
        action = random.choice(va[current_state])
        next_state = trans[current_state][action]

        # Best Q-value reachable from the next state over its valid actions.
        best_future = max(Q[next_state][nxt] for nxt in va[next_state])

        # Update Q: immediate reward plus discounted best future value
        # (the new estimate fully replaces the old one).
        Q[current_state][action] = R[current_state][action] + gamma * best_future

        current_state = next_state
        current_reward += R[current_state][4]

    # The while loop only exits at the goal state (start_state != goal_state),
    # so the episode is reported once here rather than checked on every step.
    print('Episode {}, score : {}'.format(i, current_reward))
    reward_list.append(current_reward)

Episode 0, score : -870.0
Episode 1, score : -514.0
Episode 2, score : -1521.0
Episode 3, score : -1587.0
Episode 4, score : -983.0
Episode 5, score : -1243.0
Episode 6, score : -2197.0
Episode 7, score : -1093.0
Episode 8, score : -1466.0
Episode 9, score : -3629.0
Episode 10, score : -1372.0
Episode 11, score : -763.0
Episode 12, score : -2662.0
Episode 13, score : -411.0
Episode 14, score : -1852.0
Episode 15, score : -3443.0
Episode 16, score : -2738.0
Episode 17, score : -2006.0
Episode 18, score : -3909.0
Episode 19, score : -295.0
Episode 20, score : -3107.0
Episode 21, score : -696.0
Episode 22, score : -942.0
Episode 23, score : -3117.0
Episode 24, score : -3813.0
Episode 25, score : -1524.0
Episode 26, score : -1119.0
Episode 27, score : -309.0
Episode 28, score : -1537.0
Episode 29, score : -335.0
Episode 30, score : -2250.0
Episode 31, score : -246.0
Episode 32, score : -642.0
Episode 33, score : -227.0
Episode 34, score : -2975.0
Episode 35, score : -1976.0
Episode 36, sco

Episode 417, score : -675.0
Episode 418, score : -316.0
Episode 419, score : -1416.0
Episode 420, score : -546.0
Episode 421, score : -7440.0
Episode 422, score : -1537.0
Episode 423, score : -583.0
Episode 424, score : -1031.0
Episode 425, score : -74.0
Episode 426, score : -1549.0
Episode 427, score : -1888.0
Episode 428, score : -602.0
Episode 429, score : -1033.0
Episode 430, score : -1755.0
Episode 431, score : -4189.0
Episode 432, score : -2646.0
Episode 433, score : -3577.0
Episode 434, score : -1500.0
Episode 435, score : -746.0
Episode 436, score : -1201.0
Episode 437, score : -1217.0
Episode 438, score : -3264.0
Episode 439, score : -207.0
Episode 440, score : -1945.0
Episode 441, score : -1591.0
Episode 442, score : -1640.0
Episode 443, score : -3845.0
Episode 444, score : -493.0
Episode 445, score : -670.0
Episode 446, score : -5662.0
Episode 447, score : -4166.0
Episode 448, score : -1143.0
Episode 449, score : -307.0
Episode 450, score : -159.0
Episode 451, score : -340.0

Episode 1337, score : -9847.0
Episode 1338, score : -607.0
Episode 1339, score : -283.0
Episode 1340, score : -5597.0
Episode 1341, score : -1392.0
Episode 1342, score : -2868.0
Episode 1343, score : -843.0
Episode 1344, score : -2953.0
Episode 1345, score : -1078.0
Episode 1346, score : -1786.0
Episode 1347, score : -1354.0
Episode 1348, score : -191.0
Episode 1349, score : -2627.0
Episode 1350, score : -3332.0
Episode 1351, score : -1235.0
Episode 1352, score : -2019.0
Episode 1353, score : -3835.0
Episode 1354, score : -2748.0
Episode 1355, score : -403.0
Episode 1356, score : -129.0
Episode 1357, score : -1532.0
Episode 1358, score : -2335.0
Episode 1359, score : -2411.0
Episode 1360, score : -1236.0
Episode 1361, score : -3199.0
Episode 1362, score : -512.0
Episode 1363, score : -1684.0
Episode 1364, score : -2418.0
Episode 1365, score : -4370.0
Episode 1366, score : -5466.0
Episode 1367, score : -1186.0
Episode 1368, score : -1353.0
Episode 1369, score : -1044.0
Episode 1370, sco

Episode 1888, score : -171.0
Episode 1889, score : -436.0
Episode 1890, score : -69.0
Episode 1891, score : -4168.0
Episode 1892, score : -559.0
Episode 1893, score : -2343.0
Episode 1894, score : -1505.0
Episode 1895, score : -2934.0
Episode 1896, score : -2960.0
Episode 1897, score : -2573.0
Episode 1898, score : -1088.0
Episode 1899, score : -871.0
Episode 1900, score : -1488.0
Episode 1901, score : -656.0
Episode 1902, score : -4375.0
Episode 1903, score : -1513.0
Episode 1904, score : -1036.0
Episode 1905, score : -1192.0
Episode 1906, score : -8450.0
Episode 1907, score : -1142.0
Episode 1908, score : -1078.0
Episode 1909, score : -1951.0
Episode 1910, score : -907.0
Episode 1911, score : -1269.0
Episode 1912, score : -690.0
Episode 1913, score : -5505.0
Episode 1914, score : -1089.0
Episode 1915, score : -1229.0
Episode 1916, score : -672.0
Episode 1917, score : -1596.0
Episode 1918, score : -1240.0
Episode 1919, score : -1503.0
Episode 1920, score : -2165.0
Episode 1921, score 

Episode 2837, score : -3233.0
Episode 2838, score : -1788.0
Episode 2839, score : -252.0
Episode 2840, score : -738.0
Episode 2841, score : -267.0
Episode 2842, score : -2289.0
Episode 2843, score : -296.0
Episode 2844, score : -746.0
Episode 2845, score : -1617.0
Episode 2846, score : -340.0
Episode 2847, score : -1533.0
Episode 2848, score : -493.0
Episode 2849, score : -852.0
Episode 2850, score : -2265.0
Episode 2851, score : -655.0
Episode 2852, score : -641.0
Episode 2853, score : -8195.0
Episode 2854, score : -213.0
Episode 2855, score : -3673.0
Episode 2856, score : -1252.0
Episode 2857, score : -1702.0
Episode 2858, score : -4015.0
Episode 2859, score : -1371.0
Episode 2860, score : -1511.0
Episode 2861, score : -6825.0
Episode 2862, score : -1190.0
Episode 2863, score : -1512.0
Episode 2864, score : -379.0
Episode 2865, score : -948.0
Episode 2866, score : -997.0
Episode 2867, score : -2543.0
Episode 2868, score : -1110.0
Episode 2869, score : -3686.0
Episode 2870, score : -2

Episode 3371, score : -1370.0
Episode 3372, score : -1087.0
Episode 3373, score : -4056.0
Episode 3374, score : -5294.0
Episode 3375, score : -181.0
Episode 3376, score : -2650.0
Episode 3377, score : -628.0
Episode 3378, score : -1467.0
Episode 3379, score : -3939.0
Episode 3380, score : -328.0
Episode 3381, score : -1803.0
Episode 3382, score : -747.0
Episode 3383, score : -1549.0
Episode 3384, score : -3908.0
Episode 3385, score : -1805.0
Episode 3386, score : -590.0
Episode 3387, score : -2210.0
Episode 3388, score : -276.0
Episode 3389, score : -177.0
Episode 3390, score : -1678.0
Episode 3391, score : -1404.0
Episode 3392, score : -401.0
Episode 3393, score : -1525.0
Episode 3394, score : -1473.0
Episode 3395, score : -511.0
Episode 3396, score : -4523.0
Episode 3397, score : -1530.0
Episode 3398, score : -3919.0
Episode 3399, score : -219.0
Episode 3400, score : -649.0
Episode 3401, score : -1068.0
Episode 3402, score : -990.0
Episode 3403, score : -152.0
Episode 3404, score : -

Episode 4340, score : -7000.0
Episode 4341, score : -1599.0
Episode 4342, score : -1139.0
Episode 4343, score : -1900.0
Episode 4344, score : -96.0
Episode 4345, score : -1549.0
Episode 4346, score : -2760.0
Episode 4347, score : -149.0
Episode 4348, score : -1160.0
Episode 4349, score : -1322.0
Episode 4350, score : -3262.0
Episode 4351, score : -1340.0
Episode 4352, score : -271.0
Episode 4353, score : -1426.0
Episode 4354, score : -2320.0
Episode 4355, score : -3575.0
Episode 4356, score : -3118.0
Episode 4357, score : -401.0
Episode 4358, score : -117.0
Episode 4359, score : -3985.0
Episode 4360, score : -2735.0
Episode 4361, score : -229.0
Episode 4362, score : -2818.0
Episode 4363, score : -906.0
Episode 4364, score : -3707.0
Episode 4365, score : -144.0
Episode 4366, score : -1024.0
Episode 4367, score : -381.0
Episode 4368, score : -2628.0
Episode 4369, score : -2709.0
Episode 4370, score : -5574.0
Episode 4371, score : -3153.0
Episode 4372, score : -1949.0
Episode 4373, score 

Episode 4887, score : -612.0
Episode 4888, score : -5390.0
Episode 4889, score : -1219.0
Episode 4890, score : -4308.0
Episode 4891, score : -1186.0
Episode 4892, score : -369.0
Episode 4893, score : -2754.0
Episode 4894, score : -547.0
Episode 4895, score : -271.0
Episode 4896, score : -682.0
Episode 4897, score : -2375.0
Episode 4898, score : -833.0
Episode 4899, score : -1101.0
Episode 4900, score : -614.0
Episode 4901, score : -1086.0
Episode 4902, score : -7527.0
Episode 4903, score : -1104.0
Episode 4904, score : -1339.0
Episode 4905, score : -2404.0
Episode 4906, score : -1973.0
Episode 4907, score : -679.0
Episode 4908, score : -2024.0
Episode 4909, score : -247.0
Episode 4910, score : -4510.0
Episode 4911, score : -105.0
Episode 4912, score : -3158.0
Episode 4913, score : -671.0
Episode 4914, score : -585.0
Episode 4915, score : -1391.0
Episode 4916, score : -360.0
Episode 4917, score : -1002.0
Episode 4918, score : -3003.0
Episode 4919, score : -315.0
Episode 4920, score : -2

Episode 5828, score : -753.0
Episode 5829, score : -3008.0
Episode 5830, score : -7855.0
Episode 5831, score : -917.0
Episode 5832, score : -801.0
Episode 5833, score : -539.0
Episode 5834, score : -3531.0
Episode 5835, score : -5121.0
Episode 5836, score : -4313.0
Episode 5837, score : -2387.0
Episode 5838, score : -1309.0
Episode 5839, score : -2895.0
Episode 5840, score : -2182.0
Episode 5841, score : -1395.0
Episode 5842, score : -342.0
Episode 5843, score : -1699.0
Episode 5844, score : -256.0
Episode 5845, score : -1649.0
Episode 5846, score : -779.0
Episode 5847, score : -903.0
Episode 5848, score : -3219.0
Episode 5849, score : -550.0
Episode 5850, score : -535.0
Episode 5851, score : -2329.0
Episode 5852, score : -361.0
Episode 5853, score : -780.0
Episode 5854, score : -263.0
Episode 5855, score : -5140.0
Episode 5856, score : -438.0
Episode 5857, score : -703.0
Episode 5858, score : -2319.0
Episode 5859, score : -1131.0
Episode 5860, score : -913.0
Episode 5861, score : -209

Episode 6408, score : -1694.0
Episode 6409, score : -4405.0
Episode 6410, score : -4086.0
Episode 6411, score : -675.0
Episode 6412, score : -3632.0
Episode 6413, score : -2900.0
Episode 6414, score : -1678.0
Episode 6415, score : -797.0
Episode 6416, score : -1854.0
Episode 6417, score : -1061.0
Episode 6418, score : -1785.0
Episode 6419, score : -1415.0
Episode 6420, score : -848.0
Episode 6421, score : -3641.0
Episode 6422, score : -1993.0
Episode 6423, score : -2120.0
Episode 6424, score : -2156.0
Episode 6425, score : -1388.0
Episode 6426, score : -3345.0
Episode 6427, score : -3372.0
Episode 6428, score : -8852.0
Episode 6429, score : -4056.0
Episode 6430, score : -1584.0
Episode 6431, score : -4683.0
Episode 6432, score : -639.0
Episode 6433, score : -3989.0
Episode 6434, score : -202.0
Episode 6435, score : -952.0
Episode 6436, score : -2414.0
Episode 6437, score : -587.0
Episode 6438, score : -800.0
Episode 6439, score : -197.0
Episode 6440, score : -776.0
Episode 6441, score 

Episode 7336, score : -483.0
Episode 7337, score : -1485.0
Episode 7338, score : -3423.0
Episode 7339, score : -1400.0
Episode 7340, score : -757.0
Episode 7341, score : -1268.0
Episode 7342, score : -628.0
Episode 7343, score : -2404.0
Episode 7344, score : -9725.0
Episode 7345, score : -3243.0
Episode 7346, score : -3606.0
Episode 7347, score : -3264.0
Episode 7348, score : -848.0
Episode 7349, score : -3639.0
Episode 7350, score : -839.0
Episode 7351, score : -1377.0
Episode 7352, score : -464.0
Episode 7353, score : -2159.0
Episode 7354, score : -1854.0
Episode 7355, score : -651.0
Episode 7356, score : -1764.0
Episode 7357, score : -7491.0
Episode 7358, score : -791.0
Episode 7359, score : -300.0
Episode 7360, score : -688.0
Episode 7361, score : -2089.0
Episode 7362, score : -544.0
Episode 7363, score : -5528.0
Episode 7364, score : -3309.0
Episode 7365, score : -1859.0
Episode 7366, score : -2724.0
Episode 7367, score : -156.0
Episode 7368, score : -3521.0
Episode 7369, score : 

Episode 7895, score : -4140.0
Episode 7896, score : -783.0
Episode 7897, score : -150.0
Episode 7898, score : -7626.0
Episode 7899, score : -5419.0
Episode 7900, score : -554.0
Episode 7901, score : -4842.0
Episode 7902, score : -2206.0
Episode 7903, score : -2528.0
Episode 7904, score : -3541.0
Episode 7905, score : -6759.0
Episode 7906, score : -1408.0
Episode 7907, score : -1890.0
Episode 7908, score : -1159.0
Episode 7909, score : -869.0
Episode 7910, score : -500.0
Episode 7911, score : -789.0
Episode 7912, score : -1519.0
Episode 7913, score : -122.0
Episode 7914, score : -2020.0
Episode 7915, score : -2381.0
Episode 7916, score : -1875.0
Episode 7917, score : -1716.0
Episode 7918, score : -923.0
Episode 7919, score : -381.0
Episode 7920, score : -522.0
Episode 7921, score : -81.0
Episode 7922, score : -390.0
Episode 7923, score : -1469.0
Episode 7924, score : -1811.0
Episode 7925, score : -2846.0
Episode 7926, score : -1088.0
Episode 7927, score : -2674.0
Episode 7928, score : -

Episode 8826, score : -520.0
Episode 8827, score : -4259.0
Episode 8828, score : -6300.0
Episode 8829, score : -947.0
Episode 8830, score : -152.0
Episode 8831, score : -2259.0
Episode 8832, score : -3747.0
Episode 8833, score : -456.0
Episode 8834, score : -6570.0
Episode 8835, score : -1294.0
Episode 8836, score : -301.0
Episode 8837, score : -1022.0
Episode 8838, score : -2472.0
Episode 8839, score : -917.0
Episode 8840, score : -4253.0
Episode 8841, score : -284.0
Episode 8842, score : -1897.0
Episode 8843, score : -4528.0
Episode 8844, score : -3392.0
Episode 8845, score : -975.0
Episode 8846, score : -8402.0
Episode 8847, score : -892.0
Episode 8848, score : -4978.0
Episode 8849, score : -1895.0
Episode 8850, score : -3180.0
Episode 8851, score : -867.0
Episode 8852, score : -4216.0
Episode 8853, score : -3122.0
Episode 8854, score : -772.0
Episode 8855, score : -3291.0
Episode 8856, score : -141.0
Episode 8857, score : -5928.0
Episode 8858, score : -1343.0
Episode 8859, score : 

In [18]:
# Wrap the learned Q-table in a labelled DataFrame (one row per state, one
# column per action). Building it directly from the array avoids creating an
# empty frame that then has to be grown one row at a time.
Q_df = pd.DataFrame(Q, columns = ['up', 'right', 'down', 'left', 'none'])

In [19]:
# Copy the Q-table into the labelled frame, one state (row) at a time.
for state, q_values in enumerate(Q):
    Q_df.loc[state] = q_values

In [20]:
# Show the learned Q-values: one row per state, one column per action.
Q_df

Unnamed: 0,up,right,down,left,none
0,0.000000,3.213581,1.048692,0.000000,1.570865
1,0.000000,7.766976,3.810865,1.570865,3.213581
2,0.000000,15.958720,6.013581,3.213581,7.766976
3,0.000000,21.198400,8.766976,7.766976,15.958720
4,0.000000,30.248000,14.958720,15.958720,21.198400
5,0.000000,41.560000,19.198400,21.198400,30.248000
6,0.000000,58.200000,30.248000,30.248000,41.560000
7,0.000000,79.000000,41.560000,41.560000,58.200000
8,0.000000,100.000000,58.200000,58.200000,79.000000
9,0.000000,0.000000,0.000000,0.000000,0.000000


In [21]:
# Highest total score reached by any single training episode.
max(reward_list)

69.0