### Cab-Driver Agent

In [1]:
# Importing libraries
import numpy as np
import random
import math
from collections import deque
import collections
import pickle
import time
import os
from datetime import datetime

# for building DQN model
from keras import layers
from keras import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam

# for plotting graphs
import matplotlib.pyplot as plt

# Import the environment
from Env import CabDriver

Using TensorFlow backend.


#### Defining Time Matrix

In [2]:
# Loading the time matrix provided
Time_matrix = np.load("TM.npy")

In [3]:
# Debug print flag 

DEBUG = False

#### Tracking the state-action pairs for checking convergence


In [4]:
def to_string(state_or_action):
    """Return the elements of a state or action joined by '-' as one string.

    Each element is converted with ``str``; an empty sequence yields ''.
    """
    parts = map(str, state_or_action)
    return '-'.join(parts)

In [5]:
def initialise_tracking_states(state_space, action_space):
    """Register an empty Q-value history for every (state, action) pair.

    Keys are the string forms produced by ``to_string``; entries are written
    into the module-level ``States_track`` mapping as
    ``States_track[state_key][action_key] = []``.
    """
    for tracked_state in state_space:
        # The state key is the same for every action, so build it once.
        state_key = to_string(tracked_state)
        for tracked_action in action_space:
            action_key = to_string(tracked_action)
            States_track[state_key][action_key] = []

In [6]:
def save_tracking_states(curr_state, curr_action, q_value):
    """Append ``q_value`` to the history of an already-tracked pair.

    Args:
        curr_state: string key of the state (as produced by ``to_string``).
        curr_action: string key of the action (as produced by ``to_string``).
        q_value: value to append to the pair's history list.

    Pairs that were never registered in the module-level ``States_track``
    mapping are ignored; no new keys are created.
    """
    # .get() is used instead of indexing so that a defaultdict-backed
    # States_track does not silently create an entry for an unknown state.
    tracked_actions = States_track.get(curr_state)
    if tracked_actions is not None and curr_action in tracked_actions:
        tracked_actions[curr_action].append(q_value)

In [7]:
# #Defining a function to save the Q-dictionary as a pickle file
# def save_obj(obj, name ):
#     with open(name + '.pkl', 'wb') as f:
#         pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)

### Agent Class

If you are using this framework, fill in the following items to complete the code block below:
1. State and Action Size
2. Hyperparameters
3. Create a neural-network model in function 'build_model()'
4. Define epsilon-greedy strategy in function 'get_action()'
5. Complete the function 'append_sample()'. This function appends the recent experience tuple <state, action, reward, new-state> to the memory
6. Complete the 'train_model()' function with following logic:
   - If the memory size is greater than mini-batch size, you randomly sample experiences from memory as per the mini-batch size and do the following:
      - Initialise your input and output batch for training the model
      - Calculate the target Q value for each sample: $r + \gamma \max_{a} Q(s', a)$
      - Get Q(s', a) values from the last trained model
      - Update the input batch as your encoded state and output batch as your Q-values
      - Then fit your DQN model using the updated input and output batch.

In [8]:
class DQNAgent:
    """Deep Q-Network agent with an experience-replay buffer.

    The agent owns a single Keras network (there is no separate target
    network) that maps an encoded state to one Q-value per action in
    ``action_space``.

    Note: ``get_action`` and ``train_model`` read the module-level ``env``
    object (its ``requests`` and ``state_encod_arch1`` methods), so ``env``
    must be defined before those methods are called.
    """

    def __init__(self, state_size, action_size, action_space):
        """Store the problem dimensions, hyperparameters and build the network.

        Args:
            state_size: length of the encoded state vector fed to the network.
            action_size: number of network outputs (one Q-value per action).
            action_space: sequence of all actions; an action's position in it
                is the index of its Q-value in the network output.
        """
        # Define size of state and action
        self.state_size = state_size
        self.action_size = action_size
        self.action_space = action_space

        # Hyperparameters for the DQN
        self.discount_factor = 0.99
        self.learning_rate = 0.001
        self.epsilon_max = 1.0
        self.epsilon_decay = 0.00065  # rate of the exponential decay in get_epsilon
        self.epsilon_min = 0

        self.batch_size = 32
        # Replay memory; the deque drops the oldest sample once 2000 are stored.
        self.memory = deque(maxlen=2000)

        # Create the Q-network (single model, used for both prediction and targets)
        self.model = self.build_model()

    def build_model(self):
        """Build and compile the Q-function approximator.

        Returns:
            A compiled Keras ``Sequential`` model: two hidden ReLU layers of
            32 units and a linear output layer of ``action_size`` units,
            trained with MSE loss and the Adam optimizer.
        """
        model = Sequential()
        model.add(Dense(32, input_dim=self.state_size, activation='relu',
                        kernel_initializer='he_uniform'))
        model.add(Dense(32, activation='relu',
                        kernel_initializer='he_uniform'))
        model.add(Dense(self.action_size, activation='linear',
                        kernel_initializer='he_uniform'))

        # NOTE(review): newer Keras releases name this argument
        # `learning_rate`; confirm against the installed version.
        model.compile(loss='mse', optimizer=Adam(lr=self.learning_rate))
        # Call model.summary() here if the architecture printout is wanted.
        return model

    def get_epsilon(self, time):
        """Return the exploration rate for step ``time``.

        Computes ``epsilon_min + (epsilon_max - epsilon_min) * exp(-epsilon_decay * time)``.
        """
        return self.epsilon_min + (self.epsilon_max - self.epsilon_min) * np.exp(-self.epsilon_decay*time)

    def get_action(self, state, time):
        """Choose an action for ``state`` with an epsilon-greedy policy.

        Args:
            state: environment state, passed to ``env.requests`` and
                ``env.state_encod_arch1``.
            time: step used to evaluate the epsilon schedule.

        Returns:
            One element of the action list returned by ``env.requests(state)``.
        """
        epsilon = self.get_epsilon(time)

        # Assumes the two returned sequences are aligned position-by-position
        # (index i of one describes element i of the other) — confirm against
        # Env.requests.
        possible_actions_index, all_possible_actions = env.requests(state)

        if len(possible_actions_index) == 0:
            return all_possible_actions[0]

        # Explore: with probability epsilon pick any currently offered action.
        if np.random.rand() <= epsilon:
            return random.choice(all_possible_actions)

        # Exploit: pick the offered action with the highest predicted Q-value.
        encoded_state = env.state_encod_arch1(state)
        encoded_state = encoded_state.reshape(1, self.state_size)
        q_values = self.model.predict(encoded_state)[0]

        # argmax is a position within the offered subset, not within the
        # full action space.
        best_position = np.argmax(q_values[possible_actions_index])
        return all_possible_actions[best_position]

    def append_sample(self, state, action, reward, next_state):
        """Store the transition <s, a, r, s'> in the replay memory."""
        self.memory.append((state, action, reward, next_state))

    def get_q_values(self, state):
        """Return the network's Q-values for the first row of ``state``.

        ``state`` must already be encoded and batched (it is passed to
        ``model.predict`` unchanged).
        """
        return self.model.predict(state)[0]

    def train_model(self):
        """Fit the network on one random mini-batch from the replay memory.

        Does nothing until the memory holds more than ``batch_size`` samples.
        For each sampled transition the target for the taken action is
        ``reward + discount_factor * max_a' Q(s', a')``; the targets of all
        other actions are left at the network's current predictions.

        No transition is treated as terminal, so every target bootstraps
        from the next state.
        """
        if len(self.memory) <= self.batch_size:
            return

        # Sample batch from the memory
        mini_batch = random.sample(self.memory, self.batch_size)
        update_input = np.zeros((self.batch_size, self.state_size))   # encoded s
        update_output = np.zeros((self.batch_size, self.state_size))  # encoded s'

        actions, rewards = [], []

        for i, (state, action, reward, next_state) in enumerate(mini_batch):
            update_input[i] = env.state_encod_arch1(state).reshape(1, self.state_size)
            update_output[i] = env.state_encod_arch1(next_state).reshape(1, self.state_size)
            actions.append(action)
            rewards.append(reward)

        # Current predictions for s (to be patched) and for s' (to bootstrap from).
        target = self.model.predict(update_input)
        target_qval = self.model.predict(update_output)

        for i in range(self.batch_size):
            # Use the agent's own action space rather than a notebook global,
            # so the class works wherever it is instantiated.
            action_idx = self.action_space.index(actions[i])
            target[i][action_idx] = rewards[i] + self.discount_factor * np.max(target_qval[i])

        self.model.fit(update_input, target, batch_size=self.batch_size, epochs=1, verbose=0)

    def save(self, name):
        """Save the network to the path ``name`` via ``model.save``."""
        self.model.save(name)

In [9]:
# Number of training episodes
Episodes = 10000

# Environment and the dimensions derived from it
env = CabDriver()
action_space, state_space, state = env.reset()
action_size = len(action_space)
state_size = len(env.state_encod_arch1(state_space[0]))

agent = DQNAgent(state_size, action_size, action_space)

# Q-value history per (state, action) string key, used to check convergence
States_track = collections.defaultdict(dict)
initialise_tracking_states(state_space, action_space)

# create folder with current time stamp
DIR_NAME = os.path.join(
        os.getcwd(), 
        'model-' + datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
# exist_ok avoids a crash when the cell is re-run within the same second
os.makedirs(DIR_NAME, exist_ok=True)

# Override the agent's default decay so the schedule scales with the run
# length: the rate equals 8 / Episodes, i.e. the exponent in
# agent.get_epsilon reaches -8 at the final episode.
agent.epsilon_decay = 80.0/(Episodes * 10)

# episode index -> total reward collected in that episode
scores = {}

start_time = time.time()







### DQN block

In [None]:
for episode in range(Episodes):

    day = 0
    score = 0
    terminal_state = False

    # reset the state before new episode
    _, _, state = env.reset()
    initial_state = state

    while not terminal_state:
        # 1. Pick epsilon-greedy action from possible actions for the current state
        #    (epsilon is scheduled per episode, not per step)
        action = agent.get_action(state, episode)

        # 2. Evaluate your reward and next state
        next_state, reward = env.step(state, action, Time_matrix)

        # 3. Append the experience to the memory
        agent.append_sample(state, action, reward, next_state)

        # 4. Train the model by calling function agent.train_model
        agent.train_model()

        # 5. Keep a track of rewards
        score += reward

        # Count a new day whenever element 2 of the state changes.
        # NOTE(review): presumably state[2] is the day component — confirm
        # against Env.
        if next_state[2] != state[2]:
            day = day + 1

        if DEBUG:
            print(f'state: {state}, action: {action}, next_state: {next_state}, reward: {reward}, day: {day}')

        # The episode ends once more than 30 day changes have been observed.
        if day > 30:
            terminal_state = True

        state = next_state

    print(f'Score {episode}: {score}')
    scores[episode] = score

    # Every 10th episode: checkpoint the model and record Q-values.
    if (episode + 1) % 10 == 0:
        # os.path.join keeps the checkpoint inside DIR_NAME on every OS
        # (a literal '\\' is only a path separator on Windows).
        agent.save(os.path.join(DIR_NAME, str(episode) + '.h5'))

        # for tracking: Q-values of this episode's initial state
        state_enc = env.state_encod_arch1(initial_state)
        # Use the agent's state size instead of a hard-coded length.
        state_enc = np.reshape(state_enc, [1, agent.state_size])
        q_values = agent.get_q_values(state_enc)

        state_string = to_string(initial_state)

        for index in range(len(env.action_space)):
            action_string = to_string(env.action_space[index])
            save_tracking_states(state_string, action_string, q_values[index])


elapsed_time = time.time() - start_time
print(f'Entire Operation took {elapsed_time} seconds')



Score 0: -57.0
Score 1: -234.0
Score 2: -209.0
Score 3: -80.0
Score 4: -395.0
Score 5: -163.0
Score 6: -112.0
Score 7: -26.0
Score 8: -226.0
Score 9: -31.0
Score 10: -119.0
Score 11: -104.0
Score 12: -40.0
Score 13: 121.0
Score 14: -287.0
Score 15: 59.0
Score 16: 33.0
Score 17: -193.0
Score 18: -104.0
Score 19: 56.0
Score 20: -5.0
Score 21: -92.0
Score 22: -228.0
Score 23: -130.0
Score 24: -246.0
Score 25: -248.0
Score 26: -350.0
Score 27: 107.0
Score 28: -10.0
Score 29: -291.0
Score 30: -230.0
Score 31: -169.0
Score 32: -51.0
Score 33: -327.0
Score 34: 45.0
Score 35: 259.0
Score 36: -101.0
Score 37: 351.0
Score 38: -244.0
Score 39: -279.0
Score 40: -265.0
Score 41: -147.0
Score 42: -160.0
Score 43: -133.0
Score 44: -241.0
Score 45: -157.0
Score 46: -181.0
Score 47: -103.0
Score 48: 204.0
Score 49: -350.0
Score 50: -53.0
Score 51: -218.0
Score 52: -128.0
Score 53: -81.0
Score 54: 82.0
Score 55: 59.0
Score 56: -188.0
Score 57: -46.0
Score 58: -21.0
Score 59: -93.0
Score 60: 118.0
Scor

Score 465: 229.0
Score 466: 474.0
Score 467: 546.0
Score 468: 283.0
Score 469: 401.0
Score 470: 266.0
Score 471: 303.0
Score 472: 303.0
Score 473: 377.0
Score 474: 231.0
Score 475: 238.0
Score 476: 190.0
Score 477: 85.0
Score 478: 465.0
Score 479: 239.0
Score 480: 275.0
Score 481: 261.0
Score 482: 463.0
Score 483: 599.0
Score 484: 662.0
Score 485: 307.0
Score 486: 114.0
Score 487: 427.0
Score 488: -197.0
Score 489: 421.0
Score 490: 411.0
Score 491: 697.0
Score 492: 158.0
Score 493: 608.0
Score 494: 95.0
Score 495: 365.0
Score 496: 418.0
Score 497: 59.0
Score 498: 109.0
Score 499: 394.0
Score 500: 220.0
Score 501: 238.0
Score 502: 213.0
Score 503: 199.0
Score 504: 439.0
Score 505: 493.0
Score 506: -28.0
Score 507: 170.0
Score 508: 221.0
Score 509: 406.0
Score 510: 633.0
Score 511: 258.0
Score 512: 487.0
Score 513: 271.0
Score 514: 435.0
Score 515: 442.0
Score 516: 570.0
Score 517: 532.0
Score 518: 447.0
Score 519: 427.0
Score 520: 419.0
Score 521: 366.0
Score 522: 256.0
Score 523: 386.0

Score 948: 529.0
Score 949: 1078.0
Score 950: 674.0
Score 951: 382.0
Score 952: 953.0
Score 953: 808.0
Score 954: 1092.0
Score 955: 245.0
Score 956: 832.0
Score 957: 912.0
Score 958: 647.0
Score 959: 648.0
Score 960: 990.0
Score 961: 729.0
Score 962: 593.0
Score 963: 905.0
Score 964: 791.0
Score 965: 423.0
Score 966: 682.0
Score 967: 611.0
Score 968: 743.0
Score 969: 837.0
Score 970: 759.0
Score 971: 561.0
Score 972: 1180.0
Score 973: 460.0
Score 974: 861.0
Score 975: 1171.0
Score 976: 781.0
Score 977: 667.0
Score 978: 698.0
Score 979: 1040.0
Score 980: 630.0
Score 981: 490.0
Score 982: 460.0
Score 983: 1041.0
Score 984: 584.0
Score 985: 396.0
Score 986: 805.0
Score 987: 229.0
Score 988: 242.0
Score 989: 795.0
Score 990: 734.0
Score 991: 840.0
Score 992: 387.0
Score 993: 932.0
Score 994: 634.0
Score 995: 683.0
Score 996: 1014.0
Score 997: 588.0
Score 998: 554.0
Score 999: 726.0
Score 1000: 855.0
Score 1001: 402.0
Score 1002: 538.0
Score 1003: 689.0
Score 1004: 965.0
Score 1005: 772.0
S

Score 1401: 777.0
Score 1402: 883.0
Score 1403: 744.0
Score 1404: 996.0
Score 1405: 1151.0
Score 1406: 961.0
Score 1407: 1014.0
Score 1408: 892.0
Score 1409: 1030.0
Score 1410: 970.0
Score 1411: 842.0
Score 1412: 1555.0
Score 1413: 1117.0
Score 1414: 1167.0
Score 1415: 878.0
Score 1416: 1174.0
Score 1417: 625.0
Score 1418: 842.0
Score 1419: 933.0
Score 1420: 1121.0
Score 1421: 782.0
Score 1422: 886.0
Score 1423: 962.0
Score 1424: 1044.0
Score 1425: 773.0
Score 1426: 1117.0
Score 1427: 1304.0
Score 1428: 1084.0
Score 1429: 734.0
Score 1430: 1365.0
Score 1431: 1099.0
Score 1432: 824.0
Score 1433: 1065.0
Score 1434: 1092.0
Score 1435: 843.0
Score 1436: 1032.0
Score 1437: 868.0
Score 1438: 1157.0
Score 1439: 600.0
Score 1440: 1035.0
Score 1441: 1314.0
Score 1442: 950.0
Score 1443: 994.0
Score 1444: 571.0
Score 1445: 414.0
Score 1446: 1031.0
Score 1447: 1185.0
Score 1448: 694.0
Score 1449: 957.0
Score 1450: 771.0
Score 1451: 886.0
Score 1452: 1071.0
Score 1453: 745.0
Score 1454: 711.0
Score

Score 1843: 1076.0
Score 1844: 1024.0
Score 1845: 1246.0
Score 1846: 1310.0
Score 1847: 896.0
Score 1848: 990.0
Score 1849: 752.0
Score 1850: 1290.0
Score 1851: 1196.0
Score 1852: 1135.0
Score 1853: 879.0
Score 1854: 981.0
Score 1855: 1212.0
Score 1856: 871.0
Score 1857: 1312.0
Score 1858: 1160.0
Score 1859: 1051.0
Score 1860: 1467.0
Score 1861: 1122.0
Score 1862: 1112.0
Score 1863: 1206.0
Score 1864: 1074.0
Score 1865: 1063.0
Score 1866: 854.0
Score 1867: 879.0
Score 1868: 969.0
Score 1869: 831.0
Score 1870: 1328.0
Score 1871: 1083.0
Score 1872: 1219.0
Score 1873: 1211.0
Score 1874: 1126.0
Score 1875: 1054.0
Score 1876: 1240.0
Score 1877: 986.0
Score 1878: 1375.0
Score 1879: 1102.0
Score 1880: 1294.0
Score 1881: 1041.0
Score 1882: 1315.0
Score 1883: 904.0
Score 1884: 1009.0
Score 1885: 1167.0
Score 1886: 1360.0
Score 1887: 962.0
Score 1888: 1412.0
Score 1889: 1151.0
Score 1890: 655.0
Score 1891: 1132.0
Score 1892: 846.0
Score 1893: 1211.0
Score 1894: 986.0
Score 1895: 1382.0
Score 189

Score 2279: 1338.0
Score 2280: 1425.0
Score 2281: 994.0
Score 2282: 1563.0
Score 2283: 1232.0
Score 2284: 1286.0
Score 2285: 976.0
Score 2286: 1293.0
Score 2287: 1011.0
Score 2288: 1396.0
Score 2289: 1115.0
Score 2290: 1090.0
Score 2291: 1266.0
Score 2292: 1331.0
Score 2293: 1107.0
Score 2294: 1026.0
Score 2295: 1248.0
Score 2296: 1283.0
Score 2297: 1242.0
Score 2298: 1254.0
Score 2299: 1157.0
Score 2300: 1176.0
Score 2301: 1336.0
Score 2302: 1063.0
Score 2303: 909.0
Score 2304: 1095.0
Score 2305: 981.0
Score 2306: 1139.0
Score 2307: 908.0
Score 2308: 1155.0
Score 2309: 1237.0
Score 2310: 1143.0
Score 2311: 1166.0
Score 2312: 1169.0
Score 2313: 1383.0
Score 2314: 1268.0
Score 2315: 1490.0
Score 2316: 846.0
Score 2317: 1368.0
Score 2318: 1358.0
Score 2319: 1148.0
Score 2320: 1174.0
Score 2321: 1068.0
Score 2322: 1470.0
Score 2323: 1414.0
Score 2324: 1383.0
Score 2325: 1174.0
Score 2326: 1364.0
Score 2327: 1085.0
Score 2328: 1384.0
Score 2329: 1365.0
Score 2330: 1265.0
Score 2331: 1147.0

Score 2713: 1186.0
Score 2714: 1670.0
Score 2715: 1202.0
Score 2716: 1427.0
Score 2717: 1080.0
Score 2718: 1394.0
Score 2719: 1440.0
Score 2720: 1059.0
Score 2721: 1540.0
Score 2722: 1232.0
Score 2723: 1206.0
Score 2724: 1400.0
Score 2725: 1332.0
Score 2726: 1377.0
Score 2727: 1156.0
Score 2728: 1204.0
Score 2729: 1493.0
Score 2730: 1407.0
Score 2731: 1247.0
Score 2732: 1401.0
Score 2733: 1531.0
Score 2734: 1358.0
Score 2735: 1212.0
Score 2736: 1389.0
Score 2737: 1401.0
Score 2738: 1266.0
Score 2739: 1206.0
Score 2740: 1362.0
Score 2741: 1170.0
Score 2742: 1369.0
Score 2743: 1259.0
Score 2744: 1463.0
Score 2745: 1243.0
Score 2746: 1283.0
Score 2747: 1520.0
Score 2748: 1299.0
Score 2749: 1417.0
Score 2750: 1292.0
Score 2751: 1274.0
Score 2752: 870.0
Score 2753: 1409.0
Score 2754: 1240.0
Score 2755: 1143.0
Score 2756: 1143.0
Score 2757: 996.0
Score 2758: 1174.0
Score 2759: 1189.0
Score 2760: 1377.0
Score 2761: 1058.0
Score 2762: 1616.0
Score 2763: 1070.0
Score 2764: 1459.0
Score 2765: 14

Score 3146: 1507.0
Score 3147: 1404.0
Score 3148: 1435.0
Score 3149: 1463.0
Score 3150: 1089.0
Score 3151: 1164.0
Score 3152: 1581.0
Score 3153: 1304.0
Score 3154: 1584.0
Score 3155: 1083.0
Score 3156: 1126.0
Score 3157: 1583.0
Score 3158: 1342.0
Score 3159: 969.0
Score 3160: 1426.0
Score 3161: 1219.0
Score 3162: 1265.0
Score 3163: 1356.0
Score 3164: 1202.0
Score 3165: 1346.0
Score 3166: 1131.0
Score 3167: 1512.0
Score 3168: 1223.0
Score 3169: 1426.0
Score 3170: 1473.0
Score 3171: 1156.0
Score 3172: 1198.0
Score 3173: 1682.0
Score 3174: 1502.0
Score 3175: 1392.0
Score 3176: 1201.0
Score 3177: 1605.0
Score 3178: 1176.0
Score 3179: 1242.0
Score 3180: 1439.0
Score 3181: 1478.0
Score 3182: 1206.0
Score 3183: 994.0
Score 3184: 1094.0
Score 3185: 1393.0
Score 3186: 1761.0
Score 3187: 1475.0
Score 3188: 1463.0
Score 3189: 1285.0
Score 3190: 1147.0
Score 3191: 1425.0
Score 3192: 1399.0
Score 3193: 1216.0
Score 3194: 1462.0
Score 3195: 1220.0
Score 3196: 1198.0
Score 3197: 1328.0
Score 3198: 13

Score 3578: 1537.0
Score 3579: 1268.0
Score 3580: 1026.0
Score 3581: 1331.0
Score 3582: 1113.0
Score 3583: 990.0
Score 3584: 1204.0
Score 3585: 1302.0
Score 3586: 1031.0
Score 3587: 1212.0
Score 3588: 1490.0
Score 3589: 1418.0
Score 3590: 1269.0
Score 3591: 1141.0
Score 3592: 1384.0
Score 3593: 968.0
Score 3594: 1439.0
Score 3595: 1482.0
Score 3596: 1346.0
Score 3597: 1226.0
Score 3598: 1501.0
Score 3599: 1292.0
Score 3600: 1686.0
Score 3601: 1619.0
Score 3602: 1520.0
Score 3603: 1310.0
Score 3604: 1656.0
Score 3605: 1593.0
Score 3606: 1589.0
Score 3607: 1480.0
Score 3608: 1305.0
Score 3609: 1404.0
Score 3610: 1426.0
Score 3611: 1285.0
Score 3612: 1229.0
Score 3613: 1431.0
Score 3614: 1346.0
Score 3615: 1380.0
Score 3616: 1394.0
Score 3617: 1544.0
Score 3618: 1282.0
Score 3619: 1410.0
Score 3620: 1293.0
Score 3621: 1709.0
Score 3622: 1499.0
Score 3623: 1346.0
Score 3624: 1391.0
Score 3625: 1229.0
Score 3626: 1462.0
Score 3627: 1494.0
Score 3628: 1129.0
Score 3629: 1439.0
Score 3630: 11

Score 4010: 1359.0
Score 4011: 1173.0
Score 4012: 1261.0
Score 4013: 1075.0
Score 4014: 1294.0
Score 4015: 1741.0
Score 4016: 1581.0
Score 4017: 1410.0
Score 4018: 1615.0
Score 4019: 1364.0
Score 4020: 1371.0
Score 4021: 1171.0
Score 4022: 1785.0
Score 4023: 1165.0
Score 4024: 1527.0
Score 4025: 1506.0
Score 4026: 1386.0
Score 4027: 1296.0
Score 4028: 1418.0
Score 4029: 1359.0
Score 4030: 1733.0
Score 4031: 1503.0
Score 4032: 1458.0
Score 4033: 1303.0
Score 4034: 1606.0
Score 4035: 1557.0
Score 4036: 1248.0
Score 4037: 1165.0
Score 4038: 1548.0
Score 4039: 1561.0
Score 4040: 1534.0
Score 4041: 1279.0
Score 4042: 1527.0
Score 4043: 1547.0
Score 4044: 1260.0
Score 4045: 1224.0
Score 4046: 1682.0
Score 4047: 863.0
Score 4048: 1562.0
Score 4049: 987.0
Score 4050: 1520.0
Score 4051: 1530.0
Score 4052: 1341.0
Score 4053: 1471.0
Score 4054: 1527.0
Score 4055: 1634.0
Score 4056: 1205.0
Score 4057: 1522.0
Score 4058: 1412.0
Score 4059: 1654.0
Score 4060: 1460.0
Score 4061: 1526.0
Score 4062: 15

Score 4442: 1381.0
Score 4443: 1562.0
Score 4444: 1659.0
Score 4445: 1257.0
Score 4446: 1674.0
Score 4447: 1484.0
Score 4448: 1220.0
Score 4449: 1569.0
Score 4450: 1617.0
Score 4451: 1677.0
Score 4452: 1454.0
Score 4453: 1841.0
Score 4454: 1311.0
Score 4455: 1629.0
Score 4456: 1392.0
Score 4457: 1363.0
Score 4458: 1583.0
Score 4459: 1549.0
Score 4460: 1342.0
Score 4461: 1294.0
Score 4462: 1392.0
Score 4463: 1278.0
Score 4464: 1391.0
Score 4465: 1609.0
Score 4466: 1408.0
Score 4467: 980.0
Score 4468: 1348.0
Score 4469: 1026.0
Score 4470: 1521.0
Score 4471: 1052.0
Score 4472: 1238.0
Score 4473: 1495.0
Score 4474: 1260.0
Score 4475: 1608.0
Score 4476: 1348.0
Score 4477: 1414.0
Score 4478: 1625.0
Score 4479: 1233.0
Score 4480: 1403.0
Score 4481: 1490.0
Score 4482: 1560.0
Score 4483: 1394.0
Score 4484: 1166.0
Score 4485: 1355.0
Score 4486: 1420.0
Score 4487: 1616.0
Score 4488: 1305.0
Score 4489: 1395.0
Score 4490: 1610.0
Score 4491: 1591.0
Score 4492: 1187.0
Score 4493: 1674.0
Score 4494: 1

Score 4874: 1438.0
Score 4875: 1634.0
Score 4876: 1554.0
Score 4877: 1495.0
Score 4878: 1547.0
Score 4879: 1698.0
Score 4880: 1665.0
Score 4881: 1710.0
Score 4882: 1360.0
Score 4883: 1470.0
Score 4884: 1358.0
Score 4885: 1507.0
Score 4886: 1500.0
Score 4887: 1177.0
Score 4888: 1503.0
Score 4889: 1630.0
Score 4890: 1110.0
Score 4891: 1202.0
Score 4892: 1427.0
Score 4893: 1088.0
Score 4894: 1502.0
Score 4895: 1580.0
Score 4896: 1691.0
Score 4897: 1544.0
Score 4898: 1321.0
Score 4899: 1774.0
Score 4900: 1468.0
Score 4901: 1527.0
Score 4902: 1237.0
Score 4903: 1323.0
Score 4904: 1382.0
Score 4905: 1492.0
Score 4906: 1137.0
Score 4907: 1389.0
Score 4908: 1678.0
Score 4909: 1317.0
Score 4910: 1402.0
Score 4911: 1454.0
Score 4912: 1628.0
Score 4913: 1690.0
Score 4914: 1480.0
Score 4915: 1463.0
Score 4916: 1400.0
Score 4917: 1494.0
Score 4918: 1387.0
Score 4919: 1517.0
Score 4920: 1315.0
Score 4921: 1523.0
Score 4922: 1579.0
Score 4923: 1237.0
Score 4924: 1241.0
Score 4925: 1461.0
Score 4926: 

Score 5306: 1238.0
Score 5307: 1327.0
Score 5308: 1653.0
Score 5309: 1582.0
Score 5310: 1556.0
Score 5311: 1175.0
Score 5312: 1484.0
Score 5313: 1654.0
Score 5314: 1662.0
Score 5315: 1484.0
Score 5316: 1525.0
Score 5317: 1319.0
Score 5318: 1466.0
Score 5319: 1773.0
Score 5320: 1617.0
Score 5321: 1691.0
Score 5322: 1807.0
Score 5323: 1566.0
Score 5324: 1708.0
Score 5325: 1254.0
Score 5326: 1539.0
Score 5327: 1451.0
Score 5328: 1308.0
Score 5329: 1707.0
Score 5330: 1454.0
Score 5331: 1720.0
Score 5332: 880.0
Score 5333: 1612.0
Score 5334: 1430.0
Score 5335: 1562.0
Score 5336: 1304.0
Score 5337: 1461.0
Score 5338: 1709.0
Score 5339: 1750.0
Score 5340: 1509.0
Score 5341: 1330.0
Score 5342: 1500.0
Score 5343: 1734.0
Score 5344: 1176.0
Score 5345: 1351.0
Score 5346: 1347.0
Score 5347: 1219.0
Score 5348: 1501.0
Score 5349: 1640.0
Score 5350: 1415.0
Score 5351: 1479.0
Score 5352: 1750.0
Score 5353: 1305.0
Score 5354: 2031.0
Score 5355: 1337.0
Score 5356: 1264.0
Score 5357: 1534.0
Score 5358: 1

Score 5738: 1302.0
Score 5739: 1530.0
Score 5740: 1685.0
Score 5741: 1951.0
Score 5742: 1423.0
Score 5743: 1795.0
Score 5744: 1669.0
Score 5745: 1531.0
Score 5746: 1831.0
Score 5747: 1419.0
Score 5748: 1587.0
Score 5749: 1653.0
Score 5750: 1662.0
Score 5751: 1528.0
Score 5752: 1282.0
Score 5753: 1692.0
Score 5754: 1311.0
Score 5755: 1527.0
Score 5756: 1711.0
Score 5757: 1644.0
Score 5758: 1474.0
Score 5759: 1619.0
Score 5760: 1755.0
Score 5761: 1510.0
Score 5762: 1727.0
Score 5763: 1765.0
Score 5764: 1401.0
Score 5765: 1714.0
Score 5766: 1296.0
Score 5767: 1710.0
Score 5768: 1629.0
Score 5769: 1248.0
Score 5770: 1750.0
Score 5771: 1645.0
Score 5772: 1534.0
Score 5773: 1528.0
Score 5774: 1601.0
Score 5775: 1527.0
Score 5776: 1672.0
Score 5777: 1674.0
Score 5778: 1373.0
Score 5779: 1678.0
Score 5780: 1697.0
Score 5781: 1664.0
Score 5782: 1638.0
Score 5783: 1465.0
Score 5784: 1395.0
Score 5785: 2008.0
Score 5786: 1022.0
Score 5787: 1621.0
Score 5788: 1827.0
Score 5789: 1257.0
Score 5790: 

Score 6170: 1430.0
Score 6171: 1754.0
Score 6172: 1678.0
Score 6173: 1742.0
Score 6174: 1704.0
Score 6175: 1573.0
Score 6176: 1438.0
Score 6177: 1725.0
Score 6178: 1341.0
Score 6179: 1448.0
Score 6180: 1709.0
Score 6181: 1673.0
Score 6182: 1510.0
Score 6183: 1940.0
Score 6184: 1454.0
Score 6185: 1579.0
Score 6186: 1621.0
Score 6187: 1498.0
Score 6188: 1825.0
Score 6189: 1852.0
Score 6190: 1603.0
Score 6191: 1779.0
Score 6192: 1814.0
Score 6193: 1672.0
Score 6194: 1621.0
Score 6195: 1535.0
Score 6196: 1683.0
Score 6197: 1626.0
Score 6198: 1807.0
Score 6199: 1553.0
Score 6200: 1582.0
Score 6201: 1609.0
Score 6202: 1530.0
Score 6203: 1904.0
Score 6204: 1457.0
Score 6205: 1597.0
Score 6206: 1890.0
Score 6207: 1458.0
Score 6208: 1488.0
Score 6209: 1618.0
Score 6210: 1593.0
Score 6211: 1283.0
Score 6212: 2091.0
Score 6213: 1408.0
Score 6214: 1584.0
Score 6215: 1777.0
Score 6216: 1404.0
Score 6217: 1585.0
Score 6218: 1889.0
Score 6219: 1733.0
Score 6220: 1561.0
Score 6221: 1387.0
Score 6222: 

Score 6602: 2029.0
Score 6603: 1477.0
Score 6604: 1467.0
Score 6605: 1530.0
Score 6606: 1492.0
Score 6607: 1847.0
Score 6608: 1257.0
Score 6609: 1365.0
Score 6610: 1619.0
Score 6611: 1539.0
Score 6612: 1762.0
Score 6613: 1859.0
Score 6614: 1553.0
Score 6615: 1674.0
Score 6616: 1504.0
Score 6617: 1463.0
Score 6618: 1774.0
Score 6619: 1682.0
Score 6620: 1593.0
Score 6621: 1542.0
Score 6622: 1823.0
Score 6623: 1696.0
Score 6624: 1589.0
Score 6625: 1588.0
Score 6626: 1706.0
Score 6627: 1468.0
Score 6628: 1449.0
Score 6629: 1529.0
Score 6630: 1428.0
Score 6631: 1520.0
Score 6632: 1553.0
Score 6633: 1701.0
Score 6634: 2084.0
Score 6635: 1574.0
Score 6636: 1107.0
Score 6637: 1557.0
Score 6638: 1439.0
Score 6639: 1480.0
Score 6640: 1787.0
Score 6641: 1792.0
Score 6642: 1816.0
Score 6643: 1655.0
Score 6644: 1625.0
Score 6645: 1502.0
Score 6646: 1661.0
Score 6647: 1409.0
Score 6648: 1738.0
Score 6649: 1624.0
Score 6650: 1742.0
Score 6651: 1219.0
Score 6652: 1124.0
Score 6653: 1823.0
Score 6654: 

Score 7034: 1804.0
Score 7035: 1628.0
Score 7036: 1706.0
Score 7037: 1675.0
Score 7038: 1701.0
Score 7039: 1828.0
Score 7040: 1870.0
Score 7041: 1809.0
Score 7042: 1414.0
Score 7043: 1351.0
Score 7044: 1499.0
Score 7045: 1389.0
Score 7046: 1792.0
Score 7047: 1588.0
Score 7048: 1372.0
Score 7049: 1382.0
Score 7050: 1700.0
Score 7051: 1467.0
Score 7052: 1499.0
Score 7053: 1664.0
Score 7054: 1795.0
Score 7055: 1827.0
Score 7056: 1607.0
Score 7057: 1751.0
Score 7058: 1890.0
Score 7059: 1638.0
Score 7060: 1444.0
Score 7061: 1855.0
Score 7062: 1804.0
Score 7063: 1745.0
Score 7064: 1976.0
Score 7065: 1616.0
Score 7066: 1503.0
Score 7067: 1661.0
Score 7068: 1747.0
Score 7069: 1257.0
Score 7070: 1636.0
Score 7071: 1528.0
Score 7072: 1707.0
Score 7073: 1426.0
Score 7074: 1859.0
Score 7075: 1617.0
Score 7076: 1724.0
Score 7077: 1836.0
Score 7078: 1935.0
Score 7079: 1719.0
Score 7080: 1734.0
Score 7081: 1556.0
Score 7082: 1491.0
Score 7083: 1407.0
Score 7084: 1444.0
Score 7085: 1603.0
Score 7086: 

In [None]:
# plot score values

# Create the figure before drawing: calling plt.figure() after plt.bar()
# opens a second, empty figure and leaves the bar chart at the default size.
plt.figure(figsize=(10,20))
# Explicit key/value lists also work when `scores` is empty.
plt.bar(list(scores.keys()), list(scores.values()))
plt.xlabel('Episode')
plt.ylabel('Score')
plt.title('Score per episode')
plt.show()

In [None]:
# save final model

# os.path.join keeps the file inside DIR_NAME on every OS
# (a literal '\\' is only a path separator on Windows).
agent.save(os.path.join(DIR_NAME, 'final.h5'))

### Tracking Convergence

In [None]:
States_track

#### Epsilon-decay sample function

<div class="alert alert-block alert-info">
Try building a similar epsilon-decay function for your model.
</div>

In [None]:
# Evaluate the agent's epsilon schedule at every episode index and plot it.
epsilon = [agent.get_epsilon(i) for i in range(0,Episodes)]

plt.plot(np.arange(0,Episodes), epsilon)
plt.show()

In [None]:
len(state_space)