## Reinforcement learning approach to bank robber game

This is an attempt at making a Reinforcement Learning algorithm for playing the bank robber game.



### 1. Importing the data and getting the nearest neighbors of each point

this is an expansive step but it simplifies random game generations


In [45]:
import pandas as pd
import numpy as np
from scipy.spatial import distance
from random import choice

df = pd.read_csv('bank_data.csv')
coords = df[df.columns.difference(['id', 'money', 'time (hr)'])].to_numpy()
closests = np.argsort(distance.squareform(distance.pdist(coords)), axis=1)

### 2. generating random games 

games can either be played from scratch or simulated for a specific state

In [114]:
import numpy as np
from random import choice
from scipy.spatial import distance

id_index = 0
speed = 30
x_cord_index = 1
y_cord_index = 2
money_index = 3
time_index = 4
simulation_depth = 5

class Game:
    def __init__(self, data, is_simulation=False, simulation_data={}):
        self.data = data

        # restore data if simulation data is provided
        if (is_simulation):
            self.visited = simulation_data.visited
            self.score = simulation_data.score
            self.time_left = simulation_data.time_left
            self.time_to_exit = simulation_data.time_to_exit
        # prep normal game if not simulation
        else:
            self.visited = [choice(self.data)] if is_simulation == False else simulation_data.visited
            self.score = self.visited[0][money_index]
            self.time_left = 24 - self.visited[0][time_index]
            self.time_to_exit = distance.euclidean(self.visited[-1][x_cord_index:y_cord_index], [0,0]) / speed

    def play_move(self):
        distances_from_next = closests[int(self.visited[-1][id_index])]
        closest_values = [self.data[i] for i in distances_from_next[:simulation_depth]]
        # pick randomly a next move
        next_move = choice(closest_values)
        distance_to_next = distance.euclidean(self.visited[-1][x_cord_index:y_cord_index], next_move[x_cord_index:y_cord_index])
        self.visited.append(next_move)
        # update the score
        self.score = self.score + next_move[money_index]
        self.time_left = self.time_left - next_move[time_index] - (distance_to_next / speed)

    def generate_game(self):
        while(self.time_to_exit < self.time_left):
            self.play_move()
        self.visited = self.visited[:-1]
        
        

    

new_game = Game(df.to_numpy())
new_game.generate_game()
new_game.visited
# print(new_game.score, new_game.time_left)



[array([ 6.50700000e+03, -4.42898544e-01,  3.98845533e+00,  1.42000000e+04,
         1.36609616e+00]),
 array([ 6.50700000e+03, -4.42898544e-01,  3.98845533e+00,  1.42000000e+04,
         1.36609616e+00]),
 array([ 1.64000000e+03, -4.78674251e-01,  3.96912396e+00,  2.00000000e+02,
         8.14737630e-01]),
 array([ 6.50700000e+03, -4.42898544e-01,  3.98845533e+00,  1.42000000e+04,
         1.36609616e+00]),
 array([ 7.87600000e+03, -5.81873686e-01,  3.95049108e+00,  1.51000000e+04,
         2.63820890e-01]),
 array([ 7.87600000e+03, -5.81873686e-01,  3.95049108e+00,  1.51000000e+04,
         2.63820890e-01]),
 array([ 1.64000000e+03, -4.78674251e-01,  3.96912396e+00,  2.00000000e+02,
         8.14737630e-01]),
 array([ 7.87600000e+03, -5.81873686e-01,  3.95049108e+00,  1.51000000e+04,
         2.63820890e-01]),
 array([ 3.01400000e+03, -5.93514794e-01,  4.03886585e+00,  9.00000000e+02,
         6.81133949e-01]),
 array([ 3.41100000e+03, -6.19096592e-01,  4.01227342e+00,  3.00000000e+0