## Reinforcement learning approach to bank robber game

This is an attempt at making a Reinforcement Learning algorithm for playing the bank robber game.



### 1. Importing the data and getting the nearest neighbors of each point

This is an expensive step, but it simplifies the generation of random games.


In [116]:
import pandas as pd
import numpy as np
from scipy.spatial import distance
from random import choice

# Load the bank table; `data` keeps every column as one raw NumPy matrix.
df = pd.read_csv('bank_data.csv')
data = df.to_numpy()

# Every column that is not id / money / time is treated as a coordinate.
coords = df.loc[:, df.columns.difference(['id', 'money', 'time (hr)'])].to_numpy()

# Row i holds all row positions ordered by increasing pairwise distance from
# row i (pdist's default metric); row i itself is part of that ordering.
closests = distance.squareform(distance.pdist(coords)).argsort(axis=1)

### 2. Generating random games

Games can either be played from scratch or simulated from a specific state.

In [142]:
import numpy as np
from random import choice
from scipy.spatial import distance

# Column positions inside each row of `data`.
id_index, x_cord_index, y_cord_index, money_index, time_index = 0, 1, 2, 3, 4

# Travel speed: distances are divided by it to obtain travel time.
speed = 30

# Number of nearest neighbours considered when choosing the next move.
simulation_depth = 5

class Game:
    """One randomly played round of the bank robber game.

    A game starts at a randomly chosen bank and keeps moving to a random bank
    taken from the first ``simulation_depth`` entries of the current bank's
    row in ``closests``, until the remaining time no longer covers the trip
    to the exit at the origin ``(0, 0)``.

    The class relies on the module-level ``data`` and ``closests`` arrays and
    on the module-level column-index, ``speed`` and ``simulation_depth``
    constants.

    Attributes:
        data: the module-level ``data`` array of bank rows.
        visited: bank rows robbed so far, in order.
        score: sum of the money column over the robbed banks.
        time_left: hours remaining out of the initial 24.
        time_to_exit: hours needed to travel from the last robbed bank to
            the origin at ``speed``.
    """

    def __init__(self):
        self.data = data

        # Start the game at a random bank and rob it immediately.
        self.visited = [choice(self.data)]
        self.score = self.visited[0][money_index]
        self.time_left = 24 - self.visited[0][time_index]
        self.time_to_exit = self._hours_to_exit(self.visited[-1])

    @staticmethod
    def _position(bank):
        """Return the ``[x, y]`` coordinates stored in a bank row."""
        # Both columns are read explicitly: a slice
        # ``x_cord_index:y_cord_index`` has an exclusive end and would
        # silently drop the y coordinate.
        return [bank[x_cord_index], bank[y_cord_index]]

    @classmethod
    def _hours_to_exit(cls, bank):
        """Return the travel time from ``bank`` to the exit at ``(0, 0)``."""
        return distance.euclidean(cls._position(bank), [0, 0]) / speed

    def play_game(self):
        """Play random moves until escaping in time is no longer possible.

        The loop stops once the last robbed bank leaves less time than the
        trip to the exit requires, so that bank is removed from ``visited``
        and its money is removed from ``score``.  ``time_left`` and
        ``time_to_exit`` are left as they were after that rejected bank.
        """
        while self.time_to_exit < self.time_left:
            self.play_move()

        # Keep `score` consistent with `visited`: the dropped bank's money
        # must not be counted.
        rejected = self.visited[-1]
        self.visited = self.visited[:-1]
        self.score = self.score - rejected[money_index]

    def play_move(self):
        """Rob one randomly chosen nearby bank and update the game state."""
        current = self.visited[-1]

        # NOTE(review): assumes a bank's id equals its row position in
        # `data` / `closests` -- confirm against the CSV.
        nearest = closests[int(current[id_index])]
        # NOTE(review): these candidates are not filtered, so the current
        # bank or an already visited bank can be picked again -- confirm
        # whether robbing a bank twice is intended.
        candidates = [self.data[i] for i in nearest[:simulation_depth]]

        # Pick the next move at random.
        next_move = choice(candidates)

        travel_hours = distance.euclidean(
            self._position(current), self._position(next_move)
        ) / speed
        self.visited.append(next_move)

        # Update the score and the clocks.
        self.score = self.score + next_move[money_index]
        self.time_left = self.time_left - next_move[time_index] - travel_hours
        self.time_to_exit = self._hours_to_exit(next_move)

        
        

    

# Play one random game to completion; the trailing bare expression makes the
# notebook display the final score as the cell output.
new_game = Game()
new_game.play_game()
new_game.score



423300.0