## Reinforcement learning approach to bank robber game

This is an attempt at making a Reinforcement Learning algorithm for playing the bank robber game.



### 1. Importing the data and getting the nearest neighbors of each point

this is an expansive step but it simplifies random game generations


In [350]:
import pandas as pd
import numpy as np
from scipy.spatial import distance
from random import choice

df = pd.read_csv('bank_data.csv')
df['ratio'] = df['money'] / df['time (hr)']
data = df.to_numpy()
coords = df[df.columns.difference(['id', 'money', 'time (hr)'])].to_numpy()
closests = np.argsort(distance.squareform(distance.pdist(coords)), axis=1)

        id  x_coordinate  y_coordinate  money  time (hr)          ratio
0        0      2.851925      1.201712  29700   0.273403  108630.921470
1        1      4.150372     -2.654334   6500   0.064040  101499.543603
2        2     -1.494092     -1.230419  89400   0.127458  701405.770444
3        3      1.271326     -0.088520  96100   1.315029   73078.228687
4        4      2.471113     -0.592810  41100   0.164393  250010.909373
...    ...           ...           ...    ...        ...            ...
9995  9995     -2.472127      4.787304   2100   0.494904    4243.243258
9996  9996      1.034968     -4.321240  54900   0.492866  111389.317396
9997  9997     -0.512720     -2.949173  32200   0.933033   34511.098257
9998  9998      2.626841     -3.482923   5100   0.551312    9250.652010
9999  9999     -3.992507      0.090112  21000   1.107299   18965.059907

[10000 rows x 6 columns]


### 2. generating random games 

games can either be played from scratch or simulated for a specific state

In [435]:
import numpy as np
from random import choice
from scipy.spatial import distance

id_index = 0
speed = 30
x_cord_index = 1
y_cord_index = 2
money_index = 3
time_index = 4
ratio_index = 5
simulation_depth = 1500


class Game:
    def __init__(self, restore=False, state={}):
        self.data = data
        if(restore):
            self.visited = state['visited']
            self.score = state['score']
            self.time_left = state['time_left']
            self.time_to_exit = state['time_to_exit']
        else:
            self.visited = [choice(self.data)] 
            self.score = self.visited[0][money_index]
            self.time_left = 24 - self.visited[0][time_index]
            self.time_to_exit = distance.euclidean(self.visited[-1][x_cord_index:y_cord_index], [0,0]) / speed
        self.visited_dict = {}
        for visit in self.visited:
            self.visited_dict[visit[id_index]] = True
        self.lost = False

    def get_state(self):
        return {
            'visited': self.visited,
            'score': self.score,
            'time_left': self.time_left,
            'time_to_exit': self.time_to_exit
        }

    def is_already_visited(self, bank):
        return self.visited.count(bank) > 0

    def get_visited_ids(self):
        return list(map(lambda x: x[id_index], self.visited))

    def play_move(self, move):
        next_move = move
        distance_to_next = distance.euclidean(self.visited[-1][x_cord_index:y_cord_index], next_move[x_cord_index:y_cord_index])
        self.visited.append(next_move)
        self.visited_dict[next_move[id_index]] = True
        # update the score
        self.score = self.score + next_move[money_index]
        self.time_left = self.time_left - next_move[time_index] - (distance_to_next / speed)
        self.time_to_exit = distance.euclidean(self.visited[-1][x_cord_index:y_cord_index], [0,0]) / speed
        if (self.time_to_exit > self.time_left):
            self.lost = True



        

In [440]:
class Simulated_Game(Game):
    def play_random_game(self):
            while(self.time_to_exit < self.time_left):
                self.play_move(self.play_deterministic_move())
            self.visited = self.visited[:-1]
    
    def play_deterministic_move(self):
        distances_from_next = closests[int(self.visited[-1][id_index])]
        closest_values = [self.data[i] for i in distances_from_next[:simulation_depth] if self.data[i][id_index] not in self.visited_dict]
        return closest_values[np.argmax(closest_values, axis=0)[ratio_index]]
        

best_game = Simulated_Game()
best_game.play_deterministic_move()
state = best_game.get_state()
best_game.play_random_game()
for i in range(100):
    new_game = Simulated_Game(restore=True, state=state)
    new_game.play_random_game()
    if(new_game.score > best_game.score):
        best_game = new_game


print(best_game.get_visited_ids(), "${:,.2f}".format(best_game.score))


ValueError: attempt to get argmax of an empty sequence