### Preparation

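The first code cell loads the bank data, converts it to NumPy arrays for faster access, and precomputes helper arrays (travel times between banks and from each bank to the escape point).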

In [61]:
import pandas as pd
import numpy as np
from scipy.spatial import distance
from random import choice

# constants mapping to index values in the data frame (column positions in `data`)
id_index = 0
x_cord_index = 1
y_cord_index = 2
money_index = 3
time_index = 4
ratio_index = 5
# NOTE(review): nothing in this cell creates a column at position 6 - confirm before using it.
value_index = 6
# travel speed used to turn distances into travel times
# (presumably distance units per hour, given the 'time (hr)' column - TODO confirm)
speed = 30

df = pd.read_csv('bank_data.csv')

# Adding a column to represent the attractiveness of banks
df['ratio'] = df['money'] / df['time (hr)']

# numpy array of indexed ratios
ratio = df['ratio'].to_numpy()

# data frame as an np array for faster manipulations
data = df.to_numpy()

# coordinates only: every column except id, money, time and ratio
coords = df[df.columns.difference(['id', 'money', 'time (hr)', 'ratio'])].to_numpy()

# Indexed array of banks and their time to escape point ex: times_to_escape[bank_id] -> 0.10315893
# The escape point is the origin, so the distance is simply the norm of each
# coordinate row; computing it in one vectorised call avoids a Python-level
# callback per bank.
times_to_escape = np.linalg.norm(coords, axis=1) / speed

# matrix of each bank and the distance to other banks (indexed by id along both axes)
distances = distance.squareform(distance.pdist(coords))

# matrix of each bank and the time to other banks (indexed by id along both axes)
times_to_point = distances / speed

### Game board class

We define a class representing the game board. It reports whether the game is over, lists the legal moves for a given game state, and generates the starting state.
The game is simulated in reverse: it starts at the escape point and banks are added going backwards from there.

In [213]:
from copy import deepcopy

def create_state_dict(visited, score, game_time):
    """
    Bundle the three components of a game state into one dictionary.

    visited   -- list of the bank ids visited so far
    score     -- money accumulated so far
    game_time -- time consumed so far

    Returns a dict with the keys 'visited', 'score' and 'game_time'
    (the arguments are stored as-is, not copied).
    """
    return dict(visited=visited, score=score, game_time=game_time)

def get_ids_from_data(banks):
    """
    Extract the id of every bank record.

    banks -- iterable of bank rows; the id is read from position `id_index`
    Returns a list with one id per row, in the same order.
    """
    return [bank[id_index] for bank in banks]

class Board(object):
    """
    Rules of the simulation: how a state evolves when a bank is added, which
    banks may still be added, and when no further bank fits.

    A state is the dictionary produced by create_state_dict. The run is built
    backwards from the escape point, so the first bank added is charged its
    time to the escape point and every later bank is charged the travel time
    to the bank added just before it.
    """

    # total time budget a run may not exceed
    TIME_LIMIT = 24

    def start(self):
        """
        Returns a representation of the starting state of a simulation
        (simulations are run backwards): nothing visited, zero score, zero time.
        """
        return create_state_dict([], 0, 0)

    def next_state(self, state, play):
        """
        Takes the game state, and the move (bank id) to be applied.
        Returns the new game state; the given state is not modified.
        """
        play = int(play)
        visited = state['visited']

        if len(visited) == 0:
            # On start we are at the escape point.
            travel_time = times_to_escape[play]
        else:
            # Use the same precomputed table as legal_plays so the time that
            # is charged is exactly the time the legality check evaluated.
            travel_time = times_to_point[visited[-1]][play]

        return create_state_dict(
            [*visited, play],
            state['score'] + data[play][money_index],
            state['game_time'] + data[play][time_index] + travel_time,
        )

    def legal_plays(self, state_history):
        """
        Takes a sequence of game states representing the full game history
        (only the last state is inspected).

        Returns the rows of `data` for every bank that has not been visited
        and can be added without pushing the game time past TIME_LIMIT.
        In the starting state (nothing visited) the whole `data` array is
        returned; when no bank qualifies an empty list is returned.
        """
        last_state = state_history[-1]
        visited = last_state['visited']

        # if game is in starting state all moves are legal
        if len(visited) == 0:
            return data

        # Travel times are measured from the current position, i.e. the bank
        # added last - the same bank next_state moves from.
        travel_times = times_to_point[visited[-1]]
        finish_times = last_state['game_time'] + data[:, time_index] + travel_times

        # A boolean mask (instead of zeroing travel times) keeps an unvisited
        # bank that happens to share coordinates with the current one legal.
        legal_mask = ~(finish_times > self.TIME_LIMIT)
        legal_mask[visited] = False

        legal_moves = data[legal_mask]
        if len(legal_moves) > 0:
            return legal_moves
        return []

    def winner(self, state_history):
        """
        takes the state_history
        returns a boolean indicating if the game is over (no legal play left)
        """
        return len(self.legal_plays(state_history)) == 0

### Monte Carlo class
 

In [217]:
import datetime
import time, math
from copy import deepcopy
from random import choice
from joblib import Parallel, delayed

class Monte_Carlo(object):
    """
    Chooses the next bank to add to a run with a three-stage search built on
    Board: greedy selection of candidate moves, expansion of the retained
    candidates by one more move, and random playouts to the end of the game.
    """

    def __init__(self, states):
        """
        takes the game history (list of state dictionaries, current state last)
        """
        self.states = states
        self.board = Board()
        # private copy so narrowing the candidates never touches the shared table
        self.child_nodes = deepcopy(self.board.legal_plays(states))
        self.expanded_states = {}
        # number of greedy follow-up moves used to value a candidate
        self.selection_depth = 2
        # used as `argpartition(values, k)[k:]`; -10 keeps the 10 best candidates
        self.selection_amount = -10
        # number of follow-up moves each retained candidate is expanded with
        self.expansion_amount = 5

    def pick_next_move(self):
        """
        Runs selection, expansion and simulation from the current history.

        Returns the id of the candidate move whose best random playout reached
        the highest score, or None when there is no legal move at all.
        Side effects: narrows self.child_nodes to the retained candidates and
        leaves self.expanded_states mapping each expanded line to a
        (candidate move, playout score) tuple.
        """
        candidate_count = len(self.child_nodes)
        if candidate_count == 0:
            return None

        # start from a clean slate so results of an earlier call are not mixed in
        self.expanded_states = {}
        values = np.zeros(candidate_count)

        # select the most promising moves
        # each candidate is valued by playing it and then up to
        # `selection_depth` greedy (highest ratio) follow-up moves
        def selection(i):
            # read the id from the candidate list itself: the candidates are
            # only identical to `data` in the starting state
            history = [*self.states,
                       self.board.next_state(self.states[-1], self.child_nodes[i][id_index])]
            for _ in range(self.selection_depth):
                legal = self.board.legal_plays(history)
                if len(legal) == 0:
                    # the line ended early; value it with what it collected
                    break
                greedy_move = legal[np.argmax(legal[:, ratio_index])][id_index]
                history = [*history, self.board.next_state(history[-1], greedy_move)]
            values[i] = history[-1]['score']

        Parallel(n_jobs=8, prefer="threads")(delayed(selection)(i) for i in range(candidate_count))

        # keep the best candidates; when there are fewer candidates than
        # requested the partition index would be out of range, so keep them all
        kth = self.selection_amount
        if -candidate_count <= kth < candidate_count:
            highest_scores = np.argpartition(values, kth)[kth:]
            self.child_nodes = np.take(self.child_nodes, highest_scores, axis=0)

        print(self.child_nodes)

        # each retained candidate is expanded with its `expansion_amount`
        # highest-ratio follow-up moves
        def expension(i):
            root_move = int(self.child_nodes[i][id_index])
            new_state = [*self.states, self.board.next_state(self.states[-1], root_move)]
            legal = self.board.legal_plays(new_state)
            if len(legal) == 0:
                # nothing can follow this candidate; it is already a full game
                self.expanded_states[(root_move, None)] = new_state
                return
            # argsort is ascending, so reverse it to get the best ratios first
            best_first = np.argsort(legal[:, ratio_index])[::-1]
            for move in legal[best_first[:self.expansion_amount]]:
                follow_up = int(move[id_index])
                # keyed by (candidate, follow-up) so the candidate a playout
                # belongs to is known regardless of how long the history is
                self.expanded_states[(root_move, follow_up)] = [
                    *new_state, self.board.next_state(new_state[-1], follow_up)]

        Parallel(n_jobs=8, prefer="threads")(delayed(expension)(i) for i in range(len(self.child_nodes)))

        # will simulate random games until completion on expanded game states
        def simulation(state_key):
            game_state = [*self.expanded_states[state_key]]
            legal_moves = self.board.legal_plays(game_state)
            while len(legal_moves) != 0:
                game_state = [*game_state,
                              self.board.next_state(game_state[-1], choice(legal_moves)[id_index])]
                legal_moves = self.board.legal_plays(game_state)
            self.expanded_states[state_key] = (state_key[0], game_state[-1]['score'])

        # iterate over a snapshot of the keys: the workers rewrite the dict entries
        Parallel(n_jobs=8, prefer="threads")(delayed(simulation)(key) for key in list(self.expanded_states))

        potential_moves = np.array(list(self.expanded_states.values()))
        max_move = potential_moves[np.argmax(potential_moves[:, 1])][0]
        return max_move

        
# Driver: create a board and a history that contains only the starting state.
game = Board()
game_state = [game.start()]
# Manual check of legal_plays against a hand-written two-state history;
# the result is not used by the loop below.
legal_plays = game.legal_plays([{'visited': [], 'score': 0, 'game_time': 0}, {'visited': [2707], 'score': 101700.0, 'game_time': 0.984989208647197}])
# print(legal_plays[2706][0])
# print(legal_plays[2707][0])
# print(len(legal_plays))

# Each pass runs one Monte Carlo search from the current history and appends
# the state produced by the chosen move; range(1) plays a single move.
for i in range(1):
    simulation = Monte_Carlo(states=game_state)
    next_move = simulation.pick_next_move()
    game_state = [*game_state, game.next_state(game_state[-1], next_move)]



[[ 7.42100000e+03  1.68537690e+00  5.60147558e-01  1.01600000e+05
   1.15750491e+00  8.77750056e+04]
 [ 4.93700000e+03  1.61811060e-01  3.85946885e+00  1.01800000e+05
   1.25218786e+00  8.12977059e+04]
 [ 8.12500000e+03  2.13831936e+00  1.02577103e+00  1.02000000e+05
   1.38833505e-01  7.34692969e+05]
 [ 3.06100000e+03  2.95612607e+00  6.86596150e-01  1.01700000e+05
   3.34573548e-01  3.03969039e+05]
 [ 7.26500000e+03 -2.25961615e+00  1.90798501e+00  1.02100000e+05
   1.34392997e-01  7.59712206e+05]
 [ 6.43900000e+03  2.77659073e+00 -4.69853011e+00  1.01800000e+05
   1.42763460e+00  7.13067613e+04]
 [ 7.96300000e+03 -1.29304627e+00  2.38415738e+00  1.01600000e+05
   1.02704305e+00  9.89247728e+04]
 [ 2.70700000e+03  4.43352120e+00  2.67478435e+00  1.01700000e+05
   8.12392757e-01  1.25185754e+05]
 [ 4.84800000e+03  1.07953939e+00  1.63737823e+00  1.02300000e+05
   1.40751987e+00  7.26810344e+04]
 [ 5.15600000e+03  2.89903907e+00  4.21382148e+00  1.01900000e+05
   8.98851947e-01  1.1336