### Preparation

the first code cell prepares the data for faster access and provides helper lists 

In [1]:
import pandas as pd
import numpy as np
from scipy.spatial import distance
from random import choice

# constants mapping to index values in the data frame
id_index = 0
x_cord_index = 1
y_cord_index = 2
money_index = 3
time_index = 4
ratio_index = 5
speed = 30

df = pd.read_csv('bank_data.csv')

# Adding a column to represent the attractiveness of banks 
df['ratio'] = df['money'] / df['time (hr)']

# numpy array of indexed ratios
ratio = df['ratio'].to_numpy()

# data frame as an np array for faster manipulations
data = df.to_numpy()

# Indexed array of banks and their time to escape point ex: times_to_escape[bank_id] -> 0.10315893
coords = df[df.columns.difference(['id', 'money', 'time (hr)', 'ratio'])].to_numpy()
times_to_escape = np.apply_along_axis(lambda a : distance.euclidean(a, [0,0]) / speed, 1, coords)

# matrix of each bank and the distance to other banks (indexed by id along both axes)
distances = distance.squareform(distance.pdist(coords))

# matrix of each bank and the time to other banks (indexed by id along both axes)
times_to_point = distances / speed

### Game board class

We define a class representing a game board. It shows if the game is over, legal_moves at a specific game state and generates a starting state.
the game is simulated in reverse

In [40]:
from copy import deepcopy

def create_state_dict(visited, score, game_time):
    """
    takes all elements of a state
    returns a dictionary representation of that state
    """
    return {
            'visited' : visited,
            'score' : score,
            'game_time' : game_time,
        }

def get_ids_from_data(banks):
    """
    takes a list of visited bank information
    returns a lit of bank ids
    """
    return list(map(lambda x: x[id_index], banks))

class Board(object):
    def start(self):
        """
         Returns a representation of the starting state of a simulation(simulation are ran backwards)
         """
        return create_state_dict([], 0, 0)

    def next_state(self, state, play):
        """
        Takes the game state, and the move to be applied.
        Returns the new game state.
        """
        # On start we are at escape point. all
        if(len(state['visited']) == 0):
            print(play)
            play = int(play)
            print(state['visited'])
            return create_state_dict([play], data[play][money_index], data[play][time_index] + times_to_escape[play])

        distance_to_play = distance.euclidean(data[state['visited'][-1]][1:2], data[play][1:2])
        visited = [*state['visited'], play]
        score = state['score'] + data[play][money_index]
        game_time = state['game_time'] + data[play][time_index] + (distance_to_play / speed)

        return create_state_dict(visited, score, game_time)
        

    def legal_plays(self, state_history):
        """
        Takes a sequence of game states representing the full
        game history, and returns the full list of moves that are legal plays
        """
        # if game is in starting state all moves are legal
        if(len(state_history) is 1):
            return get_ids_from_data(data)
        # get copy of the times to go to banks
        times_to_bank_copy = deepcopy(times_to_point[state_history[-1]['visited'][id_index]])
        last_state = state_history[-1]  
        # any illegal moves becomes 0
        times_to_bank_copy[last_state['game_time'] + data[:, time_index] + times_to_bank_copy > 24] = 0
        # keep the indexes of legal moves
        legal_moves = np.nonzero(times_to_bank_copy)[0]
        # print(legal_moves)
        if(len(legal_moves) > 0):
            return legal_moves[:-1]
        return []

    def winner(self, state_history):
        """
        takes the state_history
        returns a boolean indicating if the game is over
        """
        return len(self.legal_plays(state_history[-1])) is 0


# game = Board()
# start_state = game.start()
# first_move_state = game.next_state(start_state, 10)

# print(game.legal_plays([start_state, first_move_state]))
# print(first_move_state)

### Monte carlo class
 

In [44]:
import datetime
from random import choice
from __future__ import division

class MonteCarlo(object):
    def __init__(self, board, **kwargs):
        self.board = board
        self.states = [board.start()]
        self.max_moves = kwargs.get('max_moves', 100)
        seconds = kwargs.get('time', 30)
        self.plays = set()
        self.calculation_time = datetime.timedelta(seconds=seconds)
        pass

    def update(self, state):
        self.states.append(state)

    def get_play(self):
        self.max_depth = 0
        state = self.states[-1]
        legal = self.board.legal_plays(self.states[:])
        state_id = hash(str(state['visited']))

        # Bail out early if there is no real choice to be made.
        if not legal:
            return
        if len(legal) == 1:
            return legal[0]

        games = 0
        begin = datetime.datetime.utcnow()
        while datetime.datetime.utcnow() - begin < self.calculation_time:
            self.run_simulation()
            games += 1

        moves_states = [(p, self.board.next_state(state, p)) for p in legal]

        # Display the number of calls of `run_simulation` and the
        # time elapsed.
        print (games, datetime.datetime.utcnow() - begin)

        # Pick the move with the highest pscore
        print(state.plays)
        # move = max(
        #     (
        #      self.plays.get((player, S), 1),
        #      p)
        #     for p, S in moves_states
        # )

        return 1

    def run_simulation(self):
        visited_states = set()
        states_copy = self.states[:]
        
        state = states_copy[-1]

        expand = True
        for t in range(self.max_moves):
            legal = self.board.legal_plays(states_copy)

            play = choice(legal)
            
            state = self.board.next_state(state, play)
            states_copy.append(state)
            state_id = hash(str(state['visited']))

            
            if expand and (state_id, state) not in self.plays:
                expand = False
                self.plays[(state_id, state)] = 0

            visited_states.add((state_id, state))

            winner = self.board.winner(states_copy)
            if winner:
                break

        for state_id, state in visited_states:
            if (state_id, state) not in self.plays:
                continue
            self.plays[(state_id, state)] += 1

test = MonteCarlo(Board())
test.run_simulation()
print(test.states)

2770.0
[]


TypeError: unhashable type: 'dict'