In [1]:
import pandas as pd
import numpy as np
from scipy.spatial import distance
from random import choice

# constants mapping to index values in the data frame
id_index = 0
x_cord_index = 1
y_cord_index = 2
money_index = 3
time_index = 4
ratio_index = 5
value_index = 6
speed = 30

df = pd.read_csv('bank_data.csv')

# Adding a column to represent the attractiveness of banks 
df['ratio'] = df['money'] / df['time (hr)']

# numpy array of indexed ratios
ratio = df['ratio'].to_numpy()

# data frame as an np array for faster manipulations
data = df.to_numpy()

# Indexed array of banks and their time to escape point ex: times_to_escape[bank_id] -> 0.10315893
coords = df[df.columns.difference(['id', 'money', 'time (hr)', 'ratio'])].to_numpy()
times_to_escape = np.apply_along_axis(lambda a : distance.euclidean(a, [0,0]) / speed, 1, coords)

# matrix of each bank and the distance to other banks (indexed by id along both axes)
distances = distance.squareform(distance.pdist(coords))

# matrix of each bank and the time to other banks (indexed by id along both axes)
times_to_point = distances / speed

In [2]:
from copy import deepcopy

def create_state_dict(visited, score, game_time):
    """
    takes all elements of a state
    returns a dictionary representation of that state
    """
    return {
            'visited' : visited,
            'score' : score,
            'game_time' : game_time,
        }

def get_ids_from_data(banks):
    """
    takes a list of visited bank information
    returns a lit of bank ids
    """
    return list(map(lambda x: x[id_index], banks))

class Board(object):
    def start(self):
        """
         Returns a representation of the starting state of a simulation(simulation are ran backwards)
         """
        return create_state_dict([], 0, 0)

    def next_state(self, state, play):
        """
        Takes the game state, and the move to be applied.
        Returns the new game state.
        """
        play = int(play)
        # On start we are at escape point. all
        if(len(state['visited']) == 0):
            return create_state_dict([play], data[play][money_index], data[play][time_index] + times_to_escape[play])
        
        distance_to_play = distance.euclidean(data[state['visited'][-1]][1:3], data[play][1:3])
        visited = [*state['visited'], play]
        score = state['score'] + data[play][money_index]
        game_time = state['game_time'] + data[play][time_index] + (distance_to_play / speed)

        return create_state_dict(visited, score, game_time)
        

    def legal_plays(self, state_history):
        """
        Takes a sequence of game states representing the full
        game history, and returns the full list of moves that are legal plays
        """
        # if game is in starting state all moves are legal
        if(len(state_history) is 1):
            return data
        # get copy of the times to go to banks
        
        times_to_bank_copy = deepcopy(times_to_point[state_history[-1]['visited'][id_index]])

        last_state = state_history[-1]  
        # any illegal moves becomes 0
        times_to_bank_copy[last_state['game_time'] + data[:, time_index] + times_to_bank_copy > 24] = 0

        for visit in state_history[-1]['visited']:
            times_to_bank_copy[visit] = 0

        # keep the indexes of legal moves        
        legal_moves = np.nonzero(times_to_bank_copy)[0]

        non_zero_legal_moves = np.take(data, legal_moves, axis=0)

        
        if(len(non_zero_legal_moves) > 0):
            return non_zero_legal_moves
        return []

    def winner(self, state_history):
        """
        takes the state_history
        returns a boolean indicating if the game is over
        """
    
        return len(self.legal_plays([state_history[-1]])) is 0

In [91]:
import datetime
import time, math
from copy import deepcopy
from random import choice
from joblib import Parallel, delayed

class Simulation(object):
    def __init__(self, states):
        self.states = states
        self.board = Board()
        self.values = {}
        self.legal_banks = deepcopy(self.board.legal_plays(states))
            
    def pick_next_move(self):

        def selection(current_node):
            simulated_step = [*deepcopy(self.states), self.board.next_state(self.states[-1], current_node[id_index])]

            first_legal_moves = self.board.legal_plays(simulated_step)
            max_first_move = first_legal_moves[np.argmax(first_legal_moves, axis=0)[ratio_index]][0]
            maxed_first_state = [*deepcopy(simulated_step), self.board.next_state(simulated_step[-1], max_first_move)]
            
            second_legal_moves = self.board.legal_plays(maxed_first_state)
            max_second_move = second_legal_moves[np.argmax(second_legal_moves, axis=0)[ratio_index]][0]
            maxed_second_state = [*deepcopy(maxed_first_state), self.board.next_state(maxed_first_state[-1], max_second_move)]

            self.values[current_node[id_index]] = maxed_second_state[-1]['score']

            
            
        Parallel(n_jobs=8, prefer="threads")(delayed(selection)(current_node) for current_node in self.legal_banks)
        next_move_values = np.array(list(self.values.items()))

        return next_move_values[np.argmax(next_move_values, axis=0)[1]][0]
        
        return 0

In [97]:
game = Board()
game_state = [{'visited': [], 'score': 0, 'game_time': 0}]

while(game.legal_plays(game_state) is not 0):
    simulation = Simulation(states=game_state)
    next_move = simulation.pick_next_move()
    
    game_state = [*game_state, game.next_state(game_state[-1], next_move)]
    print(next_move, game_state[-1]['score'], game_state[-1]['game_time'])

print(game_state[-1]['score'])

4848.0 102300.0 1.472894113566226
7265.0 204400.0 1.7189571988613603
8125.0 306400.0 2.007308961151784
5156.0 408300.0 3.0154127081491957
4937.0 510100.0 4.359602876022306
6439.0 611900.0 6.085522241657806
2707.0 713600.0 7.149821473489835
3061.0 815300.0 7.566962089648945
5884.0 916900.0 8.728496621212585
7421.0 1018500.0 10.046236510232024
7963.0 1120100.0 11.189698419352762
6019.0 1221600.0 11.573894750148371
6317.0 1323100.0 11.760035838479856
6333.0 1424600.0 13.087948913283274
8301.0 1526100.0 14.221002702916623
1437.0 1627500.0 14.848670112954284
1447.0 1728900.0 15.2007406277492
2524.0 1830300.0 16.176381720896693
3008.0 1931700.0 17.30798861270727
5176.0 2033100.0 18.952936457891273
6478.0 2134400.0 19.382482451532727
8066.0 2235700.0 20.18766529563417
4789.0 2336900.0 20.456769286145608
6694.0 2438100.0 21.8981944443315
7760.0 2539300.0 23.23448521474352


ValueError: attempt to get argmax of an empty sequence