In [48]:
import pandas as pd
import numpy as np
from scipy.spatial import distance
from random import choice

df = pd.read_csv('bank_data.csv')
df['ratio'] = df['money'] / df['time (hr)']

data = df.to_numpy()
data_by_ratio = data[data[:,2].argsort()]
coords = df[df.columns.difference(['id', 'money', 'time (hr)'])].to_numpy()
closests = np.argsort(distance.squareform(distance.pdist(coords)), axis=1)

In [None]:
# make matrix state/reward
# make Q matrix
# make gamma learning parameter
# make random initial position

### 2. generating random games 

games can either be played from scratch or simulated for a specific state

In [82]:
import numpy as np
from random import choice
from copy import deepcopy
from scipy.spatial import distance

id_index = 0
speed = 30
x_cord_index = 1
y_cord_index = 2
money_index = 3
time_index = 4
ratio_index = 5
simulations_amout = 100
simulation_depth = 100

actions = ['random', money_index, ratio_index]

class Game:
    def __init__(self, restore=False, state={}):
        self.data = data

        if(restore):
            local_state = deepcopy(state)
            self.visited = local_state['visited']
            self.score = local_state['score']
            self.time_left = local_state['time_left']
            self.time_to_exit = local_state['time_to_exit']
        else:
            # todo: rework to chose randomly from interesting regions
            # self.visited = [choice(data_by_ratio[0:200])]
            self.visited = [choice(self.data)] 
            self.score = self.visited[0][money_index]
            self.actions = ['random']
            self.time_left = 24 - self.visited[0][time_index]
            self.time_to_exit = distance.euclidean(self.visited[-1][x_cord_index:y_cord_index], [0,0]) / speed
        self.visited_dict = {}

        for visit in self.visited:
            self.visited_dict[visit[id_index]] = True

        self.game_ended = False

    def get_state(self):
        return {
            'visited': self.visited,
            'score': self.score,
            'time_left': self.time_left,
            'time_to_exit': self.time_to_exit
        }

    def get_visited_ids(self):
        return list(map(lambda x: x[id_index], self.visited))

    def play_move(self, move):
        # don't let move when game is over
        if (self.time_to_exit > self.time_left):
            self.game_ended = True
            return

        next_move = move
        distance_to_next = distance.euclidean(self.visited[-1][x_cord_index:y_cord_index], next_move[x_cord_index:y_cord_index])
        self.visited.append(next_move)
        self.visited_dict[next_move[id_index]] = True
        # update the score
        self.score = self.score + next_move[money_index]
        self.time_left = self.time_left - next_move[time_index] - (distance_to_next / speed)
        self.time_to_exit = distance.euclidean(self.visited[-1][x_cord_index:y_cord_index], [0,0]) / speed
        

In [83]:
class Simulated_Game(Game):
    def play_random_game(self):
            while(self.time_to_exit < self.time_left):
                self.play_move(self.choose_move_type())
            self.visited = self.visited[:-1]
            # end game
            self.game_ended = True
    
    def choose_move_type(self):
        move_type = choice(actions)
        self.actions.append(move_type)
        distances_from_next = closests[int(self.visited[-1][id_index])]
        closest_values = [self.data[i] for i in distances_from_next[:simulation_depth] if self.data[i][id_index] not in self.visited_dict]

        if (move_type == 'random'):
            return choice(closest_values)
        # return the highest value based on the current move type selected
        return closest_values[np.argmax(closest_values, axis=0)[move_type]]

# helpers
def get_current_policy(ratio_score, random_score):
    maximum = max(ratio_score, random_score)

    if(maximum == ratio_score):
        return ratio_index

    if(maximum == random_score):
        return 'random'


In [85]:


def simple_simulator():
    game = Simulated_Game()
    for i in range(1000):
            temp_game = Simulated_Game()
            temp_game.play_random_game()
            if(temp_game.score > game.score):
                game = temp_game
    return game


g = simple_simulator()
print(len(g.get_visited_ids()), "${:,.2f}".format(g.score))


125 $8,191,300.00
