In [2]:
import pandas as pd
import numpy as np
from scipy.spatial import distance
from random import choice

# Load the bank table and derive each row's payoff rate: money per hour spent.
df = pd.read_csv('bank_data.csv')
df['ratio'] = df['money'] / df['time (hr)']

# Row-wise array of the table; column order follows `df.columns`.
data = df.to_numpy()

# Rows ordered by payoff rate, highest first, so a leading slice holds the
# most profitable rows. The ratio values are taken by column name because a
# hard-coded position (the old `data[:, 2]`) silently sorted on a different
# column.
data_by_ratio = data[np.argsort(-df['ratio'].to_numpy())]

# Spatial coordinates only. `ratio` must be excluded explicitly: it was added
# to `df` above, so leaving it out of this list would turn it into an extra
# "coordinate" and distort every pairwise distance.
coords = df[df.columns.difference(['id', 'money', 'time (hr)', 'ratio'])].to_numpy()

# closests[i] holds row positions sorted by distance from row i
# (row i itself is at distance 0).
closests = np.argsort(distance.squareform(distance.pdist(coords)), axis=1)

In [None]:
# TODO: build the state/reward matrix
# TODO: build the Q matrix
# TODO: set the gamma learning parameter
# TODO: pick a random initial position

### 2. Generating random games

Games can either be played from scratch or simulated starting from a specific saved state.

In [3]:
import numpy as np
from random import choice
from copy import deepcopy
from scipy.spatial import distance

# Positions of the fields inside one row of the `data` array.
(id_index,
 x_cord_index,
 y_cord_index,
 money_index,
 time_index,
 ratio_index) = range(6)

# Travel distance is divided by `speed` to obtain the time spent travelling.
speed = 30

# `simulation_depth` caps how many nearest rows are considered for a move;
# `simulations_amout` shares the same value.
simulations_amout = simulation_depth = 100

# Move strategies: pick a random nearby row, or the nearby row with the
# largest value in the given column (money or ratio).
actions = ['random', money_index, ratio_index]

class Game:
    """One play-through of the bank-route game.

    Tracks the rows visited so far, the money collected, the hours left out
    of a 24-hour budget, and the hours needed to travel from the current row
    to the exit point at (0, 0).
    """

    def __init__(self, restore=False, state=None):
        """Start a new game at a random row, or rebuild one from a snapshot.

        Args:
            restore: when true, rebuild the game from `state` instead of
                choosing a random starting row.
            state: dict with the keys returned by `get_state()`; an optional
                'actions' list is restored too. Only read when `restore` is
                true.

        Raises:
            KeyError: if `restore` is true and `state` lacks a required key.
        """
        self.data = data

        if restore:
            # Deep-copy so the restored game never shares its lists with the
            # snapshot it was built from.
            local_state = deepcopy(state if state is not None else {})
            self.visited = local_state['visited']
            self.score = local_state['score']
            self.time_left = local_state['time_left']
            self.time_to_exit = local_state['time_to_exit']
            # Snapshots carry no move history by default; start with an empty
            # one so code that appends to `actions` works on restored games.
            self.actions = local_state.get('actions', [])
        else:
            # todo: rework to chose randomly from interesting regions
            # self.visited = [choice(data_by_ratio[0:200])]
            self.visited = [choice(self.data)]
            self.score = self.visited[0][money_index]
            self.actions = ['random']
            self.time_left = 24 - self.visited[0][time_index]
            self.time_to_exit = self._hours_to_exit()

        # Fast membership lookup of visited row ids.
        self.visited_dict = {visit[id_index]: True for visit in self.visited}
        self.game_ended = False

    @staticmethod
    def _coords(row):
        """Return the (x, y) pair of a row; the slice end is inclusive of y."""
        return row[x_cord_index:y_cord_index + 1]

    def _hours_to_exit(self):
        """Return the travel time from the last visited row to the exit at (0, 0)."""
        return distance.euclidean(self._coords(self.visited[-1]), [0, 0]) / speed

    def get_state(self):
        """Return the game's state as a dict of live references (not copies)."""
        return {
            'visited': self.visited,
            'score': self.score,
            'time_left': self.time_left,
            'time_to_exit': self.time_to_exit
        }

    def get_visited_ids(self):
        """Return the ids of the visited rows, in visiting order."""
        return [visit[id_index] for visit in self.visited]

    def play_move(self, move):
        """Travel to `move`, collect its money and spend its time.

        When the exit can no longer be reached in the remaining time, the
        game is flagged as ended and the move is ignored.

        Args:
            move: a row indexable by the `*_index` constants.
        """
        # don't let move when game is over
        if self.time_to_exit > self.time_left:
            self.game_ended = True
            return

        distance_to_next = distance.euclidean(
            self._coords(self.visited[-1]), self._coords(move)
        )
        self.visited.append(move)
        self.visited_dict[move[id_index]] = True
        # update the score
        self.score = self.score + move[money_index]
        # Both the time spent at the row and the travel time are consumed.
        self.time_left = self.time_left - move[time_index] - (distance_to_next / speed)
        self.time_to_exit = self._hours_to_exit()
        

In [4]:
class Simulated_Game(Game):
    """Game that plays itself by mixing the strategies listed in `actions`."""

    def play_random_game(self):
        """Play moves until the exit can no longer be reached in time.

        A move that makes the exit unreachable is undone completely (visited
        list, visited ids, score, remaining time and move history), so the
        final score only counts stops that fit in the time budget. The game
        also stops when no unvisited row is left.
        """
        while self.time_to_exit < self.time_left:
            move = self.choose_move_type()
            if move is None:
                break

            before = (self.score, self.time_left, self.time_to_exit)
            self.play_move(move)

            if self.time_to_exit > self.time_left:
                # Overshoot: roll the whole move back, not just the visit.
                self.visited.pop()
                self.visited_dict.pop(move[id_index], None)
                self.score, self.time_left, self.time_to_exit = before
                self.actions.pop()
                break
        # end game
        self.game_ended = True

    def choose_move_type(self):
        """Pick a random strategy and return the row it selects.

        Candidates are the unvisited rows among the `simulation_depth`
        nearest ones; if all of those are visited, the nearest unvisited row
        further out is used instead.

        Returns:
            The chosen row, or None when every row has been visited.
        """
        move_type = choice(actions)
        # Guard against instances whose constructor did not set `actions`.
        if not hasattr(self, 'actions'):
            self.actions = []

        # NOTE(review): `closests` is indexed by the row's id here, which is
        # only right if ids coincide with row positions in `data` - confirm.
        neighbours = closests[int(self.visited[-1][id_index])]
        candidates = [
            self.data[i]
            for i in neighbours[:simulation_depth]
            if self.data[i][id_index] not in self.visited_dict
        ]

        if not candidates:
            # Neighbourhood exhausted: fall back to the nearest unvisited row.
            for i in neighbours[simulation_depth:]:
                if self.data[i][id_index] not in self.visited_dict:
                    candidates.append(self.data[i])
                    break
        if not candidates:
            return None

        self.actions.append(move_type)
        if move_type == 'random':
            return choice(candidates)
        # return the highest value based on the current move type selected
        best = int(np.argmax([row[move_type] for row in candidates]))
        return candidates[best]

# helpers
def get_current_policy(ratio_score, random_score):
    """Name the strategy with the better score.

    Returns `ratio_index` when the ratio strategy scored at least as well as
    the random one, 'random' when the random strategy scored strictly better,
    and None when the scores compare equal to neither (e.g. NaN).
    """
    best = max(ratio_score, random_score)
    if best == ratio_score:
        return ratio_index
    return 'random' if best == random_score else None


In [5]:


def simple_simulator(simulations=None):
    """Play several random games and return the highest-scoring one.

    Args:
        simulations: number of random games to play; defaults to the
            `simulations_amout` setting.

    Returns:
        The best `Simulated_Game`. An unplayed game serves as the baseline,
        so it is returned if no played game beats its score.
    """
    if simulations is None:
        simulations = simulations_amout

    best_game = Simulated_Game()
    for _ in range(simulations):
        candidate = Simulated_Game()
        candidate.play_random_game()
        if candidate.score > best_game.score:
            best_game = candidate
    return best_game


# Run the simulator once, then show the winning route's ids and its total
# formatted as a dollar amount.
g = simple_simulator()
print(g.get_visited_ids(), "$" + format(g.score, ",.2f"))


[4610.0, 8949.0, 3136.0, 9640.0, 8125.0, 2458.0, 5971.0, 7265.0, 4494.0, 8870.0, 4629.0, 8295.0, 3239.0, 6934.0, 4293.0, 2626.0, 5944.0, 4291.0, 6180.0, 2425.0, 1844.0, 2185.0, 4723.0, 9928.0, 8829.0, 6216.0, 670.0, 7769.0, 5184.0, 8841.0, 4359.0, 3683.0, 8407.0, 2643.0, 6317.0, 5631.0, 6478.0, 1447.0, 9469.0, 5155.0, 517.0, 5719.0, 70.0, 1914.0, 865.0, 4499.0, 4789.0, 5622.0, 8375.0, 9378.0, 5356.0, 6281.0, 1053.0, 1684.0, 7772.0, 6375.0, 1372.0, 5563.0, 557.0, 7649.0, 7665.0, 279.0, 6156.0, 2769.0, 1676.0, 232.0, 3007.0, 2028.0, 4036.0, 8503.0, 3216.0, 6876.0, 5836.0, 7625.0, 4362.0, 488.0, 6623.0, 4807.0, 6535.0, 3516.0, 6104.0, 2346.0, 4906.0, 5399.0, 5627.0, 7595.0, 7087.0, 8231.0, 4345.0, 5296.0, 507.0, 5562.0, 3026.0, 5725.0, 7689.0, 2243.0, 2037.0, 2331.0, 8525.0, 3193.0, 9736.0, 4696.0, 8287.0, 3089.0, 3005.0, 7583.0, 2729.0, 58.0, 1455.0, 2442.0, 5295.0, 4234.0, 7801.0, 9049.0, 1997.0, 781.0, 1599.0, 9170.0, 9120.0, 7494.0, 9290.0, 9779.0, 2.0, 5381.0, 3340.0, 839.0, 1860.0, 