In [4]:
import numpy as np

In [5]:
import math


class Reward:
    """Stateful racing-line reward for a DeepRacer-style `params` dict.

    The reward combines four components (see `reward_function`):
    closeness to a pre-computed racing line, closeness to the racing line's
    target speed, a per-step bonus for being on pace for a fast lap, and a
    one-off bonus when the lap is finished. Several "obviously wrong"
    situations collapse the reward to `MIN_REWARD`.

    State:
        first_racingpoint_index: index of the racing-line point closest to the
            car at step 1 of the episode, or None until such a step was seen.
        verbose: when truthy, diagnostics are printed on every call and the
            first racing point is forced to index 0 (testing aid).

    All tunables are class attributes so they can be overridden on a subclass
    or an instance without touching the code.
    """

    # Conversion factor used between step counts and seconds.
    STEPS_PER_SECOND = 15

    # Lowest reward ever handed out (used instead of a hard zero).
    MIN_REWARD = 1e-3

    # Weight of the distance-to-racing-line component.
    DISTANCE_MULTIPLE = 1

    # Speed component: deviation at which the speed reward reaches zero, and its weight.
    SPEED_DIFF_NO_REWARD = 1
    SPEED_MULTIPLE = 2

    # Pace components (manual inputs, in seconds).
    STANDARD_TIME = 11  # time that is easily done by the model
    FASTEST_TIME = 9  # best time of 1st place on the track
    REWARD_PER_STEP_FOR_FASTEST_TIME = 1
    REWARD_FOR_FASTEST_TIME = 200  # should be adapted to track length and other rewards

    # Hard penalties.
    MAX_DIRECTION_DIFF = 30  # degrees off the racing direction before the reward is zeroed
    MAX_SPEED_DEFICIT = 0.5  # how far below the optimal speed before the reward is zeroed

    # Projected lap time reported when no racing-line segment has been covered yet.
    UNKNOWN_PROJECTED_TIME = 9999

    # Optimal racing line for the Spain track (per the original author's note).
    # Each row: [x, y, speed, timeFromPreviousPoint]
    RACING_TRACK = [
        [3.06664, 0.69989, 2.3, 0.06356],
        [3.21372, 0.69357, 2.3, 0.064],
        [3.36169, 0.6893, 2.3, 0.06436],
        [3.51032, 0.68657, 2.3, 0.06463],
        [3.6594, 0.68496, 2.3, 0.06482],
        [3.8088, 0.68412, 2.3, 0.06496],
        [3.9584, 0.68379, 2.3, 0.06504],
        [4.10793, 0.68414, 2.3, 0.06501],
        [4.25712, 0.68535, 2.3, 0.06487],
        [4.40585, 0.68761, 2.3, 0.06467],
        [4.55396, 0.69115, 2.3, 0.06442],
        [4.70133, 0.69619, 2.3, 0.06411],
        [4.84783, 0.70293, 2.3, 0.06376],
        [4.99331, 0.71158, 2.3, 0.06337],
        [5.13763, 0.72237, 2.3, 0.06292],
        [5.28066, 0.73548, 2.3, 0.06245],
        [5.42227, 0.75106, 2.27233, 0.06269],
        [5.56233, 0.76926, 2.04347, 0.06912],
        [5.70059, 0.79043, 1.82599, 0.0766],
        [5.83677, 0.81492, 1.61765, 0.08553],
        [5.97044, 0.84325, 1.43322, 0.09534],
        [6.10109, 0.87602, 1.26705, 0.10631],
        [6.22807, 0.91394, 1.10213, 0.12023],
        [6.35051, 0.95783, 1.10213, 0.11802],
        [6.46729, 1.00867, 1.10213, 0.11557],
        [6.57689, 1.06758, 1.10213, 0.11289],
        [6.67731, 1.1357, 1.0, 0.12134],
        [6.76588, 1.21406, 1.0, 0.11827],
        [6.83839, 1.3035, 1.0, 0.11513],
        [6.8965, 1.40041, 1.0, 0.113],
        [6.94112, 1.50274, 1.0, 0.11163],
        [6.96947, 1.60974, 1.0, 0.11069],
        [6.97707, 1.71948, 1.08365, 0.10151],
        [6.96702, 1.82873, 1.0957, 0.10013],
        [6.94149, 1.93565, 1.0957, 0.10032],
        [6.90175, 2.03894, 1.0957, 0.10101],
        [6.84699, 2.13674, 1.0957, 0.1023],
        [6.77574, 2.22619, 1.21092, 0.09444],
        [6.69117, 2.307, 1.34777, 0.08679],
        [6.5958, 2.37958, 1.50431, 0.07967],
        [6.49161, 2.44467, 1.71585, 0.07159],
        [6.38049, 2.50335, 1.92639, 0.06523],
        [6.26371, 2.5565, 2.19142, 0.05855],
        [6.14243, 2.60505, 2.3, 0.0568],
        [6.01777, 2.65004, 2.3, 0.05762],
        [5.89082, 2.69257, 2.3, 0.05821],
        [5.76272, 2.73384, 2.3, 0.05852],
        [5.63017, 2.77782, 2.3, 0.06072],
        [5.49811, 2.82317, 2.3, 0.06071],
        [5.36667, 2.87018, 2.3, 0.06069],
        [5.23602, 2.9192, 2.3, 0.06067],
        [5.10632, 2.97055, 2.3, 0.06065],
        [4.97777, 3.02458, 2.3, 0.06063],
        [4.85051, 3.08159, 2.3, 0.06063],
        [4.72465, 3.14171, 2.3, 0.06064],
        [4.60022, 3.20493, 2.3, 0.06068],
        [4.47719, 3.27112, 2.3, 0.06074],
        [4.35549, 3.34005, 2.3, 0.06081],
        [4.23502, 3.41139, 2.3, 0.06087],
        [4.11568, 3.48475, 2.3, 0.06091],
        [3.99733, 3.55968, 2.3, 0.0609],
        [3.87982, 3.63569, 2.3, 0.06085],
        [3.76284, 3.71231, 2.26772, 0.06166],
        [3.64732, 3.78753, 1.9755, 0.06978],
        [3.53132, 3.86145, 1.72036, 0.07996],
        [3.41449, 3.93319, 1.72036, 0.07969],
        [3.29649, 4.00174, 1.72036, 0.07933],
        [3.17696, 4.06601, 1.72036, 0.07888],
        [3.05548, 4.12441, 1.72036, 0.07835],
        [2.93169, 4.17515, 1.72036, 0.07776],
        [2.80549, 4.21581, 1.89765, 0.06987],
        [2.67785, 4.24822, 1.96995, 0.06685],
        [2.5493, 4.27301, 1.90274, 0.06881],
        [2.42021, 4.29067, 1.81229, 0.07189],
        [2.29093, 4.30153, 1.64511, 0.07886],
        [2.16175, 4.30562, 1.50063, 0.08613],
        [2.03303, 4.30283, 1.3435, 0.09583],
        [1.90519, 4.29292, 1.18159, 0.10852],
        [1.7788, 4.27535, 1.09334, 0.11671],
        [1.65459, 4.24957, 1.09334, 0.11603],
        [1.53376, 4.21418, 1.09334, 0.11516],
        [1.41797, 4.16786, 1.09334, 0.11406],
        [1.30974, 4.10893, 1.09334, 0.11271],
        [1.21287, 4.03538, 1.09334, 0.11124],
        [1.13093, 3.94692, 1.12668, 0.10702],
        [1.06435, 3.84609, 1.24962, 0.09669],
        [1.01121, 3.73603, 1.37533, 0.08887],
        [0.96999, 3.61869, 1.50145, 0.08283],
        [0.93956, 3.49541, 1.63892, 0.07748],
        [0.91891, 3.36729, 1.78732, 0.07261],
        [0.90708, 3.23527, 1.93172, 0.06862],
        [0.90334, 3.10018, 2.10254, 0.06427],
        [0.90681, 2.9629, 2.22701, 0.06166],
        [0.91698, 2.82419, 2.3, 0.06047],
        [0.93341, 2.68483, 2.3, 0.06101],
        [0.95571, 2.54557, 2.3, 0.06132],
        [0.98342, 2.40706, 2.28904, 0.06171],
        [1.01626, 2.26986, 2.13693, 0.06602],
        [1.05392, 2.13444, 1.94961, 0.0721],
        [1.09624, 2.00121, 1.77206, 0.07888],
        [1.14311, 1.87057, 1.59503, 0.08702],
        [1.19482, 1.7431, 1.41812, 0.097],
        [1.25158, 1.61938, 1.2416, 0.10963],
        [1.31382, 1.50015, 1.2416, 0.10832],
        [1.38221, 1.38643, 1.2416, 0.10688],
        [1.45757, 1.27943, 1.2416, 0.10541],
        [1.54096, 1.18072, 1.2416, 0.10408],
        [1.63386, 1.09253, 1.2416, 0.10317],
        [1.7384, 1.01844, 1.46264, 0.0876],
        [1.85098, 0.955, 1.62135, 0.0797],
        [1.97002, 0.90067, 1.75684, 0.07448],
        [2.09459, 0.85453, 1.90108, 0.06987],
        [2.2239, 0.81579, 2.05818, 0.06559],
        [2.35729, 0.78373, 2.23036, 0.06151],
        [2.49419, 0.75767, 2.3, 0.06059],
        [2.63406, 0.73695, 2.3, 0.06148],
        [2.77639, 0.72086, 2.3, 0.06228],
        [2.92074, 0.70874, 2.3, 0.06298],
    ]

    def __init__(self, verbose=False):
        """Create a reward calculator.

        Args:
            verbose: when truthy, print diagnostics on every call and pin the
                episode's first racing point to index 0 (for testing).
        """
        self.first_racingpoint_index = None
        self.verbose = verbose

    ################## HELPER FUNCTIONS ###################

    @staticmethod
    def _dist_2_points(x1, x2, y1, y2):
        """Return the Euclidean distance between (x1, y1) and (x2, y2)."""
        return ((x1 - x2) ** 2 + (y1 - y2) ** 2) ** 0.5

    @classmethod
    def _closest_2_racing_points_index(cls, racing_coords, car_coords):
        """Return [closest_index, second_closest_index] of `racing_coords` to the car.

        The second index is always different from the first one as long as
        there are at least two racing points; no distance sentinel is used, so
        the result stays correct however far away the car is.
        """
        distances = [
            cls._dist_2_points(x1=point[0], x2=car_coords[0],
                               y1=point[1], y2=car_coords[1])
            for point in racing_coords
        ]
        indexes = range(len(distances))

        # min() returns the first minimal item, matching list.index(min(...)).
        closest_index = min(indexes, key=distances.__getitem__)
        second_closest_index = min(
            (i for i in indexes if i != closest_index),
            key=distances.__getitem__,
            default=closest_index,  # degenerate single-point racing line
        )
        return [closest_index, second_closest_index]

    @classmethod
    def _dist_to_racing_line(cls, closest_coords, second_closest_coords, car_coords):
        """Return the car's distance to the line through the 2 closest racing points.

        Uses the triangle-height form of Heron's formula on the triangle
        (closest point, second closest point, car). Falls back to the distance
        to the closest point when the two racing points coincide (a == 0,
        described in the original code as a rare DeepRacer bug) or when the
        fourth powers overflow.
        """
        # Length of the racing-line segment
        a = cls._dist_2_points(x1=closest_coords[0], x2=second_closest_coords[0],
                               y1=closest_coords[1], y2=second_closest_coords[1])
        # Distances between car and closest / second closest racing point
        b = cls._dist_2_points(x1=car_coords[0], x2=closest_coords[0],
                               y1=car_coords[1], y2=closest_coords[1])
        c = cls._dist_2_points(x1=car_coords[0], x2=second_closest_coords[0],
                               y1=car_coords[1], y2=second_closest_coords[1])

        try:
            # abs() guards against a tiny negative radicand from rounding.
            return abs(-(a**4) + 2*(a**2)*(b**2) + 2*(a**2)*(c**2) -
                       (b**4) + 2*(b**2)*(c**2) - (c**4))**0.5 / (2*a)
        except (ZeroDivisionError, OverflowError):
            return b

    @classmethod
    def _next_prev_racing_point(cls, closest_coords, second_closest_coords, car_coords, heading):
        """Decide which of the two closest racing points lies ahead of the car.

        The car is virtually moved one unit along `heading` (degrees); the
        racing point nearer to that moved position is taken as the next one.

        Returns:
            [next_point_coords, prev_point_coords]
        """
        heading_rad = math.radians(heading)
        new_car_coords = [car_coords[0] + math.cos(heading_rad),
                          car_coords[1] + math.sin(heading_rad)]

        distance_closest_coords_new = cls._dist_2_points(
            x1=new_car_coords[0], x2=closest_coords[0],
            y1=new_car_coords[1], y2=closest_coords[1])
        distance_second_closest_coords_new = cls._dist_2_points(
            x1=new_car_coords[0], x2=second_closest_coords[0],
            y1=new_car_coords[1], y2=second_closest_coords[1])

        if distance_closest_coords_new <= distance_second_closest_coords_new:
            return [closest_coords, second_closest_coords]
        return [second_closest_coords, closest_coords]

    @classmethod
    def _racing_direction_diff(cls, closest_coords, second_closest_coords, car_coords, heading):
        """Return the absolute angle (degrees, 0..180) between heading and racing line."""
        next_point, prev_point = cls._next_prev_racing_point(
            closest_coords, second_closest_coords, car_coords, heading)

        # atan2(dy, dx) is in (-pi, pi] radians; convert to degrees
        track_direction = math.degrees(math.atan2(
            next_point[1] - prev_point[1], next_point[0] - prev_point[0]))

        direction_diff = abs(track_direction - heading)
        if direction_diff > 180:
            direction_diff = 360 - direction_diff
        return direction_diff

    @staticmethod
    def _indexes_cyclical(start, end, array_len):
        """Return the indexes from `start` (included) to `end` (excluded) of a cyclical list."""
        if end < start:
            end += array_len
        return [index % array_len for index in range(start, end)]

    @classmethod
    def _projected_lap_time(cls, first_index, closest_index, step_count, times_list):
        """Project the full-lap time if the car keeps its pace so far.

        The time actually used so far is compared with the time the optimal
        racing line allots to the segments already passed, and that ratio is
        applied to the optimal full-lap time.

        Returns:
            The projected lap time, or `UNKNOWN_PROJECTED_TIME` when no
            racing-line time has been covered yet (ratio undefined).
        """
        # Time passed since the start of the episode
        current_actual_time = (step_count - 1) / cls.STEPS_PER_SECOND

        # Time that should have passed had the car followed the optimals
        indexes_traveled = cls._indexes_cyclical(first_index, closest_index, len(times_list))
        current_expected_time = sum(times_list[i] for i in indexes_traveled)

        # Time one entire lap takes when following the optimals
        total_expected_time = sum(times_list)

        if current_expected_time == 0:
            return cls.UNKNOWN_PROJECTED_TIME
        return (current_actual_time / current_expected_time) * total_expected_time

    ################### REWARD FUNCTION ###################

    def reward_function(self, params):
        """Compute the reward for one simulation step.

        Args:
            params: mapping that must provide 'all_wheels_on_track', 'x', 'y',
                'heading' (degrees), 'progress', 'steps', 'speed' and
                'track_width'. Other keys are ignored.

        Returns:
            The reward as a float. It is `MIN_REWARD` plus any finish bonus
            when the car heads more than `MAX_DIRECTION_DIFF` degrees off the
            racing direction or is more than `MAX_SPEED_DEFICIT` below the
            optimal speed, and exactly `MIN_REWARD` when not all wheels are on
            the track.

        Side effects:
            Updates `first_racingpoint_index` at step 1 (and to 0 on every
            call in verbose mode); prints diagnostics in verbose mode.
        """
        ################## INPUT PARAMETERS ###################

        all_wheels_on_track = params['all_wheels_on_track']
        x = params['x']
        y = params['y']
        heading = params['heading']
        progress = params['progress']
        steps = params['steps']
        speed = params['speed']
        track_width = params['track_width']

        racing_track = self.RACING_TRACK
        car_coords = [x, y]

        ############### OPTIMAL X,Y,SPEED,TIME ################

        closest_index, second_closest_index = self._closest_2_racing_points_index(
            racing_track, car_coords)

        # Optimal [x, y, speed, time] for closest and second closest index
        optimals = racing_track[closest_index]
        optimals_second = racing_track[second_closest_index]

        # Remember the first racing point of the episode for the pace projection
        if self.verbose:
            self.first_racingpoint_index = 0  # this is just for testing purposes
        if steps == 1:
            self.first_racingpoint_index = closest_index

        ################ REWARD AND PUNISHMENT ################

        ## Default reward ##
        reward = 1

        ## Reward if car goes close to optimal racing line ##
        dist = self._dist_to_racing_line(optimals[0:2], optimals_second[0:2], car_coords)
        distance_reward = max(self.MIN_REWARD, 1 - (dist / (track_width * 0.5)))
        reward += distance_reward * self.DISTANCE_MULTIPLE

        ## Reward if speed is close to optimal speed ##
        speed_diff = abs(optimals[2] - speed)
        if speed_diff <= self.SPEED_DIFF_NO_REWARD:
            # Quadratic (not linear) punishment: the optimal speed is less
            # certain, so small deviations from it are barely punished.
            speed_reward = (1 - (speed_diff / self.SPEED_DIFF_NO_REWARD)**2)**2
        else:
            speed_reward = 0
        reward += speed_reward * self.SPEED_MULTIPLE

        ## Reward if on pace for fewer steps ##
        # Without a known starting point (no step-1 call seen yet) the pace
        # cannot be projected, so this component contributes nothing.
        projected_time = None
        steps_reward = 0
        if self.first_racingpoint_index is not None:
            times_list = [row[3] for row in racing_track]
            projected_time = self._projected_lap_time(
                self.first_racingpoint_index, closest_index, steps, times_list)
            steps_prediction = projected_time * self.STEPS_PER_SECOND + 1
            reward_prediction = max(
                self.MIN_REWARD,
                (-self.REWARD_PER_STEP_FOR_FASTEST_TIME * self.FASTEST_TIME /
                 (self.STANDARD_TIME - self.FASTEST_TIME)) *
                (steps_prediction - (self.STANDARD_TIME * self.STEPS_PER_SECOND + 1)))
            try:
                steps_reward = min(self.REWARD_PER_STEP_FOR_FASTEST_TIME,
                                   reward_prediction / steps_prediction)
            except ZeroDivisionError:
                steps_reward = 0
        reward += steps_reward

        ## Zero reward if obviously wrong direction (e.g. spin) ##
        direction_diff = self._racing_direction_diff(
            optimals[0:2], optimals_second[0:2], car_coords, heading)
        if direction_diff > self.MAX_DIRECTION_DIFF:
            reward = self.MIN_REWARD

        ## Zero reward if obviously too slow ##
        if optimals[2] - speed > self.MAX_SPEED_DEFICIT:
            reward = self.MIN_REWARD

        ## Incentive for finishing the lap in less steps ##
        if progress == 100:
            finish_reward = max(
                self.MIN_REWARD,
                (-self.REWARD_FOR_FASTEST_TIME /
                 (self.STEPS_PER_SECOND * (self.STANDARD_TIME - self.FASTEST_TIME))) *
                (steps - self.STANDARD_TIME * self.STEPS_PER_SECOND))
        else:
            finish_reward = 0
        reward += finish_reward

        ## Zero reward if off track (overrides everything, incl. finish bonus) ##
        if not all_wheels_on_track:
            reward = self.MIN_REWARD

        ####################### VERBOSE #######################

        if self.verbose:
            # In verbose mode first_racingpoint_index was set above, so
            # projected_time is always a number here.
            print("Closest index: %i" % closest_index)
            print("Distance to racing line: %f" % dist)
            print("=== Distance reward (w/out multiple): %f ===" % (distance_reward))
            print("Optimal speed: %f" % optimals[2])
            print("Speed difference: %f" % speed_diff)
            print("=== Speed reward (w/out multiple): %f ===" % speed_reward)
            print("Direction difference: %f" % direction_diff)
            print("Predicted time: %f" % projected_time)
            print("=== Steps reward: %f ===" % steps_reward)

        #################### RETURN REWARD ####################

        # Always return a float value
        return float(reward)


# Single shared Reward instance used by the module-level reward_function below.
# Reward stores first_racingpoint_index on the instance, so reusing one object
# lets that value carry over from one call to the next.
reward_object = Reward() # add parameter verbose=True to get noisy output for testing


def reward_function(params):
    """Score `params` by delegating to the shared `reward_object` instance."""
    reward = reward_object.reward_function(params)
    return reward

In [10]:
# Manual smoke test of the reward function.
# NOTE(review): `a` is built in the test cell listed after this one, so that
# cell has to be run first; on a fresh kernel this call raises NameError.
reward_function(a)

3.988329860582705

In [9]:
### TEST ###

# Hand-written `params` dict used to call reward_function manually.
a = {
    "all_wheels_on_track": True,
    "distance_from_center": 0,
    "track_width": 1,
    # Keep only the first two columns of the stored track array.
    "waypoints": np.load("reinvent_base.npy")[:, 0:2],
    "closest_waypoints": [0, 0],
    "heading": 0,
    "is_left_of_center": True,
    "speed": 2.7,
    "x": 3.2,
    "y": 0.7,
    "progress": 30,
    "steps": 10,
    "steering_angle": 20,
    "is_offtrack": False,
}