### Reward Functions 
By default, three sample reward functions are provided. For the sake of experimentation, I tested all three default reward functions separately and then decided to use the following one.


In [None]:
import math
def reward_function(params):
    '''
    Reward staying inside the track borders with a safety margin, keeping
    ahead of a target pace, steering gently and pointing along the track.

    Reads 'all_wheels_on_track', 'distance_from_center', 'track_width',
    'steps', 'progress', 'track_length', 'waypoints', 'closest_waypoints',
    'heading' and 'steering_angle' from params.

    Returns the reward for the current step as a float.
    '''
    # Tunable thresholds, gathered in one place
    BORDER_MARGIN = 0.05        # minimum gap kept to the track border
    TARGET_SPEED = 2            # speed used to derive the target lap pace
    STEPS_EACH_SECOND = 15
    CHECKPOINT_INTERVAL = 100   # pace is checked every this many steps
    MAX_STEERING_DEG = 20.0
    MAX_HEADING_GAP_DEG = 10.0

    on_track = params['all_wheels_on_track']
    centre_offset = params['distance_from_center']
    width = params['track_width']

    step_count = params['steps']
    lap_progress = params['progress']
    lap_length = params['track_length']

    track_points = params['waypoints']
    nearest = params['closest_waypoints']
    car_heading = params['heading']

    # Only the magnitude matters; left and right steering are treated alike
    steer_magnitude = abs(params['steering_angle'])

    # High base reward when all wheels are on the track and the car keeps a
    # margin to the border, otherwise a very low one
    safely_inside = on_track and (0.5 * width - centre_offset) >= BORDER_MARGIN
    score = 5.0 if safely_inside else 1e-3

    # Number of steps in which the lap should be finished; depends on the
    # track length
    target_steps = int((lap_length / TARGET_SPEED) * STEPS_EACH_SECOND)

    # Bonus at each checkpoint where progress is ahead of the target pace
    if (step_count % CHECKPOINT_INTERVAL) == 0:
        if lap_progress > (step_count / target_steps) * 100:
            score += 4.0

    # Discourage zigzagging caused by steering too hard
    if steer_magnitude > MAX_STEERING_DEG:
        score *= 0.8

    # Direction of the centre line between the two closest waypoints,
    # in degrees (atan2 gives radians)
    ahead = track_points[nearest[1]]
    behind = track_points[nearest[0]]
    centre_line_deg = math.degrees(
        math.atan2(ahead[1] - behind[1], ahead[0] - behind[0])
    )

    # Angle between the centre line and the car heading, taking the shorter
    # way around the circle
    heading_gap = abs(centre_line_deg - car_heading)
    if heading_gap > 180:
        heading_gap = 360 - heading_gap

    # Discourage pointing away from the track direction
    if heading_gap > MAX_HEADING_GAP_DEG:
        score *= 0.8

    return float(score)


The above reward function worked very well on the official track of the 2019 AWS DeepRacer Championship Cup finals. The model finished more than 3 laps successfully, and its best time during the race was 12.419 sec.

When I cloned the same model and used it on the re:Invent 2018 track, which was the first Championship Cup track, I realized that it had overfitted: it could not complete a single lap successfully.

To remedy that, I aggregated the different rewards and rewrote the reward function as follows.


In [None]:
import math
def waypoints_reward(params):
    '''
    Score how well the car heading lines up with the track direction.

    Reads 'waypoints', 'closest_waypoints' and 'heading' from params.
    Returns 1.0 when the heading is within 10 degrees of the centre line
    between the two closest waypoints, otherwise 0.5.
    '''
    MAX_HEADING_GAP_DEG = 10.0

    track_points = params['waypoints']
    nearest = params['closest_waypoints']
    car_heading = params['heading']

    # The two closest waypoints define the local centre-line segment
    ahead = track_points[nearest[1]]
    behind = track_points[nearest[0]]

    # atan2(dy, dx) gives the segment direction in radians; work in degrees
    centre_line_deg = math.degrees(
        math.atan2(ahead[1] - behind[1], ahead[0] - behind[0])
    )

    # Angle between centre line and heading, taking the shorter way around
    heading_gap = abs(centre_line_deg - car_heading)
    heading_gap = 360 - heading_gap if heading_gap > 180 else heading_gap

    # Full reward when aligned, reduced reward when the gap is too large
    return 0.5 if heading_gap > MAX_HEADING_GAP_DEG else 1.0


def steering_reward(params):
    '''
    Score the steering input, discouraging hard turns that cause zigzagging.

    Reads 'steering_angle' from params. Returns 1.0 when its magnitude is
    at most 20, otherwise 0.8.
    '''
    MAX_STEERING = 20.0
    # Direction (left/right) is irrelevant, only the magnitude counts
    steer_magnitude = abs(params['steering_angle'])
    return 0.8 if steer_magnitude > MAX_STEERING else 1.0


def reward_function(params):
    '''
    Combine centring, speed, heading, steering and on-track scores into one
    weighted reward.

    Reads 'track_width', 'distance_from_center', 'all_wheels_on_track' and
    'speed' from params, and passes params on to waypoints_reward and
    steering_reward.

    Returns 1e-3 when the car is at (or beyond) the edge of the track,
    otherwise the weighted sum of the individual scores as a float.
    '''
    width = params['track_width']
    centre_offset = params['distance_from_center']
    on_track = params['all_wheels_on_track']
    # NOTE: a check on params['is_reversed'] was considered here but is
    # not applied.

    # 1.0 on the centre line, falling linearly to 0.0 at half the track width
    centre_score = 1 - (centre_offset / (0.5 * width))
    if centre_score < 0.002:
        # Practically off the track; likely crashed
        return 1e-3

    # Alignment of the heading with the track direction
    heading_score = waypoints_reward(params)

    # Speed relative to 3.5, capped at 1.0
    speed_score = min(1.0, (params['speed'] / 3.5))

    # Penalty for steering too hard
    steer_score = steering_reward(params)

    # 1 only when every wheel is on the track
    wheel_score = 1 if on_track else 0

    # Weighted aggregation, added up in a fixed order
    weighted_scores = (
        (centre_score, 0.25),
        (speed_score, 0.40),
        (heading_score, 0.1),
        (steer_score, 0.15),
        (wheel_score, 0.15),
    )
    total = 0.0
    for score, weight in weighted_scores:
        total += score * weight

    return float(total)

After this change, the model trained well and was able to complete the evaluation laps successfully on the re:Invent 2018 track (the first Championship Cup track); the best time was around 16 seconds.