# Basic model

In [None]:
def reward_function(params):
    """Reward staying near the center line and damp heavy steering.

    Args:
        params: Input dictionary. Keys read: ``distance_from_center``,
            ``track_width`` and ``steering_angle``.

    Returns:
        The reward as a float: 1.0, 0.5 or 0.1 depending on which band
        around the center line the car is in (1e-3 outside all bands),
        scaled by 0.8 when the absolute steering angle exceeds 15.
    """
    offset = params['distance_from_center']
    width = params['track_width']
    # Only the magnitude of the steering angle matters for the penalty.
    steering_magnitude = abs(params['steering_angle'])

    # (band edge as a fraction of the track width, reward inside that band),
    # ordered from the center line outwards.
    bands = ((0.1, 1.0), (0.25, 0.5), (0.5, 0.1))

    # Outside every band the car has likely crashed or is close to off track.
    reward = 1e-3
    for fraction, band_reward in bands:
        if offset <= fraction * width:
            reward = band_reward
            break

    # Steering penalty threshold; tune this to match the action space.
    steering_limit = 15
    if steering_magnitude > steering_limit:
        # Heavy steering is discouraged to mitigate zig-zag behavior.
        reward *= 0.8

    return float(reward)


# Basic model plus new parameters (Time = 23.259)

In [None]:
def reward_function(params):
    """Score one step from track position, speed, steering and heading.

    The heading term compares the car's ``heading`` with the direction of the
    track segment between the two closest waypoints. The earlier version
    derived the target heading from ``heading`` itself, so the measured
    deviation was a constant (0 or 180 degrees) and the term never reacted to
    the car's actual orientation.

    Args:
        params: Input dictionary. Keys read: ``distance_from_center``,
            ``track_width``, ``steering_angle``, ``speed``, ``heading``,
            ``is_offtrack``, ``all_wheels_on_track``, ``waypoints`` and
            ``closest_waypoints``.

    Returns:
        The reward as a float; ``1e-3`` when the car is off track.
    """
    import math

    # Read input parameters
    distance_from_center = params['distance_from_center']
    track_width = params['track_width']
    abs_steering = abs(params['steering_angle'])
    speed = params['speed']
    heading = params['heading']
    is_offtrack = params['is_offtrack']
    all_wheels_on_track = params['all_wheels_on_track']
    waypoints = params['waypoints']
    closest_waypoints = params['closest_waypoints']

    # Off track: return the minimal reward immediately
    if is_offtrack:
        return 1e-3

    # Initialize reward
    reward = 1.0

    # Reward for keeping all wheels on track
    if all_wheels_on_track:
        reward += 1.0

    # Three markers at increasing distances from the center line
    marker_1 = 0.1 * track_width
    marker_2 = 0.25 * track_width
    marker_3 = 0.5 * track_width

    # Reward based on distance from center line
    if distance_from_center <= marker_1:
        reward += 1.0
    elif distance_from_center <= marker_2:
        reward += 0.5
    elif distance_from_center <= marker_3:
        reward += 0.1
    else:
        reward += 1e-3  # likely crashed or close to off track

    # Reward for speed; squaring the ratio favors higher speeds
    MAX_SPEED = 4.0
    speed_bonus = (speed / MAX_SPEED) ** 2
    reward += speed_bonus

    # Penalize excessive steering to prevent zig-zag behavior
    ABS_STEERING_THRESHOLD = 15
    if abs_steering > ABS_STEERING_THRESHOLD:
        reward *= 0.8

    # Track direction in degrees, taken from the closest waypoint behind the
    # car to the closest waypoint ahead of it (closest_waypoints = [prev, next]
    # per the AWS DeepRacer input parameter reference).
    prev_point = waypoints[closest_waypoints[0]]
    next_point = waypoints[closest_waypoints[1]]
    track_direction = math.degrees(
        math.atan2(next_point[1] - prev_point[1], next_point[0] - prev_point[0])
    )

    # Smallest angle between the track direction and the car's heading
    direction_diff = abs(track_direction - heading)
    if direction_diff > 180:
        direction_diff = 360 - direction_diff

    DIRECTION_THRESHOLD = 10.0  # Allowable deviation in degrees
    if direction_diff < DIRECTION_THRESHOLD:
        reward += 0.5
    else:
        reward *= 0.5

    # Additional speed reward on straights, which are assumed to be
    # identified by a low absolute steering angle
    if abs_steering < 5:
        reward += speed_bonus

    return float(reward)



# Increased Speed (Time = 20.196)

In [None]:
def reward_function(params):
    """Score one step with a heavier weight on speed.

    The heading term compares the car's ``heading`` with the direction of the
    track segment between the two closest waypoints. The earlier version
    derived the target heading from ``heading`` itself, so the measured
    deviation was a constant (0 or 180 degrees) and the term never reacted to
    the car's actual orientation.

    Args:
        params: Input dictionary. Keys read: ``distance_from_center``,
            ``track_width``, ``steering_angle``, ``speed``, ``heading``,
            ``is_offtrack``, ``all_wheels_on_track``, ``waypoints`` and
            ``closest_waypoints``.

    Returns:
        The reward as a float; ``1e-3`` when the car is off track.
    """
    import math

    # Read input parameters
    distance_from_center = params['distance_from_center']
    track_width = params['track_width']
    abs_steering = abs(params['steering_angle'])
    speed = params['speed']
    heading = params['heading']
    is_offtrack = params['is_offtrack']
    all_wheels_on_track = params['all_wheels_on_track']
    waypoints = params['waypoints']
    closest_waypoints = params['closest_waypoints']

    # Off track: return the minimal reward immediately
    if is_offtrack:
        return 1e-3

    # Initialize reward
    reward = 1.0

    # Reward for keeping all wheels on track
    if all_wheels_on_track:
        reward += 1.0

    # Three markers at increasing distances from the center line
    marker_1 = 0.1 * track_width
    marker_2 = 0.25 * track_width
    marker_3 = 0.5 * track_width

    # Reward based on distance from center line
    if distance_from_center <= marker_1:
        reward += 1.0
    elif distance_from_center <= marker_2:
        reward += 0.5
    elif distance_from_center <= marker_3:
        reward += 0.1
    else:
        reward += 1e-3  # likely crashed or close to off track

    # Speed reward, counted twice to weight high speed more strongly
    MAX_SPEED = 4.0
    speed_reward = (speed / MAX_SPEED) ** 2
    reward += 2 * speed_reward

    # Penalize excessive steering to prevent zig-zag behavior
    ABS_STEERING_THRESHOLD = 30
    if abs_steering > ABS_STEERING_THRESHOLD:
        reward *= 0.9

    # Track direction in degrees, taken from the closest waypoint behind the
    # car to the closest waypoint ahead of it (closest_waypoints = [prev, next]
    # per the AWS DeepRacer input parameter reference).
    prev_point = waypoints[closest_waypoints[0]]
    next_point = waypoints[closest_waypoints[1]]
    track_direction = math.degrees(
        math.atan2(next_point[1] - prev_point[1], next_point[0] - prev_point[0])
    )

    # Smallest angle between the track direction and the car's heading
    direction_diff = abs(track_direction - heading)
    if direction_diff > 180:
        direction_diff = 360 - direction_diff

    DIRECTION_THRESHOLD = 15  # Allowable deviation in degrees
    if direction_diff < DIRECTION_THRESHOLD:
        reward += 0.5
    else:
        reward *= 0.8

    # Additional speed reward on straights, which are assumed to be
    # identified by a low absolute steering angle
    if abs_steering < 5:
        reward += 2 * speed_reward

    return float(reward)


# Keep always on left (Time = 20.066)

In [None]:
def reward_function(params):
    """Score one step, favoring the left side of the track.

    The heading term compares the car's ``heading`` with the direction of the
    track segment between the two closest waypoints. The earlier version
    derived the target heading from ``heading`` itself, so the measured
    deviation was a constant (0 or 180 degrees) and the term never reacted to
    the car's actual orientation.

    Args:
        params: Input dictionary. Keys read: ``distance_from_center``,
            ``track_width``, ``steering_angle``, ``speed``, ``heading``,
            ``is_offtrack``, ``all_wheels_on_track``, ``is_left_of_center``,
            ``waypoints`` and ``closest_waypoints``.

    Returns:
        The reward as a float; ``1e-3`` when the car is off track.
    """
    import math

    # Read input parameters
    distance_from_center = params['distance_from_center']
    track_width = params['track_width']
    abs_steering = abs(params['steering_angle'])
    speed = params['speed']
    heading = params['heading']
    is_offtrack = params['is_offtrack']
    all_wheels_on_track = params['all_wheels_on_track']
    is_left = params['is_left_of_center']
    waypoints = params['waypoints']
    closest_waypoints = params['closest_waypoints']

    # Off track: return the minimal reward immediately
    if is_offtrack:
        return 1e-3

    # Initialize reward
    reward = 1.0

    # Reward for all wheels on track, on the left side, with at least 0.05
    # of margin between the car's center and the track border
    if all_wheels_on_track and is_left and (0.5 * track_width - distance_from_center) >= 0.05:
        reward += 1.0

    # Three markers at increasing distances from the center line
    marker_1 = 0.1 * track_width
    marker_2 = 0.25 * track_width
    marker_3 = 0.5 * track_width

    # Reward based on distance from center line
    if distance_from_center <= marker_1:
        reward += 1.0
    elif distance_from_center <= marker_2:
        reward += 0.5
    elif distance_from_center <= marker_3:
        reward += 0.1
    else:
        reward += 1e-3  # likely crashed or close to off track

    # Speed reward, scaled down as the steering angle grows
    MAX_SPEED = 1.0  # Divisor used to normalize speed in this version
    if abs_steering < 15:
        speed_penalty = 1.0
    elif abs_steering < 30:
        speed_penalty = 0.67
    else:
        speed_penalty = 0.33

    speed_reward = (speed / MAX_SPEED) * speed_penalty
    reward += 2 * speed_reward  # Counted twice to weight speed more strongly

    # Penalize excessive steering to prevent zig-zag behavior
    ABS_STEERING_THRESHOLD = 30
    if abs_steering > ABS_STEERING_THRESHOLD:
        reward *= 0.9

    # Track direction in degrees, taken from the closest waypoint behind the
    # car to the closest waypoint ahead of it (closest_waypoints = [prev, next]
    # per the AWS DeepRacer input parameter reference).
    prev_point = waypoints[closest_waypoints[0]]
    next_point = waypoints[closest_waypoints[1]]
    track_direction = math.degrees(
        math.atan2(next_point[1] - prev_point[1], next_point[0] - prev_point[0])
    )

    # Smallest angle between the track direction and the car's heading
    direction_diff = abs(track_direction - heading)
    if direction_diff > 180:
        direction_diff = 360 - direction_diff

    DIRECTION_THRESHOLD = 15.0  # Allowable deviation in degrees
    if direction_diff < DIRECTION_THRESHOLD:
        reward += 0.8
    else:
        reward *= 0.8

    # Additional speed reward on straights, which are assumed to be
    # identified by a low absolute steering angle
    if abs_steering < 5:
        reward += 2 * (speed / MAX_SPEED) ** 2

    return float(reward)



# Increase speed on straight paths, adjust turn handling and include progress, steps and track length

In [None]:
def reward_function(params):
    """Score one step from position, speed, steering, heading and progress.

    Changes relative to the earlier version of this cell:

    * Off track now returns ``1e-3`` immediately. Previously the reward was
      multiplied by ``1e-3`` and the later additive bonuses were still
      applied, which cancelled the penalty.
    * The heading term compares ``heading`` with the direction of the track
      segment between the two closest waypoints. Previously the target
      heading was derived from ``heading`` itself, so the deviation was a
      constant.
    * The efficiency term is skipped when ``steps`` is 0 instead of raising
      ``ZeroDivisionError``.

    Args:
        params: Input dictionary. Keys read: ``distance_from_center``,
            ``track_width``, ``steering_angle``, ``speed``, ``heading``,
            ``is_offtrack``, ``all_wheels_on_track``, ``is_left_of_center``,
            ``progress``, ``steps``, ``track_length``, ``waypoints`` and
            ``closest_waypoints``.

    Returns:
        The reward as a float; ``1e-3`` when the car is off track.
    """
    import math

    # Read input parameters
    distance_from_center = params['distance_from_center']
    track_width = params['track_width']
    abs_steering = abs(params['steering_angle'])
    speed = params['speed']
    heading = params['heading']
    is_offtrack = params['is_offtrack']
    all_wheels_on_track = params['all_wheels_on_track']
    is_left = params['is_left_of_center']
    progress = params['progress']
    steps = params['steps']
    track_length = params['track_length']
    waypoints = params['waypoints']
    closest_waypoints = params['closest_waypoints']

    # Off track: return the minimal reward immediately
    if is_offtrack:
        return 1e-3

    # Initialize reward
    reward = 1.0

    # Reward for keeping all wheels on track on the left side of the track
    if all_wheels_on_track and is_left:
        reward += 1.0

    # Three markers at increasing distances from the center line
    marker_1 = 0.1 * track_width
    marker_2 = 0.25 * track_width
    marker_3 = 0.5 * track_width

    # Reward based on distance from center line
    if distance_from_center <= marker_1:
        reward += 1.0
    elif distance_from_center <= marker_2:
        reward += 0.5
    elif distance_from_center <= marker_3:
        reward += 0.1
    else:
        reward += 1e-3  # likely crashed or close to off track

    # Non-linear speed reward: squaring gives higher speeds more weight
    speed_reward = speed ** 2
    reward += speed_reward

    # Scale the reward down as the steering angle grows
    if abs_steering < 15:
        steering_penalty = 1.0
    elif abs_steering < 30:
        steering_penalty = 0.75
    else:
        steering_penalty = 0.5
    reward *= steering_penalty

    # Near-zero steering is treated as a straight: add twice the speed reward
    if abs_steering < 3:
        reward += speed_reward * 2

    # Track direction in degrees, taken from the closest waypoint behind the
    # car to the closest waypoint ahead of it (closest_waypoints = [prev, next]
    # per the AWS DeepRacer input parameter reference).
    prev_point = waypoints[closest_waypoints[0]]
    next_point = waypoints[closest_waypoints[1]]
    track_direction = math.degrees(
        math.atan2(next_point[1] - prev_point[1], next_point[0] - prev_point[0])
    )

    # Smallest angle between the track direction and the car's heading
    direction_diff = abs(track_direction - heading)
    if direction_diff > 180:
        direction_diff = 360 - direction_diff

    DIRECTION_THRESHOLD = 10.0  # Allowable deviation in degrees
    if direction_diff < DIRECTION_THRESHOLD:
        reward += 1.0
    else:
        reward *= 0.9  # Reduce reward for misalignment

    # Reward proportional to progress along the track
    progress_reward = (progress / 100) * 2.0
    reward += progress_reward

    # Reward for efficiency (more progress per step); guarded against steps == 0
    if progress > 0 and steps > 0:
        efficiency_reward = (progress / steps) * track_length
        reward += efficiency_reward

    return float(reward)

# Final Model

In [None]:
def reward_function(params):
    """Score one step, favoring the left side and speed on straights.

    Args:
        params: Input dictionary. Keys read: ``distance_from_center``,
            ``track_width``, ``steering_angle``, ``speed``, ``is_offtrack``,
            ``all_wheels_on_track`` and ``is_left_of_center``.

    Returns:
        The reward as a float; ``1e-3`` when the car is off track.
    """
    offset = params['distance_from_center']
    width = params['track_width']
    steering_magnitude = abs(params['steering_angle'])
    velocity = params['speed']
    off_track = params['is_offtrack']
    wheels_ok = params['all_wheels_on_track']
    on_left = params['is_left_of_center']

    # Off track: nothing else counts.
    if off_track:
        return 1e-3

    # Base reward, plus a bonus while every wheel is on the track.
    reward = 1.0
    if wheels_ok:
        reward += 1.5

    # (band edge as a fraction of the track width, bonus inside that band),
    # ordered from the center line outwards.
    bands = ((0.1, 1.0), (0.25, 0.5), (0.5, 0.1))
    band_bonus = next(
        (bonus for fraction, bonus in bands if offset <= fraction * width),
        None,
    )
    # The band bonus is only granted on the left side of the center line;
    # otherwise (or outside all bands) only a token amount is added.
    if band_bonus is not None and on_left:
        reward += band_bonus
    else:
        reward += 1e-3

    # Speed term: worth four times as much when steering is minimal, which
    # is treated as a straight path.
    top_speed = 4.0
    if steering_magnitude < 5:
        reward += (velocity / top_speed) ** 2 * 4
    else:
        reward += (velocity / top_speed) ** 2

    # Damp the reward for heavy steering to prevent zig-zag behavior.
    steering_limit = 20
    if steering_magnitude > steering_limit:
        reward *= 0.8

    # Flat bonus for being left of the center line.
    if on_left:
        reward += 0.8

    return float(reward)

