# 20241114 function

In [1]:
import math

# static
# Must update if the env changes.
TOP_SPEED = 4.0  
STEERING_ANGLE_THRESHOLD = 10
IS_CLOCKWISE = False
INTERVAL_TO_TARGET_WAY_POINT = 3
ESTINATED_TOTAL_NUM_STEPS = 300


def get_direction_in_degree(next_point, prev_point):
	'''
	The speed in the direction of the waypoints.
	'''
	# Calculate the direction in radius, arctan2(dy, dx), the result is (-pi, pi) in radians
	track_direction = math.atan2(next_point[1] - prev_point[1], next_point[0] - prev_point[0])
	# Convert to degree (-180, 180)
	return math.degrees(track_direction)
	
def get_speed_in_direction(target_direction, heading, speed):
	direction_diff = abs(target_direction - heading)
	if direction_diff > 180:
		direction_diff = 360 - direction_diff
	speed_bonus = math.cos(math.radians(direction_diff)) * speed / TOP_SPEED
	return speed_bonus
	

def reward_function(params):
	'''
	1. Don't run off the track.
	2. Base reward when keeping all wheels on track and not heading towards the opposite way.
	3. Encourage the agent to have a high speed towards a far side way point.
	4. Give additional reward if the car pass every 100 steps faster than expected.
	'''
	
	# Read input variables
	x = params['x']
	y = params['y']
	waypoints = params['waypoints']
	closest_waypoints = params['closest_waypoints']
	heading = params['heading']
	speed = params['speed']
	is_offtrack = params['is_offtrack']
	all_wheels_on_track = params['all_wheels_on_track']
	is_reversed = params['is_reversed'] # if the agent is driving on clock-wise (True) or counter clock-wise (False).
	steps = params['steps']
	progress = params['progress']
	
	reward = 0.0
	# 1. Don't terminate off track.
	if is_offtrack:
		print('Vehicle is off track!')
		return -10.0
		
	# 2. Base reward when keeping all wheels on track and not heading towards the opposite way.
	if all_wheels_on_track and ((IS_CLOCKWISE and is_reversed) or (not IS_CLOCKWISE and not is_reversed)):
		reward += 0.1
	else:
		if not all_wheels_on_track:
			print('The vehicle is having a wheel off track.')
			
		if (IS_CLOCKWISE and not is_reversed) or (not IS_CLOCKWISE and is_reversed):
			print('The vehicle is heading backword.')
		
	# 3. Encourage the agent to have a high speed towards a far side way point. 
	target_way_point_idx = (closest_waypoints[1] + INTERVAL_TO_TARGET_WAY_POINT) % len(waypoints)
	target_way_point = waypoints[target_way_point_idx]
	
	target_direction = get_direction_in_degree(target_way_point, [x, y])
	speed_bonus = get_speed_in_direction(target_direction, heading, speed)
	print('The speed bonus is %f.' % speed_bonus)
	reward += speed_bonus
	
	# 4. Give additional reward if the car pass every 100 steps faster than expected.
	if (steps % 100) == 0 and progress > (steps / ESTINATED_TOTAL_NUM_STEPS) * 100 :
		reward += 10.0
		print('Progress checkpoint reward acquired. Progress: %f, Step: %f' % (progress, steps))
	
	return float(reward)

# 20241116 function

In [2]:
import math

# static
# Must update if the env changes.
TOP_SPEED = 4.0
IS_CLOCKWISE = False
INTERVAL_TO_TARGET_WAY_POINT = 3
ESTINATED_TOTAL_NUM_STEPS = 160 # Usually it makes a progress of 54-67 in 100 steps. 


def get_direction_in_degree(next_point, prev_point):
	'''
	The speed in the direction of the waypoints.
	'''
	# Calculate the direction in radius, arctan2(dy, dx), the result is (-pi, pi) in radians
	track_direction = math.atan2(next_point[1] - prev_point[1], next_point[0] - prev_point[0])
	# Convert to degree (-180, 180)
	return math.degrees(track_direction)
	
def get_speed_in_direction(target_direction, heading, speed):
	direction_diff = abs(target_direction - heading)
	if direction_diff > 180:
		direction_diff = 360 - direction_diff
	speed_bonus = math.cos(math.radians(direction_diff)) * speed / TOP_SPEED
	return speed_bonus
	

def reward_function(params):
	'''
	1. Don't run off the track.
	2. Base reward when keeping all wheels on track and not heading towards the opposite way.
	3. Encourage the agent to have a high speed towards a far side way point.
	4. Give additional reward if the car pass every 20 steps faster than expected. The total 
	'''
	
	# Read input variables
	x = params['x']
	y = params['y']
	waypoints = params['waypoints']
	closest_waypoints = params['closest_waypoints']
	heading = params['heading']
	speed = params['speed']
	is_offtrack = params['is_offtrack']
	all_wheels_on_track = params['all_wheels_on_track']
	is_reversed = params['is_reversed'] # if the agent is driving on clock-wise (True) or counter clock-wise (False).
	steps = params['steps']
	progress = params['progress']
	
	reward = 0.0
	# 1. Don't terminate off track.
	if is_offtrack:
		print('Vehicle is off track!')
		return -10.0
		
	# 2. Base reward when keeping all wheels on track and not heading towards the opposite way.
	if all_wheels_on_track and ((IS_CLOCKWISE and is_reversed) or (not IS_CLOCKWISE and not is_reversed)):
		reward += 0.1
	else:
		if not all_wheels_on_track:
			print('The vehicle is having a wheel off track.')
			
		if (IS_CLOCKWISE and not is_reversed) or (not IS_CLOCKWISE and is_reversed):
			print('The vehicle is heading backword.')
		
	# 3. Encourage the agent to have a high speed towards a far side way point. 
	target_way_point_idx = (closest_waypoints[1] + INTERVAL_TO_TARGET_WAY_POINT) % len(waypoints)
	target_way_point = waypoints[target_way_point_idx]
	
	target_direction = get_direction_in_degree(target_way_point, [x, y])
	speed_bonus = get_speed_in_direction(target_direction, heading, speed)
	print('The speed bonus is %f.' % speed_bonus)
	reward += speed_bonus
	
	# 4. Give additional reward if the car pass every 20 steps faster than expected.
	if (steps % 20) == 0 and progress > (steps / ESTINATED_TOTAL_NUM_STEPS) * 100 :
		reward += 10.0
		print('Progress checkpoint reward acquired. Progress: %f, Step: %f' % (progress, steps))
	
	return float(reward)

# 20241117

In [1]:
import math

# static
# Must update if the env changes.
TOP_SPEED = 3.0
IS_CLOCKWISE = False
INTERVAL_TO_TARGET_WAY_POINT = 8
STEP_VALUE = 0.1
previous_progress = 0

def get_direction_in_degree(next_point, prev_point):
	'''
	The speed in the direction of the waypoints.
	'''
	# Calculate the direction in radius, arctan2(dy, dx), the result is (-pi, pi) in radians
	track_direction = math.atan2(next_point[1] - prev_point[1], next_point[0] - prev_point[0])
	# Convert to degree (-180, 180)
	return math.degrees(track_direction)
	
def get_speed_in_direction(target_direction, heading, speed):
	direction_diff = abs(target_direction - heading)
	if direction_diff > 180:
		direction_diff = 360 - direction_diff
	speed_bonus = math.cos(math.radians(direction_diff)) * speed / TOP_SPEED
	return speed_bonus
	
def reward_function(params):
	'''
	1. Don't run off the track.
	2. Encourage the agent to have a high speed towards a far side way point.
	3. Give as much rewards as the progress made.
	4. Every step values.
	'''
	
	# Read input variables
	x = params['x']
	y = params['y']
	waypoints = params['waypoints']
	closest_waypoints = params['closest_waypoints']
	heading = params['heading']
	speed = params['speed']
	is_offtrack = params['is_offtrack']
	steps = params['steps']
	progress = params['progress']
	
	reward = 0.0
	# 1. Don't terminate off track.
	if is_offtrack:
		print('Vehicle is off track!')
		print(waypoints)
		return -20.0
		
	# 2. Encourage the agent to have a high speed towards a far side way point. 
	target_way_point_idx = (closest_waypoints[1] + INTERVAL_TO_TARGET_WAY_POINT) % len(waypoints)
	target_way_point = waypoints[target_way_point_idx]
	
	target_direction = get_direction_in_degree(target_way_point, [x, y])
	speed_bonus = get_speed_in_direction(target_direction, heading, speed)
	print('The speed bonus is %f.' % speed_bonus)
	reward += speed_bonus
	
	# 3. Give as much rewards as the progress made.
	global previous_progress
	progress_bonus = progress - previous_progress
	if progress_bonus > 0:
		reward += progress_bonus
		print('Previous progress: %f.2, current progress: %f.2, earned score: %f.2' % (previous_progress, progress, progress_bonus))
	previous_progress = progress
	
	# 4. Every step values.
	reward -= STEP_VALUE
	
	return float(reward)

# 20241117 revise

In [17]:
import math

# static
# Must update if the env changes.
TOP_SPEED = 3.0
IS_CLOCKWISE = False
INTERVAL_TO_TARGET_WAY_POINT = 8
STEP_VALUE = 0.1
previous_progress = 0

def get_waypoint_idx(total_length, current_idx, step):
    return (current_idx + total_length + step) % total_length

def get_direction_in_degree(next_point, prev_point):
    '''
    Calculate the direction relative to the <0 direction of x-axis.
    Left of it is +, right of it is -.    
    '''
    # Calculate the direction in radius, arctan2(dy, dx), the result is (-pi, pi) in radians
    track_direction = math.atan2(next_point[1] - prev_point[1], next_point[0] - prev_point[0])
    # Convert to degree (-180, 180)
    return math.degrees(track_direction)
    
def get_speed_in_direction(target_direction, heading, speed):
    '''
    The speed in the direction of the waypoints.
    '''
    direction_diff = abs(target_direction - heading)
    if direction_diff > 180:
        direction_diff = 360 - direction_diff
    speed_bonus = math.cos(math.radians(direction_diff)) * speed / TOP_SPEED
    return speed_bonus

def is_turning(waypoints, closest_waypoints):
    '''
    If turning left within INTERVAL_TO_TARGET_WAY_POINT, return 1.
    If turning right within INTERVAL_TO_TARGET_WAY_POINT, return 2.
    Otherwise, return 0.
    '''
    previous_turning = 3
    previous_degree = get_direction_in_degree(
        waypoints[get_waypoint_idx(len(waypoints), closest_waypoints[1], -INTERVAL_TO_TARGET_WAY_POINT)],
        waypoints[get_waypoint_idx(len(waypoints), closest_waypoints[0], -INTERVAL_TO_TARGET_WAY_POINT)])
    for i in range(-INTERVAL_TO_TARGET_WAY_POINT, 0):
        current_degree = get_direction_in_degree(waypoints[get_waypoint_idx(len(waypoints), closest_waypoints[1], i + 1)], waypoints[get_waypoint_idx(len(waypoints), closest_waypoints[0], i + 1)])
        if current_degree - previous_degree > 0.3:
            current_turning = 1
        elif current_degree - previous_degree < -0.3:
            current_turning = 2
        else:
            current_turning = 0
        if previous_turning == 3:
            previous_turning = current_turning
        elif current_turning != previous_turning:
            return 0
        else:
            previous_turning = current_turning
        previous_degree = current_degree

    return previous_turning
        
    

    
def reward_function(params):
    '''
    1. Don't run off the track.
    2. Encourage the agent to have a high speed towards a far side way point.
    3. Give as much rewards as the progress made.
    4. Every step costs.
    5. It is better on the left when turning left. Visa versa.
    '''
    
    # Read input variables
    x = params['x']
    y = params['y']
    waypoints = params['waypoints']
    closest_waypoints = params['closest_waypoints']
    heading = params['heading']
    speed = params['speed']
    is_offtrack = params['is_offtrack']
    steps = params['steps']
    progress = params['progress']
    distance_from_center = params['distance_from_center']
    track_width = params['track_width']
    is_left_of_center = params['is_left_of_center']
    
    reward = 0.0
    # 1. Don't terminate off track.
    if is_offtrack:
        print('Vehicle is off track!')
        return -20.0
        
    # 2. Encourage the agent to have a high speed towards a far side way point. 
    target_way_point_idx = get_waypoint_idx(len(waypoints), closest_waypoints[1], INTERVAL_TO_TARGET_WAY_POINT)
    target_way_point = waypoints[target_way_point_idx]
    print('Target way point is %d' % target_way_point_idx)
    
    target_direction = get_direction_in_degree(target_way_point, [x, y])
    speed_bonus = get_speed_in_direction(target_direction, heading, speed)
    print('The speed bonus is %f.' % speed_bonus)
    reward += speed_bonus
    
    # 3. Give as much rewards as the progress made.
    global previous_progress
    progress_bonus = progress - previous_progress
    if progress_bonus > 0:
        reward += progress_bonus
        print('Previous progress: %f.2, current progress: %f.2, earned score: %f.2' % (previous_progress, progress, progress_bonus))
    previous_progress = progress
    
    # 4. Every step costs.
    reward -= STEP_VALUE
    
    # 5. It is better on the left when turning left. Visa versa.
    shortcut_reward = 0
    way_turning = is_turning(waypoints, closest_waypoints)
    if (way_turning == 1 and is_left_of_center) or (way_turning == 2 and not is_left_of_center):
        shortcut_reward = 1 - (track_width / 2 - distance_from_center) / (track_width / 2)
        
    reward += shortcut_reward
    
    return float(reward)

# 20241120

In [None]:
'''
20241120
Falk Tandetzky + yang0369
https://github.com/falktan/deepracer
https://github.com/yang0369/AWS_DeepRacer
'''

import math

OPTIMIZED_WAYPOINTS = [
        [3.06664, 0.69989],
        [3.21372, 0.69357],
        [3.36169, 0.6893],
        [3.51032, 0.68657],
        [3.65944, 0.68518],
        [3.80869, 0.68499],
        [3.9577, 0.68593],
        [4.10629, 0.688],
        [4.25437, 0.69122],
        [4.40189, 0.69562],
        [4.54878, 0.70129],
        [4.69495, 0.7083],
        [4.84035, 0.71677],
        [4.9849, 0.7268],
        [5.12852, 0.73849],
        [5.27111, 0.75197],
        [5.41256, 0.76741],
        [5.55265, 0.78511],
        [5.69115, 0.80542],
        [5.82783, 0.82863],
        [5.96225, 0.85532],
        [6.09384, 0.88621],
        [6.22194, 0.92207],
        [6.34568, 0.96381],
        [6.46387, 1.01256],
        [6.57482, 1.06969],
        [6.67653, 1.13638],
        [6.76588, 1.21406],
        [6.83839, 1.3035],
        [6.8965, 1.40041],
        [6.94112, 1.50274],
        [6.96947, 1.60974],
        [6.97707, 1.71948],
        [6.96702, 1.82873],
        [6.94149, 1.93565],
        [6.90175, 2.03894],
        [6.84699, 2.13674],
        [6.77532, 2.22592],
        [6.69013, 2.30621],
        [6.59411, 2.37815],
        [6.48935, 2.44258],
        [6.37761, 2.50053],
        [6.26056, 2.55329],
        [6.13955, 2.60203],
        [6.01585, 2.648],
        [5.89082, 2.69257],
        [5.76067, 2.73919],
        [5.63058, 2.78629],
        [5.5006, 2.83412],
        [5.37081, 2.88295],
        [5.2413, 2.93305],
        [5.11223, 2.98473],
        [4.9838, 3.03838],
        [4.85635, 3.09451],
        [4.73023, 3.15374],
        [4.60596, 3.21695],
        [4.48296, 3.2828],
        [4.36104, 3.35081],
        [4.24006, 3.42061],
        [4.11988, 3.49191],
        [4.00046, 3.56448],
        [3.88179, 3.63809],
        [3.76397, 3.71247],
        [3.64724, 3.7873],
        [3.53105, 3.86073],
        [3.41419, 3.93239],
        [3.29624, 4.00105],
        [3.17677, 4.06545],
        [3.0554, 4.12417],
        [2.93169, 4.17515],
        [2.80549, 4.21581],
        [2.67785, 4.24822],
        [2.5493, 4.27301],
        [2.42021, 4.29067],
        [2.29093, 4.30153],
        [2.16175, 4.30562],
        [2.03303, 4.30283],
        [1.90519, 4.29292],
        [1.7788, 4.27535],
        [1.65459, 4.24957],
        [1.53376, 4.21418],
        [1.41797, 4.16786],
        [1.30974, 4.10893],
        [1.21287, 4.03538],
        [1.13093, 3.94692],
        [1.06435, 3.84609],
        [1.01121, 3.73603],
        [0.96999, 3.61869],
        [0.93956, 3.49541],
        [0.91891, 3.36729],
        [0.90708, 3.23527],
        [0.90334, 3.10018],
        [0.90681, 2.9629],
        [0.91698, 2.82419],
        [0.93341, 2.68483],
        [0.95571, 2.54557],
        [0.98342, 2.40706],
        [1.01626, 2.26986],
        [1.05392, 2.13444],
        [1.09624, 2.00121],
        [1.14311, 1.87057],
        [1.19482, 1.7431],
        [1.25158, 1.61938],
        [1.31382, 1.50015],
        [1.38221, 1.38643],
        [1.45757, 1.27943],
        [1.54096, 1.18072],
        [1.63386, 1.09253],
        [1.7384, 1.01844],
        [1.85098, 0.955],
        [1.97002, 0.90067],
        [2.09459, 0.85453],
        [2.2239, 0.81579],
        [2.35729, 0.78373],
        [2.49419, 0.75767],
        [2.63406, 0.73695],
        [2.77639, 0.72086],
        [2.92074, 0.70874],
        [3.06664, 0.69989]
    ]

def dist(point1, point2):
    return ((point1[0] - point2[0]) ** 2 + (point1[1] - point2[1]) ** 2) ** 0.5


# thanks to https://stackoverflow.com/questions/20924085/python-conversion-between-coordinates
def rect(r, theta):
    """
    theta in degrees

    returns tuple; (float, float); (x,y)
    """

    x = r * math.cos(math.radians(theta))
    y = r * math.sin(math.radians(theta))
    return x, y


# thanks to https://stackoverflow.com/questions/20924085/python-conversion-between-coordinates
def polar(x, y):
    """
    returns r, theta(degrees)
    """

    r = (x ** 2 + y ** 2) ** .5
    theta = math.degrees(math.atan2(y,x))
    return r, theta


def angle_mod_360(angle):
    """
    Maps an angle to the interval -180, +180.

    Examples:
    angle_mod_360(362) == 2
    angle_mod_360(270) == -90

    :param angle: angle in degree
    :return: angle in degree. Between -180 and +180
    """

    n = math.floor(angle/360.0)

    angle_between_0_and_360 = angle - n*360.0

    if angle_between_0_and_360 <= 180.0:
        return angle_between_0_and_360
    else:
        return angle_between_0_and_360 - 360


def get_waypoints_ordered_in_driving_direction(params):
    # waypoints are always provided in counter clock wise order
    if params['is_reversed']: # driving clock wise.
        # return list(reversed(params['waypoints']))
        return list(reversed(OPTIMIZED_WAYPOINTS))
    else: # driving counter clock wise.
        # return params['waypoints']
        return OPTIMIZED_WAYPOINTS


def up_sample(waypoints, factor):
    """
    Adds extra waypoints in between provided waypoints

    :param waypoints:
    :param factor: integer. E.g. 3 means that the resulting list has 3 times as many points.
    :return:
    """
    p = waypoints
    n = len(p)

    return [[i / factor * p[(j+1) % n][0] + (1 - i / factor) * p[j][0],
             i / factor * p[(j+1) % n][1] + (1 - i / factor) * p[j][1]] for j in range(n) for i in range(factor)]


def get_target_point(params):
    waypoints = up_sample(get_waypoints_ordered_in_driving_direction(params), 20)

    car = [params['x'], params['y']]

    distances = [dist(p, car) for p in waypoints]
    min_dist = min(distances)
    i_closest = distances.index(min_dist)

    n = len(waypoints)

    waypoints_starting_with_closest = [waypoints[(i+i_closest) % n] for i in range(n)]

    r = params['track_width'] * 0.9

    is_inside = [dist(p, car) < r for p in waypoints_starting_with_closest]
    i_first_outside = is_inside.index(False)

    if i_first_outside < 0:  # this can only happen if we choose r as big as the entire track
        return waypoints[i_closest]

    return waypoints_starting_with_closest[i_first_outside]


def get_target_steering_degree(params):
    tx, ty = get_target_point(params)
    car_x = params['x']
    car_y = params['y']
    dx = tx-car_x
    dy = ty-car_y
    heading = params['heading']
    print('x: %f, y: %f, tx: %f, ty: %f' % (car_x, car_y, tx, ty))

    _, target_angle = polar(dx, dy)

    steering_angle = target_angle - heading

    return angle_mod_360(steering_angle)


def score_steer_to_point_ahead(params):
    best_stearing_angle = get_target_steering_degree(params)
    steering_angle = params['steering_angle']

    error = (steering_angle - best_stearing_angle) / 60.0  # 60 degree is already really bad

    score = 1.0 - abs(error)

    return max(score, 0.01)  # optimizer is rumored to struggle with negative numbers and numbers too close to zero


def reward_function(params):
    reward = score_steer_to_point_ahead(params)
    print('reward: %f' % reward)
    return float(reward)

# 20241121

'''json
[
	{
		"steering_angle": 30.0,
		"speed": 1.4,
		"index": 11
	},
	{
		"steering_angle": 19.2,
		"speed": 1.5,
		"index": 4
	},
	{
		"steering_angle": 6.9,
		"speed": 1.6,
		"index": 1
	},
	{
		"steering_angle": -5.7,
		"speed": 1.7,
		"index": 7
	},
	{
		"steering_angle": 8.1,
		"speed": 2.3,
		"index": 3
	},
	{
		"steering_angle": -6.1,
		"speed": 2.4,
		"index": 0
	},
	{
		"steering_angle": -0.3,
		"speed": 2.8,
		"index": 5
	},
	{
		"steering_angle": 14.4,
		"speed": 2.8,
		"index": 10
	},
	{
		"steering_angle": 0.3,
		"speed": 3.3,
		"index": 6
	},
	{
		"steering_angle": 12.8,
		"speed": 3.6,
		"index": 9
	},
	{
		"steering_angle": -10.8,
		"speed": 3.7,
		"index": 8
	},
	{
		"steering_angle": 0.5,
		"speed": 3.9,
		"index": 2
	}
]
'''

Action space:
|index|angle|speed|
|--|--|--|
|0|-6.1|2.4|
|1|6.9|1.6|
|2|0.5|3.9|
|3|8.1|2.3|
|4|19.2|1.5|
|5|-0.3|2.8|
|6|0.3|3.3|
|7|-5.7|1.7|
|8|-10.8|3.7|
|9|12.8|3.6|
|10|14.4|2.8|
|11|30.0|1.4|


1. Top speed 1, discount rate 0.5, learning rate 0.001, 20min

2. Top speed 1.5, discount rate 0.5, learning rate 0.001, 20min

3. Top speed 2, discount rate 0.5, learning rate 0.001, 20min

4. Top speed 2.3, discount rate 0.5, learning rate 0.0005, 20min

5. Top speed 2.5, discount rate 0.5, learning rate 0.0005, 20min

6. Top speed 2.7, discount rate 0.5, learning rate 0.0005, 20min

7. Top speed 2.7, discount rate 0.5, learning rate 0.0005, 20min

8. Top speed 2.9, discount rate 0.5, learning rate 0.0005, 20min

9. Top speed 3.1, discount rate 0.5, learning rate 0.0003, 20min

In [2]:
'''
20241121
Falk Tandetzky + yang0369
https://github.com/falktan/deepracer
https://github.com/yang0369/AWS_DeepRacer
'''

import math

OPTIMIZED_WAYPOINTS = [
        [3.06664, 0.69989],
        [3.21372, 0.69357],
        [3.36169, 0.6893],
        [3.51032, 0.68657],
        [3.65944, 0.68518],
        [3.80869, 0.68499],
        [3.9577, 0.68593],
        [4.10629, 0.688],
        [4.25437, 0.69122],
        [4.40189, 0.69562],
        [4.54878, 0.70129],
        [4.69495, 0.7083],
        [4.84035, 0.71677],
        [4.9849, 0.7268],
        [5.12852, 0.73849],
        [5.27111, 0.75197],
        [5.41256, 0.76741],
        [5.55265, 0.78511],
        [5.69115, 0.80542],
        [5.82783, 0.82863],
        [5.96225, 0.85532],
        [6.09384, 0.88621],
        [6.22194, 0.92207],
        [6.34568, 0.96381],
        [6.46387, 1.01256],
        [6.57482, 1.06969],
        [6.67653, 1.13638],
        [6.76588, 1.21406],
        [6.83839, 1.3035],
        [6.8965, 1.40041],
        [6.94112, 1.50274],
        [6.96947, 1.60974],
        [6.97707, 1.71948],
        [6.96702, 1.82873],
        [6.94149, 1.93565],
        [6.90175, 2.03894],
        [6.84699, 2.13674],
        [6.77532, 2.22592],
        [6.69013, 2.30621],
        [6.59411, 2.37815],
        [6.48935, 2.44258],
        [6.37761, 2.50053],
        [6.26056, 2.55329],
        [6.13955, 2.60203],
        [6.01585, 2.648],
        [5.89082, 2.69257],
        [5.76067, 2.73919],
        [5.63058, 2.78629],
        [5.5006, 2.83412],
        [5.37081, 2.88295],
        [5.2413, 2.93305],
        [5.11223, 2.98473],
        [4.9838, 3.03838],
        [4.85635, 3.09451],
        [4.73023, 3.15374],
        [4.60596, 3.21695],
        [4.48296, 3.2828],
        [4.36104, 3.35081],
        [4.24006, 3.42061],
        [4.11988, 3.49191],
        [4.00046, 3.56448],
        [3.88179, 3.63809],
        [3.76397, 3.71247],
        [3.64724, 3.7873],
        [3.53105, 3.86073],
        [3.41419, 3.93239],
        [3.29624, 4.00105],
        [3.17677, 4.06545],
        [3.0554, 4.12417],
        [2.93169, 4.17515],
        [2.80549, 4.21581],
        [2.67785, 4.24822],
        [2.5493, 4.27301],
        [2.42021, 4.29067],
        [2.29093, 4.30153],
        [2.16175, 4.30562],
        [2.03303, 4.30283],
        [1.90519, 4.29292],
        [1.7788, 4.27535],
        [1.65459, 4.24957],
        [1.53376, 4.21418],
        [1.41797, 4.16786],
        [1.30974, 4.10893],
        [1.21287, 4.03538],
        [1.13093, 3.94692],
        [1.06435, 3.84609],
        [1.01121, 3.73603],
        [0.96999, 3.61869],
        [0.93956, 3.49541],
        [0.91891, 3.36729],
        [0.90708, 3.23527],
        [0.90334, 3.10018],
        [0.90681, 2.9629],
        [0.91698, 2.82419],
        [0.93341, 2.68483],
        [0.95571, 2.54557],
        [0.98342, 2.40706],
        [1.01626, 2.26986],
        [1.05392, 2.13444],
        [1.09624, 2.00121],
        [1.14311, 1.87057],
        [1.19482, 1.7431],
        [1.25158, 1.61938],
        [1.31382, 1.50015],
        [1.38221, 1.38643],
        [1.45757, 1.27943],
        [1.54096, 1.18072],
        [1.63386, 1.09253],
        [1.7384, 1.01844],
        [1.85098, 0.955],
        [1.97002, 0.90067],
        [2.09459, 0.85453],
        [2.2239, 0.81579],
        [2.35729, 0.78373],
        [2.49419, 0.75767],
        [2.63406, 0.73695],
        [2.77639, 0.72086],
        [2.92074, 0.70874],
        [3.06664, 0.69989]
    ]

RADIUS_FACTOR = 0.3

def dist(point1, point2):
    return ((point1[0] - point2[0]) ** 2 + (point1[1] - point2[1]) ** 2) ** 0.5


# thanks to https://stackoverflow.com/questions/20924085/python-conversion-between-coordinates
def rect(r, theta):
    """
    theta in degrees

    returns tuple; (float, float); (x,y)
    """

    x = r * math.cos(math.radians(theta))
    y = r * math.sin(math.radians(theta))
    return x, y


# thanks to https://stackoverflow.com/questions/20924085/python-conversion-between-coordinates
def polar(x, y):
    """
    returns r, theta(degrees)
    """

    r = (x ** 2 + y ** 2) ** .5
    theta = math.degrees(math.atan2(y,x))
    return r, theta


def angle_mod_360(angle):
    """
    Maps an angle to the interval -180, +180.

    Examples:
    angle_mod_360(362) == 2
    angle_mod_360(270) == -90

    :param angle: angle in degree
    :return: angle in degree. Between -180 and +180
    """

    n = math.floor(angle/360.0)

    angle_between_0_and_360 = angle - n*360.0

    if angle_between_0_and_360 <= 180.0:
        return angle_between_0_and_360
    else:
        return angle_between_0_and_360 - 360


def get_waypoints_ordered_in_driving_direction(params):
    # waypoints are always provided in counter clock wise order
    if params['is_reversed']: # driving clock wise.
        # return list(reversed(params['waypoints']))
        return list(reversed(OPTIMIZED_WAYPOINTS))
    else: # driving counter clock wise.
        # return params['waypoints']
        return OPTIMIZED_WAYPOINTS


def up_sample(waypoints, factor):
    """
    Adds extra waypoints in between provided waypoints

    :param waypoints:
    :param factor: integer. E.g. 3 means that the resulting list has 3 times as many points.
    :return:
    """
    p = waypoints
    n = len(p)

    return [[i / factor * p[(j+1) % n][0] + (1 - i / factor) * p[j][0],
             i / factor * p[(j+1) % n][1] + (1 - i / factor) * p[j][1]] for j in range(n) for i in range(factor)]


def get_target_point(params):
    waypoints = up_sample(get_waypoints_ordered_in_driving_direction(params), 20)

    car = [params['x'], params['y']]

    distances = [dist(p, car) for p in waypoints]
    min_dist = min(distances)
    i_closest = distances.index(min_dist)

    n = len(waypoints)

    waypoints_starting_with_closest = [waypoints[(i+i_closest) % n] for i in range(n)]

    r = params['track_width'] * RADIUS_FACTOR

    is_inside = [dist(p, car) < r for p in waypoints_starting_with_closest]
    i_first_outside = is_inside.index(False)

    if i_first_outside < 0:  # this can only happen if we choose r as big as the entire track
        return waypoints[i_closest]

    return waypoints_starting_with_closest[i_first_outside]


def get_target_steering_degree(params):
    tx, ty = get_target_point(params)
    car_x = params['x']
    car_y = params['y']
    dx = tx-car_x
    dy = ty-car_y
    heading = params['heading']
    print('x: %f, y: %f, tx: %f, ty: %f' % (car_x, car_y, tx, ty))

    _, target_angle = polar(dx, dy)

    steering_angle = target_angle - heading

    return angle_mod_360(steering_angle)


def score_steer_to_point_ahead(params):
    best_stearing_angle = get_target_steering_degree(params)
    steering_angle = params['steering_angle']

    error = (steering_angle - best_stearing_angle) / 60.0  # 60 degree is already really bad

    score = 1.0 - abs(error)

    return max(score, 0.01)  # optimizer is rumored to struggle with negative numbers and numbers too close to zero


def reward_function(params):
    reward = score_steer_to_point_ahead(params)
    print('reward: %f' % reward)
    return float(reward)

# Continue 20241117

In [30]:
import math

# static
# Must update if the env changes.
TOP_SPEED = 3.0
INTERVAL_TO_TARGET_WAY_POINT = 5
STEP_VALUE = 0.1
OFF_TRACK_PUNISH = -20.0
previous_progress = 0


OPTIMIZED_WAYPOINTS = [
        [3.06664, 0.69989],
        [3.21372, 0.69357],
        [3.36169, 0.6893],
        [3.51032, 0.68657],
        [3.65944, 0.68518],
        [3.80869, 0.68499],
        [3.9577, 0.68593],
        [4.10629, 0.688],
        [4.25437, 0.69122],
        [4.40189, 0.69562],
        [4.54878, 0.70129],
        [4.69495, 0.7083],
        [4.84035, 0.71677],
        [4.9849, 0.7268],
        [5.12852, 0.73849],
        [5.27111, 0.75197],
        [5.41256, 0.76741],
        [5.55265, 0.78511],
        [5.69115, 0.80542],
        [5.82783, 0.82863],
        [5.96225, 0.85532],
        [6.09384, 0.88621],
        [6.22194, 0.92207],
        [6.34568, 0.96381],
        [6.46387, 1.01256],
        [6.57482, 1.06969],
        [6.67653, 1.13638],
        [6.76588, 1.21406],
        [6.83839, 1.3035],
        [6.8965, 1.40041],
        [6.94112, 1.50274],
        [6.96947, 1.60974],
        [6.97707, 1.71948],
        [6.96702, 1.82873],
        [6.94149, 1.93565],
        [6.90175, 2.03894],
        [6.84699, 2.13674],
        [6.77532, 2.22592],
        [6.69013, 2.30621],
        [6.59411, 2.37815],
        [6.48935, 2.44258],
        [6.37761, 2.50053],
        [6.26056, 2.55329],
        [6.13955, 2.60203],
        [6.01585, 2.648],
        [5.89082, 2.69257],
        [5.76067, 2.73919],
        [5.63058, 2.78629],
        [5.5006, 2.83412],
        [5.37081, 2.88295],
        [5.2413, 2.93305],
        [5.11223, 2.98473],
        [4.9838, 3.03838],
        [4.85635, 3.09451],
        [4.73023, 3.15374],
        [4.60596, 3.21695],
        [4.48296, 3.2828],
        [4.36104, 3.35081],
        [4.24006, 3.42061],
        [4.11988, 3.49191],
        [4.00046, 3.56448],
        [3.88179, 3.63809],
        [3.76397, 3.71247],
        [3.64724, 3.7873],
        [3.53105, 3.86073],
        [3.41419, 3.93239],
        [3.29624, 4.00105],
        [3.17677, 4.06545],
        [3.0554, 4.12417],
        [2.93169, 4.17515],
        [2.80549, 4.21581],
        [2.67785, 4.24822],
        [2.5493, 4.27301],
        [2.42021, 4.29067],
        [2.29093, 4.30153],
        [2.16175, 4.30562],
        [2.03303, 4.30283],
        [1.90519, 4.29292],
        [1.7788, 4.27535],
        [1.65459, 4.24957],
        [1.53376, 4.21418],
        [1.41797, 4.16786],
        [1.30974, 4.10893],
        [1.21287, 4.03538],
        [1.13093, 3.94692],
        [1.06435, 3.84609],
        [1.01121, 3.73603],
        [0.96999, 3.61869],
        [0.93956, 3.49541],
        [0.91891, 3.36729],
        [0.90708, 3.23527],
        [0.90334, 3.10018],
        [0.90681, 2.9629],
        [0.91698, 2.82419],
        [0.93341, 2.68483],
        [0.95571, 2.54557],
        [0.98342, 2.40706],
        [1.01626, 2.26986],
        [1.05392, 2.13444],
        [1.09624, 2.00121],
        [1.14311, 1.87057],
        [1.19482, 1.7431],
        [1.25158, 1.61938],
        [1.31382, 1.50015],
        [1.38221, 1.38643],
        [1.45757, 1.27943],
        [1.54096, 1.18072],
        [1.63386, 1.09253],
        [1.7384, 1.01844],
        [1.85098, 0.955],
        [1.97002, 0.90067],
        [2.09459, 0.85453],
        [2.2239, 0.81579],
        [2.35729, 0.78373],
        [2.49419, 0.75767],
        [2.63406, 0.73695],
        [2.77639, 0.72086],
        [2.92074, 0.70874],
        [3.06664, 0.69989]
    ]

def get_distance_p2p(p1, p2):
    return abs(abs(p1[0]-p2[0])**2 + abs(p1[1]-p2[1])**2)**0.5


def get_distance_p2l(p, wp1, wp2):
    if wp2[0] == wp1[0]:
        return abs(p[0] - wp2[0])
    else:
        m = (wp2[1] - wp1[1]) / (wp2[0] - wp1[0])
        A = -m
        print(A)
    
    B = 1
    C = -((A * wp1[0]) + (B * wp1[1]))
    return abs(A*p[0] + B*p[1] + C) / math.sqrt(A**2 + B**2)


def get_waypoint_idx(total_length, current_idx, step):
    return (current_idx + total_length + step) % total_length


def get_direction_in_degree(next_point, prev_point):
    '''
    The speed in the direction of the waypoints.
    '''
    # Calculate the direction in radius, arctan2(dy, dx), the result is (-pi, pi) in radians
    track_direction = math.atan2(next_point[1] - prev_point[1], next_point[0] - prev_point[0])
    # Convert to degree (-180, 180)
    return math.degrees(track_direction)
    
def get_speed_in_direction(target_direction, heading, speed):
    direction_diff = abs(target_direction - heading)
    if direction_diff > 180:
        direction_diff = 360 - direction_diff
    speed_bonus = math.cos(math.radians(direction_diff)) * speed / TOP_SPEED
    return speed_bonus



def reward_function(params):
    '''
    1. Don't run off the track.
    2. Encourage the agent to have a high speed towards a far side way point.
    3. Give as much rewards as the progress made.
    4. Every step values.
    5. Close to optimized waypoint.
    '''
    
    # Read input variables
    x = params['x']
    y = params['y']
    # waypoints = params['waypoints']
    waypoints = OPTIMIZED_WAYPOINTS
    closest_waypoints = params['closest_waypoints']
    heading = params['heading']
    speed = params['speed']
    is_offtrack = params['is_offtrack']
    steps = params['steps']
    progress = params['progress']
    distance_from_center = params['distance_from_center']
    track_width = params['track_width']
    is_left_of_center = params['is_left_of_center']
    
    reward = 0.0
    # 1. Don't terminate off track.
    if is_offtrack:
        print('Vehicle is off track!')
        return OFF_TRACK_PUNISH
        
    # 2. Encourage the agent to have a high speed towards a far side way point. 
    target_way_point_idx = get_waypoint_idx(len(waypoints), closest_waypoints[1], INTERVAL_TO_TARGET_WAY_POINT)
    target_way_point = waypoints[target_way_point_idx]
    
    target_direction = get_direction_in_degree(target_way_point, [x, y])
    speed_bonus = get_speed_in_direction(target_direction, heading, speed)
    reward += speed_bonus
    
    # 3. Give as much rewards as the progress made.
    global previous_progress
    progress_bonus = progress - previous_progress
    if progress_bonus > 0:
        reward += progress_bonus
    previous_progress = progress
    
    # 4. Every step values.
    reward -= STEP_VALUE
    
    # 5. Close to optimized waypoint.
    distance_2_waypoint_bonus = 0
    car_2_opt_way = get_distance_p2l([x,y], waypoints[closest_waypoints[0]], waypoints[closest_waypoints[1]])
    if car_2_opt_way > (track_width / 2):
        distance_2_waypoint_bonus = -0.1
    else:
        distance_2_waypoint_bonus = 1 - (car_2_opt_way / (track_width / 2))
        
    reward += distance_2_waypoint_bonus
    
    print('speed_bonus: %f, progress_bonus: %f, distance_2_waypoint_bonus: %f, reward: %f' % (speed_bonus, progress_bonus, distance_2_waypoint_bonus, reward))
    return float(reward)

In [34]:
get_distance_p2l([0.0,0.0], [1.0,1.0], [2**-2,0])

-1.3333333333333333


0.19999999999999996

In [1]:
import math

# static

OPTIMIZED_WAYPOINTS = [
        [3.06664, 0.69989],
        [3.21372, 0.69357],
        [3.36169, 0.6893],
        [3.51032, 0.68657],
        [3.65944, 0.68518],
        [3.80869, 0.68499],
        [3.9577, 0.68593],
        [4.10629, 0.688],
        [4.25437, 0.69122],
        [4.40189, 0.69562],
        [4.54878, 0.70129],
        [4.69495, 0.7083],
        [4.84035, 0.71677],
        [4.9849, 0.7268],
        [5.12852, 0.73849],
        [5.27111, 0.75197],
        [5.41256, 0.76741],
        [5.55265, 0.78511],
        [5.69115, 0.80542],
        [5.82783, 0.82863],
        [5.96225, 0.85532],
        [6.09384, 0.88621],
        [6.22194, 0.92207],
        [6.34568, 0.96381],
        [6.46387, 1.01256],
        [6.57482, 1.06969],
        [6.67653, 1.13638],
        [6.76588, 1.21406],
        [6.83839, 1.3035],
        [6.8965, 1.40041],
        [6.94112, 1.50274],
        [6.96947, 1.60974],
        [6.97707, 1.71948],
        [6.96702, 1.82873],
        [6.94149, 1.93565],
        [6.90175, 2.03894],
        [6.84699, 2.13674],
        [6.77532, 2.22592],
        [6.69013, 2.30621],
        [6.59411, 2.37815],
        [6.48935, 2.44258],
        [6.37761, 2.50053],
        [6.26056, 2.55329],
        [6.13955, 2.60203],
        [6.01585, 2.648],
        [5.89082, 2.69257],
        [5.76067, 2.73919],
        [5.63058, 2.78629],
        [5.5006, 2.83412],
        [5.37081, 2.88295],
        [5.2413, 2.93305],
        [5.11223, 2.98473],
        [4.9838, 3.03838],
        [4.85635, 3.09451],
        [4.73023, 3.15374],
        [4.60596, 3.21695],
        [4.48296, 3.2828],
        [4.36104, 3.35081],
        [4.24006, 3.42061],
        [4.11988, 3.49191],
        [4.00046, 3.56448],
        [3.88179, 3.63809],
        [3.76397, 3.71247],
        [3.64724, 3.7873],
        [3.53105, 3.86073],
        [3.41419, 3.93239],
        [3.29624, 4.00105],
        [3.17677, 4.06545],
        [3.0554, 4.12417],
        [2.93169, 4.17515],
        [2.80549, 4.21581],
        [2.67785, 4.24822],
        [2.5493, 4.27301],
        [2.42021, 4.29067],
        [2.29093, 4.30153],
        [2.16175, 4.30562],
        [2.03303, 4.30283],
        [1.90519, 4.29292],
        [1.7788, 4.27535],
        [1.65459, 4.24957],
        [1.53376, 4.21418],
        [1.41797, 4.16786],
        [1.30974, 4.10893],
        [1.21287, 4.03538],
        [1.13093, 3.94692],
        [1.06435, 3.84609],
        [1.01121, 3.73603],
        [0.96999, 3.61869],
        [0.93956, 3.49541],
        [0.91891, 3.36729],
        [0.90708, 3.23527],
        [0.90334, 3.10018],
        [0.90681, 2.9629],
        [0.91698, 2.82419],
        [0.93341, 2.68483],
        [0.95571, 2.54557],
        [0.98342, 2.40706],
        [1.01626, 2.26986],
        [1.05392, 2.13444],
        [1.09624, 2.00121],
        [1.14311, 1.87057],
        [1.19482, 1.7431],
        [1.25158, 1.61938],
        [1.31382, 1.50015],
        [1.38221, 1.38643],
        [1.45757, 1.27943],
        [1.54096, 1.18072],
        [1.63386, 1.09253],
        [1.7384, 1.01844],
        [1.85098, 0.955],
        [1.97002, 0.90067],
        [2.09459, 0.85453],
        [2.2239, 0.81579],
        [2.35729, 0.78373],
        [2.49419, 0.75767],
        [2.63406, 0.73695],
        [2.77639, 0.72086],
        [2.92074, 0.70874],
        [3.06664, 0.69989]
    ]
# Must update if the env changes.
TOP_SPEED = 4.0  
IS_CLOCKWISE = False
INTERVAL_TO_TARGET_WAY_POINT = 8
STEP_VALUE = 0.2

previous_progress = 0

def get_direction_in_degree(next_point, prev_point):
	'''
	The speed in the direction of the waypoints.
	'''
	# Calculate the direction in radius, arctan2(dy, dx), the result is (-pi, pi) in radians
	track_direction = math.atan2(next_point[1] - prev_point[1], next_point[0] - prev_point[0])
	# Convert to degree (-180, 180)
	return math.degrees(track_direction)
	
def get_speed_in_direction(target_direction, heading, speed):
	direction_diff = abs(target_direction - heading)
	if direction_diff > 180:
		direction_diff = 360 - direction_diff
	speed_bonus = math.cos(math.radians(direction_diff)) * speed / TOP_SPEED
	return speed_bonus
	
def get_distance_p2l(p, wp1, wp2):
    if wp2[0] == wp1[0]:
        return abs(p[0] - wp2[0])
    else:
        m = (wp2[1] - wp1[1]) / (wp2[0] - wp1[0])
        A = -m
        print(A)
    
    B = 1
    C = -((A * wp1[0]) + (B * wp1[1]))
    return abs(A*p[0] + B*p[1] + C) / math.sqrt(A**2 + B**2)

def reward_function(params):
	'''
	1. Don't run off the track.
	2. Base reward when keeping all wheels on track and not heading towards the opposite way.
	3. Encourage the agent to have a high speed towards a far side way point.
	4. Every step values.
	5. Give as much rewards as the progress made.
	6. Close to optimized waypoint.
	'''
	
	# Read input variables
	x = params['x']
	y = params['y']
	# waypoints = params['waypoints']
	waypoints = OPTIMIZED_WAYPOINTS
	closest_waypoints = params['closest_waypoints']
	heading = params['heading']
	speed = params['speed']
	is_offtrack = params['is_offtrack']
	all_wheels_on_track = params['all_wheels_on_track']
	is_reversed = params['is_reversed'] # if the agent is driving on clock-wise (True) or counter clock-wise (False).
	steps = params['steps']
	progress = params['progress']
	track_width = params['track_width']
	
	reward = 0.0
	# 1. Don't terminate off track.
	if is_offtrack:
		print('Vehicle is off track!')
		return -10.0
		
	# 2. Base reward when keeping all wheels on track and not heading towards the opposite way.
	if all_wheels_on_track and ((IS_CLOCKWISE and is_reversed) or (not IS_CLOCKWISE and not is_reversed)):
		reward += 0.01
	else:
		if not all_wheels_on_track:
			print('The vehicle is having a wheel off track.')
			
		if (IS_CLOCKWISE and not is_reversed) or (not IS_CLOCKWISE and is_reversed):
			print('The vehicle is heading backword.')
		
	# 3. Encourage the agent to have a high speed towards a far side way point. 
	target_way_point_idx = (closest_waypoints[1] + INTERVAL_TO_TARGET_WAY_POINT) % len(waypoints)
	target_way_point = waypoints[target_way_point_idx]
	
	target_direction = get_direction_in_degree(target_way_point, [x, y])
	speed_bonus = get_speed_in_direction(target_direction, heading, speed)
	print('The speed bonus is %f.' % speed_bonus)
	
	
	# 4. Every step values.
	reward -= STEP_VALUE
	
	# 5. Give as much rewards as the progress made.
	global previous_progress
	progress_bonus = progress - previous_progress
	if progress_bonus > 0:
		progress_bonus = progress_bonus / 5
		reward += progress_bonus
	previous_progress = progress
	
	# 6. Close to optimized waypoints.
	distance_2_waypoint_bonus = 0
	car_2_opt_way = get_distance_p2l([x,y], waypoints[closest_waypoints[0]], waypoints[closest_waypoints[1]])
	if car_2_opt_way > (track_width / 2):
		distance_2_waypoint_bonus = -0.1
	else:
		distance_2_waypoint_bonus = 1 - (car_2_opt_way / (track_width / 2))

	reward += speed_bonus * distance_2_waypoint_bonus
	
	
	
	return float(reward)

In [7]:
get_speed_in_direction(0, 15, 4)

0.9659258262890683

In [11]:
get_direction_in_degree(OPTIMIZED_WAYPOINTS[51], [5.45856218131957, 2.76881261307209])

148.05892996567508

In [12]:
get_speed_in_direction(148, -179, 4)

0.838670567945424