# Data Processing for Safe RL

In [2]:
import pandas as pd
import numpy as np

In [11]:
x_limit = 80
y_limit = 80

In [12]:
data_1 = pd.read_csv('../ml_data/train_data.csv')
data_2 = pd.read_csv('../ml_data/test_data.csv')
data = pd.concat([data_1, data_2])


## Computing rewards

To avoid numbers that are too big, change everything to radians

In [13]:
data['theta_x'] = np.deg2rad(data['theta_x'])
data['theta_y'] = np.deg2rad(data['theta_y'])

In [14]:
reward = []
x_limit = np.deg2rad(x_limit)
y_limit = np.deg2rad(y_limit)
D = 0.5*(x_limit**2 + y_limit**2)**0.5
gamma = 0.99
for row_num, row in data.iterrows():
    if row['theta_x'] <= x_limit and row['theta_x'] >= -x_limit and row['theta_y'] <= y_limit and row['theta_y'] >= -y_limit:
        dist_to_bounds_x = min(abs(x_limit - row['theta_x']), abs(-x_limit - row['theta_x']))
        dist_to_bounds_y = min(abs(y_limit - row['theta_y']), abs(-y_limit - row['theta_y']))
        dist_to_bounds = (dist_to_bounds_x**2 + dist_to_bounds_y**2)**0.5
        r = 1 + dist_to_bounds
        reward.append(r)
    else:
        reward.append(-D/(1-gamma))
data['reward'] = reward    

In [15]:
data

Unnamed: 0,time_begin,time_begin_traj,theta_x,theta_y,vel_x,vel_y,X_throttle,Y_throttle,reward
0,0.358,0.358,-0.086219,-0.187797,0.0,0.000000,0.0,0.0,2.782303
1,0.421,0.421,-0.086219,-0.187797,0.0,0.000000,0.0,0.0,2.782303
2,0.485,0.485,-0.086219,-0.187797,0.0,0.000000,0.0,0.0,2.782303
3,0.548,0.548,-0.086219,-0.187797,0.0,0.000000,0.0,0.0,2.782303
4,0.612,0.612,-0.086219,-0.187797,0.0,0.000000,0.0,0.0,2.782303
...,...,...,...,...,...,...,...,...,...
31261,1172.517,121.126,-0.384496,-0.656069,0.0,0.000000,-2.0,10.0,2.253619
31262,1172.589,121.198,-0.384496,-0.656069,0.0,0.000000,-2.0,10.0,2.253619
31263,1172.661,121.270,-0.384496,-0.646819,0.0,7.361111,-2.0,10.0,2.259103
31264,1172.733,121.342,-0.384496,-0.646819,0.0,0.000000,-2.0,10.0,2.259103


In [16]:
data.to_csv('../safe_rl_data/data.csv', index=False)

In [17]:
data.describe()

Unnamed: 0,time_begin,time_begin_traj,theta_x,theta_y,vel_x,vel_y,X_throttle,Y_throttle,reward
count,143207.0,143207.0,143207.0,143207.0,143207.0,143207.0,143207.0,143207.0,143207.0
mean,953.331442,30.882267,-0.006678,-8.3e-05,0.368827,-0.152111,-0.123332,-0.028162,-13.898717
std,674.203298,41.364048,0.793214,0.799035,16.972159,17.594994,2.173461,2.289834,37.005254
min,0.348,0.348,-1.813572,-1.816713,-165.294118,-151.571429,-10.0,-10.0,-98.730732
25%,356.609,6.497,-0.570548,-0.55135,-10.149254,-10.615385,-1.0,-1.0,1.70095
50%,875.516,15.39,0.001047,-0.039444,0.0,0.0,0.0,0.0,2.203503
75%,1462.8505,37.1595,0.549953,0.572643,10.298507,10.0,1.0,1.0,2.489879
max,3118.448,330.651,1.790533,1.803449,121.176471,153.880597,10.0,10.0,2.970174
