In [1]:
import boto3
import os


def downloadDirectoryFroms3(bucketName, remoteDirectoryName):
    s3_resource = boto3.resource('s3')
    bucket = s3_resource.Bucket(bucketName) 
    for obj in bucket.objects.filter(Prefix = remoteDirectoryName):
        if not os.path.exists(os.path.dirname(obj.key)):
            os.makedirs(os.path.dirname(obj.key))
        bucket.download_file(obj.key, obj.key)

In [2]:
import random
def angleFromCoordinate(lat1, long1, lat2, long2):
    dLon = (long2 - long1)

    y = math.sin(dLon) * math.cos(lat2)
    x = math.cos(lat1) * math.sin(lat2) - math.sin(lat1) * math.cos(lat2) * math.cos(dLon)

    brng = math.atan2(y, x)

    brng = math.degrees(brng)
    brng = (brng + 360) % 360
    #brng = 360 - brng # count degrees clockwise - remove to make counter-clockwise

    return brng

In [3]:
import math
def compute_target_angle(lat_list, lon_list, step = 5):
    if len(lat_list) != len(lon_list):
        print(f'Error! Length of lat ({len(lat_list)}) is different from length of lon ({len(lon_list)}). Returning an empty list of target_angles')
        return []
    tgt_angle_list = []
    for index, lat in enumerate(lat_list):
        if index + step > len(lat_list) - 1:
            return tgt_angle_list
        tgt_angle_list.append(angleFromCoordinate(lat, lon_list[index], lat_list[index+step], lon_list[index+step]))
    return tgt_angle_list

In [4]:
def aggregate_players_record(folder) :
    df_global = pd.DataFrame(columns=['boat_speed', 'angle_of_attack_deg','wind_speed', 'target_angle_deg', 'boat_angle_deg'])
    for file in os.listdir(folder):
        try:
            df = pd.read_csv(join(folder, file))
        except Exception as e:
            print(f'Error {e} while reading file {file}')
            continue
        # remove Igor's tracking
        if '5f3b964b52e859ef03fd61ea' in file:
            print(f'Avoid following Igor data consisting of {len(df)} rows')
            continue
        df_global = df_global.append(df, ignore_index=True)
    #df_global.set_index('datetime')
    return df_global

In [5]:
bucket = 'virtual-regatta'
folder = 'logs_players/'

for file in os.listdir(folder):
    try:
        os.remove(file)
    except:
        pass

downloadDirectoryFroms3(bucket, folder)

In [6]:
def vr_trigo_bijection(angle):
    angle_rad = angle * math.pi / 180
    cos_new_angle = math.sin(angle_rad)
    sin_new_angle = math.cos(angle_rad)

    if sin_new_angle > 0:
        return round(math.degrees(math.acos(cos_new_angle)), 0)
    else:
        return round(360 - math.degrees(math.acos(cos_new_angle)), 0)

## Data processing without accounting for compass angle

In [7]:
# import pandas as pd
# from os.path import join
# import os
# import numpy as np

# results_folder = 'pre-processed/'

# for file in os.listdir(os.path.join(folder, results_folder)):
#     try:
#         os.remove(file)
#     except:
#         pass

# for file in os.listdir(folder):
#     file_name, file_extension = os.path.splitext(join(folder, file))
#     if file_extension != '.csv':
#         print(f'File {file} is not a CSV, skipping...')
#         continue
    
#     try:
#         df = pd.read_csv(join(folder, file))
#     except Exception as e:
#         print(f'Error {e} while reading file {file}')
#         continue
    
#     df = df.drop(columns=['target_angle'])
#     lat_list = df['lat'].to_list()
#     lon_list = df['lon'].to_list()
#     tgt_angle_list = compute_target_angle(lat_list, lon_list, step = 10)
#     df['target_angle'] = pd.Series(tgt_angle_list)
    
#     #remove target_angle = 360 & remove not needed features (lat/lon)
#     features = ['datetime', 'boat_speed', 'target_angle', 'angle_of_attack', 'wind_speed', 'boat_angle']
#     #data_reduced = df[abs(df.target_angle - 360) > 0.1][features]
#     #data_reduced = df[df.target_angle < 0.1][features]
#     data_reduced = df[features].dropna()
        
#     # Remove trailing lines for which the target_angle was not calculated (by design)
#     #data_reduced.dropna()
    
#     data_reduced['boat_angle_deg'] = data_reduced['boat_angle']
#     data_reduced['target_angle_deg'] = data_reduced['target_angle']

#     # Convert angles to radians
#     data_reduced['boat_angle'] = data_reduced['boat_angle'] * np.pi / 180
#     data_reduced['target_angle'] = data_reduced['target_angle'] * np.pi / 180
    
#     # Compute cosinus of the angles (we'll predict cos rather than the angle itself)
#     data_reduced['cos_boat_angle'] = np.cos(data_reduced['boat_angle'])
#     data_reduced['cos_target_angle'] = np.cos(data_reduced['target_angle'])
    
#     data_reduced = data_reduced[data_reduced.cos_target_angle != 1.0]
    
#     # Store sinus sign (since predicting a cosinus yelds two possible angles)
#     data_reduced['sin_boat_angle'] = np.sin(data_reduced['boat_angle'])
#     data_reduced['sin_target_angle'] = np.sin(data_reduced['target_angle'])
    
#     data_reduced = data_reduced[abs(data_reduced['sin_target_angle'] - data_reduced['sin_boat_angle']) < 0.3]
    
#     # Not needed anymore (as we have the cosinus value and the sign of the sinus)
#     data_reduced = data_reduced.drop(columns=['boat_angle', 'target_angle', 'datetime'])
    
#     data_reduced.to_csv(join(folder, results_folder, file), index=False)
#     print(f'Stored pre-processed file {file}')

## Data processing accounting for compass angle

In [8]:
def vr_trigo_bijection(angle):
    angle_rad = angle * math.pi / 180
    cos_new_angle = math.sin(angle_rad)
    sin_new_angle = math.cos(angle_rad)

    if sin_new_angle > 0:
        return round(math.degrees(math.acos(cos_new_angle)), 0)
    else:
        return round(360 - math.degrees(math.acos(cos_new_angle)), 0)

In [9]:
import pandas as pd
from os.path import join
import os
import numpy as np

results_folder = 'pre-processed/'

for file in os.listdir(os.path.join(folder, results_folder)):
    try:
        os.remove(file)
    except:
        pass

for file in os.listdir(folder):
    file_name, file_extension = os.path.splitext(join(folder, file))
    if file_extension != '.csv':
        print(f'File {file} is not a CSV, skipping...')
        continue
    
    try:
        df = pd.read_csv(join(folder, file))
    except Exception as e:
        print(f'Error {e} while reading file {file}')
        continue
    
    df = df.drop(columns=['target_angle'])
    lat_list = df['lat'].to_list()
    lon_list = df['lon'].to_list()
    tgt_angle_list = compute_target_angle(lat_list, lon_list, step = 10)
    df['target_angle'] = pd.Series(tgt_angle_list)
    
    #remove target_angle = 360 & remove not needed features (lat/lon)
    features = ['datetime', 'boat_speed', 'target_angle', 'angle_of_attack', 'wind_speed', 'boat_angle']
    data_reduced = df[features].dropna()
    
    data_reduced['boat_angle_deg'] = data_reduced['boat_angle'].apply(lambda x: vr_trigo_bijection(x))
    data_reduced['target_angle_deg'] = data_reduced['target_angle'].apply(lambda x: vr_trigo_bijection(x))
    data_reduced['angle_of_attack_deg'] = data_reduced['angle_of_attack'].apply(lambda x: vr_trigo_bijection(x))

    # Convert angles to radians
    data_reduced['boat_angle'] = data_reduced['boat_angle_deg'] * np.pi / 180
    data_reduced['target_angle'] = data_reduced['target_angle_deg'] * np.pi / 180
    
    # Compute cosinus of the angles (we'll predict cos rather than the angle itself)
    data_reduced['cos_boat_angle'] = np.cos(data_reduced['boat_angle'])
    data_reduced['cos_target_angle'] = np.cos(data_reduced['target_angle'])
    
    data_reduced = data_reduced[data_reduced.cos_target_angle != 1.0]
    data_reduced.drop(columns=[c for c in list(data_reduced.columns) if c not in (
        'wind_speed', 'boat_speed', 'angle_of_attack_deg', 'boat_angle_deg', 'target_angle_deg')], inplace=True)
    
    data_reduced.to_csv(join(folder, results_folder, file), index=False)
    print(f'Stored pre-processed file {file}')



Stored pre-processed file status_5ccf239b517022fc0915acae.csv
Stored pre-processed file status_5bdead24517022fc09d60ba0.csv
Stored pre-processed file status_59d3f0c0b395b292ed63ea7e.csv
Stored pre-processed file status_5c694480517022fc090894ad.csv
Stored pre-processed file status_5f5e1f9652e859ef0302dfc1.csv
Stored pre-processed file status_5eb676b6080c8cbd45238cbc.csv
Stored pre-processed file status_59c1024eb395b292ed621334.csv
Stored pre-processed file status_5a1575457b296eaadfc427ca.csv
File pre-processed is not a CSV, skipping...
Stored pre-processed file status_59c129a0b395b292ed6216e5.csv
Stored pre-processed file status_5e791a92080c8cbd450ce5c7.csv
Stored pre-processed file status_59c18866b395b292ed621f3a.csv
Stored pre-processed file status_5ecfbea28e4c1d3e55e7bdcc.csv
Stored pre-processed file status_5a34e1827b296eaadfc6f1c3.csv
Stored pre-processed file status_59c40ab3b395b292ed625660.csv
Stored pre-processed file status_5a1b17c17b296eaadfc4b4c1.csv
Stored pre-processed file

In [10]:
data = aggregate_players_record(join(folder, results_folder))

data.describe()

Error Error tokenizing data. C error: Calling read(nbytes) on source failed. Try engine='python'. while reading file .ipynb_checkpoints
Avoid following Igor data consisting of 0 rows


Unnamed: 0,boat_speed,angle_of_attack_deg,wind_speed,target_angle_deg,boat_angle_deg
count,10548.0,10548.0,10548.0,10548.0,10548.0
mean,5.961957,204.763936,8.096902,162.239287,144.718335
std,2.44922,117.550451,4.153398,94.732796,109.392607
min,0.0,0.0,2.0,1.0,0.0
25%,4.404475,50.0,5.397507,90.0,36.0
50%,5.024275,224.0,6.36704,106.0,137.0
75%,7.7339,315.0,11.33038,258.0,231.0
max,15.15786,360.0,21.07713,359.0,360.0


In [14]:
data['cos_attack_angle'] = data['angle_of_attack_deg'].apply(lambda x: np.cos(np.deg2rad(x)))
data['sin_attack_angle'] = data['angle_of_attack_deg'].apply(lambda x: np.sin(np.deg2rad(x)))
data['cos_boat_angle'] = data['boat_angle_deg'].apply(lambda x: np.cos(np.deg2rad(x)))
data['sin_boat_angle'] = data['boat_angle_deg'].apply(lambda x: np.sin(np.deg2rad(x)))
data['cos_target_angle'] = data['target_angle_deg'].apply(lambda x: np.cos(np.deg2rad(x)))
data['sin_target_angle'] = data['target_angle_deg'].apply(lambda x: np.sin(np.deg2rad(x)))
print(data.head(2))

   boat_speed  angle_of_attack_deg  wind_speed  target_angle_deg  \
0     7.73236                214.0     9.43467             214.0   
1     7.73236                214.0     9.43467             214.0   

   boat_angle_deg  cos_attack_angle  sin_attack_angle  cos_boat_angle  \
0           210.0         -0.829038         -0.559193       -0.866025   
1           210.0         -0.829038         -0.559193       -0.866025   

   sin_boat_angle  cos_target_angle  sin_target_angle  
0            -0.5         -0.829038         -0.559193  
1            -0.5         -0.829038         -0.559193  


In [15]:
dup_columns_check = ['boat_speed', 'cos_attack_angle', 'sin_attack_angle', 'wind_speed', 'cos_boat_angle']
dup_boat_angle = data[data.duplicated(dup_columns_check, keep=False)]
print(f'Total duplicate rows based on {dup_columns_check}: {len(dup_boat_angle)}')

Total duplicate rows based on ['boat_speed', 'cos_attack_angle', 'sin_attack_angle', 'wind_speed', 'cos_boat_angle']: 4726


In [16]:
print(f'Num rows before removing duplicates: {len(data)}')
data = data.drop_duplicates(subset=dup_columns_check)
print(f'Num rows after removing duplicates: {len(data)}')

Num rows before removing duplicates: 10548
Num rows after removing duplicates: 5992


In [17]:
print(data.head(2))

   boat_speed  angle_of_attack_deg  wind_speed  target_angle_deg  \
0     7.73236                214.0     9.43467             214.0   
2     7.78952                215.0     9.59005             215.0   

   boat_angle_deg  cos_attack_angle  sin_attack_angle  cos_boat_angle  \
0           210.0         -0.829038         -0.559193       -0.866025   
2           211.0         -0.819152         -0.573576       -0.857167   

   sin_boat_angle  cos_target_angle  sin_target_angle  
0       -0.500000         -0.829038         -0.559193  
2       -0.515038         -0.819152         -0.573576  


## Preprocess and save for cosine regression

In [19]:
cosine_data =  data[['boat_speed', 'wind_speed', 'cos_attack_angle', 'sin_attack_angle', 'cos_target_angle', 'sin_target_angle', 'cos_boat_angle']]
print(cosine_data.head(2))
print(len(cosine_data))

   boat_speed  wind_speed  cos_attack_angle  sin_attack_angle  \
0     7.73236     9.43467         -0.829038         -0.559193   
2     7.78952     9.59005         -0.819152         -0.573576   

   cos_target_angle  sin_target_angle  cos_boat_angle  
0         -0.829038         -0.559193       -0.866025  
2         -0.819152         -0.573576       -0.857167  
5992


In [20]:
cosine_data.to_csv('cosine_data.csv', index=False)

In [22]:
s3 = boto3.resource('s3')
s3.Bucket('vr-autopilot-ml').upload_file('cosine_data.csv','sagemaker-experiment-data/cosine_data.csv')

## Preprocess and save for sine regression

In [24]:
sine_data =  data[['boat_speed', 'wind_speed', 'cos_attack_angle', 'sin_attack_angle', 'cos_target_angle', 'sin_target_angle', 'sin_boat_angle']]
print(sine_data.head(2))
print(len(sine_data))

   boat_speed  wind_speed  cos_attack_angle  sin_attack_angle  \
0     7.73236     9.43467         -0.829038         -0.559193   
2     7.78952     9.59005         -0.819152         -0.573576   

   cos_target_angle  sin_target_angle  sin_boat_angle  
0         -0.829038         -0.559193       -0.500000  
2         -0.819152         -0.573576       -0.515038  
5992


In [25]:
sine_data.to_csv('sine_data.csv', index=False)
s3 = boto3.resource('s3')
s3.Bucket('vr-autopilot-ml').upload_file('sine_data.csv','sagemaker-experiment-data/sine_data.csv')

In [None]:
assert False

In [11]:
corr = data[['cos_target_angle', 'cos_boat_angle']].corr()
corr.style.background_gradient()

KeyError: "None of [Index(['cos_target_angle', 'cos_boat_angle'], dtype='object')] are in the [columns]"

In [None]:
corr = data[['sin_target_angle', 'sin_boat_angle']].corr()
corr.style.background_gradient()

In [None]:
corr = data.corr()
corr.style.background_gradient()

In [None]:
data['cos_boat_angle'][1000:1500].plot()
data['cos_target_angle'][1000:1500].plot()

In [None]:
data['sin_boat_angle'][1000:1500].plot()
data['sin_target_angle'][1000:1500].plot()

In [None]:
test = pd.DataFrame(columns=['x', 'y'])

test.x = data['sin_boat_angle'][:].to_numpy()
test.y = data['sin_target_angle'][:].to_numpy()

test.head()
test.plot.scatter(x='x',y='y')

In [None]:
test = pd.DataFrame(columns=['x', 'y'])

test.x = data['cos_boat_angle'][:].to_numpy()
test.y = data['cos_target_angle'][:].to_numpy()

test.head()
test.plot.scatter(x='x',y='y')

In [None]:
test = pd.DataFrame(columns=['x', 'y'])

test.x = data['boat_angle_deg'][:].to_numpy()
test.y = data['target_angle_deg'][:].to_numpy()

test.head()
test.plot.scatter(x='x',y='y')

In [12]:
data.to_csv('../all-in-one.csv', index=False)