In [1]:
import numpy as np
import pandas as pd
import os
import datetime
import time
import matplotlib.pyplot as plt

os.chdir('../')

In [2]:
def prepare_additional_location_features(df, moving_average_window, window_center=True):  
        
    # Add time rounded to seconds
    df['epoch_time'] = df['epoch_time'].round(-3)
    
    # Group values by rounded time
    df = df.groupby(['epoch_time'], as_index=False).mean().drop(['Ignore1', 'Ignore2'], axis=1)
    
    # Calculate difference to get distance and speed
    df_diff = df.diff() \
                .rename(columns={'epoch_time': 'epoch_time_change', 
                                 'accuracy': 'accuracy_change', 
                                 'Latitude': 'Latitude_change', 
                                 'Longitude': 'Longitude_change', 
                                 'Altitude': 'Altitude_change'})
    
    # Additional features
    df_diff['distance'] = (df_diff['Latitude_change'].pow(2) + df_diff['Longitude_change'].pow(2)).pow(0.5)
    df_diff['speed'] = df_diff['distance']/df_diff['epoch_time_change']
    df_diff['vertical_speed'] = df_diff['Altitude_change']/df_diff['epoch_time_change']
    df_diff['direction'] = df_diff['Longitude_change']/df_diff['Latitude_change']
    df_diff['vertical_direction'] = df_diff['Altitude_change']/df_diff['distance']
    
    df_diff.drop('epoch_time_change', axis=1, inplace=True) 

    
    # Second diff to get change of the new features
    df_diff_2 = df_diff[['speed', 'vertical_speed', 'direction', 'vertical_direction']] \
                    .diff() \
                    .rename(columns={'speed': 'speed_change', 
                                     'vertical_speed': 'vertical_speed_change', 
                                     'direction': 'direction_change', 
                                     'vertical_direction': 'vertical_direction_change'})
        
    
    # Merge new features back to the main dataframe
    df = df[['epoch_time']].merge(df_diff, left_index=True, right_index=True) \
                           .merge(df_diff_2, left_index=True, right_index=True)
    
    
    # Additional abs values features
    df['abs_speed_change'] = abs(df['speed_change'])
    df['abs_vertical_speed_change'] = abs(df['vertical_speed_change'])
    df['abs_direction_change'] = abs(df['direction_change'])
    df['abs_vertical_direction_change'] = abs(df['vertical_direction_change'])    
        
    # Add moving averages within selected window
    for column in df.columns[1:]:
        df[column + '_' + str(moving_average_window) + '_s_window_avg'] = df[column].rolling(window=moving_average_window, center=window_center).mean()
        
        
    # Remove infinity values
    df.replace([np.inf, -np.inf], np.nan, inplace=True)
    
    # Fill NA values    
    df.fillna(method='ffill', axis=0, inplace=True)
    df.fillna(method='bfill', axis=0, inplace=True)
        
    return df

In [3]:
window = 3

filename = './data/train/Location.parquet'
df = pd.read_parquet(filename)
df = prepare_additional_location_features(df, window, window_center=True)
df.to_parquet('./data/train/features_denys.parquet', index=False)

filename = './data/validate/Location.parquet'
df = pd.read_parquet(filename)
df = prepare_additional_location_features(df, window, window_center=True)
df.to_parquet('./data/validate/features_denys.parquet', index=False)

filename = './data/test/Location.parquet'
df = pd.read_parquet(filename)
df = prepare_additional_location_features(df, window, window_center=True)
df.to_parquet('./data/test/features_denys.parquet', index=False)

## Analysis

In [4]:
pd.set_option('display.max_columns', None)

In [5]:
# Join labels for analysis
features_file = './data/train/features_denys.parquet'
features = pd.read_parquet(features_file)

label_file = './data/train/Label.parquet'
label = pd.read_parquet(label_file)

# features = features.merge(label)

In [6]:
features

Unnamed: 0,epoch_time,accuracy,Latitude,Longitude,Altitude,accuracy_change,Latitude_change,Longitude_change,Altitude_change,distance,speed,vertical_speed,direction,vertical_direction,speed_change,vertical_speed_change,direction_change,vertical_direction_change,abs_speed_change,abs_vertical_speed_change,abs_direction_change,abs_vertical_direction_change,accuracy_3_s_window_avg,Latitude_3_s_window_avg,Longitude_3_s_window_avg,Altitude_3_s_window_avg,accuracy_change_3_s_window_avg,Latitude_change_3_s_window_avg,Longitude_change_3_s_window_avg,Altitude_change_3_s_window_avg,distance_3_s_window_avg,speed_3_s_window_avg,vertical_speed_3_s_window_avg,direction_3_s_window_avg,vertical_direction_3_s_window_avg,speed_change_3_s_window_avg,vertical_speed_change_3_s_window_avg,direction_change_3_s_window_avg,vertical_direction_change_3_s_window_avg,abs_speed_change_3_s_window_avg,abs_vertical_speed_change_3_s_window_avg,abs_direction_change_3_s_window_avg,abs_vertical_direction_change_3_s_window_avg
0,1490431658000,64.0,50.844494,-0.132922,97.664610,-16.0,-2.771417e-06,-1.914368e-06,30.603290,3.368317e-06,1.684158e-09,0.015302,0.690754,9.085632e+06,1.034176e-08,-0.007958,-0.034234,-8.474991e+06,1.034176e-08,0.007958,0.034234,8.474991e+06,53.333333,50.844489,-0.132926,120.514645,-17.333333,-8.323806e-06,-2.673123e-06,12.257147,9.183774e-06,8.622387e-09,0.007157,0.435508,3.199864e+06,8.209498e-08,-0.004581,-0.466973,-3.026450e+06,8.209498e-08,6.403403e-03,0.466973,3.095093e+06
1,1490431660000,48.0,50.844491,-0.132924,128.267900,-16.0,-2.771417e-06,-1.914368e-06,30.603290,3.368317e-06,1.684158e-09,0.015302,0.690754,9.085632e+06,1.034176e-08,-0.007958,-0.034234,-8.474991e+06,1.034176e-08,0.007958,0.034234,8.474991e+06,53.333333,50.844489,-0.132926,120.514645,-17.333333,-8.323806e-06,-2.673123e-06,12.257147,9.183774e-06,8.622387e-09,0.007157,0.435508,3.199864e+06,8.209498e-08,-0.004581,-0.466973,-3.026450e+06,8.209498e-08,6.403403e-03,0.466973,3.095093e+06
2,1490431661000,48.0,50.844481,-0.132931,135.611425,0.0,-1.005300e-05,-6.600000e-06,7.343525,1.202592e-05,1.202592e-08,0.007344,0.656520,6.106413e+05,1.034176e-08,-0.007958,-0.034234,-8.474991e+06,1.034176e-08,0.007958,0.034234,8.474991e+06,36.000000,50.844481,-0.132928,132.771792,-17.333333,-8.323806e-06,-2.673123e-06,12.257147,9.183774e-06,8.622387e-09,0.007157,0.435508,3.199864e+06,8.209498e-08,-0.004581,-0.466973,-3.026450e+06,8.209498e-08,6.403403e-03,0.466973,3.095093e+06
3,1490431662000,12.0,50.844469,-0.132930,134.436050,-36.0,-1.214700e-05,4.950000e-07,-1.175375,1.215708e-05,1.215708e-08,-0.001175,-0.040751,-9.668233e+04,1.311593e-10,-0.008519,-0.697271,-7.073237e+05,1.311593e-10,0.008519,0.697271,7.073237e+05,30.666667,50.844406,-0.132883,135.347114,-5.333333,-7.479133e-05,4.582400e-05,2.575322,9.071737e-05,9.071737e-08,0.002575,-0.031465,1.734138e+05,8.209498e-08,-0.004581,-0.466973,-3.026450e+06,8.209498e-08,6.403403e-03,0.466973,3.095093e+06
4,1490431663000,32.0,50.844267,-0.132787,135.993866,20.0,-2.021740e-04,1.435770e-04,1.557816,2.479691e-04,2.479691e-07,0.001558,-0.710166,6.282297e+03,2.358120e-07,0.002733,-0.669415,1.029646e+05,2.358120e-07,0.002733,0.669415,1.029646e+05,18.666667,50.844322,-0.132831,136.387955,-12.000000,-8.369067e-05,5.179133e-05,1.040841,9.952526e-05,9.952526e-08,0.001041,-0.352815,-6.378591e+03,8.807890e-09,-0.001534,-0.321350,-1.797924e+05,1.484876e-07,4.144786e-03,0.589774,2.917568e+05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
908626,1499268278000,12.0,50.846796,-0.133278,117.764547,0.0,-3.700000e-08,-2.150000e-07,0.000000,2.181605e-07,2.181605e-10,0.000000,5.810811,0.000000e+00,-2.183312e-10,0.000000,0.077478,0.000000e+00,2.183312e-10,0.000000,0.077478,0.000000e+00,12.000000,50.846796,-0.133278,117.764547,0.000000,-4.366667e-08,-2.510000e-07,0.000000,2.547702e-07,2.547702e-10,0.000000,5.742785,0.000000e+00,-2.540561e-10,0.000000,-0.026963,6.584742e-10,2.540561e-10,6.418477e-17,0.078615,0.000000e+00
908627,1499268279000,12.0,50.846796,-0.133278,117.764547,0.0,-1.900000e-08,-1.080000e-07,0.000000,1.096586e-07,1.096586e-10,0.000000,5.684211,0.000000e+00,-1.085019e-10,0.000000,-0.126600,0.000000e+00,1.085019e-10,0.000000,0.126600,0.000000e+00,12.000000,50.846796,-0.133278,117.764547,0.000000,-2.166666e-08,-1.256667e-07,0.000000,1.275213e-07,1.275213e-10,0.000000,5.831675,0.000000e+00,-1.272489e-10,0.000000,0.088890,6.584742e-10,1.272489e-10,6.418477e-17,0.173289,0.000000e+00
908628,1499268280000,12.0,50.846796,-0.133278,117.764547,0.0,-8.999997e-09,-5.400000e-08,0.000000,5.474486e-08,5.474486e-11,0.000000,6.000002,0.000000e+00,-5.491370e-11,0.000000,0.315791,0.000000e+00,5.491370e-11,0.000000,0.315791,0.000000e+00,12.000000,50.846796,-0.133278,117.764547,0.000000,-1.100000e-08,-6.266667e-08,0.000000,6.362661e-08,6.362661e-11,0.000000,5.628070,0.000000e+00,-6.389470e-11,0.000000,-0.203605,6.584742e-10,6.389470e-11,6.418477e-17,0.414132,0.000000e+00
908629,1499268281000,12.0,50.846796,-0.133278,117.764547,0.0,-5.000004e-09,-2.600000e-08,0.000000,2.647641e-08,2.647641e-11,0.000000,5.199996,0.000000e+00,-2.826846e-11,0.000000,-0.800006,0.000000e+00,2.826846e-11,0.000000,0.800006,0.000000e+00,12.000000,50.846796,-0.133278,117.764547,0.000000,4.983333e-07,-3.466667e-08,0.000000,5.301374e-07,5.301374e-10,0.000000,3.728031,0.000000e+00,4.665108e-10,0.000000,-1.900039,6.584742e-10,5.219655e-10,6.418477e-17,2.110566,0.000000e+00


In [7]:
# Calculate average speed by labels
features.groupby('label')['speed'].median().apply(lambda x: x*1000000)

KeyError: 'label'

In [None]:
features.groupby('label')['abs_vertical_speed_change'].median().apply(lambda x: x*1000)

In [None]:
features.groupby('label')['abs_speed_change'].median().apply(lambda x: x*1000000)

In [None]:
features.groupby('label')['abs_speed_change_3_s'].median().apply(lambda x: x*1000000)

In [None]:
features.groupby('label')['abs_direction_change'].median()

In [None]:
features.groupby('label')['abs_vertical_direction_change'].median().apply(lambda x: x/1000)