In [1]:
import numpy as np
import pandas as pd
import os
import datetime
import time
import matplotlib.pyplot as plt

os.chdir('../')

In [2]:
def prepare_additional_location_features(df, moving_average_window, window_center=True):  
        
    # Add time rounded to seconds
    df['epoch_time'] = df['epoch_time'].round(-3)
    
    # Group values by rounded time
    df = df.groupby(['epoch_time'], as_index=False).mean().drop(['Ignore1', 'Ignore2'], axis=1)
    
    # Calculate difference to get distance and speed
    df_diff = df.diff() \
                .rename(columns={'epoch_time': 'epoch_time_change', 
                                 'accuracy': 'accuracy_change', 
                                 'Latitude': 'Latitude_change', 
                                 'Longitude': 'Longitude_change', 
                                 'Altitude': 'Altitude_change'})
    
    # Additional features
    df_diff['distance'] = (df_diff['Latitude_change'].pow(2) + df_diff['Longitude_change'].pow(2)).pow(0.5)
    df_diff['speed'] = df_diff['distance']/df_diff['epoch_time_change']
    df_diff['vertical_speed'] = df_diff['Altitude_change']/df_diff['epoch_time_change']
    df_diff['direction'] = df_diff['Longitude_change']/df_diff['Latitude_change']
    df_diff['vertical_direction'] = df_diff['Altitude_change']/df_diff['distance']
    
    df_diff.drop('epoch_time_change', axis=1, inplace=True) 

    
    # Second diff to get change of the new features
    df_diff_2 = df_diff[['speed', 'vertical_speed', 'direction', 'vertical_direction']] \
                    .diff() \
                    .rename(columns={'speed': 'speed_change', 
                                     'vertical_speed': 'vertical_speed_change', 
                                     'direction': 'direction_change', 
                                     'vertical_direction': 'vertical_direction_change'})
        
    
    # Merge new features back to the main dataframe
    df = df.merge(df_diff, left_index=True, right_index=True) \
           .merge(df_diff_2, left_index=True, right_index=True)
    
    
    # Additional abs values features
    df['abs_speed_change'] = abs(df['speed_change'])
    df['abs_vertical_speed_change'] = abs(df['vertical_speed_change'])
    df['abs_direction_change'] = abs(df['direction_change'])
    df['abs_vertical_direction_change'] = abs(df['vertical_direction_change'])    
        
    # Add moving averages within selected window
    for column in df.columns[1:]:
        df[column + '_' + str(moving_average_window) + '_s_window_avg'] = df[column].rolling(window=moving_average_window, center=window_center).mean()
        
        
    # Remove infinity values
    df.replace([np.inf, -np.inf], np.nan, inplace=True)
    
    # Fill NA values    
    df.fillna(method='ffill', axis=0, inplace=True)
    df.fillna(method='bfill', axis=0, inplace=True)
        
    return df

In [3]:
window = 3

filename = './data/train/Location.parquet'
df = pd.read_parquet(filename)
df = prepare_additional_location_features(df, window, window_center=True)
df.to_parquet('./data/train/features_denys.parquet')

filename = './data/validate/Location.parquet'
df = pd.read_parquet(filename)
df = prepare_additional_location_features(df, window, window_center=True)
df.to_parquet('./data/validate/features_denys.parquet')

filename = './data/test/Location.parquet'
df = pd.read_parquet(filename)
df = prepare_additional_location_features(df, window, window_center=True)
df.to_parquet('./data/test/features_denys.parquet')

## Analysis

In [10]:
pd.set_option('display.max_columns', None)

In [4]:
# Join labels for analysis
features_file = './data/train/features_denys.parquet'
features = pd.read_parquet(features_file)

label_file = './data/train/Label.parquet'
label = pd.read_parquet(label_file)

features = features.merge(label)

In [11]:
features

Unnamed: 0,epoch_time,accuracy,Latitude,Longitude,Altitude,accuracy_change,Latitude_change,Longitude_change,Altitude_change,distance,speed,vertical_speed,direction,vertical_direction,speed_change,vertical_speed_change,direction_change,vertical_direction_change,abs_speed_change,abs_vertical_speed_change,abs_direction_change,abs_vertical_direction_change,accuracy_3_s_window_avg,Latitude_3_s_window_avg,Longitude_3_s_window_avg,Altitude_3_s_window_avg,accuracy_change_3_s_window_avg,Latitude_change_3_s_window_avg,Longitude_change_3_s_window_avg,Altitude_change_3_s_window_avg,distance_3_s_window_avg,speed_3_s_window_avg,vertical_speed_3_s_window_avg,direction_3_s_window_avg,vertical_direction_3_s_window_avg,speed_change_3_s_window_avg,vertical_speed_change_3_s_window_avg,direction_change_3_s_window_avg,vertical_direction_change_3_s_window_avg,abs_speed_change_3_s_window_avg,abs_vertical_speed_change_3_s_window_avg,abs_direction_change_3_s_window_avg,abs_vertical_direction_change_3_s_window_avg,label
0,1490431658000,64.0,50.844494,-0.132922,97.664610,-16.0,-0.000003,-1.914368e-06,30.603290,0.000003,1.684158e-09,0.015302,0.690754,9.085632e+06,1.034176e-08,-0.007958,-0.034234,-8.474991e+06,1.034176e-08,0.007958,0.034234,8.474991e+06,53.333333,50.844489,-0.132926,120.514645,-17.333333,-0.000008,-0.000003,12.257147,0.000009,8.622387e-09,0.007157,0.435508,3.199864e+06,8.209498e-08,-0.004581,-0.466973,-3.026450e+06,8.209498e-08,0.006403,0.466973,3.095093e+06,4
1,1490431660000,48.0,50.844491,-0.132924,128.267900,-16.0,-0.000003,-1.914368e-06,30.603290,0.000003,1.684158e-09,0.015302,0.690754,9.085632e+06,1.034176e-08,-0.007958,-0.034234,-8.474991e+06,1.034176e-08,0.007958,0.034234,8.474991e+06,53.333333,50.844489,-0.132926,120.514645,-17.333333,-0.000008,-0.000003,12.257147,0.000009,8.622387e-09,0.007157,0.435508,3.199864e+06,8.209498e-08,-0.004581,-0.466973,-3.026450e+06,8.209498e-08,0.006403,0.466973,3.095093e+06,4
2,1490431661000,48.0,50.844481,-0.132931,135.611425,0.0,-0.000010,-6.600000e-06,7.343525,0.000012,1.202592e-08,0.007344,0.656520,6.106413e+05,1.034176e-08,-0.007958,-0.034234,-8.474991e+06,1.034176e-08,0.007958,0.034234,8.474991e+06,36.000000,50.844481,-0.132928,132.771792,-17.333333,-0.000008,-0.000003,12.257147,0.000009,8.622387e-09,0.007157,0.435508,3.199864e+06,8.209498e-08,-0.004581,-0.466973,-3.026450e+06,8.209498e-08,0.006403,0.466973,3.095093e+06,4
3,1490431662000,12.0,50.844469,-0.132930,134.436050,-36.0,-0.000012,4.950000e-07,-1.175375,0.000012,1.215708e-08,-0.001175,-0.040751,-9.668233e+04,1.311593e-10,-0.008519,-0.697271,-7.073237e+05,1.311593e-10,0.008519,0.697271,7.073237e+05,30.666667,50.844406,-0.132883,135.347114,-5.333333,-0.000075,0.000046,2.575322,0.000091,9.071737e-08,0.002575,-0.031465,1.734138e+05,8.209498e-08,-0.004581,-0.466973,-3.026450e+06,8.209498e-08,0.006403,0.466973,3.095093e+06,4
4,1490431663000,32.0,50.844267,-0.132787,135.993866,20.0,-0.000202,1.435770e-04,1.557816,0.000248,2.479691e-07,0.001558,-0.710166,6.282297e+03,2.358120e-07,0.002733,-0.669415,1.029646e+05,2.358120e-07,0.002733,0.669415,1.029646e+05,18.666667,50.844322,-0.132831,136.387955,-12.000000,-0.000084,0.000052,1.040841,0.000100,9.952526e-08,0.001041,-0.352815,-6.378591e+03,8.807890e-09,-0.001534,-0.321350,-1.797924e+05,1.484876e-07,0.004145,0.589774,2.917568e+05,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
660848,1499267848000,8.0,50.845158,-0.133353,126.626823,2.0,0.000062,-2.111700e-05,0.058807,0.000065,6.546536e-08,0.000059,-0.340784,8.982976e+02,4.429717e-09,0.000009,-0.035748,8.942901e+01,4.429717e-09,0.000009,0.035748,8.942901e+01,7.333333,50.845159,-0.133353,126.639748,0.666667,0.000062,-0.000020,0.068586,0.000065,6.485760e-08,0.000069,-0.316013,1.046884e+03,3.145635e-09,0.000018,0.006384,2.227126e+02,3.145635e-09,0.000018,0.030216,2.227126e+02,5
660849,1499267849000,8.0,50.845223,-0.133373,126.724403,0.0,0.000065,-1.969300e-05,0.097580,0.000068,6.807180e-08,0.000098,-0.302221,1.433486e+03,2.606446e-09,0.000039,0.038563,5.351881e+02,2.606446e-09,0.000039,0.038563,5.351881e+02,8.000000,50.845224,-0.133374,126.731956,0.666667,0.000065,-0.000021,0.092209,0.000068,6.808858e-08,0.000092,-0.331685,1.343931e+03,3.230976e-09,0.000024,-0.015671,2.970471e+02,3.230976e-09,0.000024,0.041380,2.970471e+02,5
660850,1499267850000,8.0,50.845290,-0.133397,126.844643,0.0,0.000067,-2.348700e-05,0.120239,0.000071,7.072857e-08,0.000120,-0.352050,1.700010e+03,2.656767e-09,0.000023,-0.049829,2.665241e+02,2.656767e-09,0.000023,0.049829,2.665241e+02,8.000000,50.845289,-0.133397,126.846092,0.000000,0.000066,-0.000022,0.114136,0.000069,6.926371e-08,0.000114,-0.342289,1.646454e+03,1.175135e-09,0.000022,-0.010604,3.025225e+02,2.333674e-09,0.000022,0.036313,3.025225e+02,5
660851,1499267851000,8.0,50.845354,-0.133421,126.969231,0.0,0.000065,-2.408800e-05,0.124588,0.000069,6.899076e-08,0.000125,-0.372597,1.805865e+03,-1.737809e-09,0.000004,-0.020547,1.058554e+02,1.737809e-09,0.000004,0.020547,1.058554e+02,8.000000,50.845357,-0.133422,126.936544,0.000000,0.000067,-0.000025,0.090452,0.000072,7.204302e-08,0.000090,-0.372797,1.284349e+03,2.779311e-09,-0.000024,-0.030508,-3.621042e+02,3.937850e-09,0.000042,0.030508,6.103572e+02,5


In [6]:
# Calculate average speed by labels
features.groupby('label')['speed'].median().apply(lambda x: x*1000000)

label
1    0.000783
2    0.015530
3    0.026351
4    0.047860
5    0.166307
6    0.038697
7    0.149028
8    0.110829
Name: speed, dtype: float64

In [7]:
features.groupby('label')['abs_vertical_speed_change'].median().apply(lambda x: x*1000)

label
1    0.005229
2    0.018939
3    0.023653
4    0.026245
5    0.043777
6    0.019604
7    0.020630
8    0.016068
Name: abs_vertical_speed_change, dtype: float64

In [8]:
features.groupby('label')['abs_speed_change'].median().apply(lambda x: x*1000000)

label
1    0.000182
2    0.001914
3    0.004344
4    0.005304
5    0.004501
6    0.003151
7    0.004261
8    0.003928
Name: abs_speed_change, dtype: float64

In [9]:
features.groupby('label')['abs_speed_change_3_s'].median().apply(lambda x: x*1000000)

KeyError: 'Column not found: abs_speed_change_3_s'

In [None]:
features.groupby('label')['abs_direction_change'].median()

In [None]:
features.groupby('label')['abs_vertical_direction_change'].median().apply(lambda x: x/1000)