In [1]:
import numpy as np
import pandas as pd
import os
import datetime
import time
import matplotlib.pyplot as plt

# Switch the working directory to the parent folder; presumably so that the
# relative './data/...' paths used in this notebook resolve - TODO confirm.
# NOTE(review): the move is relative, so re-running this cell without a kernel
# restart climbs one more directory level each time.
os.chdir('../')

In [34]:
def prepare_additional_location_features(df, moving_average_window, window_center=True):
    """Derive movement features from raw location samples.

    Steps performed:

    1. ``epoch_time`` is rounded to the nearest 1000 (presumably milliseconds
       to whole seconds - confirm against the data source) and all samples
       sharing a rounded timestamp are averaged into one row.
    2. Row-to-row differences give ``*_change`` columns, from which
       ``distance``, ``speed``, ``vertical_speed``, ``direction`` and
       ``vertical_direction`` are computed.  ``distance`` is the Euclidean
       norm of the latitude/longitude deltas, expressed in the units of those
       columns.
    3. A second difference of the derived features gives their ``*_change``
       columns, plus ``abs_*`` absolute-value variants.
    4. A rolling mean over ``moving_average_window`` rows is added for every
       column except ``epoch_time`` (suffix ``_<window>_s_window_avg``).
    5. Infinite values become NaN; NaN in any column whose name contains
       ``_change`` becomes 0; remaining NaN are forward- then backward-filled.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``epoch_time``, ``accuracy``, ``Latitude``, ``Longitude``
        and ``Altitude``.  ``Ignore1`` / ``Ignore2`` are dropped when present.
        The frame passed in is not modified.
    moving_average_window : int
        Number of rows in the rolling-mean window.
    window_center : bool, default True
        Whether the rolling window is centred on the current row.

    Returns
    -------
    pandas.DataFrame
        One row per distinct rounded ``epoch_time`` with the original
        (averaged) columns followed by the derived feature columns.
    """
    # Round on a copy so the caller's frame is left untouched.
    df = df.assign(epoch_time=df['epoch_time'].round(-3))

    # Collapse all samples that share a rounded timestamp into their mean.
    df = df.groupby(['epoch_time'], as_index=False).mean() \
           .drop(['Ignore1', 'Ignore2'], axis=1, errors='ignore')

    # First difference: per-row change of time, accuracy and position.
    df_diff = df.diff() \
                .rename(columns={'epoch_time': 'epoch_time_change',
                                 'accuracy': 'accuracy_change',
                                 'Latitude': 'Latitude_change',
                                 'Longitude': 'Longitude_change',
                                 'Altitude': 'Altitude_change'})

    # Features derived from the first difference. Divisions by zero produce
    # +/-inf here; those are turned into NaN further down.
    df_diff['distance'] = (df_diff['Latitude_change'].pow(2) + df_diff['Longitude_change'].pow(2)).pow(0.5)
    df_diff['speed'] = df_diff['distance'] / df_diff['epoch_time_change']
    df_diff['vertical_speed'] = df_diff['Altitude_change'] / df_diff['epoch_time_change']
    df_diff['direction'] = df_diff['Longitude_change'] / df_diff['Latitude_change']
    df_diff['vertical_direction'] = df_diff['Altitude_change'] / df_diff['distance']

    df_diff = df_diff.drop(columns='epoch_time_change')

    # Second difference: change of the derived features.
    df_diff_2 = df_diff[['speed', 'vertical_speed', 'direction', 'vertical_direction']] \
                    .diff() \
                    .rename(columns={'speed': 'speed_change',
                                     'vertical_speed': 'vertical_speed_change',
                                     'direction': 'direction_change',
                                     'vertical_direction': 'vertical_direction_change'})

    # Attach both difference frames to the grouped rows (aligned on index).
    df = df.merge(df_diff, left_index=True, right_index=True, how='left') \
           .merge(df_diff_2, left_index=True, right_index=True, how='left')

    # Magnitude of the second-order changes.
    df['abs_speed_change'] = df['speed_change'].abs()
    df['abs_vertical_speed_change'] = df['vertical_speed_change'].abs()
    df['abs_direction_change'] = df['direction_change'].abs()
    df['abs_vertical_direction_change'] = df['vertical_direction_change'].abs()

    # Rolling mean of every column except the leading epoch_time key,
    # computed in one pass and appended in the original column order.
    feature_columns = list(df.columns[1:])
    window_suffix = '_' + str(moving_average_window) + '_s_window_avg'
    window_avg = df[feature_columns] \
        .rolling(window=moving_average_window, center=window_center) \
        .mean() \
        .add_suffix(window_suffix)
    df = df.join(window_avg)

    # Remove infinity values
    df = df.replace([np.inf, -np.inf], np.nan)

    # Fill NA values for change columns with 0. The result must be assigned
    # back: fillna returns a new object and leaves its input unchanged.
    change_columns = [column for column in df.columns if '_change' in column]
    df[change_columns] = df[change_columns].fillna(value=0)

    # Fill NA for the remaining columns from neighbouring rows.
    df = df.ffill().bfill()

    return df

In [35]:
window = 3

# Build the feature file for each data split with identical settings.
# Reads ./data/<split>/Location.parquet and writes
# ./data/<split>/features_denys.parquet next to it.
for split in ('train', 'validate', 'test'):
    filename = './data/' + split + '/Location.parquet'
    df = pd.read_parquet(filename)
    df = prepare_additional_location_features(df, window, window_center=True)
    df.to_parquet('./data/' + split + '/features_denys.parquet', index=False)

## Analysis

In [4]:
# Show every column when a DataFrame is rendered (no column truncation).
pd.options.display.max_columns = None

In [41]:
# Combine the validation labels with the generated features for analysis
features_file = './data/validate/features_denys.parquet'
label_file = './data/validate/Label.parquet'

label = pd.read_parquet(label_file)

# Outer join on the columns both frames share, with the label frame on the left
features = pd.merge(label, pd.read_parquet(features_file), how='outer')

# features.to_csv('./data/location_analysis.csv')

In [42]:
# Display the merged label/feature frame; the rendered output abbreviates the
# middle rows of a long frame.
features

Unnamed: 0,epoch_time,label,accuracy,Latitude,Longitude,Altitude,accuracy_change,Latitude_change,Longitude_change,Altitude_change,distance,speed,vertical_speed,direction,vertical_direction,speed_change,vertical_speed_change,direction_change,vertical_direction_change,abs_speed_change,abs_vertical_speed_change,abs_direction_change,abs_vertical_direction_change,accuracy_3_s_window_avg,Latitude_3_s_window_avg,Longitude_3_s_window_avg,Altitude_3_s_window_avg,accuracy_change_3_s_window_avg,Latitude_change_3_s_window_avg,Longitude_change_3_s_window_avg,Altitude_change_3_s_window_avg,distance_3_s_window_avg,speed_3_s_window_avg,vertical_speed_3_s_window_avg,direction_3_s_window_avg,vertical_direction_3_s_window_avg,speed_change_3_s_window_avg,vertical_speed_change_3_s_window_avg,direction_change_3_s_window_avg,vertical_direction_change_3_s_window_avg,abs_speed_change_3_s_window_avg,abs_vertical_speed_change_3_s_window_avg,abs_direction_change_3_s_window_avg,abs_vertical_direction_change_3_s_window_avg
0,1497427492000,3.0,8.0,50.826732,-0.119872,133.989176,0.0,9.995000e-06,1.065600e-05,0.054710,1.460994e-05,1.460994e-08,0.000055,1.066133,3744.726343,-9.698427e-10,0.000007,5.067450e-01,651.893577,9.698427e-10,0.000007,5.067450e-01,651.893577,7.333333,50.826730,-0.119870,133.992721,-0.666667,9.918667e-06,1.144033e-05,0.056081,1.579702e-05,1.579702e-08,0.000056,1.410270,3.545492e+03,6.946288e-10,0.000004,0.734806,83.027463,1.341191e-09,7.546562e-06,0.734806,3.876960e+02
1,1497427493000,3.0,6.0,50.826738,-0.119856,134.054522,-2.0,6.164000e-06,1.605900e-05,0.065347,1.720135e-05,1.720135e-08,0.000065,2.605289,3798.918001,2.591408e-09,0.000011,1.539156e+00,54.191659,2.591408e-09,0.000011,1.539156e+00,54.191659,6.666667,50.826738,-0.119855,134.071434,-0.666667,8.054667e-06,1.445567e-05,0.078713,1.676249e-05,1.676249e-08,0.000079,1.917207,4.608814e+03,9.654647e-10,0.000023,0.506937,1063.321492,1.612027e-09,2.263214e-05,0.856997,1.063321e+03
2,1497427494000,3.0,6.0,50.826746,-0.119839,134.170604,0.0,8.005000e-06,1.665200e-05,0.116082,1.847618e-05,1.847618e-08,0.000116,2.080200,6282.797240,1.274829e-09,0.000051,-5.250889e-01,2483.879239,1.274829e-09,0.000051,5.250889e-01,2483.879239,6.000000,50.826746,-0.119839,134.174209,-0.666667,7.682333e-06,1.656200e-05,0.102775,1.827799e-05,1.827799e-08,0.000103,2.199173,5.568636e+03,1.515503e-09,0.000024,0.281966,959.821831,1.515503e-09,2.406192e-05,0.744138,9.598218e+02
3,1497427495000,3.0,6.0,50.826755,-0.119822,134.297500,0.0,8.878000e-06,1.697500e-05,0.126896,1.915645e-05,1.915645e-08,0.000127,1.912030,6624.191836,6.802709e-10,0.000011,-1.681701e-01,341.394596,6.802709e-10,0.000011,1.681701e-01,341.394596,6.000000,50.826754,-0.119822,134.278397,0.000000,7.918000e-06,1.663733e-05,0.104188,1.843593e-05,1.843593e-08,0.000104,2.120779,5.614623e+03,1.579428e-10,0.000001,-0.078394,45.987275,1.145457e-09,3.962014e-05,0.383779,1.837529e+03
4,1497427496000,3.0,6.0,50.826761,-0.119806,134.367085,0.0,6.871000e-06,1.628500e-05,0.069585,1.767518e-05,1.767518e-08,0.000070,2.370106,3936.879827,-1.481272e-09,-0.000057,4.580765e-01,-2687.312009,1.481272e-09,0.000057,4.580765e-01,2687.312009,6.000000,50.826759,-0.119807,134.365120,0.000000,5.365333e-06,1.577467e-05,0.086723,1.696663e-05,1.696663e-08,0.000087,14.937465,5.029406e+03,-1.469299e-09,-0.000017,12.816687,-585.217127,1.922813e-09,2.467358e-05,12.928800,1.206324e+03
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
160712,7497460776000,,16.0,50.874213,0.012996,63.847630,0.0,-1.010000e-07,5.800000e-08,0.000000,1.164689e-07,1.164689e-10,0.000000,-0.574257,0.000000,-1.164689e-10,0.000000,4.040804e-08,0.000000,1.164689e-10,0.000000,4.040804e-08,0.000000,16.000000,50.874213,0.012996,63.847630,0.000000,-1.180000e-07,6.766667e-08,0.000000,1.360251e-07,1.360251e-10,0.000000,-0.572381,4.850638e-12,-1.354467e-10,0.000000,0.002352,0.000014,1.354467e-10,2.882171e-18,0.002352,1.479445e-10
160713,7497460777000,,16.0,50.874213,0.012996,63.847630,0.0,-5.100000e-08,2.900000e-08,0.000000,5.866856e-08,5.866856e-11,0.000000,-0.568627,0.000000,-5.780032e-11,0.000000,5.629955e-03,0.000000,5.780032e-11,0.000000,5.629955e-03,0.000000,16.000000,50.874213,0.012996,63.847630,0.000000,-5.900000e-08,3.400000e-08,0.000000,6.809740e-08,6.809740e-11,0.000000,-0.580962,4.850638e-12,-6.792767e-11,0.000000,-0.008581,0.000014,6.792767e-11,2.882171e-18,0.012334,1.479445e-10
160714,7497460778000,,16.0,50.874213,0.012996,63.847630,0.0,-2.500000e-08,1.500000e-08,0.000000,2.915476e-08,2.915476e-11,0.000000,-0.600000,0.000000,-2.951380e-11,0.000000,-3.137259e-02,0.000000,2.951380e-11,0.000000,3.137259e-02,0.000000,16.000000,50.874213,0.012996,63.847630,0.000000,-1.230000e-07,-7.593333e-07,0.000000,8.094121e-07,8.094121e-10,0.000000,2.252096,4.850638e-12,7.413147e-10,0.000000,2.833057,0.000014,7.995241e-10,2.882171e-18,2.853972,1.479445e-10
160715,7497460779000,,16.0,50.874212,0.012994,63.847630,0.0,-2.930000e-07,-2.322000e-06,0.000000,2.340413e-06,2.340413e-09,0.000000,7.924915,0.000000,2.311258e-09,0.000000,8.524915e+00,0.000000,2.311258e-09,0.000000,8.524915e+00,0.000000,16.000000,50.874212,0.012995,63.847630,0.000000,-1.550000e-07,-1.156333e-06,0.000000,1.180276e-06,1.180276e-09,0.000000,5.076559,4.850638e-12,3.708642e-10,0.000000,2.824463,0.000014,1.169975e-09,2.882171e-18,2.858813,1.479445e-10


In [None]:
# Median speed per label, scaled by 1000
features.groupby('label')['speed'].median() * 1000

In [None]:
# Median absolute vertical-speed change per label, scaled by 1000
features.groupby('label')['abs_vertical_speed_change'].median() * 1000

In [None]:
# Median absolute speed change per label, scaled by 1000000
features.groupby('label')['abs_speed_change'].median() * 1000000

In [None]:
# Median of the 3-row rolling average of absolute speed change per label,
# scaled by 1000000. The rolling-average columns carry the
# '_3_s_window_avg' suffix; 'abs_speed_change_3_s' is not a column of the
# feature frame and would raise a KeyError.
features.groupby('label')['abs_speed_change_3_s_window_avg'].median() * 1000000

In [None]:
# Median absolute direction change per label
features['abs_direction_change'].groupby(features['label']).median()

In [None]:
# Median absolute vertical-direction change per label, divided by 1000
features.groupby('label')['abs_vertical_direction_change'].median() / 1000