In [2]:
import numpy as np
import pandas as pd
import os
import datetime
import time
import matplotlib.pyplot as plt

# Move the working directory one level up; the relative './data/...' paths
# used by the later cells of this notebook are resolved from there.
# NOTE: the target is relative to the *current* working directory, so running
# this cell more than once in the same kernel climbs one more level each time.
os.chdir('../')

In [8]:
def prepare_additional_location_features(df, moving_average_window):
    """Derive movement features from raw location samples.

    Processing steps:

    1. ``epoch_time`` is rounded to the nearest multiple of 1000 (presumably
       milliseconds -> whole seconds; confirm against the data source) and all
       rows sharing a rounded timestamp are averaged into one row.
    2. Row-to-row differences give ``distance`` (Euclidean norm of the
       Latitude/Longitude differences), ``speed``, ``vertical_speed``,
       ``direction`` (Longitude difference / Latitude difference) and
       ``vertical_direction`` (Altitude difference / distance).
    3. A second difference gives the ``*_change`` columns, and their absolute
       values give the ``abs_*_change`` columns.
    4. For every column except the first (``epoch_time``), a centered rolling
       mean named ``<column>_<moving_average_window>_s`` is appended.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``epoch_time``, ``Latitude``, ``Longitude``,
        ``Altitude``, ``Ignore1`` and ``Ignore2``. The input frame is not
        modified.
    moving_average_window : int
        Passed to ``Series.rolling(window=...)``. For an integer this is a
        number of *rows*, so it only corresponds to seconds when the grouped
        timestamps have no gaps.

    Returns
    -------
    pandas.DataFrame
        One row per rounded ``epoch_time``, without ``Ignore1``/``Ignore2``,
        with the derived feature columns and their rolling means appended.

    Raises
    ------
    KeyError
        If ``Ignore1`` or ``Ignore2`` (or any other referenced column) is
        missing from ``df``.
    """
    base_features = ['distance', 'speed', 'vertical_speed',
                     'direction', 'vertical_direction']
    change_names = {'speed': 'speed_change',
                    'vertical_speed': 'vertical_speed_change',
                    'direction': 'direction_change',
                    'vertical_direction': 'vertical_direction_change'}
    change_features = list(change_names.values())

    # Round the timestamps. assign() builds a new frame; writing the column
    # directly would overwrite epoch_time in the caller's DataFrame.
    rounded_time = ((df['epoch_time'] / 1000).round(0) * 1000).astype('int64')
    df = df.assign(epoch_time=rounded_time)

    # Collapse all samples that share a rounded timestamp into their mean.
    # as_index=False keeps epoch_time as the first column of the result.
    df = (df.groupby(['epoch_time'], as_index=False)
            .mean()
            .drop(['Ignore1', 'Ignore2'], axis=1))

    # First difference: change between consecutive rows (first row is NaN).
    df_diff = df.diff()
    df_diff['distance'] = (df_diff['Latitude'].pow(2) + df_diff['Longitude'].pow(2)).pow(0.5)
    df_diff['speed'] = df_diff['distance'] / df_diff['epoch_time']
    df_diff['vertical_speed'] = df_diff['Altitude'] / df_diff['epoch_time']
    df_diff['direction'] = df_diff['Longitude'] / df_diff['Latitude']
    df_diff['vertical_direction'] = df_diff['Altitude'] / df_diff['distance']

    # Divisions by zero produce +/-inf; treat them as missing values.
    df_diff = df_diff.replace([np.inf, -np.inf], np.nan)

    # Second difference: how the first-order features change between rows.
    df_diff_2 = df_diff.diff().rename(columns=change_names)
    df_diff_2 = df_diff_2.replace([np.inf, -np.inf], np.nan)

    # Attach the new features to the grouped frame by row index.
    df = df.merge(df_diff[base_features], left_index=True, right_index=True) \
           .merge(df_diff_2[change_features], left_index=True, right_index=True)

    # Magnitude of each change, regardless of sign.
    for change_column in change_features:
        df['abs_' + change_column] = df[change_column].abs()

    # Centered rolling mean of every column except epoch_time (column 0).
    # df.columns[1:] is evaluated once, so the columns appended inside the
    # loop are not themselves averaged again.
    suffix = '_' + str(moving_average_window) + '_s'
    for column in df.columns[1:]:
        df[column + suffix] = df[column].rolling(window=moving_average_window, center=True).mean()

    return df

In [9]:
# Moving-average window passed to prepare_additional_location_features.
window = 3

# Build and save the feature table for each dataset split. Every split is read
# from ./data/<split>/Location.parquet and written next to it as
# features_denys.parquet. `filename` and `df` keep the values of the last
# split ('test') once the loop finishes.
for split in ('train', 'validate', 'test'):
    filename = './data/' + split + '/Location.parquet'
    df = pd.read_parquet(filename)
    df = prepare_additional_location_features(df, window)
    df.to_parquet('./data/' + split + '/features_denys.parquet')