In [105]:
from datetime import timedelta
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from geopy.distance import vincenty

In [201]:
def distance_delta(row):
    lat = row.latitude
    long = row.longitude
    lat_1 = row.latitude_shift_1
    long_1 = row.longitude_shift_1
    
    if any(map(np.isnan, [lat, long, lat_1, long_1])):
        return np.nan
    return vincenty((lat, long), (lat_1, long_1)).feet

In [208]:
def is_walk(row):
    return row.timestamp_delta > timedelta(minutes=5) and row.distance_delta > max(row.horizontalAccuracy, row.verticalAccuracy)

In [213]:
date_columns = ['timestamp']
df = pd.read_csv("./gps_dataset.csv", sep=";", parse_dates=date_columns)
df.sort_values('timestamp')
df['week_day'] = df.timestamp.dt.weekday_name
df['timestamp_delta'] = df.timestamp - df.timestamp.shift(1)
df['latitude_shift_1'] = df.latitude.shift(1)
df['longitude_shift_1'] = df.longitude.shift(1)
df['distance_delta'] = df.apply(distance_delta, axis=1)
df['is_new_walk_by_time'] = df.timestamp_delta > timedelta(minutes=5)
df['is_new_walk_by_distance'] = df.distance_delta > df[["horizontalAccuracy", "B"]].max(axis=1)
df['is_walk'] = df.apply(is_walk, axis=1)
df

TypeError: 'method' object cannot be interpreted as an integer

To do:
- filter distance delta by horizonal / verical accuracy threshold. Is the distane between these points meaningful enough to suggest that someone has actually moved?
- ask what course, speed, and horizontalAccuracy	verticalAccuracy do / are all about

In [211]:
df[df.is_walk]

Unnamed: 0,timestamp,latitude,longitude,altitude,horizontalAccuracy,verticalAccuracy,speed,course,week_day,timestamp_delta,latitude_shift_1,longitude_shift_1,distance_delta,is_new_walk_by_time,is_new_walk_by_distance,is_walk
61,2018-03-13 23:50:12,40.736822,-73.98677,28.89757,65.0,65.0,-3.6,-1.0,Tuesday,02:29:28,40.702374,-73.984167,12571.244457,True,True,True
219,2018-03-14 12:59:28,40.737035,-73.987102,32.516083,30.0,30.0,0.0,-1.0,Wednesday,01:07:44,40.736935,-73.986922,61.771465,True,True,True
367,2018-03-14 21:59:52,40.737699,-73.989982,29.768972,65.0,65.0,-3.6,-1.0,Wednesday,08:38:07,40.735126,-73.990477,947.413808,True,True,True


In [203]:
df[df.is_new_walk_by_distance]

Unnamed: 0,timestamp,latitude,longitude,altitude,horizontalAccuracy,verticalAccuracy,speed,course,week_day,timestamp_delta,latitude_shift_1,longitude_shift_1,distance_delta,is_new_walk_by_time,is_new_walk_by_distance
1,2018-03-13 21:15:20,40.702438,-73.984151,28.514435,50.000000,50.000000,0.000000,251.718750,Tuesday,00:00:19,40.702452,-73.984135,6.759995,False,True
2,2018-03-13 21:15:20,40.702425,-73.984172,29.354584,30.000000,30.000000,1.332000,207.421875,Tuesday,00:00:00,40.702438,-73.984151,7.505748,False,True
3,2018-03-13 21:15:37,40.702373,-73.984281,19.380890,25.000000,25.000000,5.256000,278.789062,Tuesday,00:00:17,40.702425,-73.984172,35.669630,False,True
4,2018-03-13 21:15:44,40.702355,-73.984412,20.954529,25.000000,25.000000,5.220000,270.703125,Tuesday,00:00:07,40.702373,-73.984281,36.909769,False,True
5,2018-03-13 21:15:51,40.702327,-73.984551,18.081909,25.000000,25.000000,9.324000,251.015625,Tuesday,00:00:07,40.702355,-73.984412,39.867911,False,True
6,2018-03-13 21:15:56,40.702300,-73.984664,14.060150,25.000000,25.000000,7.092000,272.812500,Tuesday,00:00:05,40.702327,-73.984551,32.839574,False,True
7,2018-03-13 21:16:03,40.702337,-73.984773,15.728363,25.000000,25.000000,4.572000,261.562500,Tuesday,00:00:07,40.702300,-73.984664,33.092605,False,True
8,2018-03-13 21:16:12,40.702385,-73.984881,14.312653,25.000000,25.000000,5.904000,285.117188,Tuesday,00:00:09,40.702337,-73.984773,34.677743,False,True
9,2018-03-13 21:16:19,40.702404,-73.984992,15.765320,10.000000,10.000000,5.076000,284.414062,Tuesday,00:00:07,40.702385,-73.984881,31.545936,False,True
10,2018-03-13 21:16:26,40.702448,-73.985105,14.511047,25.000000,25.000000,5.616000,277.031250,Tuesday,00:00:07,40.702404,-73.984992,35.194434,False,True


In [204]:
df[df.is_new_walk_by_time]

Unnamed: 0,timestamp,latitude,longitude,altitude,horizontalAccuracy,verticalAccuracy,speed,course,week_day,timestamp_delta,latitude_shift_1,longitude_shift_1,distance_delta,is_new_walk_by_time,is_new_walk_by_distance
61,2018-03-13 23:50:12,40.736822,-73.98677,28.89757,65.0,65.0,-3.6,-1.0,Tuesday,02:29:28,40.702374,-73.984167,12571.244457,True,True
158,2018-03-14 11:39:44,40.736866,-73.986781,29.113699,65.0,65.0,-3.6,-1.0,Wednesday,11:33:30,40.736848,-73.986697,24.184859,True,True
219,2018-03-14 12:59:28,40.737035,-73.987102,32.516083,30.0,30.0,0.0,-1.0,Wednesday,01:07:44,40.736935,-73.986922,61.771465,True,True
367,2018-03-14 21:59:52,40.737699,-73.989982,29.768972,65.0,65.0,-3.6,-1.0,Wednesday,08:38:07,40.735126,-73.990477,947.413808,True,True
459,2018-03-14 22:31:55,40.738591,-73.993763,28.24526,65.0,65.0,-3.6,-1.0,Wednesday,00:05:52,40.738621,-73.993879,33.953354,True,True
463,2018-03-14 23:20:57,40.738574,-73.993766,28.024649,65.0,65.0,-3.6,-1.0,Wednesday,00:48:34,40.738623,-73.993868,33.431879,True,True
465,2018-03-14 23:27:36,40.738694,-73.994015,27.731878,65.0,65.0,-3.6,-1.0,Wednesday,00:06:30,40.738621,-73.993869,48.418425,True,True
508,2018-03-15 11:37:20,40.736814,-73.986764,28.921131,65.0,65.0,-3.6,-1.0,Thursday,12:01:16,40.736845,-73.986755,11.566452,True,True


In [200]:
df[df.is_new_walk_by_distance | df.is_new_walk_by_time]

Unnamed: 0,timestamp,latitude,longitude,altitude,horizontalAccuracy,verticalAccuracy,speed,course,week_day,timestamp_delta,latitude_shift_1,longitude_shift_1,distance_delta,is_new_walk_by_time,is_new_walk_by_distance
61,2018-03-13 23:50:12,40.736822,-73.98677,28.89757,65.0,65.0,-3.6,-1.0,Tuesday,02:29:28,40.702374,-73.984167,2.380918,True,True
158,2018-03-14 11:39:44,40.736866,-73.986781,29.113699,65.0,65.0,-3.6,-1.0,Wednesday,11:33:30,40.736848,-73.986697,0.00458,True,False
219,2018-03-14 12:59:28,40.737035,-73.987102,32.516083,30.0,30.0,0.0,-1.0,Wednesday,01:07:44,40.736935,-73.986922,0.011699,True,False
367,2018-03-14 21:59:52,40.737699,-73.989982,29.768972,65.0,65.0,-3.6,-1.0,Wednesday,08:38:07,40.735126,-73.990477,0.179434,True,True
459,2018-03-14 22:31:55,40.738591,-73.993763,28.24526,65.0,65.0,-3.6,-1.0,Wednesday,00:05:52,40.738621,-73.993879,0.006431,True,False
463,2018-03-14 23:20:57,40.738574,-73.993766,28.024649,65.0,65.0,-3.6,-1.0,Wednesday,00:48:34,40.738623,-73.993868,0.006332,True,False
465,2018-03-14 23:27:36,40.738694,-73.994015,27.731878,65.0,65.0,-3.6,-1.0,Wednesday,00:06:30,40.738621,-73.993869,0.00917,True,False
508,2018-03-15 11:37:20,40.736814,-73.986764,28.921131,65.0,65.0,-3.6,-1.0,Thursday,12:01:16,40.736845,-73.986755,0.002191,True,False
