In [1]:
import numpy as np
import pandas as pd

import utilities.constants as const
from core.TrajectoryDF import NumPandasTraj as NumTrajDF
# NOTE: the alias `help` rebinds (shadows) Python's builtin help() in this
# notebook's namespace; the name is kept so existing references keep working.
from features.helper_functions import Helpers as help
from features.temporal_features import TemporalFeatures as temporal

In [2]:
%%time
# Now lets import the gulls.csv and convert it into our
# NumPandasTraj.

gulls = pd.read_csv('./data/gulls.csv')
gulls = NumTrajDF(gulls,
                 latitude='location-lat',
                 longitude='location-long',
                 datetime='timestamp',
                 traj_id='tag-local-identifier',
                 rest_of_columns=[])
gulls.head()

CPU times: user 270 ms, sys: 35.2 ms, total: 305 ms
Wall time: 304 ms


Unnamed: 0_level_0,Unnamed: 1_level_0,event-id,visible,lon,lat,sensor-type,individual-taxon-canonical-name,individual-local-identifier,study-name
traj_id,DateTime,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
91732,2009-05-27 14:00:00,1082620685,True,24.58617,61.24783,gps,Larus fuscus,91732A,Navigation experiments in lesser black-backed ...
91732,2009-05-27 20:00:00,1082620686,True,24.58217,61.23267,gps,Larus fuscus,91732A,Navigation experiments in lesser black-backed ...
91732,2009-05-28 05:00:00,1082620687,True,24.53133,61.18833,gps,Larus fuscus,91732A,Navigation experiments in lesser black-backed ...
91732,2009-05-28 08:00:00,1082620688,True,24.582,61.23283,gps,Larus fuscus,91732A,Navigation experiments in lesser black-backed ...
91732,2009-05-28 14:00:00,1082620689,True,24.5825,61.23267,gps,Larus fuscus,91732A,Navigation experiments in lesser black-backed ...


In [3]:
%%time
# Now lets import the geolife_sample.csv and convert it into our
# NumPandasTraj.

pdf = pd.read_csv('./data/geolife_sample.csv')
dataframe = NumTrajDF(pdf,'lat','lon','datetime','id')
dataframe.head()

CPU times: user 470 ms, sys: 58.5 ms, total: 529 ms
Wall time: 527 ms


Unnamed: 0_level_0,Unnamed: 1_level_0,lat,lon
traj_id,DateTime,Unnamed: 2_level_1,Unnamed: 3_level_1
1,2008-10-23 16:53:05,39.984094,116.319236
1,2008-10-23 16:53:06,39.984198,116.319322
1,2008-10-23 05:53:11,39.984224,116.319402
1,2008-10-23 05:53:16,39.984211,116.319389
1,2008-10-23 05:53:21,39.984217,116.319422


In [4]:
%%time
# Here, we calculate the date, time, day of the week and whether it is
# a weekend, all in the same cell. The library consistently runs this
# in 3-4 seconds, whereas in the dask version the time-column creation
# alone took 7.5 seconds. So this version is more efficient in terms
# of code execution and memory usage.
#
# dataframe = temporal.create_time_column(dataframe)
# dataframe = temporal.create_time_of_day_column(dataframe)
# dataframe = temporal.create_date_column(dataframe)
# dataframe = temporal.create_day_of_week_column(dataframe)
# dataframe = temporal.create_weekend_indicator_column(dataframe)
# dataframe.head()
#

CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 4.05 µs


In [5]:
# %%time
# # Here, we calculate the date, time, day of the week and whether it
# # is a weekend, all in the same cell. The library consistently runs
# # this within 2 seconds, even though this dataset has more
# # trajectories than geolife_sample.csv.
#
# gulls = temporal.create_time_column(gulls)
# gulls = temporal.create_time_of_day_column(gulls)
# gulls = temporal.create_date_column(gulls)
# gulls = temporal.create_day_of_week_column(gulls)
# gulls = temporal.create_weekend_indicator_column(gulls)
# gulls.head()

In [6]:
%%time

# Now, let's calculate the time duration.
# `delta_one` is computed with no traj_id argument; `delta_two` passes
# traj_id='5', presumably restricting the result to that one trajectory
# (TODO confirm against get_traj_duration).
# Only `delta_one` is displayed; `delta_two` is computed (and therefore
# included in the timing) but never shown in this cell.
delta_one = temporal.get_traj_duration(dataframe)
delta_two = temporal.get_traj_duration(dataframe, traj_id='5')
delta_one

CPU times: user 127 ms, sys: 18 ms, total: 145 ms
Wall time: 203 ms


Unnamed: 0_level_0,Traj_Duration
traj_id,Unnamed: 1_level_1
1,52 days 18:38:07
5,146 days 01:34:07


In [7]:
%%time

# Now, lets get the starting time of all the trajectory IDs.
temporal.get_start_time(gulls)

CPU times: user 60.3 ms, sys: 21.8 ms, total: 82.1 ms
Wall time: 1.38 s


Unnamed: 0_level_0,DateTime
traj_id,Unnamed: 1_level_1
91732,2009-05-27 14:00:00
91733,2009-08-15 15:00:00
91734,2009-05-25 00:05:00
91735,2009-05-27 14:00:00
91737,2009-07-26 05:00:00
...,...
91920,2009-08-19 19:00:00
91921,2009-08-19 19:00:00
91924,2009-08-19 19:00:00
91929,2009-08-19 19:00:00


In [8]:
%%time

from features.helper_functions import Helpers as help

gl = temporal.create_date_column(dataframe)
print(type(gl))
gl.head()

<class 'core.TrajectoryDF.NumPandasTraj'>
CPU times: user 213 ms, sys: 14 ms, total: 227 ms
Wall time: 226 ms


Unnamed: 0_level_0,Unnamed: 1_level_0,lat,lon,Date
traj_id,DateTime,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,2008-10-23 16:53:05,39.984094,116.319236,2008-10-23
1,2008-10-23 16:53:06,39.984198,116.319322,2008-10-23
1,2008-10-23 05:53:11,39.984224,116.319402,2008-10-23
1,2008-10-23 05:53:16,39.984211,116.319389,2008-10-23
1,2008-10-23 05:53:21,39.984217,116.319422,2008-10-23


In [9]:
import pymove as pm

# Build the PyMove frame from the same raw pandas data (`pdf`) that the
# geolife NumPandasTraj was created from.
pm_traj = pm.MoveDataFrame(
    pdf,
    latitude='lat',
    longitude='lon',
    datetime='datetime',
    traj_id='id',
)

In [10]:
%%time

# PyMove date-feature generation on `pm_traj`. The return value of
# generate_date_features() is not assigned; the next line relies on a
# 'date' column being present on pm_traj afterwards.
# 'id' and 'date' are then moved into the index in place (drop=True
# removes them from the columns) before previewing the first rows.
# NOTE(review): because the index is set with inplace=True, re-running
# this cell on the same kernel will likely fail, since 'id' and 'date'
# are no longer columns -- confirm.
pm_traj.generate_date_features()
pm_traj.set_index(['id', 'date'], drop=True, inplace=True)
pm_traj.head()

CPU times: user 152 ms, sys: 10.3 ms, total: 163 ms
Wall time: 161 ms


Unnamed: 0_level_0,Unnamed: 1_level_0,lat,lon,datetime
id,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,2008-10-23,39.984094,116.319236,2008-10-23 16:53:05
1,2008-10-23,39.984198,116.319322,2008-10-23 16:53:06
1,2008-10-23,39.984224,116.319402,2008-10-23 05:53:11
1,2008-10-23,39.984211,116.319389,2008-10-23 05:53:16
1,2008-10-23,39.984217,116.319422,2008-10-23 05:53:21


In [11]:
%%time


CPU times: user 3 µs, sys: 1e+03 ns, total: 4 µs
Wall time: 5.48 µs
