In [1]:
from core.TrajectoryDF import NumPandasTraj as NumTrajDF
from features.spatial_features import SpatialFeatures as spatial
from features.helper_functions import Helpers as Helpers
from utilities.conversions import Conversions as con
import pandas as pd

In [2]:
%%time
# Load the geolife sample and wrap it as a NumPandasTraj in one step,
# passing the 'lat', 'lon', 'datetime' and 'id' column names positionally.
geolife = NumTrajDF(pd.read_csv('./data/geolife_sample.csv'),
                    'lat', 'lon', 'datetime', 'id')

# Same for the gulls data, this time naming each column role explicitly.
gulls = NumTrajDF(pd.read_csv('./data/gulls.csv'),
                  latitude='location-lat',
                  longitude='location-long',
                  datetime='timestamp',
                  traj_id='tag-local-identifier',
                  rest_of_columns=[])

# The atlantic data needs cleaning before it can become a NumPandasTraj:
# start by passing its 'Latitude' and 'Longitude' columns through the
# conversion helper.
# NOTE(review): presumably this turns N/S/E/W-suffixed values into signed
# degrees -- confirm against utilities.conversions.
atlantic = con.convert_directions_to_degree_lat_lon(
    pd.read_csv('./data/atlantic.csv'), 'Latitude', "Longitude")
def convert_to_datetime(row):
    """
    Build a 'YYYY-MM-DD HH:MM:00' string from a row's 'Date' and 'Time'.

    Parameters
    ----------
        row: mapping-like (e.g. the Series that DataFrame.apply(axis=1) passes)
            Must provide 'Date' as a YYYYMMDD number and 'Time' as an
            HHMM number, e.g. 18510625 and 600.

    Returns
    -------
        str
            The combined timestamp, e.g. '1851-06-25 06:00:00'.

    Raises
    ------
        ValueError, TypeError
            If 'Date' or 'Time' cannot be converted to an integer.
    """
    # Normalise through int() before slicing: if a value arrives as a float
    # (18510625.0 / 600.0), slicing its str() would pick up the '.0' suffix
    # and yield a malformed date or an unparsable minute field.
    date_digits = '{:08d}'.format(int(row['Date']))
    # HHMM -> (HH, MM) arithmetically instead of via string slicing.
    hours, minutes = divmod(int(row['Time']), 100)
    this_date = '{}-{}-{}'.format(date_digits[0:4], date_digits[4:6], date_digits[6:])
    this_time = '{:02d}:{:02d}:00'.format(hours, minutes)
    return '{} {}'.format(this_date, this_time)
# Add a 'DateTime' column by applying convert_to_datetime to every row
# (axis=1 hands each row, not each column, to the function).
atlantic['DateTime'] = atlantic.apply(convert_to_datetime, axis=1)
# Wrap the cleaned frame as a NumPandasTraj, naming the columns that hold
# the latitude, longitude, timestamp and trajectory id.
atlantic = NumTrajDF(atlantic,
                         latitude='Latitude',
                         longitude='Longitude',
                         datetime='DateTime',
                         traj_id='ID',
                         rest_of_columns=[])
# Display the first rows of the converted dataframe.
atlantic.head()

CPU times: user 10.1 s, sys: 143 ms, total: 10.3 s
Wall time: 10.3 s


Unnamed: 0_level_0,Unnamed: 1_level_0,Name,Date,Time,Event,Status,lat,lon,Maximum Wind,Minimum Pressure,Low Wind NE,...,Low Wind SW,Low Wind NW,Moderate Wind NE,Moderate Wind SE,Moderate Wind SW,Moderate Wind NW,High Wind NE,High Wind SE,High Wind SW,High Wind NW
DateTime,traj_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1851-06-25 00:00:00,AL011851,UNNAMED,18510625,0,,HU,28.0,-94.8,80,-999,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999
1851-06-25 06:00:00,AL011851,UNNAMED,18510625,600,,HU,28.0,-95.4,80,-999,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999
1851-06-25 12:00:00,AL011851,UNNAMED,18510625,1200,,HU,28.0,-96.0,80,-999,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999
1851-06-25 18:00:00,AL011851,UNNAMED,18510625,1800,,HU,28.1,-96.5,80,-999,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999
1851-06-25 21:00:00,AL011851,UNNAMED,18510625,2100,L,HU,28.2,-96.8,80,-999,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999


In [3]:
%%time

# Print the bounding box of each of the three trajectory dataframes.
for traj_df in (geolife, gulls, atlantic):
    print(spatial.get_bounding_box(traj_df))

(22.147577, 113.54884299999999, 41.132062, 121.156224)
(-62.71617, 4.02517, 65.2325, 103.19317)
(7.2, -359.1, 81.0, 63.0)
CPU times: user 12 ms, sys: 235 µs, total: 12.2 ms
Wall time: 10.4 ms


In [4]:
%%time

# Print the start location of each dataframe when no traj_id
# is supplied.
for traj_df in (geolife, gulls, atlantic):
    print(spatial.get_start_location(traj_df))

(39.984224, 116.319402)
(61.27033, 24.144000000000002)
(28.0, -94.8)
CPU times: user 43.1 ms, sys: 0 ns, total: 43.1 ms
Wall time: 41 ms


In [5]:
%%time

# Print the start location of one specific trajectory per dataframe,
# selected by its traj_id.
for traj_df, traj_id in ((geolife, '1'), (gulls, "91732"), (atlantic, 'AL011851')):
    print(spatial.get_start_location(traj_df, traj_id))

(39.984224, 116.319402)
(61.24783000000001, 24.586170000000003)
(28.0, -94.8)
CPU times: user 96.5 ms, sys: 36 µs, total: 96.6 ms
Wall time: 95.1 ms


In [6]:
%%time

# Print the end location of each dataframe when no traj_id
# is supplied.
for traj_df in (geolife, gulls, atlantic):
    print(spatial.get_end_location(traj_df))

(39.999978000000006, 116.32746000000002)
(56.566, 21.19633)
(40.7, -45.4)
CPU times: user 43.3 ms, sys: 226 µs, total: 43.5 ms
Wall time: 41.7 ms


In [7]:
%%time

# Print the end location of one specific trajectory per dataframe,
# selected by its traj_id.
for traj_df, traj_id in ((geolife, '1'), (gulls, "91732"), (atlantic, 'AL011851')):
    print(spatial.get_end_location(traj_df, traj_id))

(39.977878999999994, 116.326628)
(31.09783, 29.812170000000002)
(31.0, -100.2)
CPU times: user 83.8 ms, sys: 3.71 ms, total: 87.5 ms
Wall time: 85.4 ms


In [8]:
%%time

# Now, let's calculate the distance between 2 consecutive points for
# each dataset, one by one.
# First, we compute the distances between 2 consecutive points
# in the geolife dataset. The result is assigned back to `geolife` and
# the first 10 rows are displayed.
# NOTE(review): the meaning of the positional `True` flag is not visible
# in this notebook -- confirm against SpatialFeatures.
geolife = spatial.create_distance_between_consecutive_column(geolife,
                                                             True)
geolife.head(10)

CPU times: user 219 ms, sys: 28.5 ms, total: 247 ms
Wall time: 615 ms


Unnamed: 0_level_0,Unnamed: 1_level_0,lat,lon,Distance_prev_to_curr
DateTime,traj_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2008-10-23 16:53:05,1,39.984094,116.319236,0.0
2008-10-23 16:53:06,1,39.984198,116.319322,0.01369
2008-10-23 05:53:11,1,39.984224,116.319402,0.007404
2008-10-23 05:53:16,1,39.984211,116.319389,0.001821
2008-10-23 05:53:21,1,39.984217,116.319422,0.00289
2008-10-23 05:53:23,1,39.98471,116.319865,0.066556
2008-10-23 05:53:28,1,39.984674,116.31981,0.006163
2008-10-23 05:53:33,1,39.984623,116.319773,0.006488
2008-10-23 05:53:38,1,39.984606,116.319732,0.003972
2008-10-23 05:53:43,1,39.984555,116.319728,0.005681


In [9]:
# %%time
# Here, we compute the distance between 2 consecutive points
# in the seagulls dataset and display the first rows of the result.
gulls = spatial.create_distance_between_consecutive_column(gulls,
                                                           True)
gulls.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,event-id,visible,lon,lat,sensor-type,individual-taxon-canonical-name,individual-local-identifier,study-name,Distance_prev_to_curr
DateTime,traj_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2009-05-27 14:00:00,91732,1082620685,True,24.58617,61.24783,gps,Larus fuscus,91732A,Navigation experiments in lesser black-backed ...,0.0
2009-05-27 20:00:00,91732,1082620686,True,24.58217,61.23267,gps,Larus fuscus,91732A,Navigation experiments in lesser black-backed ...,1.699244
2009-05-28 05:00:00,91732,1082620687,True,24.53133,61.18833,gps,Larus fuscus,91732A,Navigation experiments in lesser black-backed ...,5.63212
2009-05-28 08:00:00,91732,1082620688,True,24.582,61.23283,gps,Larus fuscus,91732A,Navigation experiments in lesser black-backed ...,5.643315
2009-05-28 14:00:00,91732,1082620689,True,24.5825,61.23267,gps,Larus fuscus,91732A,Navigation experiments in lesser black-backed ...,0.032131


In [10]:
# %%time
# Here, we compute the distance between 2 consecutive points
# in the atlantic dataset and display the first rows of the result.
atlantic = spatial.create_distance_between_consecutive_column(atlantic,
                                                              True)
atlantic.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Name,Date,Time,Event,Status,lat,lon,Maximum Wind,Minimum Pressure,Low Wind NE,...,Low Wind NW,Moderate Wind NE,Moderate Wind SE,Moderate Wind SW,Moderate Wind NW,High Wind NE,High Wind SE,High Wind SW,High Wind NW,Distance_prev_to_curr
DateTime,traj_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1851-06-25 00:00:00,AL011851,UNNAMED,18510625,0,,HU,28.0,-94.8,80,-999,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,0.0
1851-06-25 06:00:00,AL011851,UNNAMED,18510625,600,,HU,28.0,-95.4,80,-999,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,58.907516
1851-06-25 12:00:00,AL011851,UNNAMED,18510625,1200,,HU,28.0,-96.0,80,-999,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,58.907516
1851-06-25 18:00:00,AL011851,UNNAMED,18510625,1800,,HU,28.1,-96.5,80,-999,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,50.310977
1851-06-25 21:00:00,AL011851,UNNAMED,18510625,2100,L,HU,28.2,-96.8,80,-999,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,31.44437


In [11]:
%%time

# Now, let's calculate the distance from the start point to the current
# point for each dataset, one by one.
# Here, we compute the distance from the start to every point
# in the geolife dataset and display the first rows of the result.

geolife = spatial.create_distance_from_start_column(geolife,
                                                    True)
geolife.head()

CPU times: user 222 ms, sys: 58.9 ms, total: 281 ms
Wall time: 648 ms


Unnamed: 0_level_0,Unnamed: 1_level_0,lat,lon,Distance_prev_to_curr,Distance_start_to_curr
DateTime,traj_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2008-10-23 16:53:05,1,39.984094,116.319236,0.0,0.0
2008-10-23 16:53:06,1,39.984198,116.319322,0.01369,0.01369
2008-10-23 05:53:11,1,39.984224,116.319402,0.007404,0.020223
2008-10-23 05:53:16,1,39.984211,116.319389,0.001821,0.018417
2008-10-23 05:53:21,1,39.984217,116.319422,0.00289,0.020933


In [12]:
%%time
# Here, we compute the distance from the start to every point
# in the seagulls dataset and display the first rows of the result.
gulls = spatial.create_distance_from_start_column(gulls,
                                                  True)
gulls.head()

CPU times: user 213 ms, sys: 53.1 ms, total: 266 ms
Wall time: 635 ms


Unnamed: 0_level_0,Unnamed: 1_level_0,event-id,visible,lon,lat,sensor-type,individual-taxon-canonical-name,individual-local-identifier,study-name,Distance_prev_to_curr,Distance_start_to_curr
DateTime,traj_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2009-05-27 14:00:00,91732,1082620685,True,24.58617,61.24783,gps,Larus fuscus,91732A,Navigation experiments in lesser black-backed ...,0.0,0.0
2009-05-27 20:00:00,91732,1082620686,True,24.58217,61.23267,gps,Larus fuscus,91732A,Navigation experiments in lesser black-backed ...,1.699244,1.699244
2009-05-28 05:00:00,91732,1082620687,True,24.53133,61.18833,gps,Larus fuscus,91732A,Navigation experiments in lesser black-backed ...,5.63212,7.238296
2009-05-28 08:00:00,91732,1082620688,True,24.582,61.23283,gps,Larus fuscus,91732A,Navigation experiments in lesser black-backed ...,5.643315,1.682778
2009-05-28 14:00:00,91732,1082620689,True,24.5825,61.23267,gps,Larus fuscus,91732A,Navigation experiments in lesser black-backed ...,0.032131,1.697111


In [13]:
%%time

# Here, we compute the distance from the start to every point
# in the atlantic dataset and display the first rows of the result.
atlantic = spatial.create_distance_from_start_column(atlantic,
                                                     True)
atlantic.head()

CPU times: user 220 ms, sys: 92.5 ms, total: 312 ms
Wall time: 602 ms


Unnamed: 0_level_0,Unnamed: 1_level_0,Name,Date,Time,Event,Status,lat,lon,Maximum Wind,Minimum Pressure,Low Wind NE,...,Moderate Wind NE,Moderate Wind SE,Moderate Wind SW,Moderate Wind NW,High Wind NE,High Wind SE,High Wind SW,High Wind NW,Distance_prev_to_curr,Distance_start_to_curr
DateTime,traj_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1851-06-25 00:00:00,AL011851,UNNAMED,18510625,0,,HU,28.0,-94.8,80,-999,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,0.0,0.0
1851-06-25 06:00:00,AL011851,UNNAMED,18510625,600,,HU,28.0,-95.4,80,-999,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,58.907516,58.907516
1851-06-25 12:00:00,AL011851,UNNAMED,18510625,1200,,HU,28.0,-96.0,80,-999,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,58.907516,117.814677
1851-06-25 18:00:00,AL011851,UNNAMED,18510625,1800,,HU,28.1,-96.5,80,-999,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,50.310977,167.196062
1851-06-25 21:00:00,AL011851,UNNAMED,18510625,2100,L,HU,28.2,-96.8,80,-999,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,31.44437,197.430193


In [14]:
%%time

# Here, we calculate the distance travelled by traj_id '1'
# on the date 2008-10-23. The call is the cell's last expression,
# so its return value is shown as the cell output.
spatial.get_distance_by_date_and_traj_id(geolife, '2008-10-23', '1')

CPU times: user 220 ms, sys: 4.33 ms, total: 224 ms
Wall time: 223 ms


14.74827854559045

In [15]:
%%time

# Now, let's compute the distance of every point in the geolife
# dataset from the coordinates (0, 0) and display the first rows
# of the result.
geolife = spatial.create_distance_from_given_point_column(geolife,
                                                          (0, 0),
                                                          True)
geolife.head()

CPU times: user 296 ms, sys: 72 ms, total: 368 ms
Wall time: 819 ms


Unnamed: 0_level_0,Unnamed: 1_level_0,lat,lon,Distance_prev_to_curr,Distance_start_to_curr,"Distance_to_(0, 0)"
DateTime,traj_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2008-10-23 16:53:05,1,39.984094,116.319236,0.0,0.0,12215.866851
2008-10-23 16:53:06,1,39.984198,116.319322,0.01369,0.01369,12215.870331
2008-10-23 05:53:11,1,39.984224,116.319402,0.007404,0.020223,12215.875951
2008-10-23 05:53:16,1,39.984211,116.319389,0.001821,0.018417,12215.875333
2008-10-23 05:53:21,1,39.984217,116.319422,0.00289,0.020933,12215.877811


In [16]:
%%time

# Now, let's check whether each point in the geolife dataset
# is within 15000 km of the coordinates (0, 0).
geolife = spatial.create_point_within_range_column(geolife,
                                                   (0,0),
                                                   15000,
                                                   True)
geolife.head()

CPU times: user 309 ms, sys: 72.2 ms, total: 382 ms
Wall time: 895 ms


Unnamed: 0_level_0,Unnamed: 1_level_0,lat,lon,Distance_prev_to_curr,Distance_start_to_curr,"Distance_to_(0, 0)","Within_15000_km_from_(0, 0)"
DateTime,traj_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2008-10-23 16:53:05,1,39.984094,116.319236,0.0,0.0,12215.866851,True
2008-10-23 16:53:06,1,39.984198,116.319322,0.01369,0.01369,12215.870331,True
2008-10-23 05:53:11,1,39.984224,116.319402,0.007404,0.020223,12215.875951,True
2008-10-23 05:53:16,1,39.984211,116.319389,0.001821,0.018417,12215.875333,True
2008-10-23 05:53:21,1,39.984217,116.319422,0.00289,0.020933,12215.877811,True


In [17]:
%%time

# Now, let's calculate the speed of the object from the previous
# point of the trajectory to the current point.

geolife = spatial.create_speed_from_prev_column(geolife, True)
# Count the null entries in the new 'Speed_from_prev' column.
print(geolife['Speed_from_prev'].isnull().sum(axis=0))
geolife.head(10)

0              NaN
1         0.000278
2        13.001389
3         0.001389
4         0.001389
           ...    
74996     0.001389
74997     0.001389
74998     0.001389
74999     0.000833
75000     0.000556
Name: DateTime, Length: 75001, dtype: float64
0             NaN
1        0.000000
2        0.000000
3        0.000000
4        0.000000
           ...   
74996    0.001389
74997    0.001389
74998    0.001389
74999    0.001389
75000    0.001389
Name: DateTime, Length: 75001, dtype: float64
0             NaN
1        0.001389
2        0.000000
3        0.001389
4        0.000000
           ...   
67656    0.001389
67657    0.001389
67658    0.001389
67659    0.001389
67660    0.005556
Name: DateTime, Length: 67661, dtype: float64
142662
CPU times: user 300 ms, sys: 101 ms, total: 400 ms
Wall time: 421 ms


Unnamed: 0_level_0,Unnamed: 1_level_0,lat,lon,Distance_prev_to_curr,Distance_start_to_curr,"Distance_to_(0, 0)","Within_15000_km_from_(0, 0)",Speed_from_prev
DateTime,traj_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2008-10-23 16:53:05,1,39.984094,116.319236,0.0,0.0,12215.866851,True,
2008-10-23 16:53:06,1,39.984198,116.319322,0.01369,0.01369,12215.870331,True,49.284551
2008-10-23 05:53:11,1,39.984224,116.319402,0.007404,0.020223,12215.875951,True,0.000569
2008-10-23 05:53:16,1,39.984211,116.319389,0.001821,0.018417,12215.875333,True,1.31118
2008-10-23 05:53:21,1,39.984217,116.319422,0.00289,0.020933,12215.877811,True,2.080563
2008-10-23 05:53:23,1,39.98471,116.319865,0.066556,0.086969,12215.897175,True,119.800794
2008-10-23 05:53:28,1,39.984674,116.31981,0.006163,0.080938,12215.893922,True,4.437351
2008-10-23 05:53:33,1,39.984623,116.319773,0.006488,0.074521,12215.892635,True,4.671522
2008-10-23 05:53:38,1,39.984606,116.319732,0.003972,0.070902,12215.889879,True,2.85973
2008-10-23 05:53:43,1,39.984555,116.319728,0.005681,0.066218,12215.891272,True,4.090444


In [18]:
# Compute the speed from the previous point to the current point
# for the atlantic dataset.
atlantic = spatial.create_speed_from_prev_column(atlantic, True)

0        NaN
1        6.0
2        6.0
3        6.0
4        3.0
        ... 
49100    6.0
49101    6.0
49102    6.0
49103    6.0
49104    6.0
Name: DateTime, Length: 49105, dtype: float64
