In [1]:
from core.TrajectoryDF import NumPandasTraj as NumTrajDF
from features.spatial_features import SpatialFeatures as spatial
from utilities.conversions import Conversions as con
from utilities.helper_functions import Helpers as help
import pandas as pd

In [2]:
%%time
# Load the geolife sample and wrap it as a NumPandasTraj.
geolife = NumTrajDF(pd.read_csv('./data/geolife_sample.csv'),
                    'lat', 'lon', 'datetime', 'id')

# Load the gulls data and wrap it as a NumPandasTraj, naming the
# latitude, longitude, datetime and trajectory-id columns explicitly.
gulls = NumTrajDF(pd.read_csv('./data/gulls.csv'),
                  latitude='location-lat',
                  longitude='location-long',
                  datetime='timestamp',
                  traj_id='tag-local-identifier',
                  rest_of_columns=[])

# Load the atlantic data and pass its 'Latitude'/'Longitude' columns
# through the direction-to-degree helper (presumably turning
# hemisphere-suffixed values into signed degrees -- confirm against
# the helper). The NumPandasTraj conversion happens further down, once
# a DateTime column has been built.
atlantic = con.convert_directions_to_degree_lat_lon(
    pd.read_csv('./data/atlantic.csv'), 'Latitude', 'Longitude')
def convert_to_datetime(row):
    """
    Build a 'YYYY-MM-DD HH:MM:00' string from a row's 'Date' and 'Time'.

    Parameters
    ----------
        row: mapping-like (e.g. a pandas Series)
            Must provide 'Date' as a YYYYMMDD number (e.g. 18510625) and
            'Time' as an HHMM number (e.g. 600 for 06:00).

    Returns
    -------
        str
            The combined date and time, with seconds fixed to '00'.
    """
    # Go through int() first so float-typed columns (18510625.0, 600.0)
    # yield the same digits as integer ones instead of a trailing '.0'.
    date_digits = str(int(row['Date']))
    # HHMM splits into hours and minutes; divmod also handles values
    # below 100 (e.g. 0 or 30) without any string slicing.
    hours, minutes = divmod(int(row['Time']), 100)

    this_date = '{}-{}-{}'.format(date_digits[0:4], date_digits[4:6], date_digits[6:])
    this_time = '{:02d}:{:02d}:00'.format(hours, minutes)
    return '{} {}'.format(this_date, this_time)
# Derive a 'DateTime' string column row by row; the NumPandasTraj
# constructor below is told to use it as the datetime column.
atlantic['DateTime'] = atlantic.apply(convert_to_datetime, axis='columns')
atlantic = NumTrajDF(
    atlantic,
    latitude='Latitude',
    longitude='Longitude',
    datetime='DateTime',
    traj_id='ID',
    rest_of_columns=[],
)
# Preview the first rows of the converted frame.
atlantic.head()

CPU times: user 7.88 s, sys: 77.3 ms, total: 7.96 s
Wall time: 7.96 s


Unnamed: 0_level_0,Unnamed: 1_level_0,Name,Date,Time,Event,Status,lat,lon,Maximum Wind,Minimum Pressure,Low Wind NE,...,Low Wind SW,Low Wind NW,Moderate Wind NE,Moderate Wind SE,Moderate Wind SW,Moderate Wind NW,High Wind NE,High Wind SE,High Wind SW,High Wind NW
DateTime,traj_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1851-06-25 00:00:00,AL011851,UNNAMED,18510625,0,,HU,28.0,-94.8,80,-999,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999
1851-06-25 06:00:00,AL011851,UNNAMED,18510625,600,,HU,28.0,-95.4,80,-999,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999
1851-06-25 12:00:00,AL011851,UNNAMED,18510625,1200,,HU,28.0,-96.0,80,-999,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999
1851-06-25 18:00:00,AL011851,UNNAMED,18510625,1800,,HU,28.1,-96.5,80,-999,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999
1851-06-25 21:00:00,AL011851,UNNAMED,18510625,2100,L,HU,28.2,-96.8,80,-999,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,-999


In [3]:
%%time

# Print the bounding box of each dataframe in turn.
for dataset in (geolife, gulls, atlantic):
    print(spatial.get_bounding_box(dataset))

(22.147577, 113.54884299999999, 41.132062, 121.156224)
(-62.71617, 4.02517, 65.2325, 103.19317)
(7.2, -359.1, 81.0, 63.0)
CPU times: user 6.86 ms, sys: 0 ns, total: 6.86 ms
Wall time: 5.7 ms


In [4]:
%%time

# Now, let's get the start location of each dataframe without
# giving a traj_id.
for dataset in (geolife, gulls, atlantic):
    print(spatial.get_start_location(dataset))

(39.984224, 116.319402)
(61.27033, 24.144000000000002)
(28.0, -94.8)
CPU times: user 44.5 ms, sys: 4.24 ms, total: 48.7 ms
Wall time: 46.4 ms


In [5]:
%%time

# Now let's check the start location of one trajectory per dataframe
# by passing its traj_id.
for dataset, traj_id in ((geolife, '1'), (gulls, "91732"), (atlantic, 'AL011851')):
    print(spatial.get_start_location(dataset, traj_id))

(39.984224, 116.319402)
(61.24783000000001, 24.586170000000003)
(28.0, -94.8)
CPU times: user 80.1 ms, sys: 0 ns, total: 80.1 ms
Wall time: 78.7 ms


In [6]:
%%time

# Now, let's get the end location of each dataframe without
# giving a traj_id.
for dataset in (geolife, gulls, atlantic):
    print(spatial.get_end_location(dataset))

(39.999978000000006, 116.32746000000002)
(56.566, 21.19633)
(40.7, -45.4)
CPU times: user 36.7 ms, sys: 3.84 ms, total: 40.5 ms
Wall time: 38.2 ms


In [7]:
%%time

# Now let's check the end location of one trajectory per dataframe
# by passing its traj_id.
for dataset, traj_id in ((geolife, '1'), (gulls, "91732"), (atlantic, 'AL011851')):
    print(spatial.get_end_location(dataset, traj_id))

(39.977878999999994, 116.326628)
(31.09783, 29.812170000000002)
(31.0, -100.2)
CPU times: user 78.8 ms, sys: 4.15 ms, total: 83 ms
Wall time: 80.8 ms


In [8]:
%%time

# Next, compute the distance between every pair of consecutive points,
# one dataset per cell. Here: geolife, with the distance requested in
# metres, followed by a preview of the first ten rows.
geolife = spatial.create_distance_between_consecutive_column(
    dataframe=geolife,
    metres=True,
    inplace=True,
)
geolife.head(10)

CPU times: user 323 ms, sys: 70.1 ms, total: 393 ms
Wall time: 540 ms


Unnamed: 0_level_0,Unnamed: 1_level_0,lat,lon,Distance_prev_to_curr
DateTime,traj_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2008-10-23 16:53:05,1,39.984094,116.319236,0.0
2008-10-23 16:53:06,1,39.984198,116.319322,13.690153
2008-10-23 05:53:11,1,39.984224,116.319402,7.403788
2008-10-23 05:53:16,1,39.984211,116.319389,1.821083
2008-10-23 05:53:21,1,39.984217,116.319422,2.889671
2008-10-23 05:53:23,1,39.98471,116.319865,66.555997
2008-10-23 05:53:28,1,39.984674,116.31981,6.162987
2008-10-23 05:53:33,1,39.984623,116.319773,6.488225
2008-10-23 05:53:38,1,39.984606,116.319732,3.971848
2008-10-23 05:53:43,1,39.984555,116.319728,5.681172


In [9]:
%%time

# Same consecutive-point distance computation (in metres) for the
# gulls dataset, followed by a preview of the first rows.
gulls = spatial.create_distance_between_consecutive_column(
    dataframe=gulls,
    metres=True,
    inplace=True,
)
gulls.head()

CPU times: user 183 ms, sys: 51.9 ms, total: 235 ms
Wall time: 330 ms


Unnamed: 0_level_0,Unnamed: 1_level_0,event-id,visible,lon,lat,sensor-type,individual-taxon-canonical-name,individual-local-identifier,study-name,Distance_prev_to_curr
DateTime,traj_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2009-05-27 14:00:00,91732,1082620685,True,24.58617,61.24783,gps,Larus fuscus,91732A,Navigation experiments in lesser black-backed ...,0.0
2009-05-27 20:00:00,91732,1082620686,True,24.58217,61.23267,gps,Larus fuscus,91732A,Navigation experiments in lesser black-backed ...,1699.244398
2009-05-28 05:00:00,91732,1082620687,True,24.53133,61.18833,gps,Larus fuscus,91732A,Navigation experiments in lesser black-backed ...,5632.120064
2009-05-28 08:00:00,91732,1082620688,True,24.582,61.23283,gps,Larus fuscus,91732A,Navigation experiments in lesser black-backed ...,5643.314949
2009-05-28 14:00:00,91732,1082620689,True,24.5825,61.23267,gps,Larus fuscus,91732A,Navigation experiments in lesser black-backed ...,32.131494


In [11]:
%%time

# Same consecutive-point distance computation (in metres) for the
# atlantic dataset, followed by a preview of the first rows.
atlantic = spatial.create_distance_between_consecutive_column(
    dataframe=atlantic,
    metres=True,
    inplace=True,
)
atlantic.head()

CPU times: user 546 ms, sys: 161 ms, total: 707 ms
Wall time: 679 ms


Unnamed: 0_level_0,Unnamed: 1_level_0,Name,Date,Time,Event,Status,lat,lon,Maximum Wind,Minimum Pressure,Low Wind NE,...,Low Wind NW,Moderate Wind NE,Moderate Wind SE,Moderate Wind SW,Moderate Wind NW,High Wind NE,High Wind SE,High Wind SW,High Wind NW,Distance_prev_to_curr
DateTime,traj_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1851-06-25 00:00:00,AL011851,UNNAMED,18510625,0,,HU,28.0,-94.8,80,-999,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,0.0
1851-06-25 06:00:00,AL011851,UNNAMED,18510625,600,,HU,28.0,-95.4,80,-999,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,58907.516366
1851-06-25 12:00:00,AL011851,UNNAMED,18510625,1200,,HU,28.0,-96.0,80,-999,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,58907.516366
1851-06-25 18:00:00,AL011851,UNNAMED,18510625,1800,,HU,28.1,-96.5,80,-999,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,50310.976622
1851-06-25 21:00:00,AL011851,UNNAMED,18510625,2100,L,HU,28.2,-96.8,80,-999,-999,...,-999,-999,-999,-999,-999,-999,-999,-999,-999,31444.370265
