In [1]:
import pandas as pd
from ptrail.core.TrajectoryDF import PTRAILDataFrame
from ptrail.preprocessing.filters import Filters as filters
from ptrail.features.kinematic_features import KinematicFeatures
from ptrail.features.temporal_features import TemporalFeatures
from ptrail.preprocessing.interpolation import Interpolation
from ptrail.utilities.conversions import Conversions as con
import ptrail.utilities.constants as const
import datetime as dt

# Reading the gulls dataset and converting to PTRAILDataFrame.
# Also, lets, print the first 5 points of the dataset to
# see how the dataframe looks.
gulls = pd.read_csv('./data/gulls.csv')
gulls = PTRAILDataFrame(gulls,
                        latitude='location-lat',
                        longitude='location-long',
                        datetime='timestamp',
                        traj_id='tag-local-identifier',
                        rest_of_columns=[])

In [2]:
pdf = pd.read_csv('./data/geolife_sample.csv')
geolife = PTRAILDataFrame(data_set=pdf,
                             latitude='lat',
                             longitude='lon',
                             datetime='datetime',
                             traj_id='id')


In [3]:
atlantic = pd.read_csv('./data/atlantic.csv')
atlantic = con.convert_directions_to_degree_lat_lon(atlantic, 'Latitude',"Longitude")
def convert_to_datetime(row):
        this_date = '{}-{}-{}'.format(str(row['Date'])[0:4], str(row['Date'])[4:6], str(row['Date'])[6:])
        this_time = '{:02d}:{:02d}:00'.format(int(row['Time']/100), int(str(row['Time'])[-2:]))
        return '{} {}'.format(this_date, this_time)
atlantic['DateTime'] = atlantic.apply(convert_to_datetime, axis=1)
atlantic = PTRAILDataFrame(atlantic,
                          latitude='Latitude',
                          longitude='Longitude',
                          datetime='DateTime',
                          traj_id='ID',
                          rest_of_columns=[])
atlantic = TemporalFeatures.create_date_column(atlantic)

In [4]:
pdf = pd.read_csv('./data/starkey.csv')
starkey = PTRAILDataFrame(data_set=pdf,
                          latitude='lat',
                          longitude='lon',
                          datetime='DateTime',
                          traj_id='Id')

In [5]:
t1 = gulls.reset_index().loc[gulls.reset_index()[const.TRAJECTORY_ID] == '91732'][[const.TRAJECTORY_ID, const.DateTime, const.LAT, const.LONG]]
t1 = PTRAILDataFrame(data_set=t1,
                     datetime='DateTime',
                     traj_id='traj_id',
                     latitude='lat',
                     longitude='lon')

In [6]:
def segment_traj_by_week(df):
    """
        Given a dataframe containing trajectory data, segment all
        the trajectories by each week.

        Parameters
        ----------
            df: PTRAILDataFrame
                The dataframe containing trajectory data.

        Returns
        -------
            pandas.core.dataframe.DataFrame:
                The dataframe containing segmented trajectories
                with a new column added called segment_id
    """
    # First, create the date column and get all the unique traj_ids
    # in the dataframe..
    df = TemporalFeatures.create_date_column(df)
    ids_ = list(df.traj_id.value_counts().keys())

    # Get the ideal number of IDs by which the dataframe is to be split.
    df_chunks = []
    for i in range(len(ids_)):
        small_df = df.reset_index().loc[df.reset_index()[const.TRAJECTORY_ID] == ids_[i]]
        df_chunks.append(small_df)

    # Now, iterate over the entire dataframe and then segment
    # the trajectories by 1 week each.
    results = []
    for i in range(len(ids_)):
        # Take the traj_df of a single Trajectory out from the
        # list of chunks and find their max and min timestamps.
        traj = df_chunks[i]
        t_max = traj.reset_index()[const.DateTime].max()
        t_min = traj.reset_index()[const.DateTime].min()

        # For iteration purposes, set t_1 to min and t_2 to
        # t_1 + 7 days.
        t_1 = t_min
        t_2 = t_1 + dt.timedelta(days=7)
        seg_id = 1

        # Now, segment the trajectories into smaller segments
        # wherein each segment contains the points of a span
        # of 7 days only.
        while t_2 < t_max:
            if t_2 < t_max:
                seg = filters.filter_by_date(traj,
                                             start_date=t_1.strftime('%Y-%m-%d'),
                                             end_date=t_2.strftime('%Y-%m-%d'))
                # Once filtered, assign the segment with a segment ID.
                seg['seg_id'] = seg_id

                # Increment the segment id, t_1 and t_2 values by
                # 1, 7 days and 7 days respectively to continue
                # the iteration.
                seg_id += 1
                t_1 += dt.timedelta(days=7)
                t_2 += dt.timedelta(days=7)
                results.append(seg.drop(columns=['index', 'level_0']))

            # If, t_2 is greater than the max time present in the
            # trajectory, then assign t_2 = max and proceed
            # further with segmentation.
            elif t_2 >= t_max:
                seg = filters.filter_by_date(traj,
                                             start_date=t_1.strftime('%Y-%m-%d'),
                                             end_date=t_max.strftime('%Y-%m-%d'))
                # Once filtered, assign the segment with a segment ID.
                seg['seg_id'] = seg_id

                # Increment the segment id, t_1 and t_2 values by
                # 1, 7 days and 7 days respectively to continue
                # the iteration.
                seg_id += 1
                t_1 += dt.timedelta(days=7)
                t_2 += dt.timedelta(days=7)
                results.append(seg.drop(columns=['index', 'level_0']))

    # Finally, concat the dataframes, set the index as
    # [traj_id, seg_id, DateTime].
    return pd.concat(results).reset_index().set_index(['traj_id', 'seg_id', 'DateTime'])

In [7]:
%%time

seg = segment_traj_by_week(starkey)

CPU times: user 3min 49s, sys: 408 ms, total: 3min 49s
Wall time: 3min 49s


In [15]:
print(len(starkey))

287136


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lat,lon,StarkeyTime,GMDate,GMTime,LocDate,LocTime,RadNum,Species,UTME,UTMN,Year,Grensunr,Grensuns,Obswt,Date
traj_id,seg_id,DateTime,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
9.10313e+42,1,1993-05-06 17:04:47,45.218440,-118.566257,168829487,01:04:47,19930507,19930506,17:04:47,67,E,377026,5008419,93,12:34:00,03:10:00,1.67,1993-05-06
9.10313e+42,1,1993-05-06 17:21:12,45.218985,-118.565890,168826332,01:21:12,19930507,19930506,17:21:12,67,E,377053,5008474,93,12:34:00,03:10:00,1.73,1993-05-06
9.10313e+42,1,1993-05-06 17:27:53,45.219536,-118.565141,168830873,01:27:53,19930507,19930506,17:27:53,67,E,377108,5008516,93,12:34:00,03:10:00,1.73,1993-05-06
9.10313e+42,1,1993-05-06 17:51:48,45.219260,-118.565516,168832308,01:51:48,19930507,19930506,17:51:48,67,E,377083,5008494,93,12:34:00,03:10:00,1.73,1993-05-06
9.10313e+42,1,1993-05-06 18:40:04,45.220128,-118.561336,168835204,02:40:04,19930507,19930506,18:40:04,67,E,377406,5008591,93,12:34:00,03:10:00,1.46,1993-05-06
9.10313e+42,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9.10313e+42,2,1993-05-20 10:07:23,45.247662,-118.542212,170014043,18:07:23,19930520,19930520,10:07:23,67,E,378975,5011622,93,12:19:00,03:25:00,1.34,1993-05-20
9.10313e+42,2,1993-05-20 10:43:48,45.247657,-118.542594,170016228,18:43:48,19930520,19930520,10:43:48,67,E,378959,5011619,93,12:19:00,03:25:00,1.34,1993-05-20
9.10313e+42,2,1993-05-20 11:19:51,45.247662,-118.542212,170018391,19:19:51,19930520,19930520,11:19:51,67,E,378983,5011625,93,12:19:00,03:25:00,1.34,1993-05-20
9.10313e+42,2,1993-05-20 11:56:00,45.248483,-118.541469,170020560,19:56:00,19930520,19930520,11:56:00,67,E,379020,5011695,93,12:19:00,03:25:00,1.46,1993-05-20


In [12]:
import numpy as np
seg.reset_index().loc[np.logical_and(seg.reset_index()['seg_id'] == 3, seg.reset_index()['traj_id'] == '910313E37')]

Unnamed: 0,traj_id,seg_id,DateTime,lat,lon,StarkeyTime,GMDate,GMTime,LocDate,LocTime,RadNum,Species,UTME,UTMN,Year,Grensunr,Grensuns,Obswt,Date
515,910313E37,3,1993-05-20 00:01:37,45.258491,-118.540211,169977697,08:01:37,19930520,19930520,00:01:37,67,E,379144,5012820,93,12:19:00,03:25:00,1.55,1993-05-20
516,910313E37,3,1993-05-20 00:43:33,45.257671,-118.540953,169980213,08:43:33,19930520,19930520,00:43:33,67,E,379094,5012720,93,12:19:00,03:25:00,1.50,1993-05-20
517,910313E37,3,1993-05-20 01:23:01,45.257661,-118.541718,169982581,09:23:01,19930520,19930520,01:23:01,67,E,379022,5012736,93,12:19:00,03:25:00,1.50,1993-05-20
518,910313E37,3,1993-05-20 03:31:47,45.254105,-118.545063,169990307,11:31:47,19930520,19930520,03:31:47,67,E,378766,5012345,93,12:19:00,03:25:00,1.62,1993-05-20
519,910313E37,3,1993-05-20 04:06:04,45.253316,-118.543512,169992364,12:06:04,19930520,19930520,04:06:04,67,E,378891,5012240,93,12:19:00,03:25:00,1.67,1993-05-20
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
783,910313E37,3,1993-05-27 20:57:42,45.252044,-118.557623,170657862,04:57:42,19930528,19930527,20:57:42,67,E,377771,5012128,93,12:12:00,03:33:00,1.57,1993-05-27
784,910313E37,3,1993-05-27 21:33:04,45.252049,-118.557241,170659984,05:33:04,19930528,19930527,21:33:04,67,E,377807,5012127,93,12:12:00,03:33:00,1.57,1993-05-27
785,910313E37,3,1993-05-27 22:09:58,45.252605,-118.556109,170662198,06:09:58,19930528,19930527,22:09:58,67,E,377881,5012182,93,12:12:00,03:33:00,1.58,1993-05-27
786,910313E37,3,1993-05-27 22:48:02,45.252610,-118.555727,170664482,06:48:02,19930528,19930527,22:48:02,67,E,377930,5012188,93,12:12:00,03:33:00,1.58,1993-05-27


In [None]:
# stats = KinematicFeatures.generate_kinematic_stats(gulls)

In [None]:
# stats.head(16)