In [1]:
import movingpandas as mpd
import geopandas as gp
import pandas as pd
from datetime import timedelta
from shapely import LineString, Point
import skmob
from skmob.preprocessing import detection
import numpy as np
from tqdm import tqdm

In [None]:
# read geolife pickle file
print("Reading geolife pickle file...")
geolife_raw_gdf = gp.GeoDataFrame(pd.read_pickle('../data/geolife/geolife_raw.pkl')).to_crs(epsg=4326)
print("Done.")


In [28]:
def splitTrajectories(geolife_raw_gdf, max_diameter=100, min_duration_minutes=15, min_length=200, to_csv=False):
    """This function splits the trajectories into smaller segments using the movingpandas library. The split is done based on the stop points.


    Args:
        geolife_raw_gdf (_type_): Geolife raw data as geodataframe.
        max_diameter (int, optional): See movingpandas documentation. Defaults to 100.
        min_duration_minutes (int, optional): See movingpandas documentation. Defaults to 15.
        min_length (int, optional): See movingpandas documentation. Defaults to 200.

    Returns:
        _type_: Geodataframe with split trajectories and user id. 
    """
    traj_collection = mpd.TrajectoryCollection(geolife_raw_gdf, 'user', t='time')
    print("Trajectory collection created.")
    split_traj = []
    if to_csv:
        
        for index, traj in enumerate(traj_collection.trajectories):
            # split trajectory
            split = mpd.StopSplitter(traj).split(max_diameter=max_diameter, min_duration=timedelta(minutes=min_duration_minutes), min_length=min_length)

            split_traj = split.to_traj_gdf()

            # add user id to each split trajectory
            split_traj['user_id'] = split_traj['traj_id'].str.split('_').str[0]

            # save to csv
            split_traj.to_csv(f'data/geolife/split_trajectories/split_geolife_{split_traj["user_id"].unique()[0]}.csv', index=False)
            print(f"Next trajectory split and saved to csv.: {index}/{len(traj_collection.trajectories)}")
        return print("All split trajectories are saved to csv in the data folder.")
    else:
        for traj in tqdm(traj_collection.trajectories):
            # split trajectory
            split = mpd.StopSplitter(traj).split(max_diameter=max_diameter, min_duration=timedelta(minutes=min_duration_minutes), min_length=min_length)

            # add split trajectories to list after converting to geodataframe
            split_traj.append(split.to_traj_gdf())

            print(f"Next trajectory split and appended to list: {index}/{len(traj_collection.trajectories)}")

        # concat all split trajectories
        split_traj = pd.concat(split_traj)

        # add user id to each split trajectory
        split_traj['user_id'] = split_traj['traj_id'].str.split('_').str[0]
        print("All split trajectories and concatenated.")
        print("Done.")
        return split_traj

In [29]:
print("Splitting trajectories...")
splitTrajectories(geolife_raw_gdf, to_csv=True)
print("Done.")

Splitting trajectories...
Trajectory collection created.


1it [10:35, 635.88s/it]

Next trajectory split and saved to csv.: 0/182


2it [10:37, 262.86s/it]

Next trajectory split and saved to csv.: 1/182


3it [20:43, 419.37s/it]

Next trajectory split and saved to csv.: 2/182


4it [23:27, 318.78s/it]

Next trajectory split and saved to csv.: 3/182


5it [28:50, 320.05s/it]

Next trajectory split and saved to csv.: 4/182


6it [51:38, 676.47s/it]

Next trajectory split and saved to csv.: 5/182


7it [1:00:17, 625.13s/it]

Next trajectory split and saved to csv.: 6/182


8it [1:00:47, 435.60s/it]

Next trajectory split and saved to csv.: 7/182


9it [1:05:07, 380.60s/it]

Next trajectory split and saved to csv.: 8/182


10it [1:12:00, 390.51s/it]

Next trajectory split and saved to csv.: 9/182


11it [1:38:48, 763.18s/it]

Next trajectory split and saved to csv.: 10/182


12it [1:40:22, 559.77s/it]

Next trajectory split and saved to csv.: 11/182


13it [1:40:55, 399.97s/it]

Next trajectory split and saved to csv.: 12/182


14it [1:43:28, 325.61s/it]

Next trajectory split and saved to csv.: 13/182


15it [1:49:39, 339.28s/it]

Next trajectory split and saved to csv.: 14/182


16it [1:53:10, 300.45s/it]

Next trajectory split and saved to csv.: 15/182


17it [2:07:11, 463.20s/it]

Next trajectory split and saved to csv.: 16/182


18it [2:12:38, 422.19s/it]

Next trajectory split and saved to csv.: 17/182


19it [2:19:32, 419.75s/it]

Next trajectory split and saved to csv.: 18/182


20it [2:24:30, 383.26s/it]

Next trajectory split and saved to csv.: 19/182


21it [2:25:27, 285.24s/it]

Next trajectory split and saved to csv.: 20/182


22it [3:37:55, 1504.55s/it]

Next trajectory split and saved to csv.: 21/182


23it [3:46:18, 1204.14s/it]

Next trajectory split and saved to csv.: 22/182


24it [3:49:32, 900.81s/it] 

Next trajectory split and saved to csv.: 23/182


25it [3:51:05, 658.42s/it]

Next trajectory split and saved to csv.: 24/182


26it [3:51:18, 464.89s/it]

Next trajectory split and saved to csv.: 25/182


27it [3:51:47, 334.25s/it]

Next trajectory split and saved to csv.: 26/182


28it [3:51:48, 234.11s/it]

Next trajectory split and saved to csv.: 27/182


29it [3:52:00, 167.56s/it]

Next trajectory split and saved to csv.: 28/182


30it [3:52:04, 118.54s/it]

Next trajectory split and saved to csv.: 29/182


31it [4:13:25, 467.19s/it]

Next trajectory split and saved to csv.: 30/182


32it [4:13:40, 331.60s/it]

Next trajectory split and saved to csv.: 31/182


33it [4:23:05, 401.42s/it]

Next trajectory split and saved to csv.: 32/182
Next trajectory split and saved to csv.: 33/182


35it [4:23:12, 198.96s/it]

Next trajectory split and saved to csv.: 34/182


36it [4:24:08, 155.94s/it]

Next trajectory split and saved to csv.: 35/182


37it [4:24:15, 111.33s/it]

Next trajectory split and saved to csv.: 36/182


38it [4:24:17, 78.43s/it] 

Next trajectory split and saved to csv.: 37/182


39it [4:24:44, 63.06s/it]

Next trajectory split and saved to csv.: 38/182


40it [4:25:10, 51.91s/it]

Next trajectory split and saved to csv.: 39/182
Next trajectory split and saved to csv.: 40/182


42it [4:25:12, 28.37s/it]

Next trajectory split and saved to csv.: 41/182


43it [4:34:57, 166.56s/it]

Next trajectory split and saved to csv.: 42/182


44it [4:35:11, 126.61s/it]

Next trajectory split and saved to csv.: 43/182


45it [4:37:08, 123.86s/it]

Next trajectory split and saved to csv.: 44/182


46it [4:41:23, 160.58s/it]

Next trajectory split and saved to csv.: 45/182


47it [4:43:13, 146.29s/it]

Next trajectory split and saved to csv.: 46/182


48it [5:07:05, 518.74s/it]

Next trajectory split and saved to csv.: 47/182


49it [5:43:07, 999.98s/it]

Next trajectory split and saved to csv.: 48/182


50it [5:44:07, 722.64s/it]

Next trajectory split and saved to csv.: 49/182


51it [5:46:06, 543.88s/it]

Next trajectory split and saved to csv.: 50/182


53it [5:52:42, 350.68s/it]

Next trajectory split and saved to csv.: 51/182
Next trajectory split and saved to csv.: 52/182


54it [5:53:44, 264.60s/it]

Next trajectory split and saved to csv.: 53/182


55it [6:07:25, 431.01s/it]

Next trajectory split and saved to csv.: 54/182


56it [6:08:22, 318.97s/it]

Next trajectory split and saved to csv.: 55/182


57it [6:08:23, 223.58s/it]

Next trajectory split and saved to csv.: 56/182


58it [6:08:27, 157.91s/it]

Next trajectory split and saved to csv.: 57/182


59it [6:11:38, 167.82s/it]

Next trajectory split and saved to csv.: 58/182


60it [6:11:54, 122.32s/it]

Next trajectory split and saved to csv.: 59/182


61it [6:11:55, 85.77s/it] 

Next trajectory split and saved to csv.: 60/182


62it [6:13:49, 94.41s/it]

Next trajectory split and saved to csv.: 61/182


63it [6:20:16, 182.05s/it]

Next trajectory split and saved to csv.: 62/182


64it [6:21:09, 143.39s/it]

Next trajectory split and saved to csv.: 63/182


65it [6:34:14, 335.84s/it]

Next trajectory split and saved to csv.: 64/182


66it [6:55:30, 617.94s/it]

Next trajectory split and saved to csv.: 65/182


67it [6:55:30, 432.67s/it]

Next trajectory split and saved to csv.: 66/182
Next trajectory split and saved to csv.: 67/182


69it [6:56:21, 244.70s/it]

Next trajectory split and saved to csv.: 68/182


71it [6:57:30, 148.48s/it]

Next trajectory split and saved to csv.: 69/182
Next trajectory split and saved to csv.: 70/182


72it [6:57:41, 111.15s/it]

Next trajectory split and saved to csv.: 71/182


73it [6:58:43, 97.26s/it] 

Next trajectory split and saved to csv.: 72/182


74it [6:59:00, 74.57s/it]

Next trajectory split and saved to csv.: 73/182


75it [6:59:02, 53.45s/it]

Next trajectory split and saved to csv.: 74/182


76it [7:01:18, 77.55s/it]

Next trajectory split and saved to csv.: 75/182


77it [7:08:24, 180.39s/it]

Next trajectory split and saved to csv.: 76/182


78it [7:08:25, 127.18s/it]

Next trajectory split and saved to csv.: 77/182


79it [7:08:25, 89.49s/it] 

Next trajectory split and saved to csv.: 78/182


80it [7:08:26, 63.01s/it]

Next trajectory split and saved to csv.: 79/182


81it [7:08:43, 49.20s/it]

Next trajectory split and saved to csv.: 80/182


82it [7:08:56, 38.36s/it]

Next trajectory split and saved to csv.: 81/182


83it [7:09:03, 29.06s/it]

Next trajectory split and saved to csv.: 82/182


84it [7:10:15, 41.84s/it]

Next trajectory split and saved to csv.: 83/182


85it [7:11:02, 43.32s/it]

Next trajectory split and saved to csv.: 84/182


86it [7:11:03, 30.67s/it]

Next trajectory split and saved to csv.: 85/182


88it [7:11:04, 15.36s/it]

Next trajectory split and saved to csv.: 86/182
Next trajectory split and saved to csv.: 87/182


89it [7:11:05, 10.83s/it]

Next trajectory split and saved to csv.: 88/182


90it [7:11:05,  7.74s/it]

Next trajectory split and saved to csv.: 89/182


91it [7:11:10,  6.94s/it]

Next trajectory split and saved to csv.: 90/182


92it [7:11:20,  7.86s/it]

Next trajectory split and saved to csv.: 91/182


93it [7:13:45, 49.03s/it]

Next trajectory split and saved to csv.: 92/182


94it [7:14:20, 44.72s/it]

Next trajectory split and saved to csv.: 93/182


95it [7:14:48, 39.65s/it]

Next trajectory split and saved to csv.: 94/182


96it [7:20:16, 126.13s/it]

Next trajectory split and saved to csv.: 95/182


98it [7:20:24, 63.55s/it] 

Next trajectory split and saved to csv.: 96/182
Next trajectory split and saved to csv.: 97/182


99it [7:20:25, 44.76s/it]

Next trajectory split and saved to csv.: 98/182


100it [7:22:37, 71.02s/it]

Next trajectory split and saved to csv.: 99/182


101it [7:22:57, 55.72s/it]

Next trajectory split and saved to csv.: 100/182


102it [7:23:03, 40.85s/it]

Next trajectory split and saved to csv.: 101/182


103it [7:26:01, 81.85s/it]

Next trajectory split and saved to csv.: 102/182


104it [7:26:11, 60.35s/it]

Next trajectory split and saved to csv.: 103/182


105it [7:36:15, 223.59s/it]

Next trajectory split and saved to csv.: 104/182


106it [7:39:09, 208.50s/it]

Next trajectory split and saved to csv.: 105/182


107it [7:55:01, 431.53s/it]

Next trajectory split and saved to csv.: 106/182


108it [7:55:33, 311.64s/it]

Next trajectory split and saved to csv.: 107/182


109it [8:26:37, 777.52s/it]

Next trajectory split and saved to csv.: 108/182


110it [8:27:37, 562.17s/it]

Next trajectory split and saved to csv.: 109/182


111it [8:29:23, 425.24s/it]

Next trajectory split and saved to csv.: 110/182


112it [8:32:40, 356.89s/it]

Next trajectory split and saved to csv.: 111/182


113it [8:33:38, 267.24s/it]

Next trajectory split and saved to csv.: 112/182


114it [8:33:57, 192.79s/it]

Next trajectory split and saved to csv.: 113/182


115it [8:34:00, 135.81s/it]

Next trajectory split and saved to csv.: 114/182


116it [8:41:01, 221.36s/it]

Next trajectory split and saved to csv.: 115/182


117it [8:41:02, 155.19s/it]

Next trajectory split and saved to csv.: 116/182


118it [8:41:25, 115.68s/it]

Next trajectory split and saved to csv.: 117/182


119it [8:41:27, 81.44s/it] 

Next trajectory split and saved to csv.: 118/182


120it [8:41:28, 57.26s/it]

Next trajectory split and saved to csv.: 119/182


121it [8:52:29, 238.56s/it]

Next trajectory split and saved to csv.: 120/182


122it [8:55:47, 226.23s/it]

Next trajectory split and saved to csv.: 121/182


123it [9:02:06, 272.10s/it]

Next trajectory split and saved to csv.: 122/182


124it [9:02:09, 191.50s/it]

Next trajectory split and saved to csv.: 123/182


125it [9:27:07, 583.27s/it]

Next trajectory split and saved to csv.: 124/182


127it [9:27:19, 288.32s/it]

Next trajectory split and saved to csv.: 125/182
Next trajectory split and saved to csv.: 126/182


128it [9:27:58, 213.51s/it]

Next trajectory split and saved to csv.: 127/182


129it [9:32:07, 224.14s/it]

Next trajectory split and saved to csv.: 128/182


130it [9:32:22, 161.42s/it]

Next trajectory split and saved to csv.: 129/182


131it [9:32:23, 113.49s/it]

Next trajectory split and saved to csv.: 130/182


132it [9:32:25, 80.01s/it] 

Next trajectory split and saved to csv.: 131/182


133it [9:32:41, 60.87s/it]

Next trajectory split and saved to csv.: 132/182


134it [11:05:46, 1718.13s/it]

Next trajectory split and saved to csv.: 133/182


135it [11:05:48, 1203.31s/it]

Next trajectory split and saved to csv.: 134/182


136it [11:07:17, 868.98s/it] 

Next trajectory split and saved to csv.: 135/182


137it [11:08:02, 621.77s/it]

Next trajectory split and saved to csv.: 136/182


138it [11:08:04, 435.71s/it]

Next trajectory split and saved to csv.: 137/182


139it [11:08:11, 306.99s/it]

Next trajectory split and saved to csv.: 138/182


140it [11:09:01, 230.08s/it]

Next trajectory split and saved to csv.: 139/182


141it [11:10:39, 190.38s/it]

Next trajectory split and saved to csv.: 140/182


143it [11:10:40, 93.45s/it] 

Next trajectory split and saved to csv.: 141/182
Next trajectory split and saved to csv.: 142/182


145it [11:56:03, 617.68s/it]

Next trajectory split and saved to csv.: 143/182
Next trajectory split and saved to csv.: 144/182


146it [11:59:05, 487.06s/it]

Next trajectory split and saved to csv.: 145/182


147it [11:59:07, 341.49s/it]

Next trajectory split and saved to csv.: 146/182


148it [12:28:18, 764.26s/it]

Next trajectory split and saved to csv.: 147/182


149it [12:44:33, 827.59s/it]

Next trajectory split and saved to csv.: 148/182


150it [12:46:59, 623.16s/it]

Next trajectory split and saved to csv.: 149/182


151it [12:47:13, 440.43s/it]

Next trajectory split and saved to csv.: 150/182
Next trajectory split and saved to csv.: 151/182


153it [12:49:04, 262.66s/it]

Next trajectory split and saved to csv.: 152/182


154it [12:49:05, 197.74s/it]

Next trajectory split and saved to csv.: 153/182


156it [12:49:09, 107.14s/it]

Next trajectory split and saved to csv.: 154/182
Next trajectory split and saved to csv.: 155/182


157it [12:49:09, 77.22s/it] 

Next trajectory split and saved to csv.: 156/182


158it [12:49:15, 56.80s/it]

Next trajectory split and saved to csv.: 157/182
Next trajectory split and saved to csv.: 158/182


160it [12:54:56, 107.95s/it]

Next trajectory split and saved to csv.: 159/182


161it [12:59:16, 144.91s/it]

Next trajectory split and saved to csv.: 160/182


162it [12:59:16, 288.62s/it]

Next trajectory split and saved to csv.: 161/182
All split trajectories are saved to csv in the data folder.





Done.
