In [1]:
import movingpandas as mpd
import geopandas as gp
import pandas as pd
from datetime import timedelta
from shapely import LineString, Point
import skmob
from skmob.preprocessing import detection
import numpy as np
from tqdm import tqdm

In [2]:
# Load the pickled raw Geolife frame, wrap it in a GeoDataFrame and
# reproject it to EPSG:4326 before any trajectory processing.
print("Reading geolife pickle file...")
geolife_raw_gdf = (
    gp.GeoDataFrame(pd.read_pickle('../data/geolife/geolife_raw.pkl'))
    .to_crs(epsg=4326)
)
print("Done.")


Reading geolife pickle file...
Done.


In [90]:
def _split_single_trajectory(traj, max_diameter, min_duration, min_length):
    """Split one trajectory at its stops and tag the result with the user id.

    Args:
        traj: A single trajectory taken from the movingpandas collection.
        max_diameter: Forwarded to ``StopSplitter.split``.
        min_duration: ``timedelta`` forwarded to ``StopSplitter.split``.
        min_length: Forwarded to ``StopSplitter.split``.

    Returns:
        The frame returned by ``to_traj_gdf()`` with an added ``user_id``
        column, or ``None`` when that conversion raises ``ValueError``.
    """
    split = mpd.StopSplitter(traj).split(max_diameter=max_diameter, min_duration=min_duration, min_length=min_length)
    try:
        split_traj = split.to_traj_gdf()
    except ValueError:
        # NOTE(review): presumably raised when the split leaves no segment to
        # convert (e.g. nothing reaches min_length) -- confirm against movingpandas.
        # Such trajectories are skipped, exactly as before; the previous code also
        # converted the unsplit trajectory here but discarded it immediately.
        return None

    # add user id to each split trajectory
    split_traj['user_id'] = traj.obj_id
    return split_traj


def splitTrajectories(geolife_raw_gdf, max_diameter=100, min_duration_minutes=15, min_length=200, to_csv=False):
    """Split every trajectory into smaller segments at its stop points (movingpandas).

    A ``TrajectoryCollection`` is built from ``geolife_raw_gdf`` and each
    trajectory is run through ``mpd.StopSplitter``. Trajectories whose split
    result cannot be converted to a frame (``ValueError``) are skipped and the
    number of skipped trajectories is printed.

    Args:
        geolife_raw_gdf: Geolife raw data as a GeoDataFrame. Must provide the
            columns ``traj_id``, ``user`` and ``time`` used to build the collection.
        max_diameter (int, optional): See movingpandas documentation. Defaults to 100.
        min_duration_minutes (int, optional): Minimum stop duration in minutes;
            converted to a ``timedelta``. See movingpandas documentation. Defaults to 15.
        min_length (int, optional): See movingpandas documentation. Defaults to 200.
        to_csv (bool, optional): If True, write one CSV per input trajectory to
            ``../data/geolife/split_trajectories/<traj id>.csv`` (the directory is
            created if missing) instead of collecting results in memory.
            Defaults to False.

    Returns:
        ``None`` when ``to_csv`` is True; otherwise a single frame with all
        split trajectories concatenated, including a ``user_id`` column.

    Raises:
        ValueError: If ``to_csv`` is False and no trajectory yielded a result,
            so there is nothing to concatenate.
    """
    import os  # local import: only needed to make sure the CSV folder exists

    traj_collection = mpd.TrajectoryCollection(geolife_raw_gdf, traj_id_col='traj_id', obj_id_col='user', t="time")
    print("Trajectory collection created.")

    # Loop-invariant: build the duration threshold once instead of per trajectory.
    min_duration = timedelta(minutes=min_duration_minutes)

    out_dir = "../data/geolife/split_trajectories/"
    if to_csv:
        os.makedirs(out_dir, exist_ok=True)

    segments = []
    skipped = 0
    for traj in tqdm(traj_collection.trajectories):
        split_traj = _split_single_trajectory(traj, max_diameter, min_duration, min_length)
        if split_traj is None:
            skipped += 1
            continue

        if to_csv:
            # Stream each result to disk so nothing accumulates in memory.
            split_traj.to_csv(out_dir + str(traj.id) + ".csv", index=False)
        else:
            segments.append(split_traj)

    if skipped:
        print(f"Skipped {skipped} trajectories whose split result could not be converted (ValueError).")

    if to_csv:
        print("All split trajectories are saved to csv in the data folder.")
        return None

    if not segments:
        # pd.concat([]) would raise an opaque "No objects to concatenate".
        raise ValueError("No trajectory produced a split result; nothing to concatenate.")

    # concat all split trajectories
    all_segments = pd.concat(segments)
    print("All split trajectories and concatenated.")
    print("Done.")
    return all_segments

In [92]:
# Full run over the raw Geolife frame, writing one CSV per trajectory.
# The recorded run below took roughly 16h45m for 17784 trajectories.
print("Splitting trajectories...")
splitTrajectories(geolife_raw_gdf=geolife_raw_gdf, to_csv=True)


Splitting trajectories...
Trajectory collection created.


100%|██████████| 17784/17784 [16:45:29<00:00,  3.39s/it]    

All split trajectories are saved to csv in the data folder.



