In [386]:
import movingpandas as mpd
import geopandas as gp
import pandas as pd
from datetime import timedelta
from shapely import LineString, Point
import skmob
from skmob.preprocessing import detection
import numpy as np

In [3]:
# read geolife pickle file
geolife_raw_gdf = gp.GeoDataFrame(pd.read_pickle('data/geolife/geolife_raw.pkl'))


In [423]:
traj_collection = mpd.TrajectoryCollection(geolife_raw_gdf, 'user', t='time')

In [424]:
my_traj = traj_collection.trajectories[1]
print(my_traj)

Trajectory 1 (2008-10-23 05:53:05 to 2008-12-15 00:31:18) | Size: 108607 | Length: 1018053.3m
Bounds: (116.145054, 39.900944, 116.443578, 40.076116)
LINESTRING (116.319236 39.984094, 116.319322 39.984198, 116.319402 39.984224, 116.319389 39.984211, 


In [425]:
split = mpd.StopSplitter(my_traj).split(max_diameter=100, min_duration=timedelta(minutes=15), min_length=200)
split

TrajectoryCollection with 108 trajectories

In [426]:

split.to_traj_gdf()


Unnamed: 0,traj_id,start_t,end_t,geometry,length,direction
0,1_2008-10-23 05:53:05,2008-10-23 05:53:05,2008-10-23 11:10:29,"LINESTRING (116.31924 39.98409, 116.31932 39.9...",11150.474980,341.716224
1,1_2008-10-23 23:44:06,2008-10-23 23:44:06,2008-10-24 00:23:18,"LINESTRING (116.30590 40.01412, 116.30579 40.0...",6389.233020,156.261271
2,1_2008-10-24 01:46:57,2008-10-24 01:46:57,2008-10-24 02:03:11,"LINESTRING (116.32688 39.97878, 116.32699 39.9...",2140.426179,279.035617
3,1_2008-10-24 02:29:29,2008-10-24 02:29:29,2008-10-24 03:27:05,"LINESTRING (116.30847 39.98105, 116.30864 39.9...",1166.429475,111.959781
4,1_2008-10-24 03:50:05,2008-10-24 03:50:05,2008-10-24 04:13:35,"LINESTRING (116.31410 39.97956, 116.31424 39.9...",2073.172740,99.278008
...,...,...,...,...,...,...
103,1_2008-12-13 12:04:01,2008-12-13 12:04:01,2008-12-13 12:36:48,"LINESTRING (116.34341 39.97828, 116.34344 39.9...",1415.976143,129.111053
104,1_2008-12-13 12:54:15,2008-12-13 12:54:15,2008-12-13 13:44:33,"LINESTRING (116.34469 39.97805, 116.34481 39.9...",8441.780588,324.995328
105,1_2008-12-13 14:01:47,2008-12-13 14:01:47,2008-12-13 14:17:31,"LINESTRING (116.31134 40.01429, 116.31125 40.0...",1397.923951,262.797678
106,1_2008-12-14 08:08:53,2008-12-14 08:08:53,2008-12-14 08:46:01,"LINESTRING (116.30599 40.01411, 116.30586 40.0...",6165.764823,155.864986


In [5]:
def getStopSegments(user_gdf, min_duration=timedelta(minutes=30), max_diameter=100):
    """This function takes in a gdf of a user and returns a gdf of the stop segments. The user column should only contain one user.

    Args:
        user_gdf (_type_): A GeoDataFrame with the stop segments of a single user.
    """

    assert user_gdf.user.nunique() == 1, "The user column should only contain one user."

    # Create a TrajectoryCollection from the GeoDataFrame
    user_collection = mpd.TrajectoryCollection(user_gdf, traj_id_col='user', t='time', crs='epsg:4326')

    # Detect and extract trajectory segments (trips) from stop point detection
    detector = mpd.TrajectoryStopDetector(user_collection)
    stop_segments = detector.get_stop_segments(min_duration=min_duration, max_diameter=max_diameter)

    # Convert the TrajectoryCollection to a GeoDataFrame
    stop_segments = stop_segments.to_traj_gdf()

    return stop_segments

    

In [372]:
def convertGdfWithPointlistToGdfWithLineString(gdf_with_pointgeometry, groupby_column='trip_id'):
    """This function takes in a gdf with point geometry and returns a gdf with linestring geometry.

    Args:
        gdf_with_pointgeometry (_type_): A GeoDataFrame with point geometry.
    """
    gdf_with_pointgeometry = gdf_with_pointgeometry.groupby(groupby_column).agg(list).reset_index()
    gdf_with_pointgeometry['trip_start'] = gdf_with_pointgeometry.time.apply(lambda x: min(x))
    gdf_with_pointgeometry['trip_end'] = gdf_with_pointgeometry.time.apply(lambda x: max(x))
    gdf_with_pointgeometry.label = gdf_with_pointgeometry.label.apply(lambda x: min(x))
    gdf_with_pointgeometry.user = gdf_with_pointgeometry.user.apply(lambda x: x[0])
    gdf_with_pointgeometry.geometry = gdf_with_pointgeometry.geometry.apply(lambda x: LineString(x) if len(x) > 1 else x[0]) # if there is only one point, the linestring is the point
    gdf_with_pointgeometry = gdf_with_pointgeometry.drop(columns = ['time', 'lat', 'lon', 'alt'])
    return gdf_with_pointgeometry


In [382]:
def segment_traj(geolife_raw_gdf_with_one_user, stop_segments):
    """This function takes in a gdf of a user and returns a gdf of the stop segments. The user column should only contain one user.

    Args:
        geolife_raw_gdf_with_one_user (_type_): Gdf with one user
        stop_segments (_type_): Gdf with the stop segments of all users

    Returns:
        _type_: A GeoDataFrame with the trips of a single user in the form of Point geometries.
    """

    assert geolife_raw_gdf_with_one_user.user.nunique() == 1, "The user column should only contain one user."

    
    user_id = geolife_raw_gdf_with_one_user.user.unique()[0]
    
    user_stop_segments = stop_segments.query('user == @user_id')

    trip_dfs = []

    geolife_raw_gdf_with_one_user.time = pd.to_datetime(geolife_raw_gdf_with_one_user.time, format='%Y-%m-%d %H:%M:%S')

    for i, stop_segment in user_stop_segments.reset_index(drop=True).iterrows(): # We need to reset the index so that it starts at 0 and goes until the length of the dataframe - 1
        start_stop = stop_segment.datetime
        end_stop = stop_segment.leaving_datetime

        if i == 0:
            end_last_stop = user_stop_segments.datetime.min() - timedelta(minutes=1) # set to a time before the first point of this user's trajectory
        else:
            end_last_stop = user_stop_segments.leaving_datetime.iloc[i-1]
            
        # Get the points that are between the last stop and before the current stop
        trip_df = geolife_raw_gdf_with_one_user.query("(time < @start_stop) & (time > @end_last_stop)")
        trip_df['trip_id'] = i
        trip_dfs.append(trip_df)

        # if this is the last stop of multiple stops for this user then set the trip_id for the points after the last stop
        if i == len(user_stop_segments)-1:
            trip_df = geolife_raw_gdf_with_one_user.query("time > @end_stop")
            trip_df.loc[:, 'trip_id'] = i + 1
            trip_dfs.append(trip_df)

    geolife_raw_gdf_with_one_user = pd.concat(trip_dfs)

    return geolife_raw_gdf_with_one_user

In [418]:
def getSegmentedTrips(geolife_raw_gdf, spatial_radius_km=0.1, minutes_for_a_stop=30.0):
    """This function takes in a geolife gdf of one user and returns a gdf with the segmented trips.

    Args:
        geolife_raw_gdf (_type_): A GeoDataFrame with the raw geolife data.
    """
    

    tdf = skmob.TrajDataFrame(geolife_raw_gdf, latitude='lat', longitude='lon', user_id='user', datetime='time')
    stop_segments = detection.stay_locations(tdf, stop_radius_factor=1, minutes_for_a_stop=minutes_for_a_stop, spatial_radius_km=spatial_radius_km, leaving_time=True).rename(columns={'uid': 'user'})

    # Get segmented trips for all users
    geolife_raw_gdf = geolife_raw_gdf.groupby('user').apply(lambda x: segment_traj(x, stop_segments)).reset_index(drop=True)

    # Convert the points to linestrings for each trip
    geolife_raw_gdf = geolife_raw_gdf.groupby('trip_id').apply(lambda x: convertGdfWithPointlistToGdfWithLineString(x))

    # Drop temp trip_id column and reset index
    geolife_raw_gdf = geolife_raw_gdf.reset_index(drop=True).drop(columns=['trip_id'])

    # Assign new ID column to dataframe as trip_id
    geolife_raw_gdf['trip_id'] = range(0, len(geolife_raw_gdf))

    # Project gdf epsg 4326 for Peking
    geolife_raw_gdf = gp.GeoDataFrame(geolife_raw_gdf, geometry=geolife_raw_gdf.geometry, crs='epsg:4326')

    # trip length in meters
    geolife_raw_gdf['trip_length'] = geolife_raw_gdf.geometry.apply(lambda x: x.length)

    # Return segmented trips gdf
    return geolife_raw_gdf

In [419]:
test = geolife_raw_gdf.query('user in [63,82]')

In [420]:
test = getSegmentedTrips(test)

In [421]:
test

Unnamed: 0,label,user,geometry,trip_start,trip_end,trip_id,trip_length
0,0,82,POINT (116.33600 39.97537),2007-05-24 10:44:45,2007-05-24 10:44:45,0,0.000000
1,0,63,"LINESTRING (116.35756 39.89644, 116.35760 39.8...",2007-05-24 11:23:11,2010-11-28 15:00:20,1,0.290231
2,0,63,"LINESTRING (116.31040 39.83986, 116.31034 39.8...",2007-06-06 12:01:13,2010-11-28 18:03:20,2,0.190267
3,0,63,"LINESTRING (116.34702 39.94381, 116.34694 39.9...",2007-06-06 14:08:34,2010-12-06 20:11:33,3,0.058142
4,0,63,"LINESTRING (117.30762 31.88415, 117.30960 31.8...",2007-06-07 00:20:20,2010-12-07 08:34:13,4,8.307572
...,...,...,...,...,...,...,...
158,0,82,"LINESTRING (116.40015 40.05171, 116.40014 40.0...",2009-05-15 16:43:20,2009-05-15 18:03:09,158,0.051115
159,0,82,"LINESTRING (116.40134 40.05151, 116.40145 40.0...",2009-05-15 18:53:33,2009-05-15 19:55:55,159,0.053233
160,0,82,"LINESTRING (116.40244 40.05007, 116.40221 40.0...",2009-05-15 21:09:06,2009-05-15 21:29:13,160,0.003288
161,0,82,"LINESTRING (116.39995 40.05285, 116.39991 40.0...",2009-05-15 22:16:17,2009-05-15 22:56:20,161,0.026567


In [406]:
mpd.TrajectoryCollection(test, 'trip_id', t='trip_start')

TrajectoryCollection with 0 trajectories

In [365]:
convertGdfWithPointlistToGdfWithLineString(test2)

GEOSException: IllegalArgumentException: point array must contain 0 or >1 elements


In [240]:
geolife_raw_gdf.query('user in [63,82]').groupby('user').apply(lambda x: segment_traj(x, stop_segments))

ValueError: No objects to concatenate

In [212]:
test.groupby('user').apply(lambda x: x.name.values)

AttributeError: 'int' object has no attribute 'values'

In [174]:
getSegmentedTrips(test)

ValueError: No objects to concatenate

In [266]:
getSegmentedTrips(test)

AssertionError: The user column should only contain one user.

In [260]:
u_id = 87

stops_seg = getStopSegments(test)

test.time = pd.to_datetime(test.time, format='%Y-%m-%d %H:%M:%S')

for index, seg in stops_seg.iterrows():
    
    if index > 0:
        end_t_last_stop = stops_seg.end_t.iloc[index-1]
    else:
        end_t_last_stop = test.time.min() - timedelta(minutes=1) # set to a time before the first point of this user's trajectory

    # Get the points that are between the last stop and before the current stop
    test.loc[(test.time < seg.start_t) &  (test.time > end_t_last_stop), 'trip_id'] = index

    # if this is the last stop of multiple stops for this user then set the trip_id for the points after the last stop
    if index == len(stops_seg)-1:
        test.loc[test.time > seg.end_t, 'trip_id'] = index + 1

    print(seg.start_t)
    print(trip.time.min())
    print(trip.time.max())

# Convert the points to linestrings for each trip
test = test.groupby('trip_id').apply(lambda x: convertGdfWithPointlistToGdfWithLineString(x))

# Drop temp trip_id column and reset index
test = test.reset_index(drop=True).drop(columns=['trip_id'])

test

                  time        lat         lon         alt  label  user  \
0  2007-08-15 00:37:06  39.981733  116.330400  157.480315      1    87   
1  2007-08-15 00:37:42  39.982000  116.330300  157.480315      1    87   
2  2007-08-15 00:38:05  39.982000  116.330050  190.288714      1    87   
3  2007-08-15 00:38:19  39.981983  116.329767  239.501312      1    87   
4  2007-08-15 00:38:37  39.981950  116.329467  282.152231      1    87   
..                 ...        ...         ...         ...    ...   ...   
4  2007-08-20 00:49:53  39.978650  116.328900  131.233596      1    87   
5  2007-08-20 00:50:50  39.977917  116.329033  124.671916      1    87   
6  2007-08-20 00:51:57  39.977517  116.329650  272.309711      1    87   
7  2007-08-20 00:52:42  39.977283  116.330250  246.062992      1    87   
8  2007-08-20 00:54:01  39.976417  116.330683  229.658793      0    87   

                      geometry  trip_id  
0   POINT (116.33040 39.98173)      0.0  
1   POINT (116.33030 39.982

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


Unnamed: 0,label,user,geometry,trip_start,trip_end
0,1,87,"LINESTRING (116.3304 39.9817333333333, 116.330...",2007-08-15 00:37:06,2007-08-15 00:48:06
1,0,87,"LINESTRING (116.329533333333 39.9769, 116.3300...",2007-08-15 10:51:48,2007-08-20 00:54:01


In [234]:
test

Unnamed: 0,time,lat,lon,alt,label,user,geometry,trip_id
0,2007-08-15 00:37:06,39.981733,116.330400,157.480315,1,87,POINT (116.33040 39.98173),0.0
1,2007-08-15 00:37:42,39.982000,116.330300,157.480315,1,87,POINT (116.33030 39.98200),0.0
2,2007-08-15 00:38:05,39.982000,116.330050,190.288714,1,87,POINT (116.33005 39.98200),0.0
3,2007-08-15 00:38:19,39.981983,116.329767,239.501312,1,87,POINT (116.32977 39.98198),0.0
4,2007-08-15 00:38:37,39.981950,116.329467,282.152231,1,87,POINT (116.32947 39.98195),0.0
...,...,...,...,...,...,...,...,...
4,2007-08-20 00:49:53,39.978650,116.328900,131.233596,1,87,POINT (116.32890 39.97865),1.0
5,2007-08-20 00:50:50,39.977917,116.329033,124.671916,1,87,POINT (116.32903 39.97792),1.0
6,2007-08-20 00:51:57,39.977517,116.329650,272.309711,1,87,POINT (116.32965 39.97752),1.0
7,2007-08-20 00:52:42,39.977283,116.330250,246.062992,1,87,POINT (116.33025 39.97728),1.0


In [259]:
test.groupby('trip_id').apply(lambda x: convertGdfWithPointlistToGdfWithLineString(x)).reset_index(drop=True)

Unnamed: 0,trip_id,label,user,geometry,trip_start,trip_end
0,0.0,1,87,"LINESTRING (116.3304 39.9817333333333, 116.330...",2007-08-15 00:37:06,2007-08-15 00:48:06
1,1.0,0,87,"LINESTRING (116.329533333333 39.9769, 116.3300...",2007-08-15 10:51:48,2007-08-20 00:54:01


In [175]:
test.groupby('user').apply(lambda x: getStopSegments(x))

Unnamed: 0_level_0,Unnamed: 1_level_0,traj_id,start_t,end_t,geometry,length,direction
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
87,0,87_2007-08-15 00:48:31,2007-08-15 00:48:31,2007-08-15 10:51:44,"LINESTRING (116.33070 39.97658, 116.32980 39.9...",86.144454,296.907431
88,0,88_2008-09-27 13:13:32,2008-09-27 13:13:32,2008-09-27 23:17:08,"LINESTRING (116.35339 39.98474, 116.35342 39.9...",206.848438,16.68818
88,1,88_2008-09-28 10:14:56,2008-09-28 10:14:56,2008-09-28 11:40:43,"LINESTRING (116.36733 39.90778, 116.36733 39.9...",214.37002,54.93248
88,2,88_2008-09-28 12:29:59,2008-09-28 12:29:59,2008-09-28 13:10:14,"LINESTRING (116.36870 39.93355, 116.36867 39.9...",117.829914,279.446139
88,3,88_2008-09-28 23:51:35,2008-09-28 23:51:35,2008-09-29 09:35:58,"LINESTRING (116.34812 39.93647, 116.34791 39.9...",167.29185,86.064516
88,4,88_2008-09-30 13:33:04,2008-09-30 13:33:04,2008-10-01 00:54:31,"LINESTRING (116.40657 39.85894, 116.40671 39.8...",144.168314,118.434868
88,5,88_2008-10-01 03:19:48,2008-10-01 03:19:48,2008-10-01 04:18:08,"LINESTRING (116.36132 39.98031, 116.36109 39.9...",174.187054,266.883089
88,6,88_2008-10-02 13:20:47,2008-10-02 13:20:47,2008-10-03 02:02:29,"LINESTRING (116.40648 39.85902, 116.40702 39.8...",158.676658,53.896181
88,7,88_2008-10-03 10:23:23,2008-10-03 10:23:23,2008-10-03 10:55:57,"LINESTRING (116.40019 39.85603, 116.40013 39.8...",136.03868,106.621472
88,8,88_2008-10-03 11:03:57,2008-10-03 11:03:57,2008-10-04 12:26:29,"LINESTRING (116.40657 39.85887, 116.40679 39.8...",192.654318,99.234343


In [174]:
start_t = getStopSegments(test).start_t.iloc[0]
end_t = getStopSegments(test).end_t.iloc[0]

# get all points before the first stop
before = test.query('time < @start_t')
after = test.query('time > @end_t')

AssertionError: The user column should only contain one user.

In [145]:
before = before.groupby('user').agg(list).reset_index().drop(columns = ['lat', 'lon', 'alt'])
before.time = before.time.apply(lambda x: min(x))
before.label = before.label.apply(lambda x: min(x))
before.geometry = before.geometry.apply(lambda x: LineString(x))

before

Unnamed: 0,user,time,label,geometry
0,87,2007-08-15 00:37:06,1,"LINESTRING (116.3304 39.9817333333333, 116.330..."


In [151]:
geolife_raw_gdf = geolife_raw_gdf.groupby('user').agg(list).reset_index()
geolife_raw_gdf.trip_start = geolife_raw_gdf.time.apply(lambda x: min(x))
geolife_raw_gdf.label = geolife_raw_gdf.label.apply(lambda x: min(x))
geolife_raw_gdf.geometry = geolife_raw_gdf.geometry.apply(lambda x: LineString(x))

geolife_raw_gdf.drop(columns = ['lat', 'lon', 'alt'])

KeyboardInterrupt: 

In [150]:
geolife_raw_gdf.groupby('user').apply(lambda g: convertGdfWithPointlistToGdfWithLineString(g))

KeyboardInterrupt: 

In [92]:
m = before.astype({'time': 'str'}).explore()
after.astype({'time': 'str'}).explore(m=m, color='purple')

In [56]:
getStopSegments(test)

Unnamed: 0,traj_id,start_t,end_t,geometry,length,direction
0,87_2007-08-15 00:48:31,2007-08-15 00:48:31,2007-08-15 10:51:44,"LINESTRING (116.33070 39.97658, 116.32980 39.9...",86.144454,296.907431


In [33]:
# Create a TrajectoryCollection from the GeoDataFrame
geolife_collection = mpd.TrajectoryCollection(test, traj_id_col='user', t='time', crs='epsg:4326')

In [34]:
# Detect and extract trajectory segments (trips) from stop point detection
detector = mpd.TrajectoryStopDetector(geolife_collection)
stop_segments = detector.get_stop_segments(min_duration=timedelta(minutes=30),
                                     max_diameter=100)

In [36]:
# Get the GeoDataFrame representation of the stop segments for this user
# The stop segments contain the 
stop_segments = stop_segments.to_traj_gdf()

In [3]:
stop_segments

NameError: name 'stop_segments' is not defined

In [58]:
stop_segments.geometry.to_list()[0:2][0].coords[-1]

(116.330816666667, 39.9756833333333)

In [60]:
stop_segments.geometry.to_list()[0:2][1].coords[0]

(116.330955, 39.9756883)