In [None]:
import geopandas as gp
import pandas as pd
import pathlib
import numpy as np
from shapely.geometry import LineString, MultiLineString
from shapely import ops

In [None]:
# Read the leg geometries and the origin/destination points from the secure Veracrypt partition
raw_full_gdf = gp.read_file("W:/Master-Thesis-Repository/data/freemove_dlr_data/raw_full.geojson")
raw_points_gdf = gp.read_file("W:/Master-Thesis-Repository/data/freemove_dlr_data/od_points.geojson")

# Normalise every timestamp column: parse it as a datetime, then write it back
# as a formatted string so the columns can be plotted with Geopandas.
for frame, column in (
    (raw_full_gdf, 'LEG_START'),
    (raw_full_gdf, 'LEG_END'),
    (raw_points_gdf, 'datetime'),
):
    parsed = pd.to_datetime(frame[column], format='%Y-%m-%d %H:%M:%S')
    frame[column] = parsed.dt.strftime('%Y-%m-%d %H:%M:%S')

In [None]:
# Merge linestrings and other attributes per trip, across all of its legs.

# One stable sort by (person, trip, leg start) puts every trip's legs in
# chronological order. LEG_START holds 'YYYY-MM-DD HH:MM:SS' strings at this
# point, so lexicographic order equals chronological order. This replaces a
# groupby().apply(sort_values) round trip, which relies on the grouping columns
# being passed through apply() (deprecated in recent pandas releases).
legs_sorted = (
    raw_full_gdf
    .sort_values(['PERSON_ID', 'TRIP_ID', 'LEG_START'], kind='stable')
    .reset_index(drop=True)
)

# Collapse each trip to a single row; every remaining column becomes a list
# holding one entry per leg.
full_gdf = legs_sorted.groupby(['PERSON_ID', 'TRIP_ID']).agg(list).reset_index()

# Each cell of 'geometry' is now a list of leg LineStrings. Wrap the list in a
# MultiLineString before merging: LineString() expects coordinates, not
# geometries. linemerge() joins legs that touch end to end and leaves
# disconnected legs as separate parts.
full_gdf['trip_geometry'] = full_gdf['geometry'].map(lambda legs: ops.linemerge(MultiLineString(legs)))

# Trip time span: earliest leg start to latest leg end
full_gdf['TRIP_START'] = full_gdf['LEG_START'].map(min)
full_gdf['TRIP_END'] = full_gdf['LEG_END'].map(max)

# Trip totals are the sums over the legs
full_gdf['TRIP_LEN_IN_MTRS'] = full_gdf['LEG_LEN_IN_MTRS'].map(sum)
full_gdf['TRIP_DURATION_IN_SECS'] = full_gdf['LEG_DURATION_IN_SECS'].map(sum)

full_gdf

Unnamed: 0,PERSON_ID,TRIP_ID,LEG_ID,LEG_START,LEG_END,TRIP_PURPOSE_IDS,TRAFFIC_MODE,LEG_LEN_IN_MTRS,LEG_DURATION_IN_SECS,geometry,trip_geometry,TRIP_START,TRIP_END,TRIP_LEN_IN_MTRS,TRIP_DURATION_IN_SECS
0,16348,985222,"[5751809, 5751810]","[2022-10-31 09:15:57, 2022-10-31 09:27:59]","[2022-10-31 09:27:58, 2022-10-31 09:34:08]","[2, 2]","[bicycle, walk]","[2468, 106]","[721, 369]","[LINESTRING (13.504967 52.454451, 13.504983 52...","MULTILINESTRING ((13.504967 52.454451, 13.5049...",2022-10-31 09:15:57,2022-10-31 09:34:08,2574,1090
1,16348,985223,[5751811],[2022-10-31 11:04:22],[2022-10-31 11:09:12],[7],[walk],[353],[290],"[LINESTRING (13.526849 52.45699, 13.526853 52....","LINESTRING (13.526849 52.45699, 13.526853 52.4...",2022-10-31 11:04:22,2022-10-31 11:09:12,353,290
2,16348,985301,[5752119],[2022-10-31 11:38:24],[2022-10-31 11:43:51],[7],[walk],[315],[327],"[LINESTRING (13.524382 52.455285, 13.52433 52....","LINESTRING (13.524382 52.455285, 13.52433 52.4...",2022-10-31 11:38:24,2022-10-31 11:43:51,315,327
3,16348,985302,"[5752120, 5752121, 5752122]","[2022-10-31 12:01:50, 2022-10-31 12:06:41, 202...","[2022-10-31 12:06:40, 2022-10-31 12:11:48, 202...","[2, 2, 2]","[walk, stationary, walk]","[318, 8, 9]","[290, 307, 11]","[LINESTRING (13.526006 52.455956, 13.526012 52...","MULTILINESTRING ((13.526006 52.455956, 13.5260...",2022-10-31 12:01:50,2022-10-31 12:12:00,335,608
4,16348,985459,"[5752578, 5752579, 5752580, 5752581]","[2022-10-31 13:23:11, 2022-10-31 13:34:33, 202...","[2022-10-31 13:34:32, 2022-10-31 13:45:25, 202...","[6,8, 6,8, 6,8, 6,8]","[walk, bicycle, walk, bicycle]","[380, 2104, 94, 925]","[681, 652, 181, 289]","[LINESTRING (13.52692 52.457104, 13.526942 52....","MULTILINESTRING ((13.509107 52.452574, 13.5090...",2022-10-31 13:23:11,2022-10-31 13:53:17,3503,1803
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1403,17273,1008119,[5836336],[2022-11-11 09:32:50],[2022-11-11 09:58:38],[8],[walk],[2335],[1548],"[LINESTRING (13.398465 52.514515, 13.398465 52...","LINESTRING (13.398465 52.514515, 13.398465 52....",2022-11-11 09:32:50,2022-11-11 09:58:38,2335,1548
1404,17273,1008251,"[5836832, 5836834, 5836835, 5836836]","[2022-11-11 11:23:24, 2022-11-11 11:37:38, 202...","[2022-11-11 11:37:37, 2022-11-11 11:40:56, 202...","[1, 1, 1, 1]","[public_transport_train, public_transport_chan...","[2652, 118, 1716, 827]","[853, 198, 210, 528]","[LINESTRING (13.406914 52.498777, 13.407524 52...","MULTILINESTRING ((13.374089 52.499041, 13.3739...",2022-11-11 11:23:24,2022-11-11 11:53:16,5313,1789
1405,17273,1008366,"[5837091, 5837092, 5837093, 5837094]","[2022-11-11 15:13:35, 2022-11-11 15:17:21, 202...","[2022-11-11 15:17:20, 2022-11-11 15:17:54, 202...","[8, 8, 8, 8]","[walk, bicycle, walk, public_transport_bus]","[288, 25, 627, 866]","[225, 33, 673, 312]","[LINESTRING (13.407482 52.48918, 13.407433 52....","MULTILINESTRING ((13.40474 52.489805, 13.40471...",2022-11-11 15:13:35,2022-11-11 15:34:21,1806,1243
1406,17273,1011765,"[5857525, 5857526, 5857573, 5857574]","[2022-11-14 15:54:12, 2022-11-14 15:58:52, 202...","[2022-11-14 15:58:51, 2022-11-14 16:05:46, 202...","[8, 8, 8, 8]","[public_transport_train, walk, public_transpor...","[2353, 185, 925, 477]","[279, 414, 113, 360]","[LINESTRING (13.405542 52.498799, 13.405667 52...","MULTILINESTRING ((13.360381 52.500462, 13.3603...",2022-11-14 15:54:12,2022-11-14 16:13:41,3940,1166


In [33]:
# Interactive map of all legs recorded for person 16643, coloured by trip
is_selected_person = raw_full_gdf['PERSON_ID'] == 16643
raw_full_gdf[is_selected_person].explore(column="TRIP_ID")

In [25]:
# Preview the first three legs of the raw data
raw_full_gdf.iloc[:3]

Unnamed: 0,PERSON_ID,TRIP_ID,LEG_ID,LEG_START,LEG_END,TRIP_PURPOSE_IDS,TRAFFIC_MODE,LEG_LEN_IN_MTRS,LEG_DURATION_IN_SECS,geometry
0,16643,987965,5762414,2022-11-01 14:04:58,2022-11-01 14:11:35,8,bicycle,1485,397,"LINESTRING (13.28695 52.44254, 13.28692 52.442..."
1,16643,987965,5762421,2022-11-01 14:32:15,2022-11-01 14:56:44,8,bicycle,7237,1469,"LINESTRING (13.33133 52.49616, 13.33133 52.496..."
2,16643,995084,5789334,2022-11-04 13:47:02,2022-11-04 14:07:53,6,bicycle,6069,1251,"LINESTRING (13.40983 52.56442, 13.40982 52.564..."


In [20]:
# Find the earliest and latest point of each trip (across all of its legs) to
# identify the trip's start point and end point.
#
# The per-trip min/max timestamp is broadcast back onto every row with
# transform(), so the selection is a single vectorised comparison. This avoids
# running a Python lambda per trip through groupby().apply(), which also relies
# on the grouping columns ('uid', 'tid') being passed into apply() (deprecated
# in recent pandas releases) - and those columns are needed for the merge later.
trip_datetimes = raw_points_gdf.groupby(['uid', 'tid'])['datetime']
is_trip_end = raw_points_gdf['datetime'] == trip_datetimes.transform('max')
is_trip_start = raw_points_gdf['datetime'] == trip_datetimes.transform('min')

# The stable sort by trip key reproduces the row order of the grouped result:
# trips in key order, rows inside a trip in their original order. Ties (several
# points sharing the extreme timestamp) are all kept.
e_points = (
    raw_points_gdf[is_trip_end]
    .sort_values(['uid', 'tid'], kind='stable')
    .reset_index(drop=True)
)
e_points['sep'] = 'e'

s_points = (
    raw_points_gdf[is_trip_start]
    .sort_values(['uid', 'tid'], kind='stable')
    .reset_index(drop=True)
)
s_points['sep'] = 's'

In [21]:
# Attach the start/end marker ('sep') to the original points as a new column.
# Points that are neither a trip start nor a trip end get a missing 'sep'.
merge_keys = ['uid', 'tid', 'leg_id', 'datetime']

# Only the keys and the marker are taken from the right-hand side. Merging the
# full frames would duplicate every other column with a suffix and require
# dropping them afterwards; the previous unanchored regex '_y' would also have
# dropped any genuine column whose name merely contains '_y'.
trip_endpoints = pd.concat([e_points, s_points])[merge_keys + ['sep']]
points_gdf = pd.merge(raw_points_gdf, trip_endpoints, on=merge_keys, how='left')

points_gdf.head(3)

Unnamed: 0,uid,tid,leg_id,datetime,lng,lat,geometry,sep
0,16643,987965,5762414,2022-11-01 14:04:58,13.286952,52.442537,POINT (13.28695 52.44254),s
1,16643,987965,5762414,2022-11-01 14:11:35,13.280938,52.450522,POINT (13.28094 52.45052),
2,16643,987965,5762421,2022-11-01 14:32:15,13.331328,52.496161,POINT (13.33133 52.49616),


In [22]:
# Keep only the points marked as the start ('s') or end ('e') of a trip; every
# point without a marker is dropped.
# .copy() detaches the result from the unfiltered frame, so that adding columns
# to it later does not raise pandas' SettingWithCopyWarning.
points_gdf = points_gdf[points_gdf['sep'].notnull()].copy()

In [24]:
# Generate home locations (HL): flag every point whose time of day lies between
# 06:00 and 10:00 (both bounds inclusive) with hl = 1, all other points with 0.
points_gdf = points_gdf.reset_index(drop=True)

# Look up the positions inside the time window once for the whole frame. The
# previous per-row lambda re-ran between_time() for every single point
# (quadratic cost) and relied on `x in Series`, which tests the Series *index*
# rather than its values.
point_times = pd.DatetimeIndex(pd.to_datetime(points_gdf['datetime']))
in_window = np.zeros(len(points_gdf), dtype=bool)
in_window[point_times.indexer_between_time('6:00', '10:00')] = True

# Stored as object dtype holding the integers 1 / 0, as before.
points_gdf['hl'] = pd.Series(np.where(in_window, 1, 0), index=points_gdf.index).astype(object)
points_gdf

Unnamed: 0,uid,tid,leg_id,datetime,lng,lat,geometry,sep,hl
0,16643,987965,5762414,2022-11-01 14:04:58,13.286952,52.442537,POINT (13.28695 52.44254),s,0
1,16643,987965,5762421,2022-11-01 14:56:44,13.347070,52.547154,POINT (13.34707 52.54715),e,0
2,16643,995084,5789334,2022-11-04 13:47:02,13.409835,52.564417,POINT (13.40983 52.56442),s,0
3,16643,995084,5789334,2022-11-04 14:07:53,13.343607,52.545280,POINT (13.34361 52.54528),e,0
4,16363,1001020,5808510,2022-11-06 11:50:32,13.512884,52.454531,POINT (13.51288 52.45453),s,0
...,...,...,...,...,...,...,...,...,...
2811,17248,1003107,5818441,2022-11-07 12:13:13,13.433807,52.484488,POINT (13.43381 52.48449),s,0
2812,17248,1003107,5818441,2022-11-07 12:17:36,13.433274,52.484010,POINT (13.43327 52.48401),e,0
2813,17263,1002942,5817829,2022-11-07 10:31:36,13.536408,52.431660,POINT (13.53641 52.43166),e,0
2814,17269,1003131,5818507,2022-11-07 15:01:30,13.326065,52.510312,POINT (13.32606 52.51031),s,0


In [243]:
# Load the tessellation polygons from the same data directory as the raw data
tessellation_file = "W:/Master-Thesis-Repository/data/freemove_dlr_data/tessellation.geojson"
tesselation_gdf = gp.read_file(tessellation_file)

In [244]:
# Build point geometries from the lng/lat columns (EPSG:4326)
point_geometries = gp.points_from_xy(points_gdf.lng, points_gdf.lat)
located_points = gp.GeoDataFrame(points_gdf, geometry=point_geometries, crs = 4326)

# Spatial join: pair every tessellation tile with the points that fall on it.
# The tiles are the left side, so the result carries the tile geometry, and the
# inner join drops tiles without any point as well as points outside all tiles.
tiles = tesselation_gdf[["tile_id", "geometry"]]
gdf = gp.sjoin(left_df=tiles, right_df=located_points, how="inner")

In [245]:
# Interactive map restricted to the rows flagged as home location (hl == 1)
is_home_location = gdf['hl'] == 1
gdf[is_home_location].explore()

In [233]:
# Interactive map of `gdf`, coloured by the hl flag and showing it as tooltip
gdf_map_options = {"column": "hl", "tooltip": "hl"}
gdf.explore(**gdf_map_options)

  cat = pd.Categorical(gdf[column][~nan_idx], categories=categories)


In [226]:
# Interactive map of `points_gdf`, coloured by the hl flag and showing it as tooltip
points_map_options = {"column": "hl", "tooltip": "hl"}
points_gdf.explore(**points_map_options)

  cat = pd.Categorical(gdf[column][~nan_idx], categories=categories)
