In [1]:
import geopandas as gp
from shapely import Point, LineString
from tqdm.notebook import tqdm
tqdm.pandas()

In [3]:
# Load the raw GeoLife trajectory file into a GeoDataFrame.
raw_full_trip_gdf = gp.read_file(
    "../data/geolife/geolife_raw.geojson",
    geometry='geometry',
)

In [4]:
# Derive each trip's start/end point and normalise the column names.

# The first and last vertex of every trajectory geometry become the trip's
# start point (SP) and end point (EP).
raw_full_trip_gdf['TRIP_SP'] = raw_full_trip_gdf.geometry.apply(lambda geom: Point(geom.coords[0]))
raw_full_trip_gdf['TRIP_EP'] = raw_full_trip_gdf.geometry.apply(lambda geom: Point(geom.coords[-1]))

# Rename the raw columns to the TRIP_* / PERSON_ID scheme.
# `rename` skips keys that are not present, so re-running this statement is harmless.
raw_full_trip_gdf.rename(columns={
    'start_t': 'TRIP_START',
    'end_t': 'TRIP_END',
    'traj_id': 'TRIP_ID',
    'length': 'TRIP_LEN_IN_MTRS',
    'user_id': 'PERSON_ID'}, inplace=True)

# Drop the 'direction' column, which is treated as unnecessary here.
# errors='ignore' keeps the cell re-runnable: without it a second execution
# raises KeyError because the column has already been removed.
raw_full_trip_gdf.drop(columns=['direction'], errors='ignore', inplace=True)

In [5]:
# Radius of the start/end zones (EPZ), expressed in the units of the data's CRS
# (presumably metres — confirm against the CRS of the loaded file).
EPZ_RADIUS = 200

# Buffer every trip start point and end point by EPZ_RADIUS to obtain one zone
# polygon per endpoint.
raw_full_trip_gdf['START_EPZ'] = raw_full_trip_gdf['TRIP_SP'].apply(lambda pt: pt.buffer(EPZ_RADIUS))
raw_full_trip_gdf['END_EPZ'] = raw_full_trip_gdf['TRIP_EP'].apply(lambda pt: pt.buffer(EPZ_RADIUS))

In [6]:
# Sanity check of the buffer size: measure the distance between vertex 0 and
# vertex 32 of the first start zone's exterior ring. These two vertices are
# expected to lie opposite each other (assumes the default buffer resolution),
# so the result should equal the diameter, i.e. 2 * EPZ_RADIUS.
from scipy.spatial import distance

distance.euclidean(
    raw_full_trip_gdf['START_EPZ'][0].exterior.coords[0],
    raw_full_trip_gdf['START_EPZ'][0].exterior.coords[32],
)


400.0

In [7]:
def cut_epz(row):
    """Return the row's trajectory with all vertices inside either EPZ removed.

    Parameters
    ----------
    row
        Any object exposing ``geometry`` (a geometry with ``.coords``),
        ``START_EPZ`` and ``END_EPZ`` (geometries usable as the argument of
        ``Point.within``) — e.g. one row of the trips GeoDataFrame.

    Returns
    -------
    LineString
        A line built from the vertices that lie in neither zone, in their
        original order. An empty ``LineString`` is returned when the row has
        no geometry or when fewer than two vertices remain, since a line
        cannot be built from a single vertex.

    Notes
    -----
    Only the two cases above are mapped to an empty line. Any other failure
    (for example a geometry type without ``.coords``) is allowed to propagate
    instead of being silently converted into an empty result.
    """
    geom = row.geometry
    if geom is None:
        return LineString()

    # Look the zones up once instead of once per vertex.
    start_zone = row.START_EPZ
    end_zone = row.END_EPZ

    kept = []
    for xy in geom.coords:
        pt = Point(xy)  # build the point once and reuse it for both tests
        if not pt.within(start_zone) and not pt.within(end_zone):
            kept.append(xy)

    # A LineString needs zero or at least two vertices; a lone survivor is
    # treated the same as "nothing left".
    if len(kept) < 2:
        return LineString()
    return LineString(kept)

In [8]:
# Record how many vertices each trajectory geometry has.
raw_full_trip_gdf['NR_POINTS'] = raw_full_trip_gdf.geometry.apply(
    lambda geom: len(geom.coords)
)
# Optional filter, currently disabled: keep only trips with more than 10 points.
# raw_full_trip_gdf = raw_full_trip_gdf.query('NR_POINTS > 10')

In [9]:
# Apply cut_epz to every row with a tqdm progress bar. The resulting Series is
# only displayed as the cell output; it is not stored on the frame.
raw_full_trip_gdf.progress_apply(cut_epz, axis=1)

  0%|          | 0/17783 [00:00<?, ?it/s]

0                                         LINESTRING EMPTY
1        LINESTRING (442827.2209975433 4425313.63413397...
2                                         LINESTRING EMPTY
3        LINESTRING (442954.51692436734 4425207.2319982...
4                                         LINESTRING EMPTY
                               ...                        
17778    LINESTRING (440623.6507312886 4426219.44492938...
17779    LINESTRING (452277.6457196546 4427309.07799563...
17780    LINESTRING (441441.660483156 4426304.432357409...
17781    LINESTRING (452298.09832655755 4427171.7751059...
17782    LINESTRING (452389.71076034335 4426805.4527143...
Length: 17783, dtype: object

In [56]:
# Show the start zones of the first two trips on an interactive map, using
# START_EPZ as the active geometry for the view.
raw_full_trip_gdf.set_geometry('START_EPZ').head(2).explore()