In [2]:
import geopandas as gp
from shapely import Point, LineString
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
# Register tqdm's progress-bar variants on pandas objects; this is what makes
# `DataFrame.progress_apply` (used further down) available.
tqdm.pandas()

In [36]:
# Load the raw trips that should be privatised.
# `read_file` has no `geometry` parameter (unknown keywords are forwarded to
# the I/O engine as driver open options), so none is passed; the geometry
# column is detected from the GeoJSON itself.
raw_full_trip_gdf = gp.read_file("../data/freemove/freemove_raw.geojson")

In [39]:
# Derive each trip's start point (SP) and end point (EP) from the first and
# last vertex of its linestring. The data loader performs the same steps, so
# they are replicated here to end up with the same data format.
for endpoint_column, vertex_index in (('TRIP_SP', 0), ('TRIP_EP', -1)):
    raw_full_trip_gdf[endpoint_column] = raw_full_trip_gdf.geometry.apply(
        # Bind the index as a default so each lambda keeps its own value.
        lambda line, vertex_index=vertex_index: Point(line.coords[vertex_index])
    )

In [54]:
def cut_epz(row, rad_l=100, rad_h=300, rng=None):
    """Truncate a trip linestring with random start-/end-point privacy zones.

    A circular zone is buffered around ``row.TRIP_SP`` and another one around
    ``row.TRIP_EP``. Each zone gets its own integer radius, drawn uniformly
    from ``[rad_l, rad_h]`` (both bounds inclusive). Every vertex of
    ``row.geometry`` that lies within either zone is dropped and the remaining
    vertices, in their original order, form the returned linestring. Note that
    vertices in the middle of a trip that pass through a zone are dropped too.

    Args:
        row: Row of a GeoDataFrame (or any object) exposing ``geometry``
            (a linestring), ``TRIP_SP`` and ``TRIP_EP`` (points).
        rad_l (int): Smallest possible zone radius, in units of the data's
            CRS (metres, assuming a projected metric CRS -- TODO confirm).
        rad_h (int): Largest possible zone radius, same units as ``rad_l``.
        rng (numpy.random.Generator, optional): Random generator used to draw
            the radii; pass a seeded generator for reproducible output.
            Defaults to ``None``, which uses NumPy's global random state.

    Returns:
        LineString: The truncated linestring. An empty ``LineString`` is
        returned when fewer than two vertices survive (a linestring cannot be
        built from a single point) or when ``row.geometry`` has no coordinate
        sequence.

    Raises:
        ValueError: If ``rad_l`` is greater than ``rad_h`` (raised by NumPy).
    """
    # Draw the two radii independently; the upper bound of both NumPy calls is
    # exclusive, hence the +1 to make rad_h itself reachable.
    if rng is None:
        sp_rad = np.random.randint(rad_l, rad_h + 1)
        ep_rad = np.random.randint(rad_l, rad_h + 1)
    else:
        sp_rad = rng.integers(rad_l, rad_h + 1)
        ep_rad = rng.integers(rad_l, rad_h + 1)

    sp_zone = row.TRIP_SP.buffer(sp_rad)
    ep_zone = row.TRIP_EP.buffer(ep_rad)

    try:
        coords = list(row.geometry.coords)
    except (AttributeError, NotImplementedError):
        # Missing geometry (None) or a geometry type without a single
        # coordinate sequence (e.g. multi-part): nothing to keep.
        return LineString()

    # Keep only the vertices that lie outside both privacy zones.
    kept = []
    for xy in coords:
        vertex = Point(xy)
        if vertex.within(sp_zone) or vertex.within(ep_zone):
            continue
        kept.append(xy)

    # Zero or one surviving vertex cannot form a line -> empty linestring.
    if len(kept) < 2:
        return LineString()
    return LineString(kept)
    
    

In [41]:
# Privatise every trip: run cut_epz row by row (with a tqdm progress bar) and
# overwrite the geometry column with the truncated linestrings.
private_geometries = raw_full_trip_gdf.progress_apply(cut_epz, axis=1)
raw_full_trip_gdf['geometry'] = private_geometries

raw_full_trip_gdf.head(20)

  0%|          | 0/1408 [00:00<?, ?it/s]

In [52]:
# Write the privatised trips to GeoJSON without the helper point columns.
# The columns are dropped on a temporary copy rather than in place, so that
# `raw_full_trip_gdf` keeps TRIP_SP / TRIP_EP for the cells below (which still
# read them) and so that re-running this cell does not fail on columns that
# were already removed.
raw_full_trip_gdf.drop(columns=['TRIP_SP', 'TRIP_EP']).to_file(
    "../data/freemove/freemove_raw_private.geojson", driver='GeoJSON'
)

  pd.Int64Index,


In [7]:
# Test / example: build privacy zones with a fixed radius instead of a random one.

EPZ_RADIUS = 200

for zone_column, point_column in (('START_EPZ', 'TRIP_SP'), ('END_EPZ', 'TRIP_EP')):
    raw_full_trip_gdf[zone_column] = raw_full_trip_gdf[point_column].apply(
        lambda pt: pt.buffer(EPZ_RADIUS)
    )

# Sanity check of the radius: the distance between two opposite vertices of
# the first start zone should equal the diameter (2 * r).
from scipy.spatial import distance

# Vertex 32 is presumably diametrically opposite vertex 0 under shapely's
# default buffer resolution -- the recorded output (400.0) agrees with that.
first_zone_ring = raw_full_trip_gdf.START_EPZ[0].exterior.coords

distance.euclidean(first_zone_ring[0], first_zone_ring[32])

400.0

In [19]:
# Example with plots: privatise the first 100 trips and overlay one trip's
# stored geometry with its truncated version on an interactive map.
test = raw_full_trip_gdf.head(100).copy()
test['geo_private'] = test.apply(cut_epz, axis=1)

# Discard trips whose linestring is empty after privatisation (0 points left)
test['NR_POINTS'] = test['geo_private'].apply(lambda line: len(line.coords))
test = test.query('NR_POINTS > 0').copy()

# Base layer: the geometry column of the selected trip
selected_trip = test.query('TRIP_ID == 978933')
m = selected_trip[['geometry']].explore()

# Overlay: the privatised linestring of the same trip, drawn in purple
private_layer = gp.GeoDataFrame(
    test[['TRIP_ID', 'geo_private']].dropna(),
    geometry='geo_private',
    crs='epsg:3035',
)
private_layer.query('TRIP_ID == 978933').explore(m=m, color='purple')