In [32]:
import geopandas as gp
import movingpandas as mpd
from datetime import timedelta
from pyproj import CRS
import pandas as pd
from tqdm import tqdm
from joblib import Parallel, delayed
import logging
# Route all log records (DEBUG and above) emitted by this notebook to a local log file.
logging.basicConfig(
    level=logging.DEBUG,
    filename='log_read_freemove.log',
)

In [33]:
# Load the raw point observations from the GeoJSON export.
# NOTE(review): the path is an absolute location on a local drive; consider making it configurable.
print('Reading data...')
raw_points_path = "W:/Master-Thesis-Repository/data/freemove_dlr_data/od_points.geojson"
raw_points_gdf = gp.read_file(raw_points_path)
print('Data read.')

Reading data...
Data read.


In [34]:
# Harmonise the column names with the geolife point dataset.
# 'lat' maps to itself and is listed only so the mapping shows every column of interest.
geolife_column_names = {
    'tid': 'traj_id',
    'datetime': 'time',
    'lng': 'lon',
    'lat': 'lat',
    'uid': 'user',
}
raw_points_gdf = raw_points_gdf.rename(columns=geolife_column_names)

In [35]:
def write_geojson(gdf, path):
    """Write ``gdf`` to ``path`` using the GeoJSON driver.

    ``gdf`` must be a ``GeoDataFrame``; anything else fails the assertion
    before any file is written.
    """
    is_geo_frame = isinstance(gdf, gp.GeoDataFrame)
    assert is_geo_frame
    gdf.to_file(path, driver='GeoJSON')



In [36]:
print('removing points outside valid lon and lat range...')
# Keep only points whose latitude lies in [-90, 90] and whose longitude lies in [-180, 180].
lat_in_range = (raw_points_gdf.lat >= -90) & (raw_points_gdf.lat <= 90)
lon_in_range = (raw_points_gdf.lon >= -180) & (raw_points_gdf.lon <= 180)
raw_points_gdf = raw_points_gdf[lat_in_range & lon_in_range]
print('points removed.')

removing points outside valid lon and lat range...
points removed.


In [37]:
print('Dropping NA values...')
len_before = len(raw_points_gdf)
logging.info('Rows before dropping NA values: {}'.format(len_before))
# Reassign rather than using inplace=True: the frame comes from a boolean-mask
# selection, and mutating such a selection in place can trigger pandas'
# SettingWithCopyWarning.
raw_points_gdf = raw_points_gdf.dropna()
len_after = len(raw_points_gdf)
logging.info('Rows after dropping NA values: {}'.format(len_after))
logging.info('Rows dropped: {}'.format(len_before - len_after))
print('NA values dropped.')

Dropping NA values...
NA values dropped.


In [38]:
print('creating trajectory collection...')
# Build the trajectory collection from the cleaned points; the column roles
# (trajectory id, object id, time, x, y) are passed explicitly.
raw_full_trip_collection = mpd.TrajectoryCollection(
    raw_points_gdf,
    traj_id_col='traj_id',
    obj_id_col='user',
    t='time',
    x='lon',
    y='lat',
)
print('trajectory collection created.')

# Log the id of the first trajectory and the total trajectory count.
raw_trajectories = raw_full_trip_collection.trajectories
logging.info(f'This is a test log of traj id: {raw_trajectories[0].id}')
logging.info(f'Number of trajectories in data: {len(raw_trajectories)}')

creating trajectory collection...
trajectory collection created.


In [39]:
# Convert to EPSG
def convert_epsg(traj):
    """Return ``traj`` reprojected to EPSG:3035.

    The ``obj_id`` of the input trajectory is copied explicitly onto the
    reprojected trajectory before it is returned.
    """
    reprojected = traj.to_crs(CRS(3035))
    reprojected.obj_id = traj.obj_id
    return reprojected
print('converting to EPSG:3035...')
# Reproject every trajectory in parallel worker processes and replace the
# collection's trajectories with the reprojected ones.
# NOTE(review): EPSG:3035 is a Europe-wide CRS, not a Berlin-specific one; the
# "(Berlin)" label in the message below is kept unchanged.
reprojection_jobs = (
    delayed(convert_epsg)(traj)
    for traj in raw_full_trip_collection.trajectories
)
raw_full_trip_collection.trajectories = Parallel(n_jobs=-2, verbose=10)(reprojection_jobs)
print('converted to EPSG:3035 (Berlin).')

converting to EPSG:3035...


[Parallel(n_jobs=-2)]: Using backend LokyBackend with 7 concurrent workers.
[Parallel(n_jobs=-2)]: Batch computation too fast (0.0465s.) Setting batch_size=2.
[Parallel(n_jobs=-2)]: Done   4 tasks      | elapsed:    0.0s
[Parallel(n_jobs=-2)]: Done  11 tasks      | elapsed:    0.0s
[Parallel(n_jobs=-2)]: Batch computation too fast (0.1229s.) Setting batch_size=4.
[Parallel(n_jobs=-2)]: Done  22 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-2)]: Done  40 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-2)]: Done  74 tasks      | elapsed:    0.7s
[Parallel(n_jobs=-2)]: Done 118 tasks      | elapsed:    1.0s
[Parallel(n_jobs=-2)]: Done 162 tasks      | elapsed:    1.5s
[Parallel(n_jobs=-2)]: Done 214 tasks      | elapsed:    2.0s
[Parallel(n_jobs=-2)]: Done 266 tasks      | elapsed:    2.4s
[Parallel(n_jobs=-2)]: Done 326 tasks      | elapsed:    3.2s
[Parallel(n_jobs=-2)]: Done 386 tasks      | elapsed:    4.0s
[Parallel(n_jobs=-2)]: Done 454 tasks      | elapsed:    4.8s
[Parallel(n_jo

converted to EPSG:3035 (Berlin).


[Parallel(n_jobs=-2)]: Done 1408 out of 1408 | elapsed:   14.2s finished


In [40]:
# Split trajectories

def split_traj(traj, MAX_DIAMETER=100, MIN_DURATION=timedelta(minutes=15), MIN_LENGTH=500):
    """Split ``traj`` at detected stops and return the resulting segments.

    Parameters
    ----------
    traj
        Trajectory to split; must expose ``id`` and ``obj_id``.
    MAX_DIAMETER, MIN_DURATION, MIN_LENGTH
        Forwarded to ``mpd.StopSplitter(...).split`` as ``max_diameter``,
        ``min_duration`` and ``min_length`` respectively.

    Returns
    -------
    The ``trajectories`` of the split result, each with ``obj_id`` copied
    from ``traj``; an empty list if splitting raised an error.
    """
    try:
        split = mpd.StopSplitter(traj).split(max_diameter=MAX_DIAMETER, min_duration=MIN_DURATION, min_length=MIN_LENGTH)
        # Copy obj_id explicitly onto every segment.
        for segment in split.trajectories:
            segment.obj_id = traj.obj_id
        return split.trajectories
    except Exception as e:
        # Catch Exception, not BaseException, so that KeyboardInterrupt and
        # SystemExit still propagate and a long run can be interrupted.
        print(e, 'Error at traj: ', traj.id)
        logging.warning(f'{e} Error at traj: {traj.id}')

        return []

print('splitting trajectories...')
# Run the stop splitter over every raw trajectory and gather all resulting
# segments in a single flat list.
split_trajs = []
for traj in tqdm(raw_full_trip_collection.trajectories):
    try:
        split_trajs.extend(split_traj(traj))
    except Exception as e:
        # split_traj handles its own failures; this guard only covers unexpected
        # ordinary errors. Catching Exception (not BaseException) keeps the loop
        # interruptible with Ctrl-C / kernel interrupt.
        print(e, 'Error at traj: ', traj.id)
        logging.warning(f'{e} Error at traj: {traj.id}')

freemove_splitted_collection = mpd.TrajectoryCollection(split_trajs)
print('trajectories split.')

splitting trajectories...


100%|██████████| 1408/1408 [00:07<00:00, 192.80it/s]


trajectories split.


In [41]:
# Smooth trajectories
def smooth_traj(traj, PROCESS_NOISE_STD=0.1, MEASUREMENT_NOISE_STD=10):
    """Smooth ``traj`` with ``mpd.KalmanSmootherCV`` and return the result.

    Parameters
    ----------
    traj
        Trajectory to smooth; must expose ``id`` and ``obj_id``.
    PROCESS_NOISE_STD, MEASUREMENT_NOISE_STD
        Forwarded to ``KalmanSmootherCV(...).smooth`` as ``process_noise_std``
        and ``measurement_noise_std``.

    Returns
    -------
    The smoothed trajectory with ``obj_id`` copied from ``traj``; if smoothing
    raised an error, the unmodified input ``traj`` is returned instead.
    """
    try:
        result = mpd.KalmanSmootherCV(traj).smooth(process_noise_std=PROCESS_NOISE_STD, measurement_noise_std=MEASUREMENT_NOISE_STD)
        result.obj_id = traj.obj_id
        return result
    except Exception as e:
        # Catch Exception, not BaseException, so that KeyboardInterrupt and
        # SystemExit still propagate and a long run can be interrupted.
        print(e, 'Error at traj: ', traj.id)
        logging.warning(f'{e} Error at traj: {traj.id}')
        return traj

print('smoothing trajectories...')
# NOTE(review): this smooths the unsplit raw trajectories, whereas the geolife
# variant of this step (previously kept here as commented-out code) smoothed
# the split collection - confirm this is intended.
smoothed_trajs = []
for traj in tqdm(raw_full_trip_collection.trajectories):
    try:
        smoothed_trajs.append(smooth_traj(traj))
    except Exception as e:
        # smooth_traj handles its own failures; this guard only covers unexpected
        # ordinary errors. Catching Exception (not BaseException) keeps the loop
        # interruptible with Ctrl-C / kernel interrupt.
        print(e, 'Error at traj: ', traj.id)
        logging.warning(f'{e} Error at traj: {traj.id}')

# A separate list name is used so the collection variable never temporarily
# holds a plain list.
freemove_smooth_collection = mpd.TrajectoryCollection(smoothed_trajs)
print('trajectories smoothed.')

smoothing trajectories...


100%|██████████| 1408/1408 [00:19<00:00, 74.03it/s]


trajectories smoothed.


In [42]:
# Generalize trajectories
def generalize_traj(traj, TOLERANCE=1.0):
    """Generalize ``traj`` with ``mpd.DouglasPeuckerGeneralizer`` and return the result.

    Parameters
    ----------
    traj
        Trajectory to generalize; must expose ``id`` and ``obj_id``.
    TOLERANCE
        Forwarded to ``DouglasPeuckerGeneralizer(...).generalize`` as ``tolerance``.

    Returns
    -------
    The generalized trajectory with ``obj_id`` copied from ``traj``; if
    generalization raised an error, the unmodified input ``traj`` is returned
    instead.
    """
    try:
        result = mpd.DouglasPeuckerGeneralizer(traj).generalize(tolerance=TOLERANCE)
        result.obj_id = traj.obj_id
        return result
    except Exception as e:
        # Catch Exception, not BaseException, so that KeyboardInterrupt and
        # SystemExit still propagate and a long run can be interrupted.
        print(e, 'Error at traj: ', traj.id)
        logging.warning(f'{e} Error at traj: {traj.id}')
        return traj

# Douglas-Peucker generalization for non-smoothed trajectories
print('generalizing trajectories... (1)')
logging.info('generalizing trajectories... (1)')
generalized_trajs = []
for traj in tqdm(raw_full_trip_collection.trajectories):
    try:
        generalized_trajs.append(generalize_traj(traj))
    except Exception as e:
        # generalize_traj handles its own failures; this guard only covers
        # unexpected ordinary errors. Catching Exception (not BaseException)
        # keeps the loop interruptible with Ctrl-C / kernel interrupt.
        print(e, 'Error at traj: ', traj.id)
        logging.warning(f'{e} Error at traj: {traj.id}')
# A separate list name is used so the collection variable never temporarily
# holds a plain list.
freemove_generalized_collection = mpd.TrajectoryCollection(generalized_trajs)
print('trajectories generalized. (1)')
logging.info('trajectories generalized. (1)')

generalizing trajectories... (1)


100%|██████████| 1408/1408 [00:05<00:00, 280.29it/s]


trajectories generalized. (1)


In [43]:
# Douglas-Peucker generalization for smoothed trajectories
print('generalizing trajectories... (2)')
logging.info('generalizing trajectories... (2)')
smooth_generalized_trajs = []
for traj in tqdm(freemove_smooth_collection.trajectories):
    try:
        smooth_generalized_trajs.append(generalize_traj(traj))
    except Exception as e:
        # generalize_traj handles its own failures; this guard only covers
        # unexpected ordinary errors. Catching Exception (not BaseException)
        # keeps the loop interruptible with Ctrl-C / kernel interrupt.
        print(e, 'Error at traj: ', traj.id)
        logging.warning(f'{e} Error at traj: {traj.id}')

# A separate list name is used so the collection variable never temporarily
# holds a plain list.
freemove_smooth_generalized_collection = mpd.TrajectoryCollection(smooth_generalized_trajs)
print('trajectories generalized. (2)')
logging.info('trajectories generalized. (2)')

generalizing trajectories... (2)


100%|██████████| 1408/1408 [00:03<00:00, 415.63it/s]


trajectories generalized. (2)


In [44]:
def convert_trajcollection_to_gdf(trajcollection, crs='EPSG:3035'):
    """Convert a trajectory collection into a single GeoDataFrame.

    Each trajectory is converted with ``to_traj_gdf()`` and tagged with a
    ``user_id`` column taken from the trajectory's ``obj_id``; the per-trajectory
    frames are then concatenated.

    Parameters
    ----------
    trajcollection
        Object exposing a ``trajectories`` iterable.
    crs
        CRS assigned to the resulting GeoDataFrame (defaults to ``'EPSG:3035'``,
        the value that was previously hard-coded).

    Returns
    -------
    A ``GeoDataFrame`` built from the concatenated per-trajectory frames.
    """
    gdfs = []
    for traj in tqdm(trajcollection.trajectories):
        traj_gdf = traj.to_traj_gdf()
        traj_gdf['user_id'] = traj.obj_id
        gdfs.append(traj_gdf)

    # Concatenate and wrap exactly once (the result was previously built twice
    # and the first copy discarded).
    return gp.GeoDataFrame(pd.concat(gdfs), crs=crs)



In [45]:
logging.info('converting to gdf...')
# Convert each trajectory collection, in this fixed order, into its GeoDataFrame.
(
    freemove_raw,
    freemove_splitted,
    freemove_smooth,
    freemove_generalized,
    freemove_smooth_generalized,
) = (
    convert_trajcollection_to_gdf(collection)
    for collection in (
        raw_full_trip_collection,
        freemove_splitted_collection,
        freemove_smooth_collection,
        freemove_generalized_collection,
        freemove_smooth_generalized_collection,
    )
)
logging.info('converted to gdf.')

100%|██████████| 1408/1408 [00:02<00:00, 508.66it/s]
100%|██████████| 1024/1024 [00:01<00:00, 618.34it/s]
100%|██████████| 1408/1408 [00:02<00:00, 549.02it/s]
100%|██████████| 1408/1408 [00:02<00:00, 584.38it/s]
100%|██████████| 1408/1408 [00:02<00:00, 526.18it/s]


In [47]:
# Persist every trajectory GeoDataFrame as a GeoJSON file for further processing.
logging.info('writing to geojson...')
trajectory_outputs = (
    (freemove_raw, '../data/freemove/freemove_raw.geojson'),
    (freemove_splitted, '../data/freemove/freemove_splitted.geojson'),
    (freemove_smooth, '../data/freemove/freemove_smooth.geojson'),
    (freemove_generalized, '../data/freemove/freemove_generalized.geojson'),
    (freemove_smooth_generalized, '../data/freemove/freemove_smooth_generalized.geojson'),
)
for trajectory_gdf, output_path in trajectory_outputs:
    write_geojson(trajectory_gdf, output_path)

# Also export the point-level frames of the raw and the smoothed+generalized collections.
freemove_point_gdf = raw_full_trip_collection.to_point_gdf().reset_index()
write_geojson(freemove_point_gdf, '../data/freemove/freemove_raw_point.geojson')
freemove_smoothed_generalized_point = freemove_smooth_generalized_collection.to_point_gdf().reset_index()
write_geojson(freemove_smoothed_generalized_point, '../data/freemove/freemove_smooth_generalized_point.geojson')
logging.info('written to geojson.')


  pd.Int64Index,
  pd.Int64Index,
  pd.Int64Index,
