In [None]:
import warnings
warnings.filterwarnings("ignore")

import osmnx as ox
import networkx as nx
import pandas as pd
import skmob
import geopandas as gpd
import shapely
from shapely import Point
import numpy as np
from matplotlib import pyplot as plt
from geovoronoi import voronoi_regions_from_coords

# Exercise 1: Differences between GPS and CDR trajectories in Pisa

In this exercise, you will explore the differences between trajectories derived from GPS data and those based on call detail records (CDR) in the city of Pisa. You will need to identify a specific route within the city and compare the trajectories obtained using two different data sources.

Steps:

1. Obtain some GPS data
2. Position of the antennas -> public dataset -> OpenCellID
3. Get a tessellation (Voronoi) based on the antennas
4. Spatial join to associate each GPS point with its closest antenna
5. Take the centroid of the Voronoi cell as the CDR point of the GPS point

**Pseudocode to obtain a CDR-like trajectory from a GPS trajectory**
```
cdr_trajectory = []
For each (gps_point, time) in gps_trajectory: # take into account time
    1. voronoi_cell = spatial_join(...)
    2. cdr_point = voronoi_cell.centroid
    3. cdr_trajectory.append((cdr_point, time))

visualize gps and cdr trajectories
```

## Step 1. Obtain some GPS data

Download the road network of Pisa. Create a route on it and translate it into a sequence of GPS points.


In [None]:
# Central point, as (latitude, longitude), around which the road network is built
center_point = (43.7159556489785, 10.401897839562224)

In [None]:
# Download the walkable street network within 2000 m of the central point,
# then draw it (the plot is the cell's output).
G = ox.graph_from_point(center_point, dist=2000, network_type="walk")
ox.plot_graph(G)

In [None]:
# Start and end locations of the trip, each given as (latitude, longitude)

start_loc = (43.72044281717659, 10.408522048226846)
end_loc = (43.70850381626197, 10.398664981308885)

In [None]:
# Snap the start and end locations to their closest nodes of the road network.
# The location tuples are (latitude, longitude), whereas nearest_nodes takes
# X (longitude) and Y (latitude), hence the swapped indices.

start_node = ox.nearest_nodes(G, X=start_loc[1], Y=start_loc[0])
end_node = ox.nearest_nodes(G, X=end_loc[1], Y=end_loc[0])

print(start_node)
print(end_node)

In [None]:
# Find the route between the two nodes that minimises the summed "length"
# edge attribute, then draw it on top of the road network.

shortest_path = nx.shortest_path(G, source=start_node, target=end_node, weight="length")
ox.plot_graph_route(G, shortest_path)

In [None]:
# translate the sequence of nodes into a sequence of coordinates and create a TrajDataFrame

# The graph stores each node's longitude under "x" and its latitude under "y".
list_lat = [G.nodes[node]["y"] for node in shortest_path]
list_lng = [G.nodes[node]["x"] for node in shortest_path]

start_date = pd.to_datetime("2024-10-18 12:00:00")

# One timestamp per route node, 10 seconds apart, starting at start_date.
# The total duration therefore depends on how many nodes the route has.
# "10s" is the lowercase seconds alias; the uppercase "S" form is deprecated
# in recent pandas releases.
time_sequence = pd.date_range(start=start_date, periods=len(shortest_path), freq="10s")

# Build the frame in one go; the scalar uid is broadcast to every row.
df_traj = pd.DataFrame(
    {
        "uid": 42,
        "lat": list_lat,
        "lng": list_lng,
        "datetime": time_sequence,
    }
)
tdf = skmob.TrajDataFrame(df_traj)

In [None]:
# Plot the GPS trajectory held in tdf; being the last expression of the cell,
# the object returned by plot_trajectory is displayed as the cell output.

tdf.plot_trajectory()

## Step 2. Obtain the position of the antennas

Download the dataset from https://opencellid.org.


In [None]:
# Load the antenna dataset.
# The gzip-compressed CSV is read without a header row, so the resulting
# columns are labelled with integers (0, 1, 2, ...).

df_antennas = pd.read_csv(
    "./222.csv.gz",
    header=None,
    compression="gzip",
)

In [None]:
# create a GeoDataFrame describing it

# Column 6 is used as the x coordinate and column 7 as the y coordinate
# (presumably the OpenCellID longitude/latitude fields - verify against the
# dataset documentation).
geometry_list = [Point(lng, lat) for lng, lat in zip(df_antennas[6], df_antennas[7])]

# Declare the coordinates as WGS84 lon/lat so that spatial joins against other
# EPSG:4326 layers (e.g. boundaries geocoded from OpenStreetMap) compare
# geometries with matching CRS metadata instead of an undefined one.
gpd_antennas = gpd.GeoDataFrame(geometry=geometry_list, crs="EPSG:4326")
gpd_antennas["ID"] = np.arange(len(gpd_antennas))
gpd_antennas.plot(markersize=1)

In [None]:
# Restrict the antennas to the ones located in Pisa.

# Boundary geometry returned by geocoding the query "Pisa, Italy"
city_shape = ox.geocode_to_gdf("Pisa, Italy")

# Spatial join with the "within" predicate: only the antennas lying inside the
# boundary are kept (the boundary's attribute columns are appended to each row).
antennas_pisa = gpd_antennas.sjoin(city_shape, predicate="within")

## Step 3. Get a tessellation (Voronoi) based on the antennas

In [None]:
# Build the Voronoi tessellation of the Pisa antennas, bounded by the city shape.
pisa_boundary = city_shape.iloc[0].geometry
coords = [Point(antenna.x, antenna.y) for antenna in antennas_pisa.geometry]
region_polys, region_pts = voronoi_regions_from_coords(coords, pisa_boundary)

In [None]:
# Create a GeoDataFrame

# Keep only the regions that are a single Polygon. Regions of any other
# geometry type are skipped, so they do not get a tile.
# Only the region geometries are needed, so iterate over the values; this also
# avoids binding a loop variable named `id`, which would shadow the builtin.
geometry_voronoi = [
    surface
    for surface in region_polys.values()
    if isinstance(surface, shapely.Polygon)
]

# The cells are expressed in lon/lat degrees, so tag them as EPSG:4326.
gpd_voronoi = gpd.GeoDataFrame(geometry=geometry_voronoi, crs="EPSG:4326")
gpd_voronoi["tile_ID"] = np.arange(len(gpd_voronoi))

fig, ax = plt.subplots(1, 1)

# Tiles in the default colour, with each tile's centroid as a small red dot.
gpd_voronoi.plot(ax=ax)
gpd_voronoi.centroid.plot(ax=ax, color="red", markersize=1)

# Zoom on a fixed lon/lat window, set on the explicit axes object.
ax.set_xlim(10.3, 10.5)
ax.set_ylim(43.6, 43.8)

## Step 4. Spatial join to associate each GPS point with its closest antenna

In [None]:
# Assign each GPS point the tile_ID of the Voronoi tile it falls in. A Voronoi
# tile gathers the locations closest to its antenna, so this is the
# nearest-antenna assignment.
# NOTE(review): points that fall in no tile presumably get a missing tile_ID - confirm.

tdf_cdr = tdf.mapping(tessellation=gpd_voronoi)

## Step 5. Take the centroid of the Voronoi cell as the CDR point of the GPS point

In [None]:
# Look-up table mapping each tile_ID to the [latitude, longitude] of its tile's centroid

dict_antenna_to_centroid = {
    tile_id: [polygon.centroid.y, polygon.centroid.x]
    for tile_id, polygon in zip(gpd_voronoi["tile_ID"], gpd_voronoi["geometry"])
}
    

In [None]:
# Overwrite lat and lng with the coordinates of the centroid of each point's tile.
# The look-up table stores [latitude, longitude], hence index 0 for "lat" and 1 for "lng".

for column, position in (("lat", 0), ("lng", 1)):
    tdf_cdr[column] = tdf_cdr["tile_ID"].apply(
        lambda tile, position=position: dict_antenna_to_centroid[tile][position]
    )

In [None]:
# Draw the original GPS trajectory in black and keep the returned map ...
map_f = tdf.plot_trajectory(hex_color="black", start_end_markers=True)
# ... then overlay the CDR-like trajectory in red on that same map for comparison.
tdf_cdr.plot_trajectory(hex_color="red", start_end_markers=True, map_f=map_f)