In [1]:
%%html
<style>
    /* Float every table to the left side of its container. */
    table {float:left}
    /* NOTE(review): as written this is a descendant selector that targets an
       element named <leaflet-touch-drag> nested inside <leaflet-grab>,
       <leaflet-fade-anim>, <leaflet-container> and .folium-map. If the intent
       was one element carrying all of these class names, each name needs a
       leading dot and the spaces must go - confirm before changing, because
       the rule probably matches nothing today. */
    .folium-map leaflet-container leaflet-fade-anim leaflet-grab leaflet-touch-drag {
        display: inline-block;
    }
</style>

In [2]:
import ipywidgets as widget
from IPython.display import FileLink, HTML
from os.path import exists, join, basename
from os import listdir, remove
import osmnx as ox
from shapely.geometry import Polygon, Point, LineString, box
from leuvenmapmatching.map.inmem import InMemMap
import numpy as np
import pandas as pd
import geopandas as gpd
from gpx_converter import Converter
from leuvenmapmatching.matcher.distance import DistanceMatcher
import leafmap.foliumap as leafmap
import time

# <img width="480" alt="logo" src="https://user-images.githubusercontent.com/47752920/234973760-c8157fdd-a3cf-43cf-88b0-4dc8096cfe7c.png">
### **AUTOMATION OF PROCESSING GPX TRACK RECORDS FOR DESIGNING INTENSITY MAPS**

The tool matches GNSS track records to a road network and calculates frequencies on it. The map matching runs on a probabilistic model that handles states with missing observations, i.e., non-emitting states.
For more information about the map-matching algorithm see [Leuven.MapMatching documentation](https://leuvenmapmatching.readthedocs.io/en/latest/index.html). For more details regarding the tool see the [GitHub documentation](https://github.com/bsramo144/Thesis-Jupyter).

**Set Parameters**

In [3]:
# Output widget that hosts the file-upload control; clear_upload() renders into it.
upload_out = widget.Output()
# Bare last expression so the (initially empty) output area is displayed by this cell.
upload_out

Output()

In [4]:
def clear_upload():
    """Replace the content of ``upload_out`` with a fresh GPX upload widget.

    Returns:
        The newly created ``FileUpload`` widget (accepts multiple ``.gpx`` files).
    """
    with upload_out:
        # Clear first, then display: the statement order inside the context is kept on purpose.
        upload_out.clear_output()
        uploader = widget.FileUpload(accept='.gpx', multiple=True, description='Upload files')
        display(uploader)
    return uploader


# Module-level handle to the current upload widget; click() reads it and rebinds it.
DATA_UPLOAD = clear_upload()

Note: maximum upload size is 10 MB.

|Parameter |Description |
|--------- |----------- |
|Buffer         |*expand the study area from the GPX records; in meters*|
|Tolerance      |*GPX track simplification threshold; in meters*|
|Min. Proba.    |*stop matching below normalized probability*|
|Max. Distance  |*break for zero match probability; in meters*|
|Max Lattice    |*search the route with the number of possible paths at every step*|
|Increase Latt. |*if no solution is found, increase the lattice by the value*|
|Obs. Noise     |*standard deviation of measurement noise, in meters*|
|Obs. Noise NE  |*standard deviation of measurement noise for non-emitting states, in meters (the value should be larger than Obs. Noise)*|
|Dist. Noise    |*difference between distance between matched route and distance between tracks, in meters*|
|Dist. Noise NE |*difference between the distances for non-emitting states, in meters (the value should be larger than Dist. Noise)*|

In [5]:
def _int_slider(description, value, lower, upper, step):
    """Create one integer parameter slider with the settings shared by all of them."""
    return widget.IntSlider(
        value=value,
        min=lower,
        max=upper,
        step=step,
        description=description,
        disabled=False,
        continuous_update=False,
        orientation='horizontal',
        readout=True,
        readout_format='d',
    )


# Environment parameters (both in meters).
BUFFER_DIST = _int_slider(description='Buffer', value=140, lower=40, upper=300, step=20)
TOLERANCE = _int_slider(description='Tolerance', value=2, lower=0, upper=10, step=1)

# Matching thresholds.
MAX_DIST = _int_slider(description='Max. Distance', value=120, lower=20, upper=300, step=20)

# Measurement noise; the "NE" variants apply to non-emitting states.
OBS_NOISE = _int_slider(description='Obs. Noise', value=16, lower=2, upper=50, step=2)
OBS_NOISE_NE = _int_slider(description='Obs.Noise NE', value=30, lower=5, upper=150, step=5)
DIST_NOISE = _int_slider(description='Dist. Noise', value=5, lower=1, upper=50, step=1)
DIST_NOISE_NE = _int_slider(description='Dist.Noise NE', value=16, lower=4, upper=100, step=2)

# Lattice search width and the increment used when no solution is found.
MAX_LATTICE_WIDTH = _int_slider(description='Max Lattice', value=7, lower=1, upper=20, step=1)
INCREASE_MAX_LATTICE_WIDTH = _int_slider(description='Increase Latt.', value=5, lower=1, upper=10, step=1)

# The only fractional parameter, hence a FloatSlider with three decimals shown.
MIN_PROB_NORM = widget.FloatSlider(
    value=0.002,
    min=0,
    max=0.01,
    step=0.001,
    description='Min Proba.',
    continuous_update=False,
    orientation='horizontal',
    readout=True,
    readout_format='.3f',
)

In [6]:
# (section title, sliders shown in that section), in display order.
_accordion_sections = [
    ("Environment Parameters", [BUFFER_DIST, TOLERANCE, MIN_PROB_NORM]),
    ("Thresholding of Matching", [MAX_DIST, MAX_LATTICE_WIDTH, INCREASE_MAX_LATTICE_WIDTH]),
    ("Measurement Noise", [OBS_NOISE, OBS_NOISE_NE, DIST_NOISE, DIST_NOISE_NE]),
]
items = [widget.VBox(section_sliders) for _, section_sliders in _accordion_sections]
accordion = widget.Accordion(children=items)
for position, (section_title, _) in enumerate(_accordion_sections):
    accordion.set_title(position, section_title)
accordion

Accordion(children=(VBox(children=(IntSlider(value=140, continuous_update=False, description='Buffer', max=300…

In [7]:
def upload(DATA_UPLOAD):
    """Store the uploaded GPX files in the ``data_upload`` directory.

    GPX files left over from a previous run are deleted first, then every
    uploaded file is written to ``data_upload/<file name>``.

    Args:
        DATA_UPLOAD: mapping of uploaded file name -> file info; the raw bytes
            are read from ``file_info['content']``.

    Raises:
        Exception: if nothing was uploaded.
    """
    # Function-scope import so the notebook's import cell does not have to change.
    from os import makedirs

    # A fresh checkout may not contain the working directory yet.
    makedirs("data_upload", exist_ok=True)

    for filename in listdir("data_upload"):
        if filename.endswith(".gpx"):
            remove(join("data_upload", filename))

    if len(DATA_UPLOAD) == 0:
        # Raising inside the output context is how this notebook shows an error
        # to the user (the widget is expected to display the exception - see the
        # ipywidgets Output documentation). Only ONE raise can ever execute
        # there, so the whole message has to live in a single exception.
        with output:
            raise Exception("NO file has been uploaded. Check if the size of the files exceeds the upload limit 10 MB. "
                            "Reload the page and upload new files again.")
        # Actually abort the run in case the context manager swallowed the error above.
        raise Exception(0)

    for name, file_info in DATA_UPLOAD.items():
        # The name comes from the client: keep only its last path component so a
        # crafted name such as "../x.gpx" cannot be written outside data_upload.
        safe_name = basename(name)
        if not safe_name:
            continue
        data_path = join("data_upload", safe_name)
        with open(data_path, 'wb') as file:
            file.write(file_info['content'])

In [17]:
def study_areas(BUFFER_DIST):
    """Build the study area polygon(s) by buffering all uploaded GPX points.

    Every ``.gpx`` file in ``data_upload`` is read; files that cannot be parsed
    are reported, deleted and skipped. All points are buffered and the buffers
    are merged; a multi-part result is split into separate rows.

    Args:
        BUFFER_DIST: buffer distance around the GPX points, in meters.

    Returns:
        tuple: ``(latitude, areas)`` where ``latitude`` is the latitude of the
        first combined record (used for the meter -> degree conversion) and
        ``areas`` is a GeoDataFrame (EPSG:4326) of the buffer polygon(s).

    Raises:
        Exception: if no usable GPX data was found, or if any record lies at or
            beyond 80 degrees of latitude.
    """
    gpx_combined = gpd.GeoDataFrame(columns = ['latitude', 'longitude'])
    for filename in listdir("data_upload"):
        if not filename.endswith(".gpx"):
            continue
        try:
            gpx_file = Converter(input_file = "data_upload/"+ filename).gpx_to_dataframe()
        except Exception:
            # Not a bare ``except``: KeyboardInterrupt / SystemExit must still
            # be able to stop the run.
            with output:
                print("   WARNING:", filename, "is invalid GPX file, the tool will skip the file.")
            remove(join("data_upload", filename))
            continue

        gpx_point = gpd.GeoDataFrame(gpx_file, geometry=gpd.points_from_xy(gpx_file.longitude, gpx_file.latitude)).set_crs('epsg:4326')
        gpx_combined = pd.concat([gpx_combined, gpx_point])

    if gpx_combined.empty:
        # Raising inside the output context is this notebook's way of showing
        # the error to the user; the raise after it aborts the run.
        with output:
            raise Exception("Uploaded data is empty.")
        raise Exception(0)

    if (gpx_combined["latitude"] >= 80).any() or (gpx_combined["latitude"] <= -80).any():
        # A second raise inside the context could never execute, so the single
        # message below already carries the "reload the page" hint.
        with output:
            raise Exception("Execution failed, uploaded data contains coordinates in polar area. Reload the page and upload new files again.")
        raise Exception(0)

    latitude = gpx_combined["latitude"].iloc[0]
    # One buffer radius in degrees for all points, derived from the first
    # record's latitude (111319.488 m is taken as one degree at the equator).
    distance_deg = BUFFER_DIST / (111319.488 * np.cos(np.radians(latitude)))  # metric system to degree distance
    gpx_buffer = gpx_combined["geometry"].buffer(distance_deg).set_crs(4326)
    areas = gpx_buffer.unary_union
    areas = gpd.GeoDataFrame(index=[0], crs='epsg:4326', geometry=[areas])
    if areas.type[0] == "MultiPolygon":
        # Disjoint buffers become separate rows, i.e. one street graph each.
        areas = areas.explode()
    with output:
        print("From the uploaded GPX files have been calcualted", len(areas.index), "area(s) for graph(s). \n")
    return latitude, areas

In [9]:
def _build_inmem_map(graph):
    """Load the nodes and edges of a street graph into a Leuven ``InMemMap``.

    Node positions are read from the graph's ``y`` (latitude) and ``x``
    (longitude) node attributes. Every edge is added in both directions.
    """
    map_con = InMemMap("road_network",
                       use_latlon = True,
                       use_rtree = True,
                       index_edges = True)
    for node in graph.nodes:
        lat = graph.nodes[node]['y']
        lon = graph.nodes[node]['x']
        map_con.add_node(node, (lat, lon))

    for edge in graph.edges:
        node_a, node_b = edge[0], edge[1]
        map_con.add_edge(node_a, node_b)
        map_con.add_edge(node_b, node_a)

    map_con.purge() # remove nodes without location or edges
    return map_con


def map_matching(latitude, areas, TOLERANCE, MAX_DIST, MIN_PROB_NORM, MAX_LATTICE_WIDTH, INCREASE_MAX_LATTICE_WIDTH, OBS_NOISE, OBS_NOISE_NE, DIST_NOISE, DIST_NOISE_NE):
    """Match every uploaded GPX track to the street graph of its study area.

    For each area polygon a street graph is downloaded (network type ``all``,
    unsimplified) and each GPX file in ``data_upload`` whose first point lies
    inside the polygon is simplified and matched with ``DistanceMatcher``.

    Args:
        latitude: reference latitude for the meter -> degree conversion.
        areas: GeoDataFrame with one study-area polygon per row (``geometry``).
        TOLERANCE: track simplification threshold, in meters.
        MAX_DIST: distance at which the match probability is zero, in meters.
        MIN_PROB_NORM: matching stops below this normalized probability.
        MAX_LATTICE_WIDTH: number of candidate paths kept at every step.
        INCREASE_MAX_LATTICE_WIDTH: lattice increment when no solution is found.
        OBS_NOISE: standard deviation of measurement noise, in meters.
        OBS_NOISE_NE: same for non-emitting states.
        DIST_NOISE: tolerated distance difference between route and track, in meters.
        DIST_NOISE_NE: same for non-emitting states.

    Returns:
        tuple: ``(street_all, track_df, route_df)`` - the street edges of all
        downloaded graphs, the simplified track vertices (one ``id`` per
        track), and the matched route as two-point segments (one ``id`` per
        segment, unique across all tracks and areas). Both data frames have the
        columns ``Latitude``, ``Longitude``, ``id``.

    Raises:
        Exception: if no track could be matched in any area.
    """
    street_all = gpd.GeoDataFrame(columns = ['Latitude', 'Longitude'])
    # Rows are collected in plain lists and turned into data frames once at the
    # end; concatenating a frame per segment is quadratic in the route length.
    track_rows = []  # (Latitude, Longitude, id) - one id per track
    route_rows = []  # (Latitude, Longitude, id) - one id per route segment
    track_id = 1
    # Segment ids must NOT restart for every file: segments are later grouped
    # by id into lines, so reused ids would fuse segments of different tracks
    # into one line and distort the passage frequencies.
    route_id = 1
    # Loop invariant, therefore computed once.
    tolerance_deg = TOLERANCE / (111319.488 * np.cos(np.radians(latitude))) # metric system to degree distance

    for _, row in areas.iterrows():
        area_geom = row["geometry"]
        ## Download graph based on network type
        ### possible network types {"all_private", "all", "bike", "drive", "drive_service", "walk"}
        with output:
            print("Building graph for a specific area...")

        try:
            graph = ox.graph_from_polygon(area_geom, network_type = 'all', simplify = False)
        except Exception:
            with output:
                print("In the area is no street network mapped. \n Reload the page and upload new files again.")
            continue

        with output:
            print(graph, "\n") # number of nodes and edges
        street_lines = ox.graph_to_gdfs(graph, nodes = False)
        street_all = pd.concat([street_lines, street_all])

        ## Leuven Map Object
        map_con = _build_inmem_map(graph)

        ## Map Matching
        for filename in listdir("data_upload"):
            if not filename.endswith(".gpx"):
                continue

            gpx_df = Converter(input_file = "data_upload/" + filename).gpx_to_dataframe()
            gpx_point = gpd.GeoDataFrame(gpx_df, geometry = gpd.points_from_xy(gpx_df.longitude, gpx_df.latitude)).set_crs('epsg:4326')
            # A file belongs to the area that contains its FIRST point; test
            # that point directly instead of relying on index alignment
            # between the points and a one-row area frame.
            if gpx_point.empty or not gpx_point.geometry.iloc[0].within(area_geom):
                continue  # skip the GPX file outside of the graph

            with output:
                print("   Map matching of " + filename + " started...")
            gpx_point['id'] = 1  # single group -> the whole file becomes one line
            try:
                gpx_line = gpx_point.groupby(['id']) ['geometry'].apply(lambda x: LineString(x.tolist()))
            except Exception:
                with output:
                    print("   WARNING: The file has only one record of position, therefore, the matching stopped.")
                continue

            line_gdf = gpd.GeoDataFrame(gpx_line, geometry = 'geometry').set_crs('epsg:4326')
            line_gdf['geometry'] = line_gdf['geometry'].simplify(tolerance_deg) # reducing line vertices inside the tolerance
            gpx_coords = line_gdf.apply(lambda line_row: list((line_row.geometry).coords), axis=1)
            passage = list(gpx_coords[1])  # label 1 == the single group id set above
            # Coordinates are stored as (lon, lat); the matcher expects (lat, lon).
            path = [(coord[1], coord[0]) for coord in passage]
            track_rows.extend((lat, lon, track_id) for lat, lon in path)

            matcher = DistanceMatcher(map_con,
                                      max_dist = MAX_DIST,
                                      min_prob_norm = MIN_PROB_NORM,
                                      max_lattice_width = MAX_LATTICE_WIDTH,
                                      increase_max_lattice_width = INCREASE_MAX_LATTICE_WIDTH,
                                      obs_noise = OBS_NOISE,
                                      obs_noise_ne = OBS_NOISE_NE,
                                      dist_noise = DIST_NOISE,
                                      dist_noise_ne = DIST_NOISE_NE,
                                      non_emitting_edgeid = False,
                                      restrained_ne = False)

            # unique = False: repeated nodes are kept in the matched sequence.
            matcher.match(path, unique = False)
            if matcher.early_stop_idx is not None:
                with output:
                    print("   Parts of the path were omitted from matching due to the road mismatch.")
                # Matching stopped early: resume from the best last matches.
                from_matches = matcher.best_last_matches(k = MAX_LATTICE_WIDTH)
                matcher.continue_with_distance(from_matches = from_matches, max_dist = MAX_DIST)
                matcher.match(path, expand = True)

            node_id = matcher.path_pred_onlynodes_withjumps # retrieve the node_ids the route passes through
            # One two-point segment per pair of consecutive route nodes.
            for node_from, node_to in zip(node_id[:-1], node_id[1:]):
                for node in (node_from, node_to):
                    route_rows.append((graph.nodes[node]['y'], graph.nodes[node]['x'], route_id))
                route_id += 1

            track_id += 1
            with output:
                print("   Matching of " + filename + " finished successfully.\n")

    track_df = pd.DataFrame(track_rows, columns = ['Latitude', 'Longitude', 'id'])
    route_df = pd.DataFrame(route_rows, columns = ['Latitude', 'Longitude', 'id'])

    # Checked once, after ALL areas: an area without a matched track must not
    # abort the run while other areas can still produce results.
    if route_df.empty:
        # Raising inside the output context shows the error to the user; the
        # raise after it aborts the run.
        with output:
            raise Exception("The map matching has no result, the execution has terminated. Reload the page and upload new files again.")
        raise Exception(0)

    return street_all, track_df, route_df

In [10]:
def post_process(track_df, route_df, street_all, start_time):
    """Turn matched points into lines and count passages per street segment.

    Args:
        track_df: track vertices with ``Latitude``, ``Longitude`` and ``id``.
        route_df: matched route vertices with the same columns.
        street_all: street edges; ``osmid``, ``length`` and ``geometry`` are used.
        start_time: ``time.time()`` value taken when the run started.

    Returns:
        tuple: ``(tracks_gdf, street_freq)`` - the track lines and the street
        parts that intersect the routes with a ``frequency`` column.
    """
    def _lines_by_id(points_df):
        # Points sharing an ``id`` are chained, in row order, into one LineString.
        as_points = gpd.GeoDataFrame(points_df, geometry=gpd.points_from_xy(points_df.Longitude, points_df.Latitude))
        as_lines = as_points.groupby(['id']) ['geometry'].apply(lambda geoms: LineString(geoms.tolist()))
        return gpd.GeoDataFrame(as_lines, geometry='geometry').set_crs('epsg:4326')

    tracks_gdf = _lines_by_id(track_df)
    route_gdf = _lines_by_id(route_df)

    streets = street_all.loc[:, ['osmid', 'length', 'geometry']] # drop unnecessary columns.
    with output:
        print("Calculating passage frequences on streets, this part may take time...")

    # Keep only street geometry shared with the routes, one row per (osmid, length).
    street_freq = streets.overlay(route_gdf, how='intersection')
    street_freq = street_freq.drop_duplicates(subset=['osmid', 'length'])

    # frequency = number of route lines that cover the street geometry.
    street_freq["frequency"] = [
        route_gdf.covers(street_geom).values.sum()
        for street_geom in street_freq["geometry"]
    ]

    output.clear_output()
    elapsed_seconds = round(time.time() - start_time)
    with output:
        print("The length of matched roads is", round(street_freq["length"].sum()), "meters.")
        print("The execution has finished in %s seconds." % (elapsed_seconds))
        print("\n\n")

    return tracks_gdf, street_freq

In [11]:
def _intensity_map(tracks_gdf, street_freq, tiles, track_color, cmap, passage_opacity, title_color, title_bg):
    """Build one web map showing the tracks and the passage frequencies.

    The light and the dark map differ only in the values passed here: base
    map tiles, track colour, colour map and opacity of the passages layer, and
    the colours of the title box.
    """
    title_text = "INTENSITY OF MOBILITY ON ROAD NETWORK"
    logo_html = "<a href='https://github.com/bsramo144/Thesis-Jupyter' target='_blank'><img width='250' alt='Asset 3logo' src='https://user-images.githubusercontent.com/47752920/234973760-c8157fdd-a3cf-43cf-88b0-4dc8096cfe7c.png'></a>"

    web_map = leafmap.Map(width="100%",
                          height="380",
                          draw_control=False,
                          attribution_control=True,
                          tiles=tiles)
    web_map.add_gdf(tracks_gdf,
                    layer_name='tracks',
                    info_mode=None,
                    style={'color': track_color, 'weight':0.5, 'opacity': 0.5})
    web_map.add_data(street_freq,
                     "frequency",
                     cmap = cmap,
                     scheme='Quantiles',
                     k=5,
                     add_legend=True,
                     legend_title="Number of passages",
                     legend_position="bottomright",
                     layer_name="passages",
                     # The line colour is read from each feature's "color" property.
                     style_function = lambda feat: {"color": feat["properties"]["color"],
                                                    "weight": 4,
                                                    'opacity': passage_opacity})
    web_map.add_text(title_text, fontsize=22, fontcolor=title_color, bold=True, padding='0px', background=True, bg_color=title_bg, border_radius='5px', position='topright')
    web_map.add_text(logo_html, background=False, position='topright')
    web_map.zoom_to_gdf(street_freq)
    return web_map


def map_vis(tracks_gdf, street_freq):
    """Create the light and the dark intensity map.

    Args:
        tracks_gdf: track lines, drawn as a thin semi-transparent layer.
        street_freq: street lines with a ``frequency`` column, classified into
            five quantile classes.

    Returns:
        tuple: ``(m_light, m_dark)`` leafmap maps.
    """
    m_light = _intensity_map(tracks_gdf, street_freq,
                             tiles="CartoDB positron",
                             track_color='blue',
                             cmap="Wistia",
                             passage_opacity=0.9,
                             title_color='#404040',
                             title_bg='white')
    m_dark = _intensity_map(tracks_gdf, street_freq,
                            tiles="Cartodbdark_matter",
                            track_color='red',
                            cmap="YlOrBr_r",
                            passage_opacity=0.8,
                            title_color='white',
                            title_bg='#404040')
    return m_light, m_dark

In [12]:
def save_results(street_freq, m_light, m_dark):
    """Write the result layers and both web maps into ``data_upload``.

    Args:
        street_freq: object with ``to_file(path, driver=...)``; saved as
            GeoJSON and as GeoPackage.
        m_light: map with ``to_html(path)``; saved as ``light_map.html``.
        m_dark: map with ``to_html(path)``; saved as ``dark_map.html``.
    """
    vector_targets = (("data_upload/lines.json", "GeoJSON"),
                      ("data_upload/lines.gpkg", "GPKG"))
    for target_path, driver_name in vector_targets:
        street_freq.to_file(target_path, driver=driver_name)

    map_targets = ((m_light, "data_upload/light_map.html"),
                   (m_dark, "data_upload/dark_map.html"))
    for web_map, target_path in map_targets:
        web_map.to_html(target_path)

In [13]:
def download_button(name, button_title):
    """Display a button-styled download link for a file in ``data_upload``.

    Args:
        name: file name inside ``data_upload`` (e.g. ``'lines.json'``).
        button_title: text shown on the button.
    """
    # Only the file name itself goes into the link. The previous
    # str(FileLink(...)).rpartition('/') trick returned the whole absolute path
    # whenever that path contained no forward slash (Windows separators).
    file_name = basename(name)
    # The link is relative to the page and goes through the "files/" route
    # (the original code names this variant "voila"). An unused variant of the
    # original built the same link with href="../../data_upload/<file>" for
    # plain Jupyter.
    html_voila = '<a style="color: white; border-radius: 3px;" class="lm-Widget p-Widget jupyter-widgets jupyter-button widget-button mod-primary" href="../../files/data_upload/'+file_name+'" download="'+file_name+'">'+button_title+'</a>'
    display(HTML(html_voila))

In [14]:
def click(b):
    """Run the whole workflow; handler of the "Run the Tool" button.

    Steps: store the uploaded files, build the study areas, match the tracks,
    post-process, show both maps, save the results and offer them for
    download. Progress is printed into ``output``. Finally the upload widget
    is replaced by a fresh one.

    Args:
        b: the clicked button (not used).

    Returns:
        The new upload widget, which is also stored in the global ``DATA_UPLOAD``.
    """
    global DATA_UPLOAD

    def _report(*lines):
        # Print the given lines into the shared output area.
        with output:
            for line in lines:
                print(line)

    start_time = time.time()
    output.clear_output()
    _report("CALCULATION STARTED \n")
    upload(DATA_UPLOAD.value)

    _report("(1/3) Creating study area zones around the uploaded data.")
    latitude, areas = study_areas(BUFFER_DIST.value)

    _report("===",
            "(2/3) Matching GPX files to the street graph of the study area.")
    # Sliders in the positional order expected by map_matching().
    matching_sliders = (TOLERANCE, MAX_DIST, MIN_PROB_NORM,
                        MAX_LATTICE_WIDTH, INCREASE_MAX_LATTICE_WIDTH,
                        OBS_NOISE, OBS_NOISE_NE, DIST_NOISE, DIST_NOISE_NE)
    slider_values = [slider.value for slider in matching_sliders]
    street_all, track_df, route_df = map_matching(latitude, areas, *slider_values)

    _report("===",
            "(3/3) Post-processing of the results.")
    tracks_gdf, street_freq = post_process(track_df, route_df, street_all, start_time)
    m_light, m_dark = map_vis(tracks_gdf, street_freq)
    with output:
        display(m_light, m_dark)

    save_results(street_freq, m_light, m_dark)
    with output:
        display(HTML("<b>Download outputs:</b>"))
        display(HTML("Linear Features (.json, .gpkg)"))
        for result_file, label in (('lines.json', 'GeoJson'), ('lines.gpkg', 'GeoPackage')):
            download_button(result_file, label)
        display(HTML("Web Map (.html)"))
        for result_file, label in (('light_map.html', 'Light Mode'), ('dark_map.html', 'Dark Mode')):
            download_button(result_file, label)

    DATA_UPLOAD = clear_upload()
    return DATA_UPLOAD

In [15]:
# Shared output area: the processing functions print their progress,
# errors and results into this widget.
output = widget.Output()
# Button that starts the workflow; click() is registered as its handler.
RUN_BUTTON = widget.Button(description='Run the Tool', button_style='primary')
RUN_BUTTON.on_click(click)
# Bare last expression so the button is displayed by this cell.
RUN_BUTTON

Button(button_style='primary', description='Run the Tool', style=ButtonStyle())

In [16]:
# Display the shared output widget created together with the run button.
output

Output()

---

#### The tool is Attachment 2 to Master Thesis.
<a href="https://master-cde.eu/"><img src="https://blog.plus.ac.at/storage/2022/12/copernicus-master-logo.png" alt="Erasmus Funded" title="Erasmus" width="300" /></a>

#### [![GitHub Documentation](https://upload.wikimedia.org/wikipedia/commons/3/3f/Github-circle_%28CoreUI_Icons_v1.0.0%29.svg)](https://github.com/bsramo144/Thesis-Jupyter.git) [![License: CC BY 4.0](https://img.shields.io/badge/License-CC_BY_4.0-lightgrey.svg)](https://creativecommons.org/licenses/by/4.0/)
#### <a href="https://bsramo144.github.io/">Benjamín Šramo (2023)</a><br/>Palacký University in Olomouc, University of Salzburg