# Path data analysis

Test of visualizing paths as geohashes whose precision decreases the further a point lies from the defined center of an area.

## Import data and libraries

In [186]:
import geopandas as gpd
import pandas as pd
import folium
from folium.plugins import HeatMap
from branca.colormap import linear
from geolib import geohash as geolib
import geopy.distance
import random
import math
import json
import numpy as np

In [187]:
# Reference point of the study area (Unifr), given as (latitude, longitude)
center = (
    46.79381345553877,
    7.158862023497898,
)
# Distance threshold around `center`, in meters
limits = 2_000

In [188]:
# HTML snippet for a fixed-position legend box (bottom-left of the page) that
# lists the means of transport next to a colored square each.
# Bus / Electric Bus share one color, and so do Plane / Boat.
legend_html = '''
     <div style="position: fixed; 
                 bottom: 50px; left: 50px; width: 160px; height: 300px; 
                 background-color: white; border:2px solid grey; z-index:9999; 
                 font-size:14px;">
     &nbsp; <b>Legend</b> <br>
     &nbsp; Walking &nbsp; <i class="fa fa-square" style="color:#7FC97F"></i><br>
     &nbsp; On Bicycle &nbsp; <i class="fa fa-square" style="color:#BDAED4"></i><br>
     &nbsp; Train &nbsp; <i class="fa fa-square" style="color:#FDBF85"></i><br>
     &nbsp; Bus &nbsp; <i class="fa fa-square" style="color:#FFFF99"></i><br>
     &nbsp; Electric Bus &nbsp; <i class="fa fa-square" style="color:#FFFF99"></i><br>
     &nbsp; Car &nbsp; <i class="fa fa-square" style="color:#386CB0"></i><br>
     &nbsp; Tram &nbsp; <i class="fa fa-square" style="color:#F0027F"></i><br>
     &nbsp; Plane &nbsp; <i class="fa fa-square" style="color:#BE5B17"></i><br>
     &nbsp; Boat &nbsp; <i class="fa fa-square" style="color:#BE5B17"></i><br>
      </div>
     '''

In [189]:
# Load the recorded path points from the CSV export.
# NOTE(review): a plain CSV is read through geopandas here; presumably
# pd.read_csv would work as well and infer numeric dtypes — confirm before changing.
df = gpd.read_file('data/path.csv')

In [190]:
# Keep only the move identifier and the coordinate columns
# (latitude, longitude and accuracy); every other column is dropped.
df = df[['moveId', 'latitude', 'longitude', 'accuracy']]

In [191]:
df

Unnamed: 0,moveId,latitude,longitude,accuracy
0,0,46.7938337,7.1589473,15.12600040435791
1,1,46.7945523,7.1573563,22.886999130249023
2,1,46.7952677,7.1565532,46.82500076293945
3,1,46.7963267,7.1553883,17.33099937438965
4,1,46.7968228,7.1547272,10.956999778747559
...,...,...,...,...
3211,425,46.7929895,7.1551063,72.9000015258789
3212,426,46.7929658,7.1549381,32.887001037597656
3213,427,46.7946359,7.1550851,18.374000549316406
3214,428,46.7943201,7.1550194,17.253000259399414


In [192]:
def dist(p1, p2):
    """Return the geodesic distance between two points, in meters.

    Both arguments are handed unchanged to ``geopy.distance.geodesic``; the
    callers in this notebook pass ``(latitude, longitude)`` tuples.
    """
    separation = geopy.distance.geodesic(p1, p2)
    return separation.m

In [209]:
def geohash(lat, lon, precisions=(8, 8, 8)):
    """Encode a location as a geohash whose precision depends on its distance from ``center``.

    Three distance bands are distinguished: closer than ``limits // 2`` meters,
    closer than ``limits`` meters, and everything farther away.

    Args:
        lat: Latitude of the point.
        lon: Longitude of the point.
        precisions: Geohash lengths for the three bands, nearest band first.
            The default ``(8, 8, 8)`` reproduces the previous hard-coded
            behaviour (precision 8 everywhere); pass e.g. ``(8, 7, 6)`` to get
            coarser cells the further a point is from ``center``.

    Returns:
        The geohash string produced by ``geolib.encode``.
    """
    near, middle, far = precisions

    d = dist(center, (lat, lon))
    if d < limits // 2:
        precision = near
    elif d < limits:
        precision = middle
    else:
        precision = far

    return geolib.encode(lat, lon, precision=precision)


In [210]:
# Encode every row's coordinates and store the result in a new 'geohash' column
df['geohash'] = df.apply(
    lambda row: geohash(row['latitude'], row['longitude']),
    axis=1,
)

In [211]:
df

Unnamed: 0,moveId,latitude,longitude,accuracy,geohash
0,0,46.7938337,7.1589473,15.12600040435791,u0m44ysc
1,1,46.7945523,7.1573563,22.886999130249023,u0m44yey
2,1,46.7952677,7.1565532,46.82500076293945,u0m44yg4
3,1,46.7963267,7.1553883,17.33099937438965,u0m44z42
7,3,46.7958022,7.1559085,11.0,u0m44yfv
...,...,...,...,...,...
3211,425,46.7929895,7.1551063,72.9000015258789,u0m44y6h
3212,426,46.7929658,7.1549381,32.887001037597656,u0m44y6h
3213,427,46.7946359,7.1550851,18.374000549316406,u0m44ydn
3214,428,46.7943201,7.1550194,17.253000259399414,u0m44ydh


In [212]:
def filter_points(df_in):
    """Drop the points recorded close to the start or the end of their move.

    A row is kept when it is more than ``limits / 5`` meters away from both the
    first and the last recorded coordinates of its ``moveId``, or when it lies
    within ``limits / 5`` meters of ``center`` (points around the center are
    never removed).

    Args:
        df_in: DataFrame with ``moveId``, ``latitude`` and ``longitude`` columns.

    Returns:
        A new DataFrame with the kept rows of ``df_in``. Index, columns and
        dtypes are those of ``df_in``, also when no row is kept, so later
        column look-ups keep working on an empty result.
    """
    # Nothing to filter: hand back an empty copy that still has every column.
    if df_in.empty:
        return df_in.copy()

    radius = limits / 5

    # Start and end locations per each moveId
    start_end = df_in.groupby('moveId').agg({'latitude': ['first', 'last'], 'longitude': ['first', 'last']})

    keep = []
    for _, row in df_in.iterrows():
        endpoints = start_end.loc[row['moveId']]
        p = (row['latitude'], row['longitude'])
        p_start = (endpoints[('latitude', 'first')], endpoints[('longitude', 'first')])
        p_end = (endpoints[('latitude', 'last')], endpoints[('longitude', 'last')])

        away_from_ends = dist(p, p_start) > radius and dist(p, p_end) > radius
        keep.append(away_from_ends or dist(center, p) < radius)

    # Positional boolean mask: selects rows without rebuilding them, which
    # preserves the column dtypes and works with any index.
    return df_in.loc[np.asarray(keep, dtype=bool)]

In [213]:
# Count how many of the retained points fall into each geohash cell
df_filtered = (
    filter_points(df)
    .groupby('geohash')
    .size()
    .reset_index(name='count')
)

## Create a visualization of the covered routes

In [214]:
def geohash_to_coordinate(geohash):
    """Decode a geohash into a ``[latitude, longitude]`` list of floats.

    The values returned by ``geolib.decode`` are converted to ``float``; no
    offset is applied to them.
    """
    latitude, longitude = map(float, geolib.decode(geohash))
    return [latitude, longitude]

In [215]:
def draw_path(start_geohash, end_geohash, mean_of_transport, map, weight=1, tooltip=None):
    """Draw a straight line between two geohash cells on a folium map.

    Args:
        start_geohash: Geohash of the cell where the line starts.
        end_geohash: Geohash of the cell where the line ends.
        mean_of_transport: Key into the color table below; an unknown key
            raises ``KeyError``.
        map: Folium map the line is added to.
        weight: Line weight forwarded to ``folium.PolyLine``.
        tooltip: Optional tooltip forwarded to ``folium.PolyLine``.
    """
    # Line color per means of transport; related modes share a color
    palette = {
        'WALKING': '#7FC97F',
        'ON_BICYCLE': '#BDAED4',
        'ELECTRIC_BIKE': '#BDAED4',
        'SCOOTER': '#BDAED4',
        'TRAIN': '#FDBF85',
        'BUS': '#FFFF99',
        'ELECTRIC_BUS': '#FFFF99',
        'CAR': '#386CB0',
        'ELECTRIC_CAR': '#386CB0',
        'TRAM': '#F0027F',
        'PLANE': '#BE5B17',
        'BOAT': '#BE5B17',
        'DETECTION_ERROR': '#000000'
    }

    endpoints = [
        geohash_to_coordinate(start_geohash),
        geohash_to_coordinate(end_geohash),
    ]

    # Shift every coordinate by a random amount of at most 0.0005 so that
    # lines between the same pair of cells do not overlap exactly
    jittered = [
        [value + (random.random() - 0.5) * 0.001 for value in point]
        for point in endpoints
    ]

    line = folium.PolyLine(
        locations=jittered,
        color=palette[mean_of_transport],
        weight=weight,
        tooltip=tooltip,
    )
    line.add_to(map)


## Create a heatmap of the locations visited

In [216]:
# Replace df with the points that pass filter_points(); the cells below
# work on this reduced set.
df = filter_points(df)

In [217]:
## Collect the geohash of every remaining point (duplicates included)
geohashes = df['geohash'].tolist()
len(geohashes)

1811

In [218]:
# Wrap the geohash list in a one-column ('geohash') dataframe
geohashes_df = gpd.GeoDataFrame({'geohash': geohashes})

In [219]:
# Decode each geohash once, then split the [latitude, longitude] pairs into two columns
decoded = df_filtered['geohash'].map(geohash_to_coordinate)
df_filtered['latitude'] = decoded.map(lambda coordinate: coordinate[0])
df_filtered['longitude'] = decoded.map(lambda coordinate: coordinate[1])

In [220]:
# Base map centered on the mean of the decoded cell coordinates
map_center = [df_filtered['latitude'].mean(), df_filtered['longitude'].mean()]
m = folium.Map(location=map_center, zoom_start=10)

# One [latitude, longitude, weight] entry per geohash cell, weighted by its point count
heat_data = [
    [lat, lon, count]
    for lat, lon, count in zip(
        df_filtered['latitude'], df_filtered['longitude'], df_filtered['count']
    )
]

# Add the heatmap layer to the map
heat_layer = HeatMap(heat_data)
heat_layer.add_to(m)

# Write the map to an HTML file
m.save('maps/fine_heatmap.html')
!open -a Arc maps/fine_heatmap.html

## Convert the list of geohashes to a GeoJSON object


In [221]:
def geohashes_to_heatmap(df):
    """Build a GeoJSON heatmap with one colored polygon per distinct geohash.

    Each distinct value of ``df['geohash']`` becomes a rectangle covering the
    bounds of that geohash. Its fill color is taken from the ``RdYlBu_10``
    color scale at ``log(count)``, where ``count`` is the number of rows that
    carry the geohash.

    Args:
        df: DataFrame with a ``geohash`` column (one row per observation).

    Returns:
        A GeoJSON ``FeatureCollection`` dict. Every feature stores ``id``,
        ``fillColor``, ``fillOpacity`` and ``stroke`` in its properties. The
        collection has no features when ``df`` has no geohashes.
    """
    # Number of rows per distinct geohash
    counts = df['geohash'].value_counts()

    # No data: return an empty collection instead of building a NaN color scale.
    if counts.empty:
        return {
            "type" : "FeatureCollection",
            "features": []
        }

    # Colors are looked up at log(count). The smallest possible value is
    # log(1) == 0, so the scale has to start at 0 (not 1) for low counts to be
    # told apart. When every count is 1 the upper bound would be 0 as well;
    # any positive bound then works, because all cells sit at the lower end.
    max_log = math.log(counts.max())
    color_scale = linear.RdYlBu_10.scale(0, max_log if max_log > 0 else 1)

    features = []
    for cell, count in counts.items():
        # Bounding box of the geohash cell (south-west and north-east corners)
        bounds = geolib.bounds(cell)

        features.append({
            "type": "Feature",
            "properties": {
                "id": cell,
                "fillColor": color_scale(math.log(count)),
                "fillOpacity": 0.6,
                "stroke": False
            },
            "geometry": {
                "type": "Polygon",
                # Closed ring in [longitude, latitude] order: the first corner is repeated at the end
                "coordinates": [[
                    [bounds.sw.lon, bounds.sw.lat],
                    [bounds.sw.lon, bounds.ne.lat],
                    [bounds.ne.lon, bounds.ne.lat],
                    [bounds.ne.lon, bounds.sw.lat],
                    [bounds.sw.lon, bounds.sw.lat]
                ]]
            },
        })

    return {
        "type" : "FeatureCollection",
        "features": features
    }

In [206]:
# Build the GeoJSON heatmap from the dataframe of per-point geohashes
heatmap = geohashes_to_heatmap(geohashes_df)

# Serialize it with json, which writes double-quoted strings
with open('results/heatmap.geojson', 'w') as f:
    f.write(json.dumps(heatmap))

In [207]:
def create_map(geojson, mapname, legend=None):
    """Render GeoJSON polygons on a folium map and save the map as HTML.

    Args:
        geojson: GeoJSON data whose features carry ``fillColor``, ``stroke``
            and ``fillOpacity`` entries under ``properties``.
        mapname: File name of the output; it is written inside ``maps/``.
        legend: Optional HTML snippet added to the page; skipped when falsy.
    """
    # NOTE(review): the tile URL contains an API key in plain text.
    base_map = folium.Map(
        location=[46.9446011, 7.4143311],
        zoom_start=10,
        tiles='https://tiles.stadiamaps.com/tiles/stamen_toner_lite/{z}/{x}/{y}{r}.png?api_key=977802c5-9b2e-4fc3-9254-a9199d0d5d0c',
        attr='https://stadiamaps.com/',
    )

    styled_keys = ('fillColor', 'stroke', 'fillOpacity')

    def style_function(feature):
        # Every polygon is styled from the values stored in its own properties
        properties = feature["properties"]
        return {key: properties[key] for key in styled_keys}

    # Polygon layer, followed by the layer switcher
    polygon_layer = folium.GeoJson(
        geojson,
        name='Polygon Layer',
        style_function=style_function,
    )
    polygon_layer.add_to(base_map)
    folium.LayerControl().add_to(base_map)

    if legend:
        legend_element = folium.Element(legend)
        base_map.get_root().html.add_child(legend_element)

    # Write the finished map below maps/
    base_map.save("maps/" + mapname)


In [208]:
# Render the count heatmap (no legend) and save it as maps/heatmap.html
create_map(heatmap, "heatmap.html")

In [185]:
!open -a Arc maps/heatmap.html

In [22]:
# Plot every remaining path point as a small blue circle at its latitude/longitude
# NOTE(review): the tile URL contains an API key in plain text.
m = folium.Map(
    location=[46.9446011, 7.4143311],
    zoom_start=10,
    tiles='https://tiles.stadiamaps.com/tiles/stamen_toner_lite/{z}/{x}/{y}{r}.png?api_key=977802c5-9b2e-4fc3-9254-a9199d0d5d0c',
    attr='https://stadiamaps.com/',
)
for i, row in df.iterrows():
    marker = folium.CircleMarker(
        location=[row['latitude'], row['longitude']],
        radius=1,
        color='blue',
    )
    marker.add_to(m)

m.save('maps/points.html')
!open -a Arc maps/points.html

## Get the map of the most used vehicles

In [23]:
## NOTE(review): this cell expects 'start_geohash', 'end_geohash' and
## 'mean_of_transport' columns. The df built in this notebook only holds
## moveId, latitude, longitude, accuracy and geohash, so the first statement
## raises the KeyError recorded in the cell output.
## Get the list of start geohashes with the corresponding mean of transport
geohashes = df[['start_geohash', 'mean_of_transport']].copy()
## Append the rows of end geohashes (with their mean of transport) below them
geohashes = gpd.GeoDataFrame(pd.concat([geohashes, df[['end_geohash', 'mean_of_transport']]]))
## Merge start and end geohashes into a single column: take the start value,
## falling back to the end value where the start value is missing
geohashes['geohash'] = geohashes['start_geohash'].combine_first(geohashes['end_geohash'])
## Keep only the merged geohash and the mean of transport
geohashes = geohashes[['geohash', 'mean_of_transport']]

## Precision reduction is currently disabled: the assignment below is a no-op
## (the truncation is commented out)
geohashes['geohash'] = geohashes['geohash'] #.str[:-1]

geohashes

KeyError: "None of [Index(['start_geohash', 'mean_of_transport'], dtype='object')] are in the [columns]"

In [None]:
## For every geohash, find the mean of transport that occurs most often:
## count each (geohash, mean_of_transport) pair, order the pairs by decreasing
## count and keep the first row of every geohash. The 'counts' column is kept.
geohashes_df = (
    gpd.GeoDataFrame(geohashes, columns=['geohash', 'mean_of_transport'])
    .groupby(['geohash', 'mean_of_transport'])
    .size()
    .reset_index(name='counts')
    .sort_values('counts', ascending=False)
    .drop_duplicates(['geohash'])
)
geohashes_df

Unnamed: 0,geohash,mean_of_transport,counts
60,u0m44x,WALKING,47
74,u0m468,TRAIN,21
132,u0m714,TRAIN,19
86,u0m470,WALKING,17
80,u0m46b,WALKING,17
...,...,...,...
214,u0nq9p,CAR,1
215,u0nq9r,CAR,1
216,u0nq9w,CAR,1
219,u0nqc0,CAR,1


In [None]:
def heatmap_mot(df):
    """Build a GeoJSON map with one polygon per geohash, colored by means of transport.

    Args:
        df: Iterable of rows whose first item is a geohash and whose second
            item is a means-of-transport key of the color table below (for
            example the ``.values`` of a dataframe with those two leading
            columns). Further items of a row are ignored.

    Returns:
        A GeoJSON ``FeatureCollection`` dict with one rectangular polygon per
        row; ``id``, ``fillColor``, ``fillOpacity`` and ``stroke`` are stored
        in the feature properties.

    Raises:
        KeyError: If a row names a means of transport missing from the table.
    """
    # Fill color per means of transport; related modes share a color.
    # 'DETECTION_ERROR' is included so that this table matches the one used by
    # draw_path() and such rows are drawn instead of raising KeyError.
    colors = {
        'WALKING': '#7FC97F',
        'ON_BICYCLE': '#BDAED4',
        'ELECTRIC_BIKE': '#BDAED4',
        'SCOOTER': '#BDAED4',
        'TRAIN': '#FDBF85',
        'BUS': '#FFFF99',
        'ELECTRIC_BUS': '#FFFF99',
        'CAR': '#386CB0',
        'ELECTRIC_CAR': '#386CB0',
        'TRAM': '#F0027F',
        'PLANE': '#BE5B17',
        'BOAT': '#BE5B17',
        'DETECTION_ERROR': '#000000'
    }

    features = []
    for row in df:
        cell, transport = row[0], row[1]

        # Bounding box of the geohash cell (south-west and north-east corners)
        bounds = geolib.bounds(cell)

        features.append({
            "type": "Feature",
            "properties": {
                "id": cell,
                "fillColor": colors[transport],
                "fillOpacity": 0.8,
                "stroke": False
            },
            "geometry": {
                "type": "Polygon",
                # Closed ring in [longitude, latitude] order: the first corner is repeated at the end
                "coordinates": [[
                    [bounds.sw.lon, bounds.sw.lat],
                    [bounds.sw.lon, bounds.ne.lat],
                    [bounds.ne.lon, bounds.ne.lat],
                    [bounds.ne.lon, bounds.sw.lat],
                    [bounds.sw.lon, bounds.sw.lat]
                ]]
            },
        })

    return {
        "type" : "FeatureCollection",
        "features": features
    }

In [24]:
# Build the GeoJSON map of the dominant means of transport per geohash
heatmap = heatmap_mot(geohashes_df.values)

# Serialize it with json, which writes double-quoted strings
with open('results/heatmap_mot.geojson', 'w') as f:
    f.write(json.dumps(heatmap))

NameError: name 'heatmap_mot' is not defined

In [25]:
# Render the means-of-transport map together with the HTML legend and save it
# as maps/heatmap_mot.html
create_map(heatmap, "heatmap_mot.html", legend_html)

In [26]:
!open -a Arc maps/heatmap_mot.html

## Get the map of the movements (start to end point)

In [27]:
# Draw one line per movement between its start and end geohash, colored by the
# mean of transport, and add the HTML legend to the map.
# NOTE(review): the loop reads 'start_geohash', 'end_geohash',
# 'mean_of_transport' and 'participant_id' from df. The df built in this
# notebook has none of these columns, hence the KeyError in the cell output.
# NOTE(review): the tile URL contains an API key in plain text.

## Create the actual folium map
m = folium.Map(location=[46.9446011, 7.4143311], zoom_start=8, tiles='https://tiles.stadiamaps.com/tiles/stamen_toner_lite/{z}/{x}/{y}{r}.png?api_key=977802c5-9b2e-4fc3-9254-a9199d0d5d0c', attr='https://stadiamaps.com/')
# Draw a path for each movement; the participant id is shown as tooltip
for index, row in df.iterrows():
    draw_path(row['start_geohash'], row['end_geohash'], row['mean_of_transport'], m, weight = 2, tooltip=row['participant_id'])


## Add the legend to the map
m.get_root().html.add_child(folium.Element(legend_html))

## Save the map as an html file
m.save('maps/paths_map_mot.html')

!open -a Arc maps/paths_map_mot.html

KeyError: 'start_geohash'