## Visualize path points

In [12]:
import geopandas as gpd
import pandas as pd
import folium
from folium.plugins import HeatMap
from geolib import geohash as geolib
import numpy as np
import pydeck as pdk
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
import geohash2
from sklearn.neighbors import BallTree
from shapely.geometry import Polygon
import json
import libgeohash as gh

In [13]:
# Load the raw path points from a plain CSV file through geopandas.
# NOTE(review): only non-geometry columns are kept in the next cell — presumably
# pd.read_csv would be sufficient here; confirm before switching.
df = gpd.read_file('data/pathpoints.csv')

In [14]:
# Keep only the columns used by the visualisations below.
# .copy() detaches the subset from the loaded frame, so the later column
# assignments (df['angle'], df['time']) operate on an independent frame and do
# not trigger SettingWithCopyWarning.
df = df[['time_range', 'day_of_week', 'month', 'mode_of_transport', 'geohash', 'direction']].copy()
df.head()

Unnamed: 0,time_range,day_of_week,month,mode_of_transport,geohash,direction
0,18-21,TUESDAY,8,BUS,u0m70dcm,EAST
1,18-21,TUESDAY,8,BUS,u0m70ddq,NORTH_WEST
2,18-21,TUESDAY,8,BUS,u0m70dfh,EAST
3,6-9,WEDNESDAY,8,ON_BICYCLE,u0m70fqv,NORTH_EAST
4,6-9,WEDNESDAY,8,ON_BICYCLE,u0m70fqk,EAST


In [15]:
# List the distinct transport modes present in the data
pd.unique(df['mode_of_transport'])

array(['BUS', 'ON_BICYCLE', 'WALKING', 'TRAIN', 'CAR', 'TRAM', 'BOAT'],
      dtype=object)

In [16]:
# Optional filter (currently disabled): uncomment the next line to keep only car trips
#df = df[df['mode_of_transport'].isin(['CAR'])]


In [17]:
## Decode a geohash into a [latitude, longitude] pair of floats.
## No jitter is applied, so identical geohashes decode to identical coordinates.
def geohash_to_coordinate(geohash):
    """Return ``[latitude, longitude]`` for ``geohash``, both converted with ``float()``."""
    latitude, longitude = map(float, geohash2.decode(geohash))
    return [latitude, longitude]

In [18]:
# Plasma colour ramp: 256 [R, G, B] triples with integer channels in 0-255
plasma_colormap = [
    [int(channel * 255) for channel in plt.cm.plasma(level)[:3]]  # [:3] drops the alpha channel
    for level in range(256)
]

In [19]:
# Start from the unaggregated frame; the next cell replaces df_grouped with per-geohash counts
df_grouped = df

In [20]:
# Count how many path points fall into each geohash cell
df_grouped = (
    df.groupby('geohash')
      .size()
      .reset_index(name='count')
)

In [21]:
# Decode each geohash to coordinates for the pydeck heatmap
df_grouped['coordinates'] = df_grouped['geohash'].map(geohash_to_coordinate)
df_grouped['latitude'] = df_grouped['coordinates'].map(lambda pair: pair[0])
df_grouped['longitude'] = df_grouped['coordinates'].map(lambda pair: pair[1])

In [22]:
layer = pdk.Layer(
    'HeatmapLayer',
    df_grouped,
    get_position=['longitude', 'latitude'],
    get_weight='count',
    color_range=plasma_colormap,  # Apply Plasma colormap
    aggregation = 'SUM',
    opacity=0.8,
    pickable=True
)

view_state = pdk.ViewState(
    latitude=df_grouped['latitude'].median(),
    longitude=df_grouped['longitude'].median(),
    zoom=8,
    min_zoom=5,
    max_zoom=15,
)

r = pdk.Deck(
    layers=[layer],
    initial_view_state=view_state
)

r.to_html('maps/pydeck_heatmap.html')

!open -a Arc maps/pydeck_heatmap.html


In [None]:
# TODO: Visualize movements with arrows!
# Compass label -> bearing in degrees (NORTH = 0, EAST = 90, SOUTH = 180, WEST = 270)
_COMPASS_DEGREES = {
    'NORTH': 0,
    'NORTH_EAST': 45,
    'EAST': 90,
    'SOUTH_EAST': 135,
    'SOUTH': 180,
    'SOUTH_WEST': 225,
    'WEST': 270,
    'NORTH_WEST': 315,
}


def translate_to_angle(direction):
    """Return the bearing in degrees for a compass label such as ``'NORTH_EAST'``.

    Labels are matched exactly (case-sensitive). Any value that is not one of
    the eight known labels maps to 0, the same value as ``'NORTH'``.
    """
    return _COMPASS_DEGREES.get(direction, 0)

In [None]:
# Map each compass label to its bearing in degrees (NORTH = 0, EAST = 90, SOUTH = 180, WEST = 270)
df['angle'] = df['direction'].apply(translate_to_angle)

In [None]:
# Inspect the frame, which now carries the derived angle column
df

Unnamed: 0,time_range,day_of_week,month,mode_of_transport,geohash,direction,angle
0,18-21,TUESDAY,8,BUS,u0m70dcm,EAST,90
1,18-21,TUESDAY,8,BUS,u0m70ddq,NORTH_WEST,315
2,18-21,TUESDAY,8,BUS,u0m70dfh,EAST,90
3,6-9,WEDNESDAY,8,ON_BICYCLE,u0m70fqv,NORTH_EAST,45
4,6-9,WEDNESDAY,8,ON_BICYCLE,u0m70fqk,EAST,90
...,...,...,...,...,...,...,...
283800,9-10,TUESDAY,2,WALKING,srb100dp,EAST,90
283801,9-10,TUESDAY,2,WALKING,srb100bm,SOUTH_EAST,135
283802,9-10,TUESDAY,2,WALKING,srb100eq,EAST,90
283803,9-10,TUESDAY,2,WALKING,srb100jy,SOUTH,180


In [None]:
# Reduce precision of geohashes by removing last char
#df['geohash'] = df['geohash'].apply(lambda x: x[:-1])

In [None]:
# Bucket each record by the start hour of its time range:
# a start before 12 is 'morning', anything else is 'afternoon'
df['time'] = df['time_range'].apply(
    lambda span: 'morning' if int(span.split('-')[0]) < 12 else 'afternoon'
)

In [None]:
# Circular mean of a collection of angles
def average_angle(angles):
    """Return the circular mean of ``angles`` (degrees).

    Each angle contributes a unit vector; the cosine and sine components are
    summed and the direction of the resulting vector is returned in degrees,
    as given by ``np.arctan2`` (so within [-180, 180]). An empty input
    returns 0.0.
    """
    # Materialise once so one-shot iterables are consumed a single time
    thetas = [np.radians(angle) for angle in angles]
    x = sum(np.cos(theta) for theta in thetas)
    y = sum(np.sin(theta) for theta in thetas)
    return np.degrees(np.arctan2(y, x))

In [None]:
# Weighted circular mean of angles (the flow map uses inverse-distance weights)
def weighted_average_angle(angles, weights):
    """Return the weighted circular mean of ``angles`` (degrees).

    Angles and weights are paired with ``zip`` (the longer input is
    truncated). Each angle contributes a vector of length ``weight``; the
    direction of the summed vector is returned in degrees, as given by
    ``np.arctan2`` (so within [-180, 180]). Empty input returns 0.0.
    """
    # Pair up once so one-shot iterables are consumed a single time
    terms = [(weight, np.radians(angle)) for angle, weight in zip(angles, weights)]
    x = sum(weight * np.cos(theta) for weight, theta in terms)
    y = sum(weight * np.sin(theta) for weight, theta in terms)
    return np.degrees(np.arctan2(y, x))

In [None]:
# Aggregate per geohash and time of day (morning / afternoon):
# circular mean of the angles plus the number of points in the group
df_grouped = (
    df.groupby(['geohash', 'time'])
      .agg(
          angle=('angle', average_angle),
          count=('geohash', 'count'),
      )
      .reset_index()
)

In [None]:
# Decode every geohash once and split the result into latitude / longitude columns
decoded = df_grouped['geohash'].apply(geohash_to_coordinate)
df_grouped['latitude'] = decoded.apply(lambda pair: pair[0])
df_grouped['longitude'] = decoded.apply(lambda pair: pair[1])

In [None]:
# Inspect the per-geohash, per-time-of-day aggregate with its decoded coordinates
df_grouped

Unnamed: 0,geohash,time,angle,count,latitude,longitude
0,9q9hquk1,afternoon,90.0,2,37.333174,-122.041454
1,9q9hquk2,afternoon,90.0,2,37.333002,-122.041111
2,9q9hquk4,afternoon,135.0,3,37.333345,-122.041454
3,9q9hquk8,afternoon,90.0,3,37.333002,-122.040768
4,9q9hqukb,afternoon,90.0,2,37.333002,-122.040424
...,...,...,...,...,...,...
181543,u33dk28x,afternoon,135.0,1,52.430792,13.547001
181544,u33dk29d,afternoon,135.0,1,52.429934,13.548374
181545,u33dk2h7,afternoon,135.0,1,52.427359,13.552151
181546,u33dk40s,afternoon,135.0,1,52.438517,13.536015


In [None]:
# Create line based on coordinate and angle to represent direction, and use count for length
def create_line(row, max_count, max_length=0.0005):
    """Build a two-point line that starts at a cell and points along its angle.

    Args:
        row: Mapping (dict or DataFrame row) providing 'latitude', 'longitude',
            'angle' (degrees) and 'count'.
        max_count: Count that corresponds to the full ``max_length``.
        max_length: Length, in coordinate degrees, of the line for ``max_count``.

    Returns:
        ``[(lat, lon), (lat_end, lon_end)]`` where the end point is offset by
        ``length * cos(angle)`` in latitude and ``length * sin(angle)`` in
        longitude. ``length`` is ``count / max_count * max_length`` with a
        floor of 5 % of ``max_length``.
    """
    min_ratio = 0.05  # smallest fraction of max_length used for any row
    ratio = min_ratio
    # A zero, negative or NaN max_count cannot serve as a scale: the plain
    # division would raise ZeroDivisionError or produce NaN coordinates.
    # Such calls fall back to the minimum length instead.
    if max_count > 0:
        scaled = row['count'] / max_count
        if scaled > min_ratio:  # comparison is False for a NaN count
            ratio = scaled
    leng = ratio * max_length

    theta = np.radians(row['angle'])
    start = (row['latitude'], row['longitude'])
    # NOTE(review): the same offset in degrees is applied on both axes. A degree
    # of longitude is shorter than a degree of latitude away from the equator,
    # so drawn bearings may look skewed — confirm whether a cos(latitude)
    # correction on the longitude term is wanted.
    end = (
        row['latitude'] + leng * np.cos(theta),
        row['longitude'] + leng * np.sin(theta),
    )
    return [start, end]

In [None]:
# Create flow map
def create_flow(x1, y1, x2, y2, spacing, original_data, kernel_size=0.1):
    """Interpolate a grid of flow vectors from aggregated path points.

    A regular lattice with step ``spacing`` is laid over latitudes
    ``[x1, x2)`` and longitudes ``[y1, y2)`` and snapped to precision-8
    geohash cells. For every cell, the points of ``original_data`` inside a
    square window of half-width ``kernel_size`` (degrees) around the cell
    centre are combined with inverse-distance weights.

    Args:
        x1, x2: Lower / upper latitude bound of the grid.
        y1, y2: Lower / upper longitude bound of the grid.
        spacing: Lattice step in degrees, used for both axes.
        original_data: DataFrame with 'latitude', 'longitude', 'angle'
            (degrees) and 'count' columns.
        kernel_size: Half-width of the square window, in degrees.

    Returns:
        GeoDataFrame with one row per grid cell and the columns 'geohash',
        'latitude', 'longitude', 'angle' and 'count'. Cells with no points in
        their window get angle 0 and count 0.
    """
    # Sorting the de-duplicated set keeps the row order reproducible between
    # runs (plain set iteration order of strings varies per process).
    geohashes = sorted({
        geolib.encode(lat, lon, precision=8)
        for lat in np.arange(x1, x2, spacing)
        for lon in np.arange(y1, y2, spacing)
    })

    # Decode every cell centre once and reuse it for the columns and the loop
    centres = [geohash_to_coordinate(code) for code in geohashes]
    geohashes_df = gpd.GeoDataFrame(geohashes, columns=['geohash'])
    geohashes_df['latitude'] = [centre[0] for centre in centres]
    geohashes_df['longitude'] = [centre[1] for centre in centres]

    # Loop-invariant column lookups
    data_lat = original_data['latitude']
    data_lon = original_data['longitude']

    angles = []
    counts = []
    for cell_lat, cell_lon in centres:
        # Square window around the cell centre
        in_window = (
            (data_lat >= cell_lat - kernel_size)
            & (data_lat <= cell_lat + kernel_size)
            & (data_lon >= cell_lon - kernel_size)
            & (data_lon <= cell_lon + kernel_size)
        )
        points = original_data[in_window]
        if points.empty:
            angles.append(0.0)
            counts.append(0.0)
            continue

        distances = np.asarray(
            np.sqrt((points['latitude'] - cell_lat) ** 2 + (points['longitude'] - cell_lon) ** 2),
            dtype=float,
        )

        # Inverse-distance weights. A point exactly on the cell centre must
        # dominate: giving it weight 1 while neighbours get 1/d (d is a small
        # fraction of a degree, so 1/d >> 1) would almost ignore it. Coincident
        # points therefore take all the weight, shared equally between them.
        coincident = distances == 0
        if coincident.any():
            weights = coincident.astype(float)
        else:
            weights = 1.0 / distances
        weights /= weights.sum()

        angles.append(weighted_average_angle(points['angle'], weights))
        counts.append(np.sum(weights * points['count'].to_numpy()))

    # Assigning whole columns also creates them when the grid is empty
    geohashes_df['angle'] = np.array(angles, dtype=float)
    geohashes_df['count'] = np.array(counts, dtype=float)
    return geohashes_df


In [None]:
# Create a map only a part of switzerland, where we create a grid with arrows, similar to a wind map or a flow map, where flows depend on the count and direction of the existing data points
# Bern
x1 = 46.92
y1 = 7.38
x2 = 46.96
y2 = 7.51
spacing = 0.001
k = 0.005 # Kernel size
max_length = 0.0005
'''
# Switzerland
x1 = 45.69
y1 = 5.4
x2 = 47.47
y2 = 10.71
spacing = 0.01
k = 0.05 # Kernel size
max_length = 0.01
'''

m = folium.Map(location=[(x1+x2)/2, (y1+y2)/2], zoom_start=14, tiles="cartodb positron")

flow_morning = create_flow(x1, y1, x2, y2, spacing, df_grouped[df_grouped['time'] == 'morning'], kernel_size=k)
flow_afternoon = create_flow(x1, y1, x2, y2, spacing, df_grouped[df_grouped['time'] == 'afternoon'], kernel_size=k)

# Create a line for each geohash in morning and afternoon
max_count_morning = flow_morning['count'].max()
for index, row in flow_morning.iterrows():
    if not np.isnan(row['angle']):
        folium.PolyLine(create_line(row, max_count_morning, max_length), color='blue', weight=1).add_to(m)

max_count_afternoon = flow_afternoon['count'].max()
for index, row in flow_afternoon.iterrows():
    if not np.isnan(row['angle']):
        folium.PolyLine(create_line(row, max_count_afternoon, max_length), color='red', weight=1).add_to(m)

# Display the map
m.save('maps/pathpoints_flowmap.html')
!open -a Arc maps/pathpoints_flowmap.html

In [22]:
# Display the map with arrows for morning and afternoon in different colors
m = folium.Map(location=[46.5, 7.2], zoom_start=10, tiles="cartodb positron")
max_count = df_grouped['count'].max()

# Add arrows for morning
for index, row in df_grouped[df_grouped['time'] == 'morning'].iterrows():
    folium.PolyLine(create_line(row, max_count), color='blue', weight=2, opacity=1).add_to(m)

# Add arrows for afternoon
for index, row in df_grouped[df_grouped['time'] == 'afternoon'].iterrows():
    folium.PolyLine(create_line(row, max_count), color='red', weight=2, opacity=1).add_to(m)

# Display the map
m.save('maps/pathpoints_arrows.html')
!open -a Arc maps/pathpoints_arrows.html