# Movement data analysis

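This notebook analyzes the movement data collected during the experiment: the number of trips per participant and per means of transport, the distances covered, and maps of the routes and visited locations.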

## Import data and libraries

In [8]:
import json
import math
import os
import random

import folium
import geopandas as gpd
import pandas as pd
from branca.colormap import linear
from folium.plugins import HeatMap
from geolib import geohash as geolib


In [9]:

# Load the movement records from the CSV export into `df`.
# NOTE(review): the file is read with geopandas although the displayed
# `geometry` column is empty; presumably every column arrives as text, which
# would explain the explicit int conversion of 'distance(m)' in a later cell —
# confirm, and consider pd.read_csv.
df = gpd.read_file('data/Test_movements.csv')
df

Unnamed: 0,participant_id,start_time,end_time,start_geohash,end_geohash,distance(m),mean_of_transport,is_power_saving,geometry
0,119928XAX,2023-11-22 15:07:41+00:00,2023-11-22 15:29:35+00:00,u0m468,u0m714,53753,TRAIN,False,
1,119928XAX,2023-11-22 15:36:51+00:00,2023-11-22 15:49:36+00:00,u0m714,u0m70f,1365,WALKING,False,
2,219935XE1,2023-11-12 19:26:43+00:00,2023-11-12 19:44:21+00:00,u0qh02,u0mgtj,22038,TRAIN,False,
3,219935XE1,2023-11-12 19:49:14+00:00,2023-11-12 19:54:59+00:00,u0mgtj,u0mgth,290,WALKING,False,
4,219935XE1,2023-11-12 20:01:35+00:00,2023-11-12 21:01:40+00:00,u0mgth,u0m714,103394,TRAIN,False,
...,...,...,...,...,...,...,...,...,...
431,2198211RX,2023-12-05 07:46:58+00:00,2023-12-05 08:01:57+00:00,u0m44w,u0m44y,1342,WALKING,False,
432,2198211RX,2023-12-04 21:33:35+00:00,2023-12-04 21:36:53+00:00,u0kcvw,u0kcvm,1347,WALKING,False,
433,2198211RX,2023-12-05 06:13:44+00:00,2023-12-05 06:16:56+00:00,u0kcvw,u0kcvm,1290,WALKING,False,
434,2198211RX,2023-12-05 08:02:11+00:00,2023-12-05 08:06:00+00:00,u0m44y,u0m44y,186,ON_BICYCLE,False,


In [10]:
## Convert the 'distance(m)' column to int so it can be summed per mode of transport later on.
## The column is overwritten in place on `df`.
df['distance(m)'] = df['distance(m)'].astype(int)

In [11]:
## Count how many movements were recorded for each participant.
## The result is a Series indexed by participant_id (see the output below).
participants = df['participant_id'].value_counts()
participants

participant_id
2198211RX     145
219935XE1     140
2197410XTX     57
119928XAX      49
119963XR1      27
219613XI1      11
219827XRX       7
Name: count, dtype: int64

In [12]:
## Count the number of movements recorded for each mode of transport
df['mean_of_transport'].value_counts()

mean_of_transport
WALKING         225
CAR              86
TRAIN            61
ON_BICYCLE       45
BUS              10
TRAM              6
ELECTRIC_BUS      2
PLANE             1
Name: count, dtype: int64

In [13]:
# Total distance per mode of transport: sum the 'distance(m)' column, then divide by 1000 to get km
distance_m_by_mode = df.groupby('mean_of_transport')['distance(m)'].sum()
distance_m_by_mode / 1000

mean_of_transport
BUS               18.887
CAR             1002.045
ELECTRIC_BUS       7.264
ON_BICYCLE        98.558
PLANE           1534.269
TRAIN           2294.621
TRAM              14.451
WALKING         1179.913
Name: distance(m), dtype: float64

## Create a visualization of the covered routes

In [28]:
## Get a coordinate from a geohash, adding a small random offset to avoid overlapping
def geohash_to_coordinate(geohash, jitter=0.001):
    """Decode a geohash into a ``[lat, lon]`` pair, optionally jittered.

    Args:
        geohash: Geohash string to decode with ``geolib.decode``.
        jitter: Width (in degrees) of the interval the random offset is drawn
            from; each coordinate is shifted by a value in
            ``[-jitter / 2, jitter / 2)``. Pass ``0`` to get the decoded
            coordinate unchanged. Defaults to 0.001.

    Returns:
        list: ``[lat, lon]`` as floats.
    """
    lat, lon = geolib.decode(geohash)
    # random.random() is in [0, 1): centre it on 0 and scale to the jitter width.
    # The offset keeps paths that share an endpoint from being drawn exactly on
    # top of each other.
    lat = float(lat) + (random.random() - 0.5) * jitter
    lon = float(lon) + (random.random() - 0.5) * jitter
    return [lat, lon]

In [29]:
## Draw a path from start_geohash to end_geohash on a folium map
def draw_path(start_geohash, end_geohash, mean_of_transport, map):
    """Add a straight line between two geohashes to a folium map.

    Args:
        start_geohash: Geohash where the movement starts.
        end_geohash: Geohash where the movement ends.
        mean_of_transport: Key of the colour table below; selects the line colour.
        map: Object the line is added to via ``add_to`` (a folium map in this
            notebook). The name shadows the builtin but is kept for callers.

    Raises:
        KeyError: If ``mean_of_transport`` has no entry in the colour table.
    """
    ## One colour per mode of transport (both bus types share the same one)
    palette = {
        'WALKING': '#7FC97F',
        'ON_BICYCLE': '#BDAED4',
        'TRAIN': '#FDBF85',
        'BUS': '#FFFF99',
        'ELECTRIC_BUS': '#FFFF99',
        'CAR': '#386CB0',
        'TRAM': '#F0027F',
        'PLANE': '#BE5B17'
    }

    ## Resolve both endpoints (start first, then end) to [lat, lon] pairs
    endpoints = [
        geohash_to_coordinate(point)
        for point in (start_geohash, end_geohash)
    ]

    line = folium.PolyLine(locations=endpoints, color=palette[mean_of_transport])
    line.add_to(map)


In [30]:
## Create the actual folium map
m = folium.Map(location=[46.9446011, 7.4143311], zoom_start=5)

## Seed the random generator: geohash_to_coordinate jitters every endpoint, so
## without a fixed seed the saved map would differ on every run
random.seed(42)

## Draw a path for each movement (only these three columns are needed, so
## iterate over them directly instead of building a Series per row)
for start, end, mode in zip(df['start_geohash'], df['end_geohash'], df['mean_of_transport']):
    draw_path(start, end, mode, m)

## Save the map as an html file
m.save('paths_map.html')


## Create a heatmap of the locations visited

In [193]:
## Collect every trip endpoint: all start geohashes followed by all end geohashes
geohashes = [*df['start_geohash'], *df['end_geohash']]
len(geohashes)

872

In [194]:
# Optionally reduce the precision by dropping the last character of each geohash.
# The list is rebuilt from df rather than shortened in place, so re-running
# this cell does not strip one more character every time.
geohashes = [gh[:-1] for gh in [*df['start_geohash'], *df['end_geohash']]]

In [195]:
# Wrap the shortened geohashes in a frame with a single 'geohash' column, the
# column geohashes_to_heatmap() reads its counts from
geohashes_df = gpd.GeoDataFrame(geohashes, columns=['geohash'])

## Convert the list of geohashes to a GeoJSON object


In [196]:
## Convert geohashes to a heatmap in geojson format
def geohashes_to_heatmap(df):
    """Build a GeoJSON FeatureCollection colouring each geohash by how often it occurs.

    Args:
        df: DataFrame with a 'geohash' column holding one geohash string per
            visit; repeated values are counted.

    Returns:
        dict: GeoJSON FeatureCollection with one rectangular Polygon feature
        per distinct geohash. Each feature has the properties 'id' (the
        geohash), 'fillColor', 'fillOpacity' and 'stroke'.
    """
    # Number of occurrences of each distinct geohash
    visit_counts = df['geohash'].value_counts()

    # Nothing to draw: return an empty collection rather than scaling on NaN
    if visit_counts.empty:
        return {"type": "FeatureCollection", "features": []}

    # Colours are looked up with log(count), so the scale has to be expressed
    # in log units as well: log(1) == 0 is the lowest possible value. Starting
    # the scale at 1 would collapse every cell with log(count) < 1 onto the
    # same colour.
    max_log_count = math.log(visit_counts.max())

    # If every geohash occurs once the range would be empty (0..0); widen it
    color_scale = linear.RdYlBu_10.scale(0, max_log_count if max_log_count > 0 else 1)

    # Convert geohashes to features for geoJSON
    features = []

    for cell_id, count in visit_counts.items():
        # Get the bounds of the geohash
        bounds = geolib.bounds(cell_id)
        color = color_scale(math.log(count))

        # Create a feature for the geohash; GeoJSON positions are [lon, lat]
        # and the ring is closed by repeating the first corner
        features.append({
            "type": "Feature",
            "properties": {
                "id": cell_id,
                "fillColor": color,
                "fillOpacity": 0.6,
                "stroke": False
            },
            "geometry": {
                "type": "Polygon",
                "coordinates": [[
                    [bounds.sw.lon, bounds.sw.lat],
                    [bounds.sw.lon, bounds.ne.lat],
                    [bounds.ne.lon, bounds.ne.lat],
                    [bounds.ne.lon, bounds.sw.lat],
                    [bounds.sw.lon, bounds.sw.lat]
                ]]
            },
        })

    return {
        "type" : "FeatureCollection",
        "features": features
    }

In [197]:
# Convert the geohashes to a heatmap in geojson format
heatmap = geohashes_to_heatmap(geohashes_df)

# Make sure the output folder exists so the cell also runs on a fresh checkout
os.makedirs('results', exist_ok=True)

# Save the GeoJSON (json.dump writes strings with double quotes)
with open('results/heatmap.geojson', 'w') as f:
    json.dump(heatmap, f)

In [198]:
# Visualize as a heatmap using Folium
# Create a folium map centered at an initial location
def create_map(geojson, mapname, legend=None):
    """Render a styled GeoJSON layer on a folium map and save it as HTML.

    Args:
        geojson: GeoJSON data passed to ``folium.GeoJson``. Every feature must
            carry the properties 'fillColor', 'stroke' and 'fillOpacity',
            which are used to style it.
        mapname: Name of the HTML file; the map is written to
            ``"maps/" + mapname``.
        legend: Optional HTML snippet added to the page (e.g. a legend box).
            Skipped when falsy.
    """
    m = folium.Map(location=[46.9446011, 7.4143311], zoom_start=5)

    # Style each polygon from the properties stored on its own feature
    def style_function(feature):
        props = feature["properties"]
        return {
            'fillColor': props["fillColor"],
            'stroke': props["stroke"],
            'fillOpacity': props["fillOpacity"],
        }

    # Add GeoJSON data to the map with the style function
    folium.GeoJson(
        geojson,
        name='Polygon Layer',
        style_function=style_function,
    ).add_to(m)

    # Add Layer Control to the map
    folium.LayerControl().add_to(m)

    if legend:
        m.get_root().html.add_child(folium.Element(legend))

    # Create the output folder on demand so saving also works on a fresh checkout
    out_path = "maps/" + mapname
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    m.save(out_path)


In [199]:
# Render the visit-count heatmap; create_map saves it as maps/heatmap.html
create_map(heatmap, "heatmap.html")

## Map the most-used means of transport for each location

In [208]:
## Build one (geohash, mean_of_transport) row per trip endpoint: every movement
## contributes its start geohash and its end geohash.
## Renaming both columns to 'geohash' before concatenating avoids the NaN-filled
## start/end columns and the column assignment on a sliced frame.
geohashes = pd.concat([
    df[['start_geohash', 'mean_of_transport']].rename(columns={'start_geohash': 'geohash'}),
    df[['end_geohash', 'mean_of_transport']].rename(columns={'end_geohash': 'geohash'}),
])

## Reduce precision of geohashes by dropping their last character
geohashes['geohash'] = geohashes['geohash'].str[:-1]

geohashes

Unnamed: 0,geohash,mean_of_transport
0,u0m46,TRAIN
1,u0m71,WALKING
2,u0qh0,TRAIN
3,u0mgt,WALKING
4,u0mgt,TRAIN
...,...,...
431,u0m44,WALKING
432,u0kcv,WALKING
433,u0kcv,WALKING
434,u0m44,ON_BICYCLE


In [209]:
## For each geohash, keep only the mode of transport that occurs most often
geohashes_df = (
    geohashes[['geohash', 'mean_of_transport']]
    # Count the occurrences of every (geohash, mean_of_transport) pair
    .groupby(['geohash', 'mean_of_transport'])
    .size()
    .reset_index(name='counts')
    # Highest counts first. The sort is stable so that ties keep the groupby
    # order and the selected mode is the same on every run.
    .sort_values('counts', ascending=False, kind='stable')
    # The first row left for each geohash is its most frequent mode
    .drop_duplicates(['geohash'])
)
# The 'counts' column is kept for inspection; heatmap_mot only reads the first
# two values of every row.
geohashes_df

Unnamed: 0,geohash,mean_of_transport,counts
47,u0m44,WALKING,97
25,u0kcu,CAR,83
71,u0m70,WALKING,63
77,u0m71,WALKING,33
30,u0kcv,WALKING,31
...,...,...,...
88,u0nqd,CAR,1
89,u0nqv,TRAIN,1
90,u0nrf,CAR,1
94,u0q1e,TRAIN,1


In [210]:
## Convert geohashes to a heatmap in geojson format
def heatmap_mot(df):
    """Build a GeoJSON FeatureCollection colouring each geohash by transport mode.

    Args:
        df: Iterable of rows where ``row[0]`` is a geohash and ``row[1]`` is a
            key of the colour table below. Further items in a row are ignored.

    Returns:
        dict: GeoJSON FeatureCollection with one rectangular Polygon feature
        per row, carrying the properties 'id', 'fillColor', 'fillOpacity' and
        'stroke'.

    Raises:
        KeyError: If ``row[1]`` has no entry in the colour table.
    """
    ## One colour per mode of transport (both bus types share the same one)
    mode_colors = {
        'WALKING': '#7FC97F',
        'ON_BICYCLE': '#BDAED4',
        'TRAIN': '#FDBF85',
        'BUS': '#FFFF99',
        'ELECTRIC_BUS': '#FFFF99',
        'CAR': '#386CB0',
        'TRAM': '#F0027F',
        'PLANE': '#BE5B17'
    }

    def to_feature(row):
        # Bounding box of the geohash first, then the colour of its mode
        box = geolib.bounds(row[0])
        fill = mode_colors[row[1]]

        west, south = box.sw.lon, box.sw.lat
        east, north = box.ne.lon, box.ne.lat

        # Closed ring of [lon, lat] corners, starting and ending at the
        # south-west corner
        ring = [
            [west, south],
            [west, north],
            [east, north],
            [east, south],
            [west, south]
        ]

        return {
            "type": "Feature",
            "properties": {
                "id": row[0],
                "fillColor": fill,
                "fillOpacity": 0.8,
                "stroke": False
            },
            "geometry": {
                "type": "Polygon",
                "coordinates": [ring]
            },
        }

    # One feature per input row, in input order
    return {
        "type" : "FeatureCollection",
        "features": [to_feature(row) for row in df]
    }

In [211]:
# Convert the means of transport to a heatmap in geojson format
# (heatmap_mot iterates over the rows of the frame, hence `.values`)
heatmap = heatmap_mot(geohashes_df.values)

# Make sure the output folder exists so the cell also runs on a fresh checkout
os.makedirs('results', exist_ok=True)

# Save the GeoJSON (json.dump writes strings with double quotes)
with open('results/heatmap_mot.geojson', 'w') as f:
    json.dump(heatmap, f)

In [212]:
# HTML legend box pinned to the bottom-left corner of the page. The colours
# repeat the ones heatmap_mot assigns to each mode of transport; Bus and
# Electric Bus share the same colour there as well.
legend_html = '''
     <div style="position: fixed; 
                 bottom: 50px; left: 50px; width: 160px; height: 300px; 
                 background-color: white; border:2px solid grey; z-index:9999; 
                 font-size:14px;">
     &nbsp; <b>Legend</b> <br>
     &nbsp; Walking &nbsp; <i class="fa fa-square" style="color:#7FC97F"></i><br>
     &nbsp; On Bicycle &nbsp; <i class="fa fa-square" style="color:#BDAED4"></i><br>
     &nbsp; Train &nbsp; <i class="fa fa-square" style="color:#FDBF85"></i><br>
     &nbsp; Bus &nbsp; <i class="fa fa-square" style="color:#FFFF99"></i><br>
     &nbsp; Electric Bus &nbsp; <i class="fa fa-square" style="color:#FFFF99"></i><br>
     &nbsp; Car &nbsp; <i class="fa fa-square" style="color:#386CB0"></i><br>
     &nbsp; Tram &nbsp; <i class="fa fa-square" style="color:#F0027F"></i><br>
     &nbsp; Plane &nbsp; <i class="fa fa-square" style="color:#BE5B17"></i><br>
      </div>
     '''
# Render the per-geohash transport map with the legend; create_map saves it as
# maps/heatmap_mot.html
create_map(heatmap, "heatmap_mot.html", legend_html)