In [1]:
import pandas as pd
import numpy as np
import os
import folium
from branca.colormap import linear
from folium.plugins import HeatMap

# Analyzing City Bike Data in Bergen, Norway (2023)

This notebook uses data from [Bergen bysykkel historiske data](https://bergenbysykkel.no/apne-data/historisk) for january to december 2023.

The dataset contains information such as the start and end times of bike rides, the duration of rides, start and end station IDs, station names, and geographical coordinates.

## Overview of Analysis

1. **Data Preparation**: We begin by loading and preprocessing the data, concatenating monthly data files into a single dataframe.

2. **Visualizing Popular Routes**: We identify and visualize the 100 most traveled routes in Bergen based on bike sharing data. These visualizations provide insights into the most popular bike routes and help understand the patterns of bike usage in the city.

3. **Visualizing Least Traveled Routes**: Similarly, we visualize the 100 least traveled routes, which might indicate underutilized areas or less popular bike routes.

4. **Heatmap of Starting Stations**: We create a heatmap showing the distribution of bike ride start locations across the city. This visualization helps identify areas with high bike usage density.

Throughout the notebook, we use the `folium` library for creating interactive maps to visualize geographical data. 

## Loading the data

In [2]:
filenames = [f"data/{month:02d}.csv" for month in range(1, 13)]

dataframes = []

for filename in filenames:
    if os.path.exists(filename):
        df = pd.read_csv(filename)
        dataframes.append(df)
    else:
        print(f"File not found: {filename}")

combined_df = pd.concat(dataframes, ignore_index=True)

In [3]:
combined_df.head()

Unnamed: 0,started_at,ended_at,duration,start_station_id,start_station_name,start_station_description,start_station_latitude,start_station_longitude,end_station_id,end_station_name,end_station_description,end_station_latitude,end_station_longitude
0,2023-01-01 04:22:50.614000+00:00,2023-01-01 04:33:19.884000+00:00,629,150,Torget,Ved busstopp,60.395878,5.325284,1896,Takhagen på Nordnes,På taket av Nordneshallen,60.398865,5.306411
1,2023-01-01 04:31:33.724000+00:00,2023-01-01 04:33:21.525000+00:00,107,1894,Kong Oscars gate,Ved Kong Oscars gate 36,60.393323,5.330654,1894,Kong Oscars gate,Ved Kong Oscars gate 36,60.393323,5.330654
2,2023-01-01 04:33:41.500000+00:00,2023-01-01 04:44:24.625000+00:00,643,1894,Kong Oscars gate,Ved Kong Oscars gate 36,60.393323,5.330654,809,Skutevikstorget,På sørsiden av torget,60.402229,5.320745
3,2023-01-01 05:55:30.388000+00:00,2023-01-01 05:59:17.644000+00:00,227,797,John Lunds plass,John Lunds plass,60.388247,5.324558,22,Skur 11,Nord for skur 11,60.396384,5.324169
4,2023-01-01 05:59:33.946000+00:00,2023-01-01 06:03:47.999000+00:00,254,22,Skur 11,Nord for skur 11,60.396384,5.324169,812,Hans Hauges gate,Jens Rolfens gate 6,60.401906,5.324748


In [4]:
combined_df.tail()

Unnamed: 0,started_at,ended_at,duration,start_station_id,start_station_name,start_station_description,start_station_latitude,start_station_longitude,end_station_id,end_station_name,end_station_description,end_station_latitude,end_station_longitude
467856,2023-12-31 21:37:56.593000+00:00,2023-12-31 21:46:08.219000+00:00,491,49,Studentsenteret UiB,ved Parkveien / Olaf Ryes vei,60.387198,5.32298,212,Klosteret,Strangehagen,60.395138,5.314947
467857,2023-12-31 21:38:05.944000+00:00,2023-12-31 21:42:35.790000+00:00,269,156,Allehelgens plass,ved Politihuset,60.392651,5.328977,219,Jonsvollkvartalet,Jonsvollsgaten,60.392677,5.317308
467858,2023-12-31 21:52:34.239000+00:00,2023-12-31 22:02:00.744000+00:00,566,2322,Høyteknologisenteret,Utenfor hovedinngang,60.38175,5.331699,36,Torgallmenningen,Valkendorfsgaten / Torgallmenningen,60.392918,5.323669
467859,2023-12-31 22:13:49.387000+00:00,2023-12-31 22:25:26.481000+00:00,697,818,Vågsallmenningen,Ved Husfliden,60.39369,5.326581,5,Moxy Hotellet,På sørsiden av hotellet,60.378783,5.333327
467860,2023-12-31 22:25:37.636000+00:00,2023-12-31 22:38:15.701000+00:00,758,219,Jonsvollkvartalet,Jonsvollsgaten,60.392677,5.317308,640,Damsgårdsveien,Ved inngang Bunnpris,60.381512,5.319435


## Creating a new dataframe and visualizing the most popular routes

Before we can visualize the most popular routes, we first need to create a new dataframe containing the route name along with the starting and ending id. We also want to count the number of times the route is traveled.

In [5]:
combined_df['route_name'] = combined_df['start_station_name'] + " to " + combined_df['end_station_name']

combined_df['start_id'] = combined_df['start_station_id']
combined_df['end_id'] = combined_df['end_station_id']

route_details = combined_df.groupby(['route_name', 'start_id', 'end_id']).size().reset_index(name='count')
route_details_sorted = route_details.sort_values(by='count', ascending=False)
route_details_sorted = pd.merge(route_details_sorted, combined_df[['start_id', 'end_id', 'start_station_latitude', 'start_station_longitude', 'end_station_latitude', 'end_station_longitude']], 
                                how='left', 
                                left_on=['start_id', 'end_id'], 
                                right_on=['start_id', 'end_id'])

route_details_sorted = route_details_sorted.drop_duplicates(subset=['route_name', 'start_id', 'end_id'])

print(route_details_sorted.head())

                                  route_name  start_id  end_id  count  \
0               Torgallmenningen to Nykirken        36     814   1831   
1831      Torgallmenningen to C. Sundts gate        36     138   1629   
3460  Busstasjonen 1 Nord to Møllendalsplass       789     220   1617   
5077            Nykirken to Torgallmenningen       814      36   1531   
6608  Møllendalsplass to Busstasjonen 1 Nord       220     789   1350   

      start_station_latitude  start_station_longitude  end_station_latitude  \
0                  60.392918                 5.323669             60.396949   
1831               60.392918                 5.323669             60.399475   
3460               60.388910                 5.333817             60.379686   
5077               60.396949                 5.313495             60.392918   
6608               60.379686                 5.351994             60.388910   

      end_station_longitude  
0                  5.313495  
1831               5.30876

## Visualizing the 100 most traveled routes for 2023

In [6]:
m = folium.Map(location=[60.392918, 5.323669], zoom_start=13)  # Use the first route's start coordinates as the initial location
colormap = linear.inferno.scale(route_details_sorted['count'].min(), route_details_sorted['count'].max())

for index, row in route_details_sorted.head(100).iterrows():
    start_coords = (row['start_station_latitude'], row['start_station_longitude'])
    end_coords = (row['end_station_latitude'], row['end_station_longitude'])
    route_color = colormap(row['count'])
    folium.PolyLine(locations=[start_coords, end_coords], color=route_color, weight=2, popup=f"Route: {row['route_name']} ({row['count']} counts)").add_to(m)

colormap.add_to(m)
title_html = '<h3 align="center" style="font-size:16px"><b>The 100 most traveled routes for 2023</b></h3>'
m.get_root().html.add_child(folium.Element(title_html))
m

## Visualizing the 100 least traveled routes for 2023

In [7]:
m = folium.Map(location=[60.392918, 5.323669], zoom_start=12)  # Use the first route's start coordinates as the initial location
colormap = linear.YlOrRd_04.scale(route_details_sorted['count'].min(), route_details_sorted['count'].max())

for index, row in route_details_sorted.tail(100).iterrows():
    start_coords = (row['start_station_latitude'], row['start_station_longitude'])
    end_coords = (row['end_station_latitude'], row['end_station_longitude'])
    route_color = colormap(row['count'])
    folium.PolyLine(locations=[start_coords, end_coords], color=route_color, weight=1, popup=f"Route: {row['route_name']} ({row['count']} counts)").add_to(m)

colormap.add_to(m)

title_html = '<h3 align="center" style="font-size:16px"><b>The 100 least traveled routes for 2023</b></h3>'
m.get_root().html.add_child(folium.Element(title_html))
m

## Visualizing the most popular starting stations

In [8]:
m = folium.Map(location=[60.392918, 5.323669], zoom_start=13)
start_coordinates = route_details_sorted[['start_station_latitude', 'start_station_longitude']].values.tolist()
heat_map_layer = HeatMap(start_coordinates)
heat_map_layer.add_to(m)
title_html = '<h3 align="center" style="font-size:16px"><b>Heatmap of Route Start Coordinates</b></h3>'
m.get_root().html.add_child(folium.Element(title_html))
m