In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import pickle
import requests

from bokeh.io import show
from bokeh.models import ColumnDataSource, GMapOptions, HoverTool, GlyphRenderer, Legend
from bokeh.plotting import gmap, figure
from bokeh.layouts import column

from IPython.core.display import display, HTML
# Inject a CSS rule that sets the notebook's `.container` element to 95% width.
wide_layout_css = "<style>.container { width:95% !important; }</style>"
display(HTML(wide_layout_css))

# Oslo Bysykkel scheme quicklook

A quick look at load balancing in Oslo, based on a day's worth of station capacity data collected from the Oslo Bysykkel API.

I ran the counterpart script `collect_data.py` every 10 minutes for a 24-hour period in order to see which stations were emptied and refilled.

In [2]:
# Load API keys from a local KEY=VALUE file so they are never hard-coded in the notebook.
secrets = dict()
with open('secrets', 'r') as f:
    for line_number, line in enumerate(f, start=1):
        # Strip the newline plus any stray surrounding whitespace / '\r'.
        line = line.strip()
        # Blank lines (e.g. a trailing empty line) are ignored instead of
        # raising IndexError.
        if not line:
            continue
        # Split on the FIRST '=' only: a value may itself contain '='
        # (for example base64 padding) and must not be truncated.
        key, separator, value = line.partition('=')
        if not separator:
            # Report the position only; never echo the line, it may hold a secret.
            raise ValueError(
                f"secrets file, line {line_number}: expected KEY=VALUE")
        secrets[key] = value
# Show only the key names so the secret values never end up in the saved output.
secrets.keys()

dict_keys(['GMAPS_API_KEY', 'OSLO_BYSYKKEL_API_KEY'])

In [3]:
# Fetch the station list from the Oslo Bysykkel API.
base_url = 'https://oslobysykkel.no/api/v1'
auth_header = {'Client-Identifier': secrets['OSLO_BYSYKKEL_API_KEY']}

# The timeout stops a full re-run from hanging forever if the API is unreachable.
r = requests.get(f'{base_url}/stations', headers=auth_header, timeout=30)
# Fail here with a clear HTTP error (e.g. a rejected API key) instead of an
# opaque JSON-decode error or KeyError on the line below.
r.raise_for_status()

stations = r.json()['stations']

In [4]:
# Lookup tables keyed by station id, one per station attribute of interest.
capacity = {station['id']: station['number_of_locks'] for station in stations}
station_names = {station['id']: station['title'] for station in stations}
station_locs = {station['id']: station['center'] for station in stations}

Get the records and format them as needed. This means attaching the station details to each record and correcting for timezone.

In [5]:
# Read the collected records (first CSV column becomes the index) and discard
# the column that is not used in this analysis.
df = pd.read_csv('oslo/oslo_sykkel_records.csv', index_col=0).drop(
    columns=['overflow_capacity'])

# Shift every timestamp forward by two hours.
# NOTE(review): presumably converts UTC to Oslo summer time -- confirm against
# how collect_data.py records its timestamps.
df['date'] = pd.to_datetime(df['date']) + timedelta(hours=2)

# Attach station coordinates; ids missing from the station list get NaN.
for column, key in (('lat', 'latitude'), ('lon', 'longitude')):
    df[column] = df['id'].apply(
        lambda station_id, key=key: station_locs.get(station_id, {key: np.nan})[key])

# Attach the station title and lock count (NaN where the id is unknown).
df['name'] = df['id'].map(station_names)
df['capacity'] = df['id'].map(capacity)

# Number of distinct stations that appear in the records.
ids = set(df['id'])
print(len(ids))
df.head()

299


Unnamed: 0,bikes,id,locks,date,lat,lon,name,capacity
0,0,177,28,2018-09-04 12:49:52,59.915629,10.777664,Tøyenparken,29.0
1,15,166,4,2018-09-04 12:49:52,59.943748,10.760195,Bjølsen Studentby,21.0
2,0,178,12,2018-09-04 12:49:52,59.929838,10.711223,Colosseum Kino,12.0
3,1,167,16,2018-09-04 12:49:52,59.920565,10.734274,Pilestredet Park Studenthus,18.0
4,1,173,6,2018-09-04 12:49:52,59.916847,10.758148,Legevakten,10.0


Capacity seems to shift: `bikes + locks` changes across the day for a single station, e.g. id=177 goes from 28 to 24 to 25. Sometimes it reaches the maximum reported by the `/stations` endpoint.

### Looking for refills

We can look for the change in number of bikes between each record and use that to guess if there was a refill.

There's a chance, of course, that finer-grained measurements are needed and that I should have sampled every 2 or 5 minutes instead of every 10, but let's see.

In [6]:
# Order the records chronologically within each station.
df = df.sort_values(by=['id', 'date'])

# Change in bike count since the same station's previous record.
# Grouping by station matters: a plain shift over the whole frame would
# subtract the last record of the previous station from the first record of
# the next one, producing a bogus diff for any station that has no record at
# the global earliest timestamp.
df['diff'] = df.groupby('id')['bikes'].diff()

# Kept for reference: the timestamp of the first collection run.
earliest_date = df['date'].min()

# The first record of every station has no predecessor (its diff is NaN), so
# it carries no information about refills and is dropped.
df = df[df['diff'].notna()].reset_index(drop=True)

df.head()

Unnamed: 0,bikes,id,locks,date,lat,lon,name,capacity,diff
0,4,157,26,2018-09-04 13:00:01,59.91562,10.762248,Nylandsveien,30.0,0.0
1,4,157,26,2018-09-04 13:09:58,59.91562,10.762248,Nylandsveien,30.0,0.0
2,3,157,27,2018-09-04 13:19:55,59.91562,10.762248,Nylandsveien,30.0,-1.0
3,4,157,26,2018-09-04 13:29:51,59.91562,10.762248,Nylandsveien,30.0,1.0
4,4,157,26,2018-09-04 13:40:01,59.91562,10.762248,Nylandsveien,30.0,0.0


This is a reasonably busy station in the centre of town. The -22 and the late-night -10 could well be load-balancing. We can use something like ±8 as our cutoff for now.

In [7]:
# Records for station 389 where the bike count moved by four or more, in either direction.
is_station_389 = df['id'] == 389
is_large_change = df['diff'].abs() >= 4
df[is_station_389 & is_large_change]

Unnamed: 0,bikes,id,locks,date,lat,lon,name,capacity,diff
32955,37,389,1,2018-09-04 14:49:57,59.911881,10.747759,Arkaden Sør,39.0,5.0
32966,15,389,22,2018-09-04 16:39:58,59.911881,10.747759,Arkaden Sør,39.0,-22.0
32979,10,389,26,2018-09-04 18:50:00,59.911881,10.747759,Arkaden Sør,39.0,-4.0
33011,6,389,31,2018-09-05 00:09:52,59.911881,10.747759,Arkaden Sør,39.0,-7.0
33025,2,389,35,2018-09-05 02:29:59,59.911881,10.747759,Arkaden Sør,39.0,-4.0
33059,8,389,30,2018-09-05 08:09:57,59.911881,10.747759,Arkaden Sør,39.0,4.0
33065,19,389,19,2018-09-05 09:09:51,59.911881,10.747759,Arkaden Sør,39.0,4.0
33078,22,389,16,2018-09-05 11:19:52,59.911881,10.747759,Arkaden Sør,39.0,-10.0


In [8]:
# All records ordered by change in bike count, largest removals first.
df.sort_values("diff", ascending=True)

Unnamed: 0,bikes,id,locks,date,lat,lon,name,capacity,diff
32966,15,389,22,2018-09-04 16:39:58,59.911881,10.747759,Arkaden Sør,39.0,-22.0
39214,17,465,22,2018-09-04 16:39:58,59.910820,10.730321,Rådhusbrygge 4,40.0,-21.0
9294,15,222,19,2018-09-04 23:40:01,59.922414,10.758127,Olaf Ryes plass,35.0,-20.0
97,10,157,20,2018-09-05 05:09:55,59.915620,10.762248,Nylandsveien,30.0,-20.0
31601,0,379,20,2018-09-05 01:49:59,59.914837,10.753622,Bernt Ankers gate,25.0,-20.0
452,7,160,42,2018-09-04 17:19:59,59.910215,10.751687,Sjøsiden vest,50.0,-19.0
9261,14,222,19,2018-09-04 18:09:58,59.922414,10.758127,Olaf Ryes plass,35.0,-19.0
12780,14,247,30,2018-09-04 13:00:01,59.910091,10.752138,Sjøsiden øst,45.0,-19.0
11075,23,234,18,2018-09-05 12:30:00,59.913731,10.736032,Spikersuppa Vest,42.0,-18.0
28752,10,359,18,2018-09-05 00:19:56,59.911609,10.761874,Tøyenbekken,30.0,-18.0


In [9]:
# Google map with the view centred on the given coordinates.
map_options = GMapOptions(lat=59.914, lng=10.751, map_type="roadmap", zoom=11)

# Records where more than 8 bikes appeared / disappeared since the previous record.
df_added = df[df['diff'].gt(8)]
df_removed = df[df['diff'].lt(-8)]

# Only the coordinates are needed to place the markers.
added_data = {axis: list(df_added[axis].values) for axis in ('lat', 'lon')}
removed_data = {axis: list(df_removed[axis].values) for axis in ('lat', 'lon')}

added_source = ColumnDataSource(added_data)
removed_source = ColumnDataSource(removed_data)


plot = gmap(secrets['GMAPS_API_KEY'], map_options, title="Oslo")

# Blue markers for additions first, then red markers for removals on top.
for marker_colour, marker_source in (("blue", added_source), ("red", removed_source)):
    plot.circle(x="lon", y="lat", size=15, fill_color=marker_colour,
                fill_alpha=0.8, source=marker_source)


# Stand-alone legend: a small dummy figure, shown below the map, whose only
# purpose is to carry the 'Bikes added' / 'Bikes removed' legend entries.
# NOTE(review): presumably a workaround for not attaching a legend to the map
# plot itself -- confirm.
dumx = range(10)
dumfig = figure(outline_line_alpha=0, plot_height=200,
                plot_width=300, toolbar_location=None)
# Two throw-away circle series, coloured to match the map markers, so that the
# legend gets one entry per colour.
dumfig.circle(dumx, dumx, color='blue',
              size=10, legend='Bikes added')
dumfig.circle(dumx, dumx, color='red',
              size=10, legend='Bikes removed')

# Keep only the glyph renderers and the legend; every other renderer is
# discarded. The comparison is an exact type match, so subclasses are dropped.
dumfig.renderers = [rend for rend in dumfig.renderers if (
    type(rend) == GlyphRenderer or type(rend) == Legend)]
# NOTE(review): the next three lines assume renderers[0] is the Legend; that
# depends on the order Bokeh stores its renderers in -- confirm for the Bokeh
# version in use.
dumfig.renderers[0].border_line_alpha = 1
dumfig.renderers[0].spacing = 6
dumfig.renderers[0].location = 'top_left'
# Hide the dummy circles themselves; the legend is left untouched.
for rend in dumfig.renderers:
    if type(rend) == GlyphRenderer:
        rend.visible = False
        
# Lay out the map above the legend figure and render both.
show(column(plot, dumfig))

Produces a live version of:

![title](oslo/oslo_load_balancing.png)