In [1]:
import numpy as np
import pandas as pd
from pandas.io.json import json_normalize
import folium
import branca.colormap as cm
import geopandas as gpd
from geopandas.tools import sjoin
from shapely.geometry import Point, Polygon

# Import trip and station data 

In [2]:
bremen = pd.read_csv('../data/processed/bremen.csv')
bremen.head(2)

Unnamed: 0,bike,identification,start_time,end_time,weekend,duration_sec,start_lng,start_lat,end_lng,end_lat,...,humidity_2m,dew_point_2m,max_at_2m,mean_speed_h/s,direction_degree_x,max_m/s,min_mean_m/s,max_mean_m/s,direction_degree_y,min
0,20507,14776184,2019-04-21 17:34:00,2019-04-21 17:47:00,1,780.0,8.884911,53.078923,8.899906,53.078711,...,24.8,-0.1,21.1,1.8,120.0,2.1,1.0,2.2,110.0,0.0
1,20507,10278649,2019-04-23 17:55:00,2019-04-23 20:13:00,0,8280.0,8.884911,53.078923,8.884911,53.078923,...,43.2,5.9,18.7,6.7,110.0,11.7,3.6,7.3,110.0,0.0


In [3]:
stations = json_normalize(pd.read_json('../data/external/station_information.json').iloc[0,2])
stations.head(2)

Unnamed: 0,station_id,name,short_name,lat,lon,region_id,capacity
0,2351602,WESER-KURIER | Pressehaus,2910,53.076722,8.803943,379,5.0
1,7872668,Am Dobben,2925,53.078194,8.82325,379,


In [4]:
stations.isna().sum()

station_id     0
name           0
short_name     0
lat            0
lon            0
region_id      0
capacity      66
dtype: int64

### Check on the value range of station capacity

In [5]:
stations.capacity.unique()

array([ 5., nan,  6.,  8.,  4.])

In [6]:
# Get stations with maximum capacity
stations_max = stations[stations.capacity == 8]
stations_max

Unnamed: 0,station_id,name,short_name,lat,lon,region_id,capacity
52,8806546,Park Hotel,2976,53.08905,8.822274,379,8.0
53,8806578,GOP / Steigenberger,2977,53.082036,8.789476,379,8.0


In [7]:
# Get stations with minimum capacity
stations_min = stations[stations.capacity == 4]
stations_min

Unnamed: 0,station_id,name,short_name,lat,lon,region_id,capacity
70,11119973,hanseWasser Bremen GmbH,2913,53.099552,8.759247,379,4.0


In [8]:
# Drop stations with minimum and maximum capacity
stations = stations[(stations.station_id != '11119973') & (stations.station_id != '8806546') & (stations.station_id != '8806578')]

In [9]:
m = folium.Map(location=[53.06703,8.76410], zoom_start=11)

for index, row in stations.iterrows():
    
    station_name = str(row['name'])
    station_id = row.station_id
        
    station_info = "Stationsname: {}<br>Station ID: {}".format(station_name,station_id)
    popup = folium.Popup(station_info,max_width=450)
    
    folium.Circle(
        location=[row['lat'],row['lon']],
        popup=popup,
        radius=150,
        color='royalblue',
        fill=True,
        fill_color='royalblue'
    ).add_to(m)

# Add stations with maximum capacity
for index, row in stations_max.iterrows():
    
    station_name = str(row['name'])
    station_id = row.station_id
        
    station_info = "Station with largest capacity of 8 bikes<br>Stationsname: {}<br>Station ID: {}".format(station_name,station_id)
    popup = folium.Popup(station_info,max_width=450)
    
    folium.Circle(
        location=[row['lat'],row['lon']],
        popup=popup,
        radius=150,
        color='indianred',
        fill=True,
        fill_color='indianred'
    ).add_to(m)
    
# Add stations with minimum capacity
for index, row in stations_min.iterrows():
    
    station_name = str(row['name'])
    station_id = row.station_id
        
    station_info = "Station with lowest capacity of 4 bikes<br>Stationsname: {}<br>Station ID: {}".format(station_name,station_id)
    popup = folium.Popup(station_info,max_width=450)
    
    folium.Circle(
        location=[row['lat'],row['lon']],
        popup=popup,
        radius=150,
        color='green',
        fill=True,
        fill_color='green'
    ).add_to(m)
                  
m

## Functions to visualize number of bikes at stations at specific point in time

In [10]:
def get_nearest_timestamp(df, timestamp):
    
    index = np.argmin(np.abs(df.index.to_pydatetime() - timestamp.to_pydatetime()))
    
    return index


def get_bike_availability_df(timestamp):
    
    # Import necessary data
    bremen = pd.read_csv('../data/processed/bremen_cleaned.csv')
    bremen = bremen[['datetime','p_uid','p_bikes','p_lat','p_lng']]
    bremen['datetime'] = pd.to_datetime(bremen['datetime'])
    bremen.set_index('datetime', inplace=True)
    stations = json_normalize(pd.read_json('../data/external/station_information.json').iloc[0,2])
    
    # Create columns for bikes available and timestamp of that information
    stations['bikes_available'] = 0
    stations['bikes_available_timestamp'] = timestamp


    for index, row in stations.iterrows():

        station_id = int(row.station_id)

        if station_id not in bremen['p_uid'].unique():
            bikes_available = 0

        else:

            i = get_nearest_timestamp(bremen[bremen['p_uid'] == station_id], timestamp)
            
            bikes_available = bremen.iloc[i,1]

            stations['bikes_available_timestamp'].iloc[index] = bremen[bremen['p_uid'] == station_id].iloc[i].name
            
            if (timestamp - stations['bikes_available_timestamp'].iloc[index]).days > 2:
                bikes_available = 0
            

        stations['bikes_available'].iloc[index] = bikes_available
        
    return stations

def visualize_bike_availablity(stations):
    
    m = folium.Map(location=[53.06703,8.76410], zoom_start=11)
    
    # Get stations with maximum and minimum capacity
    stations_max = stations[stations.capacity == 8]
    stations_min = stations[stations.capacity == 4]
    
    # Drop stations with minimum and maximum capacity
    stations = stations[(stations.station_id != '11119973') & (stations.station_id != '8806546') & (stations.station_id != '8806578')]
    
    for index, row in stations.iterrows():

        station_name = str(row['name'])
        station_id = row.station_id
        bikes_available = row.bikes_available

        station_info = "Stationsname: {}<br>Station ID: {}<br>Bikes available: {}<br>".format(station_name,station_id,bikes_available)
        popup = folium.Popup(station_info,max_width=450)
        
        folium.Circle(
            location=[row['lat'],row['lon']],
            popup=popup,
            radius=50+bikes_available*20,
            color='royalblue',
            fill=True,
            fill_color='royalblue'
        ).add_to(m)
        
    for index, row in stations_max.iterrows():

        station_name = str(row['name'])
        station_id = row.station_id
        bikes_available = row.bikes_available

        station_info = "Station with largest capacity of 8 bikes<br>Stationsname: {}<br>Station ID: {}<br>Bikes available: {}<br>".format(station_name,station_id,bikes_available)
        popup = folium.Popup(station_info,max_width=450)
        
        folium.Circle(
            location=[row['lat'],row['lon']],
            popup=popup,
            radius=50+bikes_available*20,
            color='indianred',
            fill=True,
            fill_color='indianred'
        ).add_to(m)
        
    for index, row in stations_min.iterrows():

        station_name = str(row['name'])
        station_id = row.station_id
        bikes_available = row.bikes_available

        station_info = "Station with lowest capacity of 4 bikes<br>Stationsname: {}<br>Station ID: {}<br>Bikes available: {}<br>".format(station_name,station_id,bikes_available)
        popup = folium.Popup(station_info,max_width=450)
        
        folium.Circle(
            location=[row['lat'],row['lon']],
            popup=popup,
            radius=50+bikes_available*20,
            color='green',
            fill=True,
            fill_color='green'
        ).add_to(m)

    return m

In [11]:
timestamp = pd.to_datetime('20.09.19 12:00')

In [12]:
visualize_bike_availablity(get_bike_availability_df(timestamp))

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)


In [13]:
visualize_bike_availablity(get_bike_availability_df(timestamp)).save('../reports/figures/bike_availability_20092019_1200.html')