In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Borough boundary layer. Its 'NAME' and 'geometry' columns are what the
# merge / GeoDataFrame cells further down rely on.
boroughs = gpd.read_file('boroughs.geojson')

In [4]:
# Cycle counts per borough; the preview below shows one row per
# Borough / Year / Quarter / Period / Weather combination.
counts = pd.read_csv('total_cycle_by_borough.csv')

In [5]:
# Preview the first rows to see which columns were read from the CSV.
counts.head()

Unnamed: 0.1,Unnamed: 0,Borough,Year,Quarter,Period,Weather,Normal_cycles,Hire_cycles,Total_cycles,Infras
0,0,Camden,2017,Q1,AM peak (07:00-10:00),Dry,20.117479,2.707736,22.825215,2344.0
1,1,Camden,2017,Q1,AM peak (07:00-10:00),Wet,31.79661,4.567797,36.364407,2344.0
2,2,Camden,2017,Q1,Early Morning (06:00-07:00),Dry,3.275424,0.415254,3.690678,2344.0
3,3,Camden,2017,Q1,Early Morning (06:00-07:00),Wet,4.416667,0.25,4.666667,2344.0
4,4,Camden,2017,Q1,Evening (19:00-22:00),Dry,10.527697,0.932945,11.460641,2344.0


In [6]:
# Remove the 'Unnamed: 0' column (it looks like a row index that was saved
# along with the CSV -- TODO confirm). Rebinding instead of inplace=True,
# together with errors='ignore', keeps this cell safe to re-run: the original
# raised KeyError on a second execution because the column was already gone.
counts = counts.drop(columns='Unnamed: 0', errors='ignore')

In [7]:
# Keep only third-quarter rows and discard the early-morning, evening and
# inter-peak periods. A boolean mask selects the surviving rows directly
# instead of dropping by index label in place, so the result does not depend
# on the index being unique and the frame is not mutated behind earlier cells.
counts = counts[
    (counts['Quarter'] == 'Q3')
    & ~counts['Period'].isin([
        'Early Morning (06:00-07:00)',
        'Evening (19:00-22:00)',
        'Inter-peak (10:00-16:00)',
    ])
]

In [8]:
# Preview again after the column drop and row filter above.
counts.head()

Unnamed: 0,Borough,Year,Quarter,Period,Weather,Normal_cycles,Hire_cycles,Total_cycles,Infras
24,Camden,2017,Q3,AM peak (07:00-10:00),Dry,26.020349,3.414244,29.434593,2344.0
25,Camden,2017,Q3,AM peak (07:00-10:00),Wet,16.657895,1.809211,18.467105,2344.0
34,Camden,2017,Q3,PM peak (16:00-19:00),Dry,25.805634,3.507042,29.312676,2344.0
35,Camden,2017,Q3,PM peak (16:00-19:00),Wet,18.6,1.615385,20.215385,2344.0
84,Camden,2018,Q3,AM peak (07:00-10:00),Dry,26.622108,3.349614,29.971722,59.0


In [9]:
def get_year(year: int, df: "pd.DataFrame | None" = None) -> pd.DataFrame:
    """Return an independent copy of the rows whose 'Year' equals ``year``.

    Parameters
    ----------
    year : int
        Value to match against the 'Year' column.
    df : pandas.DataFrame, optional
        Frame to select from. When omitted, the module-level ``counts`` frame
        is used, which is what the original one-argument call did.

    Returns
    -------
    pandas.DataFrame
        A copy of the matching rows (original index labels are kept), so
        later edits to the result never touch the source frame.
    """
    # Resolve the source lazily so existing `get_year(2017)` calls keep working.
    source = counts if df is None else df
    return source.loc[source['Year'] == year].copy()

In [29]:
# One filtered frame per survey year, built in chronological order.
df2017, df2018, df2019, df2020 = (
    get_year(survey_year) for survey_year in (2017, 2018, 2019, 2020)
)

In [23]:
def pivot_periods(df, value_col='Total_cycles'):
    """Sum ``value_col`` per borough and period and spread periods into columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'Borough', 'Period' and ``value_col`` columns.
    value_col : str, default 'Total_cycles'
        Column to total. The default reproduces the original behaviour; pass
        another name (e.g. a prediction column) to reuse the same reshaping.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'Borough', with one column per 'Period' value. A
        borough/period pair that has no rows is NaN.
    """
    # Rows sharing a (Borough, Period) pair are added together first, so each
    # pair is unique before the Period level is moved into the columns.
    totals = df.groupby(['Borough', 'Period'])[value_col].sum()
    return totals.unstack('Period')

In [30]:
# Borough x period totals for each survey year.
pivot17, pivot18, pivot19, pivot20 = map(
    pivot_periods, (df2017, df2018, df2019, df2020)
)

In [58]:
# Attach the borough boundaries to every yearly pivot (left join, so every
# pivot row is kept). 'Borough' is the pivot's index name rather than a
# column; pandas resolves it as an index level when matching against 'NAME'.
geo17, geo18, geo19, geo20 = (
    yearly_pivot.merge(boroughs, how='left', left_on='Borough', right_on='NAME')
    for yearly_pivot in (pivot17, pivot18, pivot19, pivot20)
)

In [59]:
# Promote each merged table to a GeoDataFrame using its 'geometry' column.
# NOTE(review): the CRS is declared as EPSG:4326 here -- presumably that is
# what boroughs.geojson uses; confirm.
geo17, geo18, geo19, geo20 = (
    gpd.GeoDataFrame(merged, geometry='geometry', crs='epsg:4326')
    for merged in (geo17, geo18, geo19, geo20)
)

In [60]:
# Write one GeoJSON file per survey year.
for layer, out_path in (
    (geo17, 'geo17.geojson'),
    (geo18, 'geo18.geojson'),
    (geo19, 'geo19.geojson'),
    (geo20, 'geo20.geojson'),
):
    layer.to_file(out_path, driver='GeoJSON')

In [67]:
# Predicted cycle totals; the preview below shows Year 2021 rows with the
# prediction stored in 'Totle_cycles_predicted' (spelling as in the CSV).
predicted = pd.read_csv('total_cycles_prediction_2021.csv')

In [68]:
# Preview the first rows to see which columns were read from the CSV.
predicted.head()

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Borough,Year,Period,Normal_cycles,Hire_cycles,Total_cycles,Infras,Totle_cycles_predicted
0,0,10,Camden,2021,AM peak (07:00-10:00),0,0,0,2344,33226.159283
1,1,11,Camden,2021,Early Morning (06:00-07:00),0,0,0,2344,2491.799911
2,2,12,Camden,2021,Evening (19:00-22:00),0,0,0,2344,13003.36895
3,3,13,Camden,2021,Inter-peak (10:00-16:00),0,0,0,2344,16265.378021
4,4,14,Camden,2021,PM peak (16:00-19:00),0,0,0,2344,31864.194582


In [69]:
# Remove the two 'Unnamed' columns (they look like row indexes saved by
# earlier CSV exports -- TODO confirm). Rebinding with errors='ignore' keeps
# this cell safe to re-run; the in-place version raised KeyError on a second
# execution because the columns were already gone.
predicted = predicted.drop(columns=['Unnamed: 0', 'Unnamed: 0.1'], errors='ignore')

In [70]:
# Discard the early-morning, evening and inter-peak periods. A boolean mask
# selects the surviving rows directly instead of dropping by index label in
# place, so the result does not depend on the index being unique and the
# frame is not mutated behind earlier cells.
predicted = predicted[
    ~predicted['Period'].isin([
        'Early Morning (06:00-07:00)',
        'Evening (19:00-22:00)',
        'Inter-peak (10:00-16:00)',
    ])
]

In [72]:
# Total predicted cycles for every borough / period pair; the grouping keys
# stay as ordinary columns so the next cell can pivot on them.
df21_grouped = (
    predicted
    .groupby(['Borough', 'Period'], as_index=False)['Totle_cycles_predicted']
    .sum()
)

In [75]:
# Reshape to one row per borough with a column per period.
pivot21 = (
    df21_grouped
    .set_index(['Borough', 'Period'])['Totle_cycles_predicted']
    .unstack('Period')
)

In [76]:
# Show the predicted borough x period table.
pivot21

Period,AM peak (07:00-10:00),PM peak (16:00-19:00)
Borough,Unnamed: 1_level_1,Unnamed: 2_level_1
Camden,33226.159283,31864.194582
City of London,58846.767683,56434.595404
Hackney,609.557954,584.571726
Islington,17963.205029,17226.880046
Lambeth,21455.161442,20575.698598
Southwark,34296.007846,32890.189266
Tower Hamlets,517.119545,991.844869
Westminster,55858.140779,53568.474515


In [77]:
# Left-join the predicted pivot onto the borough boundaries ('Borough' is the
# pivot's index name, matched against the 'NAME' column) and wrap the result
# as a GeoDataFrame on its 'geometry' column.
geo21 = gpd.GeoDataFrame(
    pivot21.merge(boroughs, how='left', left_on='Borough', right_on='NAME'),
    geometry='geometry',
    crs='epsg:4326',
)

In [78]:
# Write the predicted layer to GeoJSON, mirroring the per-year exports above.
geo21.to_file('geo21.geojson', driver='GeoJSON')