In [1]:
import pandas as pd
import numpy as np
from datetime import datetime

# Record the library versions this notebook was executed with.
print(f"Pandas Version: {pd.__version__}")
print(f"Numpy Version: {np.__version__}")

Pandas Version: 1.2.4
Numpy Version: 1.18.5


In [2]:
# Load the cleaned trip records; both trip timestamps are parsed as datetimes
# so they can be bucketed by hour further down.
trips = pd.read_csv(
    "../../data/clean/trips.csv",
    parse_dates=["date_from", "date_until"],
)
trips.head(n=3)

Unnamed: 0,bike_id,user_id,date_from,date_until,start_station_name,start_station_id,end_station_name,end_station_id,booked_via,duration_in_min,distance_in_km,speed_in_kmh,time_since_last_checkout
0,119911,341973A96CDE0DF9792F6C844622735AE4216EBF,2014-01-01 00:02:51,2014-01-01 00:20:04,Enckeplatz / Hütten,131887,Königstraße / Struenseestraße,131650,Terminal HH_8 (-2624-),18,2.364129,7.880431,
1,118994,665D79F269FA03F84FC61F3A7F7B078D7392EC0E,2014-01-01 00:07:45,2014-01-01 00:10:48,Isestraße / Hoheluftbrücke,140804,Isestraße / Hoheluftbrücke,140804,Terminal HH_63 (-2241-),4,0.0,0.0,
2,143660,B46B52FDC494E46849DB84BF84F0B99C78358E59,2014-01-01 00:09:55,2014-01-01 00:26:20,Schulterblatt/Eifflerstraße,131648,Schulterblatt/Eifflerstraße,131648,Android SRH,17,0.0,0.0,


In [3]:
# Load the cleaned station master data (id, name, coordinates).
stations = pd.read_csv(filepath_or_buffer="../../data/clean/stations.csv")
stations.head(n=3)

Unnamed: 0,station_id,name,longitude,latitude
0,131543,Landungsbrücke/Hafentor,9.9723,53.5457
1,131546,Fischmarkt/Breite Straße,9.95088,53.5462
2,131547,Paulinenplatz/Wohlwillstraße,9.96246,53.5542


### Counting check-ins and check-outs per station per hour

In [4]:
# Keep only the columns needed for the hourly counts; copy so later work
# never touches the full `trips` frame.
trips_slice = trips.loc[
    :,
    ["date_from", "date_until", "start_station_name", "end_station_name"],
].copy()

In [5]:
# Check-ins: number of trips that end at each station within each hourly
# bucket of `date_until`. Only (hour, station) pairs with at least one trip
# appear here.
check_ins = (
    trips_slice
    .groupby([pd.Grouper(key="date_until", freq="H"), "end_station_name"])
    .size()
    .to_frame("check_ins")
    .rename_axis(["datetime", "station_name"])  # common index names for the later join
)
check_ins.sample(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,check_ins
datetime,station_name,Unnamed: 2_level_1
2014-04-07 17:00:00,Mönckebergstraße / Rosenstraße,3
2016-09-15 21:00:00,Planetarium/Hindenburgstraße,3
2016-09-24 14:00:00,Wandsbeker Marktstraße/Wandsbeker Marktplatz,1


In [7]:
# Check-outs: number of trips that start at each station within each hourly
# bucket of `date_from`. Only (hour, station) pairs with at least one trip
# appear here.
check_outs = (
    trips_slice
    .groupby([pd.Grouper(key="date_from", freq="H"), "start_station_name"])
    .size()
    .to_frame("check_outs")
    .rename_axis(["datetime", "station_name"])  # common index names for the later join
)
check_outs.sample(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,check_outs
datetime,station_name,Unnamed: 2_level_1
2017-05-14 12:00:00,Ohnhorststraße/Klein Flottbek,1
2016-05-01 19:00:00,Schulterblatt/Eifflerstraße,10
2014-06-26 17:00:00,Leinpfad/Fernsicht,5


### Create empty data set with stats per station per hour and fill in accumulated data

In [8]:
# Hourly timestamps spanning the observation window. With an hourly frequency
# the last generated stamp is 2017-05-15 11:00.
start_date = datetime(2014, 1, 1, 0)
end_date = datetime(2017, 5, 15, 11, 59, 59)
date_range = pd.date_range(start=start_date, end=end_date, freq="H")

# Alphabetically ordered list of distinct station names.
station_names = sorted(stations["name"].unique())

# Full grid: one row for every (hour, station) combination.
index = pd.MultiIndex.from_product(
    [date_range, station_names],
    names=["datetime", "station_name"],
)

# Frame with the full grid as index and no columns yet.
empty_df = pd.DataFrame(index=index)

# Left-join the observed counts onto the grid. Combinations without any
# observed trip come back as NaN and are set to 0 (no check-ins / check-outs),
# which also lets the counts be stored as integers again.
station_count = (
    empty_df
    .join(check_ins)
    .join(check_outs)
    .fillna(0)
    .astype({"check_ins": int, "check_outs": int})
    # net change in bike inventory for the station in that hour
    .assign(diff=lambda counts: counts["check_ins"] - counts["check_outs"])
    # turn `datetime` / `station_name` back into ordinary columns
    .reset_index()
)

In [9]:
# Preview the result: one row per (hour, station) combination. The frame has
# several million rows, so the notebook renders only the head and tail.
station_count

Unnamed: 0,datetime,station_name,check_ins,check_outs,diff
0,2014-01-01 00:00:00,Allende-Platz/Grindelhof,0,5,-5
1,2014-01-01 00:00:00,Alsenstraße/Düppelstraße,3,0,3
2,2014-01-01 00:00:00,Alsterdorf Markt/Evangelische Stiftung,0,0,0
3,2014-01-01 00:00:00,Alsterdorfer Straße/Fuhlsbüttler Straße,0,0,0
4,2014-01-01 00:00:00,Alsterschwimmhalle/Ifflandstraße,0,0,0
...,...,...,...,...,...
6142651,2017-05-15 11:00:00,Wiesendamm/Roggenkamp,1,1,0
6142652,2017-05-15 11:00:00,Wilhelmsburger Platz/Zur Schleuse,0,0,0
6142653,2017-05-15 11:00:00,Winterhuder Weg/ Zimmerstraße,2,4,-2
6142654,2017-05-15 11:00:00,Zentralbibliothek / Münzstraße,0,2,-2


In [10]:
# Persist the hourly per-station counts; the positional row index is not
# written to the file.
station_count.to_csv(
    path_or_buf="../../data/clean/trips_hourly.csv",
    index=False,
)