# Filter round-trip data for 2019, 2020, and 2021 (15 Oct to 15 Nov of each year)

In [None]:
# import all packages and set plots to be embedded inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb
import os
import glob
import datetime

%matplotlib inline

In [2]:
import requests
from xml.etree import ElementTree as ET
import pandas as pd

# Download the live TfL cycle-hire feed and build a station lookup table
# (name, id, lat, lon, capacity) that later cells join onto the trip data.
site = "https://tfl.gov.uk/tfl/syndication/feeds/cycle-hire/livecyclehireupdates.xml"

# A timeout stops the cell from hanging forever if the server never answers,
# and raise_for_status() turns an HTTP error page into a clear exception
# instead of a confusing XML parse failure further down.
response = requests.get(site, timeout=30)
response.raise_for_status()
root = ET.fromstring(response.content)

# Every child of the root element is read positionally: children 0, 1, 3, 4
# and 12 are used as id, name, latitude, longitude and capacity respectively.
# NOTE(review): tag names are not checked here - confirm these positions
# against the feed schema if the feed layout ever changes.
id_list = [int(station[0].text) for station in root]
name_list = [station[1].text for station in root]
lat_list = [float(station[3].text) for station in root]
lon_list = [float(station[4].text) for station in root]
capacity_list = [int(station[12].text) for station in root]

stations = pd.DataFrame(
    list(zip(name_list, id_list, lat_list, lon_list, capacity_list)),
    columns=["name", "id", "lat", "lon", "capacity"],
)

# Keep a local snapshot of the feed; the row index is not written.
stations.to_csv('stations.csv', header=True, index=False)

print(stations.shape)
stations.head()

(789, 5)


Unnamed: 0,name,id,lat,lon,capacity
0,"River Street , Clerkenwell",1,51.529163,-0.109971,19
1,"Phillimore Gardens, Kensington",2,51.499607,-0.197574,37
2,"Christopher Street, Liverpool Street",3,51.521284,-0.084606,32
3,"St. Chad's Street, King's Cross",4,51.530059,-0.120974,23
4,"Broadcasting House, Marylebone",6,51.518118,-0.144229,18


In [3]:
# Load the 2019 round-trip rentals and convert both timestamp columns to datetime64.
# NOTE(review): in the displayed output, rental_id 83498005 is dated 2019-11-01 although
# ids just below it (83186728) fall on 2018-12-27 and ids near 93.9M fall in late
# December 2019. That pattern suggests day and month were swapped for some rows,
# either here or in whatever produced this CSV - confirm the date format of the file.
round_2019_whole_year = pd.read_csv('round_2019_whole_year.csv')
for date_col in ('end_date', 'start_date'):
    round_2019_whole_year[date_col] = pd.to_datetime(round_2019_whole_year[date_col])
round_2019_whole_year.info()
round_2019_whole_year

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 359780 entries, 0 to 359779
Data columns (total 9 columns):
 #   Column             Non-Null Count   Dtype         
---  ------             --------------   -----         
 0   rental_id          359780 non-null  int64         
 1   duration           359780 non-null  int64         
 2   bike_id            359780 non-null  int64         
 3   end_date           359780 non-null  datetime64[ns]
 4   endstation_id      359780 non-null  int64         
 5   endstation_name    359780 non-null  object        
 6   start_date         359780 non-null  datetime64[ns]
 7   startstation_id    359780 non-null  int64         
 8   startstation_name  359780 non-null  object        
dtypes: datetime64[ns](2), int64(5), object(2)
memory usage: 24.7+ MB


Unnamed: 0,rental_id,duration,bike_id,end_date,endstation_id,endstation_name,start_date,startstation_id,startstation_name
0,83186728,120,4156,2018-12-27 10:09:00,280,"Royal Avenue 2, Chelsea",2018-12-27 10:07:00,280,"Royal Avenue 2, Chelsea"
1,83231389,240,16201,2018-12-30 14:14:00,333,"Palace Gardens Terrace, Notting Hill",2018-12-30 14:10:00,333,"Palace Gardens Terrace, Notting Hill"
2,83231461,5340,2895,2018-12-30 15:43:00,333,"Palace Gardens Terrace, Notting Hill",2018-12-30 14:14:00,333,"Palace Gardens Terrace, Notting Hill"
3,83254006,1200,7403,2018-12-31 22:25:00,230,"Poured Lines, Bankside",2018-12-31 22:05:00,230,"Poured Lines, Bankside"
4,83234424,60,1858,2018-12-30 16:46:00,230,"Poured Lines, Bankside",2018-12-30 16:45:00,230,"Poured Lines, Bankside"
...,...,...,...,...,...,...,...,...,...
359775,93975449,6180,4826,2019-12-30 17:06:00,303,"Albert Gate, Hyde Park",2019-12-30 15:23:00,303,"Albert Gate, Hyde Park"
359776,93974006,1020,555,2019-12-30 14:43:00,303,"Albert Gate, Hyde Park",2019-12-30 14:26:00,303,"Albert Gate, Hyde Park"
359777,93974155,1980,7362,2019-12-30 15:06:00,303,"Albert Gate, Hyde Park",2019-12-30 14:33:00,303,"Albert Gate, Hyde Park"
359778,93973049,3960,11780,2019-12-30 14:56:00,303,"Albert Gate, Hyde Park",2019-12-30 13:50:00,303,"Albert Gate, Hyde Park"


In [4]:
# Keep only the trips whose start_date lies between 2019-10-15 00:00:00 and
# 2019-11-15 23:59:59 (both bounds inclusive).
window_start = pd.to_datetime('2019-10-15 00:00:00')
window_end = pd.to_datetime('2019-11-15 23:59:59')
in_window = round_2019_whole_year['start_date'].between(window_start, window_end)
round2019 = round_2019_whole_year[in_window]
round2019

Unnamed: 0,rental_id,duration,bike_id,end_date,endstation_id,endstation_name,start_date,startstation_id,startstation_name
9846,83498005,1800,15833,2019-11-01 22:23:00,233,"Pall Mall East, West End",2019-11-01 21:53:00,233,"Pall Mall East, West End"
9857,83498021,120,10008,2019-11-01 21:56:00,233,"Pall Mall East, West End",2019-11-01 21:54:00,233,"Pall Mall East, West End"
9860,83497218,60,16150,2019-11-01 20:35:00,29,"Hereford Road, Bayswater",2019-11-01 20:34:00,29,"Hereford Road, Bayswater"
9861,83498077,1380,16300,2019-11-01 22:23:00,233,"Pall Mall East, West End",2019-11-01 22:00:00,233,"Pall Mall East, West End"
9866,83472340,60,3179,2019-11-01 03:21:00,155,"Lexham Gardens, Kensington",2019-11-01 03:20:00,155,"Lexham Gardens, Kensington"
...,...,...,...,...,...,...,...,...,...
349407,93645271,1380,17021,2019-11-12 16:27:00,256,"Houghton Street, Strand",2019-11-12 16:04:00,256,"Houghton Street, Strand"
349410,93649357,960,4540,2019-11-12 18:12:00,428,"Exhibition Road, Knightsbridge",2019-11-12 17:56:00,428,"Exhibition Road, Knightsbridge"
349413,93635239,540,4752,2019-11-12 08:52:00,712,"Mile End Stadium, Mile End",2019-11-12 08:43:00,712,"Mile End Stadium, Mile End"
349422,93641274,1800,12035,2019-11-12 12:38:00,509,"Fore Street, Guildhall",2019-11-12 12:08:00,509,"Fore Street, Guildhall"


In [5]:
import pandas as pd
from pandas.core.frame import DataFrame

# Attach the station table to every trip via its end station.
# Left join: a trip whose endstation_id has no match in `stations` is kept,
# with NaN in the station columns (name, id, lat, lon, capacity).
# Caution: this overwrites round2019, so re-running this cell without first
# re-running the filter cell would join the station columns a second time.
round2019 = round2019.merge(
    stations,
    how='left',
    left_on='endstation_id',
    right_on='id',
)
round2019

Unnamed: 0,rental_id,duration,bike_id,end_date,endstation_id,endstation_name,start_date,startstation_id,startstation_name,name,id,lat,lon,capacity
0,83498005,1800,15833,2019-11-01 22:23:00,233,"Pall Mall East, West End",2019-11-01 21:53:00,233,"Pall Mall East, West End","Pall Mall East, West End",233.0,51.507770,-0.130700,22.0
1,83498021,120,10008,2019-11-01 21:56:00,233,"Pall Mall East, West End",2019-11-01 21:54:00,233,"Pall Mall East, West End","Pall Mall East, West End",233.0,51.507770,-0.130700,22.0
2,83497218,60,16150,2019-11-01 20:35:00,29,"Hereford Road, Bayswater",2019-11-01 20:34:00,29,"Hereford Road, Bayswater","Hereford Road, Bayswater",29.0,51.513735,-0.193487,22.0
3,83498077,1380,16300,2019-11-01 22:23:00,233,"Pall Mall East, West End",2019-11-01 22:00:00,233,"Pall Mall East, West End","Pall Mall East, West End",233.0,51.507770,-0.130700,22.0
4,83472340,60,3179,2019-11-01 03:21:00,155,"Lexham Gardens, Kensington",2019-11-01 03:20:00,155,"Lexham Gardens, Kensington","Lexham Gardens, Kensington",155.0,51.495867,-0.191934,19.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22798,93645271,1380,17021,2019-11-12 16:27:00,256,"Houghton Street, Strand",2019-11-12 16:04:00,256,"Houghton Street, Strand","Houghton Street, Strand",256.0,51.513621,-0.116764,15.0
22799,93649357,960,4540,2019-11-12 18:12:00,428,"Exhibition Road, Knightsbridge",2019-11-12 17:56:00,428,"Exhibition Road, Knightsbridge","Exhibition Road, Knightsbridge",428.0,51.499917,-0.174554,20.0
22800,93635239,540,4752,2019-11-12 08:52:00,712,"Mile End Stadium, Mile End",2019-11-12 08:43:00,712,"Mile End Stadium, Mile End","Mile End Stadium, Mile End",712.0,51.518541,-0.034904,22.0
22801,93641274,1800,12035,2019-11-12 12:38:00,509,"Fore Street, Guildhall",2019-11-12 12:08:00,509,"Fore Street, Guildhall","Fore Street, Guildhall",509.0,51.517842,-0.090075,19.0


In [6]:
# Report the number of missing values per column, then drop incomplete rows.
# The counts are printed explicitly because a notebook only auto-displays the
# last expression of a cell; a bare `isnull().sum()` mid-cell shows nothing.
print(round2019.isnull().sum())

# Drop every row that has a null in any column (e.g. trips whose end station
# found no match in the station table). Reassign instead of inplace=True.
round2019 = round2019.dropna(axis=0, how='any')
round2019

Unnamed: 0,rental_id,duration,bike_id,end_date,endstation_id,endstation_name,start_date,startstation_id,startstation_name,name,id,lat,lon,capacity
0,83498005,1800,15833,2019-11-01 22:23:00,233,"Pall Mall East, West End",2019-11-01 21:53:00,233,"Pall Mall East, West End","Pall Mall East, West End",233.0,51.507770,-0.130700,22.0
1,83498021,120,10008,2019-11-01 21:56:00,233,"Pall Mall East, West End",2019-11-01 21:54:00,233,"Pall Mall East, West End","Pall Mall East, West End",233.0,51.507770,-0.130700,22.0
2,83497218,60,16150,2019-11-01 20:35:00,29,"Hereford Road, Bayswater",2019-11-01 20:34:00,29,"Hereford Road, Bayswater","Hereford Road, Bayswater",29.0,51.513735,-0.193487,22.0
3,83498077,1380,16300,2019-11-01 22:23:00,233,"Pall Mall East, West End",2019-11-01 22:00:00,233,"Pall Mall East, West End","Pall Mall East, West End",233.0,51.507770,-0.130700,22.0
4,83472340,60,3179,2019-11-01 03:21:00,155,"Lexham Gardens, Kensington",2019-11-01 03:20:00,155,"Lexham Gardens, Kensington","Lexham Gardens, Kensington",155.0,51.495867,-0.191934,19.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22798,93645271,1380,17021,2019-11-12 16:27:00,256,"Houghton Street, Strand",2019-11-12 16:04:00,256,"Houghton Street, Strand","Houghton Street, Strand",256.0,51.513621,-0.116764,15.0
22799,93649357,960,4540,2019-11-12 18:12:00,428,"Exhibition Road, Knightsbridge",2019-11-12 17:56:00,428,"Exhibition Road, Knightsbridge","Exhibition Road, Knightsbridge",428.0,51.499917,-0.174554,20.0
22800,93635239,540,4752,2019-11-12 08:52:00,712,"Mile End Stadium, Mile End",2019-11-12 08:43:00,712,"Mile End Stadium, Mile End","Mile End Stadium, Mile End",712.0,51.518541,-0.034904,22.0
22801,93641274,1800,12035,2019-11-12 12:38:00,509,"Fore Street, Guildhall",2019-11-12 12:08:00,509,"Fore Street, Guildhall","Fore Street, Guildhall",509.0,51.517842,-0.090075,19.0


In [7]:
# Persist the cleaned 2019 subset for later use; the DataFrame index is not written.
round2019.to_csv(path_or_buf='round2019.csv', index=False)

In [8]:
# Load the 2020 round-trip rentals and convert both timestamp columns to datetime64.
# NOTE(review): the filtered output further down contains a trip with duration 720
# (seconds, presumably) whose start_date is 2020-11-12 23:49 and end_date is
# 2020-12-12 00:01. That suggests day and month were swapped for some rows,
# either here or in whatever produced this CSV - confirm the date format of the file.
round_2020_whole_year = pd.read_csv('round_2020_whole_year.csv')
for date_col in ('end_date', 'start_date'):
    round_2020_whole_year[date_col] = pd.to_datetime(round_2020_whole_year[date_col])
round_2020_whole_year.info()
round_2020_whole_year

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 741315 entries, 0 to 741314
Data columns (total 9 columns):
 #   Column             Non-Null Count   Dtype         
---  ------             --------------   -----         
 0   rental_id          741315 non-null  int64         
 1   duration           741315 non-null  int64         
 2   bike_id            741315 non-null  int64         
 3   end_date           741315 non-null  datetime64[ns]
 4   endstation_id      741315 non-null  int64         
 5   endstation_name    741315 non-null  object        
 6   start_date         741315 non-null  datetime64[ns]
 7   startstation_id    741315 non-null  int64         
 8   startstation_name  741315 non-null  object        
dtypes: datetime64[ns](2), int64(5), object(2)
memory usage: 50.9+ MB


Unnamed: 0,rental_id,duration,bike_id,end_date,endstation_id,endstation_name,start_date,startstation_id,startstation_name
0,94124724,1440,5972,2020-07-01 19:44:00,46,"Nesham Street, Wapping",2020-07-01 19:20:00,46,"Nesham Street, Wapping"
1,94057246,3360,15662,2020-04-01 17:49:00,48,"Godliman Street, St. Paul's",2020-04-01 16:53:00,48,"Godliman Street, St. Paul's"
2,94088392,5340,873,2020-06-01 15:56:00,48,"Godliman Street, St. Paul's",2020-06-01 14:27:00,48,"Godliman Street, St. Paul's"
3,94088347,5580,15171,2020-06-01 15:57:00,48,"Godliman Street, St. Paul's",2020-06-01 14:24:00,48,"Godliman Street, St. Paul's"
4,94088307,5700,9704,2020-06-01 15:57:00,48,"Godliman Street, St. Paul's",2020-06-01 14:22:00,48,"Godliman Street, St. Paul's"
...,...,...,...,...,...,...,...,...,...
741310,104796945,1140,10019,2021-02-01 12:57:00,644,"Rainville Road, Hammersmith",2021-02-01 12:38:00,644,"Rainville Road, Hammersmith"
741311,104829962,1620,5627,2021-04-01 13:57:00,644,"Rainville Road, Hammersmith",2021-04-01 13:30:00,644,"Rainville Road, Hammersmith"
741312,104818029,1560,10019,2021-03-01 14:11:00,644,"Rainville Road, Hammersmith",2021-03-01 13:45:00,644,"Rainville Road, Hammersmith"
741313,104809576,720,15369,2021-02-01 19:21:00,644,"Rainville Road, Hammersmith",2021-02-01 19:09:00,644,"Rainville Road, Hammersmith"


In [9]:
# Keep only the trips whose start_date lies between 2020-10-15 00:00:00 and
# 2020-11-15 23:59:59 (both bounds inclusive).
window_start = pd.to_datetime('2020-10-15 00:00:00')
window_end = pd.to_datetime('2020-11-15 23:59:59')
in_window = round_2020_whole_year['start_date'].between(window_start, window_end)
round2020 = round_2020_whole_year[in_window]
round2020

Unnamed: 0,rental_id,duration,bike_id,end_date,endstation_id,endstation_name,start_date,startstation_id,startstation_name
4177,94215819,60,14851,2020-11-01 11:16:00,47,"Warwick Avenue Station, Maida Vale",2020-11-01 11:15:00,47,"Warwick Avenue Station, Maida Vale"
4186,94227132,60,13122,2020-11-01 18:48:00,81,"Great Titchfield Street, Fitzrovia",2020-11-01 18:47:00,81,"Great Titchfield Street, Fitzrovia"
4193,94221501,1920,14033,2020-11-01 15:14:00,97,"Gloucester Road (North), Kensington",2020-11-01 14:42:00,97,"Gloucester Road (North), Kensington"
4202,94219056,1440,3968,2020-11-01 13:41:00,298,"Curlew Street, Shad Thames",2020-11-01 13:17:00,298,"Curlew Street, Shad Thames"
4209,94215464,480,13118,2020-11-01 11:08:00,596,"Parson's Green , Parson's Green",2020-11-01 11:00:00,596,"Parson's Green , Parson's Green"
...,...,...,...,...,...,...,...,...,...
714035,104428034,720,19723,2020-12-12 00:01:00,726,"Alfreda Street, Battersea Park",2020-11-12 23:49:00,726,"Alfreda Street, Battersea Park"
714059,104410690,1320,7213,2020-11-12 13:20:00,625,"Queen's Circus, Battersea Park",2020-11-12 12:58:00,625,"Queen's Circus, Battersea Park"
714076,104411803,1140,13318,2020-11-12 14:01:00,640,"Silverthorne Road, Battersea",2020-11-12 13:42:00,640,"Silverthorne Road, Battersea"
714077,104404191,4080,2003,2020-11-12 09:12:00,657,"Blythe Road West, Shepherd's Bush",2020-11-12 08:04:00,657,"Blythe Road West, Shepherd's Bush"


In [10]:
# Attach the station table to every trip via its end station.
# Left join: a trip whose endstation_id has no match in `stations` is kept,
# with NaN in the station columns (name, id, lat, lon, capacity).
# Caution: this overwrites round2020, so re-running this cell without first
# re-running the filter cell would join the station columns a second time.
round2020 = round2020.merge(
    stations,
    how='left',
    left_on='endstation_id',
    right_on='id',
)
round2020

Unnamed: 0,rental_id,duration,bike_id,end_date,endstation_id,endstation_name,start_date,startstation_id,startstation_name,name,id,lat,lon,capacity
0,94215819,60,14851,2020-11-01 11:16:00,47,"Warwick Avenue Station, Maida Vale",2020-11-01 11:15:00,47,"Warwick Avenue Station, Maida Vale","Warwick Avenue Station, Maida Vale",47.0,51.523345,-0.183846,19.0
1,94227132,60,13122,2020-11-01 18:48:00,81,"Great Titchfield Street, Fitzrovia",2020-11-01 18:47:00,81,"Great Titchfield Street, Fitzrovia","Great Titchfield Street, Fitzrovia",81.0,51.520253,-0.141327,19.0
2,94221501,1920,14033,2020-11-01 15:14:00,97,"Gloucester Road (North), Kensington",2020-11-01 14:42:00,97,"Gloucester Road (North), Kensington","Gloucester Road (North), Kensington",97.0,51.497925,-0.183835,18.0
3,94219056,1440,3968,2020-11-01 13:41:00,298,"Curlew Street, Shad Thames",2020-11-01 13:17:00,298,"Curlew Street, Shad Thames","Curlew Street, Shad Thames",298.0,51.502280,-0.074189,21.0
4,94215464,480,13118,2020-11-01 11:08:00,596,"Parson's Green , Parson's Green",2020-11-01 11:00:00,596,"Parson's Green , Parson's Green","Parson's Green , Parson's Green",596.0,51.472817,-0.199783,21.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50875,104428034,720,19723,2020-12-12 00:01:00,726,"Alfreda Street, Battersea Park",2020-11-12 23:49:00,726,"Alfreda Street, Battersea Park","Alfreda Street, Battersea Park",726.0,51.475051,-0.150908,22.0
50876,104410690,1320,7213,2020-11-12 13:20:00,625,"Queen's Circus, Battersea Park",2020-11-12 12:58:00,625,"Queen's Circus, Battersea Park","Queen's Circus, Battersea Park",625.0,51.477619,-0.149552,28.0
50877,104411803,1140,13318,2020-11-12 14:01:00,640,"Silverthorne Road, Battersea",2020-11-12 13:42:00,640,"Silverthorne Road, Battersea","Silverthorne Road, Battersea",640.0,51.472866,-0.148059,28.0
50878,104404191,4080,2003,2020-11-12 09:12:00,657,"Blythe Road West, Shepherd's Bush",2020-11-12 08:04:00,657,"Blythe Road West, Shepherd's Bush","Blythe Road West, Shepherd's Bush",657.0,51.499680,-0.221792,21.0


In [11]:
# Report the number of missing values per column, then drop incomplete rows.
# The counts are printed explicitly because a notebook only auto-displays the
# last expression of a cell; a bare `isnull().sum()` mid-cell shows nothing.
print(round2020.isnull().sum())

# Drop every row that has a null in any column (e.g. trips whose end station
# found no match in the station table). Reassign instead of inplace=True.
round2020 = round2020.dropna(axis=0, how='any')
round2020

Unnamed: 0,rental_id,duration,bike_id,end_date,endstation_id,endstation_name,start_date,startstation_id,startstation_name,name,id,lat,lon,capacity
0,94215819,60,14851,2020-11-01 11:16:00,47,"Warwick Avenue Station, Maida Vale",2020-11-01 11:15:00,47,"Warwick Avenue Station, Maida Vale","Warwick Avenue Station, Maida Vale",47.0,51.523345,-0.183846,19.0
1,94227132,60,13122,2020-11-01 18:48:00,81,"Great Titchfield Street, Fitzrovia",2020-11-01 18:47:00,81,"Great Titchfield Street, Fitzrovia","Great Titchfield Street, Fitzrovia",81.0,51.520253,-0.141327,19.0
2,94221501,1920,14033,2020-11-01 15:14:00,97,"Gloucester Road (North), Kensington",2020-11-01 14:42:00,97,"Gloucester Road (North), Kensington","Gloucester Road (North), Kensington",97.0,51.497925,-0.183835,18.0
3,94219056,1440,3968,2020-11-01 13:41:00,298,"Curlew Street, Shad Thames",2020-11-01 13:17:00,298,"Curlew Street, Shad Thames","Curlew Street, Shad Thames",298.0,51.502280,-0.074189,21.0
4,94215464,480,13118,2020-11-01 11:08:00,596,"Parson's Green , Parson's Green",2020-11-01 11:00:00,596,"Parson's Green , Parson's Green","Parson's Green , Parson's Green",596.0,51.472817,-0.199783,21.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50875,104428034,720,19723,2020-12-12 00:01:00,726,"Alfreda Street, Battersea Park",2020-11-12 23:49:00,726,"Alfreda Street, Battersea Park","Alfreda Street, Battersea Park",726.0,51.475051,-0.150908,22.0
50876,104410690,1320,7213,2020-11-12 13:20:00,625,"Queen's Circus, Battersea Park",2020-11-12 12:58:00,625,"Queen's Circus, Battersea Park","Queen's Circus, Battersea Park",625.0,51.477619,-0.149552,28.0
50877,104411803,1140,13318,2020-11-12 14:01:00,640,"Silverthorne Road, Battersea",2020-11-12 13:42:00,640,"Silverthorne Road, Battersea","Silverthorne Road, Battersea",640.0,51.472866,-0.148059,28.0
50878,104404191,4080,2003,2020-11-12 09:12:00,657,"Blythe Road West, Shepherd's Bush",2020-11-12 08:04:00,657,"Blythe Road West, Shepherd's Bush","Blythe Road West, Shepherd's Bush",657.0,51.499680,-0.221792,21.0


In [12]:
# Persist the cleaned 2020 subset for later use; the DataFrame index is not written.
round2020.to_csv(path_or_buf='round2020.csv', index=False)

In [13]:
# Load the 2021 round-trip rentals and convert both timestamp columns to datetime64.
# NOTE(review): in the displayed output, rental_id 104919803 is dated 2021-11-01
# although nearby ids (104788389, 104804829) fall in early 2021 and ids near 115.9M
# fall in late December 2021. That pattern suggests day and month were swapped for
# some rows, either here or in whatever produced this CSV - confirm the date format.
round_2021_whole_year = pd.read_csv('round_2021_whole_year.csv')
for date_col in ('end_date', 'start_date'):
    round_2021_whole_year[date_col] = pd.to_datetime(round_2021_whole_year[date_col])
round_2021_whole_year.info()
round_2021_whole_year

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 569118 entries, 0 to 569117
Data columns (total 9 columns):
 #   Column             Non-Null Count   Dtype         
---  ------             --------------   -----         
 0   rental_id          569118 non-null  int64         
 1   duration           569118 non-null  int64         
 2   bike_id            569118 non-null  int64         
 3   end_date           569118 non-null  datetime64[ns]
 4   endstation_id      569118 non-null  int64         
 5   endstation_name    569118 non-null  object        
 6   start_date         569118 non-null  datetime64[ns]
 7   startstation_id    569118 non-null  int64         
 8   startstation_name  569118 non-null  object        
dtypes: datetime64[ns](2), int64(5), object(2)
memory usage: 39.1+ MB


Unnamed: 0,rental_id,duration,bike_id,end_date,endstation_id,endstation_name,start_date,startstation_id,startstation_name
0,104788389,5400,15668,2021-01-01 16:29:00,655,"Crabtree Lane, Fulham",2021-01-01 14:59:00,655,"Crabtree Lane, Fulham"
1,104777428,5940,6695,2020-12-31 18:49:00,655,"Crabtree Lane, Fulham",2020-12-31 17:10:00,655,"Crabtree Lane, Fulham"
2,104791339,1860,6695,2021-01-01 18:28:00,655,"Crabtree Lane, Fulham",2021-01-01 17:57:00,655,"Crabtree Lane, Fulham"
3,104770021,3060,16218,2020-12-31 13:11:00,42,"Wenlock Road , Hoxton",2020-12-31 12:20:00,42,"Wenlock Road , Hoxton"
4,104804829,3420,17194,2021-02-01 16:34:00,95,"Aldersgate Street, Barbican",2021-02-01 15:37:00,95,"Aldersgate Street, Barbican"
...,...,...,...,...,...,...,...,...,...
569113,115914136,720,17063,2021-12-30 16:28:00,815,"Lambeth Palace Road, Waterloo",2021-12-30 16:16:00,815,"Lambeth Palace Road, Waterloo"
569114,115946155,60,4654,2022-01-01 00:56:00,815,"Lambeth Palace Road, Waterloo",2022-01-01 00:55:00,815,"Lambeth Palace Road, Waterloo"
569115,115954677,15960,16324,2022-01-01 17:17:00,817,"Riverlight South, Nine Elms",2022-01-01 12:51:00,817,"Riverlight South, Nine Elms"
569116,116006452,3060,19326,2022-04-01 08:40:00,817,"Riverlight South, Nine Elms",2022-04-01 07:49:00,817,"Riverlight South, Nine Elms"


In [14]:
# Keep only the trips whose start_date lies between 2021-10-15 00:00:00 and
# 2021-11-15 23:59:59 (both bounds inclusive).
window_start = pd.to_datetime('2021-10-15 00:00:00')
window_end = pd.to_datetime('2021-11-15 23:59:59')
in_window = round_2021_whole_year['start_date'].between(window_start, window_end)
round2021 = round_2021_whole_year[in_window]
round2021

Unnamed: 0,rental_id,duration,bike_id,end_date,endstation_id,endstation_name,start_date,startstation_id,startstation_name
8664,104919803,420,8061,2021-11-01 08:22:00,92,"Borough Road, Elephant & Castle",2021-11-01 08:15:00,92,"Borough Road, Elephant & Castle"
8672,104922065,1080,5854,2021-11-01 11:52:00,32,"Leonard Circus , Shoreditch",2021-11-01 11:34:00,32,"Leonard Circus , Shoreditch"
8696,104919808,2460,15555,2021-11-01 08:57:00,24,"British Museum, Bloomsbury",2021-11-01 08:16:00,24,"British Museum, Bloomsbury"
8704,104925031,1260,18837,2021-11-01 15:34:00,12,"Malet Street, Bloomsbury",2021-11-01 15:13:00,12,"Malet Street, Bloomsbury"
8720,104928908,1680,7143,2021-11-01 19:16:00,789,"Podium, Queen Elizabeth Olympic Park",2021-11-01 18:48:00,789,"Podium, Queen Elizabeth Olympic Park"
...,...,...,...,...,...,...,...,...,...
547950,115546780,60,17919,2021-11-12 13:34:00,379,"Turquoise Island, Notting Hill",2021-11-12 13:33:00,379,"Turquoise Island, Notting Hill"
547962,115554334,840,13367,2021-11-12 20:52:00,393,"Snow Hill, Farringdon",2021-11-12 20:38:00,393,"Snow Hill, Farringdon"
547982,115542007,60,20567,2021-11-12 11:27:00,578,"Hollybush Gardens, Bethnal Green",2021-11-12 11:26:00,578,"Hollybush Gardens, Bethnal Green"
547996,115551127,120,5754,2021-11-12 16:55:00,760,"Rossmore Road, Marylebone",2021-11-12 16:53:00,760,"Rossmore Road, Marylebone"


In [15]:
# Attach the station table to every trip via its end station.
# Left join: a trip whose endstation_id has no match in `stations` is kept,
# with NaN in the station columns (name, id, lat, lon, capacity) - the output
# below shows this for endstation_id 760.
# Caution: this overwrites round2021, so re-running this cell without first
# re-running the filter cell would join the station columns a second time.
round2021 = round2021.merge(
    stations,
    how='left',
    left_on='endstation_id',
    right_on='id',
)
round2021

Unnamed: 0,rental_id,duration,bike_id,end_date,endstation_id,endstation_name,start_date,startstation_id,startstation_name,name,id,lat,lon,capacity
0,104919803,420,8061,2021-11-01 08:22:00,92,"Borough Road, Elephant & Castle",2021-11-01 08:15:00,92,"Borough Road, Elephant & Castle","Borough Road, Elephant & Castle",92.0,51.498898,-0.100441,41.0
1,104922065,1080,5854,2021-11-01 11:52:00,32,"Leonard Circus , Shoreditch",2021-11-01 11:34:00,32,"Leonard Circus , Shoreditch","Leonard Circus , Shoreditch",32.0,51.524696,-0.084439,43.0
2,104919808,2460,15555,2021-11-01 08:57:00,24,"British Museum, Bloomsbury",2021-11-01 08:16:00,24,"British Museum, Bloomsbury","British Museum, Bloomsbury",24.0,51.519080,-0.124678,35.0
3,104925031,1260,18837,2021-11-01 15:34:00,12,"Malet Street, Bloomsbury",2021-11-01 15:13:00,12,"Malet Street, Bloomsbury","Malet Street, Bloomsbury",12.0,51.521681,-0.130432,49.0
4,104928908,1680,7143,2021-11-01 19:16:00,789,"Podium, Queen Elizabeth Olympic Park",2021-11-01 18:48:00,789,"Podium, Queen Elizabeth Olympic Park","Podium, Queen Elizabeth Olympic Park",789.0,51.538718,-0.011889,40.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43190,115546780,60,17919,2021-11-12 13:34:00,379,"Turquoise Island, Notting Hill",2021-11-12 13:33:00,379,"Turquoise Island, Notting Hill","Turquoise Island, Notting Hill",379.0,51.514312,-0.200838,21.0
43191,115554334,840,13367,2021-11-12 20:52:00,393,"Snow Hill, Farringdon",2021-11-12 20:38:00,393,"Snow Hill, Farringdon","Snow Hill, Farringdon",393.0,51.517334,-0.103604,15.0
43192,115542007,60,20567,2021-11-12 11:27:00,578,"Hollybush Gardens, Bethnal Green",2021-11-12 11:26:00,578,"Hollybush Gardens, Bethnal Green","Hollybush Gardens, Bethnal Green",578.0,51.527607,-0.057133,32.0
43193,115551127,120,5754,2021-11-12 16:55:00,760,"Rossmore Road, Marylebone",2021-11-12 16:53:00,760,"Rossmore Road, Marylebone",,,,,


In [16]:
# Report the number of missing values per column, then drop incomplete rows.
# The counts are printed explicitly because a notebook only auto-displays the
# last expression of a cell; a bare `isnull().sum()` mid-cell shows nothing.
print(round2021.isnull().sum())

# Drop every row that has a null in any column (e.g. trips whose end station
# found no match in the station table). Reassign instead of inplace=True.
round2021 = round2021.dropna(axis=0, how='any')
round2021

Unnamed: 0,rental_id,duration,bike_id,end_date,endstation_id,endstation_name,start_date,startstation_id,startstation_name,name,id,lat,lon,capacity
0,104919803,420,8061,2021-11-01 08:22:00,92,"Borough Road, Elephant & Castle",2021-11-01 08:15:00,92,"Borough Road, Elephant & Castle","Borough Road, Elephant & Castle",92.0,51.498898,-0.100441,41.0
1,104922065,1080,5854,2021-11-01 11:52:00,32,"Leonard Circus , Shoreditch",2021-11-01 11:34:00,32,"Leonard Circus , Shoreditch","Leonard Circus , Shoreditch",32.0,51.524696,-0.084439,43.0
2,104919808,2460,15555,2021-11-01 08:57:00,24,"British Museum, Bloomsbury",2021-11-01 08:16:00,24,"British Museum, Bloomsbury","British Museum, Bloomsbury",24.0,51.519080,-0.124678,35.0
3,104925031,1260,18837,2021-11-01 15:34:00,12,"Malet Street, Bloomsbury",2021-11-01 15:13:00,12,"Malet Street, Bloomsbury","Malet Street, Bloomsbury",12.0,51.521681,-0.130432,49.0
4,104928908,1680,7143,2021-11-01 19:16:00,789,"Podium, Queen Elizabeth Olympic Park",2021-11-01 18:48:00,789,"Podium, Queen Elizabeth Olympic Park","Podium, Queen Elizabeth Olympic Park",789.0,51.538718,-0.011889,40.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43189,115549004,60,15143,2021-11-12 14:52:00,330,"Eastbourne Mews, Paddington",2021-11-12 14:51:00,330,"Eastbourne Mews, Paddington","Eastbourne Mews, Paddington",330.0,51.516417,-0.179135,34.0
43190,115546780,60,17919,2021-11-12 13:34:00,379,"Turquoise Island, Notting Hill",2021-11-12 13:33:00,379,"Turquoise Island, Notting Hill","Turquoise Island, Notting Hill",379.0,51.514312,-0.200838,21.0
43191,115554334,840,13367,2021-11-12 20:52:00,393,"Snow Hill, Farringdon",2021-11-12 20:38:00,393,"Snow Hill, Farringdon","Snow Hill, Farringdon",393.0,51.517334,-0.103604,15.0
43192,115542007,60,20567,2021-11-12 11:27:00,578,"Hollybush Gardens, Bethnal Green",2021-11-12 11:26:00,578,"Hollybush Gardens, Bethnal Green","Hollybush Gardens, Bethnal Green",578.0,51.527607,-0.057133,32.0


In [17]:
# Persist the cleaned 2021 subset for later use; the DataFrame index is not written.
round2021.to_csv(path_or_buf='round2021.csv', index=False)