# Processing of TfL Cycle Data

For CUSP London Data Dive 2021

Data downloaded from https://cycling.data.tfl.gov.uk

In [4]:
import pandas as pd
import numpy as np

## Processing Stop Points

Merges station data from the BikePoint API response with the stations CSV database, producing a single CSV of stations.

In [None]:
# Loads the BikePoint JSON dump and discards the fields that are not used downstream
api_stations = pd.read_json("BikePoint.json").drop(
    columns=['$type', 'url', 'placeType',
             'additionalProperties', 'children', 'childrenUrls'])
api_stations

In [None]:
# Keeps only the part of each id after its first 11 characters and parses it as an integer
# NOTE(review): presumably this strips a fixed "BikePoints_"-style prefix - confirm against BikePoint.json
api_stations['id'] = api_stations['id'].str[11:].astype(int)
# Aligns column names with the CSV station table and indexes the frame by station id
api_stations = (
    api_stations
    .rename(columns={'commonName': 'StationName', 'lat': 'latitude', 'lon': 'longitude'})
    .set_index('id')
)
api_stations

In [None]:
# Loads the CSV station table and indexes it by station id (column 'Station.Id' in the file)
csv_stations = (
    pd.read_csv('stations.csv')
    .rename(columns={'Station.Id': 'id'})
    .set_index('id')
)
csv_stations

In [None]:
# Merges both datasets: values from the csv set take priority, and anything
# missing there (null cells, or ids/columns absent from the csv) is filled from the api set
all_stations = csv_stations.combine_first(api_stations)
all_stations

In [None]:
# Writes the merged station table (including its `id` index) to all_stations.csv
all_stations.to_csv('all_stations.csv')

In [None]:
# Unfortunate case that some stations with same ID are different in API than CSV!
# Station 355 is one example; the merged row keeps the CSV values wherever the CSV has them
all_stations.loc[355]

In [None]:
# CSV-side record for station 355, for comparison with the merged row
csv_stations.loc[355]

In [None]:
# API-side record for station 355, for comparison with the CSV record
api_stations.loc[355]

## Creating statistics using Journey csv files

Using QGIS, stations have been classified as in Central London or not

This section works through statistic creation for one dataset

In [1]:
def bucket_duration(dur):
    """Maps a ride duration onto the label of its duration bucket.

    The callers in this notebook pass durations in whole minutes.  Buckets
    are checked in ascending order and the first one whose inclusive upper
    bound is not exceeded wins, so anything at or below 5 (including 0 and
    negative values) is labelled '1_5'.  Values that satisfy none of the
    bounds -- durations above 120, and NaN, for which every comparison is
    false -- are labelled '120_'.
    """
    # (inclusive upper bound, label), ordered from shortest to longest
    upper_bounds = (
        (5, '1_5'),
        (10, '6_10'),
        (15, '11_15'),
        (20, '16_20'),
        (25, '21_25'),
        (30, '26_30'),
        (40, '31_40'),
        (50, '41_50'),
        (60, '51_60'),
        (120, '61_120'),
    )
    for limit, label in upper_bounds:
        if dur <= limit:
            return label
    return '120_'

In [5]:
# Reads in station data, indexed by station id
# The `city` column of this file is the central-London flag merged onto journeys below
# NOTE(review): presumably all_stations.csv after the QGIS classification - confirm how this file is produced
stations = pd.read_csv("all_stations_city.csv", index_col='id')
stations

Unnamed: 0_level_0,Easting,Northing,StationName,latitude,longitude,city
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,531202.520,182832.020,"River Street, Clerkenwell",51.529200,-0.109971,False
2,525207.070,179391.860,"Phillimore Gardens, Kensington",51.499600,-0.197574,False
3,532984.810,182001.530,"Christopher Street, Liverpool Street",51.521300,-0.084606,True
4,530436.760,182911.990,"St. Chad's Street, King's Cross",51.530100,-0.120974,False
5,528051.649,178742.097,"Sedding Street, Sloane Square",51.493100,-0.156876,False
...,...,...,...,...,...,...
840,,,"George Row, Bermondsey",51.498585,-0.068981,False
841,,,"Tower Wharf, Bermondsey",51.500845,-0.074704,False
842,,,"Temple Gardens, Temple",51.510981,-0.108322,True
844,,,"Canada Water Station, Rotherhithe",51.498439,-0.049150,False


In [6]:
# A test with first dataset
# Loads one journey extract; Duration is still in seconds at this point
# and is converted to minutes in the next cell
bike = pd.read_csv("cycle_data/143JourneyDataExtract02Jan2019-08Jan2019.csv")
bike

Unnamed: 0,Rental Id,Duration,Bike Id,End Date,EndStation Id,EndStation Name,Start Date,StartStation Id,StartStation Name
0,83284852,660,8282,02/01/2019 17:47,94,"Bricklayers Arms, Borough",02/01/2019 17:36,197,"Stamford Street, South Bank"
1,83360769,180,4657,06/01/2019 18:14,94,"Bricklayers Arms, Borough",06/01/2019 18:11,269,"Empire Square, The Borough"
2,83280311,960,6306,02/01/2019 14:49,374,"Waterloo Station 1, Waterloo",02/01/2019 14:33,5,"Sedding Street, Sloane Square"
3,83323626,120,6217,04/01/2019 12:54,269,"Empire Square, The Borough",04/01/2019 12:52,94,"Bricklayers Arms, Borough"
4,83343322,120,848,05/01/2019 16:03,269,"Empire Square, The Borough",05/01/2019 16:01,94,"Bricklayers Arms, Borough"
...,...,...,...,...,...,...,...,...,...
142408,83303300,1140,2590,03/01/2019 16:22,45,"Boston Place, Marylebone",03/01/2019 16:03,562,"Bury Place, Holborn"
142409,83309255,420,5011,03/01/2019 18:44,123,"St. John Street, Finsbury",03/01/2019 18:37,562,"Bury Place, Holborn"
142410,83301941,2100,3794,03/01/2019 15:23,89,"Tavistock Place, Bloomsbury",03/01/2019 14:48,562,"Bury Place, Holborn"
142411,83401636,1260,4278,08/01/2019 12:25,254,"Chadwell Street, Angel",08/01/2019 12:04,562,"Bury Place, Holborn"


In [7]:
bike = (
    bike
    .assign(**{
        # Start timestamps are parsed and floored to midnight so rides group by calendar day
        'Start Date': pd.to_datetime(bike['Start Date'], format='%d/%m/%Y %H:%M').dt.floor('d'),
        # Seconds to whole minutes, rounding down
        'Duration': bike['Duration'] // 60,
    })
    # Looks up the city/non-city flag of the start station, then of the end station
    .merge(stations['city'].rename('start_city'),
           left_on='StartStation Id', right_on='id', how='left')
    .merge(stations['city'].rename('end_city'),
           left_on='EndStation Id', right_on='id', how='left')
    # Expected to be a no-op, since every station should be present in `stations`
    .dropna(axis=0, how='any')
)
bike

Unnamed: 0,Rental Id,Duration,Bike Id,End Date,EndStation Id,EndStation Name,Start Date,StartStation Id,StartStation Name,start_city,end_city
0,83284852,11,8282,02/01/2019 17:47,94,"Bricklayers Arms, Borough",2019-01-02,197,"Stamford Street, South Bank",True,False
1,83360769,3,4657,06/01/2019 18:14,94,"Bricklayers Arms, Borough",2019-01-06,269,"Empire Square, The Borough",False,False
2,83280311,16,6306,02/01/2019 14:49,374,"Waterloo Station 1, Waterloo",2019-01-02,5,"Sedding Street, Sloane Square",False,False
3,83323626,2,6217,04/01/2019 12:54,269,"Empire Square, The Borough",2019-01-04,94,"Bricklayers Arms, Borough",False,False
4,83343322,2,848,05/01/2019 16:03,269,"Empire Square, The Borough",2019-01-05,94,"Bricklayers Arms, Borough",False,False
...,...,...,...,...,...,...,...,...,...,...,...
142408,83303300,19,2590,03/01/2019 16:22,45,"Boston Place, Marylebone",2019-01-03,562,"Bury Place, Holborn",True,False
142409,83309255,7,5011,03/01/2019 18:44,123,"St. John Street, Finsbury",2019-01-03,562,"Bury Place, Holborn",True,False
142410,83301941,35,3794,03/01/2019 15:23,89,"Tavistock Place, Bloomsbury",2019-01-03,562,"Bury Place, Holborn",True,False
142411,83401636,21,4278,08/01/2019 12:25,254,"Chadwell Street, Angel",2019-01-08,562,"Bury Place, Holborn",True,False


In [8]:
# Adds boolean fields classifying ride types
# If the left merge ever misses a station, the flag columns are upcast to object
# dtype; on object dtype `~` is applied per element as integer bitwise-not
# (~True == -2) rather than logical negation. Casting to bool first keeps the
# masks correct in that case and is a no-op when the columns are already bool.
start_in_city = bike.start_city.astype(bool)
end_in_city = bike.end_city.astype(bool)
# Both ends in the central zone
bike['city'] = start_in_city & end_in_city
# Neither end in the central zone
bike['suburb'] = ~start_in_city & ~end_in_city
# Exactly one end in the central zone
bike['city_suburb'] = np.logical_xor(start_in_city, end_in_city)

In [9]:
# One group per calendar day of ride start
bgb = bike.groupby('Start Date')
# Daily totals: summed minutes and number of rides of each type, plus the ride count
data = (
    bgb[['Duration', 'city', 'suburb', 'city_suburb']]
    .sum()
    .assign(count=bgb['Duration'].count())
)

In [10]:
# Per-day mean, median and maximum ride duration (in minutes)
data = data.assign(
    mean_duration=bgb['Duration'].mean(),
    median_duration=bgb['Duration'].median(),
    max_duration=bgb['Duration'].max(),
)

In [11]:
# Labels every ride with its duration bucket
bike['duration_bucket'] = bike['Duration'].map(bucket_duration)
# Rides per (day, bucket), pivoted to one column per bucket and placed beside the daily stats
data = pd.concat(
    [
        data,
        bike.groupby(['Start Date', 'duration_bucket'])['Duration'].count().unstack(),
    ],
    axis=1,
)
data

Unnamed: 0_level_0,Duration,city,suburb,city_suburb,count,mean_duration,median_duration,max_duration,11_15,120_,16_20,1_5,21_25,26_30,31_40,41_50,51_60,61_120,6_10
Start Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2019-01-02,358875,2138,11813,5601,19552,18.3549,13,6578,4253,129,2918,2837,1943,1001,626,276,177,358,5034
2019-01-03,361027,2436,12537,6373,21346,16.913098,12,5551,4802,102,3349,3143,2096,970,514,224,148,251,5747
2019-01-04,344051,2366,12435,5865,20666,16.648166,12,6088,4596,100,3114,3267,1870,886,488,221,134,284,5706
2019-01-05,253234,947,10070,2753,13770,18.390269,12,4244,2736,147,1897,2377,1254,767,469,257,175,242,3449
2019-01-06,294908,921,10549,2661,14131,20.869578,13,3390,2767,158,1976,2162,1518,796,607,299,183,431,3234
2019-01-07,373467,2845,15020,7588,25453,14.672809,12,2290,6018,66,3947,3766,2370,1201,523,184,86,138,7154
2019-01-08,392790,3139,16220,8136,27495,14.28587,12,643,6458,59,4284,4022,2715,1246,559,200,92,186,7674


In [12]:
# Sanity checks - every day should print True in both series
# 1) the three ride types partition the rides, so their counts add up to the daily count
print(data[['city', 'suburb', 'city_suburb']].sum(axis=1).eq(data['count']))
# 2) total minutes divided by ride count reproduces the stored mean (exact float comparison)
print(data['Duration'].div(data['count']).eq(data['mean_duration']))

Start Date
2019-01-02    True
2019-01-03    True
2019-01-04    True
2019-01-05    True
2019-01-06    True
2019-01-07    True
2019-01-08    True
dtype: bool
Start Date
2019-01-02    True
2019-01-03    True
2019-01-04    True
2019-01-05    True
2019-01-06    True
2019-01-07    True
2019-01-08    True
dtype: bool


## Generating data from each set in turn

Creates dataset and saves as csv

In [13]:
# Gets list of all files
# `!ls` is an IPython shell escape: it runs `ls cycle_data` and captures the output lines as a list
files = !ls cycle_data
# The first extract is excluded because it was already processed above and is the starting content of `data`
files.remove("143JourneyDataExtract02Jan2019-08Jan2019.csv")

In [14]:
# Per-file daily summaries are gathered in a list and joined onto `data` once:
# DataFrame.append was removed in pandas 2.0, and appending inside the loop
# re-copied the whole accumulated frame on every iteration.
daily_summaries = []
for file in files:
    print(file)
    curr_bike = pd.read_csv("cycle_data/" + file)
    # Converts dates to workable format (start timestamp floored to the calendar day)
    curr_bike['Start Date'] = pd.to_datetime(curr_bike['Start Date'], format='%d/%m/%Y %H:%M').dt.floor('d')
    # Converts duration seconds to whole minutes
    curr_bike['Duration'] = curr_bike.Duration // 60
    # Merges city/non-city classification
    curr_bike = curr_bike.merge(stations.city, left_on='StartStation Id', right_on='id', how='left')
    curr_bike.rename({"city": "start_city"}, axis=1, inplace=True)
    curr_bike = curr_bike.merge(stations.city, left_on='EndStation Id', right_on='id', how='left')
    curr_bike.rename({"city": "end_city"}, axis=1, inplace=True)
    # Should do nothing as there should be no missing stations
    curr_bike.dropna(axis=0, how='any', inplace=True)
    # If a station was missing, the left merge upcast the flags to object dtype,
    # where `~` is integer bitwise-not on Python bools (~True == -2); casting to
    # bool first keeps the masks logical. No-op when the columns are already bool.
    start_in_city = curr_bike.start_city.astype(bool)
    end_in_city = curr_bike.end_city.astype(bool)
    # Adds boolean fields classifying ride types
    curr_bike['city'] = start_in_city & end_in_city
    curr_bike['suburb'] = ~start_in_city & ~end_in_city
    curr_bike['city_suburb'] = np.logical_xor(start_in_city, end_in_city)
    # Groups by start date to generate stats
    curr_bgb = curr_bike.groupby(by='Start Date')
    curr_data = curr_bgb[['Duration', 'city', 'suburb', 'city_suburb']].sum()
    curr_data['count'] = curr_bgb.Duration.count().rename('count')
    # Fetches duration statistics
    curr_data['mean_duration'] = curr_bgb.Duration.mean()
    curr_data['median_duration'] = curr_bgb.Duration.median()
    curr_data['max_duration'] = curr_bgb.Duration.max()
    # Adds duration buckets to data
    curr_bike['duration_bucket'] = curr_bike.Duration.apply(bucket_duration)
    bucket_counts = curr_bike.groupby(['Start Date', 'duration_bucket'])['Duration'].count().unstack()
    daily_summaries.append(pd.concat((curr_data, bucket_counts), axis=1))

# Single row-wise concatenation; `data` stays untouched if any file fails part-way through the loop
data = pd.concat([data, *daily_summaries])

144JourneyDataExtract09Jan2019-15Jan2019.csv
145JourneyDataExtract16Jan2019-22Jan2019.csv
146JourneyDataExtract23Jan2019-29Jan2019.csv
147JourneyDataExtract30Jan2019-05Feb2019.csv
148JourneyDataExtract06Feb2019-12Feb2019.csv
149JourneyDataExtract13Feb2019-19Feb2019.csv
150JourneyDataExtract20Feb2019-26Feb2019.csv
151JourneyDataExtract27Feb2019-05Mar2019.csv
152JourneyDataExtract06Mar2019-12Mar2019.csv
153JourneyDataExtract13Mar2019-19Mar2019.csv
154JourneyDataExtract20Mar2019-26Mar2019.csv
155JourneyDataExtract27Mar2019-02Apr2019.csv
156JourneyDataExtract03Apr2019-09Apr2019.csv
157JourneyDataExtract10Apr2019-16Apr2019.csv
158JourneyDataExtract17Apr2019-23Apr2019.csv
159JourneyDataExtract24Apr2019-30Apr2019.csv
160JourneyDataExtract01May2019-07May2019.csv
161JourneyDataExtract08May2019-14May2019.csv
162JourneyDataExtract15May2019-21May2019.csv
163JourneyDataExtract22May2019-28May2019.csv
164JourneyDataExtract29May2019-04Jun2019.csv
165JourneyDataExtract05Jun2019-11Jun2019.csv
166Journey

In [15]:
# Displays the combined daily statistics across all processed extracts
data

Unnamed: 0_level_0,Duration,city,suburb,city_suburb,count,mean_duration,median_duration,max_duration,11_15,120_,16_20,1_5,21_25,26_30,31_40,41_50,51_60,61_120,6_10
Start Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2019-01-02,358875,2138,11813,5601,19552,18.354900,13,6578,4253,129,2918,2837,1943,1001,626,276,177,358,5034
2019-01-03,361027,2436,12537,6373,21346,16.913098,12,5551,4802,102,3349,3143,2096,970,514,224,148,251,5747
2019-01-04,344051,2366,12435,5865,20666,16.648166,12,6088,4596,100,3114,3267,1870,886,488,221,134,284,5706
2019-01-05,253234,947,10070,2753,13770,18.390269,12,4244,2736,147,1897,2377,1254,767,469,257,175,242,3449
2019-01-06,294908,921,10549,2661,14131,20.869578,13,3390,2767,158,1976,2162,1518,796,607,299,183,431,3234
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-02-05,483698,1224,14290,3758,19272,25.098485,16,5226,3707,303,3283,1809,2477,1533,982,450,327,735,3666
2021-02-06,509337,903,14357,3575,18835,27.042049,18,4424,3156,391,3075,1453,2682,1735,1301,612,442,944,3044
2021-02-07,137829,332,4811,1052,6195,22.248426,14,2620,1217,69,1014,774,648,402,248,121,75,184,1443
2021-02-08,106558,455,4808,1368,6631,16.069673,13,1812,1455,26,1075,878,712,380,212,72,44,54,1723


In [16]:
# Flags Saturdays and Sundays (day-of-week 5 and 6, with Monday as 0)
data['weekend'] = data.index.dayofweek.isin([5, 6])

In [17]:
# Sanity checks - both lines should print 0 (number of days failing the check)
# Ride-type counts must add up to the daily ride count
print(data[['city', 'suburb', 'city_suburb']].sum(axis=1).ne(data['count']).sum())
# Total minutes / ride count must reproduce the stored mean duration
print(data['Duration'].div(data['count']).ne(data['mean_duration']).sum())

0
0


In [18]:
# Saves data to csv
# The `Start Date` index is written as the first column of bike_output.csv
data.to_csv('bike_output.csv')

## Scratch Space

Further processing of the data in order to find further statistics and charts

In [22]:
# Day(s) with the fewest rides
data.loc[data['count'].eq(data['count'].min())]

Unnamed: 0_level_0,Duration,city,suburb,city_suburb,count,mean_duration,median_duration,max_duration,11_15,120_,16_20,1_5,21_25,26_30,31_40,41_50,51_60,61_120,6_10,weekend
Start Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2020-04-28,91589,238,3685,848,4771,19.197024,15,466,927,43,802,527,573,317,225,102,60,113,1082,False


In [25]:
# Day(s) on which suburb-to-suburb rides made up the largest share of all rides
data.loc[(data['suburb'] / data['count']).pipe(lambda share: share.eq(share.max()))]

Unnamed: 0_level_0,Duration,city,suburb,city_suburb,count,mean_duration,median_duration,max_duration,11_15,120_,16_20,1_5,21_25,26_30,31_40,41_50,51_60,61_120,6_10,weekend
Start Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2020-03-29,228196,362,5909,971,7242,31.51008,19,2649,1119,154,1094,599,908,654,528,329,243,568,1046,True


In [28]:
# Share of rides touching the central zone (city or city_suburb) on the day
# where the suburb-only share peaks, i.e. the lowest such share in the data
1-(data['suburb'] / data['count']).max()

0.18406517536592104

In [29]:
# Day(s) containing the single longest ride
data.loc[data['max_duration'].eq(data['max_duration'].max())]

Unnamed: 0_level_0,Duration,city,suburb,city_suburb,count,mean_duration,median_duration,max_duration,11_15,120_,16_20,1_5,21_25,26_30,31_40,41_50,51_60,61_120,6_10,weekend
Start Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2020-04-01,254910,442,7142,1509,9093,28.033652,17,9911,1587,156,1448,888,1206,707,511,313,225,466,1586,False


In [30]:
# Longest single ride expressed in days (minutes -> hours -> days).
# Derived from `data` rather than hard-coding the 9911 minutes read off the
# previous output, so the figure stays correct if the input data changes.
float(data.max_duration.max()) / 60 / 24

6.882638888888889

In [31]:
# Day(s) with the highest median ride duration
data.loc[data['median_duration'].eq(data['median_duration'].max())]

Unnamed: 0_level_0,Duration,city,suburb,city_suburb,count,mean_duration,median_duration,max_duration,11_15,120_,16_20,1_5,21_25,26_30,31_40,41_50,51_60,61_120,6_10,weekend
Start Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2020-05-09,2710768,2113,37789,7573,47475,57.098852,27,4403,4738,5734,5759,1745,6480,4978,3932,2522,2035,6071,3481,True


In [113]:
# Average of the daily median ride duration (minutes) from 2020-03-21 onwards,
# the date this notebook uses as the start of the lockdown period
data.median_duration['2020-03-21':].mean()

17.08282208588957

In [112]:
# Ratio of the average daily median duration from 2020-03-21 onwards
# to the average up to and including 2020-03-20
data.median_duration['2020-03-21':].mean()/data.median_duration[:'2020-03-20'].mean()

1.2835967179108088

In [54]:
# Mean daily ride counts by type on weekend days from 2020-03-21 onwards
data.loc[data.weekend, ['city', 'suburb', 'city_suburb', 'count']].loc['2020-03-21':].mean()

city            1741.180851
suburb         24767.085106
city_suburb     5759.563830
count          32267.829787
dtype: float64

In [None]:
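# Mean daily ride counts by ride type, split by period and by weekend/weekday (reference notes, not executable code)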
before lockdown
weekend
city            1445.142857
suburb         16215.619048
city_suburb     3938.912698

non-weekend
city            3039.468553
suburb         18037.647799
city_suburb     8283.814465

after lockdown
weekend
city            1741.180851
suburb         24767.085106
city_suburb     5759.563830

non-weekend
city            1544.862069
suburb         19224.633621
city_suburb     5115.943966

In [96]:
# Two equally long windows whose start dates and end dates are each 364 days
# (exactly 52 weeks) apart, so row i of both windows falls on the same weekday
ride_type_cols = ['city', 'suburb', 'city_suburb']
first_year = data.loc['2019-01-04':'2020-02-09', ride_type_cols]
second_year = data.loc['2020-01-03':'2021-02-07', ride_type_cols]
first_year

Unnamed: 0_level_0,city,suburb,city_suburb
Start Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-01-04,2366,12435,5865
2019-01-05,947,10070,2753
2019-01-06,921,10549,2661
2019-01-07,2845,15020,7588
2019-01-08,3139,16220,8136
...,...,...,...
2020-02-05,3159,17341,8944
2020-02-06,3225,17125,8884
2020-02-07,3103,16803,8075
2020-02-08,1540,15792,4214


In [97]:
second_year

Unnamed: 0_level_0,city,suburb,city_suburb
Start Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-01-03,1880,11505,4860
2020-01-04,1113,11526,2867
2020-01-05,765,9750,2309
2020-01-06,2588,13735,7214
2020-01-07,2956,16024,8253
...,...,...,...
2021-02-03,803,9267,2814
2021-02-04,671,8049,2336
2021-02-05,1224,14290,3758
2021-02-06,903,14357,3575


In [98]:
# Row-by-row change in ride counts (second window minus first); the date index
# is dropped on both sides so rows are paired by position rather than by date
difference = second_year.reset_index(drop=True) - first_year.reset_index(drop=True)
# Labels each row with the second window's dates before saving
difference.set_index(second_year.index).to_csv('difference.csv')

Unnamed: 0,city,suburb,city_suburb
0,-486,-930,-1005
1,166,1456,114
2,-156,-799,-352
3,-257,-1285,-374
4,-183,-196,117
...,...,...,...
397,-2356,-8074,-6130
398,-2554,-9076,-6548
399,-1879,-2513,-4317
400,-637,-1435,-639


In [100]:
# Row-by-row ratio of ride counts (second window / first), paired by position.
# Note that the name `difference` is reused here and now holds ratios.
difference = second_year.reset_index(drop=True) / first_year.reset_index(drop=True)
# Labels each row with the second window's dates before saving
difference.set_index(second_year.index).to_csv('division.csv')

In [111]:
# Bucket columns in ascending order of duration
buckets = ['1_5', '6_10', '11_15', '16_20', '21_25', '26_30', '31_40', '41_50', '51_60', '61_120', '120_']
# Mean daily rides per bucket for each weekend/weekday x period combination;
# the period boundary is 2020-03-20 / 2020-03-21
bucket_data = pd.concat(
    [
        data.loc[data.weekend, buckets].loc[:'2020-03-20'].mean(),
        data.loc[~data.weekend, buckets].loc[:'2020-03-20'].mean(),
        data.loc[data.weekend, buckets].loc['2020-03-21':].mean(),
        data.loc[~data.weekend, buckets].loc['2020-03-21':].mean(),
    ],
    axis=1,
)
# The concatenated columns are numbered 0-3 in the order above; they are named only in the saved file
bucket_data.rename(
    columns={0: 'weekend pre-lockdown', 1: 'weekday pre-lockdown',
             2: 'weekend post-lockdown', 3: 'weekday post-lockdown'}
).to_csv('buckets.csv')

In [114]:
# Displays the station table (with the `city` flag) for reference
stations

Unnamed: 0_level_0,Easting,Northing,StationName,latitude,longitude,city
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,531202.520,182832.020,"River Street, Clerkenwell",51.529200,-0.109971,False
2,525207.070,179391.860,"Phillimore Gardens, Kensington",51.499600,-0.197574,False
3,532984.810,182001.530,"Christopher Street, Liverpool Street",51.521300,-0.084606,True
4,530436.760,182911.990,"St. Chad's Street, King's Cross",51.530100,-0.120974,False
5,528051.649,178742.097,"Sedding Street, Sloane Square",51.493100,-0.156876,False
...,...,...,...,...,...,...
840,,,"George Row, Bermondsey",51.498585,-0.068981,False
841,,,"Tower Wharf, Bermondsey",51.500845,-0.074704,False
842,,,"Temple Gardens, Temple",51.510981,-0.108322,True
844,,,"Canada Water Station, Rotherhithe",51.498439,-0.049150,False
