In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
import networkx as nx
import matplotlib.pyplot as plt
from matplotlib.pyplot import cm
import json
import re
from shapely.geometry import Point, LineString #this library is for manipulating geometric objects, and it is what geopandas uses to store geometries
from scipy.spatial import distance

In [2]:
df = pd.read_csv('dataset_new.csv')

In [3]:
df.columns

Index(['Start.date', 'Start.station', 'End.date', 'End.station', 'id', 'lon',
       'lat'],
      dtype='object')

In [4]:
#covert the listing to the geo dataframe
g_df = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.lon, df.lat), crs='epsg:27700')
g_df.crs

<Derived Projected CRS: EPSG:27700>
Name: OSGB36 / British National Grid
Axis Info [cartesian]:
- E[east]: Easting (metre)
- N[north]: Northing (metre)
Area of Use:
- name: United Kingdom (UK) - offshore to boundary of UKCS within 49°45'N to 61°N and 9°W to 2°E; onshore Great Britain (England, Wales and Scotland). Isle of Man onshore.
- bounds: (-9.0, 49.75, 2.01, 61.01)
Coordinate Operation:
- name: British National Grid
- method: Transverse Mercator
Datum: Ordnance Survey of Great Britain 1936
- Ellipsoid: Airy 1830
- Prime Meridian: Greenwich

In [5]:
g_df.head()

Unnamed: 0,Start.date,Start.station,End.date,End.station,id,lon,lat,geometry
0,2023/3/31 23:57,"Vauxhall Cross, Vauxhall",2023/3/31 23:58,"Kennington Lane Rail Bridge, Vauxhall",BikePoints_74,-0.124469,51.485917,POINT (-0.124 51.486)
1,2023/3/31 23:54,"Ossulston Street, Somers Town",2023/3/31 23:56,"Doric Way , Somers Town",BikePoints_797,-0.128279,51.5293,POINT (-0.128 51.529)
2,2023/3/31 23:54,"Parson's Green , Parson's Green",2023/3/31 23:58,"Elysium Place, Fulham",BikePoints_596,-0.199783,51.472817,POINT (-0.200 51.473)
3,2023/3/31 23:52,"Westfield Ariel Way, White City",2023/3/31 23:59,"St. Mark's Road, North Kensington",BikePoints_566,-0.224103,51.509158,POINT (-0.224 51.509)
4,2023/3/31 23:52,"Berry Street, Clerkenwell",2023/3/31 23:59,"Queen Street 1, Bank",BikePoints_246,-0.099994,51.522853,POINT (-0.100 51.523)


In [6]:
# Convert the Start.date and End.date to datetime format
g_df['Start.date'] = pd.to_datetime(g_df['Start.date'])
g_df['End.date'] = pd.to_datetime(g_df['End.date'])

# Set the time period
start_time = "07:00:00"
end_time = "09:00:00"

In [7]:
g_df

Unnamed: 0,Start.date,Start.station,End.date,End.station,id,lon,lat,geometry
0,2023-03-31 23:57:00,"Vauxhall Cross, Vauxhall",2023-03-31 23:58:00,"Kennington Lane Rail Bridge, Vauxhall",BikePoints_74,-0.124469,51.485917,POINT (-0.124 51.486)
1,2023-03-31 23:54:00,"Ossulston Street, Somers Town",2023-03-31 23:56:00,"Doric Way , Somers Town",BikePoints_797,-0.128279,51.529300,POINT (-0.128 51.529)
2,2023-03-31 23:54:00,"Parson's Green , Parson's Green",2023-03-31 23:58:00,"Elysium Place, Fulham",BikePoints_596,-0.199783,51.472817,POINT (-0.200 51.473)
3,2023-03-31 23:52:00,"Westfield Ariel Way, White City",2023-03-31 23:59:00,"St. Mark's Road, North Kensington",BikePoints_566,-0.224103,51.509158,POINT (-0.224 51.509)
4,2023-03-31 23:52:00,"Berry Street, Clerkenwell",2023-03-31 23:59:00,"Queen Street 1, Bank",BikePoints_246,-0.099994,51.522853,POINT (-0.100 51.523)
...,...,...,...,...,...,...,...,...
575350,2023-03-20 00:03:00,"Earnshaw Street , Covent Garden",2023-03-20 00:39:00,"Bow Church Station, Bow",BikePoints_244,-0.128585,51.516118,POINT (-0.129 51.516)
575351,2023-03-20 00:02:00,"Euston Road, Euston",2023-03-20 00:21:00,"Great Suffolk Street, The Borough",BikePoints_69,-0.134407,51.526236,POINT (-0.134 51.526)
575352,2023-03-20 00:01:00,"Malet Street, Bloomsbury",2023-03-20 00:17:00,"Northington Street , Holborn",BikePoints_12,-0.130431,51.521680,POINT (-0.130 51.522)
575353,2023-03-20 00:00:00,"Tooley Street, Bermondsey",2023-03-20 00:33:00,"Empire Square, The Borough",BikePoints_278,-0.079620,51.503493,POINT (-0.080 51.503)


In [8]:
# Filter the dataset for rides between 07:00 and 09:00 during the period 17-23 October 2022
filtered_df = g_df[(g_df['Start.date'].dt.time >= pd.to_datetime(start_time).time()) &
                     (g_df['End.date'].dt.time <= pd.to_datetime(end_time).time()) &
                     (g_df['Start.date'].dt.date >= pd.to_datetime("2023-03-01").date()) &
                     (g_df['End.date'].dt.date <= pd.to_datetime("2023-03-31").date())]

In [9]:
filtered_df

Unnamed: 0,Start.date,Start.station,End.date,End.station,id,lon,lat,geometry
8308,2023-03-31 08:59:00,"Empire Square, The Borough",2023-03-31 09:00:00,"Borough High Street, The Borough",BikePoints_269,-0.089740,51.500823,POINT (-0.090 51.501)
8342,2023-03-31 08:57:00,"Northington Street , Holborn",2023-03-31 09:00:00,"Red Lion Street, Holborn",BikePoints_22,-0.114079,51.522264,POINT (-0.114 51.522)
8345,2023-03-31 08:58:00,"Newgate Street , St. Paul's",2023-03-31 09:00:00,"New Fetter Lane, Holborn",BikePoints_71,-0.098850,51.515418,POINT (-0.099 51.515)
8346,2023-03-31 08:58:00,"Clerkenwell Green, Clerkenwell",2023-03-31 09:00:00,"St. John Street, Finsbury",BikePoints_135,-0.104708,51.523260,POINT (-0.105 51.523)
8349,2023-03-31 08:57:00,"Sadlers Sports Centre, Finsbury",2023-03-31 09:00:00,"Bunhill Row, Moorgate",BikePoints_78,-0.099489,51.524868,POINT (-0.099 51.525)
...,...,...,...,...,...,...,...,...
574310,2023-03-20 07:00:00,"Newgate Street , St. Paul's",2023-03-20 07:08:00,"Swan Street, The Borough",BikePoints_71,-0.098850,51.515418,POINT (-0.099 51.515)
574311,2023-03-20 07:00:00,"Smugglers Way, Wandsworth",2023-03-20 07:01:00,"Smugglers Way, Wandsworth",BikePoints_665,-0.191722,51.461083,POINT (-0.192 51.461)
574312,2023-03-20 07:00:00,"Swan Street, The Borough",2023-03-20 07:05:00,"Strata, Elephant & Castle",BikePoints_295,-0.092762,51.500296,POINT (-0.093 51.500)
574325,2023-03-20 07:00:00,"Arlington Road, Camden Town",2023-03-20 07:28:00,"Fore Street, Guildhall",BikePoints_545,-0.145246,51.539957,POINT (-0.145 51.540)


In [10]:
columns = ['Start.station','End.station','geometry']
filtered_df= filtered_df[columns]
filtered_df

Unnamed: 0,Start.station,End.station,geometry
8308,"Empire Square, The Borough","Borough High Street, The Borough",POINT (-0.090 51.501)
8342,"Northington Street , Holborn","Red Lion Street, Holborn",POINT (-0.114 51.522)
8345,"Newgate Street , St. Paul's","New Fetter Lane, Holborn",POINT (-0.099 51.515)
8346,"Clerkenwell Green, Clerkenwell","St. John Street, Finsbury",POINT (-0.105 51.523)
8349,"Sadlers Sports Centre, Finsbury","Bunhill Row, Moorgate",POINT (-0.099 51.525)
...,...,...,...
574310,"Newgate Street , St. Paul's","Swan Street, The Borough",POINT (-0.099 51.515)
574311,"Smugglers Way, Wandsworth","Smugglers Way, Wandsworth",POINT (-0.192 51.461)
574312,"Swan Street, The Borough","Strata, Elephant & Castle",POINT (-0.093 51.500)
574325,"Arlington Road, Camden Town","Fore Street, Guildhall",POINT (-0.145 51.540)


In [11]:
# Suppose filtered_df is a filtered version of the original dataframe df
filtered_df1 = filtered_df.copy()

# Now you can modify filtered_df without warnings
filtered_df1['Route'] = filtered_df1['Start.station'] + ' - ' + filtered_df1['End.station']
df_route_counts = filtered_df1.groupby('Route').size().reset_index(name='Counts')
df_route_counts = df_route_counts.sort_values('Counts', ascending=False)


In [12]:
df_route_counts[['Start.station', 'End.station']] = df_route_counts['Route'].str.split(' - ', expand=True)

In [13]:
df_route_counts

Unnamed: 0,Route,Counts,Start.station,End.station
140,"Ackroyd Drive, Bow - Maplin Street, Mile End",98,"Ackroyd Drive, Bow","Maplin Street, Mile End"
30662,"Waterloo Station 3, Waterloo - Newgate Street ...",85,"Waterloo Station 3, Waterloo","Newgate Street , St. Paul's"
22820,"Rainville Road, Hammersmith - Barons Court Sta...",77,"Rainville Road, Hammersmith","Barons Court Station, West Kensington"
30659,"Waterloo Station 3, Waterloo - New Fetter Lane...",70,"Waterloo Station 3, Waterloo","New Fetter Lane, Holborn"
30554,"Waterloo Station 3, Waterloo - Cheapside, Bank",68,"Waterloo Station 3, Waterloo","Cheapside, Bank"
...,...,...,...,...
19049,"Nantes Close, Wandsworth - St. John's Road, Cl...",1,"Nantes Close, Wandsworth","St. John's Road, Clapham Junction"
19048,"Nantes Close, Wandsworth - St Mary's Hospital,...",1,"Nantes Close, Wandsworth","St Mary's Hospital, Paddington"
19047,"Nantes Close, Wandsworth - South Park, Sands End",1,"Nantes Close, Wandsworth","South Park, Sands End"
7719,"Crimscott Street, Bermondsey - Braham Street, ...",1,"Crimscott Street, Bermondsey","Braham Street, Aldgate"


In [14]:
df_route_counts.to_csv('output_new.csv', index=False)