## 1. Selecting bicycle accidents

#### 1.1 Filter out bicycle crashes (88383), remove duplicate rows with accident_index (86810)

In [None]:
import pandas as pd
import geopandas as gp
import matplotlib.pyplot as plt
from shapely.geometry import Point
import contextily as ctx

In [None]:
# Load the vehicle-level records. accident_index is read as text so that
# identifiers are handled the same way as in the later merge step, which also
# reads this column with dtype=str.
df = pd.read_csv(
    "Cycling/GOV/00_dft-road-casualty-statistics-vehicle-last-5-years.csv",
    dtype={'accident_index': str},
)

# Keep only rows whose vehicle_type code is 1.
# NOTE(review): code 1 is presumably "pedal cycle" in the DfT coding scheme - confirm.
# A single boolean filter is sufficient; the .copy() makes df an independent
# frame so later modifications do not act on a slice of the raw table.
df = df[df["vehicle_type"] == 1].copy()

print(df)

In [None]:
# Keep the first row seen for every accident_index and discard the repeats
is_repeat = df.duplicated(subset='accident_index')
df = df[~is_repeat]

# Persist the de-duplicated table for the next step
df.to_csv(
    "Cycling/GOV_Selected/01_Pedal Cycle_accident reference_clean_2.csv",
    index=False,
)

print(df)

#### 1.2 Merging of 'Accident' data sets (86810)

In [None]:
# Load the accident table and the filtered pedal-cycle table; accident_index
# is forced to string in both so the join key has the same dtype on each side
df_accidents = pd.read_csv(
    "Cycling/GOV/00_dft-road-casualty-statistics-accident-last-5-years.csv",
    dtype={'accident_index': str},
)
df_pedal_cycle = pd.read_csv(
    "Cycling/GOV_Selected/01_Pedal Cycle_accident reference_clean_2.csv",
    dtype={'accident_index': str},
)

# Inner join on accident_index: only accidents present in both tables survive
merged_df = df_pedal_cycle.merge(df_accidents, how='inner', on='accident_index')

# Show the joined table
print(merged_df)

# Write the joined table to disk
merged_df.to_csv("Cycling/GOV_Selected/02_Pedal Cycle_Accident_UK_2.csv", index=False)

#### 1.3 Filtering out cycle crashes in London (24820)

In [None]:
from shapely.geometry import Point

# Boundary polygon(s) used to clip the accident points
Outer = gp.read_file("London shp/merged_london.shp")

CycleAccident = pd.read_csv("Cycling/GOV_Selected/02_Pedal Cycle_Accident_UK_2.csv")

# A record without coordinates cannot be placed on the map; drop such rows
# explicitly instead of building points with NaN coordinates (they could never
# fall inside the boundary, so the joined result is unaffected).
coordinate_columns = ['location_easting_osgr', 'location_northing_osgr']
CycleAccident = CycleAccident.dropna(subset=coordinate_columns)

x = CycleAccident['location_easting_osgr']
y = CycleAccident['location_northing_osgr']

# Build point geometries from the easting/northing columns
geometry = gp.points_from_xy(x, y)

# Convert CycleAccident to a GeoDataFrame.
# NOTE(review): the points are labelled with the boundary's CRS; this assumes
# the shapefile uses the same grid as the easting/northing columns - confirm.
gdf_cycle_accident = gp.GeoDataFrame(CycleAccident, crs=Outer.crs, geometry=geometry)

# Spatial join: keep the points that lie within Outer.
# `predicate=` replaces the deprecated `op=` keyword of sjoin.
points_inside_outer = gp.sjoin(gdf_cycle_accident, Outer, predicate='within')

print(points_inside_outer)

# Save as a new CSV file
points_inside_outer.to_csv("Cycling/GOV_Selected/03_Pedal Cycle_Accident_London_2.csv", index=False)

## 2. Find the roads on which the bicycle accidents in the GLA occurred

In [None]:
# Install contextily, which a later cell uses for ctx.add_basemap.
# NOTE(review): a bare `pip install` line is not Python; presumably this relies
# on the notebook's automagic - `%pip install contextily` is the explicit form.
pip install contextily

In [None]:
# NOTE(review): no cell visible in this notebook imports basemap; this install
# may be unnecessary - confirm before keeping it.
pip install basemap

In [None]:
import pandas as pd
import geopandas as gp
import matplotlib.pyplot as plt
from shapely.geometry import Point
import contextily as ctx

#### 2.1 Read the Greater London Area (GLA) boundary

In [None]:
# Read the London boundary shapefile (as a GeoDataFrame) and the count-location
# table (as a plain DataFrame; it is converted to points in a later cell)
Outer = gp.read_file("London shp/merged_london.shp")
CountPoint = pd.read_csv('Data/0-Count locations.csv')

#### 2.2 Read the GLA roads

In [None]:
# Road link geometries; the later cells search this layer for the road
# nearest to each accident point
Road_within_Outer = gp.read_file("Road/oproad shp/Road_Outer/RoadLink_Outer_2.shp")

In [None]:
# Convert easting and northing to Points
geometry = [
    Point(easting, northing)
    for easting, northing in zip(CountPoint['Easting (UK Grid)'], CountPoint['Northing (UK Grid)'])
]

# Declare British National Grid (EPSG:27700) when the GeoDataFrame is built,
# rather than assigning to .crs afterwards
CountPoint = gp.GeoDataFrame(CountPoint, geometry=geometry, crs='epsg:27700')

# Label the boundary as EPSG:27700 as well. set_crs(allow_override=True) is the
# supported way to replace an existing CRS label; like the attribute assignment
# it replaces, it does NOT reproject the coordinates.
# NOTE(review): if the shapefile is stored in a different CRS, Outer.to_crs()
# is what is needed here instead - confirm the source CRS.
Outer = Outer.set_crs('epsg:27700', allow_override=True)

#### 2.3 Print out the GLA boundary with all roads

In [None]:
# Create the figure and axes
fig, ax = plt.subplots(figsize=(50, 50))

# Draw the boundary twice: a faint translucent fill, then a solid outline
Outer.plot(ax=ax, color='red', alpha=0.05, edgecolor='red',linewidth=3, label='Shapefile')
Outer.plot(ax=ax, color='none', edgecolor='red',linewidth=3, label='Shapefile')

# Plot road of GLA
Road_within_Outer.plot(ax=ax, color='gray',linewidth=0.8, label='Road')

# Add a light tile basemap using contextily.
# The Stamen providers are no longer available in current provider lists and
# the Stamen-hosted tiles have been retired, so CartoDB Positron (a comparable
# light grey style) is used instead.
ctx.add_basemap(ax, crs=CountPoint.crs, source=ctx.providers.CartoDB.Positron, alpha=1)

# Title and grid
ax.grid(False)
ax.set_title("Survey Region: London", fontsize=40)

# Hide the axis tick marks and coordinate numbers
ax.set_xticks([])
ax.set_yticks([])

# save as image
# plt.savefig('Output_jpg/01_Survey Region.png', bbox_inches='tight', dpi=600)

# plot graphic
plt.show()

#### 2.4 Plot the points of all cycle accidents in London over a five-year period

In [None]:
# Cycle accidents already clipped to London (output of step 1.3)
LondonAccident = pd.read_csv(
    "Cycling/GOV_Selected/03_Pedal Cycle_Accident_London_2.csv"
)

# Easting / northing columns, reused by the plotting and nearest-road cells
x, y = (
    LondonAccident['location_easting_osgr'],
    LondonAccident['location_northing_osgr'],
)

In [None]:
# Create the figure (the stray plt.show() that used to precede this line had no
# figure to display and has been removed)
fig, ax = plt.subplots(figsize=(50, 50))

# Boundary outline
Outer.plot(ax=ax, color='none', edgecolor='black',linewidth=3, label='Shapefile')

# Accident points, drawn above the roads (zorder=2)
ax.scatter(x, y, s=1, color='red', label='Cycle Accidents', zorder=2)

# Road network underneath the points (zorder=1)
Road_within_Outer.plot(ax=ax, color='gray',linewidth=0.5, label='Road', zorder=1)

ax.legend()
ax.grid(False)
# NOTE(review): the title and file name mention count locations, but this cell
# only plots accidents, roads and the boundary - confirm the intended content.
ax.set_title("Cycle Accidents and Count Locations in London", fontsize=40)

# Hide the axis tick marks and coordinate numbers
ax.set_xticks([])
ax.set_yticks([])

plt.savefig('Output_jpg/05_Cycle Accident and Count Locations_no basemap.png', bbox_inches='tight', dpi=600)

plt.show()

#### 2.5 Calculate the road where the accident occurred

In [None]:
from shapely.geometry import Point

# Figure and axes for the nearest-road map
fig, ax = plt.subplots(figsize=(50, 50))

# Layer 1: boundary outline only (no fill)
boundary_style = dict(color='none', edgecolor='black', linewidth=3, label='Shapefile')
Outer.plot(ax=ax, **boundary_style)

# Layer 2: the full road network as thin grey lines beneath the points
road_style = dict(color='gray', linewidth=0.3, label='Road', zorder=1)
Road_within_Outer.plot(ax=ax, **road_style)

# Layer 3: cycle accident locations as small red dots
accident_style = dict(s=1, color='red', label='Cycle Accidents', zorder=2)
ax.scatter(x, y, **accident_style)

def find_closest(row, roads):
    """Return the index label of the entry in ``roads`` nearest to ``row``.

    Parameters:
        row: object exposing a ``geometry`` attribute (one row of the points table).
        roads: object whose ``geometry`` offers ``distance(other)`` returning a
            labelled series of distances.

    Returns:
        The index label at which the distance to ``row.geometry`` is smallest.
    """
    distances_to_roads = roads.geometry.distance(row.geometry)
    nearest_label = distances_to_roads.idxmin()
    return nearest_label

# Build one point geometry per accident from the easting/northing series
points = gp.GeoDataFrame(geometry=gp.points_from_xy(x, y))

# For every point, store the index label of the nearest road.
# NOTE(review): this measures the distance from each point to every road, so
# the cost grows with points x roads; a spatial-index nearest join would scale
# better if this cell becomes too slow.
points['closest_road'] = points.apply(find_closest, roads=Road_within_Outer, axis=1)

# points['closest_road'] now holds, per point, the index of its nearest road.
# Select those roads from the road layer (one row per accident, so a road that
# is nearest to several accidents appears several times).
closest_roads_accident = Road_within_Outer.loc[points['closest_road']]

# Plot the nearest roads; each distinct road is drawn once, since repeating
# identical lines adds drawing time without changing the picture
roads_to_draw = closest_roads_accident[~closest_roads_accident.index.duplicated()]
roads_to_draw.plot(ax=ax, color='red', linewidth=0.5, label='Accident Closest Roads')

# Add legend
ax.legend()
ax.set_title("Cycle Accidents with Road Segment in London 2017-2021", fontsize=60)

# Save as image
# plt.savefig('Output_jpg/06_Cycle Accident Locations with Road Segment.png', bbox_inches='tight', dpi=600)

# Plot graphic
plt.show()

#### 2.6 Store the road on which each accident occurred in the accident dataset

In [None]:
# Add other attributes of the nearest road to points
attributes = ['identifier', 'class', 'name1', 'formOfWay', 'length']
for attribute in attributes:
    points[f'closest_road_{attribute}'] = points['closest_road'].map(Road_within_Outer[attribute])

# Convert geometry column to two columns: x and y
coordinate_columns = ['location_easting_osgr', 'location_northing_osgr']
points['location_easting_osgr'], points['location_northing_osgr'] = points.geometry.x, points.geometry.y

# points holds one row per accident, so a location shared by k accidents occurs
# k times on BOTH sides of the merge and would produce k*k rows. Identical
# coordinates have the identical nearest road, so keeping one row per location
# loses nothing and makes the merge many-to-one.
unique_points = points.drop_duplicates(subset=coordinate_columns)

# Merge the original LondonAccident DataFrame with the per-location road data;
# validate= raises if the right-hand side is not unique per location
LondonAccident = LondonAccident.merge(unique_points, on=coordinate_columns, validate='many_to_one')

# Save as csv
LondonAccident.to_csv('Cycling/GOV_Selected/04_Cycle Accident Location_Closest Road.csv', index=False)

In [None]:
# Inspect the merged accident table (row count and the added closest_road_* columns)
print(LondonAccident)

In [None]:
# Keep only the first row for each accident_index
repeated_accident = LondonAccident.duplicated(subset='accident_index')
LondonAccident = LondonAccident[~repeated_accident]

# Write the cleaned table to a new CSV file
LondonAccident.to_csv(
    "Cycling/GOV_Selected/05_Cycle Accident Location_Closest Road_clean.csv",
    index=False,
)

print(LondonAccident)

## 3. Counting the number of bicycle accidents on each road

In [None]:
import pandas as pd

In [None]:
# Cleaned cycle-accident table with the closest_road_* columns (written in step 2.6)
Accident = pd.read_csv('Cycling/GOV_Selected/05_Cycle Accident Location_Closest Road_clean.csv')

In [None]:
# Number of cycle accidents per road identifier.
# This must read from `Accident` (the cycle-accident table loaded in the cell
# above). `Count` is only created in section 5 and holds ALL accidents, so
# using it here either raises NameError or silently counts the wrong table.
Accident_road_identifier = Accident['closest_road_identifier'].value_counts()
print(Accident_road_identifier)

In [None]:
# Turn the per-road counts into a two-column table: the road identifier
# (taken from the index) and the number of cycle accidents on that road
Accidents_df = (
    Accident_road_identifier
    .rename_axis('closest_road_identifier')
    .reset_index(name='accidents')
)
Accidents_df.to_csv(
    'Cycling/GOV_Selected/06_Cycle Accident_road_identifier.csv',
    index=False,
)

## 4. Find out the roads where all the accidents took place in GLA 

In [None]:
# Read the London boundary shapefile
Outer = gp.read_file("London shp/merged_london.shp")

In [None]:
# Road link geometries searched for the road nearest to each accident
Road_within_Outer = gp.read_file("Road/oproad shp/Road_Outer/RoadLink_Outer_2.shp")

In [None]:
# Label the boundary as British National Grid (EPSG:27700).
# set_crs(allow_override=True) is the supported way to replace an existing CRS
# label; like the attribute assignment it replaces, it does NOT reproject.
# NOTE(review): if the shapefile is stored in a different CRS, use
# Outer.to_crs('epsg:27700') instead - confirm the source CRS.
Outer = Outer.set_crs('epsg:27700', allow_override=True)

In [None]:
# All accidents located inside the boundary (every vehicle type)
LondonAccident = pd.read_csv(
    "Cycling/GOV_Selected_2/01_ALL_accident_Location_outer.csv"
)

# Easting / northing columns, reused by the plotting and nearest-road cells
x, y = (
    LondonAccident['location_easting_osgr'],
    LondonAccident['location_northing_osgr'],
)

#### 4.1 Plot all accidents in the GLA

In [None]:
# Create the figure (the stray plt.show() that used to precede this line had no
# figure to display and has been removed)
fig, ax = plt.subplots(figsize=(50, 50))

# Plot the boundary shapefile
Outer.plot(ax=ax, color='none', edgecolor='red',linewidth=3, label='Shapefile')

# Plot the road network
Road_within_Outer.plot(ax=ax, color='gray',linewidth=0.8, label='Road', zorder=1)

# Plot the accident points
ax.scatter(x, y, s=3, color='orange', label='All Accidents', zorder=2)

# Add the legend
ax.legend()
ax.grid(False)
ax.set_title("All Accidents Locations in London", fontsize=40)

# Hide the axis tick marks and coordinate numbers
ax.set_xticks([])
ax.set_yticks([])

# Show the figure
plt.show()

#### 4.2 Calculate the road where all the accidents occurred

In [None]:
from shapely.geometry import Point

# Figure and axes for the nearest-road map of all accidents
fig, ax = plt.subplots(figsize=(50, 50))

# Layer 1: boundary outline only (no fill)
boundary_style = dict(color='none', edgecolor='black', linewidth=3, label='Shapefile')
Outer.plot(ax=ax, **boundary_style)

# Layer 2: the full road network as thin grey lines beneath the points
road_style = dict(color='gray', linewidth=0.3, label='Road', zorder=1)
Road_within_Outer.plot(ax=ax, **road_style)

# Layer 3: every accident location as a small orange dot
accident_style = dict(s=1, color='orange', label='All Accidents', zorder=2)
ax.scatter(x, y, **accident_style)

def find_closest(row, roads):
    """Return the index label of the road whose geometry is nearest to ``row.geometry``.

    ``roads.geometry.distance`` yields one distance per road; the label of the
    smallest one is returned.
    """
    target = row.geometry
    per_road_distance = roads.geometry.distance(target)
    return per_road_distance.idxmin()

# Build one point geometry per accident from the easting/northing series
points = gp.GeoDataFrame(geometry=gp.points_from_xy(x, y))

# For every point, store the index label of the nearest road.
# NOTE(review): this measures the distance from each point to every road, so
# the cost grows with points x roads; a spatial-index nearest join would scale
# better if this cell becomes too slow.
points['closest_road'] = points.apply(find_closest, roads=Road_within_Outer, axis=1)

# One selected road row per accident (a road nearest to several accidents
# therefore appears several times)
closest_roads_accident = Road_within_Outer.loc[points['closest_road']]

# Plot the nearest roads; each distinct road is drawn once, since repeating
# identical lines adds drawing time without changing the picture
roads_to_draw = closest_roads_accident[~closest_roads_accident.index.duplicated()]
roads_to_draw.plot(ax=ax, color='red', linewidth=0.5, label='Accident Closest Roads')

# Add legend
ax.legend()
ax.set_title("All Accidents in London 2017-2021", fontsize=60)

# Plotting graphics
plt.show()

#### 4.3 Store the road on which each accident occurred in the accident dataset

In [None]:
# Add other attributes of the nearest road to points
attributes = ['identifier', 'class', 'name1', 'formOfWay', 'length']
for attribute in attributes:
    points[f'closest_road_{attribute}'] = points['closest_road'].map(Road_within_Outer[attribute])

# Convert geometry column to two columns: x and y
coordinate_columns = ['location_easting_osgr', 'location_northing_osgr']
points['location_easting_osgr'], points['location_northing_osgr'] = points.geometry.x, points.geometry.y

# points holds one row per accident, so a location shared by k accidents occurs
# k times on BOTH sides of the merge and would produce k*k rows - which would
# inflate the per-road accident counts computed from this file. Identical
# coordinates have the identical nearest road, so keeping one row per location
# loses nothing and makes the merge many-to-one.
unique_points = points.drop_duplicates(subset=coordinate_columns)

# Merge the original LondonAccident DataFrame with the per-location road data;
# validate= raises if the right-hand side is not unique per location
LondonAccident = LondonAccident.merge(unique_points, on=coordinate_columns, validate='many_to_one')

# Save as csv
LondonAccident.to_csv('Cycling/GOV_Selected_2/03_Merged_ALL_accident_with_closest_road.csv', index=False)

## 5. Counting the number of all accidents on each road

In [None]:
# All-accident table with the closest_road_* columns (written in step 4.3)
Count = pd.read_csv('Cycling/GOV_Selected_2/03_Merged_ALL_accident_with_closest_road.csv')

In [None]:
# Tally how many rows of the all-accident table fall on each road identifier
Count_road_identifier = Count.loc[:, 'closest_road_identifier'].value_counts()
print(Count_road_identifier)

In [None]:
# Turn the per-road counts into a two-column table: the road identifier
# (taken from the index) and the number of accidents of any kind on that road
Counts_df = (
    Count_road_identifier
    .rename_axis('closest_road_identifier')
    .reset_index(name='all accident counts')
)
Counts_df.to_csv(
    'Cycling/GOV_Selected_2/04_ALL Accident Count_road_identifier.csv',
    index=False,
)

## 6. Calculation of Proportion of Cycle Accidents (PCA)

#### 6.1 Combining the number of bicycle accidents and total accidents on each road segment

In [None]:
import pandas as pd

# Per-road cycle-accident counts (df1) and per-road all-accident counts (df2)
df1 = pd.read_csv('Cycling/GOV_Selected/06_Cycle Accident_road_identifier.csv')
df2 = pd.read_csv('Cycling/GOV_Selected_2/04_ALL Accident Count_road_identifier.csv')

# Inner join on the road identifier: only roads present in both tables remain.
# NOTE(review): this step only places the two counts side by side; no
# proportion column is computed in this cell - confirm where the ratio is derived.
df = df1.merge(df2, how='inner', on='closest_road_identifier')

print(df)

# Write the combined counts to a new CSV file
df.to_csv(
    'Cycling/GOV_Selected_2/05_Cycle_All Accident_Closest Road_count.csv',
    index=False,
)

#### 6.2 Combining the Proportion of Cycle Accidents (PCA) dataset and the bicycle accident dataset

In [None]:
# Cycle accidents with their nearest road (df3) and the per-road counts (df4)
df3 = pd.read_csv('Cycling/GOV_Selected/05_Cycle Accident Location_Closest Road_clean.csv')
df4 = pd.read_csv('Cycling/GOV_Selected_2/05_Cycle_All Accident_Closest Road_count.csv')

# Left join: every cycle accident is kept and gains the counts of its road
# (missing values where the road has no entry in df4)
df2 = df3.merge(df4, how='left', on='closest_road_identifier')

print(df2)

# Write the enriched accident table to a new CSV file
df2.to_csv(
    'Cycling/GOV_Selected_2/06_Cycle_All Accident_Accident Rate.csv',
    index=False,
)