# Load libraries

In [82]:
# load required libraries
import pandas as pd
import geopandas as gpd
import numpy as np
from shapely.geometry import LineString, Point, MultiLineString
import networkx as nx
import folium
from shapely import wkt


# 1. Boundary Dataset

In [83]:
# Read files
boundary = gpd.read_file('../raw_data/postcodes.geojson')


In [84]:
# drop unnecessary columns
boundary = boundary[['mccid_gis','geo_point_2d','geometry']].copy()

# rename columns 
boundary.rename(columns={'geometry': 'polygon_geometry'}, inplace=True)
boundary.head(2)

Unnamed: 0,mccid_gis,geo_point_2d,polygon_geometry
0,5,"{'lon': 144.98291661287664, 'lat': -37.8375942...","MULTIPOLYGON (((144.98502 -37.84568, 144.98031..."
1,13,"{'lon': 144.98255916174276, 'lat': -37.8144213...","MULTIPOLYGON (((144.97136 -37.80772, 144.97319..."


mapping is based on data from https://www.melbourne.vic.gov.au/about-melbourne/research-and-statistics/city-economy/census-land-use-employment/Pages/clue-small-area-and-block-maps.aspx


In [85]:
mccid_to_lga = {
    '1': 'Kensington',
    '2': 'Port Melbourne',
    '3': 'North Melbourne',
    '4': 'West Melbourne',
    '5': 'South Yarra',
    '6': 'Parkville',
    '7': 'Carlton',
    '8': 'Docklands',
    '9': 'Southbank',
    '10': 'Carlton',
    '11': 'Parkville',
    '12': 'Melbourne (Remainder)',
    '13': 'East Melbourne',
    '14': 'Docklands',
    '15': 'Melbourne(CBD)',
    '16': 'Parkville',
    '17': 'Kensington',
}

In [86]:
boundary['area_name'] = boundary['mccid_gis'].map(mccid_to_lga)
boundary

Unnamed: 0,mccid_gis,geo_point_2d,polygon_geometry,area_name
0,5,"{'lon': 144.98291661287664, 'lat': -37.8375942...","MULTIPOLYGON (((144.98502 -37.84568, 144.98031...",South Yarra
1,13,"{'lon': 144.98255916174276, 'lat': -37.8144213...","MULTIPOLYGON (((144.97136 -37.80772, 144.97319...",East Melbourne
2,17,"{'lon': 144.92130845946264, 'lat': -37.7921555...","MULTIPOLYGON (((144.93608 -37.80231, 144.93530...",Kensington
3,2,"{'lon': 144.9122365023871, 'lat': -37.83135640...","MULTIPOLYGON (((144.92008 -37.81908, 144.92062...",Port Melbourne
4,4,"{'lon': 144.92545908085012, 'lat': -37.8091092...","MULTIPOLYGON (((144.92008 -37.81908, 144.91703...",West Melbourne
5,6,"{'lon': 144.95587348168718, 'lat': -37.7986894...","MULTIPOLYGON (((144.95360 -37.79851, 144.95459...",Parkville
6,7,"{'lon': 144.96814562437285, 'lat': -37.8004517...","MULTIPOLYGON (((144.96463 -37.79660, 144.96523...",Carlton
7,8,"{'lon': 144.9527168910556, 'lat': -37.82272521...","MULTIPOLYGON (((144.94836 -37.82339, 144.94825...",Docklands
8,9,"{'lon': 144.96197101435533, 'lat': -37.8252848...","MULTIPOLYGON (((144.96825 -37.81923, 144.96845...",Southbank
9,10,"{'lon': 144.96355128708092, 'lat': -37.7870687...","MULTIPOLYGON (((144.96523 -37.79314, 144.96492...",Carlton


# 2. Accident Dataset

In [87]:
# lower case the column names for ease
accident = gpd.read_file('../raw_data/accidents.geojson')
accident.rename(columns= lambda x : x.lower(), inplace = True)
accident.head(2)

Unnamed: 0,accident_no,accident_date,accident_time,accident_type,day_of_week,dca_code,light_condition,police_attend,road_geometry,severity,...,heavyvehicle,passengervehicle,motorcycle,pt_vehicle,deg_urban_name,srns,rma,divided,stat_div_name,geometry
0,T20120000009,20120101,22500,Collision with a fixed object,Sunday,LEFT OFF CARRIAGEWAY INTO OBJECT/PARKED VEHICLE,Dark No street lights,Yes,Not at intersection,Other injury accident,...,0.0,1.0,0.0,0.0,RURAL_VICTORIA,C,Arterial Other,Undivided,Country,POINT (145.72671 -38.23496)
1,T20120000012,20120101,20000,Collision with vehicle,Sunday,CROSS TRAFFIC(INTERSECTIONS ONLY),Dark Street lights on,Yes,Cross intersection,Serious injury accident,...,0.0,2.0,0.0,0.0,MELB_URBAN,,Arterial Highway,Divided,Metro,POINT (145.16140 -37.90354)


In [88]:
# check in which local area each accident point lies
for index, row in accident.iterrows():
    point = row['geometry']
    found_polygon = False
    
    for _, boundary_row in boundary.iterrows():
        if point.within(boundary_row['polygon_geometry']):
            accident.loc[index, 'mccid_gis'] = boundary_row['mccid_gis']
            accident.loc[index, 'suburb'] = boundary_row['area_name']
            found_polygon = True
            break
    
    if not found_polygon:
        accident.loc[index, 'mccid_gis'] = 'NA'


In [89]:
# filter the accidents in Melbourne area and with bicycles
accident_df = accident[(accident['bicyclist'] != 0) & (accident['mccid_gis'] != 'NA')].copy()
accident_df.head(2)

Unnamed: 0,accident_no,accident_date,accident_time,accident_type,day_of_week,dca_code,light_condition,police_attend,road_geometry,severity,...,motorcycle,pt_vehicle,deg_urban_name,srns,rma,divided,stat_div_name,geometry,mccid_gis,suburb
152,T20120000460,20120105,194500,Collision with vehicle,Thursday,VEHICLE STRIKES DOOR OF PARKED/STATIONARY VEHICLE,Dusk/Dawn,No,Not at intersection,Other injury accident,...,0.0,0.0,MELB_URBAN,,Local Road,Undivided,Metro,POINT (144.96755 -37.81770),15,Melbourne(CBD)
274,T20120000819,20120112,85000,Collision with vehicle,Thursday,VEHICLE STRIKES DOOR OF PARKED/STATIONARY VEHICLE,Day,Yes,T intersection,Serious injury accident,...,0.0,0.0,MELB_URBAN,,Local Road,Divided,Metro,POINT (144.96950 -37.79737),7,Carlton


In [90]:
accident_df['accident_date'] = pd.to_datetime(accident_df['accident_date'].astype(str), format='%Y%m%d')
accident_df['accident_time'] = pd.to_datetime(accident_df['accident_time'].astype(str).str.zfill(6), format='%H%M%S').dt.time

# Combine 'accident_date' and 'accident_time' into 'date_time' column
accident_df['date_time'] = pd.to_datetime(accident_df['accident_date'].astype(str) + accident_df['accident_time'].astype(str), format='%Y-%m-%d%H:%M:%S')
# drop columns
accident_df.drop(columns=['accident_date','accident_time','dca_code','police_attend','run_offroad','node_id','vicgrid_x', 'vicgrid_y','total_persons','inj_or_fatal','deg_urban_name','srns','stat_div_name'], inplace=True)
# rename severity types into 3 classes
accident_df['severity'] = accident_df['severity'].map({'Other injury accident': 'Minor injury', 'Serious injury accident': 'Serious injury','Fatal accident': 'Fatal accident'})

In [91]:
print(f"Total accidents in Inner Melbourne: {len(accident[accident['mccid_gis'] != 'NA'])}")
print(f"Bicycle accidents in Inner Melbourne: {len(accident_df)}")


Total accidents in Inner Melbourne: 8253
Bicycle accidents in Inner Melbourne: 2279


# 3. Bicycle Dataset

In [92]:
# Load the GeoJSON file
geojson_file = '../raw_data/bikes.geojson'
bikelanes_gdf = gpd.read_file(geojson_file)


In [93]:
# Drop columns 'info', 'status', 'notes', 'direction', and 'name'
bikelanes_gdf.drop(columns=['info', 'status', 'notes', 'direction', 'name'], inplace=True)

# Add a new column 'lane_number' with values from 1 to the length of the DataFrame
bikelanes_gdf['lane_number'] = range(1, len(bikelanes_gdf) + 1)

# Reset the index
bikelanes_gdf.reset_index(drop=True, inplace=True)

# Select specific columns 'lane_number', 'geo_point_2d', 'type', and 'geometry' and order them
bikelanes_gdf = bikelanes_gdf[['lane_number', 'geo_point_2d', 'type', 'geometry']]

bikelanes_gdf.head(5)


Unnamed: 0,lane_number,geo_point_2d,type,geometry
0,1,"{'lon': 144.94940742049585, 'lat': -37.7922299...",Off-Road Bike Route,"MULTILINESTRING ((144.95178 -37.79023, 144.951..."
1,2,"{'lon': 144.9177809870309, 'lat': -37.79210545...",Off-Road Bike Route,"MULTILINESTRING ((144.91256 -37.79449, 144.912..."
2,3,"{'lon': 144.95417546909417, 'lat': -37.7811595...",Off-Road Bike Route,"MULTILINESTRING ((144.95143 -37.78076, 144.951..."
3,4,"{'lon': 144.93230774579578, 'lat': -37.8038877...",Off-Road Bike Route,"MULTILINESTRING ((144.92933 -37.80321, 144.930..."
4,5,"{'lon': 144.95799608462175, 'lat': -37.8051149...",On-Road Bike Lane,"MULTILINESTRING ((144.95842 -37.80604, 144.958..."


In [94]:
# for bike lanes
def find_nearest_lane(accident,road_dataframe):
    buffer_distance = 0.005
    buffer_zone = accident['geometry'].buffer(buffer_distance)
    near_bikelanes = road_dataframe[road_dataframe.geometry.intersects(buffer_zone)].reset_index()
    try:
        if near_bikelanes.empty:
            return None 
        else:
            nearest_bikelane = near_bikelanes.loc[[near_bikelanes.geometry.distance(accident['geometry']).idxmin()]]
            return nearest_bikelane['lane_number'].iloc[0]
    except ValueError:
        print(nearest_bikelane['lane_number'])
        


# Perform a spatial join to associate each accident event with the corresponding bike lane
accident_df['bike_lane_id'] = accident_df.apply(lambda row: find_nearest_lane(row,bikelanes_gdf), axis=1)
accident_df.dropna(inplace=True)

# Count the number of accidents associated with each bike lane
accident_counts = accident_df['bike_lane_id'].value_counts()
accident_counts_df = accident_counts.reset_index()
accident_counts_df.columns = ['bike_lane_id', 'count']

# Add a new column to the bike lanes dataset with the accident category
bikelanes_joined = bikelanes_gdf.merge(accident_counts_df, left_on='lane_number', right_on='bike_lane_id', how='left')
bikelanes_joined['count'] = bikelanes_joined['count'].fillna(0)
bikelanes_joined.drop(columns=['bike_lane_id'], inplace=True)


  nearest_bikelane = near_bikelanes.loc[[near_bikelanes.geometry.distance(accident['geometry']).idxmin()]]

  nearest_bikelane = near_bikelanes.loc[[near_bikelanes.geometry.distance(accident['geometry']).idxmin()]]

  nearest_bikelane = near_bikelanes.loc[[near_bikelanes.geometry.distance(accident['geometry']).idxmin()]]

  nearest_bikelane = near_bikelanes.loc[[near_bikelanes.geometry.distance(accident['geometry']).idxmin()]]

  nearest_bikelane = near_bikelanes.loc[[near_bikelanes.geometry.distance(accident['geometry']).idxmin()]]

  nearest_bikelane = near_bikelanes.loc[[near_bikelanes.geometry.distance(accident['geometry']).idxmin()]]

  nearest_bikelane = near_bikelanes.loc[[near_bikelanes.geometry.distance(accident['geometry']).idxmin()]]

  nearest_bikelane = near_bikelanes.loc[[near_bikelanes.geometry.distance(accident['geometry']).idxmin()]]

  nearest_bikelane = near_bikelanes.loc[[near_bikelanes.geometry.distance(accident['geometry']).idxmin()]]

  nearest_bikelane = near_b

In [95]:
# Extract the cycle lanes
cycle_lanes = []

for geom in bikelanes_joined.geometry:
    if isinstance(geom, LineString):
        cycle_lanes.append(geom.coords)
    elif isinstance(geom, MultiLineString):
        for line in geom.geoms:
            cycle_lanes.append(line.coords)

# Create a graph representation of the cycle lanes
G = nx.Graph()
for line_coords in cycle_lanes:
    for i in range(len(line_coords) - 1):
        G.add_edge(line_coords[i], line_coords[i + 1])


In [96]:
m = folium.Map(location=[-37.790231531464,144.951780451295, ], zoom_start=12,tiles="cartodb positron")

# BICYCLE ACCIDENTS
for index, row in accident_df.iterrows():
    folium.CircleMarker([row['latitude'], row['longitude']], radius=1, weight=1, opacity=1).add_to(m)


# BOUNDARIES
colors = [
    'blue', 'orange', 'green', 'red', 'purple', 'brown', 'pink', 'gray',
    'olive', 'lightblue', 'darkgreen', 'darkorange', 'lavender', 'magenta',
    'limegreen', 'lightblue2', 'lightpink']

for index,row in boundary.iterrows():
    folium.Choropleth(
        geo_data=row['polygon_geometry'],

        fill_color=colors[index],
        fill_opacity=0.2,
        line_opacity=1
        
    ).add_to(m)
  


# BICYCLE LANES
i = 0
# Add the cycle lanes to the map
for line_coords in cycle_lanes:
    
    path_coords = [list(coord) for coord in line_coords]
    path_coords = [[j,i] for i,j in path_coords]
    colors = ['yellow','yellow']
    if i%2 == 0:
        color = colors[0]
    else:
        color=colors[1]
    folium.PolyLine(locations=path_coords, color=color,opacity=0.2 ,tooltip = f'{path_coords[0],path_coords[-1]}').add_to(m)
    i+=1    

m

# 4. Crime dataset

In [97]:
crime = pd.read_excel('../raw_data/crimes.xlsx', sheet_name='Table 03')


In [98]:
# rename columns and drop unneccessary columns
crime.rename(columns={'Year':'year','Local Government Area':'lga','Suburb/Town Name':'suburb', 'Offence Division':'offence_division','Offence Subdivision':'offence_subdivision','Offence Subgroup':'offence_subgroup','Offence Count':'count'}, inplace=True)
crime.drop(columns=['Year ending','Postcode'], inplace=True)


In [99]:
# filter crime cases in Melbourne city only
crime_df = crime[crime['lga']=='Melbourne'].copy()

# create new column record_number as primary key
crime_df.loc[:,'record_number'] = range(1, len(crime_df) + 1)

# reorder columns 
crime_df = crime_df[['record_number','year','lga','suburb','offence_division','offence_subdivision','offence_subgroup','count']]
crime_df = crime_df.reset_index(drop=True)
crime_df.head()

Unnamed: 0,record_number,year,lga,suburb,offence_division,offence_subdivision,offence_subgroup,count
0,1,2023,Melbourne,Carlton,A Crimes against the person,A20 Assault and related offences,A212 Non-FV Serious assault,1
1,2,2023,Melbourne,Carlton,A Crimes against the person,A20 Assault and related offences,A232 Non-FV Common assault,1
2,3,2023,Melbourne,Carlton,A Crimes against the person,A50 Robbery,A51 Aggravated robbery,1
3,4,2023,Melbourne,Carlton,A Crimes against the person,"A70 Stalking, harassment and threatening behav...",A722 Non-FV Harassment and private nuisance,1
4,5,2023,Melbourne,Carlton,A Crimes against the person,"A70 Stalking, harassment and threatening behav...",A732 Non-FV Threatening behaviour,1


In [100]:
# remove criminal codes for 
crime_df.loc[:, 'offence_subgroup'] = crime_df['offence_subgroup'].apply(lambda x: ' '.join(x.split()[1:]))
crime_df.loc[:, 'offence_subdivision'] = crime_df['offence_subdivision'].apply(lambda x: ' '.join(x.split()[1:]))


In [101]:
crime_df['offence_subdivision'].unique()

array(['Assault and related offences', 'Robbery',
       'Stalking, harassment and threatening behaviour',
       'Property damage', 'Burglary/Break and enter', 'Theft',
       'Deception', 'Disorderly and offensive conduct',
       'Public nuisance offences', 'Justice procedures',
       'Dangerous and negligent acts endangering people',
       'crimes against the person', 'Arson',
       'Drug dealing and trafficking', 'Cultivate or manufacture drugs',
       'Drug use and possession', 'Other drug offences',
       'Weapons and explosives offences', 'Breaches of orders',
       'Transport regulation offences',
       'Other government regulatory offences', 'Miscellaneous offences',
       'Public security offences', 'Regulatory driving offences',
       'Bribery'], dtype=object)

In [102]:
# filter out offence group relevant to bikers and commuters
common_biker_crimes = ['Assault and related offences','Stalking, harassment and threatening behaviour','Theft','Drug dealing and trafficking']
crime_df = crime_df[crime_df['offence_subdivision'].isin(common_biker_crimes)]

In [103]:
crime_df['count'] = crime_df['count'].astype(int)
crime_df

Unnamed: 0,record_number,year,lga,suburb,offence_division,offence_subdivision,offence_subgroup,count
0,1,2023,Melbourne,Carlton,A Crimes against the person,Assault and related offences,Non-FV Serious assault,1
1,2,2023,Melbourne,Carlton,A Crimes against the person,Assault and related offences,Non-FV Common assault,1
3,4,2023,Melbourne,Carlton,A Crimes against the person,"Stalking, harassment and threatening behaviour",Non-FV Harassment and private nuisance,1
4,5,2023,Melbourne,Carlton,A Crimes against the person,"Stalking, harassment and threatening behaviour",Non-FV Threatening behaviour,1
8,9,2023,Melbourne,Carlton,B Property and deception offences,Theft,Steal from a motor vehicle,4
...,...,...,...,...,...,...,...,...
7650,7651,2014,Melbourne,Port Melbourne,B Property and deception offences,Theft,Steal from a motor vehicle,20
7651,7652,2014,Melbourne,Port Melbourne,B Property and deception offences,Theft,Theft of a bicycle,3
7652,7653,2014,Melbourne,Port Melbourne,B Property and deception offences,Theft,Receiving or handling stolen goods,4
7653,7654,2014,Melbourne,Port Melbourne,B Property and deception offences,Theft,Other theft,41


In [104]:
crime_df['offence_subgroup'].unique()

array(['Non-FV Serious assault', 'Non-FV Common assault',
       'Non-FV Harassment and private nuisance',
       'Non-FV Threatening behaviour', 'Steal from a motor vehicle',
       'Steal from a retail store', 'Theft of a bicycle', 'Other theft',
       'FV Serious assault',
       'Assault police, emergency services or other authorised officer',
       'FV Common assault', 'FV Stalking', 'Non-FV Stalking',
       'FV Harassment and private nuisance', 'FV Threatening behaviour',
       'Motor vehicle theft', 'Receiving or handling stolen goods',
       'Drug dealing', 'Drug trafficking', 'Fare evasion'], dtype=object)

In [105]:
# Get the count of each unique value in the 'offence_subgroup' column
counts = crime_df['offence_subgroup'].value_counts()

# Display the counts
print(counts)

offence_subgroup
Other theft                                                       168
Steal from a motor vehicle                                        164
Motor vehicle theft                                               160
Theft of a bicycle                                                159
Non-FV Common assault                                             158
Non-FV Serious assault                                            153
Receiving or handling stolen goods                                149
Steal from a retail store                                         148
FV Common assault                                                 139
FV Serious assault                                                133
Assault police, emergency services or other authorised officer    131
Non-FV Threatening behaviour                                      131
Drug trafficking                                                  127
FV Threatening behaviour                                          116
Non

In [106]:
# List of crime subgroups to remove
crimes_to_remove = [
    'Steal from a motor vehicle', 'Steal from a retail store', 'FV Serious assault',
    'Assault police, emergency services or other authorised officer', 'FV Common assault',
    'FV Stalking', 'Non-FV Stalking', 'FV Harassment and private nuisance',
    'FV Threatening behaviour', 'Motor vehicle theft', 'Receiving or handling stolen goods',
    'Drug dealing', 'Drug trafficking', 'Fare evasion'
]

# Filter the DataFrame to remove the specified rows
crime_df = crime_df[~crime_df['offence_subgroup'].isin(crimes_to_remove)]


In [107]:
# Replace 'Other theft' with 'General Theft' in the 'offence_subgroup' column
crime_df['offence_subgroup'] = crime_df['offence_subgroup'].replace('Other theft', 'General Theft')

# Remove 'Non-FV ' prefix from the 'offence_subgroup' column
crime_df['offence_subgroup'] = crime_df['offence_subgroup'].str.replace('Non-FV ', '')


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  crime_df['offence_subgroup'] = crime_df['offence_subgroup'].replace('Other theft', 'General Theft')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  crime_df['offence_subgroup'] = crime_df['offence_subgroup'].str.replace('Non-FV ', '')


In [108]:
crime_df['offence_subgroup'].unique()

array(['Serious assault', 'Common assault',
       'Harassment and private nuisance', 'Threatening behaviour',
       'Theft of a bicycle', 'General Theft'], dtype=object)

In [109]:
# Get the count of each unique value in the 'offence_subgroup' column
counts = crime_df['offence_subgroup'].value_counts()

# Display the counts
print(counts)

offence_subgroup
General Theft                      168
Theft of a bicycle                 159
Common assault                     158
Serious assault                    153
Threatening behaviour              131
Harassment and private nuisance     94
Name: count, dtype: int64


# 5. Bike rails dataset

In [110]:
bikerails = gpd.read_file('../raw_data/bikerails.geojson')


In [111]:
bikerails.drop(['model_no', 'division', 'location_desc', 'condition_rating', 'evaluation_date', 'easting', 'northing', 'uploaddate'], axis=1, inplace=True)

bikerails = bikerails[bikerails['asset_type'] == 'Bicycle Rails']

# Display the DataFrame to confirm only the desired rows are left
bikerails.head()


Unnamed: 0,gis_id,description,asset_class,asset_type,model_descr,company,geometry
14,1769545,Bicycle Rails - Stainless Steel Bicycle Hoop,Outdoor Furniture,Bicycle Rails,Bicycle Rails - Stainless Steel Bicycle Hoop,City of Melbourne,POINT (144.96689 -37.79601)
15,1769548,Bicycle Rails - Stainless Steel Bicycle Hoop,Outdoor Furniture,Bicycle Rails,Bicycle Rails - Stainless Steel Bicycle Hoop,City of Melbourne,POINT (144.96717 -37.79636)
16,1769843,Bicycle Rails - Stainless Steel Bicycle Hoop -...,Outdoor Furniture,Bicycle Rails,Bicycle Rails - Stainless Steel Bicycle Hoop,City of Melbourne,POINT (144.96067 -37.80182)
17,1769844,Bicycle Rails - Stainless Steel Bicycle Hoop -...,Outdoor Furniture,Bicycle Rails,Bicycle Rails - Stainless Steel Bicycle Hoop,City of Melbourne,POINT (144.96063 -37.80203)
18,1769848,Bicycle Rails - Stainless Steel Bicycle Hoop -...,Outdoor Furniture,Bicycle Rails,Bicycle Rails - Stainless Steel Bicycle Hoop,City of Melbourne,POINT (144.96064 -37.80196)


# 6. All roads Dataset

In [112]:
all_roads_gdf = gpd.read_file('../raw_data/all_roads.geojson')

all_roads_gdf = all_roads_gdf[~all_roads_gdf.roadclass.isin(['91','92'])]

# Drop columns 'objectid', 'status', 'notes', 'direction', and 'name'
all_roads_gdf.drop(columns=['objectid', 'roadclass', 'width', 'oneway', 'management','direction','shape_length','description'], inplace=True)

# Add a new column 'lane_number' with values from 1 to the length of the DataFrame
all_roads_gdf['lane_number'] = range(1, len(all_roads_gdf) + 1)

# Reset the index
all_roads_gdf.reset_index(drop=True, inplace=True)

# Select specific columns 'lane_number', 'geo_point_2d', 'type', and 'geometry' and order them
all_roads_gdf = all_roads_gdf[['lane_number', 'geo_point_2d', 'geometry']]

all_roads_gdf.head(5)


Unnamed: 0,lane_number,geo_point_2d,geometry
0,1,"{'lon': 144.92809397315, 'lat': -37.7897418881}","LINESTRING (144.92792 -37.78972, 144.92827 -37..."
1,2,"{'lon': 144.92837896689997, 'lat': -37.7897765...","LINESTRING (144.92827 -37.78976, 144.92849 -37..."
2,3,"{'lon': 144.92785881895, 'lat': -37.79214913845}","LINESTRING (144.92786 -37.79215, 144.92786 -37..."
3,4,"{'lon': 144.9286197784, 'lat': -37.79063023765}","LINESTRING (144.92862 -37.79061, 144.92862 -37..."
4,5,"{'lon': 144.93007961235, 'lat': -37.79070105385}","LINESTRING (144.93006 -37.79063, 144.93010 -37..."


In [113]:
# for all roads
accidents_gdf2 = accident_df.copy()
# Perform a spatial join to associate each accident event with the corresponding bike lane
accidents_gdf2['bike_lane_id'] = accidents_gdf2.apply(lambda row: find_nearest_lane(row,all_roads_gdf), axis=1)
accidents_gdf2.dropna(inplace=True)

# Count the number of accidents associated with each bike lane
accident_counts2 = accidents_gdf2['bike_lane_id'].value_counts()
accident_counts_df2 = accident_counts2.reset_index()
accident_counts_df2.columns = ['bike_lane_id', 'count']

# Add a new column to the bike lanes dataset with the accident category
all_roads_joined = all_roads_gdf.merge(accident_counts_df2, left_on='lane_number', right_on='bike_lane_id', how='left')
all_roads_joined['count'] = all_roads_joined['count'].fillna(0)
all_roads_joined.drop(columns=['bike_lane_id'], inplace=True)


  nearest_bikelane = near_bikelanes.loc[[near_bikelanes.geometry.distance(accident['geometry']).idxmin()]]

  nearest_bikelane = near_bikelanes.loc[[near_bikelanes.geometry.distance(accident['geometry']).idxmin()]]

  nearest_bikelane = near_bikelanes.loc[[near_bikelanes.geometry.distance(accident['geometry']).idxmin()]]

  nearest_bikelane = near_bikelanes.loc[[near_bikelanes.geometry.distance(accident['geometry']).idxmin()]]

  nearest_bikelane = near_bikelanes.loc[[near_bikelanes.geometry.distance(accident['geometry']).idxmin()]]

  nearest_bikelane = near_bikelanes.loc[[near_bikelanes.geometry.distance(accident['geometry']).idxmin()]]

  nearest_bikelane = near_bikelanes.loc[[near_bikelanes.geometry.distance(accident['geometry']).idxmin()]]

  nearest_bikelane = near_bikelanes.loc[[near_bikelanes.geometry.distance(accident['geometry']).idxmin()]]

  nearest_bikelane = near_bikelanes.loc[[near_bikelanes.geometry.distance(accident['geometry']).idxmin()]]

  nearest_bikelane = near_b

In [114]:
len(all_roads_joined[all_roads_joined['count'] != 0])/len(all_roads_joined)*100

2.399664275904166

# Write out clean dataframes as csv files

In [115]:
# write out clean dataset as csv files in data folder 
boundary.to_csv('../data/boundary.csv', index= False)
accident_df.to_csv('../data/accident.csv',index= False)
bikelanes_joined.to_csv('../data/bikelanes.csv',index= False)
crime_df.to_csv('../data/crimes.csv',index= False)
bikerails.to_csv('../data/bikerails.csv', index=False)
all_roads_joined.to_csv('../data/all_roads.csv', index= False)