# Crime data With SAL

This notebook will focuse in preliminary visualization of crime data in each SAL region

Before run this, you need to run 1.17 Preprocessing_suburb_shapefile.ipynb and 1.14 Preprocessing_crime.ipynb

In [1]:
import pandas as pd
import geopandas as gpd
from fuzzywuzzy import fuzz
import folium
import numpy as np



# Import data 

In [2]:
# Read crime count data 
crime_count_gpd = gpd.read_file('../data/raw/crime_count_with_point_geo.geojson')  # Specify the driver based on the desired output format
# Read SA2 gdf
SAL_gdf = gpd.read_file("../data/raw/victoria_region_gdf/SAL_region_gdf.geojson")

In [3]:
crime_count_gpd

Unnamed: 0,Suburb/Town Name,Postcode,Year,Incidents Recorded,geometry
0,abbeyard,3737,2015,4,POINT (146.76407 -37.02132)
1,abbeyard,3737,2017,1,POINT (146.76407 -37.02132)
2,abbeyard,3737,2019,3,POINT (146.76407 -37.02132)
3,abbeyard,3737,2021,1,POINT (146.76407 -37.02132)
4,abbotsford,3067,2014,793,POINT (144.99836 -37.80236)
...,...,...,...,...,...
23023,zeerust,3634,2022,2,POINT (145.39733 -36.27020)
23024,zeerust,3634,2023,8,POINT (145.39733 -36.27020)
23025,zumsteins,3401,2018,2,POINT (142.38424 -37.11879)
23026,zumsteins,3401,2019,2,POINT (142.38424 -37.11879)


In [4]:
SAL_gdf

Unnamed: 0,SAL_NAME21,SAL_CODE21,SHAPE_Area,geometry
0,Abbeyard,20001,0.033162,"POLYGON ((146.89824 -37.04602, 146.89947 -37.0..."
1,Abbotsford,20002,0.000178,"POLYGON ((145.00195 -37.79665, 145.00190 -37.7..."
2,Aberfeldie,20003,0.000159,"POLYGON ((144.89576 -37.76514, 144.89547 -37.7..."
3,Aberfeldy,20004,0.001107,"POLYGON ((146.38814 -37.72232, 146.38808 -37.7..."
4,Acheron,20005,0.007381,"POLYGON ((145.76731 -37.25433, 145.76757 -37.2..."
...,...,...,...,...
2939,Yundool,22940,0.003174,"POLYGON ((145.86040 -36.28432, 145.86038 -36.2..."
2940,Yuroke,22941,0.000906,"POLYGON ((144.85250 -37.55800, 144.85303 -37.5..."
2941,Yuulong,22942,0.005404,"POLYGON ((143.32185 -38.68969, 143.32203 -38.6..."
2942,Zeerust,22943,0.001808,"POLYGON ((145.40454 -36.25294, 145.40479 -36.2..."


## Now join with victoria SAL file by their geometry

In [5]:
# Use sjoin to find out which suburb each point belongs to
crime_count_gpd = gpd.sjoin(crime_count_gpd, SAL_gdf, how="left", op="within")

  if (await self.run_code(code, result,  async_=asy)):
Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: EPSG:4326
Right CRS: EPSG:7844

  crime_count_gpd = gpd.sjoin(crime_count_gpd, SAL_gdf, how="left", op="within")


In [6]:
crime_count_gpd

Unnamed: 0,Suburb/Town Name,Postcode,Year,Incidents Recorded,geometry,index_right,SAL_NAME21,SAL_CODE21,SHAPE_Area
0,abbeyard,3737,2015,4,POINT (146.76407 -37.02132),0.0,Abbeyard,20001,0.033162
1,abbeyard,3737,2017,1,POINT (146.76407 -37.02132),0.0,Abbeyard,20001,0.033162
2,abbeyard,3737,2019,3,POINT (146.76407 -37.02132),0.0,Abbeyard,20001,0.033162
3,abbeyard,3737,2021,1,POINT (146.76407 -37.02132),0.0,Abbeyard,20001,0.033162
4,abbotsford,3067,2014,793,POINT (144.99836 -37.80236),1.0,Abbotsford,20002,0.000178
...,...,...,...,...,...,...,...,...,...
23023,zeerust,3634,2022,2,POINT (145.39733 -36.27020),2942.0,Zeerust,22943,0.001808
23024,zeerust,3634,2023,8,POINT (145.39733 -36.27020),2942.0,Zeerust,22943,0.001808
23025,zumsteins,3401,2018,2,POINT (142.38424 -37.11879),2943.0,Zumsteins,22944,0.026011
23026,zumsteins,3401,2019,2,POINT (142.38424 -37.11879),2943.0,Zumsteins,22944,0.026011


#### Retain key features

In [7]:
crime_count_gpd = crime_count_gpd[['SAL_NAME21', 'SAL_CODE21', 'Year', 'Incidents Recorded']]

### Rename the incident recorded to number of crime incident
crime_count_gpd = crime_count_gpd.rename(columns={'Incidents Recorded': 'Crime_incidents_count'})

crime_count_gpd

Unnamed: 0,SAL_NAME21,SAL_CODE21,Year,Crime_incidents_count
0,Abbeyard,20001,2015,4
1,Abbeyard,20001,2017,1
2,Abbeyard,20001,2019,3
3,Abbeyard,20001,2021,1
4,Abbotsford,20002,2014,793
...,...,...,...,...
23023,Zeerust,22943,2022,2
23024,Zeerust,22943,2023,8
23025,Zumsteins,22944,2018,2
23026,Zumsteins,22944,2019,2


In [8]:
## Check if one location/suburb have been mapped to same SAL region 

In [9]:
## Check if one location/suburb have been mapped to same SAL region 
temp = crime_count_gpd[['SAL_NAME21', 'SAL_CODE21', 'Year']]

duplicates = temp[temp.duplicated()]
duplicates

Unnamed: 0,SAL_NAME21,SAL_CODE21,Year
967,Balnarring,20120,2014
968,Balnarring,20120,2015
969,Balnarring,20120,2016
970,Balnarring,20120,2017
971,Balnarring,20120,2018
...,...,...,...
22914,Alvie,20038,2017
22915,Alvie,20038,2018
22916,Alvie,20038,2019
22917,Alvie,20038,2020


We aware this duplication could due to the geographical scale and region for SAL, which could result single SAL region contain one or more suburb, hence we will group by SAL_NAME21	SAL_CODE21	Year and sum up the Crime_incidents_count

In [10]:

# Group by 'SAL_NAME21', 'SAL_CODE21', and 'Year' and sum 'Crime_incidents_count'
crime_count_gpd = crime_count_gpd.groupby(['SAL_NAME21', 'SAL_CODE21', 'Year'])['Crime_incidents_count'].sum().reset_index()

# Rename the 'Crime_incidents_count' column to 'total_crime_incidents'
crime_count_gpd = crime_count_gpd.rename(columns={'Crime_incidents_count': 'total_crime_incidents'})


In [11]:
crime_count_gpd

Unnamed: 0,SAL_NAME21,SAL_CODE21,Year,total_crime_incidents
0,Abbeyard,20001,2015,4
1,Abbeyard,20001,2017,1
2,Abbeyard,20001,2019,3
3,Abbeyard,20001,2021,1
4,Abbotsford,20002,2014,793
...,...,...,...,...
22729,Zeerust,22943,2022,2
22730,Zeerust,22943,2023,8
22731,Zumsteins,22944,2018,2
22732,Zumsteins,22944,2019,2


#### Check number of SAL we cover 

In [12]:
yearly_count = crime_count_gpd.groupby('Year').size().reset_index(name='RecordCount')
yearly_count

Unnamed: 0,Year,RecordCount
0,2014,2202
1,2015,2192
2,2016,2259
3,2017,2259
4,2018,2278
5,2019,2302
6,2020,2330
7,2021,2343
8,2022,2272
9,2023,2297


good !

In [13]:
# Save to Cureated 
crime_count_gpd.to_csv("../data/curated/crime_count_SAL.csv", index=False)


# Visualisation of Crime data in 2023 with SAL region


In [14]:
# Read data 
crime_count_gpd = pd.read_csv("../data/curated/crime_count_SAL.csv")

In [15]:
# Select the releveant year 2023
crime_2023 = crime_count_gpd[crime_count_gpd['Year']==2023]

In [16]:
#reading  the geo data 
file_path = "../data/raw/victoria_region_gdf/SAL_region_gdf.geojson"
# Read the GeoPandas DataFrame from the specified file
SAL_gdf = gpd.read_file(file_path)

In [17]:
SAL_gdf

Unnamed: 0,SAL_NAME21,SAL_CODE21,SHAPE_Area,geometry
0,Abbeyard,20001,0.033162,"POLYGON ((146.89824 -37.04602, 146.89947 -37.0..."
1,Abbotsford,20002,0.000178,"POLYGON ((145.00195 -37.79665, 145.00190 -37.7..."
2,Aberfeldie,20003,0.000159,"POLYGON ((144.89576 -37.76514, 144.89547 -37.7..."
3,Aberfeldy,20004,0.001107,"POLYGON ((146.38814 -37.72232, 146.38808 -37.7..."
4,Acheron,20005,0.007381,"POLYGON ((145.76731 -37.25433, 145.76757 -37.2..."
...,...,...,...,...
2939,Yundool,22940,0.003174,"POLYGON ((145.86040 -36.28432, 145.86038 -36.2..."
2940,Yuroke,22941,0.000906,"POLYGON ((144.85250 -37.55800, 144.85303 -37.5..."
2941,Yuulong,22942,0.005404,"POLYGON ((143.32185 -38.68969, 143.32203 -38.6..."
2942,Zeerust,22943,0.001808,"POLYGON ((145.40454 -36.25294, 145.40479 -36.2..."


In [18]:
crime_2023

Unnamed: 0,SAL_NAME21,SAL_CODE21,Year,total_crime_incidents
13,Abbotsford,20002,2023,1097
23,Aberfeldie,20003,2023,79
35,Acheron,20005,2023,1
47,Adams Estate,20007,2023,10
51,Addington,20008,2023,1
...,...,...,...,...
22692,Youarang,22938,2023,1
22701,Yulecart,22939,2023,5
22716,Yuroke,22941,2023,9
22720,Yuulong,22942,2023,2


In [19]:
crime_2023 =crime_2023.drop(columns=['Year'])
crime_2023

Unnamed: 0,SAL_NAME21,SAL_CODE21,total_crime_incidents
13,Abbotsford,20002,1097
23,Aberfeldie,20003,79
35,Acheron,20005,1
47,Adams Estate,20007,10
51,Addington,20008,1
...,...,...,...
22692,Youarang,22938,1
22701,Yulecart,22939,5
22716,Yuroke,22941,9
22720,Yuulong,22942,2


In [20]:
# create a JSON 
geoJSON = SAL_gdf[['SAL_CODE21', 'geometry']].to_json()

# print the first 300 chars of the json
print(geoJSON[:300])

{"type": "FeatureCollection", "features": [{"id": "0", "type": "Feature", "properties": {"SAL_CODE21": "20001"}, "geometry": {"type": "Polygon", "coordinates": [[[146.89823840500003, -37.046023841999954], [146.89946546400006, -37.048662812999964], [146.8993791040001, -37.048770832999935], [146.89915


# Draw geo visualisation

In [21]:
m = folium.Map(location=[-36.9848, 143.3906], tiles="Stamen Terrain", zoom_start=7)

# refer to the folium documentations on more information on how to plot aggregated data.
c = folium.Choropleth(
    geo_data=geoJSON, # geoJSON 
    name='choropleth', # name of plot
    data=crime_2023.reset_index(), # data source
    columns=['SAL_CODE21','total_crime_incidents'], # the columns required
    key_on='properties.SAL_CODE21', # this is from the geoJSON's properties
    fill_color='YlOrRd', # color scheme
    nan_fill_color='black',
    legend_name='crime cases'
)

c.add_to(m)

m.save('../plots/preliminary_crime_incidents_SAL.html')



In [22]:
# file size too large, cant show m

# apply log scale to total_crime_incidents and repeat the visualisation

In [23]:
crime_2023['log_total_crime_incidents'] = np.log(crime_2023['total_crime_incidents'])


In [24]:
crime_2023

Unnamed: 0,SAL_NAME21,SAL_CODE21,total_crime_incidents,log_total_crime_incidents
13,Abbotsford,20002,1097,7.000334
23,Aberfeldie,20003,79,4.369448
35,Acheron,20005,1,0.000000
47,Adams Estate,20007,10,2.302585
51,Addington,20008,1,0.000000
...,...,...,...,...
22692,Youarang,22938,1,0.000000
22701,Yulecart,22939,5,1.609438
22716,Yuroke,22941,9,2.197225
22720,Yuulong,22942,2,0.693147


## Re draw with loged crime incident

In [25]:
k = folium.Map(location=[-36.9848, 143.3906], tiles="Stamen Terrain", zoom_start=7)

# refer to the folium documentations on more information on how to plot aggregated data.
c = folium.Choropleth(
    geo_data=geoJSON, # geoJSON 
    name='choropleth', # name of plot
    data=crime_2023.reset_index(), # data source
    columns=['SAL_CODE21','log_total_crime_incidents'], # the columns required
    key_on='properties.SAL_CODE21', # this is from the geoJSON's properties
    fill_color='YlOrRd', # color scheme
   # nan_fill_color='black',
    legend_name='crime cases'
)

c.add_to(k)

k.save('../plots/log_crime_incidents_SAL.html')

# file size too big to show map
#k


In [26]:
# save the data table to curated 
crime_2023.to_csv('../data/curated/preliminary_2023crime_count_under_SAL.csv', index=False)  # Specify the driver based on the desired output format