In [1]:
# Import the required libraries and dependencies
import numpy as np
import pandas as pd
import shapefile as shp
import matplotlib.pyplot as plt
import seaborn as sns
from shapely.geometry import Polygon
from shapely.geometry import Point

import os

%matplotlib inline
import geopandas as gpd

import warnings
warnings.filterwarnings("ignore")


ModuleNotFoundError: No module named 'shapefile'

In [None]:
# Load the polygon shapefile that captures the Victorian Local Government
# Area (LGA) information, then preview its first ten records.
shp_path = "../Data/NOV21_VIC_LGA_POLYGON_shp/vic_lga.shp"
govpolmap = gpd.read_file(shp_path)
govpolmap.head(n=10)

In [None]:
# Open the Criminal Incidents workbook. `path` is the data-directory prefix
# that the other workbook-loading cells reuse.
path = "../Data/"
xls_lga_ci = pd.ExcelFile(
    path + 'LGA_Criminal_Incidents_Year_Ending_September_2021.xlsx'
)

# Parse the "Table 01" worksheet of the Criminal Incidents workbook
lga_ci_tab1 = pd.read_excel(xls_lga_ci, sheet_name="Table 01")

# Keep every row whose 'Local Government Area' value is not 'Total'
# (presumably aggregate rows - confirm against the workbook)
lga_ci = lga_ci_tab1.loc[lga_ci_tab1['Local Government Area'].ne('Total')]

In [None]:
# Open the Recorded Offences workbook (relies on `path` set in an earlier cell)
xls_lga_ro = pd.ExcelFile(
    path + 'LGA_Recorded_Offences_Year_Ending_September_2021.xlsx'
)

# Parse the "Table 01" worksheet of the Recorded Offences workbook
lga_ro_tab1 = pd.read_excel(xls_lga_ro, sheet_name="Table 01")

# Keep every row whose 'Local Government Area' value is not 'Total'
lga_ro = lga_ro_tab1.loc[lga_ro_tab1['Local Government Area'].ne('Total')]

In [None]:
# Open the Alleged Offenders workbook (relies on `path` set in an earlier cell)
xls_lga_ao = pd.ExcelFile(
    path + 'LGA_Alleged_Offenders_Year_Ending_September_2021.xlsx'
)

# Parse the "Table 01" worksheet of the Alleged Offenders workbook
lga_ao_tab1 = pd.read_excel(xls_lga_ao, sheet_name="Table 01")

# Keep every row whose 'Local Government Area' value is not 'Total'
lga_ao = lga_ao_tab1.loc[lga_ao_tab1['Local Government Area'].ne('Total')]

In [None]:
# Open the Victim Reports workbook (relies on `path` set in an earlier cell)
xls_lga_vr = pd.ExcelFile(
    path + 'LGA_Victim_Reports_Year_Ending_September_2021.xlsx'
)

# Parse the "Table 01" worksheet of the Victim Reports workbook
lga_vr_tab1 = pd.read_excel(xls_lga_vr, sheet_name="Table 01")

# Keep every row whose 'Local Government Area' value is not 'Total'
lga_vr = lga_vr_tab1.loc[lga_vr_tab1['Local Government Area'].ne('Total')]

In [None]:
# Open the Family Incidents workbook (relies on `path` set in an earlier cell)
xls_lga_fi = pd.ExcelFile(
    path + 'LGA_Family_Incidents_Year_Ending_September_2021.xlsx'
)

# Parse the "Table 01" worksheet of the Family Incidents workbook
lga_fi_tab1 = pd.read_excel(xls_lga_fi, sheet_name="Table 01")

# Keep every row whose 'Local Government Area' value is not 'Total'
lga_fi = lga_fi_tab1.loc[lga_fi_tab1['Local Government Area'].ne('Total')]

In [None]:
# Placeholder list; nothing in this cell fills it
incidents_by_lga = []

# Start the combined crime dataframe from the Criminal Incidents data:
# take the identifying columns plus the incident count, and relabel the
# count column as "Average Criminal Incidents".
lga_df = (
    lga_ci[["Year", "Police Region", "Local Government Area", "Incidents Recorded"]]
    .rename(columns={"Incidents Recorded": "Average Criminal Incidents"})
)
lga_df

In [None]:
# Attach the recorded offence, alleged offender incident and victim report
# counts as additional columns.
# NOTE(review): the values are matched to lga_df by row index label, not by
# Year / Local Government Area - this assumes every worksheet lists its rows
# in the same order; confirm.
lga_df = lga_df.assign(**{
    "Average Recorded Offence Count": lga_ro["Offence Count"],
    "Average Alleged Incidents": lga_ao["Alleged Offender Incidents"],
    "Average Victim Reports": lga_vr["Victim Reports"],
})
lga_df

In [None]:
# Calculate the average population and the average rate per 100K population
# for each row, averaging over the four datasets (criminal incidents,
# recorded offences, alleged offender incidents and victim reports).
#
# For a single dataset the implied population is
#     count / (rate per 100,000 population) * 100,000.
# The four per-dataset values are added and then divided by 4. Without the
# division the "Average ..." columns would hold the sum of the four values,
# i.e. four times the mean.
# NOTE(review): the Series are combined by row index label, so this assumes
# the four worksheets list their rows in the same order - confirm.
lga_df["Average Population"] = (
    lga_ci['Incidents Recorded'] / lga_ci['Rate per 100,000 population'] * 100000
    + lga_ro['Offence Count'] / lga_ro['Rate per 100,000 population'] * 100000
    + lga_ao['Alleged Offender Incidents'] / lga_ao['Rate per 100,000 population'] * 100000
    + lga_vr['Victim Reports'] / lga_vr['Rate per 100,000 population'] * 100000
) / 4

lga_df["Average Rate per 100K Population"] = (
    lga_ci['Rate per 100,000 population']
    + lga_ro['Rate per 100,000 population']
    + lga_ao['Rate per 100,000 population']
    + lga_vr['Rate per 100,000 population']
) / 4

lga_df

In [None]:
# Build a lookup of Local Government Area -> Police Region: keep the first
# row seen for each LGA and renumber the rows from zero
region = (
    lga_df[["Local Government Area", "Police Region"]]
    .drop_duplicates(subset="Local Government Area")
    .reset_index(drop=True)
)

# Data cleaning process -
# Remove the rows labelled 79 and 80, which the notebook treats as
# non-police-region rows
# NOTE(review): these hard-coded labels depend on the row order of the source
# workbook - confirm they still point at the intended rows
region = region.drop(index=[79, 80])

# Order the lookup alphabetically by Local Government Area and renumber
region = region.sort_values(by="Local Government Area").reset_index(drop=True)

# Data cleaning process -
# Strip the first two characters of each Police Region name to remove the
# prefix number
region["Police Region"] = region["Police Region"].str[2:]

region

In [None]:
# Average the crime measures per "Local Government Area" across all rows
# (years) of lga_df.
# numeric_only=True is required because lga_df still carries the text column
# "Police Region": pandas >= 2.0 raises a TypeError when asked to average a
# non-numeric column, whereas older versions silently dropped it. Passing the
# flag explicitly gives the same result on both.
lga_df_grpby_lga = lga_df.groupby(["Local Government Area"]).mean(numeric_only=True)

# Data cleaning process -
# Remove every row containing NaN (the original author notes these as row
# index 34 & 69), renumber the rows and drop the unused "Year" column.
# reset_index(drop=True) discards the LGA names, so from here on a row is
# identified only by its position in the alphabetically sorted group order.
lga_df_grpby_lga = lga_df_grpby_lga.dropna().reset_index(drop=True)
lga_df_grpby_lga = lga_df_grpby_lga.drop("Year", axis=1)
lga_df_grpby_lga

In [None]:
# Data cleaning process -
# Keep only the LGA name and geometry columns of the government shapefile,
# then remove the rows that the notebook identifies (by index label) as
# containing no LGA details.
# NOTE(review): the hard-coded labels are tied to this exact shapefile -
# confirm them if the file is replaced.
new_polymap = govpolmap[["ABB_NAME", "geometry"]].drop(
    index=[21, 23, 24, 25, 26, 43, 62, 63, 64, 65]
)
new_polymap.head(n=10)

In [None]:
# Data cleaning process -
# Only one row per LGA name is required for the plot, so keep the first
# occurrence of each ABB_NAME and renumber the rows from zero
unique_new_polymap = (
    new_polymap
    .drop_duplicates(subset=['ABB_NAME'])
    .reset_index(drop=True)
)

unique_new_polymap

In [None]:
# Build all_crime_df by placing the LGA geometry and the averaged crime
# figures side by side.
# NOTE(review): the two frames are matched on their 0..n-1 row index, not on
# the LGA name - this assumes both list the LGAs in the same order; confirm.
all_crime_df = pd.concat(
    [unique_new_polymap, lga_df_grpby_lga],
    axis=1,
    join="outer",
)

In [None]:
# Relabel the ABB_NAME column as "LGA", then round every float value to
# 2 decimal places
all_crime_df = (
    all_crime_df
    .rename(columns={"ABB_NAME": "LGA"})
    .round(decimals=2)
)

In [None]:
# Append the police region lookup to produce all_crime_df_byregion, the final
# frame used for plotting.
# NOTE(review): rows are matched on their row index, so `region` must be in
# the same LGA order as all_crime_df - confirm.
all_crime_df_byregion = pd.concat(
    [all_crime_df, region],
    axis=1,
    join="outer",
)
all_crime_df_byregion

In [None]:
# Plot an interactive map of the crime details by LGA, coloured by the
# average rate per 100K population (averaged over 10 years, 2012 to 2021)
all_crime_df_byregion.explore(column="Average Rate per 100K Population")

In [None]:
# Plot an interactive crime map with each LGA coloured by its police region
all_crime_df_byregion.explore(column="Police Region")