In [78]:
import pandas as pd

# Part 1. Criminal_Incidents And Recorded_Offences

In [79]:
# Read Raw Data
raw_incidents = pd.read_excel('raw/Data_Tables_LGA_Criminal_Incidents_Year_Ending_December_2024.xlsx', sheet_name='Table 03', header=0, engine='openpyxl')
raw_offense = pd.read_excel('raw/Data_Tables_LGA_Recorded_Offences_Year_Ending_December_2024.xlsx', sheet_name='Table 03', header=0, engine='openpyxl')

## Process Data

In [80]:
def filter_df(df, melb_only=True, person_crime_only=True):
    if df is not None:

        # Create a copy 
        filtered_df = df.copy()

        # Rename Columns
        filtered_df.columns = df.columns.str.replace(r'[^\w]', '_', regex=True).str.lower()
                
        # Filter to keep Melbourne data only
        if 'local_government_area' in filtered_df.columns and melb_only ==True:
            filtered_df = filtered_df[filtered_df['local_government_area']=='Melbourne']
        
        # Filter to keep offence_division to Crimes against the person only, Based on flag
        if 'offence_division' in filtered_df.columns and person_crime_only == True:
            filtered_df = filtered_df[filtered_df['offence_division']=='A Crimes against the person']

        if 'year_ending' in filtered_df.columns:
            filtered_df = filtered_df.drop(columns=['year_ending'])

        return filtered_df.reset_index(drop=True)

In [81]:
incidents_df = filter_df(raw_incidents)
offense_df = filter_df(raw_offense)

In [82]:
crime_yearly_df = pd.merge(
    incidents_df,
    offense_df,
    how='outer',
    on=['year', 'local_government_area', 'postcode', 'suburb_town_name', 
        'offence_division', 'offence_subdivision', 'offence_subgroup']
)
crime_yearly_df = crime_yearly_df.fillna(0)
crime_yearly_df.head()

Unnamed: 0,year,local_government_area,postcode,suburb_town_name,offence_division,offence_subdivision,offence_subgroup,incidents_recorded,offence_count
0,2024,Melbourne,3000,Carlton,A Crimes against the person,A20 Assault and related offences,A232 Non-FV Common assault,1.0,1.0
1,2024,Melbourne,3000,Melbourne,A Crimes against the person,A20 Assault and related offences,A211 FV Serious assault,85.0,83.0
2,2024,Melbourne,3000,Melbourne,A Crimes against the person,A20 Assault and related offences,A212 Non-FV Serious assault,445.0,443.0
3,2024,Melbourne,3000,Melbourne,A Crimes against the person,A20 Assault and related offences,"A22 Assault police, emergency services or othe...",100.0,146.0
4,2024,Melbourne,3000,Melbourne,A Crimes against the person,A20 Assault and related offences,A231 FV Common assault,125.0,141.0


## Data Validation

In [83]:
unq_key = ['year','postcode', 'suburb_town_name', 'offence_subgroup']

In [84]:
print("incidents_df")
print(f"row cnt: {incidents_df.shape[0]}")
print(f"unq key cnt: {incidents_df[unq_key].drop_duplicates().shape[0]}")
incidents_df.head()


incidents_df
row cnt: 1972
unq key cnt: 1972


Unnamed: 0,year,local_government_area,postcode,suburb_town_name,offence_division,offence_subdivision,offence_subgroup,incidents_recorded
0,2024,Melbourne,3000,Carlton,A Crimes against the person,A20 Assault and related offences,A232 Non-FV Common assault,1
1,2024,Melbourne,3000,Melbourne,A Crimes against the person,A20 Assault and related offences,A211 FV Serious assault,85
2,2024,Melbourne,3000,Melbourne,A Crimes against the person,A20 Assault and related offences,A212 Non-FV Serious assault,445
3,2024,Melbourne,3000,Melbourne,A Crimes against the person,A20 Assault and related offences,"A22 Assault police, emergency services or othe...",100
4,2024,Melbourne,3000,Melbourne,A Crimes against the person,A20 Assault and related offences,A231 FV Common assault,125


In [85]:
print("offense_df")
print(f"row cnt: {offense_df.shape[0]}")
print(f"unq key cnt: {offense_df[unq_key].drop_duplicates().shape[0]}")
offense_df.head()

offense_df
row cnt: 2074
unq key cnt: 2074


Unnamed: 0,year,local_government_area,postcode,suburb_town_name,offence_division,offence_subdivision,offence_subgroup,offence_count
0,2024,Melbourne,3000,Carlton,A Crimes against the person,A20 Assault and related offences,A232 Non-FV Common assault,1
1,2024,Melbourne,3000,Melbourne,A Crimes against the person,A20 Assault and related offences,A211 FV Serious assault,83
2,2024,Melbourne,3000,Melbourne,A Crimes against the person,A20 Assault and related offences,A212 Non-FV Serious assault,443
3,2024,Melbourne,3000,Melbourne,A Crimes against the person,A20 Assault and related offences,"A22 Assault police, emergency services or othe...",146
4,2024,Melbourne,3000,Melbourne,A Crimes against the person,A20 Assault and related offences,A231 FV Common assault,141


In [86]:
print("crime_yearly_df")
print(f"row cnt: {crime_yearly_df.shape[0]}")
print(f"unq key cnt: {crime_yearly_df[unq_key].drop_duplicates().shape[0]}")
crime_yearly_df.head()

crime_yearly_df
row cnt: 2080
unq key cnt: 2080


Unnamed: 0,year,local_government_area,postcode,suburb_town_name,offence_division,offence_subdivision,offence_subgroup,incidents_recorded,offence_count
0,2024,Melbourne,3000,Carlton,A Crimes against the person,A20 Assault and related offences,A232 Non-FV Common assault,1.0,1.0
1,2024,Melbourne,3000,Melbourne,A Crimes against the person,A20 Assault and related offences,A211 FV Serious assault,85.0,83.0
2,2024,Melbourne,3000,Melbourne,A Crimes against the person,A20 Assault and related offences,A212 Non-FV Serious assault,445.0,443.0
3,2024,Melbourne,3000,Melbourne,A Crimes against the person,A20 Assault and related offences,"A22 Assault police, emergency services or othe...",100.0,146.0
4,2024,Melbourne,3000,Melbourne,A Crimes against the person,A20 Assault and related offences,A231 FV Common assault,125.0,141.0


# Part 2. Victim Report

In [87]:
raw_victims = pd.read_excel('raw/Data_Tables_Victim_Reports_Visualisation_Year_Ending_December_2024.xlsx', sheet_name='Table 03', header=0, engine='openpyxl')
raw_victims.head()

Unnamed: 0,Year,Year ending,Sex,Offence Division,Offence Subdivision,Victim Reports
0,2024,December,Females,A Crimes against the person,A10 Homicide and related offences,76
1,2024,December,Females,A Crimes against the person,A20 Assault and related offences,24744
2,2024,December,Females,A Crimes against the person,A30 Sexual offences,9483
3,2024,December,Females,A Crimes against the person,A40 Abduction and related offences,328
4,2024,December,Females,A Crimes against the person,A50 Robbery,579


In [104]:
victims_df = filter_df(raw_victims).rename(columns={'victim_reports': 'victims'})
victims_df.head()

Unnamed: 0,year,sex,offence_division,offence_subdivision,victims
0,2024,Females,A Crimes against the person,A10 Homicide and related offences,76
1,2024,Females,A Crimes against the person,A20 Assault and related offences,24744
2,2024,Females,A Crimes against the person,A30 Sexual offences,9483
3,2024,Females,A Crimes against the person,A40 Abduction and related offences,328
4,2024,Females,A Crimes against the person,A50 Robbery,579


In [113]:
victims_all_gender = victims_df\
    .groupby(['year', 'offence_division', 'offence_subdivision'])\
    .agg({'victims': 'sum'})\
    .rename(columns={'victims': 'total_victims'})\
    .reset_index()

victims_df = pd.merge(victims_df, victims_all_gender, how='left', on=['year', 'offence_division', 'offence_subdivision'])

In [114]:
print("victims_all_gender")
print(f"row cnt: {victims_all_gender.shape[0]}")
print(f"unq key cnt: {victims_all_gender[['year', 'offence_division', 'offence_subdivision']].drop_duplicates().shape[0]}")

print("\nvictims_df")
print(f"unq key cnt (except gender): {victims_df[['year', 'offence_division', 'offence_subdivision']].drop_duplicates().shape[0]}")

print(f"row cnt: {victims_df.shape[0]}")
print(f"unq key cnt (incl. gender): {victims_df[['year', 'offence_division', 'offence_subdivision', 'sex']].drop_duplicates().shape[0]}")

victims_all_gender
row cnt: 80
unq key cnt: 80

victims_df
unq key cnt (except gender): 80
row cnt: 160
unq key cnt (incl. gender): 160


In [115]:
victims_df.head()

Unnamed: 0,year,sex,offence_division,offence_subdivision,victims,total_victims
0,2024,Females,A Crimes against the person,A10 Homicide and related offences,76,226
1,2024,Females,A Crimes against the person,A20 Assault and related offences,24744,49162
2,2024,Females,A Crimes against the person,A30 Sexual offences,9483,11220
3,2024,Females,A Crimes against the person,A40 Abduction and related offences,328,563
4,2024,Females,A Crimes against the person,A50 Robbery,579,3213


# Export Data

In [91]:
crime_yearly_df.to_csv('melb_crime_stat_yearly.csv')

In [None]:
victims_df.to_csv('aus_victims_by_gender_yearly.csv')