## Normalising Crime and School count in each TA
Normalise the crime and school data by the population of each Territorial Authority (TA).   
For each TA, calculate the number of victimisations and the number of schools per 10,000 people.

In [1]:
# loading the necessary libraries
library(dplyr)
library(tidyverse)

"package 'dplyr' was built under R version 4.2.3"

Attaching package: 'dplyr'


The following objects are masked from 'package:stats':

    filter, lag


The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union


"package 'tidyverse' was built under R version 4.2.3"
"package 'ggplot2' was built under R version 4.2.3"
"package 'tibble' was built under R version 4.2.3"
"package 'purrr' was built under R version 4.2.3"
── [1mAttaching core tidyverse packages[22m ──────────────────────────────────────────────────────────────── tidyverse 2.0.0 ──
[32m✔[39m [34mforcats  [39m 1.0.0     [32m✔[39m [34mreadr    [39m 2.1.4
[32m✔[39m [34mggplot2  [39m 3.4.3     [32m✔[39m [34mstringr  [39m 1.5.0
[32m✔[39m [34mlubridate[39m 1.9.2     [32m✔[39m [34mtibble   [39m 3.2.1
[32m✔[39m [34mpurrr    [39m 1.0.2     [32m✔[39m [34mtidyr    [39m 1.3.0
── [1mConflicts[22m ──────────────────────────────────────────────────────────────────────

In [2]:
# Load the four input tables from the Data/ directory.
data_school <- read.csv("Data/Final_School_Data.csv")
data_population <- read.csv("Data/Final_Population_Data.csv")
data_housing <- read.csv("Data/Final_housedata.csv")

# The crime table is read and its first column discarded in one step.
# NOTE(review): presumably that column is a row index left over from an
# earlier write.csv() call -- confirm against the file.
data_crime <- read.csv("Data/Final_crime_by_TA.csv")[, -1]

In [4]:
# Crime rate per Territorial Authority ----

# Collapse the crime table to one row per Territorial Authority holding the
# summed victimisation count. `data_crime` itself is overwritten with the
# aggregated table.
data_crime <- data_crime %>%
  group_by(Territorial_Authority) %>%
  summarise(Total_Victimisations = sum(Total_Victimisations))

# Attach the population figures, derive the rate per 10,000 people (rounded
# to a whole number), then keep only the rate. Authorities with no matching
# population row keep their place in the table with an NA rate.
data_crime_norming <- data_crime %>%
  left_join(data_population, by = "Territorial_Authority") %>%
  mutate(
    Victimisations_Per_10000 =
      round(Total_Victimisations / Total_Population * 10000)
  ) %>%
  select(-c(Total_Victimisations, Total_Population))

# Display the normalised crime table
data_crime_norming


Territorial_Authority,Victimisations_Per_10000
<chr>,<dbl>
Area Outside Territorial Authority,
Ashburton District,169
Auckland,391
Buller District,252
Carterton District,155
Central Hawke's Bay District,158
Central Otago District,103
Chatham Islands Territory,88
Christchurch City,549
Clutha District,154


In [5]:
# School density per Territorial Authority ----

# Collapse the school table to one row per Territorial Authority holding the
# summed school count. `data_school` itself is overwritten with the
# aggregated table.
data_school <- data_school %>%
  group_by(Territorial_Authority) %>%
  summarise(School_Count = sum(School_Count))

# Attach the population figures, derive the number of schools per 10,000
# people (rounded to a whole number), then keep only that rate.
data_school_norming <- data_school %>%
  left_join(data_population, by = "Territorial_Authority") %>%
  mutate(
    School_Per_10000 = round(School_Count / Total_Population * 10000)
  ) %>%
  select(-c(School_Count, Total_Population))

# Display the normalised school table
data_school_norming

Territorial_Authority,School_Per_10000
<chr>,<dbl>
Ashburton District,6
Auckland,3
Buller District,9
Carterton District,6
Central Hawke's Bay District,11
Central Otago District,6
Chatham Islands Territory,38
Christchurch City,4
Clutha District,13
Dunedin City,6


In [24]:
# Combine the normalised crime, housing and normalised school tables ----
# Both joins are left joins keyed on Territorial_Authority, so the result has
# a row for every authority present in the normalised crime table; values
# missing from the housing or school tables come through as NA.

# Crime rates + housing data
norming_data <- data_crime_norming %>%
  left_join(data_housing, by = "Territorial_Authority")

# ... + school rates
final_norming_data <- norming_data %>%
  left_join(data_school_norming, by = "Territorial_Authority")

# Display the combined table
final_norming_data


Territorial_Authority,Victimisations_Per_10000,Price,School_Per_10000
<chr>,<dbl>,<int>,<dbl>
Area Outside Territorial Authority,,,
Ashburton District,169,532157,6
Auckland,391,1259185,3
Buller District,252,336345,9
Carterton District,155,633301,6
Central Hawke's Bay District,158,588040,11
Central Otago District,103,788144,6
Chatham Islands Territory,88,,38
Christchurch City,549,730646,4
Clutha District,154,392835,13


In [25]:
# Save the combined table to disk.
# row.names is left at its default (TRUE), so the file starts with an extra
# column of row numbers.
write.csv(final_norming_data, file = "Data/Final_norming_data.csv")

End of Notebook