In [None]:
# Partition the combined data set into one frame per calendar year.
# Each frame keeps only the rows whose "Year" equals the given year.
df_2018, df_2019, df_2020, df_2021, df_2022 = (
    final_df.loc[final_df["Year"].eq(year)]
    for year in (2018, 2019, 2020, 2021, 2022)
)

In [None]:
# Create a data frame of population from year to year.
#
# Each zip code must contribute its population exactly once per year, no matter how many
# rows (crimes) it has. We therefore keep a single Population value per (Year, Zipcode)
# pair and then add those values up per year.
#
# The previous `.unique().sum().sum()` chain summed a Series of NumPy arrays, which is only
# correct when every zip code has exactly one distinct population value: arrays of
# different lengths broadcast against each other (double counting) or fail outright.

# NOTE(review): this adds Population over every row, so a zip code is counted once per
# recorded crime; it is not used in this cell -- confirm it is what downstream code expects.
total_population = final_df["Population"].sum()

years = [2018, 2019, 2020, 2021, 2022]

population_by_year = (
    final_df.groupby(["Year", "Zipcode"])["Population"]
    .first()                          # one (non-null) value per zip code per year
    .groupby(level="Year")
    .sum()                            # total over all zip codes in that year
    .reindex(years, fill_value=0)     # fixed year order; a year without rows becomes 0
)
population = population_by_year.tolist()

population_df = pd.DataFrame({"Year" : years, "Population" : population})
population_df

In [None]:
# High-level overview of all crime: number of recorded crimes per primary type,
# most frequent type first.
#
# Counting the "Primary Type" column directly yields one total per type. Calling
# value_counts() on the grouped frame instead counts every distinct combination of the
# remaining columns within each type, i.e. one entry per distinct row rather than one
# total per crime type.
total_crime = final_df["Primary Type"].value_counts()
total_crime

In [None]:
# Look at the overall crime rate per capita on the year level

# Count the recorded crimes in each year (rows with a non-null "Primary Type")
year_crime_cnt = final_df.groupby("Year")["Primary Type"].count()

# Total population for each year.
# Population appears to be recorded per zip code (other cells de-duplicate it by Zipcode),
# so the yearly total is the sum of one value per (Year, Zipcode) pair. Taking only the
# first distinct value per year, as before, divided the citywide crime count by a single
# zip code's population.
pop_per_year = (
    final_df.groupby(["Year", "Zipcode"])["Population"]
    .first()                  # one value per zip code per year
    .groupby(level="Year")
    .sum()                    # add the zip codes up within each year
)

# Calculate per capita count (both Series are indexed by Year, so they align on it)
per_capita_crime_rate = year_crime_cnt/pop_per_year

# Put it in a dataframe
yearly_per_capita = pd.DataFrame({"Year" : per_capita_crime_rate.index, "Per Capita Count" : per_capita_crime_rate.values})
yearly_per_capita

In [None]:
# Line chart of the yearly per capita crime rate
rate_ax = plt.gca()
rate_ax.plot(yearly_per_capita["Year"], yearly_per_capita["Per Capita Count"])

# Axis labels and title
rate_ax.set_xlabel("Year")
rate_ax.set_ylabel("Per Capita Count")
rate_ax.set_title("Per Capita Rate of Crime")

# Show exactly one tick per year instead of matplotlib's automatic tick positions
new_xticks = [2018, 2019, 2020, 2021, 2022]
rate_ax.set_xticks(new_xticks)

plt.show()

In [None]:
# Look at per capita crime rate per district over the five year timespan
district_df = final_df[["Year", "District", "Primary Type", "Population", "Zipcode"]]

# Population per district.
#   1. keep one Population value per (District, Zipcode, Year), so a zip code is not
#      counted once per crime;
#   2. average each zip code's yearly values, because the crime count below spans all
#      years while the population is a per-year figure;
#   3. add up the zip codes that occur in the district.
# The earlier `.unique().apply(lambda x: x[0]).sum(level=0)` kept only the first
# population value found in each district, and `sum(level=...)` no longer exists in
# pandas 2.x (it raises TypeError).
# NOTE(review): a zip code that appears under several districts contributes its full
# population to each of them -- confirm this is acceptable for the analysis.
pop_per_district = (
    district_df.groupby(["District", "Zipcode", "Year"])["Population"]
    .first()
    .groupby(level=["District", "Zipcode"])
    .mean()
    .groupby(level="District")
    .sum()
)

# Number of recorded crimes per district over the whole timespan
cnt_per_district = district_df.groupby("District")["Primary Type"].count()

# Get the per capita crime count (both Series are indexed by District)
per_cap_crime_cnt = cnt_per_district/pop_per_district

# Put in a data frame
district_per_capita = pd.DataFrame({"District" : per_cap_crime_cnt.index, "Per Capita Crime Count" : per_cap_crime_cnt.values})
district_per_capita

In [None]:
# Bar chart with one bar per district; plot.bar returns the Axes it drew on
district_ax = district_per_capita.plot.bar(x='District', y='Per Capita Crime Count', legend=False)

# Axis labels and title, set on that Axes
district_ax.set_xlabel('District')
district_ax.set_ylabel('Per Capita Crime Count')
district_ax.set_title('Per Capita Crime Count by District')

# Render the figure
plt.show()

In [None]:
# Look at per capita crime rate per primary type over the five year timespan

# Citywide population used as the common denominator for every crime type:
# one Population value per (Zipcode, Year), averaged over the years for each zip code
# (the crime counts span all years), then summed over all zip codes.
# Previously each type was divided by the first population value that happened to appear
# for that type -- a single zip code's figure, and a different one per type -- so the
# resulting rates were not comparable with each other.
citywide_population = (
    district_df.groupby(["Zipcode", "Year"])["Population"]
    .first()
    .groupby(level="Zipcode")
    .mean()
    .sum()
)

# Get the crime count per primary type
cnt_per_type = district_df.groupby("Primary Type")["Primary Type"].count()

# Same population for every type, kept as a Series indexed by primary type
pop_per_type = pd.Series(citywide_population, index=cnt_per_type.index)

# Get the per capita crime count
per_cap_crime_type = cnt_per_type/pop_per_type

# Put in a data frame
type_per_capita = pd.DataFrame({"Primary Type" : per_cap_crime_type.index, "Per Capita Crime Count" : per_cap_crime_type.values})
type_per_capita

In [None]:
# Graph the per capita crime count for each primary crime type
type_per_capita.plot.bar(x='Primary Type', y='Per Capita Crime Count', legend=False)

# Set labels and title (the title previously said "by District", copied from the
# district chart, although this chart is broken down by primary type)
plt.xlabel('Primary Type')
plt.ylabel('Per Capita Crime Count')
plt.title('Per Capita Crime Count by Primary Type')

# Display the plot
plt.show()