# Project 2
## Trevis Slagowski & Treyson Grange

In [None]:
import pandas as pd 
import seaborn as sns
import numpy as np
from scipy import stats
from matplotlib import pyplot as plt

In [None]:
# Load the two CSV files used throughout the notebook: the Austin zip code
# table and the 2015 crime/housing table.
zipcodes_path = 'datasets/AustinZipCodes.csv'
crime_path = 'datasets/crime-housing-austin-2015.csv'

zipcodes_df = pd.read_csv(zipcodes_path)
crime_df = pd.read_csv(crime_path)

## Data Exploration

In [None]:
# Preview the first few rows of the zip code table.
display(zipcodes_df.head())

In [None]:
# Preview the first few rows of the crime/housing table.
display(crime_df.head())

In [None]:
# List the column names available in the crime/housing table.
crime_df.columns

In [None]:
# Summary statistics (count, mean, spread, quartiles) as produced by
# DataFrame.describe() with its default column selection.
crime_df.describe()

In [None]:
# Print each column's dtype and non-null count, which helps spot missing data.
crime_df.info()

In [None]:
# Count how many records share each distinct 'Location' value.
display(crime_df['Location'].value_counts())

In [None]:
# Count how many records fall under each 'Clearance_Status' value.
display(crime_df['Clearance_Status'].value_counts())

## First Analysis

## Second Analysis

## Third Analysis

### Rental units affordable to an average teacher and the frequency of crime types in those Council Districts

In [None]:
# Show the distinct offense descriptions present in the data.
display(crime_df['Highest_NIBRS_UCR_Offense_Description'].unique())

In [None]:
# Count how many records belong to each Council District.
display(crime_df['Council_District'].value_counts())

#### First, we need to process the teacher rental housing affordability data. We can take the mean of the affordability percentage in each Council District.

In [None]:
def percentage_to_float(percentage_str):
    """Convert a percentage string such as ``'45%'`` into a fraction (``0.45``).

    Parameters
    ----------
    percentage_str : object
        Raw cell value. Only strings are parsed; anything else (for example
        the float NaN that pandas uses for a missing cell) is treated as
        missing.

    Returns
    -------
    float
        The numeric part divided by 100, or ``np.nan`` when the value is not
        a string or contains nothing but whitespace and/or ``%`` signs.

    Raises
    ------
    ValueError
        If the text left after removing padding and ``%`` is not a number.
    """
    if not isinstance(percentage_str, str):
        return np.nan

    # Strip padding first: a trailing space would otherwise shield the '%'
    # from strip('%') and make float() fail on input such as '25% '.
    cleaned = percentage_str.strip().strip('%').strip()
    if not cleaned:
        # A blank cell is missing data, not a malformed number.
        return np.nan
    return float(cleaned) / 100

# Turn the raw percentage strings into fractions, stored in a new column.
raw_teacher_share = crime_df['Rentalunitsaffordabletoaverageteacher']
crime_df['Rental_Affordable_to_Teacher'] = raw_teacher_share.apply(percentage_to_float)

# Mean affordability per Council District, indexed by district.
districts = crime_df.groupby('Council_District')
average_affordability = districts['Rental_Affordable_to_Teacher'].mean()

average_affordability.plot(
    kind='bar',
    ylabel='Rental Units affordable to Average Teacher',
    title='Average Affordability by Council District',
)

#### Next, we need to aggregate crime data. We will count the occurrences of each crime type in each Council District.

In [None]:
offense_col = 'Highest_NIBRS_UCR_Offense_Description'
unique_crimes = crime_df[offense_col].unique()


def _district_table(crime):
    """Build the per-district table for one offense type.

    The table has a '<crime>_Count' column (records per Council District) and
    an 'Average_Affordability' column; districts missing either value are
    dropped.
    """
    matching_rows = crime_df[crime_df[offense_col] == crime]
    per_district = matching_rows.groupby('Council_District').size()
    table = pd.DataFrame({
        f'{crime}_Count': per_district,
        'Average_Affordability': average_affordability,
    })
    return table.dropna()


# Map each offense description to its per-district table.
crime_data_dict = {crime: _district_table(crime) for crime in unique_crimes}

unique_crimes

#### Let's plot the results in a scatter plot

In [None]:
# Draw every offense type on one set of axes, each in its own colour taken
# from evenly spaced points of the rainbow colormap.
fig, ax = plt.subplots(figsize=(15, 10))
colors = plt.cm.rainbow(np.linspace(0, 1, len(unique_crimes)))

for crime, color in zip(unique_crimes, colors):
    district_table = crime_data_dict[crime]
    ax.scatter(
        x=district_table['Average_Affordability'],
        y=district_table[f'{crime}_Count'],
        color=color,
        label=crime,
    )

ax.set(
    xlabel='Rental Affordability to Average Teachers',
    ylabel='Crime Count',
    title='Crime Count vs. Housing Affordability',
)
# Anchor the legend outside the axes so it does not cover the points.
ax.legend(title='Crime Types', bbox_to_anchor=(1, 1), loc='upper left')

plt.show()

#### Check for any correlations between Rental Affordability to Average Teachers and the Crime Type

In [None]:
# Pearson correlation between per-district crime count and average
# affordability, computed separately for every offense type.
correlation_dict = {}

for crime in unique_crimes:
    district_table = crime_data_dict[crime]
    correlation_dict[crime] = stats.pearsonr(
        district_table[f'{crime}_Count'],
        district_table['Average_Affordability'],
    )

# Print one line per offense type with the pearsonr result.
for crime, result in correlation_dict.items():
    print(f"{crime}: {result}")

It looks like there is a correlation for Burglary, Agg Assault, and Murder. Let's look more closely at those. 

In [None]:
# Burglary count per district plotted against average affordability.
burglary_table = crime_data_dict['Burglary']

plt.figure(figsize=(10, 6))
burglary_ax = sns.scatterplot(data=burglary_table, x='Average_Affordability', y='Burglary_Count')
burglary_ax.set_title('Scatterplot of Burglary Count vs. Housing Affordability')
burglary_ax.set_xlabel('Rental Affordability to Average Teachers')
burglary_ax.set_ylabel('Burglary Count')
plt.show()

print(f"Burglary: {correlation_dict['Burglary']}")

In [None]:
# Aggravated assault count per district plotted against average affordability.
assault_table = crime_data_dict['Agg Assault']

plt.figure(figsize=(10, 6))
assault_ax = sns.scatterplot(data=assault_table, x='Average_Affordability', y='Agg Assault_Count')
assault_ax.set_title('Scatterplot of Agg Assault Count vs. Housing Affordability')
assault_ax.set_xlabel('Rental Affordability to Average Teachers')
assault_ax.set_ylabel('Agg Assault Count')
plt.show()

print(f"Agg Assault: {correlation_dict['Agg Assault']}")

In [None]:
# Murder count per district plotted against average affordability.
murder_table = crime_data_dict['Murder']

plt.figure(figsize=(10, 6))
murder_ax = sns.scatterplot(data=murder_table, x='Average_Affordability', y='Murder_Count')
murder_ax.set_title('Scatterplot of Murder Count vs. Housing Affordability')
murder_ax.set_xlabel('Rental Affordability to Average Teachers')
murder_ax.set_ylabel('Murder Count')
plt.show()

print(f"Murder: {correlation_dict['Murder']}")

There appears to be a statistically significant relationship between the counts of burglary, aggravated assault, and murder and the rental affordability to an average teacher across council districts. A positive coefficient indicates that as rental affordability increases, so does the crime count. However, we cannot definitively say that this is the only attribute contributing to crime in Council Districts.

## Fourth Analysis
### Crime Statistics by Location Data