# Project 2 Crime Housing
### team members: Bekah Grant, Beau Martin
### Dataset: Crime-housing-austin, AustinZipCodes

### Import crime housing data

In [None]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
from scipy import stats

# Load the 2015 Austin crime/housing CSV (path is relative to the notebook's working directory).
csv_path = 'crime-housing-austin-2015.csv'
crime_housing_df = pd.read_csv(csv_path)
crime_housing_df

In [None]:
# Count the rows for every (offense description, zip code, disability value) combination.
# groupby skips rows whose key is missing, so records lacking any of these
# three values do not contribute to Crime_Count.
group_keys = [
    'Highest_NIBRS_UCR_Offense_Description',
    'Zip_Code_Crime',
    'Populationwithdisability',
]
rows_per_group = crime_housing_df.groupby(group_keys).size()
crime_counts = rows_per_group.reset_index(name='Crime_Count')

crime_counts

In [None]:

# Attach a median household income to every per-offense count row.
# Only one income row per zip code is kept (the first occurrence) so the merge
# does not multiply the count rows; rows with any missing value are dropped afterwards.
income_by_zip = (
    crime_housing_df[['Zip_Code_Crime', 'Medianhouseholdincome']]
    .drop_duplicates('Zip_Code_Crime')
)
crime_counts_with_salary = crime_counts.merge(income_by_zip, on='Zip_Code_Crime').dropna()

# Make Medianhouseholdincome a float by stripping '$' and ',' characters first.
# The pattern is a raw string: '\$' in a normal string literal is an invalid
# escape sequence that triggers a warning on current Python versions.
crime_counts_with_salary['Medianhouseholdincome'] = (
    crime_counts_with_salary['Medianhouseholdincome']
    .replace({r'\$': '', ',': ''}, regex=True)
    .astype(float)
)

# Restrict the table to the offenses analysed in the following cells.
crime_list = ['Auto Theft', 'Burglary']  # list of crimes we want to use
is_selected_crime = crime_counts_with_salary['Highest_NIBRS_UCR_Offense_Description'].isin(crime_list)
crime_counts_with_salary = crime_counts_with_salary[is_selected_crime]
crime_counts_with_salary



### Plot both auto theft and burglary

In [None]:
# Scatter plot of crime count against median household income, one colour per offense.
colors = {'Theft': 'blue', 'Auto Theft': 'red', 'Burglary': 'black'}
offense_column = crime_counts_with_salary['Highest_NIBRS_UCR_Offense_Description']

fig, ax = plt.subplots(figsize=(5, 5))

# Draw one scatter series per offense so each gets its own legend entry.
for offense in offense_column.unique():
    offense_rows = crime_counts_with_salary[offense_column == offense]
    ax.scatter(
        x=offense_rows['Medianhouseholdincome'],
        y=offense_rows['Crime_Count'],
        c=colors[offense],
        label=offense,
    )

ax.set_xlabel('median_income')
ax.set_ylabel('crime_counts')
ax.legend()

plt.show()

### Auto theft plot and correlation coefficient

In [None]:
# Select the auto-theft rows, plot a regression of count on income (no confidence band),
# and show the Pearson correlation between the two columns.
is_auto_theft = crime_counts_with_salary['Highest_NIBRS_UCR_Offense_Description'] == 'Auto Theft'
auto_theft = crime_counts_with_salary[is_auto_theft]
sns.regplot(data=auto_theft, x='Medianhouseholdincome', y='Crime_Count', ci=None)
auto_theft_corr = stats.pearsonr(auto_theft['Crime_Count'], auto_theft['Medianhouseholdincome'])
display(auto_theft_corr)

### Burglary plot and correlation coefficient

In [None]:
# Select the burglary rows, plot a regression of count on income (no confidence band),
# and show the Pearson correlation between the two columns.
is_burglary = crime_counts_with_salary['Highest_NIBRS_UCR_Offense_Description'] == 'Burglary'
burglary = crime_counts_with_salary[is_burglary]
sns.regplot(data=burglary, x='Medianhouseholdincome', y='Crime_Count', ci=None)
burglary_corr = stats.pearsonr(burglary['Crime_Count'], burglary['Medianhouseholdincome'])
display(burglary_corr)

### Average number of auto thefts and burglaries per area

In [None]:
# Bar chart comparing the mean Crime_Count of the auto-theft and burglary tables.
mean_counts = [auto_theft['Crime_Count'].mean(), burglary['Crime_Count'].mean()]
bar_positions = [1, 2]
bar_labels = ['auto_theft', 'burglary']

plt.figure()
plt.bar(bar_positions, mean_counts, tick_label=bar_labels)
plt.title('Average number of crime type per area')

# The "N" reported here is the total of Crime_Count for each offense,
# i.e. the number of crimes, not the number of rows in each table.
total_auto_theft = sum(auto_theft['Crime_Count'])
total_burglary = sum(burglary['Crime_Count'])
display('N = {} (auto_theft) {} (burglary)'.format(total_auto_theft, total_burglary))

# Does crime occur more in communities with higher disability rates? Do people take advantage of the disabled?


In [None]:

# Determine how many crimes have been committed in each zip code.
crime_zip = (
    crime_housing_df
    .groupby('Zip_Code_Crime')
    .size()
    .reset_index(name='Zip_crime_count')
)

# One row of housing attributes per zip code (first occurrence kept).
zip_attributes = (
    crime_housing_df[['Zip_Code_Crime', 'Medianhouseholdincome', 'Medianhomevalue', 'Populationwithdisability']]
    .drop_duplicates('Zip_Code_Crime')
)

# Combine counts with attributes; a zip code missing ANY of the merged values is dropped.
crime_zip_info = crime_zip.merge(zip_attributes, on='Zip_Code_Crime').dropna()

# Convert percentage strings (trailing '%') into fractions.
disability_percent = crime_zip_info['Populationwithdisability'].str.rstrip('%').astype(float)
crime_zip_info['Populationwithdisability'] = disability_percent / 100

# Regression of crime count on disability fraction, plus the Pearson correlation.
sns.regplot(data=crime_zip_info, x='Populationwithdisability', y='Zip_crime_count', ci=None)
disability_crime_corr = stats.pearsonr(
    crime_zip_info['Zip_crime_count'],
    crime_zip_info['Populationwithdisability'],
)
display(disability_crime_corr)

In [None]:
# Make Medianhouseholdincome a float by stripping '$' and ',' characters first.
# The pattern is a raw string: '\$' in a normal string literal is an invalid
# escape sequence that triggers a warning on current Python versions.
# Note: this overwrites the column on crime_zip_info, which later cells also read.
crime_zip_info['Medianhouseholdincome'] = (
    crime_zip_info['Medianhouseholdincome']
    .replace({r'\$': '', ',': ''}, regex=True)
    .astype(float)
)

# Regression of median income on disability fraction, plus the Pearson correlation.
sns.regplot(x='Populationwithdisability', y='Medianhouseholdincome', data=crime_zip_info, ci=None)
display(stats.pearsonr(crime_zip_info.Medianhouseholdincome, crime_zip_info.Populationwithdisability))

# t-test: crimes committed in the zip codes with the highest disability rates vs. crimes committed in the zip codes with the lowest disability rates

In [None]:
# Zip codes whose disability fraction is at least 0.14 (the "highest" group for the t-test).
is_high_disability = crime_zip_info['Populationwithdisability'] >= 0.14
highest_disability = crime_zip_info[is_high_disability]
highest_disability


In [None]:
# Zip codes whose disability fraction is at most 0.03 (the "lowest" group for the t-test).
is_low_disability = crime_zip_info['Populationwithdisability'] <= 0.03
lowest_disability = crime_zip_info[is_low_disability]
lowest_disability

In [None]:
# Independent two-sample t-test on per-zip crime counts: highest- vs lowest-disability groups.
# NOTE(review): ttest_ind is called with its defaults, which assume equal variances in
# both groups; confirm that assumption holds or consider Welch's variant.
disability_ttest = stats.ttest_ind(
    highest_disability['Zip_crime_count'],
    lowest_disability['Zip_crime_count'],
)
display(disability_ttest)