In [None]:
# Import Dependencies
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as st
import numpy as np

In [None]:
# Locations of the cleaned CSV exports (paths are relative to the notebook).

# Police incident files: the main extract and the separate 2020 extract
crime_master_csv = "cleandata_output/Police_Master.csv"
crime_master_2020_csv = "cleandata_output/Police_Master_2020.csv"

# Parcel files: the main extract and the separate 2020 extract
parcel_master_csv = "cleandata_output/Parcel_Master.csv"
parcel_master_2020_csv = "cleandata_output/Parcel_Master_2020.csv"

# Census file
census_master_csv = "cleandata_output/Census_Master.csv"

# Read each CSV into its own DataFrame, in the same order as the original cell
crime_df, crime_2020_df, parcel_df, parcel_2020_df, census_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        crime_master_csv,
        crime_master_2020_csv,
        parcel_master_csv,
        parcel_master_2020_csv,
        census_master_csv,
    )
)

In [None]:
#Michael Start

In [None]:
#Michael End

In [None]:
#Emilio Start

In [None]:
# Per-neighborhood aggregates, both indexed by NEIGHBORHOOD:
#   crime_by_nbh  -> number of non-null OFFENSE entries
#   parcel_by_nbh -> median parcel TOTALVALUE
crime_by_nbh = crime_df.groupby('NEIGHBORHOOD')[['OFFENSE']].count()
parcel_by_nbh = parcel_df.groupby('NEIGHBORHOOD')[['TOTALVALUE']].median()

In [None]:
# Number of neighborhoods that have a non-null median TOTALVALUE
# (parcel_by_nbh holds one row per NEIGHBORHOOD).
parcel_by_nbh.count()

In [None]:
# Number of neighborhoods that have a non-null OFFENSE count; compare with the parcel count above.
# TODO: investigate which neighborhoods were excluded, and why.
crime_by_nbh.count()

In [None]:
# Keep every neighborhood that has a parcel median and attach its offense count;
# neighborhoods with no match in crime_by_nbh end up with a null OFFENSE.
housing_crime = parcel_by_nbh.merge(crime_by_nbh, how='left', on='NEIGHBORHOOD')

# How many neighborhoods are missing an offense count after the join
housing_crime['OFFENSE'].isna().sum()

In [None]:
# Show the neighborhoods whose offense count is missing after the left join
housing_crime[housing_crime['OFFENSE'].isna()]

In [None]:
#Plotting

In [None]:
# Scatter of median property value (x) against offense count (y), one point per neighborhood.

def _thousands(value, _pos):
    """Tick formatter: integer part of the tick value with thousands separators."""
    return "{:,}".format(int(value))

x_values = housing_crime['TOTALVALUE']
y_values = housing_crime['OFFENSE']

fig, ax = plt.subplots(figsize=(10,10))
ax.scatter(x_values, y_values, color='#1A6675')
ax.set_title('Median Property Value vs. Crime, by Neighborhood', fontsize=18)
ax.set_xlabel('PROPERTY VALUE MEDIAN ($)', fontsize=14)
ax.set_ylabel('CRIME RATE', fontsize=14)

# Same comma-separated tick labels on both axes
for axis in (ax.yaxis, ax.xaxis):
    axis.set_major_formatter(plt.FuncFormatter(_thousands))

plt.tight_layout()
plt.savefig('charts_output/01_s_Property_vs_Crime.png')
plt.show()

In [None]:
# Correlation and Regression
# Fit offense count (y_values) against median property value (x_values).
#
# housing_crime was built with a left merge, so OFFENSE can be null for
# neighborhoods without a crime match. A single NaN in either input propagates
# through the least-squares fit and turns slope, intercept and r into NaN, so
# the fit uses only rows where both values are present. When nothing is
# missing the mask selects every row and the result is unchanged.
complete_rows = x_values.notna() & y_values.notna()
(slope, intercept, rvalue, pvalue, stderr) = st.linregress(x_values[complete_rows], y_values[complete_rows])

# Fitted values along the regression line for every neighborhood
regress_values = x_values * slope + intercept

# Text holding the line equation
equation = f'y = {round(slope, 6)}x + {round(intercept)}'

plt.figure(figsize=(10,10))
plt.scatter(x_values, y_values, color='#1A6675')
# NOTE(review): the annotation position is hard-coded in data coordinates.
plt.annotate(equation, (600000,2000), fontsize=15, color='red')
plt.plot(x_values, regress_values, "r-")
plt.xlabel('PROPERTY VALUE MEDIAN ($)', fontsize=14)
plt.ylabel('CRIME RATE', fontsize=14)
plt.title('Median Property Value vs. Crime, by Neighborhood', fontsize=18)

# Comma-separated integer tick labels on both axes
ax = plt.gca()
ax.get_yaxis().set_major_formatter(plt.FuncFormatter(lambda x, loc: "{:,}".format(int(x))))
ax.get_xaxis().set_major_formatter(plt.FuncFormatter(lambda x, loc: "{:,}".format(int(x))))

print(f'The correlation coefficient value r is {rvalue}')
print(f'r-squared is {rvalue**2}')
print(equation)

plt.tight_layout()
plt.savefig('charts_output/02_s_Correlation_Regression.png')
plt.show()

In [None]:
# Preview the first rows of the crime data
crime_df.head()

In [None]:
# Seasonality: number of non-null OFFENSE entries per MONTH,
# once for the main crime extract and once for the 2020 extract.
crime_monthly = crime_df['OFFENSE'].groupby(crime_df['MONTH']).count()
crime_monthly_2020 = crime_2020_df['OFFENSE'].groupby(crime_2020_df['MONTH']).count()

In [None]:
# Bar chart of offense counts per month for the main crime extract.
# The tick labels are positional: bar i gets month_labels[i].
# NOTE(review): assumes crime_monthly has twelve rows in calendar order - confirm.
month_ticks = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
# The fifth label used to be a second 'Mar'; it is the label for May.
month_labels = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

crime_season_plot = crime_monthly.plot(kind='bar', x='MONTH', y='OFFENSE', color='#755814', figsize=(18, 5))
plt.xticks(month_ticks, month_labels, fontsize=12, rotation=0)
plt.xlabel('MONTH', fontsize=14)
plt.ylabel('CRIME RATE', fontsize=14)
plt.title('Crime Seasonality', fontsize=18)

# Comma-separated integer tick labels on the y axis
ax = plt.gca()
ax.get_yaxis().set_major_formatter(plt.FuncFormatter(lambda x, loc: "{:,}".format(int(x))))

plt.tight_layout()
plt.savefig('charts_output/03_b_Seasonality.png')
plt.show()

In [None]:
# Bar chart of offense counts per month for the 2020 extract.
# The tick labels are positional: bar i gets month_labels_2020[i].
# NOTE(review): assumes crime_monthly_2020 has seven rows (Jan-Jul) in calendar order - confirm.
month_ticks_2020 = [0, 1, 2, 3, 4, 5, 6]
# The fifth label used to be a second 'Mar'; it is the label for May.
month_labels_2020 = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul']

crime_season_2020_plot = crime_monthly_2020.plot(kind='bar', x='MONTH', y='OFFENSE', color='#C29221', figsize=(18, 5))
plt.xlabel('MONTH', fontsize=14)
plt.xticks(month_ticks_2020, month_labels_2020, fontsize=12, rotation=0)
plt.ylabel('CRIME RATE', fontsize=14)
plt.title('Crime Seasonality 2020', fontsize=18)

# Comma-separated integer tick labels on the y axis
ax = plt.gca()
ax.get_yaxis().set_major_formatter(plt.FuncFormatter(lambda x, loc: "{:,}".format(int(x))))

plt.tight_layout()
plt.savefig('charts_output/04_b_Seasonality_2020.png')
plt.show()

In [None]:
# Preview the per-neighborhood median TOTALVALUE table before plotting its distribution
parcel_by_nbh.head()

In [None]:
# Histogram (8 bins) of the per-neighborhood median property values.
# Original author's observation: the distribution is positively skewed (skewed to the right).
parcel_by_nbh.plot.hist(bins=8, alpha=0.5, color='#1A6675', figsize=(18, 10))
plt.title('Property Value Median Histogram, 8 bins', fontsize=18)
plt.ylabel('FREQUENCY', fontsize=14)
plt.xlabel('PROPERTY VALUE ($)', fontsize=14)
plt.ticklabel_format(style='plain')

# Comma-separated integer tick labels on the x axis
ax = plt.gca()
ax.get_xaxis().set_major_formatter(plt.FuncFormatter(lambda x, loc: "{:,}".format(int(x))))

plt.tight_layout()
# NOTE(review): the file name says "Seasonality_2020" although this is the
# property-value histogram; left unchanged in case the path is referenced elsewhere.
plt.savefig('charts_output/05_h_Seasonality_2020.png')
# plt.show was referenced without parentheses, so it was never actually called.
plt.show()

In [None]:
#Emilio End

In [None]:
#Matt Start

# ALL YEARS (2017-2019) DATAFRAME

In [None]:
# Build medhome_crime: one row per neighborhood with the median home value,
# total / violent / non-violent crime counts, the matching rates per 100
# residents, and the population. All rows of parcel_df and crime_df are used
# (no year filter in this cell).

# Find Median Home price by Neighborhood (Series named TOTALVALUE, indexed by NEIGHBORHOOD)
parcel_median = parcel_df.groupby(["NEIGHBORHOOD"])["TOTALVALUE"].median()

# Find Total Crime by Neighborhood: count of non-null YEAR entries per neighborhood,
# so the count lands in a column that is still called YEAR
total_crime = crime_df[["NEIGHBORHOOD", "YEAR"]].groupby(["NEIGHBORHOOD"]).count()

# Find violent crime numbers (same counting scheme, restricted to CATEGORY == "Violent")
crime_breakdown_violent = crime_df.loc[crime_df["CATEGORY"] == "Violent"]
crime_breakdown_violent = crime_breakdown_violent[["NEIGHBORHOOD", "YEAR"]].groupby(["NEIGHBORHOOD"]).count()

# Find non-violent crime numbers (restricted to CATEGORY == "Non-Violent")
crime_breakdown_nonviolent = crime_df.loc[crime_df["CATEGORY"] == "Non-Violent"]
crime_breakdown_nonviolent = crime_breakdown_nonviolent[["NEIGHBORHOOD", "YEAR"]].groupby(["NEIGHBORHOOD"]).count()

# Merge all crime data with median home price - rename violent/non-violent
# Each left merge keeps every neighborhood present in parcel_median.
# nbh_crime_all is not used again in this cell.
nbh_crime_all = pd.merge(parcel_median, total_crime, on="NEIGHBORHOOD", how='left')
nbh_crime_violent = pd.merge(parcel_median, crime_breakdown_violent, on="NEIGHBORHOOD", how='left')
nbh_crime_nonviolent = pd.merge(parcel_median, crime_breakdown_nonviolent, on="NEIGHBORHOOD", how='left')

# Both inputs of the first merge carry a YEAR column, which is what produces the
# YEAR_x (total) / YEAR_y (violent) names renamed below; the second merge adds
# the non-violent count as plain YEAR.
# NOTE(review): the second merge also keys on TOTALVALUE, and both violent and
# non-violent counts come from frames built off parcel_median - presumably
# neighborhoods without parcels keep a total count but get null breakdowns; confirm.
medhome_crime = pd.merge(total_crime, nbh_crime_violent, on=["NEIGHBORHOOD"], how='left')
medhome_crime = pd.merge(medhome_crime, nbh_crime_nonviolent, on=["NEIGHBORHOOD", "TOTALVALUE"], how='left')

medhome_crime = medhome_crime.rename(columns={"TOTALVALUE": "Median Home Value", "YEAR_x": "Total Crime", "YEAR_y": "Violent Crime", "YEAR": "Non-Violent Crime"})
medhome_crime = medhome_crime[["Median Home Value", "Total Crime", "Violent Crime", "Non-Violent Crime"]]

# Add population (the "Total Population" column used below comes from census_df)
medhome_crime = pd.merge(medhome_crime, census_df, on="NEIGHBORHOOD", how='left')

# Find Crime Rates: count divided by (population / 100), i.e. crimes per 100 residents,
# rounded to two decimals
total_crime_rate = (medhome_crime["Total Crime"] / (medhome_crime["Total Population"] / 100)).round(2)
violent_crime_rate = (medhome_crime["Violent Crime"] / (medhome_crime["Total Population"] / 100)).round(2)
nonviolent_crime_rate = (medhome_crime["Non-Violent Crime"] / (medhome_crime["Total Population"] / 100)).round(2)

#Add crime rates to dataframe
medhome_crime["Crime Rate (Total)"] = total_crime_rate
medhome_crime["Violent Crime Rate"] = violent_crime_rate
medhome_crime["Non-Violent Crime Rate"] = nonviolent_crime_rate

#Re-arrange columns (this also drops any other columns brought in from census_df)
medhome_crime = medhome_crime[["NEIGHBORHOOD",
                               "Median Home Value", 
                               "Total Crime", 
                               "Violent Crime", 
                               "Non-Violent Crime",
                               "Crime Rate (Total)",
                               "Violent Crime Rate",
                               "Non-Violent Crime Rate",
                               "Total Population"]]

#Save to CSV (the DataFrame index is written as the first column, the to_csv default)
medhome_crime.to_csv('cleandata_output/medhome_crime.csv')

# Display the finished table
medhome_crime

In [None]:
# Scatter of median home value against violent and non-violent crime counts,
# one pair of points per neighborhood.
# Columns are selected by label rather than by position, so the plot keeps
# using the intended data even if the column order of medhome_crime changes.
medianhomevalue = medhome_crime["Median Home Value"]
total_violent_crime = medhome_crime["Violent Crime"]
total_nonviolent_crime = medhome_crime["Non-Violent Crime"]

plt.figure(figsize=(10,10))
plt.scatter(medianhomevalue, total_violent_crime, marker='o', color='#FA3B02', label="Violent")
plt.scatter(medianhomevalue, total_nonviolent_crime, marker='o', color='#DAA520', label="Non-Violent")
plt.xlabel('PROPERTY VALUE MEDIAN ($)', fontsize=14)
plt.ylabel('CRIME RATE', fontsize=14 )
plt.title('Property Value Median vs. Crime, by Neighborhood', fontsize=18)
plt.legend()

plt.ticklabel_format(style='plain')
# Comma-separated integer tick labels on both axes
ax = plt.gca()
ax.get_yaxis().set_major_formatter(plt.FuncFormatter(lambda x, loc: "{:,}".format(int(x))))
ax.get_xaxis().set_major_formatter(plt.FuncFormatter(lambda x, loc: "{:,}".format(int(x))))

plt.tight_layout()
plt.savefig('charts_output/06_s_propval_crime.png')
plt.show()

# YEARLY DATAFRAMES

In [None]:
##### 2019 #####
# Build medhome_crime_2019: one row per neighborhood with the median home value,
# crime counts and crime rates per 100 residents, using only 2019 assessments
# and 2019 offenses.
parcel_df_2019 = parcel_df.loc[parcel_df["ASSESSMENT_YEAR"] == 2019]
crime_df_2019 = crime_df.loc[crime_df["YEAR"] == 2019]

# Find Median Home price by Neighborhood
parcel_median_2019 = parcel_df_2019.groupby(["NEIGHBORHOOD"])["TOTALVALUE"].median()

# Find Total Crime by Neighborhood (count of non-null YEAR entries, stored in a column named YEAR)
total_crime_2019 = crime_df_2019[["NEIGHBORHOOD", "YEAR"]].groupby(["NEIGHBORHOOD"]).count()

# Find violent crime numbers.
# The mask is taken from the 2019 subset itself. Masking the subset with a
# Series from the full crime_df only worked through implicit index alignment.
crime_breakdown_violent_2019 = crime_df_2019.loc[crime_df_2019["CATEGORY"] == "Violent"]
crime_breakdown_violent_2019 = crime_breakdown_violent_2019[["NEIGHBORHOOD", "YEAR"]].groupby(["NEIGHBORHOOD"]).count()

# Find non-violent crime numbers (mask again taken from the 2019 subset)
crime_breakdown_nonviolent_2019 = crime_df_2019.loc[crime_df_2019["CATEGORY"] == "Non-Violent"]
crime_breakdown_nonviolent_2019 = crime_breakdown_nonviolent_2019[["NEIGHBORHOOD", "YEAR"]].groupby(["NEIGHBORHOOD"]).count()

# Merge all crime data with median home price - rename violent/non-violent
nbh_crime_2019 = pd.merge(parcel_median_2019, total_crime_2019, on="NEIGHBORHOOD", how='left')
nbh_crime_violent_2019 = pd.merge(parcel_median_2019, crime_breakdown_violent_2019, on="NEIGHBORHOOD", how='left')
nbh_crime_nonviolent_2019 = pd.merge(parcel_median_2019, crime_breakdown_nonviolent_2019, on="NEIGHBORHOOD", how='left')

# The first merge joins two frames that both have a YEAR column, producing
# YEAR_x (total) and YEAR_y (violent); the second adds the non-violent count as YEAR.
medhome_crime_2019 = pd.merge(total_crime_2019, nbh_crime_violent_2019, on=["NEIGHBORHOOD"], how='left')
medhome_crime_2019 = pd.merge(medhome_crime_2019, nbh_crime_nonviolent_2019, on=["NEIGHBORHOOD", "TOTALVALUE"], how='left')

medhome_crime_2019 = medhome_crime_2019.rename(columns={"TOTALVALUE": "Median Home Value", "YEAR_x": "Total Crime", "YEAR_y": "Violent Crime", "YEAR": "Non-Violent Crime"})
medhome_crime_2019 = medhome_crime_2019[["Median Home Value", "Total Crime", "Violent Crime", "Non-Violent Crime"]]

# Add population
medhome_crime_2019 = pd.merge(medhome_crime_2019, census_df, on="NEIGHBORHOOD", how='left')

# Find Crime Rates: crimes per 100 residents, rounded to two decimals
total_crime_rate_2019 = (medhome_crime_2019["Total Crime"] / (medhome_crime_2019["Total Population"] / 100)).round(2)
violent_crime_rate_2019 = (medhome_crime_2019["Violent Crime"] / (medhome_crime_2019["Total Population"] / 100)).round(2)
nonviolent_crime_rate_2019 = (medhome_crime_2019["Non-Violent Crime"] / (medhome_crime_2019["Total Population"] / 100)).round(2)

# Add crime rates to dataframe
medhome_crime_2019["Crime Rate (Total)"] = total_crime_rate_2019
medhome_crime_2019["Violent Crime Rate"] = violent_crime_rate_2019
medhome_crime_2019["Non-Violent Crime Rate"] = nonviolent_crime_rate_2019

# Re-arrange columns
medhome_crime_2019 = medhome_crime_2019[["NEIGHBORHOOD",
                                         "Median Home Value",
                                         "Total Crime",
                                         "Violent Crime",
                                         "Non-Violent Crime",
                                         "Crime Rate (Total)",
                                         "Violent Crime Rate",
                                         "Non-Violent Crime Rate",
                                         "Total Population"]]

# Save to CSV
medhome_crime_2019.to_csv('cleandata_output/medhome_crime_2019.csv')

medhome_crime_2019

In [None]:
##### 2018 #####
# Build medhome_crime_2018: one row per neighborhood with the median home value,
# crime counts and crime rates per 100 residents, using only 2018 assessments
# and 2018 offenses.
parcel_df_2018 = parcel_df.loc[parcel_df["ASSESSMENT_YEAR"] == 2018]
crime_df_2018 = crime_df.loc[crime_df["YEAR"] == 2018]

# Find Median Home price by Neighborhood
parcel_median_2018 = parcel_df_2018.groupby(["NEIGHBORHOOD"])["TOTALVALUE"].median()

# Find Total Crime by Neighborhood (count of non-null YEAR entries, stored in a column named YEAR)
total_crime_2018 = crime_df_2018[["NEIGHBORHOOD", "YEAR"]].groupby(["NEIGHBORHOOD"]).count()

# Find violent crime numbers.
# The mask is taken from the 2018 subset itself. Masking the subset with a
# Series from the full crime_df only worked through implicit index alignment.
crime_breakdown_violent_2018 = crime_df_2018.loc[crime_df_2018["CATEGORY"] == "Violent"]
crime_breakdown_violent_2018 = crime_breakdown_violent_2018[["NEIGHBORHOOD", "YEAR"]].groupby(["NEIGHBORHOOD"]).count()

# Find non-violent crime numbers (mask again taken from the 2018 subset)
crime_breakdown_nonviolent_2018 = crime_df_2018.loc[crime_df_2018["CATEGORY"] == "Non-Violent"]
crime_breakdown_nonviolent_2018 = crime_breakdown_nonviolent_2018[["NEIGHBORHOOD", "YEAR"]].groupby(["NEIGHBORHOOD"]).count()

# Merge all crime data with median home price - rename violent/non-violent
nbh_crime_2018 = pd.merge(parcel_median_2018, total_crime_2018, on="NEIGHBORHOOD", how='left')
nbh_crime_violent_2018 = pd.merge(parcel_median_2018, crime_breakdown_violent_2018, on="NEIGHBORHOOD", how='left')
nbh_crime_nonviolent_2018 = pd.merge(parcel_median_2018, crime_breakdown_nonviolent_2018, on="NEIGHBORHOOD", how='left')

# The first merge joins two frames that both have a YEAR column, producing
# YEAR_x (total) and YEAR_y (violent); the second adds the non-violent count as YEAR.
medhome_crime_2018 = pd.merge(total_crime_2018, nbh_crime_violent_2018, on=["NEIGHBORHOOD"], how='left')
medhome_crime_2018 = pd.merge(medhome_crime_2018, nbh_crime_nonviolent_2018, on=["NEIGHBORHOOD", "TOTALVALUE"], how='left')

medhome_crime_2018 = medhome_crime_2018.rename(columns={"TOTALVALUE": "Median Home Value", "YEAR_x": "Total Crime", "YEAR_y": "Violent Crime", "YEAR": "Non-Violent Crime"})
medhome_crime_2018 = medhome_crime_2018[["Median Home Value", "Total Crime", "Violent Crime", "Non-Violent Crime"]]

# Add population
medhome_crime_2018 = pd.merge(medhome_crime_2018, census_df, on="NEIGHBORHOOD", how='left')

# Find Crime Rates: crimes per 100 residents, rounded to two decimals
total_crime_rate_2018 = (medhome_crime_2018["Total Crime"] / (medhome_crime_2018["Total Population"] / 100)).round(2)
violent_crime_rate_2018 = (medhome_crime_2018["Violent Crime"] / (medhome_crime_2018["Total Population"] / 100)).round(2)
nonviolent_crime_rate_2018 = (medhome_crime_2018["Non-Violent Crime"] / (medhome_crime_2018["Total Population"] / 100)).round(2)

# Add crime rates to dataframe
medhome_crime_2018["Crime Rate (Total)"] = total_crime_rate_2018
medhome_crime_2018["Violent Crime Rate"] = violent_crime_rate_2018
medhome_crime_2018["Non-Violent Crime Rate"] = nonviolent_crime_rate_2018

# Re-arrange columns
medhome_crime_2018 = medhome_crime_2018[["NEIGHBORHOOD",
                                         "Median Home Value",
                                         "Total Crime",
                                         "Violent Crime",
                                         "Non-Violent Crime",
                                         "Crime Rate (Total)",
                                         "Violent Crime Rate",
                                         "Non-Violent Crime Rate",
                                         "Total Population"]]

# Save to CSV
medhome_crime_2018.to_csv('cleandata_output/medhome_crime_2018.csv')

medhome_crime_2018

In [None]:
##### 2017 #####
# Build medhome_crime_2017: one row per neighborhood with the median home value,
# crime counts and crime rates per 100 residents, using only 2017 assessments
# and 2017 offenses.
parcel_df_2017 = parcel_df.loc[parcel_df["ASSESSMENT_YEAR"] == 2017]
crime_df_2017 = crime_df.loc[crime_df["YEAR"] == 2017]

# Find Median Home price by Neighborhood
parcel_median_2017 = parcel_df_2017.groupby(["NEIGHBORHOOD"])["TOTALVALUE"].median()

# Find Total Crime by Neighborhood (count of non-null YEAR entries, stored in a column named YEAR)
total_crime_2017 = crime_df_2017[["NEIGHBORHOOD", "YEAR"]].groupby(["NEIGHBORHOOD"]).count()

# Find violent crime numbers.
# The mask is taken from the 2017 subset itself. Masking the subset with a
# Series from the full crime_df only worked through implicit index alignment.
crime_breakdown_violent_2017 = crime_df_2017.loc[crime_df_2017["CATEGORY"] == "Violent"]
crime_breakdown_violent_2017 = crime_breakdown_violent_2017[["NEIGHBORHOOD", "YEAR"]].groupby(["NEIGHBORHOOD"]).count()

# Find non-violent crime numbers (mask again taken from the 2017 subset)
crime_breakdown_nonviolent_2017 = crime_df_2017.loc[crime_df_2017["CATEGORY"] == "Non-Violent"]
crime_breakdown_nonviolent_2017 = crime_breakdown_nonviolent_2017[["NEIGHBORHOOD", "YEAR"]].groupby(["NEIGHBORHOOD"]).count()

# Merge all crime data with median home price - rename violent/non-violent
nbh_crime_2017 = pd.merge(parcel_median_2017, total_crime_2017, on="NEIGHBORHOOD", how='left')
nbh_crime_violent_2017 = pd.merge(parcel_median_2017, crime_breakdown_violent_2017, on="NEIGHBORHOOD", how='left')
nbh_crime_nonviolent_2017 = pd.merge(parcel_median_2017, crime_breakdown_nonviolent_2017, on="NEIGHBORHOOD", how='left')

# The first merge joins two frames that both have a YEAR column, producing
# YEAR_x (total) and YEAR_y (violent); the second adds the non-violent count as YEAR.
medhome_crime_2017 = pd.merge(total_crime_2017, nbh_crime_violent_2017, on=["NEIGHBORHOOD"], how='left')
medhome_crime_2017 = pd.merge(medhome_crime_2017, nbh_crime_nonviolent_2017, on=["NEIGHBORHOOD", "TOTALVALUE"], how='left')

medhome_crime_2017 = medhome_crime_2017.rename(columns={"TOTALVALUE": "Median Home Value", "YEAR_x": "Total Crime", "YEAR_y": "Violent Crime", "YEAR": "Non-Violent Crime"})
medhome_crime_2017 = medhome_crime_2017[["Median Home Value", "Total Crime", "Violent Crime", "Non-Violent Crime"]]

# Add population
medhome_crime_2017 = pd.merge(medhome_crime_2017, census_df, on="NEIGHBORHOOD", how='left')

# Find Crime Rates: crimes per 100 residents, rounded to two decimals
total_crime_rate_2017 = (medhome_crime_2017["Total Crime"] / (medhome_crime_2017["Total Population"] / 100)).round(2)
violent_crime_rate_2017 = (medhome_crime_2017["Violent Crime"] / (medhome_crime_2017["Total Population"] / 100)).round(2)
nonviolent_crime_rate_2017 = (medhome_crime_2017["Non-Violent Crime"] / (medhome_crime_2017["Total Population"] / 100)).round(2)

# Add crime rates to dataframe
medhome_crime_2017["Crime Rate (Total)"] = total_crime_rate_2017
medhome_crime_2017["Violent Crime Rate"] = violent_crime_rate_2017
medhome_crime_2017["Non-Violent Crime Rate"] = nonviolent_crime_rate_2017

# Re-arrange columns.
# This must select from the 2017 frame: it used to select from
# medhome_crime_2018, which silently replaced the whole 2017 table (and
# therefore the CSV and every later 2017 chart) with 2018 data.
medhome_crime_2017 = medhome_crime_2017[["NEIGHBORHOOD",
                                         "Median Home Value",
                                         "Total Crime",
                                         "Violent Crime",
                                         "Non-Violent Crime",
                                         "Crime Rate (Total)",
                                         "Violent Crime Rate",
                                         "Non-Violent Crime Rate",
                                         "Total Population"]]

# Save to CSV
medhome_crime_2017.to_csv('cleandata_output/medhome_crime_2017.csv')

medhome_crime_2017


# USING DATA TO GRAPH & CHART

In [None]:
# Stacked bar chart of 2017 violent / non-violent crime counts per neighborhood,
# with neighborhoods ordered from the highest to the lowest median home value.

# Bar colors, in the order of the plotted columns: violent first, non-violent second
colors = ["#FA3B02", '#DAA520']

medhome_crime_2017_sort = medhome_crime_2017.sort_values(by="Median Home Value", ascending=False)

crime_bar_2017 = medhome_crime_2017_sort.plot(
    kind='bar',
    x='NEIGHBORHOOD',
    y=['Violent Crime', 'Non-Violent Crime'],
    stacked=True,
    figsize=(18,5),
    color=colors,
)

# DataFrame.plot returns the axes it drew on, so label that object directly
ax = crime_bar_2017
ax.set_title("Crime by Neighborhood : 2017", fontsize=18)
ax.set_xlabel("NEIGHBORHOOD", fontsize=14)
ax.set_ylabel("CRIME RATE", fontsize=14)
ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, loc: "{:,}".format(int(x))))

plt.tight_layout()
plt.savefig('charts_output/07_b_2017_Crime_by_nbh.png')
plt.show()

In [None]:
# 2018 Crime (Violent/Non-Violent) by Neighborhood - sorted by Value

# Bar colors (violent, non-violent). They are defined in this cell so the chart
# does not depend on whichever cell last assigned the global `colors`.
colors = ["#FA3B02", '#DAA520']

# Sort by Median Home Value, highest first
medhome_crime_2018_sort = medhome_crime_2018.sort_values(by="Median Home Value", ascending=False)

# Plot using bar graph
crime_bar_2018 = medhome_crime_2018_sort.plot(kind='bar', x='NEIGHBORHOOD', y=['Violent Crime', 'Non-Violent Crime'], stacked=True, figsize=(18,5), color=colors)
plt.ylabel("CRIME RATE", fontsize=14)
plt.xlabel("NEIGHBORHOOD", fontsize=14)
plt.title("Crime by Neighborhood : 2018", fontsize=18)

# Comma-separated integer tick labels on the y axis, matching the 2017 and 2019 charts
ax = plt.gca()
ax.get_yaxis().set_major_formatter(plt.FuncFormatter(lambda x, loc: "{:,}".format(int(x))))

plt.tight_layout()
plt.savefig('charts_output/08_b_2018_Crime_by_nbh.png')
plt.show()

In [None]:
# 2019 Crime (Violent/Non-Violent) by Neighborhood - sorted by Value

# Bar colors (violent, non-violent). They are defined in this cell so the chart
# does not depend on whichever cell last assigned the global `colors`.
colors = ["#FA3B02", '#DAA520']

# Sort by Median Home Value, highest first
medhome_crime_2019_sort = medhome_crime_2019.sort_values(by="Median Home Value", ascending=False)

# Plot using bar graph. The title is set once below (with its font size)
# instead of also being passed to plot(), where it was immediately overwritten.
crime_bar_2019 = medhome_crime_2019_sort.plot(kind='bar', x='NEIGHBORHOOD', y=['Violent Crime', 'Non-Violent Crime'], stacked=True, figsize=(18,5), color=colors)
plt.ylabel("CRIME RATE", fontsize=14)
plt.xlabel("NEIGHBORHOOD", fontsize=14)
plt.title("Crime by Neighborhood : 2019", fontsize=18)

# Comma-separated integer tick labels on the y axis
ax = plt.gca()
ax.get_yaxis().set_major_formatter(plt.FuncFormatter(lambda x, loc: "{:,}".format(int(x))))

plt.tight_layout()
plt.savefig('charts_output/09_b_2019_Crime_by_nbh.png')
plt.show()

# FINDING QUARTILES FOR HYPOTHESIS TESTING

In [None]:
# Quartiles and IQR-based outlier screen of parcel TOTALVALUE, per assessment year.
# `years` and `home_value_list` are kept for the box plot that follows
# (one Series of values per year, in the same order as `years`).
years = [2017, 2018, 2019]
home_value_list = []

for year in years:
    # All parcel values assessed in this year
    total_value = parcel_df.loc[parcel_df["ASSESSMENT_YEAR"] == year, "TOTALVALUE"]
    home_value_list.append(total_value)

    # Quartiles and the 1.5 * IQR fences
    quartiles = total_value.quantile([.25,.5,.75])
    lowerq, upperq = quartiles[0.25], quartiles[0.75]
    iqr = upperq - lowerq
    lower_bound = lowerq - 1.5 * iqr
    upper_bound = upperq + 1.5 * iqr

    # Values strictly outside the fences
    beyond_fences = (total_value > upper_bound) | (total_value < lower_bound)
    outliers = total_value.loc[beyond_fences]
    maxoutlier, minoutlier = outliers.max(), outliers.min()

    # Outlier report for this year
    print(
        f"{year}'s outliers are:",
        f"{outliers}",
        f"Max outlier is {maxoutlier}",
        f"Min outlier is {minoutlier}",
        " ",
        sep="\n",
    )

    # Quartiles for hypothesis testing
    print(
        f"{year}'s quartiles are:",
        f"{quartiles}",
        " ",
        sep="\n",
    )

In [None]:
# Box plot of parcel TOTALVALUE for each year in `years`, using the per-year
# Series collected in home_value_list.

# Fill color for the outlier (flier) markers
dot_outlier = {'markerfacecolor': '#1A6675'}

fig1, ax1 = plt.subplots(figsize=(15, 10))
ax1.boxplot(home_value_list, labels=years, flierprops=dot_outlier)
ax1.set_title('Property Value by Year', fontsize=18)
ax1.set_ylabel('PROPERTY VALUE ($)', fontsize=14)

# ax1 is the only axes on the figure, so it is also the current axes
ax = ax1
ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, loc: "{:,}".format(int(x))))

plt.tight_layout()
plt.savefig('charts_output/10_bp_MedianValue_Year.png')
plt.show()

In [None]:
#Matt End

In [None]:
#Emilio2 Starts

In [None]:
# Preview the first three rows of the all-years neighborhood summary
medhome_crime.head(3)

In [None]:
# Split the median home values into five equal-frequency bins (quintiles).
# The result is only displayed, not stored.
# NOTE(review): presumably the displayed bin edges are the source of the
# hard-coded `bins` in the next cell - confirm.
pd.qcut(medhome_crime['Median Home Value'], q=5)

In [None]:
# Label every neighborhood with a home-value tier ('low' ... 'high').
# The bin edges are fixed numbers; the first interval includes its lower edge.
# NOTE(review): the edges look like rounded quintile boundaries from an earlier
# run - they will not follow the data if the inputs change; confirm.
bins = (0, 203400.0, 236100.0, 284700.0, 387600.0, 10000000)
group_labels = ('low', 'mid-low', 'mid', 'mid-high', 'high')

medhome_crime = medhome_crime.assign(
    Distribution=pd.cut(
        medhome_crime['Median Home Value'],
        bins=bins,
        labels=group_labels,
        include_lowest=True,
    )
)

In [None]:
# Display the summary table with the new Distribution column
medhome_crime

In [None]:
# Mean of the median home value and of the three crime rates within each value tier
medhome_crime_dist = (
    medhome_crime
    .groupby('Distribution')[['Median Home Value', 'Crime Rate (Total)', 'Violent Crime Rate', 'Non-Violent Crime Rate']]
    .mean()
    .rename(columns={'Median Home Value': 'Mean of Median Property Value'})
)
medhome_crime_dist

In [None]:
def _total_crime_in(tier):
    """Return the 'Total Crime' values of the neighborhoods whose Distribution equals `tier`."""
    return medhome_crime.loc[medhome_crime['Distribution'] == tier, 'Total Crime']


# One sample of total crime counts per home-value tier, lowest tier first
population1 = _total_crime_in('low')
population2 = _total_crime_in('mid-low')
population3 = _total_crime_in('mid')
population4 = _total_crime_in('mid-high')
population5 = _total_crime_in('high')

In [None]:
# Independent two-sample t-test on Total Crime between the 'low' tier (population1)
# and the 'high' tier (population5); equal_var=False means the two groups are
# not assumed to share a variance (Welch's t-test).
st.ttest_ind(population1, population5, equal_var=False)

In [None]:
# Preview the summary table before plotting Total Crime by Distribution
medhome_crime.head()

In [None]:
# Box plot of Total Crime for each home-value tier (the Distribution column)
medhome_crime.boxplot(column="Total Crime", by="Distribution", figsize=(18, 10))

# Label the axes that boxplot just drew on
ax = plt.gca()
ax.set_xlabel('GROUPS', fontsize=14)
ax.set_ylabel('CRIME RATE', fontsize=14)
ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, loc: "{:,}".format(int(x))))

plt.savefig('charts_output/11_bp_bp_grouped-by-distribution.png')
plt.show()

In [None]:
 # One-way ANOVA of Total Crime across the five home-value tiers
crime1, crime2, crime3, crime4, crime5 = [
    medhome_crime.loc[medhome_crime['Distribution'] == tier, 'Total Crime']
    for tier in ('low', 'mid-low', 'mid', 'mid-high', 'high')
]

st.f_oneway(crime1, crime2, crime3, crime4, crime5)

# Other charts

In [None]:
# Per-tier sums for the charts below: raw crime counts, and the per-100-resident rates.
# NOTE(review): pie_crime_norm adds up per-neighborhood rates - confirm that a
# sum (rather than a mean or a population-weighted rate) is what is intended.
pie_crime_values = medhome_crime.groupby('Distribution')[['Total Crime', 'Violent Crime', 'Non-Violent Crime']].sum()
pie_crime_norm = medhome_crime.groupby('Distribution')[['Crime Rate (Total)', 'Violent Crime Rate', 'Non-Violent Crime Rate']].sum()
pie_crime_norm

In [None]:
# Display the summed crime counts per home-value tier
pie_crime_values

In [None]:
# Display the crime DataFrame
crime_df

In [None]:
# Number of distinct OFFENSE values in the crime data
crime_df['OFFENSE'].nunique()

In [None]:
# Pie chart of how many crime records fall into each CATEGORY.
#
# The wedge colors get their own name. This cell used to rebind the global
# `colors`, which the per-year bar-chart cells use in the opposite order, so
# re-running one of those cells after this one would swap their bar colors.
# NOTE(review): wedges are colored in the index order of category_count -
# presumably 'Non-Violent' first, then 'Violent'; confirm.
pie_colors = ['#DAA520', '#FA3B02']

# Count of records per CATEGORY
grouped_category = crime_df.groupby('CATEGORY')
category_count = grouped_category['CATEGORY'].count()

plt.figure(figsize=(5,5))
crime_pie = category_count.plot(kind='pie', y='CATEGORY', subplots=True, autopct='%1.1f%%', colors=pie_colors)
plt.title('Violent & Non-Violent Crime Distribution', fontsize=18)
plt.tight_layout()
plt.savefig('charts_output/12_p_crime_distribution.png')
plt.show()

In [None]:
# Display the record count per CATEGORY that the pie chart is based on
category_count

In [None]:
# Bar chart of the median home value per neighborhood, highest value first.
# medhome_crime_sorted is reused by the crime bar charts that follow.
medhome_crime_sorted = medhome_crime.sort_values(by='Median Home Value', ascending=False)

median_plot = medhome_crime_sorted.plot(
    kind='bar',
    x='NEIGHBORHOOD',
    y='Median Home Value',
    figsize=(18,5),
    color='#1A6675',
)

# DataFrame.plot returns the axes it drew on, so label that object directly
ax = median_plot
ax.set_title('Property Value Median by Neighborhood', fontsize=18)
ax.set_xlabel('NEIGHBORHOOD', fontsize=14)
ax.set_ylabel('PROPERTY VALUE MEDIAN ($)', fontsize=14)
ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, loc: "{:,}".format(int(x))))

plt.tight_layout()
plt.savefig('charts_output/13_b_val-by-nbh.png')
plt.show()

In [None]:
# Stacked bar chart of violent / non-violent crime counts per neighborhood,
# in the same order as medhome_crime_sorted (highest median home value first).

# Bar colors, in the order of the plotted columns: violent first, non-violent second
colors_bar = ['#FA3B02', '#DAA520']

total_crime_plot = medhome_crime_sorted.plot(
    kind='bar',
    x='NEIGHBORHOOD',
    y=['Violent Crime', 'Non-Violent Crime'],
    figsize=(18,5),
    stacked=True,
    color=colors_bar,
)

# DataFrame.plot returns the axes it drew on, so label that object directly
ax = total_crime_plot
ax.set_title('Crime Rate by Neighborhood', fontsize=16)
ax.set_xlabel('NEIGHBORHOOD')
ax.set_ylabel('CRIME RATE')
ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, loc: "{:,}".format(int(x))))

plt.tight_layout()
plt.savefig('charts_output/14_crime-category-by-nbh.png')
plt.show()

In [None]:
# Stacked bar chart of violent / non-violent crime rates (per 100 residents)
# per neighborhood, in the order of medhome_crime_sorted.
# The plot() call no longer passes title='Housing Value Median by Neighborhood':
# that text described a different chart and was overwritten by plt.title() below.
rate_crime_plot = medhome_crime_sorted.plot(kind='bar', x='NEIGHBORHOOD', y=['Violent Crime Rate', 'Non-Violent Crime Rate'], figsize=(18,5),
                                            stacked=True, color=colors_bar)
plt.title('Normalized Crime Rate by Neighborhood', fontsize=18)
plt.xlabel('NEIGHBORHOOD')
plt.ylabel('CRIME RATE PER 100 PEOPLE')

# Rates can land on fractional ticks (e.g. 2.5). Formatting with int() would
# truncate those labels to a wrong whole number, so use the general format
# with a thousands separator; whole-number ticks render exactly as before.
ax = plt.gca()
ax.get_yaxis().set_major_formatter(plt.FuncFormatter(lambda x, loc: "{:,g}".format(x)))

plt.tight_layout()
plt.savefig('charts_output/15_b_crime-category-by-nbh_normal.png')
plt.show()

In [None]:
#Emilio2 Ends