# Dependencies

In [2]:
import pandas as pd
import requests
import json
import numpy as np
import matplotlib.pyplot as plt

import datetime as dt
import scipy.stats as sts
import scipy.ndimage as ndimage

from math import isnan
from matplotlib.animation import FuncAnimation
from matplotlib import animation, rc
from Resources.config import app_token


# Additional Dependencies that require updating your environment

In [None]:
import descartes
import geopandas as gpd
from geopandas import GeoDataFrame
from shapely.geometry import Point, Polygon
%matplotlib inline

# Import Resources

In [None]:
## Source: Crimes - 2001 to present
## https://data.cityofchicago.org/Public-Safety/Crimes-2001-to-present/ijzp-q8t2
url_crime_data = "https://data.cityofchicago.org/resource/ijzp-q8t2.json"
# Half-open date range: a date-only upper bound such as '2018-12-31' is compared as
# midnight at the start of that day, so BETWEEN would drop nearly all of 31 Dec 2018.
date_filter="$WHERE=DATE >= '2014-01-01' AND DATE < '2019-01-01'"
crime_records_limit = 2000000
url_crime_data_with_filter = f"{url_crime_data}?{date_filter}&$limit={crime_records_limit}&$$app_token={app_token}"

# Fail loudly on an HTTP error instead of building a DataFrame out of an error payload,
# and never hang forever on a stalled connection (10 s to connect, 300 s between reads).
crime_response = requests.get(url_crime_data_with_filter, timeout=(10, 300))
crime_response.raise_for_status()
crime_data_json = crime_response.json()
crime_data_df = pd.DataFrame.from_records(crime_data_json)
crime=crime_data_df

# Use the same display-style column names as the street-light CSV
crime = crime.rename(columns={"date": "Date"
                           , "primary_type": "Primary Type"
                           , "description": "Description"
                           , "community_area": "Community Area"
                           , "longitude": "Longitude"
                           , "latitude": "Latitude"
                           , "id": "ID"
                          })
# The API returns every field as text; convert the numeric ones.
crime['Community Area'] = pd.to_numeric(crime['Community Area'],downcast='integer')
# Coordinates are fractional degrees, so an integer downcast can never apply to them.
crime['Longitude'] = pd.to_numeric(crime['Longitude'])
crime['Latitude'] = pd.to_numeric(crime['Latitude'])

In [6]:
# 311 street-light service requests (CSV export covering 2014-2018)
street_light_reports = "Resources/Service_Requests_Street_Lights.csv"

# ISO-8859-1 decoding lets pandas read the special characters in the file
street_light_reports_pd = pd.read_csv(street_light_reports, encoding="ISO-8859-1")

# Header normalisation: the creation-date header carries stray leading characters,
# and lower-case variants are mapped to the display-style names used in this notebook.
street_column_names = {
    "ÈÀCreation Date": "Creation Date",
    "Creation Date": "Creation Date",
    "status": "Status",
    "completion_date": "Completion Date",
    "service_request_number": "Service Request Number",
    "type_of_service_request": "Type of Service Request",
    "community_area": "Community Area",
    "longitude": "Longitude",
    "latitude": "Latitude",
}
street = street_light_reports_pd.rename(columns=street_column_names)

# Make sure both coordinate columns are numeric
for coordinate_column in ("Longitude", "Latitude"):
    street[coordinate_column] = pd.to_numeric(street[coordinate_column], downcast="integer")

In [7]:
# Community area reference file
community_area_names = "Resources/CommAreas_20200108.csv"

# Load it with ISO-8859-1 decoding so special characters are read correctly
community_area_names_pd = pd.read_csv(community_area_names, encoding="ISO-8859-1")

# Upper-case the two key headers when the file supplies them in lower case
comm = community_area_names_pd.rename(
    columns={"area_numbe": "AREA_NUMBE", "community": "COMMUNITY"}
)

In [8]:
# Population by Chicago community area (2017 census file)
pop_census_data = "Resources/Pop-Census-Data-by-Chicago-Community-Area-2017.csv"

# Load the census file; `pop` is a second name for the same frame
pop_census_data_pd = pd.read_csv(pop_census_data)
pop = pop_census_data_pd

# Upper-case the community names
pop["Community"] = pop["Community"].str.upper()

# Population and name only, shown largest community first
pop2 = pop.loc[:, ["Total Population", "Community"]]
pop2.sort_values("Total Population", ascending=False)

Unnamed: 0,Total Population,Community
0,102584,LAKE VIEW
1,97604,AUSTIN
2,96466,NEAR NORTH SIDE
3,84660,WEST TOWN
4,78037,BELMONT CRAGIN
...,...,...
72,7033,AVALON PARK
73,6565,RIVERDALE
74,5181,OAKLAND
75,3220,FULLER PARK


# Data cleanup

In [None]:
# Data cleanup - the creation-date header carries unnecessary leading characters in the CSV
street_light_reports_pd = street_light_reports_pd.rename(columns={'ÈÀCreation Date': 'Creation Date'})

# Parse the creation dates once and derive Year / Month / Day from that single index
creation_dates = pd.DatetimeIndex(street_light_reports_pd['Creation Date'])
street_light_reports_pd['Year'] = creation_dates.year
street_light_reports_pd['Month'] = creation_dates.month
street_light_reports_pd['Day'] = creation_dates.day

# Keep only requests created in 2014 through 2018 (Year > 2013 and Year < 2019)
street_light_reports_pd = street_light_reports_pd.loc[(street_light_reports_pd['Year']>2013) & (street_light_reports_pd['Year']<2019)]

# Drop columns the analysis does not use; errors="ignore" keeps this cell re-runnable
# after the columns have already been removed.
street_light_reports_pd = street_light_reports_pd.drop(columns=["Ward", "Police District"], errors="ignore")
street_light_reports_pd.head()

CLEANING AND FILTERING CRIME DATA

In [None]:
# Remove the records with zero as the community area. The .copy() makes `crime` an
# independent frame so the column assignments below do not write into a slice of the
# previous frame (which triggers SettingWithCopyWarning).
crime = crime.loc[crime['Community Area']!=0].copy()

# Parse the incident dates once, then derive Year, Month and Day from the same index
incident_dates = pd.DatetimeIndex(crime['Date'])
crime['Year'] = incident_dates.year
crime['Month'] = incident_dates.month
crime['Day'] = incident_dates.day

# Keep 2014 through 2018 only (Year > 2013 and Year < 2019)
crime = crime.loc[(crime['Year']>2013) & (crime['Year']<2019)]

CLEANING AND FILTERING 311 - STREET LIGHT OUTAGES DATA

In [None]:
# Add Year, Month and Day columns derived from the request creation date
for date_part in ('year', 'month', 'day'):
    street[date_part.capitalize()] = getattr(pd.DatetimeIndex(street['Creation Date']), date_part)

# Restrict to requests created in 2014 through 2018
in_study_period = street['Year'].gt(2013) & street['Year'].lt(2019)
street = street.loc[in_study_period]

# Discard requests with a missing community area, then those coded as area 0
street_nonull = street.dropna(subset=['Community Area'])
street_nozero = street_nonull.loc[street_nonull['Community Area'].ne(0)]

MERGING CRIME AND COMMUNITY AREAS DATA

In [None]:
# Left-join the community-area table onto every crime record (area number is the key)
crime_comm = crime.merge(comm, how="left", left_on="Community Area", right_on="AREA_NUMBE")


MERGING 311-STREET LIGHT OUTAGES AND COMMUNITY AREAS DATA

In [None]:
# Left-join the community-area table onto every cleaned street-light request
street_comm = street_nozero.merge(comm, how="left", left_on="Community Area", right_on="AREA_NUMBE")

## Question 1 - Which Chicago neighborhoods have the most street light outages reported between 2014-2018?

In [None]:
# Number of outage reports per community area.
# Count distinct service request numbers: counting distinct creation dates only tells us
# on how many different dates an area had a report, not how many reports it had.
community_area_counts = street_light_reports_pd.groupby(["Community Area"])["Service Request Number"].nunique()

# Display the per-area totals as a one-column data frame
community_area_summary_table = pd.DataFrame({"Total Street Light Outage Reportings": community_area_counts})
community_area_summary_table

In [None]:
# Two-column lookup: community name alongside its area number
community_area_names = community_area_names_pd.loc[:, ['COMMUNITY', 'AREA_NUMBE']]
community_area_names

In [None]:
# Put the outage totals and the community names in one frame; the right join keeps
# every community from the names table.
community_data_merge = community_area_summary_table.merge(
    community_area_names,
    how="right",
    left_on='Community Area',
    right_on='AREA_NUMBE',
)

# Order communities from most to fewest reported outages
community_sort = community_data_merge.sort_values('Total Street Light Outage Reportings', ascending=False)

# Top 10 neighborhoods by street light outage reports, 2014 to 2018
community_sort.head(10)

In [None]:
# Bar chart comparing street light outage reports across community areas, 2014-2018
community_chart_path = "Charts/Street_Light_Outage_by_Community"
community_chart = community_sort.plot.bar(
    x='COMMUNITY',
    y='Total Street Light Outage Reportings',
    figsize=(20,3),
    rot=90,
)
plt.savefig(community_chart_path)

In [None]:
# One frame of street light outage reports per calendar year
report_year = street_light_reports_pd['Year']
year_2014 = street_light_reports_pd[report_year.eq(2014)]
year_2015 = street_light_reports_pd[report_year.eq(2015)]
year_2016 = street_light_reports_pd[report_year.eq(2016)]
year_2017 = street_light_reports_pd[report_year.eq(2017)]
year_2018 = street_light_reports_pd[report_year.eq(2018)]

In [None]:
# Outage reports per community area in 2014.
# Distinct service request numbers are counted; distinct creation dates would only give
# the number of different dates with a report, which understates the number of reports.
counts_by_year_2014 = year_2014.groupby(["Community Area","Year"])["Service Request Number"].nunique()

# Display 2014 summary of street light outages in a data frame
counts_by_year_2014_summary_table = pd.DataFrame({ "# of Outages in 2014": counts_by_year_2014})

# Sort from most to fewest outages to find the communities with the most reports
community_sort_2014 = counts_by_year_2014_summary_table.sort_values(by='# of Outages in 2014', ascending=False)

# Display the top 5 community areas with the most street light outages in 2014
community_sort_2014.head(5)

In [None]:
# Outage reports per community area in 2015.
# Distinct service request numbers are counted; distinct creation dates would only give
# the number of different dates with a report, which understates the number of reports.
counts_by_year_2015 = year_2015.groupby(["Community Area","Year"])["Service Request Number"].nunique()

# Display 2015 summary of street light outages in a data frame
counts_by_year_2015_summary_table = pd.DataFrame({"# of Outages in 2015": counts_by_year_2015})

# Sort from most to fewest outages to find the communities with the most reports
community_sort_2015 = counts_by_year_2015_summary_table.sort_values(by='# of Outages in 2015', ascending=False)

# Display the top 5 community areas with the most street light outages in 2015
community_sort_2015.head(5)

In [None]:
# Outage reports per community area in 2016.
# Distinct service request numbers are counted; distinct creation dates would only give
# the number of different dates with a report, which understates the number of reports.
counts_by_year_2016 = year_2016.groupby(["Community Area","Year"])["Service Request Number"].nunique()

# Display 2016 summary of street light outages in a data frame
counts_by_year_2016_summary_table = pd.DataFrame({"# of Outages in 2016": counts_by_year_2016})

# Sort from most to fewest outages to find the communities with the most reports
community_sort_2016 = counts_by_year_2016_summary_table.sort_values(by='# of Outages in 2016', ascending=False)

# Display the top 5 community areas with the most street light outages in 2016
community_sort_2016.head(5)

In [None]:
# Outage reports per community area in 2017.
# Distinct service request numbers are counted; distinct creation dates would only give
# the number of different dates with a report, which understates the number of reports.
counts_by_year_2017 = year_2017.groupby(["Community Area","Year"])["Service Request Number"].nunique()

# Display 2017 summary of street light outages in a data frame
counts_by_year_2017_summary_table = pd.DataFrame({"# of Outages in 2017": counts_by_year_2017})

# Sort from most to fewest outages to find the communities with the most reports
community_sort_2017 = counts_by_year_2017_summary_table.sort_values(by='# of Outages in 2017', ascending=False)

# Display the top 5 community areas with the most street light outages in 2017
community_sort_2017.head(5)

In [None]:
# Outage reports per community area in 2018.
# Distinct service request numbers are counted; distinct creation dates would only give
# the number of different dates with a report, which understates the number of reports.
counts_by_year_2018 = year_2018.groupby(["Community Area","Year"])["Service Request Number"].nunique()

# Display 2018 summary of street light outages in a data frame
counts_by_year_2018_summary_table = pd.DataFrame({"# of Outages in 2018": counts_by_year_2018})

# Sort from most to fewest outages to find the communities with the most reports
community_sort_2018 = counts_by_year_2018_summary_table.sort_values(by='# of Outages in 2018', ascending=False)

# Display the top 5 community areas with the most street light outages in 2018
community_sort_2018.head(5)

## Question 2 - What address has the most street light outage reports? Which year had the most street light outage reports?

In [None]:
# Keep only the columns needed for the per-year and per-address questions
srsl_columns = ['Service Request Number', 'Type of Service Request', 'Street Address',
                'ZIP Code', 'Year', 'Month', 'Day']
srsl_df = street_light_reports_pd[srsl_columns]

# Number of service requests in each year, as a one-column frame
most_by_year = srsl_df.groupby(["Year"])["Service Request Number"].count().to_frame()

# Bar chart of the yearly totals, saved under Charts/
service_chart = most_by_year.plot(kind='bar', legend=0)
service_chart.set_title("Total Number of Service Request Per Year")
service_chart.set_ylabel("Number of Service Request")
plt.savefig("Charts/Total Number of Service Request Per Year")

In [None]:
def _top_address_for_year(requests_df, year):
    """Return a one-row frame with the street address that has the most requests in `year`.

    The frame is indexed by (Year, Street Address) and has a single 'Total' column.
    The Series is renamed explicitly so the column is called 'Total' regardless of
    what name the installed pandas gives to a value_counts result.
    """
    address_counts = (
        requests_df.loc[requests_df['Year'] == year]
        .groupby('Year')['Street Address']
        .value_counts()      # sorted descending, so the first row is the busiest address
        .rename('Total')
        .to_frame()
    )
    return address_counts.head(1)


# Busiest address for each year, keyed by year
report_years = range(2014, 2019)
most_request_by_year = {year: _top_address_for_year(srsl_df, year) for year in report_years}

# Stack the yearly winners (DataFrame.append no longer exists in pandas 2; concat is the replacement)
most_request_append = pd.concat(list(most_request_by_year.values()))

# Pivot so that each winning street address becomes its own column, one row per year
most_request_pt = most_request_append.pivot_table(index='Year', columns='Street Address', values='Total')

# One horizontal bar series per address. Styles are assigned in column order and reuse the
# original palette, so the chart no longer depends on hard-coded address names.
bar_styles = [('blue', None), ('red', None), ('black', 0.75), ('black', 0.5), ('black', 0.25)]
x_axis = np.arange(len(most_request_pt))

fig, ax = plt.subplots(figsize=(15, 5))
for position, address in enumerate(most_request_pt.columns):
    bar_color, bar_alpha = bar_styles[position % len(bar_styles)]
    ax.barh(x_axis, most_request_pt[address], color=bar_color, alpha=bar_alpha, label=address, align="center")

# Label the rows with the years actually present in the pivot table
ax.set_yticks(x_axis)
ax.set_yticklabels(most_request_pt.index)
ax.legend(loc="best")
ax.set_title("Most Service Request Per Year")
ax.set_ylabel("By Year")
ax.set_xlabel("Number of Service Request")
fig.savefig("Charts/Most Service Request Per Year")

## Question 3 - Average time it takes for outages to get fixed per neighborhood?

In [None]:
## Join the street light requests with the community table to display community names.
service_requests_by_community = pd.merge(street_light_reports_pd, community_area_names_pd, left_on='Community Area', right_on='AREA_NUMBE', how='left')
## Drop the coordinate and shape columns that the resolution-time analysis does not use
service_requests_by_community = service_requests_by_community.drop(["Latitude","Longitude","Location","AREA_NUMBE","the_geom","PERIMETER","AREA","COMAREA_","COMAREA_ID","AREA_NUM_1","SHAPE_AREA","SHAPE_LEN"], axis=1)
## Resolution time in whole days. A request without a completion date is treated as
## completed today, so results for open requests change with the day the notebook is run.
## pd.Timestamp replaces the pd.datetime alias, which modern pandas no longer provides.
today = pd.Timestamp.now().normalize()
completion_dates = pd.to_datetime(service_requests_by_community['Completion Date']).fillna(today)
creation_dates = pd.to_datetime(service_requests_by_community['Creation Date'])
service_requests_by_community['Resolution Time(days)'] = (completion_dates - creation_dates).dt.days
service_requests_by_community.head()

In [None]:
## Keep rows whose community area is not 0, then keep one row per service request number
has_real_area = service_requests_by_community['Community Area'].ne(0)
service_requests_by_community = (
    service_requests_by_community
    .loc[has_real_area]
    .drop_duplicates(subset="Service Request Number")
)
service_requests_by_community.head()

In [None]:
# Mean resolution time (days) for every community / year pair, as a flat table with
# COMMUNITY, Year and Resolution Time(days) columns. The former stand-alone sort_values
# call was dropped: its result was never assigned, so it had no effect.
service_requests_by_community_average_time = (
    service_requests_by_community
    .groupby(["COMMUNITY", "Year"])["Resolution Time(days)"]
    .mean()
    .reset_index()
)
service_requests_by_community_average_time

In [None]:
def _average_time_for_year(year):
    """Rows of the average-resolution table for one year, longest resolution time first."""
    table = service_requests_by_community_average_time
    return table.loc[table["Year"] == year].sort_values(by="Resolution Time(days)", ascending=False)


# One sorted table per year
service_requests_by_community_average_time_2014 = _average_time_for_year(2014)
service_requests_by_community_average_time_2015 = _average_time_for_year(2015)
service_requests_by_community_average_time_2016 = _average_time_for_year(2016)
service_requests_by_community_average_time_2017 = _average_time_for_year(2017)
service_requests_by_community_average_time_2018 = _average_time_for_year(2018)

### Visuals for Question 3: average resolution time by community, per year

In [None]:
def _plot_average_time(yearly_table, year, bar_color, bar_alpha):
    """Draw one year's average-resolution bar chart, save it under Charts/, and return the axes."""
    chart = yearly_table.plot.bar(
        x='COMMUNITY',
        y='Resolution Time(days)',
        label="Outage Resolution Time(in days)",
        alpha=bar_alpha,
        align="center",
        color=bar_color,
        rot=90,
        figsize=(30,5),
        title=f"Average Time To Fix Street Lights Outages in {year}",
    )
    plt.savefig(f"Charts/average_time_chart_{year}.png", bbox_inches = 'tight')
    return chart


# Same chart for each year; only the colour and opacity differ
average_time_chart_2014 = _plot_average_time(service_requests_by_community_average_time_2014, 2014, "green", 0.5)
average_time_chart_2015 = _plot_average_time(service_requests_by_community_average_time_2015, 2015, "blue", 0.5)
average_time_chart_2016 = _plot_average_time(service_requests_by_community_average_time_2016, 2016, "red", 0.5)
average_time_chart_2017 = _plot_average_time(service_requests_by_community_average_time_2017, 2017, "blue", 0.7)
average_time_chart_2018 = _plot_average_time(service_requests_by_community_average_time_2018, 2018, "green", 0.7)

## Question 4 - What are the crime rates/reports in neighborhoods with street light outages?

CRIME ANALYTICS

In [None]:
def _crime_counts(group_keys):
    """Count crime IDs per combination of `group_keys`, returned as a flat table."""
    return crime_comm.groupby(group_keys)['ID'].count().reset_index()


def _largest_first(counts):
    """Order a count table from the highest ID count to the lowest."""
    return counts.sort_values(by='ID', ascending=False)


# Crime reports per community area
c_bycomm = _largest_first(_crime_counts(['COMMUNITY']))

# Crime reports per year
c_byyear = _largest_first(_crime_counts(['Year']))

# Crime reports per year and month
c_byyear_bymonth = _largest_first(_crime_counts(['Year','Month']))

# Crime reports per crime type
c_bytype = _largest_first(_crime_counts(['Primary Type']))

# Crime reports per community area and year
c_bycomm_byyear = _largest_first(_crime_counts(['COMMUNITY','Year']))

# Crime reports per community area, year and month
c_bycomm_byyear_bymonth = _largest_first(_crime_counts(['COMMUNITY','Year','Month']))

# Five most frequent crime types in AUSTIN
c_bycomm_byType = _crime_counts(['COMMUNITY','Primary Type'])
c_bycomm_byType = c_bycomm_byType.loc[c_bycomm_byType['COMMUNITY'].eq('AUSTIN')]
c_bycomm_byType = _largest_first(c_bycomm_byType).head(5)

In [None]:
# Crime report counts per community area
c_bycomm

In [None]:
# Crime report counts per year
c_byyear

In [None]:
# First rows of the crime report counts per year and month
c_byyear_bymonth.head()

In [None]:
# First rows of the crime report counts per primary crime type
c_bytype.head()

In [None]:
# First five rows of the per-crime-type counts for a single community
c_bycomm_byType.head(5)

311-STREET REQUEST ANALYTICS

In [None]:
def _request_counts(group_keys):
    """Count service request numbers per combination of `group_keys`, as a flat table."""
    return street_comm.groupby(group_keys)['Service Request Number'].count().reset_index()


# Service requests per community area
s_bycomm = _request_counts(['COMMUNITY'])

# Service requests per year
s_byyear = _request_counts(['Year'])

# Service requests per year and month
s_byyear_bymonth = _request_counts(['Year','Month'])

# Service requests per community area and year
s_bycomm_byyear = _request_counts(['COMMUNITY','Year'])

MERGING CRIME & 311 - STREET LIGHT OUTAGES & POPULATION DATA

In [None]:
# Crime counts joined with service request counts, one row per community
bycomm = c_bycomm.merge(s_bycomm, how="left", on="COMMUNITY")
# Add the population columns, matching COMMUNITY against the census 'Community' name
bycomm_pop = bycomm.merge(pop, how="left", left_on="COMMUNITY", right_on='Community')


CLEANING & CREATING A NEW COLUMN IN THE MERGED DATA

In [None]:
# Remove thousands separators from the population text, then convert to numbers
# (anything that still fails to parse becomes NaN)
population_text = bycomm_pop['Total Population'].astype(str).str.replace(',','')
bycomm_pop['Total Population'] = pd.to_numeric(population_text, errors='coerce')

# Crime report count (ID) divided by the service request count
bycomm_pop['No. of Service Requests to Crime Reports'] = bycomm_pop['ID'].div(bycomm_pop['Service Request Number'])


MERGED DATA ANALYTICS

In [None]:
# Order communities by the ratio column, highest value first
bycomm_pop = bycomm_pop.sort_values(by='No. of Service Requests to Crime Reports', ascending=False)

# Crime counts next to service request counts, per year
byyear = c_byyear.merge(s_byyear, how="left", on="Year")

# ... per year and month
byyearbymonth = c_byyear_bymonth.merge(s_byyear_bymonth, how="left", on=["Year","Month"])

# ... per community and year, followed by the community-area attributes
bycommbyyear = c_bycomm_byyear.merge(s_bycomm_byyear, how="left", on=["COMMUNITY","Year"])
bycommbyyear = bycommbyyear.merge(comm, how="left", on=["COMMUNITY"])

In [None]:
# Display the merged per-community table (crime counts, service requests, population, ratio)
bycomm_pop
# Notes:
# The pairs below appear to be "service requests : crime reports" for a community,
# i.e. 1 request for every N crime reports — TODO confirm against the ratio column.
# Forest Glen (1:3.73)
# Austin (1:31)
# Loop (1:57)

LET'S MAKE SOME VISUALS


In [None]:
# Community areas: crime reports (x) against street light service requests (y)

def _annotate_extreme(axes, table, values, pick, label, offset=(0, 0)):
    """Label the first community whose entry in `values` equals `pick(values)`.

    axes   -- matplotlib axes to annotate
    table  -- frame holding 'COMMUNITY', 'ID' and 'Service Request Number'
    values -- Series aligned with `table` in which the extreme value is looked up
    pick   -- function returning the extreme of a Series (e.g. pd.Series.max)
    label  -- text placed in parentheses after the community name
    offset -- (dx, dy) added to the point's coordinates to position the text
    """
    row = table.loc[values == pick(values)].iloc[0]
    # COMMUNITY is the merge key and always populated; the census 'Community' name is
    # NaN wherever the population merge found no match, which would break the label.
    axes.annotate(f"{row['COMMUNITY']} ({label})",
                  xy=(row['ID'] + offset[0], row['Service Request Number'] + offset[1]))


x=bycomm_pop['ID']
y=bycomm_pop['Service Request Number']
w=bycomm_pop['No. of Service Requests to Crime Reports']

fig, ax = plt.subplots(figsize=(20,10))
# Marker size is proportional to the community's population
ax.scatter(x,y,s=bycomm_pop['Total Population']*.003,c='purple',alpha=0.6)
ax.set_xlabel("Crime Reports")
ax.set_ylabel("Street Light Service Requests")
ax.set_title("Crime Reports vs. Street Light Service Requests by Community Area")

# Street light request extremes
_annotate_extreme(ax, bycomm_pop, y, pd.Series.max, "Largest Service Request", offset=(1000, 0))
_annotate_extreme(ax, bycomm_pop, y, pd.Series.min, "Smallest Service Request")

# Crime report extremes
_annotate_extreme(ax, bycomm_pop, x, pd.Series.max, "Largest Crime", offset=(-1000, -75))
_annotate_extreme(ax, bycomm_pop, x, pd.Series.min, "Smallest Crime")

# Ratio extremes
_annotate_extreme(ax, bycomm_pop, w, pd.Series.max, "Largest Service Request to Crime Report Ratio")
_annotate_extreme(ax, bycomm_pop, w, pd.Series.min, "Smallest  Service Request to Crime Report Ratio")

In [None]:
# Mapping approach adapted from:
# https://www.kaggle.com/threadid/geopandas-mapping-chicago-crimes

# NOTE(review): this is an absolute path on one contributor's machine, so the cell fails
# everywhere else. TODO: point it at the shapefile's location inside the repository.
boundaries_shapefile = r'C:\Users\dharti.patel\Desktop\NU_DS_Project1\Street-Lights\Street-Lights\geo_export_f11eb402-3887-43c2-a681-95fde0bf6fbf.shp'

# Shapefile read into a GeoDataFrame; used as the base layer of the maps
street_map = gpd.read_file(boundaries_shapefile)

In [None]:
# Street light outage maps

# The request coordinates are longitude/latitude in degrees, so they are tagged as
# EPSG:4326 (WGS84). The previous State Plane code (102671) did not describe these values.
crs = "EPSG:4326"

# Point geometry for every request joined to a community area
geometry = [Point(xy) for xy in zip(street_comm["Longitude"], street_comm["Latitude"])]
geo_df = gpd.GeoDataFrame(street_comm, crs=crs, geometry=geometry)

# Remove rows with any missing value, then rows whose latitude is below 41.0 (bad geocodes)
street_loc_df = geo_df.dropna()
street_loc_df = street_loc_df.drop(street_loc_df[(street_loc_df.Latitude < 41.0)].index)
street_loc_df['AREA_NUMBE'] = street_loc_df['AREA_NUMBE'].astype(int)

# Rebuild the geometry for the surviving rows
street_geometry = [Point(xy) for xy in zip(street_loc_df.Longitude, street_loc_df.Latitude)]
street_crs = crs
street_points = GeoDataFrame(street_loc_df, crs=street_crs, geometry=street_geometry)

# Base map: community boundaries, each labelled at its centroid, with all requests on top
chistreet_map = street_map.plot(figsize=(15,15), edgecolor='#7f7f7f', facecolor='#1f77b4', alpha=0.25, linewidth=2)
for community_name, boundary in zip(street_map['community'], street_map.geometry):
    # The label is passed positionally: the `s=` keyword is not accepted by newer matplotlib
    chistreet_map.annotate(community_name, xy=boundary.centroid.coords[0], ha='center', size=6)
street_points.plot(ax=chistreet_map, markersize=1, color='y', alpha=0.25)
chistreet_map.set_axis_off()

# One map per year. Year is numeric (derived from DatetimeIndex.year), so it must be
# compared with integers; comparing against the strings '2014'... selected no rows.
yearly_colors = {2014: 'red', 2015: 'blue', 2016: 'purple', 2017: 'green', 2018: 'orange'}
for year, point_color in yearly_colors.items():
    street_vc_points = street_points.loc[street_points['Year'] == year]
    street_vc_points_map = street_vc_points.plot(figsize=(10,10), markersize=1, color=point_color, alpha=0.2)
    street_vc_points_map.set_axis_off()
    street_vc_points_map.set(title=f'Street Light Outages {year}')



In [None]:
# Crime maps (BATTERY incidents only)
crime_comm_type=crime_comm.loc[crime_comm['Primary Type']=='BATTERY']

# The incident coordinates are longitude/latitude in degrees, so they are tagged as
# EPSG:4326 (WGS84). The previous State Plane code (102671) did not describe these values.
crs = "EPSG:4326"

# Point geometry for every battery incident
geometry = [Point(xy) for xy in zip(crime_comm_type["Longitude"], crime_comm_type["Latitude"])]
geo_df = gpd.GeoDataFrame(crime_comm_type, crs=crs, geometry=geometry)

# Remove rows with any missing value, then rows whose latitude is below 41.0 (bad geocodes)
crime_loc_df = geo_df.dropna()
crime_loc_df = crime_loc_df.drop(crime_loc_df[(crime_loc_df.Latitude < 41.0)].index)
crime_loc_df['AREA_NUMBE'] = crime_loc_df['AREA_NUMBE'].astype(int)

# Rebuild the geometry for the surviving rows
crime_geometry = [Point(xy) for xy in zip(crime_loc_df.Longitude, crime_loc_df.Latitude)]
crime_crs = crs
crime_points = GeoDataFrame(crime_loc_df, crs=crime_crs, geometry=crime_geometry)

# Base map: community boundaries, each labelled at its centroid, with all incidents on top
chicrime_map = street_map.plot(figsize=(15,15), edgecolor='#7f7f7f', facecolor='#1f77b4', alpha=0.25, linewidth=2)
for community_name, boundary in zip(street_map['community'], street_map.geometry):
    # The label is passed positionally: the `s=` keyword is not accepted by newer matplotlib
    chicrime_map.annotate(community_name, xy=boundary.centroid.coords[0], ha='center', size=6)
crime_points.plot(ax=chicrime_map, markersize=1, color='r', alpha=0.25)
chicrime_map.set_axis_off()

# One map per year. Year is numeric (derived from DatetimeIndex.year), so it must be
# compared with integers; comparing against the strings '2014'... selected no rows.
yearly_colors = {2014: 'red', 2015: 'blue', 2016: 'purple', 2017: 'green', 2018: 'orange'}
for year, point_color in yearly_colors.items():
    crime_vc_points = crime_points.loc[crime_points['Year'] == year]
    crime_vc_points_map = crime_vc_points.plot(figsize=(10,10), markersize=1, color=point_color, alpha=0.2)
    crime_vc_points_map.set_axis_off()
    crime_vc_points_map.set(title=f'Crime: Battery ({year})')