## Fire Department Calls  in San Francisco


In [1]:
# Dependencies and Setup
import pandas as pd
import numpy as np
from IPython.display import display, HTML
import datetime
import matplotlib.pyplot as plt
from wordcloud import WordCloud, STOPWORDS 


In [2]:
pd.options.display.float_format = '{0:,.2f}'.format
pd.set_option('display.max_columns', 500)

# File to Load 
#fire_data_to_load = "Fire_Department_Calls_for_Service.csv"
fire_data_to_load = "Resources/Fire_Department_Calls_for_Service.csv"

fields = ["Incident Number","Call Date","Call Type", "Call Type Group","Address","Supervisor District","Location"]


fire_data = pd.read_csv(fire_data_to_load,usecols=fields, dtype={
 "Incident Number" : object,
 "Call Date" : object,
 "Call Type" : object,
 "Call Type Group" : object,
 "Address" : object,
 "Supervisor District" : object,
 "Location" : object}
 )


In [None]:
# View the data
#fire_data.head()

In [3]:
fire_data['Year'] = pd.DatetimeIndex(fire_data['Call Date']).year
#fire_data.dtypes

In [5]:
fire_data_year_selected = fire_data.loc[(fire_data["Year"] >= 2006) & (fire_data["Year"] <= 2018)]
#fire_data_year

In [6]:
#Analyze data - Get all possible values for Use Definition
call_type_list = fire_data_year_selected["Call Type Group"].unique()
call_type_list

array(['Potentially Life-Threatening', 'Fire', 'Alarm',
       'Non Life-threatening', nan], dtype=object)

In [7]:
fire_data_noDups = fire_data_year_selected.drop_duplicates(["Incident Number"])

In [8]:
fire_data_noDups.head()

Unnamed: 0,Incident Number,Call Type,Call Date,Address,Call Type Group,Supervisor District,Location,Year
149011,18153403,Medical Incident,12/31/2018,0 Block of 6TH ST,Potentially Life-Threatening,6,"(37.78114586126, -122.409026046516)",2018
149013,18153402,Outside Fire,12/31/2018,HAIGHT ST/ASHBURY ST,Fire,5,"(37.770007605724, -122.446933818288)",2018
149014,18153401,Medical Incident,12/31/2018,400 Block of CARL ST,Potentially Life-Threatening,5,"(37.76467688189, -122.45777819403)",2018
149017,18153400,Alarms,12/31/2018,800 Block of CLAYTON ST,Alarm,5,"(37.766548172887, -122.447850161714)",2018
149020,18153399,Medical Incident,12/31/2018,SUTTER ST/POLK ST,Non Life-threatening,3,"(37.787756652528, -122.420116076828)",2018


In [None]:

#fire_data_noDups2.dtypes
#WORD CLOUD for Call Type#

In [None]:
supervisor_districts = {
1  : "1-Richmond, Golden Gate Park",
2  : "2-Marina, Pacific Heights",
3  : "3-North Beach, Chinatown, Financial District",
4  : "4-Sunset",
5  : "5-Haight, Fillmore, Hayes Valley",
6  : "6-Union Square, Tenderloin, Civic Center, SOMA",
7  : "7-West Portal, St. Francis Wood, Stonestown",
8  : "8-The Castro, Noe Valley, Twin Peaks",
9  : "9-Mission District, Bernal Heights",
10 : "10-Potrero Hill, Bayview-Hunters Point",
11 : "11-Excelsior,  Ingleside, Outer Mission",
}

In [9]:
#CALLS TYPE OVER TIME
fire_data_calltype_solo= fire_data_noDups.pivot_table(index="Call Type", values='Incident Number', aggfunc=np.size)
fire_data_calltype_solo2 = pd.DataFrame(fire_data_calltype_solo)
#fire_data_calltype_solo2.head()
#fire_data_calltype_solo['Incident Number']

In [None]:
#Cloud Bubble
d = {}
#for series its. items() and for dataframe its iterrows()
for a, x in fire_data_calltype_solo['Incident Number'].items():
    d[a] = x
    
plt.figure( figsize=(20,10) )

wordcloud = WordCloud(background_color='white') 
wordcloud.generate_from_frequencies(frequencies=d)
plt.imshow(wordcloud, interpolation="bilinear")
#plt.figure(figsize=(16,16), facecolor='k')
#plt.imshow(wordcloud, interpolation="bilinear")
plt.axis("off")
#plt.tight_layout(pad=0)
plt.show()

In [11]:
# CALL TYPE Group OVER TIME
fire_data_calltype= fire_data_noDups.pivot_table(index="Year", columns="Call Type Group", values='Incident Number', aggfunc=np.size)
fire_data_calltype.head(200)

Call Type Group,Alarm,Fire,Non Life-threatening,Potentially Life-Threatening
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010,,,,1.0
2011,,,2.0,
2012,24880.0,4023.0,36522.0,55218.0
2013,25016.0,3070.0,38112.0,55560.0
2014,25793.0,2956.0,38886.0,57048.0
2015,25690.0,3206.0,43771.0,62726.0
2016,26061.0,3111.0,48475.0,64218.0
2017,25812.0,3351.0,52110.0,67038.0
2018,25744.0,3534.0,53747.0,66067.0


In [None]:
# #PLOT OF Calls OVER TIME
# plt = fire_data_calltype.plot()

# plt.show()

In [None]:
ax = fire_data_calltype.plot.bar(stacked=True, figsize=(12,8), legend = False)
plt.legend((fire_data_calltype.columns.tolist()),  bbox_to_anchor=(1.02, 1.0))

# Complete formatting the plot
plt.title("Fire Department Call Types 20012-2018")
plt.xlabel("Year")
plt.ylabel("Number of Calls")
plt.grid(linestyle='-', color = 'gray')

ax.get_yaxis().set_major_formatter(
  plt.FuncFormatter(lambda x, p: format(int(x), ',')))

plt.show()

In [None]:
# DISTRICT OVER TIME
fire_data_districtovertime= fire_data_noDups2.pivot_table(index="Year", columns="Supervisor District", values='Incident Number', aggfunc=np.size)
fire_data_districtovertime.head()

In [None]:
plt = fire_data_districtovertime.plot()
plt.legend(list(supervisor_districts.values()), bbox_to_anchor=(1.2, 1.0))
plt.title("Calls to Fire Department 2006-2018")

plt.show()

In [None]:
ax = fire_data_districtovertime.plot.bar(stacked=True, figsize=(12,8), legend = False)
#plt.legend((fire_data_districtovertime.columns.tolist()),  bbox_to_anchor=(1.05, 1.0))
plt.legend(list(supervisor_districts.values()), bbox_to_anchor=(1.05, 1.0))

# Complete formatting the plot
plt.title('District Calls to the Fire Department 2012-2018')
plt.xlabel("Year")
plt.ylabel("Number of Calls")
plt.grid(linestyle='-', color = 'gray')

ax.get_yaxis().set_major_formatter(
  plt.FuncFormatter(lambda x, p: format(int(x), ',')))

plt.show()

In [None]:
count = fire_data_noDups2.groupby(["Call Type Group"]).count()
count
#count_df = pd.DataFrame({"Call Type Group": "Call Number"})
#count_df.head()
#plt.pie(count["Call Number"].count)  
#plt.show()

In [None]:
# #Data Frame by Year, District, Call Type, Number of Incidents
# District_Count = pd.DataFrame({"Number of Incidents": count["Incident Number"]})
# District_Count


In [None]:
# #Taking no dups tavle into a pivot --> essentially the same as the District Count DF
# #Pivot fire_data_year (incident numbe) dataframe by Supervisor District
# # fire_data_clean_grouped_pivot = fire_data_noDups.pivot(index="Year", columns=["Supervisor District"])
# fire_data_clean_grouped_pivot = fire_data_noDups2.pivot_table(index="Year", columns=["Supervisor District", "Call Type"], values='Incident Number', aggfunc=np.size)

# fire_data_clean_grouped_pivot.head()

In [None]:
# #Pivot fire_data_year (incident numbe) dataframe by Supervisor District
# # fire_data_clean_grouped_pivot = fire_data_noDups.pivot(index="Year", columns=["Supervisor District"])
# fire_data_clean_grouped_pivot = fire_data_noDups2.pivot_table(index="Year", columns=["Supervisor District", "Call Type"], values='Incident Number', aggfunc=np.size)

# fire_data_clean_grouped_pivot.head()

In [None]:
# Function to parse the coordinates into a tuple 
def to_gis (x, y):
    yy = ()
    yy = yy + (x,)
    yy = yy + (y,)
    return tuple(yy)


def lat(x):
    try:
        xx = x.replace("(","")
        xx = xx.replace(")","")
        xx = xx.replace(" ","")
        yy = xx.split(",")
        zz = round(float(yy[0]),3)
        return zz
    except:
        print (zz)
        return 0

def lon(x):
    try:
        xx = x.replace("(","")
        xx = xx.replace(")","")
        xx = xx.replace(" ","")
        yy = xx.split(",")
        zz = round(float(yy[1]),3)
        return zz
    except:
        print (zz)
        return 0

fire = fire_data["Incident Number"].loc[(fire_data["Year"] == 2018) & (fire_data["Supervisor District"] == "1")]
fire["latx"] = fire_data["Location"].map(lat)
fire["lonx"] = fire_data["Location"].map(lon)

data_slice_group=fire.groupby(["latx","lonx"]).agg(
    {"Incident Number" : "count"
    })
data_slice_group.head()

In [9]:
data_slice_group.tail()

Unnamed: 0_level_0,Unnamed: 1_level_0,Incident Number
latx,lonx,Unnamed: 2_level_1
37.79,-122.47,8
37.79,-122.47,14
37.79,-122.47,8
37.79,-122.47,2
37.79,-122.47,11


In [11]:
# Put coordinates of these properties into a list (a list of tuples)
property_list = []
for i in range(len(data_slice_group)):
    property_list.append(to_gis(data_slice_group.iloc[i]["latx"],data_slice_group.iloc[i]["lonx"]))

KeyError: 'latx'

In [8]:
print (property_list)

[]


In [50]:
geo_slice_group.head()

NameError: name 'geo_slice_group' is not defined

In [45]:
data_slice_group.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Incident Number
lat,lon,Unnamed: 2_level_1
37.76,-122.51,1
37.76,-122.51,2
37.76,-122.5,4
37.76,-122.5,3
37.76,-122.5,3
