In [1]:
import os
import traceback

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import psycopg2
import seaborn as sns
from dotenv import load_dotenv, find_dotenv
%matplotlib inline

In this notebook, I explore the sample fire dispatch data provided to the HACK OR Emergency Response Project Team. In particular, we're after the proportion of calls that are of high or low priority, both citywide and by Fire Management Area (FMA).

In [2]:
# Search the directory tree for the .env file (AWS host, username and password),
# then load its contents into the environment.
dotenv_path = find_dotenv()
load_dotenv(dotenv_path)

True

In [3]:
# connect to postgres
def pgconnect():
    '''
    Open a psycopg2 connection to the Postgres database.

    Connection settings are read from the environment variables
    erdatabase, eruser, erpassword, erhost and erport.

    Returns
    -------
    connection or None
        The open psycopg2 connection, or None when psycopg2 raises an error
        while connecting (the error details are printed in that case).
    '''
    try:
        conn = psycopg2.connect(
            database=os.environ.get("erdatabase"),
            user=os.environ.get("eruser"),
            password=os.environ.get("erpassword"),
            host=os.environ.get("erhost"),
            port=os.environ.get("erport"),
        )
        print("Opened database successfully")
        return conn

    except psycopg2.Error as e:
        print("I am unable to connect to the database")
        print(e)
        print(e.pgcode)
        print(e.pgerror)
        # Requires the traceback module to be imported at the top of the notebook;
        # without it this handler itself fails with NameError.
        print(traceback.format_exc())
        return None

In [4]:
def pquery(QUERY):
    '''
    Run a SQL query through a fresh connection and return the rows as a DataFrame.

    Parameters
    ----------
    QUERY : str
        SQL text, passed to cursor.execute unchanged.

    Returns
    -------
    pandas.DataFrame or None
        One column per entry in the cursor description, one row per fetched row.
        None when the connection cannot be opened or when running the query fails
        (the error is printed in that case).
    '''
    conn = pgconnect()

    if conn is None:
        return None

    try:
        # Creating the cursor inside the try guarantees the connection is closed
        # by the finally clause even if cursor() itself fails.
        cur = conn.cursor()
        print("SQL QUERY = "+QUERY)
        cur.execute("SET statement_timeout = 0")
        cur.execute(QUERY)
        # The first field of each description entry is the column name.
        col_names = [elt[0] for elt in cur.description]
        rows = cur.fetchall()
        return pd.DataFrame(rows, columns=col_names)

    except Exception as e:
        # Only psycopg2 errors carry pgerror, and it can be None; fall back to the
        # exception itself so that reporting the error never raises a second one.
        print(getattr(e, "pgerror", None) or e)
        return None

    finally:
        conn.close()


In [5]:
# We first get the relevant columns from the incident table: the FMA, the alarm
# level, the incident id and the primary "situation found" id.
# Columns are aliased to short names; incsitfoundprm_id is renamed to
# incsitfound_id so it shares a name with the key of the incsitfound lookup table.
# NOTE(review): alarmlevel is alarmlevel_id shifted down by one -- presumably the
# id is offset by one from the actual alarm level; confirm against the schema.
QUERY1='''
SELECT fmarespcomp fma
    ,alarmlevel_id - 1 alarmlevel
    ,incident_id
    ,incsitfoundprm_id incsitfound_id
FROM incident;
'''

In [6]:
# Pull the incident columns into a DataFrame (pquery returns None if the
# database connection cannot be opened).
df1 = pquery(QUERY1)

Opened database successfully
SQL QUERY = 
SELECT fmarespcomp fma
    ,alarmlevel_id - 1 alarmlevel
    ,incident_id
    ,incsitfoundprm_id incsitfound_id
FROM incident;



In [7]:
# Check row count, dtypes and non-null counts of the incident pull.
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 545059 entries, 0 to 545058
Data columns (total 4 columns):
fma               521789 non-null object
alarmlevel        545057 non-null float64
incident_id       545059 non-null int64
incsitfound_id    512281 non-null float64
dtypes: float64(2), int64(1), object(1)
memory usage: 16.6+ MB


In [8]:
# Peek at the first five incident rows (head() defaults to n=5).
df1.head()

Unnamed: 0,fma,alarmlevel,incident_id,incsitfound_id
0,30,1.0,1089329,542.0
1,26,1.0,1089330,326.0
2,8,1.0,1089331,481.0
3,13,1.0,1089332,111.0
4,1,1.0,1089333,326.0


In [9]:
# It would also be nice to get the descriptions of the situations found, so that
# we can cut the incidents along those lines.
# Each incsitfound row is left-joined to its sub-category (incsitfoundsub) and
# from there to its class (incsitfoundclass), giving one description per level.
QUERY2='''
select a.incsitfound_id incsitfound_id
    ,a.description incsitfound_desc
    ,b.description incsitfoundsub_desc
    ,c.description incsitfoundclass_desc
from incsitfound as a
left join incsitfoundsub as b
    on a.incsitfoundsub_id = b.incsitfoundsub_id
left join incsitfoundclass as c
    on b.incsitfoundclass_id = c.incsitfoundclass_id
'''

In [10]:
# Pull the situation-found lookup (id plus three description levels) into a
# DataFrame (pquery returns None if the database connection cannot be opened).
df2 = pquery(QUERY2)

Opened database successfully
SQL QUERY = 
select a.incsitfound_id incsitfound_id
    ,a.description incsitfound_desc
    ,b.description incsitfoundsub_desc
    ,c.description incsitfoundclass_desc
from incsitfound as a
left join incsitfoundsub as b
    on a.incsitfoundsub_id = b.incsitfoundsub_id
left join incsitfoundclass as c
    on b.incsitfoundclass_id = c.incsitfoundclass_id



In [11]:
# Check row count, dtypes and non-null counts of the situation-found lookup.
df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 224 entries, 0 to 223
Data columns (total 4 columns):
incsitfound_id           224 non-null int64
incsitfound_desc         224 non-null object
incsitfoundsub_desc      224 non-null object
incsitfoundclass_desc    224 non-null object
dtypes: int64(1), object(3)
memory usage: 7.1+ KB


In [12]:
# Peek at the first five lookup rows (head() defaults to n=5).
df2.head()

Unnamed: 0,incsitfound_id,incsitfound_desc,incsitfoundsub_desc,incsitfoundclass_desc
0,100,"FIRE/EXPLOSION, OTHER ...","FIRE/EXPLOSION, OTHER ...",FIRE/EXPLOSION
1,110,"STRUCTURE FIRE, OTHER ...",STRUCTURE FIRE ...,FIRE/EXPLOSION
2,111,BUILDING FIRE ...,STRUCTURE FIRE ...,FIRE/EXPLOSION
3,112,"STRUCTURE FIRE OTHER THAN IN BUILDING (PIERS, ...",STRUCTURE FIRE ...,FIRE/EXPLOSION
4,113,"COOKING FIRE, CONFINED TO CONTAINER ...",STRUCTURE FIRE ...,FIRE/EXPLOSION


In [13]:
# Merge the situation descriptions into the incident df, keeping every incident
# (left join) and only the columns needed for the counts below.
# The join key is named explicitly instead of letting pandas infer it from
# whichever column names the two frames happen to share.
df = pd.merge(df1, df2, how='left', on='incsitfound_id')[['fma','alarmlevel','incsitfoundclass_desc','incident_id']]

In [14]:
# Sanity-check the merge: a left join should keep the same number of rows as df1.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 545059 entries, 0 to 545058
Data columns (total 4 columns):
fma                      521789 non-null object
alarmlevel               545057 non-null float64
incsitfoundclass_desc    512281 non-null object
incident_id              545059 non-null int64
dtypes: float64(1), int64(1), object(2)
memory usage: 20.8+ MB


In [15]:
# Peek at the first five merged rows (head() defaults to n=5).
df.head()

Unnamed: 0,fma,alarmlevel,incsitfoundclass_desc,incident_id
0,30,1.0,SERVICE CALL,1089329
1,26,1.0,MEDICAL AID / RESCUE CALLS,1089330
2,8,1.0,HAZARDOUS CONDITIONS,1089331
3,13,1.0,FIRE/EXPLOSION,1089332
4,1,1.0,MEDICAL AID / RESCUE CALLS,1089333


In [16]:
# Move the three grouping dimensions into a MultiIndex so the cells below can
# group by index level; incident_id is left as the only data column.
# NOTE: this rebinds df, so re-running this cell without first re-running the
# merge fails, because the key columns are no longer regular columns.
df = df.set_index(keys=['fma','incsitfoundclass_desc','alarmlevel'])
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,incident_id
fma,incsitfoundclass_desc,alarmlevel,Unnamed: 3_level_1
30,SERVICE CALL,1.0,1089329
26,MEDICAL AID / RESCUE CALLS,1.0,1089330
8,HAZARDOUS CONDITIONS,1.0,1089331
13,FIRE/EXPLOSION,1.0,1089332
1,MEDICAL AID / RESCUE CALLS,1.0,1089333


In [17]:
# Number of incidents at each alarm level, all FMAs combined.
df.groupby(level='alarmlevel').count()

Unnamed: 0_level_0,incident_id
alarmlevel,Unnamed: 1_level_1
0.0,1
1.0,544482
2.0,548
3.0,20
4.0,3
5.0,2
9.0,1


In [18]:
# Number of incidents per FMA and alarm level (first 14 groups only).
df.groupby(level=['fma','alarmlevel']).count().head(14)

Unnamed: 0_level_0,Unnamed: 1_level_0,incident_id
fma,alarmlevel,Unnamed: 2_level_1
0,1.0,3
1,1.0,43097
1,2.0,40
1,3.0,2
2,0.0,1
2,1.0,12779
2,2.0,23
2,3.0,1
3,1.0,30715
3,2.0,7


In [19]:
# Number of incidents per situation class and alarm level.
# TODO: plot this as a horizontal bar chart with one bar per alarm level
# (earlier attempt: .reset_index(1).pivot(columns='alarmlevel').plot.barh()).
df.groupby(level=['incsitfoundclass_desc','alarmlevel']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,incident_id
incsitfoundclass_desc,alarmlevel,Unnamed: 2_level_1
FALSE CALLS,1.0,39174
FALSE CALLS,2.0,35
FALSE CALLS,3.0,1
FIRE/EXPLOSION,1.0,16115
FIRE/EXPLOSION,2.0,104
FIRE/EXPLOSION,3.0,8
FIRE/EXPLOSION,4.0,2
FIRE/EXPLOSION,5.0,2
GOOD INTENT CALLS,1.0,24832
GOOD INTENT CALLS,2.0,52
