In [11]:
import os 
import psycopg2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
%matplotlib inline

In [12]:
## Load the connection settings from the project's env file
from dotenv import find_dotenv, load_dotenv

# find_dotenv looks up 'fire_var.env'; load_dotenv then exports its entries
# into os.environ. The call's result is displayed as the cell output.
load_dotenv(dotenv_path=find_dotenv(filename='fire_var.env'))

True

In [13]:
# Connect to Postgres using the settings loaded into the environment.
# NOTE(review): the port is read from 'port' while every other key carries an
# 'er' prefix -- confirm that this is the intended variable name.
try:
    conn = psycopg2.connect(
        database=os.environ.get("erdatabase"),
        user=os.environ.get("eruser"),
        password=os.environ.get("erpassword"),
        host=os.environ.get("erhost"),
        port=os.environ.get('port'),
    )
except psycopg2.Error as e:
    print("I am unable to connect to the database")
    print(e)
    print(e.pgcode)
    print(e.pgerror)
    # Re-raise so the notebook shows the real traceback and stops here:
    # every later cell needs `conn`. (The handler used to call
    # traceback.format_exc() without importing traceback, which raised a
    # NameError that hid the actual connection error.)
    raise

In [14]:
def pgquery(QUERY):
    '''
    Execute a SQL query on the notebook-level connection ``conn`` and return
    the result set as a pandas DataFrame.

    Parameters
    ----------
    QUERY : str
        SQL text to execute. It is echoed to stdout before it runs.

    Returns
    -------
    pandas.DataFrame
        One row per fetched record; the column names are taken from the
        cursor description.

    Raises
    ------
    psycopg2.Error
        When the database rejects the statement. The server message is
        printed and the transaction is rolled back before re-raising, so the
        connection stays usable for the next query and the caller never
        receives a silent ``None``.
    '''
    print("SQL QUERY = "+QUERY)

    cur = conn.cursor()
    try:
        # Disable the server-side statement timeout for this session.
        cur.execute("SET statement_timeout = 0")
        cur.execute(QUERY)
        # Extract the column names from the cursor description.
        col_names = [elt[0] for elt in cur.description]
        rows = cur.fetchall()
    except psycopg2.Error as e:
        # A failed statement leaves the transaction aborted; without a
        # rollback every later query on this connection would fail as well.
        conn.rollback()
        print(e.pgerror)
        raise
    finally:
        # Always release the cursor, on success and on failure.
        cur.close()

    # Create the dataframe, passing in the column names extracted above.
    return pd.DataFrame(rows, columns=col_names)



In [20]:
##Pull data from the EC2 instance, extract inctime, station, and agency info
def getTimeDataset1(timedesc_dict):
    '''
    Fetch the time records of incident 1281359 together with the station and
    agency description of each responding unit.

    Parameters
    ----------
    timedesc_dict : dict
        Its keys are the timedesc_id values to keep (integers or numeric
        strings); the values are not used by this function.

    Returns
    -------
    The result of ``pgquery`` for the assembled query, with the columns
    incident_id, responderunit_id, timedesc_id, station, agency and realtime.

    Raises
    ------
    ValueError
        If ``timedesc_dict`` is empty or one of its keys is not an integer.
    '''
    # Build the IN (...) list by hand. str(tuple(keys)) renders a single key
    # as "(3,)" and no keys as "()", both of which are invalid SQL; coercing
    # through int() also keeps arbitrary text out of the query string.
    id_list = [str(int(key)) for key in timedesc_dict]
    if not id_list:
        raise ValueError("timedesc_dict must contain at least one timedesc_id")
    timedesc_ids = "(" + ", ".join(id_list) + ")"

    RESPONSE_TIME_QUERY='''
                        
                        SELECT  I.incident_id, R.responderunit_id,
                                T.timedesc_id, S.description as Station, A.description as Agency,
                                T.realtime
                        FROM incident as I
                        INNER JOIN inctimes as T
                                ON I.incident_id = T.incident_id
                        INNER JOIN responder as R
                                ON ( I.incident_id = R.incident_id AND T.responder_id = R.responder_id)
                        INNER JOIN responderunit as RU
                                ON R.responderunit_id = RU.responderunit_id
                        LEFT JOIN station as S
                                ON RU.station_id= S.station_id
                        LEFT JOIN agency as A
                                ON RU.agency_id = A.agency_id
                        WHERE T.timedesc_id IN ??TIMEDESC_IDS??
                                AND T.responder_id IS NOT NULL
                                AND I.incident_id = 1281359;
                        '''

    # add the timedesc_ids that we want to the query
    RESPONSE_TIME_QUERY = RESPONSE_TIME_QUERY.replace("??TIMEDESC_IDS??", timedesc_ids)

    # execute the query: **this takes a pretty long time**
    df = pgquery(RESPONSE_TIME_QUERY)

    return df

In [21]:
# Map each timedesc_id of interest to a readable event label.
d = {
    3: 'Dispatched',
    5: 'On Scene',
    9: 'Depart Scene',
    12: 'Clear',
}
table = getTimeDataset1(timedesc_dict=d)

SQL QUERY = 
                        
                        SELECT  I.incident_id, R.responderunit_id,
                                T.timedesc_id, S.description as Station, A.description as Agency,
                                T.realtime
                        FROM incident as I
                        INNER JOIN inctimes as T
                                ON I.incident_id = T.incident_id
                        INNER JOIN responder as R
                                ON ( I.incident_id = R.incident_id AND T.responder_id = R.responder_id)
                        INNER JOIN responderunit as RU
                                ON R.responderunit_id = RU.responderunit_id
                        LEFT JOIN station as S
                                ON RU.station_id= S.station_id
                        LEFT JOIN agency as A
                                ON RU.agency_id = A.agency_id
                        WHERE T.timedesc_id IN (9, 3, 12, 5)
                                A

In [22]:
# Preview the first 30 rows (len(table) gives the total row count).
table.head(n=30)

Unnamed: 0,incident_id,responderunit_id,timedesc_id,station,agency,realtime
0,1281359,89,3,INVESTIGATOR/INSPECTOR,,2016-10-19 10:05:14
1,1281359,89,3,INVESTIGATOR/INSPECTOR,,2016-10-20 06:28:34
2,1281359,89,5,INVESTIGATOR/INSPECTOR,,2016-10-19 12:56:38
3,1281359,89,5,INVESTIGATOR/INSPECTOR,,2016-10-20 06:28:34
4,1281359,89,12,INVESTIGATOR/INSPECTOR,,2016-10-19 22:07:44
5,1281359,89,12,INVESTIGATOR/INSPECTOR,,2016-10-20 19:39:08
6,1281359,93,3,INVESTIGATOR/INSPECTOR,,2016-10-19 09:48:54
7,1281359,93,3,INVESTIGATOR/INSPECTOR,,2016-10-20 06:27:43
8,1281359,93,5,INVESTIGATOR/INSPECTOR,,2016-10-19 09:48:57
9,1281359,93,5,INVESTIGATOR/INSPECTOR,,2016-10-20 06:27:43


In [23]:
### Filter out the responders from other cities (rows without a station description)
### There are still some non-integer station description codes
table_s = table.dropna(subset=['station'])

# One row per (incident, unit, station); one column per event type, keeping
# the first recorded timestamp when an event occurs more than once.
stationTable = table_s.pivot_table(
    index=['incident_id', 'responderunit_id', 'station'],
    columns='timedesc_id',
    values='realtime',
    aggfunc='first',
).rename(columns=d)

print(len(stationTable))
stationTable

52


Unnamed: 0_level_0,Unnamed: 1_level_0,timedesc_id,Dispatched,On Scene,Clear
incident_id,responderunit_id,station,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1281359,7,C01,2016-10-19 09:18:00,2016-10-19 09:20:51,2016-10-19 15:06:28
1281359,8,C102,2016-10-19 09:41:27,2016-10-19 09:52:31,2016-10-19 18:48:42
1281359,9,C103,2016-10-19 09:39:33,2016-10-19 09:56:52,2016-10-19 19:41:16
1281359,12,C02,2016-10-19 09:39:33,2016-10-19 09:59:58,2016-10-19 17:36:52
1281359,13,C03,2016-10-19 09:41:27,2016-10-19 10:00:30,2016-10-19 21:04:28
1281359,15,C04,2016-10-19 09:08:12,2016-10-19 09:20:27,2016-10-19 21:05:13
1281359,30,01,2016-10-19 18:01:03,2016-10-19 18:01:03,2016-10-19 18:01:07
1281359,33,13,2016-10-19 09:39:33,2016-10-19 09:47:57,2016-10-19 16:20:31
1281359,35,15,2016-10-19 09:08:12,2016-10-19 09:13:22,2016-10-19 13:12:57
1281359,38,19,2016-10-19 10:12:45,2016-10-19 10:20:02,2016-10-19 17:49:46


In [24]:
### Filter out responders from Portland (rows without an agency description)
table_a = table.copy().dropna(subset=['agency'])

# One row per (incident, unit, agency); one column per event type, keeping
# the first recorded timestamp when an event occurs more than once.
agencyTable = table_a.pivot_table(
    index=['incident_id', 'responderunit_id', 'agency'],
    columns='timedesc_id',
    values='realtime',
    aggfunc='first',
).rename(columns=d)

agencyTable

Unnamed: 0_level_0,Unnamed: 1_level_0,timedesc_id,Dispatched,On Scene,Clear
incident_id,responderunit_id,agency,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1281359,95,GRESHAM,2016-10-20 06:31:09,2016-10-20 07:16:06,2016-10-20 16:02:29
1281359,96,GRESHAM,2016-10-20 06:31:09,2016-10-20 07:16:06,2016-10-20 16:02:29
1281359,508,SANDY FD,2016-10-19 09:57:28,2016-10-19 10:40:35,2016-10-19 12:10:40
1281359,1596,PORT OF PORTLAND,2016-10-19 10:23:27,2016-10-19 10:38:10,2016-10-19 12:15:42


In [25]:
## Check out the metrics of the other incidents that happened on the same day
def getTimeDataset2(timedesc_dict):
    '''
    Fetch the time records dated 2016-10-19 for every incident except
    1281359 and pivot them to one row per responding unit.

    Parameters
    ----------
    timedesc_dict : dict
        Maps timedesc_id (integer or numeric string) to the label used as
        the column name of that event in the returned table.

    Returns
    -------
    pandas.DataFrame
        Indexed by (incident_id, responderunit_id, fireblock, fmarespcomp)
        with one column per event type, holding the first matching
        ``realtime`` value.

    Raises
    ------
    ValueError
        If ``timedesc_dict`` is empty or one of its keys is not an integer.
    RuntimeError
        If ``pgquery`` returned no result.

    Notes
    -----
    The date filter applies to each time record individually, so a unit whose
    events straddle midnight only gets the events stamped on 2016-10-19; the
    remaining columns are NaT for that row.
    '''
    # Build the IN (...) list by hand. str(tuple(keys)) renders a single key
    # as "(3,)" and no keys as "()", both of which are invalid SQL; coercing
    # through int() also keeps arbitrary text out of the query string.
    id_list = [str(int(key)) for key in timedesc_dict]
    if not id_list:
        raise ValueError("timedesc_dict must contain at least one timedesc_id")
    timedesc_ids = "(" + ", ".join(id_list) + ")"

    RESPONSE_TIME_QUERY='''
                      SELECT  I.incident_id, R.responderunit_id, 
                                T.timedesc_id, I.fireblock, I.fmarespcomp,
                                T.realtime
                        FROM incident as I
                        INNER JOIN inctimes as T
                                ON I.incident_id = T.incident_id
                        INNER JOIN responder as R
                                ON ( I.incident_id = R.incident_id AND T.responder_id = R.responder_id)
                        WHERE T.timedesc_id IN ??TIMEDESC_IDS??
                                AND T.responder_id IS NOT NULL
                                AND CAST(T.realtime AS DATE) = date '2016-10-19'
                                AND I.incident_id != 1281359;
                        '''

    # add the timedesc_ids that we want to the query
    RESPONSE_TIME_QUERY = RESPONSE_TIME_QUERY.replace("??TIMEDESC_IDS??", timedesc_ids)

    # execute the query: **this takes a pretty long time**
    df = pgquery(RESPONSE_TIME_QUERY)
    if df is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise RuntimeError("query returned no result; see the database error printed above")

    # now reshape the data so that we can do analysis more easily.
    table = df.pivot_table(index=['incident_id', 'responderunit_id', 'fireblock',
                                  'fmarespcomp'],
                           columns='timedesc_id', values='realtime', aggfunc='first')

    # Replace the numeric timedesc_id column labels with their readable names.
    return table.rename(columns=timedesc_dict)

In [26]:
# Same-day records of the other incidents, pivoted to one row per unit.
table2 = getTimeDataset2(timedesc_dict=d)



SQL QUERY = 
                      SELECT  I.incident_id, R.responderunit_id, 
                                T.timedesc_id, I.fireblock, I.fmarespcomp,
                                T.realtime
                        FROM incident as I
                        INNER JOIN inctimes as T
                                ON I.incident_id = T.incident_id
                        INNER JOIN responder as R
                                ON ( I.incident_id = R.incident_id AND T.responder_id = R.responder_id)
                        WHERE T.timedesc_id IN (9, 3, 12, 5)
                                AND T.responder_id IS NOT NULL
                                AND CAST(T.realtime AS DATE) = date '2016-10-19'
                                AND I.incident_id != 1281359;
                        


In [27]:
## Calculate "Response_Time" and "OnScene_Time"
table2["Response_Time"] = table2['On Scene'] - table2['Dispatched']
table2["OnScene_Time"] = table2['Clear'] - table2['On Scene']

# Both columns are deliberately left as timedeltas. Calling
# .astype('timedelta64[m]') without assigning the result does not change the
# table, and pandas 2.x rejects minute resolution outright. If float minutes
# are needed, use e.g. table2["Response_Time"].dt.total_seconds() / 60.
table2.head(20)


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,timedesc_id,Dispatched,On Scene,Depart Scene,Clear,Response_Time,OnScene_Time
incident_id,responderunit_id,fireblock,fmarespcomp,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1281269,242,2842,28,NaT,NaT,NaT,2016-10-19 00:26:08,NaT,NaT
1281272,224,403,4,NaT,NaT,NaT,2016-10-19 00:29:25,NaT,NaT
1281274,221,133,1,NaT,NaT,2016-10-19 00:07:01,2016-10-19 00:43:35,NaT,NaT
1281276,42,2228,22,NaT,NaT,NaT,2016-10-19 00:30:10,NaT,NaT
1281277,227,1145,11,NaT,NaT,2016-10-19 00:03:29,2016-10-19 00:42:56,NaT,NaT
1281278,61,3106,31,2016-10-19 00:03:22,NaT,NaT,2016-10-19 00:08:46,NaT,NaT
1281278,234,3106,31,2016-10-19 00:03:22,2016-10-19 00:06:44,2016-10-19 00:21:58,2016-10-19 01:00:11,00:03:22,00:53:27
1281279,33,1360,13,NaT,NaT,NaT,2016-10-19 00:07:57,NaT,NaT
1281279,222,1360,13,NaT,NaT,2016-10-19 00:22:31,2016-10-19 01:05:26,NaT,NaT
1281280,114,130,1,2016-10-19 00:00:43,2016-10-19 00:06:22,NaT,2016-10-19 00:11:41,00:05:39,00:05:19


In [28]:
# Summary statistics of the table built from the same-day incidents.
table2.describe()
## Looks like the bagel shop incident did not have much of an impact on other incidents.
## NOTE(review): the recorded output shows a negative minimum OnScene_Time
## (a 'Clear' timestamp earlier than 'On Scene'), so at least one row has
## inconsistent timestamps -- check those rows before relying on the mean/std.

timedesc_id,Response_Time,OnScene_Time
count,422,407
mean,0 days 00:05:52.236966,0 days 00:26:47.363636
std,0 days 00:04:27.321464,0 days 00:36:39.679312
min,0 days 00:00:00,-1 days +23:54:11
25%,0 days 00:04:10,0 days 00:05:47
50%,0 days 00:05:20.500000,0 days 00:13:51
75%,0 days 00:06:40.750000,0 days 00:46:52.500000
max,0 days 01:06:59,0 days 08:27:31


In [29]:
## Let's take a look at the other incidents that happened in the time window in which the explosion occurred
def getTimeDataset3(timedesc_dict):
    '''
    Fetch the time records stamped from 2016-10-19 09:08:12 (inclusive) to
    2016-10-19 11:05:16 (exclusive) for every incident except 1281359 and
    pivot them to one row per responding unit.

    Parameters
    ----------
    timedesc_dict : dict
        Maps timedesc_id (integer or numeric string) to the label used as
        the column name of that event in the returned table.

    Returns
    -------
    pandas.DataFrame
        Indexed by (incident_id, responderunit_id, fireblock, fmarespcomp)
        with one column per event type, holding the first matching
        ``realtime`` value.

    Raises
    ------
    ValueError
        If ``timedesc_dict`` is empty or one of its keys is not an integer.
    RuntimeError
        If ``pgquery`` returned no result.

    Notes
    -----
    The window filter applies to each time record individually, so a unit
    dispatched inside the window but cleared after it has NaT in the columns
    of the events that fall outside the window.
    '''
    # Build the IN (...) list by hand. str(tuple(keys)) renders a single key
    # as "(3,)" and no keys as "()", both of which are invalid SQL; coercing
    # through int() also keeps arbitrary text out of the query string.
    id_list = [str(int(key)) for key in timedesc_dict]
    if not id_list:
        raise ValueError("timedesc_dict must contain at least one timedesc_id")
    timedesc_ids = "(" + ", ".join(id_list) + ")"

    RESPONSE_TIME_QUERY='''
                      SELECT  I.incident_id, R.responderunit_id, 
                                T.timedesc_id, I.fireblock, I.fmarespcomp,
                                T.realtime
                        FROM incident as I
                        INNER JOIN inctimes as T
                                ON I.incident_id = T.incident_id
                        INNER JOIN responder as R
                                ON ( I.incident_id = R.incident_id AND T.responder_id = R.responder_id)
                        WHERE T.timedesc_id IN ??TIMEDESC_IDS??
                                AND T.responder_id IS NOT NULL
                                
                                AND T.realtime >= '2016-10-19 09:08:12' 
                                AND T.realtime <  '2016-10-19 11:05:16'
                                AND I.incident_id != 1281359;
                        '''

    # add the timedesc_ids that we want to the query
    RESPONSE_TIME_QUERY = RESPONSE_TIME_QUERY.replace("??TIMEDESC_IDS??", timedesc_ids)

    # execute the query: **this takes a pretty long time**
    df = pgquery(RESPONSE_TIME_QUERY)
    if df is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise RuntimeError("query returned no result; see the database error printed above")

    # now reshape the data so that we can do analysis more easily.
    table = df.pivot_table(index=['incident_id', 'responderunit_id', 'fireblock',
                                  'fmarespcomp'],
                           columns='timedesc_id', values='realtime', aggfunc='first')

    # Replace the numeric timedesc_id column labels with their readable names.
    return table.rename(columns=timedesc_dict)

In [30]:
# Records of the other incidents inside the explosion time window, one row per unit.
table3 = getTimeDataset3(timedesc_dict=d)

SQL QUERY = 
                      SELECT  I.incident_id, R.responderunit_id, 
                                T.timedesc_id, I.fireblock, I.fmarespcomp,
                                T.realtime
                        FROM incident as I
                        INNER JOIN inctimes as T
                                ON I.incident_id = T.incident_id
                        INNER JOIN responder as R
                                ON ( I.incident_id = R.incident_id AND T.responder_id = R.responder_id)
                        WHERE T.timedesc_id IN (9, 3, 12, 5)
                                AND T.responder_id IS NOT NULL
                                
                                AND T.realtime >= '2016-10-19 09:08:12' 
                                AND T.realtime <  '2016-10-19 11:05:16'
                                AND I.incident_id != 1281359;
                        


In [31]:
## Calculate "Response_Time" and "OnScene_Time"
table3["Response_Time"] = table3['On Scene'] - table3['Dispatched']
table3["OnScene_Time"] = table3['Clear'] - table3['On Scene']

# Both columns are deliberately left as timedeltas. Calling
# .astype('timedelta64[m]') without assigning the result does not change the
# table, and pandas 2.x rejects minute resolution outright. If float minutes
# are needed, use e.g. table3["Response_Time"].dt.total_seconds() / 60.
table3.describe()
## Still, the bagel shop incident does not seem to have had much of an impact on other incidents

timedesc_id,Response_Time,OnScene_Time
count,33,26
mean,0 days 00:05:39.242424,0 days 00:20:09.076923
std,0 days 00:02:29.423063,0 days 00:18:38.602768
min,0 days 00:00:33,0 days 00:00:46
25%,0 days 00:04:08,0 days 00:04:27.250000
50%,0 days 00:05:09,0 days 00:13:11
75%,0 days 00:06:28,0 days 00:32:23.750000
max,0 days 00:11:26,0 days 01:10:48
