In [1]:
import os 
import psycopg2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
%matplotlib inline

In [2]:
## Load variables: read the settings stored in 'fire_var.env' into the
## process environment; the database connection cell reads them via os.environ.
from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv('fire_var.env'))

True

In [3]:
# Connect to Postgres. The connection settings are read from the environment
# variables "erdatabase", "eruser", "erpassword", "erhost" and "port".
# The resulting module-level `conn` is used by pgquery().

try:
    conn = psycopg2.connect(
        database=os.environ.get("erdatabase"),
        user=os.environ.get("eruser"),
        password=os.environ.get("erpassword"),
        host=os.environ.get("erhost"),
        port=os.environ.get('port'),
    )
except psycopg2.Error as e:
    print("I am unable to connect to the database")
    print(e)
    print(e.pgcode)
    print(e.pgerror)
    # Re-raise so the notebook stops here with the original traceback instead
    # of carrying on with `conn` undefined. (The previous handler called
    # traceback.format_exc() without importing traceback, which raised a
    # NameError that hid the real connection error.)
    raise

In [4]:
def pgquery(QUERY):
    '''
    Run a SQL query on the module-level connection `conn` and return the
    result set as a pandas DataFrame.

    Parameters
    ----------
    QUERY : str
        SQL text to execute. It is sent to the server exactly as given (no
        parameter binding), so callers must build it from trusted values only.

    Returns
    -------
    pandas.DataFrame or None
        One column per result column, named from the cursor description.
        None if the database reported an error (the error is printed).
    '''
    print("SQL QUERY = "+QUERY)

    try:
        # The context manager closes the cursor even when the query fails.
        with conn.cursor() as cur:
            # 0 disables the statement timeout so long-running queries are not cancelled.
            cur.execute("SET statement_timeout = 0")
            cur.execute(QUERY)
            # The first field of each description entry is the column name.
            col_names = [elt[0] for elt in cur.description]
            rows = cur.fetchall()
    except psycopg2.Error as e:
        # A failed statement leaves the transaction aborted; roll back so the
        # shared connection can be used by the next query.
        conn.rollback()
        # pgerror is None for client-side errors, so fall back to the exception itself.
        print(e.pgerror if e.pgerror is not None else e)
        return None

    return pd.DataFrame(rows, columns=col_names)



In [6]:
## Since we only have one row in the incident table, there is not much data to pull from there.
## Let's try to pull everything about the responders.

##Pull data from the EC2 instance, extract inctime, station, and agency info
def getTimeDataset1(timedesc_dict):
    '''
    Pull the responder time records for incident 1281359.

    The query joins incident -> inctimes -> responder -> responderunit and
    left-joins station and agency, keeping only inctimes rows that have a
    responder_id and whose timedesc_id is one of the requested ids.

    Parameters
    ----------
    timedesc_dict : dict
        Mapping whose keys are the timedesc_ids to select (values are not used
        here). Keys must be integers or integer-like strings.

    Returns
    -------
    Whatever pgquery() returns for the query (a DataFrame with columns
    incident_id, responderunit_id, timedesc_id, station, agency, realtime).

    Raises
    ------
    ValueError
        If timedesc_dict is empty or a key is not an integer value.
    '''
    RESPONSE_TIME_QUERY='''
                        
                        SELECT  I.incident_id, R.responderunit_id,
                                T.timedesc_id, S.description as Station, A.description as Agency,
                                T.realtime
                        FROM incident as I
                        INNER JOIN inctimes as T
                                ON I.incident_id = T.incident_id
                        INNER JOIN responder as R
                                ON ( I.incident_id = R.incident_id AND T.responder_id = R.responder_id)
                        INNER JOIN responderunit as RU
                                ON R.responderunit_id = RU.responderunit_id
                        LEFT JOIN station as S
                                ON RU.station_id= S.station_id
                        LEFT JOIN agency as A
                                ON RU.agency_id = A.agency_id
                        WHERE T.timedesc_id IN ??TIMEDESC_IDS??
                                AND T.responder_id IS NOT NULL
                                AND I.incident_id = 1281359;
                        '''

    if not timedesc_dict:
        raise ValueError("timedesc_dict must contain at least one timedesc_id")

    # Build the "(3, 5, 9, 12)" list by hand: str(tuple(...)) would emit "(3,)"
    # for a single id, which is not valid SQL. Casting each key to int also
    # guarantees that nothing but numbers is spliced into the query text.
    timedesc_ids = "(" + ", ".join(str(int(key)) for key in timedesc_dict) + ")"
    RESPONSE_TIME_QUERY = RESPONSE_TIME_QUERY.replace("??TIMEDESC_IDS??", timedesc_ids)

    # execute the query: **this takes a pretty long time**
    return pgquery(RESPONSE_TIME_QUERY)

In [7]:
# Map each timedesc_id of interest to a readable label; the keys select the
# rows in the query and the labels are used later to rename pivoted columns.
d = {3: 'Dispatched', 5 : 'On Scene', 9: 'Depart Scene', 12: 'Clear'}
table = getTimeDataset1(d)

SQL QUERY = 
                        
                        SELECT  I.incident_id, R.responderunit_id,
                                T.timedesc_id, S.description as Station, A.description as Agency,
                                T.realtime
                        FROM incident as I
                        INNER JOIN inctimes as T
                                ON I.incident_id = T.incident_id
                        INNER JOIN responder as R
                                ON ( I.incident_id = R.incident_id AND T.responder_id = R.responder_id)
                        INNER JOIN responderunit as RU
                                ON R.responderunit_id = RU.responderunit_id
                        LEFT JOIN station as S
                                ON RU.station_id= S.station_id
                        LEFT JOIN agency as A
                                ON RU.agency_id = A.agency_id
                        WHERE T.timedesc_id IN (9, 3, 12, 5)
                                A

In [8]:
# Number of time records returned for the incident
len(table)

233

In [9]:
# 233 == len(table), so this displays every row of the result
table.head(233)


Unnamed: 0,incident_id,responderunit_id,timedesc_id,station,agency,realtime
0,1281359,89,3,INVESTIGATOR/INSPECTOR,,2016-10-19 10:05:14
1,1281359,89,3,INVESTIGATOR/INSPECTOR,,2016-10-20 06:28:34
2,1281359,89,5,INVESTIGATOR/INSPECTOR,,2016-10-19 12:56:38
3,1281359,89,5,INVESTIGATOR/INSPECTOR,,2016-10-20 06:28:34
4,1281359,89,12,INVESTIGATOR/INSPECTOR,,2016-10-19 22:07:44
5,1281359,89,12,INVESTIGATOR/INSPECTOR,,2016-10-20 19:39:08
6,1281359,93,3,INVESTIGATOR/INSPECTOR,,2016-10-19 09:48:54
7,1281359,93,3,INVESTIGATOR/INSPECTOR,,2016-10-20 06:27:43
8,1281359,93,5,INVESTIGATOR/INSPECTOR,,2016-10-19 09:48:57
9,1281359,93,5,INVESTIGATOR/INSPECTOR,,2016-10-20 06:27:43


## It seems the records span more than one day, so let's pull the data just for '2016-10-19'

In [18]:


##Pull data from the EC2 instance, extract inctime, station, and agency info

## on date '2016-10-19'

def getTimeDataset2(timedesc_dict):
    '''
    Pull the responder time records for incident 1281359 on 2016-10-19,
    ordered by time.

    Same joins as the un-dated query (incident -> inctimes -> responder ->
    responderunit, left-joined to station and agency), plus the responder
    unit description and a filter on the calendar date of T.realtime.

    Parameters
    ----------
    timedesc_dict : dict
        Mapping whose keys are the timedesc_ids to select (values are not used
        here). Keys must be integers or integer-like strings.

    Returns
    -------
    Whatever pgquery() returns for the query (a DataFrame with columns
    responderunit_id, timedesc_id, station, agency, responder, realtime).

    Raises
    ------
    ValueError
        If timedesc_dict is empty or a key is not an integer value.
    '''
    RESPONSE_TIME_QUERY='''
                        
                        SELECT  R.responderunit_id,
                                T.timedesc_id, S.description as Station, A.description as Agency,
                                RU.description as Responder,
                                T.realtime
                        FROM incident as I
                        INNER JOIN inctimes as T
                                ON I.incident_id = T.incident_id
                        INNER JOIN responder as R
                                ON ( I.incident_id = R.incident_id AND T.responder_id = R.responder_id)
                        INNER JOIN responderunit as RU
                                ON R.responderunit_id = RU.responderunit_id
                        LEFT JOIN station as S
                                ON RU.station_id= S.station_id
                        LEFT JOIN agency as A
                                ON RU.agency_id = A.agency_id
                        WHERE T.timedesc_id IN ??TIMEDESC_IDS??
                                AND T.responder_id IS NOT NULL
                                AND I.incident_id = 1281359
                                AND CAST(T.realtime AS DATE) = date '2016-10-19'
                        ORDER BY T.realtime
                                ASC;
                        '''

    if not timedesc_dict:
        raise ValueError("timedesc_dict must contain at least one timedesc_id")

    # Build the "(3, 5, 9, 12)" list by hand: str(tuple(...)) would emit "(3,)"
    # for a single id, which is not valid SQL. Casting each key to int also
    # guarantees that nothing but numbers is spliced into the query text.
    timedesc_ids = "(" + ", ".join(str(int(key)) for key in timedesc_dict) + ")"
    RESPONSE_TIME_QUERY = RESPONSE_TIME_QUERY.replace("??TIMEDESC_IDS??", timedesc_ids)

    # execute the query: **this takes a pretty long time**
    return pgquery(RESPONSE_TIME_QUERY)

In [71]:
# timedesc_id -> label mapping (same ids as used for getTimeDataset1);
# the keys select the rows, the labels rename the pivoted columns later on.
d = {3: 'Dispatched', 5 : 'On Scene', 9: 'Depart Scene', 12: 'Clear'}
table2 = getTimeDataset2(d)

SQL QUERY = 
                        
                        SELECT  R.responderunit_id,
                                T.timedesc_id, S.description as Station, A.description as Agency,
                                RU.description as Responder,
                                T.realtime
                        FROM incident as I
                        INNER JOIN inctimes as T
                                ON I.incident_id = T.incident_id
                        INNER JOIN responder as R
                                ON ( I.incident_id = R.incident_id AND T.responder_id = R.responder_id)
                        INNER JOIN responderunit as RU
                                ON R.responderunit_id = RU.responderunit_id
                        LEFT JOIN station as S
                                ON RU.station_id= S.station_id
                        LEFT JOIN agency as A
                                ON RU.agency_id = A.agency_id
                        WHERE T.timedesc_id IN (

In [72]:
# Number of time records for the incident on 2016-10-19
len(table2)

193

In [73]:
# 193 == len(table2), so this displays every row of the result
table2.head(193)

Unnamed: 0,responderunit_id,timedesc_id,station,agency,responder,realtime
0,122,3,03,,T03,2016-10-19 09:08:12
1,15,3,C04,,C04,2016-10-19 09:08:12
2,35,3,15,,E15,2016-10-19 09:08:12
3,52,3,03,,E03,2016-10-19 09:08:12
4,64,3,06,,E06,2016-10-19 09:08:12
5,122,5,03,,T03,2016-10-19 09:11:46
6,52,5,03,,E03,2016-10-19 09:12:00
7,35,5,15,,E15,2016-10-19 09:13:22
8,64,5,06,,E06,2016-10-19 09:15:44
9,7,3,C01,,C01,2016-10-19 09:18:00


## Discussion

*I pulled all the "description" fields from the "station", "agency", and "responderunit" tables, but they are still a bunch of codes which are probably alien to the public. I searched the database and did not find anything relevant that is descriptive enough to tell a story to the public. Maybe Mark can help with this part.*

*At this point, I will show how we calculate the response time and on-scene time. According to Mark's note: 3 to 5 is response time and 5 to 12 is on-scene time.*




In [74]:
### Reshape the 2016-10-19 records into one row per responder unit, with one
### timestamp column per time description.
### NOTE: no station filter is applied here (the dropna on 'station' is
### disabled), so units with missing or non-integer station codes are kept,
### and the rows are not sorted by dispatch time.
table_s = table2.copy()
stationTable = pd.pivot_table(
    table_s,
    index=['responderunit_id'],
    columns='timedesc_id',
    values='realtime',
    # keep the first timestamp encountered for each unit / time description
    aggfunc='first',
).rename(columns=d)  # replace the numeric timedesc_ids with their labels

print(len(stationTable))
stationTable

63


timedesc_id,Dispatched,On Scene,Depart Scene,Clear
responderunit_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
7,2016-10-19 09:18:00,2016-10-19 09:20:51,NaT,2016-10-19 15:06:28
8,2016-10-19 09:41:27,2016-10-19 09:52:31,NaT,2016-10-19 18:48:42
9,2016-10-19 09:39:33,2016-10-19 09:56:52,NaT,2016-10-19 19:41:16
12,2016-10-19 09:39:33,2016-10-19 09:59:58,NaT,2016-10-19 17:36:52
13,2016-10-19 09:41:27,2016-10-19 10:00:30,NaT,2016-10-19 21:04:28
15,2016-10-19 09:08:12,2016-10-19 09:20:27,NaT,2016-10-19 21:05:13
30,2016-10-19 18:01:03,2016-10-19 18:01:03,NaT,2016-10-19 18:01:07
33,2016-10-19 09:39:33,2016-10-19 09:47:57,NaT,2016-10-19 16:20:31
35,2016-10-19 09:08:12,2016-10-19 09:13:22,NaT,2016-10-19 13:12:57
38,2016-10-19 10:12:45,2016-10-19 10:20:02,NaT,2016-10-19 17:49:46


In [75]:
##Calculate "Response_Time" and "OnScene_Time"
## Response time is Dispatched (3) -> On Scene (5);
## on-scene time is On Scene (5) -> Clear (12).
stationTable["Response_Time"] = stationTable['On Scene'] - stationTable['Dispatched']
stationTable["OnScene_Time"] = stationTable['Clear'] - stationTable['On Scene']

## The two `.astype('timedelta64[m]')` calls that used to follow were removed:
## their results were never assigned, so they had no effect on the table, and
## minute resolution is rejected by pandas >= 2.0. The columns stay as Timedelta.
stationTable.head(20)

timedesc_id,Dispatched,On Scene,Depart Scene,Clear,Response_Time,OnScene_Time
responderunit_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
7,2016-10-19 09:18:00,2016-10-19 09:20:51,NaT,2016-10-19 15:06:28,00:02:51,05:45:37
8,2016-10-19 09:41:27,2016-10-19 09:52:31,NaT,2016-10-19 18:48:42,00:11:04,08:56:11
9,2016-10-19 09:39:33,2016-10-19 09:56:52,NaT,2016-10-19 19:41:16,00:17:19,09:44:24
12,2016-10-19 09:39:33,2016-10-19 09:59:58,NaT,2016-10-19 17:36:52,00:20:25,07:36:54
13,2016-10-19 09:41:27,2016-10-19 10:00:30,NaT,2016-10-19 21:04:28,00:19:03,11:03:58
15,2016-10-19 09:08:12,2016-10-19 09:20:27,NaT,2016-10-19 21:05:13,00:12:15,11:44:46
30,2016-10-19 18:01:03,2016-10-19 18:01:03,NaT,2016-10-19 18:01:07,00:00:00,00:00:04
33,2016-10-19 09:39:33,2016-10-19 09:47:57,NaT,2016-10-19 16:20:31,00:08:24,06:32:34
35,2016-10-19 09:08:12,2016-10-19 09:13:22,NaT,2016-10-19 13:12:57,00:05:10,03:59:35
38,2016-10-19 10:12:45,2016-10-19 10:20:02,NaT,2016-10-19 17:49:46,00:07:17,07:29:44


In [76]:
# Mean dispatch-to-on-scene time over the responder units of the incident
Mean_response_time = stationTable['Response_Time'].mean()
Mean_response_time

Timedelta('0 days 00:17:22.322580')

In [77]:
# Median dispatch-to-on-scene time over the responder units of the incident
Median_response_time = stationTable['Response_Time'].median()
Median_response_time

Timedelta('0 days 00:08:35')

In [78]:
# Mean on-scene-to-clear time over the responder units of the incident
Mean_onscene_time = stationTable['OnScene_Time'].mean()
Mean_onscene_time

Timedelta('0 days 04:39:36.283018')

In [79]:
# Median on-scene-to-clear time over the responder units of the incident
Median_onscene_time = stationTable['OnScene_Time'].median()
Median_onscene_time

Timedelta('0 days 03:59:35')

## Discussion
*It turns out this incident is not a good example of what a typical incident's response time and on-scene time look like, because it has investigators and other unusual station codes. But it is a good candidate to show the full spectrum of responders.*

*I have done an analysis of the other incidents that happened on the same day to show a typical incident's response time and on-scene time, which is shown below.*

In [57]:
## Check out metrics for the other incidents that happened on the same day
def getTimeDataset3(timedesc_dict):
    '''
    Pull the responder time records of every incident other than 1281359
    whose times fall on 2016-10-19, ordered by incident_id.

    The query joins incident -> inctimes -> responder -> responderunit and
    left-joins station and agency, keeping only inctimes rows that have a
    responder_id and whose timedesc_id is one of the requested ids.

    Parameters
    ----------
    timedesc_dict : dict
        Mapping whose keys are the timedesc_ids to select (values are not used
        here). Keys must be integers or integer-like strings.

    Returns
    -------
    Whatever pgquery() returns for the query (a DataFrame with columns
    incident_id, responderunit_id, timedesc_id, station, agency, responder,
    realtime).

    Raises
    ------
    ValueError
        If timedesc_dict is empty or a key is not an integer value.
    '''
    RESPONSE_TIME_QUERY='''
                        
                        SELECT  I.incident_id,R.responderunit_id,
                                T.timedesc_id, S.description as Station, A.description as Agency,
                                RU.description as Responder,
                                T.realtime
                        FROM incident as I
                        INNER JOIN inctimes as T
                                ON I.incident_id = T.incident_id
                        INNER JOIN responder as R
                                ON ( I.incident_id = R.incident_id AND T.responder_id = R.responder_id)
                        INNER JOIN responderunit as RU
                                ON R.responderunit_id = RU.responderunit_id
                        LEFT JOIN station as S
                                ON RU.station_id= S.station_id
                        LEFT JOIN agency as A
                                ON RU.agency_id = A.agency_id
                        WHERE T.timedesc_id IN ??TIMEDESC_IDS??
                                AND T.responder_id IS NOT NULL
                                AND I.incident_id != 1281359
                                AND CAST(T.realtime AS DATE) = date '2016-10-19'
                        ORDER BY I.incident_id
                                ASC;
                        '''

    if not timedesc_dict:
        raise ValueError("timedesc_dict must contain at least one timedesc_id")

    # Build the "(3, 5, 9, 12)" list by hand: str(tuple(...)) would emit "(3,)"
    # for a single id, which is not valid SQL. Casting each key to int also
    # guarantees that nothing but numbers is spliced into the query text.
    timedesc_ids = "(" + ", ".join(str(int(key)) for key in timedesc_dict) + ")"
    RESPONSE_TIME_QUERY = RESPONSE_TIME_QUERY.replace("??TIMEDESC_IDS??", timedesc_ids)

    # execute the query: **this takes a pretty long time**
    return pgquery(RESPONSE_TIME_QUERY)

In [80]:
# Reuses the timedesc_id -> label mapping `d` defined in an earlier cell
table3 = getTimeDataset3(d)



SQL QUERY = 
                        
                        SELECT  I.incident_id,R.responderunit_id,
                                T.timedesc_id, S.description as Station, A.description as Agency,
                                RU.description as Responder,
                                T.realtime
                        FROM incident as I
                        INNER JOIN inctimes as T
                                ON I.incident_id = T.incident_id
                        INNER JOIN responder as R
                                ON ( I.incident_id = R.incident_id AND T.responder_id = R.responder_id)
                        INNER JOIN responderunit as RU
                                ON R.responderunit_id = RU.responderunit_id
                        LEFT JOIN station as S
                                ON RU.station_id= S.station_id
                        LEFT JOIN agency as A
                                ON RU.agency_id = A.agency_id
                        WHERE T.ti

In [81]:
# Number of time records for the other incidents on 2016-10-19
len(table3)

1623

In [82]:
# Preview the first 50 rows of the result
table3.head(50)

Unnamed: 0,incident_id,responderunit_id,timedesc_id,station,agency,responder,realtime
0,1281269,242,12,,,M342,2016-10-19 00:26:08
1,1281272,224,12,,,M324,2016-10-19 00:29:25
2,1281274,221,9,,,M321,2016-10-19 00:07:01
3,1281274,221,12,,,M321,2016-10-19 00:43:35
4,1281276,42,12,22.0,,E22,2016-10-19 00:30:10
5,1281277,227,9,,,M327,2016-10-19 00:03:29
6,1281277,227,12,,,M327,2016-10-19 00:42:56
7,1281278,61,3,31.0,,E31,2016-10-19 00:03:22
8,1281278,234,3,,,M334,2016-10-19 00:03:22
9,1281278,234,5,,,M334,2016-10-19 00:06:44


In [83]:
#table3.dropna(subset=['station'],inplace=True)
## Reshape to one row per (incident, responder unit) with one timestamp
## column per time description, then order the rows by dispatch time.
## NOTE: this overwrites the raw query result held in `table3`, so the cell
## cannot be re-run without re-running the query cell first.
table3 = (
    table3
    .pivot_table(
        index=['incident_id', 'responderunit_id'],
        columns='timedesc_id',
        values='realtime',
        # keep the first timestamp encountered for each unit / time description
        aggfunc='first',
    )
    .rename(columns=d)  # replace the numeric timedesc_ids with their labels
    .sort_values('Dispatched', axis=0, ascending=True)
)
print(len(table3))
table3

541


Unnamed: 0_level_0,timedesc_id,Dispatched,On Scene,Depart Scene,Clear
incident_id,responderunit_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1281280,114,2016-10-19 00:00:43,2016-10-19 00:06:22,NaT,2016-10-19 00:11:41
1281280,243,2016-10-19 00:00:43,2016-10-19 00:04:53,2016-10-19 00:23:09,2016-10-19 00:57:09
1281278,61,2016-10-19 00:03:22,NaT,NaT,2016-10-19 00:08:46
1281278,234,2016-10-19 00:03:22,2016-10-19 00:06:44,2016-10-19 00:21:58,2016-10-19 01:00:11
1281281,37,2016-10-19 00:18:02,NaT,NaT,2016-10-19 00:21:36
1281281,219,2016-10-19 00:18:02,2016-10-19 00:20:43,2016-10-19 00:27:22,2016-10-19 01:10:31
1281282,223,2016-10-19 00:26:30,2016-10-19 00:32:21,2016-10-19 01:01:55,2016-10-19 01:36:41
1281282,428,2016-10-19 00:26:30,2016-10-19 00:33:37,NaT,2016-10-19 00:34:17
1281283,1597,2016-10-19 00:29:57,2016-10-19 00:39:09,NaT,2016-10-19 00:43:07
1281286,33,2016-10-19 00:43:10,2016-10-19 00:50:36,NaT,2016-10-19 00:55:49


In [84]:
##Calculate "Response_Time" and "OnScene_Time"
## Response time is Dispatched (3) -> On Scene (5);
## on-scene time is On Scene (5) -> Clear (12).
## Rows with no 'On Scene' timestamp get NaT for both durations.
table3["Response_Time"] = table3['On Scene'] - table3['Dispatched']
table3["OnScene_Time"] = table3['Clear'] - table3['On Scene']

## The two `.astype('timedelta64[m]')` calls that used to follow were removed:
## their results were never assigned, so they had no effect on the table, and
## minute resolution is rejected by pandas >= 2.0. The columns stay as Timedelta.
table3.head(20)


Unnamed: 0_level_0,timedesc_id,Dispatched,On Scene,Depart Scene,Clear,Response_Time,OnScene_Time
incident_id,responderunit_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1281280,114,2016-10-19 00:00:43,2016-10-19 00:06:22,NaT,2016-10-19 00:11:41,00:05:39,00:05:19
1281280,243,2016-10-19 00:00:43,2016-10-19 00:04:53,2016-10-19 00:23:09,2016-10-19 00:57:09,00:04:10,00:52:16
1281278,61,2016-10-19 00:03:22,NaT,NaT,2016-10-19 00:08:46,NaT,NaT
1281278,234,2016-10-19 00:03:22,2016-10-19 00:06:44,2016-10-19 00:21:58,2016-10-19 01:00:11,00:03:22,00:53:27
1281281,37,2016-10-19 00:18:02,NaT,NaT,2016-10-19 00:21:36,NaT,NaT
1281281,219,2016-10-19 00:18:02,2016-10-19 00:20:43,2016-10-19 00:27:22,2016-10-19 01:10:31,00:02:41,00:49:48
1281282,223,2016-10-19 00:26:30,2016-10-19 00:32:21,2016-10-19 01:01:55,2016-10-19 01:36:41,00:05:51,01:04:20
1281282,428,2016-10-19 00:26:30,2016-10-19 00:33:37,NaT,2016-10-19 00:34:17,00:07:07,00:00:40
1281283,1597,2016-10-19 00:29:57,2016-10-19 00:39:09,NaT,2016-10-19 00:43:07,00:09:12,00:03:58
1281286,33,2016-10-19 00:43:10,2016-10-19 00:50:36,NaT,2016-10-19 00:55:49,00:07:26,00:05:13


In [85]:
# Mean dispatch-to-on-scene time over the other incidents' responder units
Mean_response_time_other = table3['Response_Time'].mean()
Mean_response_time_other


Timedelta('0 days 00:05:50.139534')

In [89]:
# Median dispatch-to-on-scene time over the other incidents' responder units
Median_response_time_other = table3['Response_Time'].median()
Median_response_time_other

Timedelta('0 days 00:05:19')

In [90]:
# Mean on-scene-to-clear time over the other incidents' responder units
Mean_onscene_time_other = table3['OnScene_Time'].mean()
Mean_onscene_time_other

Timedelta('0 days 00:26:25.393719')

In [91]:
# Median on-scene-to-clear time over the other incidents' responder units
Median_onscene_time_other = table3['OnScene_Time'].median()
Median_onscene_time_other

Timedelta('0 days 00:12:31')