## This file contains useful utility functions that can be reused by others. 
## Please feel free to add more! :-)


### To utilize the functions in this file:

1. Install the [**nbimporter**](https://github.com/grst/nbimporter) package

        pip install nbimporter

2. Set up your environment in the folder where you cloned this repository. Create a **.env** file with the following:

        eruser=**replace with user**
        erdatabase=**replace with database**
        erpassword=**replace with password**
        erhost=**replace with host**
        erport=**replace with port**
        
3. Create a new iPython Notebook
4. Import both **nbimporter** and **utilities** (this notebook)

        import nbimporter
        import utilities
          
5. To call a function in this notebook:
   
        utilities.your_function_name()

In [1]:
import os 
from dotenv import load_dotenv, find_dotenv
import psycopg2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
%matplotlib inline

# Load the variables from the .env file that find_dotenv() locates, so that the
# os.environ lookups in pgconnect() can see the database settings (eruser,
# erdatabase, erpassword, erhost, erport).
load_dotenv(find_dotenv())

# connect to postgres
def pgconnect():
    '''
    opens a connection to the postgres database described by the
    erdatabase / eruser / erpassword / erhost / erport environment variables

    returns: an open psycopg2 connection, or None when the connection could
             not be established (the error details are printed, not raised)
    '''
    # imported here because the notebook's import cell does not provide it;
    # without it the error handler below died with a NameError
    import traceback

    # keep the try body down to the one call that can actually fail
    try:
        conn = psycopg2.connect(database=os.environ.get("erdatabase"),
                                user=os.environ.get("eruser"),
                                password=os.environ.get("erpassword"),
                                host=os.environ.get("erhost"),
                                port=os.environ.get("erport"))
    except psycopg2.Error as e:
        print("I am unable to connect to the database")
        print(e)
        print(e.pgcode)
        print(e.pgerror)
        print(traceback.format_exc())
        return None

    print("Opened database successfully")
    return conn

In [2]:
def pgquery(QUERY):
    '''
    takes SQL query string, opens a cursor, and executes query in psql

    param: QUERY - the SQL text to run; it is expected to return rows

    returns: a pandas DataFrame whose columns are named after the columns of
             the result set, or None when the connection or the query failed
             (the error is printed, not raised)
    '''
    conn = pgconnect()
    if conn is None:
        # pgconnect() has already printed why the connection failed
        return None

    try:
        print("SQL QUERY = "+QUERY)
        # the cursor is closed on leaving the with-block, even on error
        with conn.cursor() as cur:
            # disable the server-side statement timeout: these queries can run long
            cur.execute("SET statement_timeout = 0")
            cur.execute(QUERY)
            # first field of every description entry is the column name
            col_names = [column[0] for column in cur.description]
            rows = cur.fetchall()
        # Create the dataframe, passing in the list of col_names extracted from the description
        return pd.DataFrame(rows, columns=col_names)

    except Exception as e:
        # only psycopg2 errors carry pgerror (and it may be None); fall back to
        # the exception itself so the handler can never raise on its own
        print(getattr(e, "pgerror", None) or e)
        return None

    finally:
        # always release the connection, on success and on every failure path
        conn.close()

In [3]:
def getTimeDataset(timedesc_dict):
    '''
    returns a reshaped dataframe with one row per combination of

        incident_id | responderunit_id | typenaturecode_id | fireblock | fmarespcomp

    (these form the index) and one column per requested timedesc_id, named
    with the description given for it and holding the matching realtime value.

    usage: getTimeDataset(timedesc_dict)

    param: timedesc_dict - keys are the timedesc_ids you want
                         - values are the human descriptions for them
                           (used as the column names of the result)

    raises: ValueError   - timedesc_dict is empty
            RuntimeError - the query failed, i.e. pgquery returned no dataframe

    known issues: responder_id is selected by the query but is not part of the
                  pivot, so it does not appear in the result. responder_id values
                  were inconsistent and strange, so they are ignored for analysis
                  (the column could probably be dropped from the query).
    '''
    # an empty id list would render as "IN ()", which is not valid SQL
    if not timedesc_dict:
        raise ValueError("timedesc_dict must contain at least one timedesc_id")

    RESPONSE_TIME_QUERY='''
                        SELECT  I.incident_id, R.responderunit_id, T.responder_id,
                                T.timedesc_id, I.typenaturecode_id, I.fireblock, I.fmarespcomp,
                                T.realtime
                        FROM incident as I
                        INNER JOIN inctimes as T
                                ON I.incident_id = T.incident_id
                        INNER JOIN responder as R
                                ON ( I.incident_id = R.incident_id AND T.responder_id = R.responder_id)
                        WHERE T.timedesc_id IN ??TIMEDESC_IDS??
                                AND T.responder_id IS NOT NULL;
                        '''

    # add the timedesc_ids that we want to the query. The list is joined by hand
    # instead of using str(tuple(...)): a one-element tuple prints as "(5,)" and
    # the trailing comma broke the IN clause. Quotes are stripped as before so
    # ids given as strings are still sent unquoted.
    id_list = ", ".join(str(key).replace("'", "") for key in timedesc_dict)
    RESPONSE_TIME_QUERY = RESPONSE_TIME_QUERY.replace("??TIMEDESC_IDS??", "(" + id_list + ")")

    # execute the query: **this takes a pretty long time**
    df = pgquery(RESPONSE_TIME_QUERY)
    if df is None:
        # pgquery prints the database error and returns None on failure
        raise RuntimeError("response time query failed; see the error printed above")

    # now reshape the data so that we can do analysis more easily.
    # aggfunc='first' keeps the first realtime seen when several rows share the
    # same index values and timedesc_id.
    table = df.pivot_table(index=['incident_id', 'responderunit_id', 'typenaturecode_id',
                                  'fireblock', 'fmarespcomp'],
                           columns='timedesc_id', values='realtime', aggfunc='first')

    # replace the timedesc_id column labels with their human descriptions
    return table.rename(columns=timedesc_dict)