In this notebook, I attempt to address [ER team issue #72](https://github.com/hackoregon/emergency-response/issues/72), 'Get incident rates for each FMA'

4/12
So far, I have calculated FMA incident rates per square mile (overall, and split into medical vs. fire/explosion incidents). If the rollups look acceptable to the team, I can append them to `fma_api_rollup` in the db and then add incidents per capita and per household. 

In [1]:
import os
import sys
import traceback

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import psycopg2
import seaborn as sns
from dotenv import load_dotenv, find_dotenv

%matplotlib inline

In [2]:
# walk the directory tree to find and load the .env file w/ AWS host, username and password
load_dotenv(find_dotenv())

True

In [3]:
# connect to postgres
def pgconnect():
    '''
    Open a psycopg2 connection to the postgres database.

    Connection settings are read from the environment variables
    erdatabase, eruser, erpassword, erhost and erport.

    Returns the open connection, or None if psycopg2 raised an error while
    connecting; in that case the error, its pgcode/pgerror and the traceback
    are printed.
    '''
    try:
        conn = psycopg2.connect(
            database=os.environ.get("erdatabase"),
            user=os.environ.get("eruser"),
            password=os.environ.get("erpassword"),
            host=os.environ.get("erhost"),
            port=os.environ.get("erport"),
        )
        print("Opened database successfully")
        return conn

    except psycopg2.Error as e:
        print("I am unable to connect to the database")
        print(e)
        print(e.pgcode)
        print(e.pgerror)
        # traceback is imported at the top of the notebook; without that import
        # this handler itself raised NameError and never returned None
        print(traceback.format_exc())
        return None

In [4]:
def pquery(QUERY):
    '''
    takes SQL query string, opens a cursor, executes query in psql, and pulls results into pandas df

    A new connection is opened via pgconnect() for every call and is always
    closed before returning. The statement timeout is disabled for the session
    before QUERY is executed.

    Returns a DataFrame whose columns are named after the result columns.
    Returns None (after printing the reason) if the connection could not be
    opened, the query failed, or the statement produced no result set.
    '''
    conn = pgconnect()
    if conn is None:
        # pgconnect has already printed why the connection failed
        return None

    try:
        cur = conn.cursor()
        try:
            print("SQL QUERY = "+QUERY)
            cur.execute("SET statement_timeout = 0")
            cur.execute(QUERY)

            # description is None for statements that return no rows (e.g. DDL)
            if cur.description is None:
                print("Query returned no result set")
                return None

            # Extract the column names from the cursor description for the header
            col_names = [col[0] for col in cur.description]
            rows = cur.fetchall()
            return pd.DataFrame(rows, columns=col_names)
        finally:
            cur.close()

    except Exception as e:
        # only psycopg2 errors carry pgerror; fall back to the exception itself
        # so non-database errors are reported instead of raising AttributeError
        print(getattr(e, "pgerror", None) or e)
        return None

    finally:
        conn.close()



In [5]:
# table of fma, fma_area, and number of incidents
# - one row per FMA: incidents are matched to FMA shapes via incident.fmarespcomp = fma_shapes.fma
# - INNER JOIN: FMAs with no matching incidents (and incidents with no matching FMA shape) are left out
# - fma_area is ST_Area of the geometry cast to geography (square meters; converted to square miles in the next query)
# - ordered by incident count, highest first
QUERY1='''select s.fma, ST_Area(s.geom::geography) as fma_area,count(i.incident_id) as num_incidents
 FROM fma_shapes s
 INNER JOIN
 incident i
   ON
   s.fma = i.fmarespcomp
 GROUP BY s.fma, fma_area
 ORDER BY num_incidents DESC;
'''

In [6]:
df1 = pquery(QUERY1)

Opened database successfully
SQL QUERY = select s.fma, ST_Area(s.geom::geography) as fma_area,count(i.incident_id) as num_incidents
 FROM fma_shapes s
 INNER JOIN
 incident i
   ON
   s.fma = i.fmarespcomp
 GROUP BY s.fma, fma_area
 ORDER BY num_incidents DESC;



In [7]:
df1

Unnamed: 0,fma,fma_area,num_incidents
0,1,2653218.0,43139
1,7,14774830.0,42676
2,3,6330879.0,30723
3,11,13743870.0,30118
4,4,6131893.0,29829
5,13,8638372.0,29499
6,31,10932370.0,24451
7,25,12958040.0,22742
8,30,10782350.0,21593
9,19,10847010.0,21263


In [8]:
# same table but convert area to square miles (1m^2 = 3.861x10^-7 mile^2)
# - adds incidents_per_sqmi = incident count / area in square miles
# - groups by s.geom (instead of the area alias) so the area expression can be reused in the rate
# - ordered by incidents per square mile, highest first
QUERY2='''select s.fma, ST_Area(s.geom::geography)*(.0000003861) as fma_area_mi, count(i.incident_id) as num_incidents,
count(i.incident_id)/(ST_Area(s.geom::geography)*(.0000003861)) as incidents_per_sqmi
 FROM fma_shapes s
 INNER JOIN
 incident i
   ON
   s.fma = i.fmarespcomp
 GROUP BY s.fma, s.geom
 ORDER BY incidents_per_sqmi DESC
'''

In [9]:
df2 = pquery(QUERY2)

Opened database successfully
SQL QUERY = select s.fma, ST_Area(s.geom::geography)*(.0000003861) as fma_area_mi, count(i.incident_id) as num_incidents,
count(i.incident_id)/(ST_Area(s.geom::geography)*(.0000003861)) as incidents_per_sqmi
 FROM fma_shapes s
 INNER JOIN
 incident i
   ON
   s.fma = i.fmarespcomp
 GROUP BY s.fma, s.geom
 ORDER BY incidents_per_sqmi DESC



In [10]:
df2

Unnamed: 0,fma,fma_area_mi,num_incidents,incidents_per_sqmi
0,1,1.024407,43139,42111.178792
1,21,0.952173,12641,13275.945865
2,4,2.367524,29829,12599.238744
3,3,2.444352,30723,12568.974149
4,13,3.335275,29499,8844.547307
5,7,5.70456,42676,7481.032716
6,31,4.220988,24451,5792.719035
7,11,5.306509,30118,5675.671046
8,30,4.163065,21593,5186.804093
9,19,4.188032,21263,5077.08678


In [11]:
# look at just medical incidents
# - same rollup as the per-square-mile query, restricted to incidents whose primary
#   "situation found" code (incident.incsitfoundprm_id) resolves to incsitfoundclass_id = 3
# - class id 3 is taken to mean "medical" here -- TODO confirm against the incsitfoundclass table
# - the WHERE clause filters on the LEFT JOINed incsitfoundclass table, so rows without a
#   matching sub/class are dropped and the LEFT JOINs behave like INNER JOINs
QUERY3='''select s.fma, ST_Area(s.geom::geography)*(.0000003861) as fma_area_mi, count(i.incident_id) as num_incidents,
count(i.incident_id)/(ST_Area(s.geom::geography)*(.0000003861)) as incidents_per_sqmi
 FROM fma_shapes s
 INNER JOIN
 incident i
   ON
   s.fma = i.fmarespcomp
  INNER JOIN incsitfound
    ON i.incsitfoundprm_id = incsitfound.incsitfound_id
  LEFT JOIN incsitfoundsub
    ON incsitfound.incsitfoundsub_id = incsitfoundsub.incsitfoundsub_id
  LEFT JOIN incsitfoundclass
    ON incsitfoundsub.incsitfoundclass_id = incsitfoundclass.incsitfoundclass_id 
 WHERE incsitfoundclass.incsitfoundclass_id = 3
 GROUP BY s.fma, s.geom
 ORDER BY incidents_per_sqmi DESC;
'''


In [12]:
df3 = pquery(QUERY3)

Opened database successfully
SQL QUERY = select s.fma, ST_Area(s.geom::geography)*(.0000003861) as fma_area_mi, count(i.incident_id) as num_incidents,
count(i.incident_id)/(ST_Area(s.geom::geography)*(.0000003861)) as incidents_per_sqmi
 FROM fma_shapes s
 INNER JOIN
 incident i
   ON
   s.fma = i.fmarespcomp
  INNER JOIN incsitfound
    ON i.incsitfoundprm_id = incsitfound.incsitfound_id
  LEFT JOIN incsitfoundsub
    ON incsitfound.incsitfoundsub_id = incsitfoundsub.incsitfoundsub_id
  LEFT JOIN incsitfoundclass
    ON incsitfoundsub.incsitfoundclass_id = incsitfoundclass.incsitfoundclass_id 
 WHERE incsitfoundclass.incsitfoundclass_id = 3
 GROUP BY s.fma, s.geom
 ORDER BY incidents_per_sqmi DESC;



In [13]:
df3

Unnamed: 0,fma,fma_area_mi,num_incidents,incidents_per_sqmi
0,1,1.024407,33984,33174.303996
1,21,0.952173,9233,9696.765143
2,4,2.367524,20167,8518.181895
3,3,2.444352,19821,8108.896807
4,13,3.335275,20171,6047.776661
5,7,5.70456,32937,5773.80201
6,11,5.306509,22245,4192.021463
7,30,4.163065,16836,4044.136235
8,19,4.188032,15518,3705.320635
9,9,3.45164,11703,3390.561845


In [14]:
# look at non-medical incidents
# - same rollup as the medical query, but keeping every class other than incsitfoundclass_id = 3
# - NOTE(review): `!= 3` is never true for NULL, so incidents whose situation-found code has no
#   matching sub/class row are excluded here as well as from the medical query, and incidents
#   without a matching incsitfound row are dropped by the INNER JOIN. This is presumably why
#   medical + non-medical is smaller than the all-incident count (e.g. fma 1: 33984 + 8762 = 42746
#   vs 43139) -- confirm whether those incidents should be counted as non-medical
QUERY4='''select s.fma, ST_Area(s.geom::geography)*(.0000003861) as fma_area_mi, count(i.incident_id) as num_incidents,
count(i.incident_id)/(ST_Area(s.geom::geography)*(.0000003861)) as incidents_per_sqmi
 FROM fma_shapes s
 INNER JOIN
 incident i
   ON
   s.fma = i.fmarespcomp
  INNER JOIN incsitfound
    ON i.incsitfoundprm_id = incsitfound.incsitfound_id
  LEFT JOIN incsitfoundsub
    ON incsitfound.incsitfoundsub_id = incsitfoundsub.incsitfoundsub_id
  LEFT JOIN incsitfoundclass
    ON incsitfoundsub.incsitfoundclass_id = incsitfoundclass.incsitfoundclass_id 
 WHERE incsitfoundclass.incsitfoundclass_id != 3
 GROUP BY s.fma, s.geom
 ORDER BY incidents_per_sqmi DESC;
'''

In [15]:
df4 = pquery(QUERY4)

Opened database successfully
SQL QUERY = select s.fma, ST_Area(s.geom::geography)*(.0000003861) as fma_area_mi, count(i.incident_id) as num_incidents,
count(i.incident_id)/(ST_Area(s.geom::geography)*(.0000003861)) as incidents_per_sqmi
 FROM fma_shapes s
 INNER JOIN
 incident i
   ON
   s.fma = i.fmarespcomp
  INNER JOIN incsitfound
    ON i.incsitfoundprm_id = incsitfound.incsitfound_id
  LEFT JOIN incsitfoundsub
    ON incsitfound.incsitfoundsub_id = incsitfoundsub.incsitfoundsub_id
  LEFT JOIN incsitfoundclass
    ON incsitfoundsub.incsitfoundclass_id = incsitfoundclass.incsitfoundclass_id 
 WHERE incsitfoundclass.incsitfoundclass_id != 3
 GROUP BY s.fma, s.geom
 ORDER BY incidents_per_sqmi DESC;



In [16]:
df4

Unnamed: 0,fma,fma_area_mi,num_incidents,incidents_per_sqmi
0,1,1.024407,8762,8553.238336
1,3,2.444352,10695,4375.392329
2,4,2.367524,9124,3853.815223
3,21,0.952173,3299,3464.705752
4,13,3.335275,9127,2736.505755
5,7,5.70456,9353,1639.56554
6,11,5.306509,7575,1427.492137
7,19,4.188032,5637,1345.978375
8,9,3.45164,4574,1325.167041
9,25,5.0031,5978,1194.859293


In [17]:
# look at just fire/explosion incidents
# - same rollup as the medical query, restricted to incsitfoundclass_id = 1
# - class id 1 is taken to mean "fire/explosion" here -- TODO confirm against the incsitfoundclass table
# - as in the medical query, filtering on the LEFT JOINed class table drops rows with no matching sub/class
QUERY5='''select s.fma, ST_Area(s.geom::geography)*(.0000003861) as fma_area_mi, count(i.incident_id) as num_incidents,
count(i.incident_id)/(ST_Area(s.geom::geography)*(.0000003861)) as incidents_per_sqmi
 FROM fma_shapes s
 INNER JOIN
 incident i
   ON
   s.fma = i.fmarespcomp
  INNER JOIN incsitfound
    ON i.incsitfoundprm_id = incsitfound.incsitfound_id
  LEFT JOIN incsitfoundsub
    ON incsitfound.incsitfoundsub_id = incsitfoundsub.incsitfoundsub_id
  LEFT JOIN incsitfoundclass
    ON incsitfoundsub.incsitfoundclass_id = incsitfoundclass.incsitfoundclass_id 
 WHERE incsitfoundclass.incsitfoundclass_id = 1
 GROUP BY s.fma, s.geom
 ORDER BY incidents_per_sqmi DESC;
'''

In [18]:
df5 = pquery(QUERY5)

Opened database successfully
SQL QUERY = select s.fma, ST_Area(s.geom::geography)*(.0000003861) as fma_area_mi, count(i.incident_id) as num_incidents,
count(i.incident_id)/(ST_Area(s.geom::geography)*(.0000003861)) as incidents_per_sqmi
 FROM fma_shapes s
 INNER JOIN
 incident i
   ON
   s.fma = i.fmarespcomp
  INNER JOIN incsitfound
    ON i.incsitfoundprm_id = incsitfound.incsitfound_id
  LEFT JOIN incsitfoundsub
    ON incsitfound.incsitfoundsub_id = incsitfoundsub.incsitfoundsub_id
  LEFT JOIN incsitfoundclass
    ON incsitfoundsub.incsitfoundclass_id = incsitfoundclass.incsitfoundclass_id 
 WHERE incsitfoundclass.incsitfoundclass_id = 1
 GROUP BY s.fma, s.geom
 ORDER BY incidents_per_sqmi DESC;



In [19]:
df5

Unnamed: 0,fma,fma_area_mi,num_incidents,incidents_per_sqmi
0,1,1.024407,820,800.462844
1,21,0.952173,556,583.927371
2,3,2.444352,897,366.968389
3,4,2.367524,812,342.974349
4,13,3.335275,936,280.636506
5,7,5.70456,1182,207.202659
6,11,5.306509,1008,189.955389
7,9,3.45164,587,170.064069
8,19,4.188032,653,155.920504
9,23,2.002927,297,148.283016


In [20]:
# combine the medical (df3) and fire/explosion (df5) rollups: inner join on 'fma',
# with the shared column names suffixed _med (from df3) and _fire (from df5)
joined = df3.join(
    df5.set_index('fma'),
    on='fma',
    how='inner',
    lsuffix='_med',
    rsuffix='_fire',
)
# ratio of medical to fire/explosion incident counts for each FMA
joined['med_fire_ratio'] = joined['num_incidents_med'].div(joined['num_incidents_fire'])
# display with the highest medical:fire ratio first (joined itself stays in join order)
joined.sort_values('med_fire_ratio', ascending=False)

Unnamed: 0,fma,fma_area_mi_med,num_incidents_med,incidents_per_sqmi_med,fma_area_mi_fire,num_incidents_fire,incidents_per_sqmi_fire,med_fire_ratio
0,1,1.024407,33984,33174.303996,1.024407,820,800.462844,41.443902
7,30,4.163065,16836,4044.136235,4.163065,448,107.613034,37.580357
18,29,6.610662,10540,1594.39406,6.610662,355,53.701128,29.690141
19,16,2.177882,3260,1496.867396,2.177882,113,51.885281,28.849558
5,7,5.70456,32937,5773.80201,5.70456,1182,207.202659,27.865482
11,31,4.220988,12722,3013.985995,4.220988,468,110.874504,27.183761
21,18,6.162694,8852,1436.3849,6.162694,330,53.548014,26.824242
10,25,5.0031,16688,3335.532265,5.0031,631,126.121816,26.44691
2,4,2.367524,20167,8518.181895,2.367524,812,342.974349,24.836207
23,5,4.047208,5300,1309.544835,4.047208,217,53.617213,24.423963


In [26]:
# export csv to data folder for import to db
# sort by medical:fire ratio (highest first) and drop fma_area_mi_fire, which repeats
# the area already present in fma_area_mi_med; `joined` itself is not modified
df_import = (
    joined
    .sort_values(by='med_fire_ratio', ascending=False)
    .drop(columns='fma_area_mi_fire')
)
# the DataFrame index is written too, as the first (unnamed) csv column
df_import.to_csv("responseTimeMetricsData/incidents_persqmi_fma.csv")