# Creation of a 911 emergency response db


In [1]:
import os.path

# first let's load up the dataset
# the unzipped csv is too large for github so teammates will need to unzip it locally before continuing

assert os.path.isfile("911.csv"), "You must unzip the raw data before continuing"



In [2]:
import pandas as pd

# load the data into a pandas dataframe

df = pd.read_csv("911.csv")
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 663522 entries, 0 to 663521
Data columns (total 9 columns):
 #   Column     Non-Null Count   Dtype  
---  ------     --------------   -----  
 0   lat        663522 non-null  float64
 1   lng        663522 non-null  float64
 2   desc       663522 non-null  object 
 3   zip        583323 non-null  float64
 4   title      663522 non-null  object 
 5   timeStamp  663522 non-null  object 
 6   twp        663229 non-null  object 
 7   addr       663522 non-null  object 
 8   e          663522 non-null  int64  
dtypes: float64(3), int64(1), object(5)
memory usage: 45.6+ MB


In [3]:
# let's take a look at the first few rows'
df.head()

Unnamed: 0,lat,lng,desc,zip,title,timeStamp,twp,addr,e
0,40.297876,-75.581294,REINDEER CT & DEAD END; NEW HANOVER; Station ...,19525.0,EMS: BACK PAINS/INJURY,2015-12-10 17:10:52,NEW HANOVER,REINDEER CT & DEAD END,1
1,40.258061,-75.26468,BRIAR PATH & WHITEMARSH LN; HATFIELD TOWNSHIP...,19446.0,EMS: DIABETIC EMERGENCY,2015-12-10 17:29:21,HATFIELD TOWNSHIP,BRIAR PATH & WHITEMARSH LN,1
2,40.121182,-75.351975,HAWS AVE; NORRISTOWN; 2015-12-10 @ 14:39:21-St...,19401.0,Fire: GAS-ODOR/LEAK,2015-12-10 14:39:21,NORRISTOWN,HAWS AVE,1
3,40.116153,-75.343513,AIRY ST & SWEDE ST; NORRISTOWN; Station 308A;...,19401.0,EMS: CARDIAC EMERGENCY,2015-12-10 16:47:36,NORRISTOWN,AIRY ST & SWEDE ST,1
4,40.251492,-75.60335,CHERRYWOOD CT & DEAD END; LOWER POTTSGROVE; S...,,EMS: DIZZINESS,2015-12-10 16:56:52,LOWER POTTSGROVE,CHERRYWOOD CT & DEAD END,1


In [4]:
df["e"].value_counts()

e
1    663522
Name: count, dtype: int64

In [5]:
# I want to develop some more nuance in the title column.
# perhaps can split on the colon (:) to determine the category of call

def get_type(title):
    return title.split(":")[0].strip()


df["type"] = df["title"].apply(get_type)
df["type"].value_counts()

type
EMS        332692
Traffic    230208
Fire       100622
Name: count, dtype: int64

In [6]:
# now let's clean the title up (i.e., remove the category)

def clean_title(title):
    txt = title.split(":")[-1].strip()
    if txt[-1] == "-":
        txt = txt[:-1].strip()
    return txt


df["clean_title"] = df["title"].apply(clean_title)
df["clean_title"].value_counts()

clean_title
VEHICLE ACCIDENT         184749
DISABLED VEHICLE          47917
FIRE ALARM                38452
FALL VICTIM               34683
RESPIRATORY EMERGENCY     34250
                          ...  
SUICIDE ATTEMPT               2
HIT + RUN                     1
PRISONER IN CUSTODY           1
ANIMAL COMPLAINT              1
FOOT PATROL                   1
Name: count, Length: 88, dtype: int64

In [7]:
# what's going on with the "RESCUE" titles?

mask = df["title"].str.contains("RESCUE")
df.loc[mask, :]

Unnamed: 0,lat,lng,desc,zip,title,timeStamp,twp,addr,e,type,clean_title
354,40.216190,-75.238794,ARBOUR GREEN CIR & CLOVER LEAF LN; MONTGOMERY;...,18936.0,Fire: RESCUE - GENERAL,2015-12-11 16:32:13,MONTGOMERY,ARBOUR GREEN CIR & CLOVER LEAF LN,1,Fire,RESCUE - GENERAL
356,40.216190,-75.238794,ARBOUR GREEN CIR & CLOVER LEAF LN; MONTGOMERY...,18936.0,EMS: RESCUE - GENERAL,2015-12-11 16:32:27,MONTGOMERY,ARBOUR GREEN CIR & CLOVER LEAF LN,1,EMS,RESCUE - GENERAL
523,40.089604,-75.148337,BARKER RD & GREENWOOD AVE; CHELTENHAM; Statio...,19095.0,EMS: RESCUE - GENERAL,2015-12-12 03:03:57,CHELTENHAM,BARKER RD & GREENWOOD AVE,1,EMS,RESCUE - GENERAL
524,40.089604,-75.148337,BARKER RD & GREENWOOD AVE; CHELTENHAM; 2015-12...,19095.0,Fire: RESCUE - GENERAL,2015-12-12 03:03:46,CHELTENHAM,BARKER RD & GREENWOOD AVE,1,Fire,RESCUE - GENERAL
579,40.182048,-75.481795,HOPWOOD RD & LONGACRE DR; UPPER PROVIDENCE; 20...,19426.0,Fire: RESCUE - ELEVATOR,2015-12-12 09:44:56,UPPER PROVIDENCE,HOPWOOD RD & LONGACRE DR,1,Fire,RESCUE - ELEVATOR
...,...,...,...,...,...,...,...,...,...,...,...
660126,40.250806,-75.682868,FAIRVIEW ST & JEFFERSON ST; WEST POTTSGROVE; 2...,19464.0,Fire: RESCUE - GENERAL,2020-07-19 22:46:44,WEST POTTSGROVE,FAIRVIEW ST & JEFFERSON ST,1,Fire,RESCUE - GENERAL
660586,40.121497,-75.339905,WALNUT ST & DEAD END; NORRISTOWN; Station 308...,,EMS: RESCUE - GENERAL,2020-07-21 11:59:54,NORRISTOWN,WALNUT ST & DEAD END,1,EMS,RESCUE - GENERAL
660587,40.121497,-75.339905,WALNUT ST & DEAD END; NORRISTOWN; 2020-07-21 @...,,Fire: RESCUE - GENERAL,2020-07-21 11:59:49,NORRISTOWN,WALNUT ST & DEAD END,1,Fire,RESCUE - GENERAL
662058,40.375774,-75.487864,WALNUT ST & W 6TH ST; RED HILL; Station 369; ...,18076.0,EMS: RESCUE - GENERAL,2020-07-25 08:48:52,RED HILL,WALNUT ST & W 6TH ST,1,EMS,RESCUE - GENERAL


In [8]:
import numpy as np


# also, I want to clean up the zip column. It should be an int

def to_int(x):
    try:
        return int(x)
    except:
        return np.nan


df["zip"] = df["zip"].apply(to_int)
df["zip"]

0         19525.0
1         19446.0
2         19401.0
3         19401.0
4             NaN
           ...   
663517    19403.0
663518    19403.0
663519    19041.0
663520    19401.0
663521    19041.0
Name: zip, Length: 663522, dtype: float64

In [9]:
df.head()

Unnamed: 0,lat,lng,desc,zip,title,timeStamp,twp,addr,e,type,clean_title
0,40.297876,-75.581294,REINDEER CT & DEAD END; NEW HANOVER; Station ...,19525.0,EMS: BACK PAINS/INJURY,2015-12-10 17:10:52,NEW HANOVER,REINDEER CT & DEAD END,1,EMS,BACK PAINS/INJURY
1,40.258061,-75.26468,BRIAR PATH & WHITEMARSH LN; HATFIELD TOWNSHIP...,19446.0,EMS: DIABETIC EMERGENCY,2015-12-10 17:29:21,HATFIELD TOWNSHIP,BRIAR PATH & WHITEMARSH LN,1,EMS,DIABETIC EMERGENCY
2,40.121182,-75.351975,HAWS AVE; NORRISTOWN; 2015-12-10 @ 14:39:21-St...,19401.0,Fire: GAS-ODOR/LEAK,2015-12-10 14:39:21,NORRISTOWN,HAWS AVE,1,Fire,GAS-ODOR/LEAK
3,40.116153,-75.343513,AIRY ST & SWEDE ST; NORRISTOWN; Station 308A;...,19401.0,EMS: CARDIAC EMERGENCY,2015-12-10 16:47:36,NORRISTOWN,AIRY ST & SWEDE ST,1,EMS,CARDIAC EMERGENCY
4,40.251492,-75.60335,CHERRYWOOD CT & DEAD END; LOWER POTTSGROVE; S...,,EMS: DIZZINESS,2015-12-10 16:56:52,LOWER POTTSGROVE,CHERRYWOOD CT & DEAD END,1,EMS,DIZZINESS


In [10]:
import re




In [11]:
item = "station av"
x = re.search(r'\bstation\b\s[\bave\b|"cr"]', item)
print(x)


<re.Match object; span=(0, 9), match='station a'>


In [12]:
import os

import pandas as pd

os.chdir("../application")

os.environ["DJANGO_SETTINGS_MODULE"] = "ERDash.settings"
os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"

import django

django.setup()

from dashboard import models

In [16]:
# first create administrative areas
areas = df.loc[:, ["zip", "twp"]].value_counts()

create_list = list()
for zip_code, name in areas.index:
    name = name.upper()
    try:
        zip_code = int(zip_code)
    except:
        zip_code = None
    area = models.AdministrativeArea( name=name)

    create_list.append(area)


models.AdministrativeArea.objects.bulk_create(create_list[:])

[<AdministrativeArea: AdministrativeArea object (2236)>,
 <AdministrativeArea: AdministrativeArea object (2237)>,
 <AdministrativeArea: AdministrativeArea object (2238)>,
 <AdministrativeArea: AdministrativeArea object (2239)>,
 <AdministrativeArea: AdministrativeArea object (2240)>,
 <AdministrativeArea: AdministrativeArea object (2241)>,
 <AdministrativeArea: AdministrativeArea object (2242)>,
 <AdministrativeArea: AdministrativeArea object (2243)>,
 <AdministrativeArea: AdministrativeArea object (2244)>,
 <AdministrativeArea: AdministrativeArea object (2245)>,
 <AdministrativeArea: AdministrativeArea object (2246)>,
 <AdministrativeArea: AdministrativeArea object (2247)>,
 <AdministrativeArea: AdministrativeArea object (2248)>,
 <AdministrativeArea: AdministrativeArea object (2249)>,
 <AdministrativeArea: AdministrativeArea object (2250)>,
 <AdministrativeArea: AdministrativeArea object (2251)>,
 <AdministrativeArea: AdministrativeArea object (2252)>,
 <AdministrativeArea: Administr

In [15]:
models.AdministrativeArea.objects.create(name="hello")

<AdministrativeArea: AdministrativeArea object (2235)>

In [None]:
models.AdministrativeArea.objects.all().delete()
for item in areas.index:
    print(item)