### Data exploration of the emergency and non-emergency 311 datasets in Washington, D.C.
* Dataset: 
    1. [All 311 data in the last 30 days (collected on 3/29/2021)](https://opendata.dc.gov/datasets/all-311-city-service-requests-last-30-days?geometry=-77.651%2C38.811%2C-76.372%2C38.998)
    2. [311 Service requests in the last 30 days assigned to the DC Fire and Emergency Medical Services agency (collected on 3/29/2021)](https://opendata.dc.gov/datasets/fems-service-requests-in-last-30-days?geometry=-77.612%2C38.823%2C-76.334%2C39.010)

In [63]:
import numpy as np
import pandas as pd
import os
import re
import glob
import csv
# Root folder that holds the DMV 311 CSV exports. Set the DATA_311_DIR
# environment variable to point the notebook at another copy of the data;
# when it is unset the original local path is used unchanged.
DATA_311_DIR = os.environ.get('DATA_311_DIR', '/Users/julia/Box/Data_311/DMV_311').rstrip('/')
# 311 requests assigned to DC FEMS, last-30-days export (file dated 2021-03-29).
EM_DATA = DATA_311_DIR + '/DMV_EM/DC/FEMS_Service_Requests_30days_20210329.csv'
# All DC 311 requests, last-30-days export (file dated 2021-03-29).
nonEM_DATA = DATA_311_DIR + '/DMV_nonEM/DC/311_Washington_DC_30days_20210329.csv'

In [64]:
def read_data(em_path=None, nonEM_path=None):
    """Load the emergency (FEMS) and the all-311 CSV exports as DataFrames.

    Parameters
    ----------
    em_path : str, path object or file-like, optional
        Source of the FEMS service-request CSV. When omitted, the
        module-level ``EM_DATA`` path is used.
    nonEM_path : str, path object or file-like, optional
        Source of the all-311 service-request CSV. When omitted, the
        module-level ``nonEM_DATA`` path is used.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(em_df, nonEM_df)``, in that order.
    """
    # Resolve the defaults at call time so the function always reflects the
    # current values of the configuration constants.
    if em_path is None:
        em_path = EM_DATA
    if nonEM_path is None:
        nonEM_path = nonEM_DATA
    return pd.read_csv(em_path), pd.read_csv(nonEM_path)
# Load both CSV exports once; the cells below work on em_df and nonEM_df.
em_df, nonEM_df = read_data()

### Emergency (FEMS) 311 reports

In [65]:
# Preview the first five rows of the FEMS dataset.
em_df.head(5)

Unnamed: 0,X,Y,OBJECTID,SERVICECODE,SERVICECODEDESCRIPTION,SERVICETYPECODEDESCRIPTION,ORGANIZATIONACRONYM,SERVICECALLCOUNT,ADDDATE,RESOLUTIONDATE,...,STREETADDRESS,XCOORD,YCOORD,LATITUDE,LONGITUDE,CITY,STATE,ZIPCODE,MARADDRESSREPOSITORYID,WARD
0,-76.941531,38.881247,1,FESAADNU,FEMS - Smoke Alarm Application,FEMS-Smoke Alarms,FEMS,1,2021/03/10 09:57:39+00,2021/03/16 15:17:00+00,...,4334 GORMAN TERRACE SE,405073.37,134824.32,38.881239,-76.941529,WASHINGTON,DC,20019,23028,7
1,-76.995936,38.930586,2,FESAADNU,FEMS - Smoke Alarm Application,FEMS-Smoke Alarms,FEMS,1,2021/03/04 13:35:12+00,2021/03/05 16:11:35+00,...,3301 7TH STREET NE,400352.54,140299.69,38.930578,-76.995934,WASHINGTON,DC,20017,37180,5
2,-76.989061,38.900873,3,FESAADNU,FEMS - Smoke Alarm Application,FEMS-Smoke Alarms,FEMS,1,2021/03/05 16:09:05+00,2021/03/09 09:46:37+00,...,1232 WYLIE STREET NE,400949.1,137001.37,38.900865,-76.989058,WASHINGTON,DC,20002,70457,6
3,-76.947104,38.869324,4,FESAADNU,FEMS - Smoke Alarm Application,FEMS-Smoke Alarms,FEMS,1,2021/02/25 16:20:25+00,2021/03/04 16:20:23+00,...,4023 ALABAMA AVENUE SE,404590.6,133500.46,38.869316,-76.947101,WASHINGTON,DC,20020,27845,7
4,-76.996413,38.901537,5,FESAADNU,FEMS - Smoke Alarm Application,FEMS-Smoke Alarms,FEMS,1,2021/03/04 11:22:43+00,2021/03/05 15:34:05+00,...,902 7TH STREET NE,400311.3,137075.07,38.90153,-76.996411,WASHINGTON,DC,20002,81728,6


### All 311 reports

In [32]:
# Preview the first five rows of the all-311 dataset.
nonEM_df.head(5)

Unnamed: 0,X,Y,OBJECTID,SERVICECODE,SERVICECODEDESCRIPTION,SERVICETYPECODEDESCRIPTION,ORGANIZATIONACRONYM,SERVICECALLCOUNT,ADDDATE,RESOLUTIONDATE,...,XCOORD,YCOORD,LATITUDE,LONGITUDE,CITY,STATE,ZIPCODE,MARADDRESSREPOSITORYID,WARD,DETAILS
0,-77.027916,38.910516,1,S0031,Bulk Collection,SWMA- Solid Waste Management Admistration,DPW,1,2021/03/02 22:53:44+00,2021/03/05 06:34:32+00,...,397578.99,138072.12,38.910508,-77.027914,WASHINGTON,DC,20005.0,225329,2.0,
1,-77.020893,38.951184,2,CONTREMO,Container Removal,SWMA- Solid Waste Management Admistration,DPW,1,2021/03/02 23:01:53+00,1970/01/01 00:00:00+00,...,398189.19,142586.47,38.951176,-77.02089,WASHINGTON,DC,20011.0,247046,4.0,
2,-76.98541,38.902046,3,SPSTDAMA,Traffic Safety Investigation,Department of Transportation,DDOT,1,2021/03/02 23:01:54+00,1970/01/01 00:00:00+00,...,401265.72,137131.62,38.902038,-76.985408,WASHINGTON,DC,20002.0,72301,5.0,
3,-77.011224,38.915367,4,S0316,Recycling Cart Delivery,SWMA- Solid Waste Management Admistration,DPW,1,2021/03/02 23:05:07+00,2021/03/18 10:22:32+00,...,399026.76,138610.36,38.91536,-77.011222,WASHINGTON,DC,20001.0,227289,5.0,
4,-76.991989,38.8791,5,S0441,Trash Collection - Missed,SWMA- Solid Waste Management Admistration,DPW,1,2021/03/08 13:09:55+00,2021/03/10 08:58:55+00,...,400695.32,134584.35,38.879092,-76.991987,WASHINGTON,DC,20003.0,75576,6.0,


### Column names of the two datasets

In [35]:
# Column labels of each dataset as plain Python lists.
em_cols = em_df.columns.to_list()
nonEM_cols = nonEM_df.columns.to_list()
# Print each list followed by its length so the two schemas can be compared.
print("emergency columns:", str(em_cols), "length: " + str(len(em_cols)), sep="\n")
print("non emergency columns:", str(nonEM_cols), "length: " + str(len(nonEM_cols)), sep="\n")

emergency columns:
['X', 'Y', 'OBJECTID', 'SERVICECODE', 'SERVICECODEDESCRIPTION', 'SERVICETYPECODEDESCRIPTION', 'ORGANIZATIONACRONYM', 'SERVICECALLCOUNT', 'ADDDATE', 'RESOLUTIONDATE', 'SERVICEDUEDATE', 'SERVICEORDERDATE', 'INSPECTIONFLAG', 'INSPECTIONDATE', 'INSPECTORNAME', 'SERVICEORDERSTATUS', 'STATUS_CODE', 'SERVICEREQUESTID', 'PRIORITY', 'STREETADDRESS', 'XCOORD', 'YCOORD', 'LATITUDE', 'LONGITUDE', 'CITY', 'STATE', 'ZIPCODE', 'MARADDRESSREPOSITORYID', 'WARD']
length: 29
non emergency columns:
['X', 'Y', 'OBJECTID', 'SERVICECODE', 'SERVICECODEDESCRIPTION', 'SERVICETYPECODEDESCRIPTION', 'ORGANIZATIONACRONYM', 'SERVICECALLCOUNT', 'ADDDATE', 'RESOLUTIONDATE', 'SERVICEDUEDATE', 'SERVICEORDERDATE', 'INSPECTIONFLAG', 'INSPECTIONDATE', 'INSPECTORNAME', 'SERVICEORDERSTATUS', 'STATUS_CODE', 'SERVICEREQUESTID', 'PRIORITY', 'STREETADDRESS', 'XCOORD', 'YCOORD', 'LATITUDE', 'LONGITUDE', 'CITY', 'STATE', 'ZIPCODE', 'MARADDRESSREPOSITORYID', 'WARD', 'DETAILS']
length: 30


* The `DETAILS` column is not included in the emergency (FEMS) 311 dataset.
* The `OBJECTID` values differ between the two datasets, so the `OBJECTID` column cannot be used to merge the two tables.
* Instead, merge the tables on the remaining shared columns (location, `SERVICECODEDESCRIPTION`, `ADDDATE`, and others).

In [36]:
# Columns that appear in exactly one of the two datasets (symmetric difference).
# sorted() is used instead of list() because iterating a set of strings has no
# guaranteed order, so the printed list could otherwise change between runs.
diff_cols = sorted(set(em_cols) ^ set(nonEM_cols))
print("different columns:\n"+ str(diff_cols))

different columns:
['DETAILS']


In [67]:
# Columns compared between the two datasets (OBJECTID is deliberately left out).
cols = [
    'X', 'Y', 'SERVICECODE', 'SERVICECODEDESCRIPTION',
    'SERVICETYPECODEDESCRIPTION', 'ORGANIZATIONACRONYM', 'SERVICECALLCOUNT',
    'ADDDATE', 'RESOLUTIONDATE', 'SERVICEDUEDATE', 'SERVICEORDERDATE',
    'INSPECTIONFLAG', 'INSPECTIONDATE', 'INSPECTORNAME', 'SERVICEORDERSTATUS',
    'STATUS_CODE', 'SERVICEREQUESTID', 'PRIORITY', 'STREETADDRESS',
    'XCOORD', 'YCOORD', 'LATITUDE', 'LONGITUDE', 'CITY', 'STATE', 'ZIPCODE',
    'MARADDRESSREPOSITORYID', 'WARD',
]
# Keep only the all-311 records whose ORGANIZATIONACRONYM is 'FEMS'.
is_fems = nonEM_df['ORGANIZATIONACRONYM'] == 'FEMS'
nonEM_fems = nonEM_df[is_fems]
# No `on=` is passed, so the join key is every column in `cols`. The `_merge`
# indicator marks each row as 'left_only' (all-311 FEMS subset), 'right_only'
# (FEMS dataset) or 'both'.
merged_df = nonEM_fems[cols].merge(em_df[cols], how='outer', indicator=True)
merged_df

Unnamed: 0,X,Y,SERVICECODE,SERVICECODEDESCRIPTION,SERVICETYPECODEDESCRIPTION,ORGANIZATIONACRONYM,SERVICECALLCOUNT,ADDDATE,RESOLUTIONDATE,SERVICEDUEDATE,...,XCOORD,YCOORD,LATITUDE,LONGITUDE,CITY,STATE,ZIPCODE,MARADDRESSREPOSITORYID,WARD,_merge
0,-76.987107,38.912102,FESAADNU,FEMS - Smoke Alarm Application,FEMS-Smoke Alarms,FEMS,1,2021/03/16 13:10:28+00,2021/03/18 15:05:10+00,2021/04/05 13:10:28+00,...,401118.42,138247.97,38.912095,-76.987104,WASHINGTON,DC,20002.0,54899,5.0,both
1,-76.996413,38.901537,FESAADNU,FEMS - Smoke Alarm Application,FEMS-Smoke Alarms,FEMS,1,2021/03/04 11:22:43+00,2021/03/05 15:34:05+00,2021/03/24 12:22:43+00,...,400311.3,137075.07,38.90153,-76.996411,WASHINGTON,DC,20002.0,81728,6.0,both
2,-77.025731,38.93954,FESAADNU,FEMS - Smoke Alarm Application,FEMS-Smoke Alarms,FEMS,1,2021/03/11 12:36:48+00,2021/03/11 15:18:11+00,2021/03/31 13:36:48+00,...,397769.45,141294.0,38.939532,-77.025728,WASHINGTON,DC,20011.0,225107,4.0,both
3,-76.941531,38.881247,FESAADNU,FEMS - Smoke Alarm Application,FEMS-Smoke Alarms,FEMS,1,2021/03/10 09:57:39+00,2021/03/16 15:17:00+00,2021/03/30 10:57:39+00,...,405073.37,134824.32,38.881239,-76.941529,WASHINGTON,DC,20019.0,23028,7.0,both
4,-76.995936,38.930586,FESAADNU,FEMS - Smoke Alarm Application,FEMS-Smoke Alarms,FEMS,1,2021/03/04 13:35:12+00,2021/03/05 16:11:35+00,2021/03/24 14:35:12+00,...,400352.54,140299.69,38.930578,-76.995934,WASHINGTON,DC,20017.0,37180,5.0,both
5,-76.989061,38.900873,FESAADNU,FEMS - Smoke Alarm Application,FEMS-Smoke Alarms,FEMS,1,2021/03/05 16:09:05+00,2021/03/09 09:46:37+00,2021/03/25 17:09:05+00,...,400949.1,137001.37,38.900865,-76.989058,WASHINGTON,DC,20002.0,70457,6.0,both
6,-76.963655,38.86141,FESAADNU,FEMS - Smoke Alarm Application,FEMS-Smoke Alarms,FEMS,1,2021/03/01 09:59:33+00,2021/03/04 16:27:15+00,2021/03/19 10:59:33+00,...,403154.64,132621.29,38.861403,-76.963652,WASHINGTON,DC,20020.0,41615,7.0,both
7,-77.000866,38.956075,FEMSFIREINSP,FEMS - Fire Safety Inspection,FEMS,FEMS,1,2021/03/17 19:30:27+00,2021/03/19 12:46:40+00,2021/04/29 19:30:27+00,...,399925.12,143129.29,38.956068,-77.000864,WASHINGTON,DC,20011.0,331773,4.0,left_only
8,-76.941562,38.870856,FESAADNU,FEMS - Smoke Alarm Application,FEMS-Smoke Alarms,FEMS,1,2021/02/27 18:40:02+00,2021/03/04 16:25:06+00,2021/03/18 19:40:02+00,...,405071.4,133670.82,38.870848,-76.94156,WASHINGTON,DC,20019.0,26472,7.0,both
9,-77.001257,38.960708,FESAADNU,FEMS - Smoke Alarm Application,FEMS-Smoke Alarms,FEMS,1,2021/03/25 17:20:50+00,1970/01/01 00:00:00+00,2021/04/14 17:20:50+00,...,399891.26,143643.59,38.960701,-77.001255,WASHINGTON,DC,20011.0,34203,4.0,left_only


### Records that appear in both datasets

In [69]:
# Rows whose merge indicator is 'both', i.e. found in the two inputs of the merge.
both_df = merged_df[merged_df['_merge'].eq('both')]
both_df

Unnamed: 0,X,Y,SERVICECODE,SERVICECODEDESCRIPTION,SERVICETYPECODEDESCRIPTION,ORGANIZATIONACRONYM,SERVICECALLCOUNT,ADDDATE,RESOLUTIONDATE,SERVICEDUEDATE,...,XCOORD,YCOORD,LATITUDE,LONGITUDE,CITY,STATE,ZIPCODE,MARADDRESSREPOSITORYID,WARD,_merge
0,-76.987107,38.912102,FESAADNU,FEMS - Smoke Alarm Application,FEMS-Smoke Alarms,FEMS,1,2021/03/16 13:10:28+00,2021/03/18 15:05:10+00,2021/04/05 13:10:28+00,...,401118.42,138247.97,38.912095,-76.987104,WASHINGTON,DC,20002.0,54899,5.0,both
1,-76.996413,38.901537,FESAADNU,FEMS - Smoke Alarm Application,FEMS-Smoke Alarms,FEMS,1,2021/03/04 11:22:43+00,2021/03/05 15:34:05+00,2021/03/24 12:22:43+00,...,400311.3,137075.07,38.90153,-76.996411,WASHINGTON,DC,20002.0,81728,6.0,both
2,-77.025731,38.93954,FESAADNU,FEMS - Smoke Alarm Application,FEMS-Smoke Alarms,FEMS,1,2021/03/11 12:36:48+00,2021/03/11 15:18:11+00,2021/03/31 13:36:48+00,...,397769.45,141294.0,38.939532,-77.025728,WASHINGTON,DC,20011.0,225107,4.0,both
3,-76.941531,38.881247,FESAADNU,FEMS - Smoke Alarm Application,FEMS-Smoke Alarms,FEMS,1,2021/03/10 09:57:39+00,2021/03/16 15:17:00+00,2021/03/30 10:57:39+00,...,405073.37,134824.32,38.881239,-76.941529,WASHINGTON,DC,20019.0,23028,7.0,both
4,-76.995936,38.930586,FESAADNU,FEMS - Smoke Alarm Application,FEMS-Smoke Alarms,FEMS,1,2021/03/04 13:35:12+00,2021/03/05 16:11:35+00,2021/03/24 14:35:12+00,...,400352.54,140299.69,38.930578,-76.995934,WASHINGTON,DC,20017.0,37180,5.0,both
5,-76.989061,38.900873,FESAADNU,FEMS - Smoke Alarm Application,FEMS-Smoke Alarms,FEMS,1,2021/03/05 16:09:05+00,2021/03/09 09:46:37+00,2021/03/25 17:09:05+00,...,400949.1,137001.37,38.900865,-76.989058,WASHINGTON,DC,20002.0,70457,6.0,both
6,-76.963655,38.86141,FESAADNU,FEMS - Smoke Alarm Application,FEMS-Smoke Alarms,FEMS,1,2021/03/01 09:59:33+00,2021/03/04 16:27:15+00,2021/03/19 10:59:33+00,...,403154.64,132621.29,38.861403,-76.963652,WASHINGTON,DC,20020.0,41615,7.0,both
8,-76.941562,38.870856,FESAADNU,FEMS - Smoke Alarm Application,FEMS-Smoke Alarms,FEMS,1,2021/02/27 18:40:02+00,2021/03/04 16:25:06+00,2021/03/18 19:40:02+00,...,405071.4,133670.82,38.870848,-76.94156,WASHINGTON,DC,20019.0,26472,7.0,both
13,-77.016871,38.959354,FESAADNU,FEMS - Smoke Alarm Application,FEMS-Smoke Alarms,FEMS,1,2021/03/01 13:52:14+00,2021/03/04 16:29:52+00,2021/03/19 14:52:14+00,...,398537.96,143493.37,38.959346,-77.016869,WASHINGTON,DC,20011.0,249374,4.0,both
14,-76.998229,38.901141,FESAADNU,FEMS - Smoke Alarm Application,FEMS-Smoke Alarms,FEMS,1,2021/02/28 18:37:05+00,2021/03/04 16:22:23+00,2021/03/18 19:37:05+00,...,400153.82,137031.1,38.901134,-76.998227,WASHINGTON,DC,20002.0,81162,6.0,both


### Records that are missing from the emergency 311 dataset but have ORGANIZATIONACRONYM == 'FEMS'

In [71]:
# Rows whose merge indicator is 'left_only', i.e. found only in the left input
# of the merge (the FEMS subset of the all-311 data).
left_df = merged_df[merged_df['_merge'].eq('left_only')]
left_df

Unnamed: 0,X,Y,SERVICECODE,SERVICECODEDESCRIPTION,SERVICETYPECODEDESCRIPTION,ORGANIZATIONACRONYM,SERVICECALLCOUNT,ADDDATE,RESOLUTIONDATE,SERVICEDUEDATE,...,XCOORD,YCOORD,LATITUDE,LONGITUDE,CITY,STATE,ZIPCODE,MARADDRESSREPOSITORYID,WARD,_merge
7,-77.000866,38.956075,FEMSFIREINSP,FEMS - Fire Safety Inspection,FEMS,FEMS,1,2021/03/17 19:30:27+00,2021/03/19 12:46:40+00,2021/04/29 19:30:27+00,...,399925.12,143129.29,38.956068,-77.000864,WASHINGTON,DC,20011.0,331773,4.0,left_only
9,-77.001257,38.960708,FESAADNU,FEMS - Smoke Alarm Application,FEMS-Smoke Alarms,FEMS,1,2021/03/25 17:20:50+00,1970/01/01 00:00:00+00,2021/04/14 17:20:50+00,...,399891.26,143643.59,38.960701,-77.001255,WASHINGTON,DC,20011.0,34203,4.0,left_only
10,-77.013739,38.918348,FESAADNU,FEMS - Smoke Alarm Application,FEMS-Smoke Alarms,FEMS,1,2021/03/26 11:15:50+00,1970/01/01 00:00:00+00,2021/04/15 11:15:50+00,...,398808.77,138941.34,38.918341,-77.013736,WASHINGTON,DC,20001.0,227870,5.0,left_only
11,-77.026977,38.982168,FEMSFIREINSP,FEMS - Fire Safety Inspection,FEMS,FEMS,1,2021/03/23 11:52:38+00,2021/03/25 09:49:39+00,2021/05/05 11:52:38+00,...,397662.83,146026.13,38.98216,-77.026974,WASHINGTON,DC,20012.0,253572,4.0,left_only
12,-77.027473,38.978198,FESAADNU,FEMS - Smoke Alarm Application,FEMS-Smoke Alarms,FEMS,1,2021/03/25 09:14:35+00,2021/03/25 10:35:16+00,2021/04/14 09:14:35+00,...,397619.67,145585.43,38.97819,-77.027471,WASHINGTON,DC,20012.0,253434,4.0,left_only


### Records that appear only in the emergency 311 dataset

In [72]:
# Rows whose merge indicator is 'right_only', i.e. found only in the right input
# of the merge (the FEMS dataset).
right_df = merged_df[merged_df['_merge'].eq('right_only')]
right_df

Unnamed: 0,X,Y,SERVICECODE,SERVICECODEDESCRIPTION,SERVICETYPECODEDESCRIPTION,ORGANIZATIONACRONYM,SERVICECALLCOUNT,ADDDATE,RESOLUTIONDATE,SERVICEDUEDATE,...,XCOORD,YCOORD,LATITUDE,LONGITUDE,CITY,STATE,ZIPCODE,MARADDRESSREPOSITORYID,WARD,_merge
15,-76.947104,38.869324,FESAADNU,FEMS - Smoke Alarm Application,FEMS-Smoke Alarms,FEMS,1,2021/02/25 16:20:25+00,2021/03/04 16:20:23+00,2021/03/17 17:20:25+00,...,404590.6,133500.46,38.869316,-76.947101,WASHINGTON,DC,20020.0,27845,7.0,right_only
16,-77.02077,38.972563,FESAADNU,FEMS - Smoke Alarm Application,FEMS-Smoke Alarms,FEMS,1,2021/02/25 10:23:06+00,2021/03/04 16:11:56+00,2021/03/17 11:23:06+00,...,398200.35,144959.8,38.972556,-77.020768,WASHINGTON,DC,20012.0,251276,4.0,right_only
17,-76.987259,38.863699,FESAADNU,FEMS - Smoke Alarm Application,FEMS-Smoke Alarms,FEMS,1,2021/02/24 12:37:47+00,2021/03/04 16:31:30+00,2021/03/16 13:37:47+00,...,401105.95,132874.78,38.863691,-76.987257,WASHINGTON,DC,20020.0,150328,8.0,right_only
18,-76.919791,38.88831,FESAADNU,FEMS - Smoke Alarm Application,FEMS-Smoke Alarms,FEMS,1,2021/02/25 15:41:44+00,2021/03/04 16:17:35+00,2021/03/17 16:41:44+00,...,406959.01,135609.83,38.888303,-76.919788,WASHINGTON,DC,20019.0,303544,7.0,right_only
19,-76.957704,38.886982,FESAADNU,FEMS - Smoke Alarm Application,FEMS-Smoke Alarms,FEMS,1,2021/02/25 13:07:27+00,2021/03/03 15:59:14+00,2021/03/17 14:07:27+00,...,403669.79,135460.17,38.886974,-76.957702,WASHINGTON,DC,20019.0,35843,7.0,right_only
20,-77.014024,38.909865,FESAADNU,FEMS - Smoke Alarm Application,FEMS-Smoke Alarms,FEMS,1,2021/02/25 13:50:20+00,2021/03/04 16:14:19+00,2021/03/17 14:50:20+00,...,398783.91,137999.59,38.909857,-77.014021,WASHINGTON,DC,20001.0,237832,5.0,right_only
