In [1]:
# Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import cufflinks as cf

import plotly.graph_objects as go

In [2]:
# Load the raw call records from the CSV file into a DataFrame.
df = pd.read_csv("../input/montcoalert/911.csv")

### Understanding the columns of the data

In [3]:
# Preview the first three rows to see the columns and the format of their values.

df.head(3)

Unnamed: 0,lat,lng,desc,zip,title,timeStamp,twp,addr,e
0,40.297876,-75.581294,REINDEER CT & DEAD END; NEW HANOVER; Station ...,19525.0,EMS: BACK PAINS/INJURY,2015-12-10 17:10:52,NEW HANOVER,REINDEER CT & DEAD END,1
1,40.258061,-75.26468,BRIAR PATH & WHITEMARSH LN; HATFIELD TOWNSHIP...,19446.0,EMS: DIABETIC EMERGENCY,2015-12-10 17:29:21,HATFIELD TOWNSHIP,BRIAR PATH & WHITEMARSH LN,1
2,40.121182,-75.351975,HAWS AVE; NORRISTOWN; 2015-12-10 @ 14:39:21-St...,19401.0,Fire: GAS-ODOR/LEAK,2015-12-10 14:39:21,NORRISTOWN,HAWS AVE,1


Now let's look at the columns.

In [4]:
# List the column labels of the loaded frame.
df.columns

Index(['lat', 'lng', 'desc', 'zip', 'title', 'timeStamp', 'twp', 'addr', 'e'], dtype='object')

We will not be using the `desc`, `zip` and `addr` columns, so let's remove them.

In [5]:
# Keep only the columns used in the rest of the analysis (drops desc, zip and addr).
# The explicit .copy() makes the result an independent frame, so the column
# assignments made in later cells do not trigger SettingWithCopyWarning.
df = df[['lat', 'lng', 'title', 'timeStamp', 'twp', 'e']].copy()

df.head(3)

Unnamed: 0,lat,lng,title,timeStamp,twp,e
0,40.297876,-75.581294,EMS: BACK PAINS/INJURY,2015-12-10 17:10:52,NEW HANOVER,1
1,40.258061,-75.26468,EMS: DIABETIC EMERGENCY,2015-12-10 17:29:21,HATFIELD TOWNSHIP,1
2,40.121182,-75.351975,Fire: GAS-ODOR/LEAK,2015-12-10 14:39:21,NORRISTOWN,1


Check for the nulls


In [6]:
# Missing-value count for every column, followed by the total number of rows.
print(df.isna().sum())
print(df.shape[0])

lat            0
lng            0
title          0
timeStamp      0
twp          293
e              0
dtype: int64
663522


There are 663,522 rows and only 293 of them have a null `twp`. Removing those rows would not do much harm, since the missing township cannot be recovered for them.
The other option, which is the one used below, is to assign them to an "Unspecified" township.

In [7]:
# Label calls with no recorded township as "Unspecified" instead of dropping them.
# The filled column is assigned back explicitly: calling fillna(inplace=True) on
# a selected column is chained in-place mutation, which newer pandas versions
# warn about and which does not update `df` when copy-on-write is enabled.
df['twp'] = df['twp'].fillna("Unspecified")

In [8]:
# Re-count missing values per column after the fill.
df.isna().sum()

lat          0
lng          0
title        0
timeStamp    0
twp          0
e            0
dtype: int64

Handling of missing values ends here!

In [9]:
# Show the distinct values of the title column.
df['title'].unique()

array(['EMS: BACK PAINS/INJURY', 'EMS: DIABETIC EMERGENCY',
       'Fire: GAS-ODOR/LEAK', 'EMS: CARDIAC EMERGENCY', 'EMS: DIZZINESS',
       'EMS: HEAD INJURY', 'EMS: NAUSEA/VOMITING',
       'EMS: RESPIRATORY EMERGENCY', 'EMS: SYNCOPAL EPISODE',
       'Traffic: VEHICLE ACCIDENT -', 'EMS: VEHICLE ACCIDENT',
       'Traffic: DISABLED VEHICLE -', 'Fire: APPLIANCE FIRE',
       'EMS: GENERAL WEAKNESS', 'Fire: CARBON MONOXIDE DETECTOR',
       'EMS: UNKNOWN MEDICAL EMERGENCY', 'EMS: UNRESPONSIVE SUBJECT',
       'Fire: VEHICLE ACCIDENT', 'EMS: ALTERED MENTAL STATUS',
       'Fire: FIRE ALARM', 'EMS: CVA/STROKE',
       'Traffic: ROAD OBSTRUCTION -', 'EMS: SUBJECT IN PAIN',
       'EMS: HEMORRHAGING', 'EMS: FALL VICTIM', 'EMS: ASSAULT VICTIM',
       'EMS: SEIZURES', 'EMS: MEDICAL ALERT ALARM',
       'EMS: ABDOMINAL PAINS', 'Fire: PUMP DETAIL',
       'Fire: FIRE INVESTIGATION', 'EMS: OVERDOSE', 'EMS: MATERNITY',
       'EMS: UNCONSCIOUS SUBJECT', 'EMS: CHOKING', 'EMS: LACERATIONS',
     

As the output above shows, each title starts with the type of call, followed by a colon and then the details of the call. We need to separate these two pieces of information.

In [10]:
# Split each title once, at its first colon, into the call type (text before
# the colon) and the explanation (text after it).
# str.partition is vectorised, splits every title only once, and yields an
# empty explanation instead of raising when a title has no colon.
title_parts = df['title'].str.partition(":")
df['type'] = title_parts[0]
# Strip the space that follows the colon so the explanation has no leading blank.
df['type explanation'] = title_parts[2].str.strip()

Now we need to do something with time.

In [11]:
# Inspect the Python type of the first timeStamp entry.
type(df['timeStamp'].iat[0])

str

We do not want the timestamp stored as a string, so let's convert it to a datetime.

In [12]:
# Replace the string timestamps with parsed datetime values.
df = df.assign(timeStamp=pd.to_datetime(df['timeStamp']))

In [13]:
# Keep the first timeStamp value as a sample.
# NOTE(review): `time` is not referenced by any later cell visible here — confirm it is still needed.
time = df['timeStamp'].iloc[0]

In [14]:
# Display the frame indexed by timeStamp, keeping timeStamp as a regular column too.
# The result is not assigned, so `df` itself is left unchanged.
df.set_index('timeStamp', drop=False)

Unnamed: 0_level_0,lat,lng,title,timeStamp,twp,e,type,type explanation
timeStamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2015-12-10 17:10:52,40.297876,-75.581294,EMS: BACK PAINS/INJURY,2015-12-10 17:10:52,NEW HANOVER,1,EMS,BACK PAINS/INJURY
2015-12-10 17:29:21,40.258061,-75.264680,EMS: DIABETIC EMERGENCY,2015-12-10 17:29:21,HATFIELD TOWNSHIP,1,EMS,DIABETIC EMERGENCY
2015-12-10 14:39:21,40.121182,-75.351975,Fire: GAS-ODOR/LEAK,2015-12-10 14:39:21,NORRISTOWN,1,Fire,GAS-ODOR/LEAK
2015-12-10 16:47:36,40.116153,-75.343513,EMS: CARDIAC EMERGENCY,2015-12-10 16:47:36,NORRISTOWN,1,EMS,CARDIAC EMERGENCY
2015-12-10 16:56:52,40.251492,-75.603350,EMS: DIZZINESS,2015-12-10 16:56:52,LOWER POTTSGROVE,1,EMS,DIZZINESS
...,...,...,...,...,...,...,...,...
2020-07-29 15:46:51,40.157956,-75.348060,Traffic: VEHICLE ACCIDENT -,2020-07-29 15:46:51,EAST NORRITON,1,Traffic,VEHICLE ACCIDENT -
2020-07-29 15:52:19,40.136306,-75.428697,EMS: GENERAL WEAKNESS,2020-07-29 15:52:19,LOWER PROVIDENCE,1,EMS,GENERAL WEAKNESS
2020-07-29 15:52:52,40.013779,-75.300835,EMS: VEHICLE ACCIDENT,2020-07-29 15:52:52,LOWER MERION,1,EMS,VEHICLE ACCIDENT
2020-07-29 15:54:08,40.121603,-75.351437,Fire: BUILDING FIRE,2020-07-29 15:54:08,NORRISTOWN,1,Fire,BUILDING FIRE


In [15]:
# Derive calendar features from the parsed timeStamp column.
# The vectorised .dt accessor replaces the row-by-row apply calls, and
# day_name()/month_name() replace the hand-written lookup tables, one of
# which misspelled "February" as "Febuary".
df['Day of Week'] = df['timeStamp'].dt.day_name()
df['Month No'] = df['timeStamp'].dt.month
df['Month'] = df['timeStamp'].dt.month_name()
df['Hour'] = df['timeStamp'].dt.hour

In [16]:
# Bucket each call into a part of the day based on its hour.
# Daytime buckets are half-open hour ranges [start, stop).
day_parts = {'Morning': (6, 12), 'Afternoon': (12, 15), 'Evening': (15, 18)}
for label, (start, stop) in day_parts.items():
    df.loc[df.Hour.between(start, stop, inclusive="left"), 'Time of Day'] = label

# Night wraps around midnight: 18:00 onwards, or before 06:00.
df.loc[df.Hour.ge(18) | df.Hour.lt(6), 'Time of Day'] = 'Night'

#### Plot Data Geographically

We want to plot the data geographically to see where most calls come from. The map itself is drawn at the end of the notebook.

In [17]:
# Initialise Plotly's notebook mode (imported from plotly.offline above).
init_notebook_mode(connected=True)
# Switch cufflinks to offline mode.
# NOTE(review): presumably required before the `.iplot` calls in later cells — confirm.
cf.go_offline()

### Exploring Type

First, let's see which type of call is received most often.

In [18]:
# Bar chart of the number of calls per call type.
# A title and axis labels are set so the figure can be read on its own.
df['type'].value_counts().iplot(
    kind='bar',
    title='Calls by type',
    xTitle='Call type',
    yTitle='Number of calls',
)

Now let's take each type in turn and look at its trend across the days of the week.

In [19]:
# Count calls for every (call type, day of week) pair.
# crosstab sorts the day columns alphabetically; reindex puts them in calendar
# order (Monday to Sunday) so the chart legend reads naturally. A day with no
# calls gets a zero column instead of being absent.
WEEKDAY_ORDER = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
type_vs_days = pd.crosstab(df['type'], df['Day of Week']).reindex(
    columns=WEEKDAY_ORDER, fill_value=0
)

In [20]:
# Grouped bar chart: one group per call type, one bar per day of the week.
# A title and axis labels are set so the figure can be read on its own.
type_vs_days.iplot(
    kind='bar',
    title='Calls by type and day of week',
    xTitle='Call type',
    yTitle='Number of calls',
)

#### Type vs Time of Day

In [21]:
# Count calls for every (time of day, call type) pair.
# crosstab sorts the rows alphabetically (Afternoon, Evening, Morning, Night);
# reindex restores the natural order of the day so the x axis reads
# chronologically. A bucket with no calls gets a zero row.
TIME_OF_DAY_ORDER = ['Morning', 'Afternoon', 'Evening', 'Night']
type_vs_daytime = pd.crosstab(df['Time of Day'], df['type']).reindex(
    index=TIME_OF_DAY_ORDER, fill_value=0
)

In [22]:
# Grouped bar chart: one group per time of day, one bar per call type.
# A title and axis labels are set so the figure can be read on its own.
type_vs_daytime.iplot(
    kind="bar",
    title='Calls by time of day and type',
    xTitle='Time of day',
    yTitle='Number of calls',
)

#### Type vs Months

In [23]:
# Count calls for every (month number, call type) pair.
type_vs_month = pd.crosstab(df['Month No'], df['type'])

# Month names, indexed by month number minus one.
months = ["January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"]

# Look each name up from the month number in the index rather than assigning
# the 12-item list directly: a direct assignment raises a length-mismatch error
# as soon as a month has no calls and is therefore missing from the crosstab.
type_vs_month['Month'] = [months[month_no - 1] for month_no in type_vs_month.index]



In [24]:
# Display the per-month call counts by type, including the added Month name column.
type_vs_month

type,EMS,Fire,Traffic,Month
Month No,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,30970,9087,22279,January
2,28329,8092,19006,Febuary
3,29989,9135,20903,March
4,28485,8132,17054,April
5,29473,8691,19345,May
6,29512,9879,20643,June
7,30027,9665,20140,July
8,24410,7598,16351,August
9,23982,7258,15973,September
10,25025,7683,18532,October


In [25]:
# Line chart of monthly call counts, one line per call type, with month names on the x axis.
# A title and axis labels are set so the figure can be read on its own.
type_vs_month.iplot(
    x="Month",
    title='Calls by month and type',
    xTitle='Month',
    yTitle='Number of calls',
)

### Type VS Hour

In [26]:
# Count calls for every (hour, call type) pair: hours as rows, call types as columns.
type_vs_hour = pd.crosstab(index=df['Hour'], columns=df['type'])

In [27]:
# Line chart of hourly call counts, one line per call type.
# A title and axis labels are set so the figure can be read on its own.
type_vs_hour.iplot(
    title='Calls by hour of day and type',
    xTitle='Hour of day',
    yTitle='Number of calls',
)

### Finally we have the Plot for the Geography

In [28]:
# Number of calls recorded at each distinct (lat, lng) coordinate pair.
# reset_index(name=...) names the count column directly instead of renaming
# the default column 0 afterwards.
geo_data = df.groupby(['lat', 'lng']).size().reset_index(name="no of calls")

# One marker per coordinate pair, coloured by its call count; the count is
# also the hover text.
fig = go.Figure(data=go.Scattergeo(
    locationmode='USA-states',
    lon=geo_data['lng'],
    lat=geo_data['lat'],
    text=geo_data['no of calls'],
    mode='markers',
    marker=dict(
        size=2,
        opacity=0.8,
        reversescale=True,
        autocolorscale=False,
        line=dict(
            width=1,
            # Was 'rgba(102, 102, 102)': an rgba() colour needs a fourth
            # (alpha) component, so the plain rgb() form is used.
            color='rgb(102, 102, 102)'
        ),
        colorscale='Blues',
        cmin=0,
        color=geo_data['no of calls'],
        cmax=geo_data['no of calls'].max(),
        colorbar_title="Number of calls"
    )))

fig.update_layout(
    # The hover text is the call count, not a county name, so the title says so.
    title='Number of calls by location (hover for the call count)',
    geo=dict(
        scope='usa',
        projection_type='albers usa',
        showland=True,
        landcolor="rgb(250, 250, 250)",
        subunitcolor="rgb(217, 217, 217)",
        countrycolor="rgb(217, 217, 217)",
        countrywidth=0.5,
        subunitwidth=0.5
    ),
)
fig.show()