# Import Libraries

In [1]:
%%capture
!pip3 install numpy pandas

In [2]:
import numpy as np, pandas as pd
import math
import os

## Graph Plotting Libraries

In [3]:
%%capture
!pip3 install matplotlib seaborn plotly

In [4]:
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import plotly
# Initialise plotly's offline notebook mode for this session.
# NOTE(review): per the plotly docs, connected=True loads plotly.js from a CDN
# instead of embedding it — confirm that requiring internet access is acceptable.
plotly.offline.init_notebook_mode (connected = True)

### Calendar Heatmap

In [5]:
%%capture
!pip3 install calmap
import calmap

### Geospatial libraries

In [6]:
%%capture
!pip3 install geopandas geopy folium

In [7]:
import geopandas as gpd
import folium
from folium import Choropleth, Circle, Marker
from folium.plugins import HeatMap, MarkerCluster
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter

In [8]:
# Select matplotlib's inline backend so figures are drawn in the cell output.
%matplotlib inline

# Load Dataset

## Load Shooting data

In [9]:
# Read the fatal-police-shootings CSV; the `date` column is parsed into
# datetimes at load time rather than being left as strings.
df = pd.read_csv(
    'data/fatal-police-shootings-data.csv',
    parse_dates=['date'],
)
# Preview the first rows of the raw shootings table.
df.head()

Unnamed: 0,id,name,date,manner_of_death,armed,age,gender,race,city,state,signs_of_mental_illness,threat_level,flee,body_camera
0,3,Tim Elliot,2015-01-02,shot,gun,53.0,M,A,Shelton,WA,True,attack,Not fleeing,False
1,4,Lewis Lee Lembke,2015-01-02,shot,gun,47.0,M,W,Aloha,OR,False,attack,Not fleeing,False
2,5,John Paul Quintero,2015-01-03,shot and Tasered,unarmed,23.0,M,H,Wichita,KS,False,other,Not fleeing,False
3,8,Matthew Hoffman,2015-01-04,shot,toy weapon,32.0,M,W,San Francisco,CA,True,attack,Not fleeing,False
4,9,Michael Rodriguez,2015-01-04,shot,nail gun,39.0,M,H,Evans,CO,False,attack,Not fleeing,False


## Load City Data

In [10]:
# Read the city lookup table from a CSV path relative to the notebook's
# working directory.
city = pd.read_csv("data/cities.csv")

In [11]:
# Preview the first rows of the city table to inspect its columns.
city.head()

Unnamed: 0.1,Unnamed: 0,city,state,address,geom,latitude,longitude
0,0,Shelton,WA,"Shelton,WA,USA","(47.2150945, -123.1007066)",47.215094,-123.100707
1,1,Aloha,OR,"Aloha,OR,USA","(45.4942838, -122.8670451)",45.494284,-122.867045
2,2,Wichita,KS,"Wichita,KS,USA","(37.6922361, -97.3375448)",37.692236,-97.337545
3,3,San Francisco,CA,"San Francisco,CA,USA","(37.7790262, -122.4199061)",37.779026,-122.419906
4,4,Evans,CO,"Evans,CO,USA","(40.3763701, -104.6921874)",40.37637,-104.692187


In [12]:
# Remove the columns that are not needed for the merge with the shooting data.
# The result is rebound instead of mutated in place, and missing columns are
# ignored, so re-running this cell is harmless (the in-place version raised a
# KeyError on a second run because the columns were already gone).
city = city.drop(columns=["Unnamed: 0", "geom", "address"], errors="ignore")

## Merge City Data (latitude, longitude) with shooting dataset

In [13]:
# Combine the shooting records with the city table by matching on the
# (city, state) pair. The inner join is pandas' default and is spelled out
# here because shooting rows without a matching city row are not kept.
df_city = pd.merge(
    left=df,
    right=city,
    how="inner",
    on=["city", "state"],
)

In [14]:
# Preview the merged frame to check that the city columns were attached.
df_city.head()

Unnamed: 0,id,name,date,manner_of_death,armed,age,gender,race,city,state,signs_of_mental_illness,threat_level,flee,body_camera,latitude,longitude
0,3,Tim Elliot,2015-01-02,shot,gun,53.0,M,A,Shelton,WA,True,attack,Not fleeing,False,47.215094,-123.100707
1,5685,Kathryn R. Hale,2020-03-24,shot,vehicle,32.0,F,W,Shelton,WA,False,attack,Car,False,47.215094,-123.100707
2,4,Lewis Lee Lembke,2015-01-02,shot,gun,47.0,M,W,Aloha,OR,False,attack,Not fleeing,False,45.494284,-122.867045
3,890,Phyllis Ilene Jepsen,2015-10-02,shot,knife,55.0,F,W,Aloha,OR,True,other,Not fleeing,False,45.494284,-122.867045
4,5,John Paul Quintero,2015-01-03,shot and Tasered,unarmed,23.0,M,H,Wichita,KS,False,other,Not fleeing,False,37.692236,-97.337545


# Data Preprocessing

## See Datatypes

In [15]:
# Print the column dtypes and non-null counts of the merged frame.
df_city.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 5388 entries, 0 to 5387
Data columns (total 16 columns):
id                         5388 non-null int64
name                       5388 non-null object
date                       5388 non-null datetime64[ns]
manner_of_death            5388 non-null object
armed                      5163 non-null object
age                        5154 non-null float64
gender                     5386 non-null object
race                       4871 non-null object
city                       5388 non-null object
state                      5388 non-null object
signs_of_mental_illness    5388 non-null bool
threat_level               5388 non-null object
flee                       5139 non-null object
body_camera                5388 non-null bool
latitude                   5388 non-null float64
longitude                  5388 non-null float64
dtypes: bool(2), datetime64[ns](1), float64(3), int64(1), object(9)
memory usage: 641.9+ KB


## Check if null values exist

In [16]:
# Count the missing values in each column of the merged frame.
df_city.isna().sum()

id                           0
name                         0
date                         0
manner_of_death              0
armed                      225
age                        234
gender                       2
race                       517
city                         0
state                        0
signs_of_mental_illness      0
threat_level                 0
flee                       249
body_camera                  0
latitude                     0
longitude                    0
dtype: int64

## Drop rows with null values

In [17]:
# Discard every row that has a missing value in any column, then re-count the
# nulls per column to confirm that none remain.
df_city = df_city.dropna(axis="index", how="any")
df_city.isna().sum()

id                         0
name                       0
date                       0
manner_of_death            0
armed                      0
age                        0
gender                     0
race                       0
city                       0
state                      0
signs_of_mental_illness    0
threat_level               0
flee                       0
body_camera                0
latitude                   0
longitude                  0
dtype: int64

In [18]:
# NOTE(review): this previews the raw `df`, not the null-filtered `df_city`
# produced by the preceding cell — confirm which frame was meant to be shown.
df.head()

Unnamed: 0,id,name,date,manner_of_death,armed,age,gender,race,city,state,signs_of_mental_illness,threat_level,flee,body_camera
0,3,Tim Elliot,2015-01-02,shot,gun,53.0,M,A,Shelton,WA,True,attack,Not fleeing,False
1,4,Lewis Lee Lembke,2015-01-02,shot,gun,47.0,M,W,Aloha,OR,False,attack,Not fleeing,False
2,5,John Paul Quintero,2015-01-03,shot and Tasered,unarmed,23.0,M,H,Wichita,KS,False,other,Not fleeing,False
3,8,Matthew Hoffman,2015-01-04,shot,toy weapon,32.0,M,W,San Francisco,CA,True,attack,Not fleeing,False
4,9,Michael Rodriguez,2015-01-04,shot,nail gun,39.0,M,H,Evans,CO,False,attack,Not fleeing,False
