## NC Traffic Stop Data
https://opendatapolicingnc.com/

In [1]:
## Python packages - you may have to pip install sqlalchemy, sqlalchemy_utils, and psycopg2.
from sqlalchemy import create_engine
from sqlalchemy_utils import database_exists, create_database
import psycopg2
import pandas as pd

In [2]:
# Connection settings for the PostgreSQL database on localhost that the
# queries in this notebook run against.
# NOTE(review): the credentials are hard-coded in the notebook source; avoid
# committing a real password here.
dbname = 'traffic_stops_nc'
username = 'along528'
pswd = 'password'

# Reset first: if connect() raises when this cell is re-run, `con` is left as
# None instead of still referring to a connection from an earlier run.
con = None
# `dbname` is the documented keyword; `database` is a deprecated alias.
con = psycopg2.connect(dbname=dbname, user=username, host='localhost', password=pswd)

# Stops by all races and ethnicities by year

In [None]:
# Count person_id values per race and per calendar year of the stop date,
# restricted to persons of type 'D' in stops made by agency 78.
# Rows come back ordered by year ascending, then race descending.
# NOTE(review): type 'D' presumably marks drivers and 78 one specific agency;
# confirm against the dataset's code tables.
sql_query = """
SELECT count(person_id),
       p.race,
       extract(YEAR FROM s.date) AS year
FROM nc_person p
JOIN nc_stop s ON p.stop_id = s.stop_id
WHERE p.type='D'
  AND s.agency_id = 78
GROUP BY p.race,
         year
ORDER BY year ASC, p.race DESC;
"""
# Run the query over the open connection and load the result as a DataFrame.
stops = pd.read_sql_query(sql_query,con)
stops  # last expression of the cell, so the notebook renders the table

# Likelihood-of-search by stop-reason

In [None]:
# Count nc_search rows per race, stop purpose and year for type-'D' persons
# in stops made by agency 78. The result holds raw counts; turning them into
# a likelihood would still require dividing by the matching stop counts.
# NOTE(review): nc_search is joined on stop_id only, so every search row of a
# stop is counted under that stop's type-'D' person even if se.person_id
# refers to someone else. Confirm whether the join should also require
# se.person_id = p.person_id.
sql_query = """
SELECT count(se.person_id),
       p.race,
       s.purpose,
       extract(YEAR FROM s.date) AS year
FROM  nc_person p
JOIN nc_stop s ON p.stop_id = s.stop_id
JOIN nc_search se ON s.stop_id = se.stop_id
WHERE p.type='D'
  AND s.agency_id = 78
GROUP BY p.race,
         s.purpose,
         year
ORDER BY year ASC,
         s.purpose ASC,
         p.race DESC;
"""
# Run the query over the open connection and load the result as a DataFrame.
search = pd.read_sql_query(sql_query,con)
search  # last expression of the cell, so the notebook renders the table

# Use-of-force

In [None]:
# Count nc_search rows per race and year for type-'D' persons in agency-78
# stops where s.engage_force = 't'. Rows are ordered by race descending, then
# year ascending (the reverse priority of the other queries in this notebook).
# NOTE(review): the inner join to nc_search means only stops that have at
# least one search row contribute, and the figure is a count of search rows
# rather than of stops. Confirm that this is the intended use-of-force measure.
sql_query="""
SELECT count(se.person_id),
       p.race,
       extract(YEAR FROM s.date) AS year
FROM nc_person p
JOIN nc_stop s ON p.stop_id = s.stop_id
JOIN nc_search se ON s.stop_id = se.stop_id
WHERE p.type='D'
  AND s.agency_id = 78
  AND s.engage_force = 't'
GROUP BY p.race,
         year
ORDER BY p.race DESC,
         year ASC;
"""
# Run the query over the open connection and load the result as a DataFrame.
force = pd.read_sql_query(sql_query,con)
force  # last expression of the cell, so the notebook renders the table

# Search rate

In [None]:
# Count nc_search rows per race and year for type-'D' persons in stops made
# by agency 78. Despite the variable name, the result holds raw counts; a
# rate would still need to be divided by the matching stop counts.
# NOTE(review): nc_search is joined on stop_id only, so every search row of a
# stop is counted under that stop's type-'D' person. Confirm whether the join
# should also require se.person_id = p.person_id.
sql_query="""
SELECT count(se.person_id),
       p.race,
       extract(YEAR FROM s.date) AS year
FROM nc_person p
JOIN nc_stop s ON p.stop_id = s.stop_id
JOIN nc_search se ON s.stop_id = se.stop_id
WHERE p.type='D'
  AND s.agency_id = 78
GROUP BY p.race,
         year
ORDER BY year ASC,
         p.race DESC;
"""
# Run the query over the open connection and load the result as a DataFrame.
search_rate = pd.read_sql_query(sql_query,con)
search_rate  # last expression of the cell, so the notebook renders the table

# Contraband Hit Rate

In [None]:
# Count nc_contraband rows (linked to searches through search_id) per race
# and year for type-'D' persons in stops made by agency 78. Despite the
# variable name, the result holds raw counts; a hit rate would still need to
# be divided by the matching search counts.
# NOTE(review): nc_search is joined on stop_id only, so contraband found in
# any search of a stop is counted under that stop's type-'D' person. Confirm
# whether the joins should also match on person_id.
sql_query="""
SELECT count(c.person_id),
       p.race,
       extract(YEAR FROM s.date) AS year
FROM nc_person p
JOIN nc_stop s ON p.stop_id = s.stop_id
JOIN nc_search se ON s.stop_id = se.stop_id
JOIN nc_contraband c ON se.search_id = c.search_id
WHERE p.type='D'
  AND s.agency_id = 78
GROUP BY p.race,
         year
ORDER BY year ASC,
         p.race DESC;
"""
# Run the query over the open connection and load the result as a DataFrame.
hit_rate = pd.read_sql_query(sql_query,con)
hit_rate  # last expression of the cell, so the notebook renders the table

In [None]:
# The usual preamble
%matplotlib inline
#%pylab inline
import matplotlib.pyplot as plt
import numpy as np

# Make the graphs a bit prettier, and bigger
pd.set_option('display.mpl_style', 'default')

# This is necessary to show lots of columns in pandas 0.12. 
# Not necessary in pandas 0.13.
pd.set_option('display.width', 5000) 
pd.set_option('display.max_columns', 60)
pd.set_option('display.max_rows', 20)

plt.rcParams['figure.figsize'] = (15, 5)

In [None]:
# Bar chart of the per-year counts in `stops` for rows whose race is 'B'.
# `kind` must be lowercase: current pandas compares the value as given
# against its list of plot kinds and raises ValueError for 'Bar'.
# Note that `stops_black` ends up bound to whatever .plot() returns, not to
# the filtered DataFrame.
stops_black = (
    stops[stops['race'] == 'B']
    .set_index('year')
    .drop(['race'], axis=1)
    .plot(kind='bar')
)


# General Stops data

In [None]:
# Number of nc_stop rows per stop_city for agency 78, returned alongside the
# agency description and id.
# count(*) replaces count(s.stop_city): count(column) skips NULL values, so
# stops without a recorded city would appear as a group with a count of 0.
# PostgreSQL names the output column "count" in both forms.
sql_query = """
SELECT count(*),s.stop_city,s.agency_description,s.agency_id FROM nc_stop s
WHERE s.agency_id = 78
GROUP BY s.agency_id,s.agency_description,s.stop_city
"""
# Run the query over the open connection and load the result as a DataFrame.
stops_gen = pd.read_sql_query(sql_query, con)
stops_gen  # last expression of the cell, so the notebook renders the table