# 2.3 Conviction Rates with Traffic Violations


In [1]:
import json
import os
from datetime import timedelta
from urllib.parse import quote

import dash
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import psycopg2 as db
import requests
import seaborn as sns
from sqlalchemy import create_engine

%matplotlib inline

In [2]:
# Read the PostgreSQL password from the environment so it never appears in the notebook.
# Raises KeyError if POSTGRES_PASS is not set.
postPass = os.environ["POSTGRES_PASS"]

In [3]:
# Build the SQLAlchemy engine for the local "expunge" database (user "jupyter").
# The password is percent-encoded because characters such as "@", ":", "/" or "%"
# would otherwise be parsed as URL delimiters and corrupt the connection string.
engine = create_engine(
    "postgresql+psycopg2://{user}:{pw}@localhost/{db}".format(
        user="jupyter",
        pw=quote(postPass, safe=""),
        db="expunge",
    )
)

## Creating a new conviction rate table

In [4]:
# Count guilty dispositions ('Guilty' or 'Guilty In Absentia') per race and locality,
# skipping rows whose code_section is 'MISSING'.
# c.fips is cast to INT only for the join against fips.fips; the selected c.fips keeps
# its stored form (displayed below as zero-padded codes such as '059').
# The LEFT JOIN keeps charges whose fips has no match in the fips table (locality is NULL).
# NOTE(review): COUNT(*) counts rows of `charges`, so con_people is presumably a number of
# charges rather than distinct people — confirm against the table definition.
con_query = """
SELECT race, c.fips, COUNT(*) AS con_people, f.locality
FROM charges c
LEFT JOIN fips f
    ON CAST(c.fips AS INT) = f.fips
WHERE ((disposition_code='Guilty' OR disposition_code='Guilty In Absentia')) AND not (code_section='MISSING')
GROUP BY race, c.fips, f.locality
ORDER BY con_people desc

"""

# Run the aggregation in PostgreSQL and load the grouped counts into a DataFrame.
convictions = pd.read_sql(con_query, con=engine)

In [5]:
 convictions 

Unnamed: 0,race,fips,con_people,locality
0,White,059,212992,Fairfax
1,White,810,175586,Virginia Beach
2,Black,810,143274,Virginia Beach
3,White,153,127434,Prince William
4,Black,087,122573,Henrico
...,...,...,...,...
818,Asian or Pacific Islander,640,1,Galax
819,American Indian or Alaskan Native,029,1,Buckingham
820,Black,703,1,Newport News
821,American Indian or Alaskan Native,103,1,Lancaster


In [6]:
# Count all charges per race and locality, skipping rows whose code_section is 'MISSING'.
# Same grouping keys as con_query (race, c.fips, f.locality) but with no disposition filter,
# so these counts serve as the denominator of the conviction rate.
# NOTE(review): COUNT(*) counts rows of `charges`; "charged_people" is presumably a number
# of charges rather than distinct people — confirm.
charged_query = """
SELECT race, c.fips, COUNT(*) AS charged_people, f.locality
FROM charges c
LEFT JOIN fips f
    ON CAST (c.fips AS INT) = f.fips
WHERE not (code_section='MISSING')
GROUP BY race, c.fips, f.locality
ORDER BY charged_people desc
"""

# Run the aggregation in PostgreSQL and load the grouped counts into a DataFrame.
charged = pd.read_sql(charged_query, con=engine)

In [7]:
# Display the charge counts; pandas renders a truncated head/tail view of the frame.
charged 

Unnamed: 0,race,fips,charged_people,locality
0,White,059,347197,Fairfax
1,White,810,256646,Virginia Beach
2,Black,810,206968,Virginia Beach
3,White,153,198657,Prince William
4,Black,087,183674,Henrico
...,...,...,...,...
822,American Indian or Alaskan Native,045,1,Craig
823,American Indian or Alaskan Native,027,1,Buchanan
824,American Indian or Alaskan Native,133,1,Northumberland
825,American Indian or Alaskan Native,620,1,Franklin


In [8]:
# Join conviction counts onto charge counts for every (race, fips, locality) group.
# A right join keeps groups that were charged but never convicted; the default inner
# join silently dropped them, so localities with a 0% conviction rate disappeared.
# Missing conviction counts are filled with 0 and cast back to integers, because the
# NaNs introduced by the join would otherwise turn the column into floats.
con_charge_table = (
    pd.merge(convictions, charged, how='right', on=['race', 'fips', 'locality'])
    .fillna({'con_people': 0})
    .astype({'con_people': 'int64'})
)

In [9]:
# Conviction rate = convictions / charges for each (race, fips, locality) row.
conviction_rate_without_codes = con_charge_table.assign(
    con_rate=lambda tbl: tbl['con_people'] / tbl['charged_people']
)

In [10]:
# Persist the rate table to PostgreSQL so the cells below can filter it with SQL.
# if_exists='replace' recreates the table on each run, so re-running this cell is safe.
conviction_rate_without_codes.to_sql(
    'conviction_rate_without_codes',
    con=engine,
    chunksize=1000,
    if_exists='replace',
    index=False,
)

# Preview the highest conviction rates. The sort used to be the first statement of the
# cell, where its result was neither assigned nor displayed; as the last expression it
# is actually rendered.
conviction_rate_without_codes.sort_values(by='con_rate', ascending=False).head()

In [12]:
# Conviction rates for every race except 'Unknown', limited to groups with at least
# 30 charges, ordered from highest to lowest rate.
# The unquoted column aliases come back lowercase (see the displayed frame: race, fips,
# jurisdiction, total_convictions, total_charges, conviction_rate).
# The GROUP BY lists every selected column and there is no aggregate, so it only
# removes duplicate rows.
query = """
SELECT race AS Race, fips as FIPS, locality AS Jurisdiction, con_people AS Total_Convictions, charged_people AS Total_Charges, con_rate AS Conviction_Rate
FROM conviction_rate_without_codes
WHERE charged_people >= 30 AND not race='Unknown'
GROUP BY race, fips, con_people, locality, charged_people, con_rate
ORDER BY con_rate DESC

"""

# Load the filtered rates; fips is selected without a cast here, so it keeps its stored form.
traffic_cr = pd.read_sql(query, con=engine)

In [13]:
# Display the filtered conviction-rate table; pandas renders a truncated head/tail view.
traffic_cr

Unnamed: 0,race,fips,jurisdiction,total_convictions,total_charges,conviction_rate
0,American Indian or Alaskan Native,025,Brunswick,382,387,0.987080
1,Asian or Pacific Islander,025,Brunswick,1222,1242,0.983897
2,American Indian or Alaskan Native,021,Bland,48,49,0.979592
3,Asian or Pacific Islander,081,Greensville,559,571,0.978984
4,Hispanic,081,Greensville,3086,3159,0.976891
...,...,...,...,...,...,...
583,White,122,Montgomery/Blacksburg,2196,6318,0.347578
584,Asian or Pacific Islander,701,Newport News,163,496,0.328629
585,Asian or Pacific Islander,750,Radford,48,147,0.326531
586,Asian or Pacific Islander,761,Richmond,164,642,0.255452


# Conviction Rate Map for Black People


In [14]:
# Conviction rates for Black defendants only, limited to localities with at least
# 30 charges, ordered from highest to lowest rate.
# The race='Black' predicate was missing: without it the query returns every race, so
# the "Black" frame (and the map built from it) held several rows per locality.
# fips is cast to INT so the county FIPS code can later be built arithmetically.
Black_cr_query = """
SELECT race AS Race, CAST(fips AS INT) as FIPS, locality AS Jurisdiction, con_people AS Total_Convictions, charged_people AS Total_Charges, con_rate AS Conviction_Rate
FROM conviction_rate_without_codes
WHERE charged_people >= 30 AND race='Black'
GROUP BY race, fips, con_people, locality, charged_people, con_rate
ORDER BY con_rate DESC

"""

Black_conviction_rate = pd.read_sql(Black_cr_query, con=engine)

In [15]:
# Round rates to two decimals for display and mapping. Bracket assignment is used
# because attribute-style assignment only works for columns that already exist.
Black_conviction_rate['conviction_rate'] = Black_conviction_rate['conviction_rate'].round(2)

# End on the frame so the cell renders the table; a bare assignment shows nothing.
Black_conviction_rate

Unnamed: 0,race,fips,jurisdiction,total_convictions,total_charges,conviction_rate
0,Black,25,Brunswick,41065,44560,0.92
1,Black,81,Greensville,40630,44730,0.91
2,Black,21,Bland,3155,3510,0.90
3,Black,173,Smyth,8292,9468,0.88
4,Black,183,Sussex,28449,32565,0.87
...,...,...,...,...,...,...
134,Black,620,Franklin,2890,5943,0.49
135,Black,51,Dickenson,47,103,0.46
136,Black,185,Tazewell,2659,5952,0.45
137,Black,761,Richmond,27501,70811,0.39


In [21]:
# Conviction rates for every race except 'Unknown', limited to groups with at least
# 30 charges, ordered from highest to lowest rate.
# fips is CAST to INT here, so the result carries integer codes rather than the
# zero-padded strings stored in the table.
# The GROUP BY lists every selected column and there is no aggregate, so it only
# removes duplicate rows.
cr_query = """
SELECT race AS Race, CAST(fips AS INT) as FIPS, locality AS Jurisdiction, con_people AS Total_Convictions, charged_people AS Total_Charges, con_rate AS Conviction_Rate
FROM conviction_rate_without_codes
WHERE charged_people >= 30 AND NOT race='Unknown'
GROUP BY race, fips, con_people, locality, charged_people, con_rate
ORDER BY con_rate DESC

"""

all_conviction_rate = pd.read_sql(cr_query, con=engine)

In [22]:
# Round every conviction rate to two decimal places before exporting.
all_conviction_rate['conviction_rate'] = all_conviction_rate['conviction_rate'].round(2)

In [24]:
# Export the rounded all-race table to a CSV in the working directory, without the index.
# NOTE(review): cr_query casts fips to INT, so the exported codes have no leading zeros
# (e.g. 25 rather than '025') — confirm this is what downstream consumers expect.
all_conviction_rate.to_csv('all_conviction_rate.csv', index=False)

In [None]:
# Top 20 localities by conviction rate for Black defendants (groups with >= 30 charges).
# NOTE(review): this rebinds Black_cr_query, replacing the query string defined earlier
# for Black_conviction_rate; the earlier frame itself is unaffected.
Black_cr_query = """
SELECT race AS Race, CAST(fips AS INT) as FIPS, locality AS Jurisdiction, con_people AS Total_Convictions, charged_people AS Total_Charges, con_rate AS Conviction_Rate
FROM conviction_rate_without_codes
WHERE charged_people >= 30 AND race='Black'
GROUP BY race, fips, con_people, locality, charged_people, con_rate
ORDER BY con_rate DESC
LIMIT 20
"""
# Load the 20-row result into its own frame.
table_black_cr = pd.read_sql(Black_cr_query, con=engine)

In [None]:
# Round this table's own rates to two decimals. The values were previously copied from
# Black_conviction_rate, a different frame, and matched to rows only by index position,
# which yields wrong rates whenever the two frames are not ordered identically.
table_black_cr['conviction_rate'] = table_black_cr['conviction_rate'].round(2)
table_black_cr

In [None]:
# Build the 5-digit county FIPS string used as the map location key: Virginia's state
# code (51) followed by the 3-digit locality code, e.g. 25 -> '51025'.
# Reducing the value modulo 1000 first makes the cell idempotent: re-running it neither
# adds 51000 a second time nor fails on the already-stringified column.
Black_conviction_rate['fips'] = (
    (pd.to_numeric(Black_conviction_rate['fips']) % 1000 + 51000)
    .astype('int')
    .astype('str')
)

In [None]:
# Download the US county boundaries (GeoJSON) that the choropleth maps below draw on.
# A timeout keeps the kernel from hanging forever on a stalled connection, and
# raise_for_status() fails loudly instead of trying to parse an error page as JSON.
r = requests.get(
    'https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json',
    timeout=30,
)
r.raise_for_status()
counties = r.json()

In [None]:
# Choropleth of conviction rates for Black defendants by Virginia locality.
# `locations='fips'` is matched against the feature ids of the county GeoJSON.
fig1 = px.choropleth(
    Black_conviction_rate,
    geojson=counties,
    locations='fips',
    color='conviction_rate',
    color_continuous_scale="Viridis",
    scope="usa",
    # Label the coloured column; the old mapping renamed 'jurisdiction' to
    # 'conviction_rate', which mislabelled the hover text and left the colour bar untitled.
    labels={'conviction_rate': 'Conviction rate'},
    hover_name='jurisdiction',
)
# Zoom to the localities that have data instead of showing the whole country.
fig1.update_geos(fitbounds="locations")
fig1.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})
# Plotly figures render through their own show(); the former plt.show(fig1) was a
# matplotlib call that does not display plotly figures, and the repeated
# `%matplotlib inline` had no effect here.
fig1.show()

# Conviction Rate Map for Hispanic People

In [None]:
# Conviction rates for Hispanic defendants only, limited to localities with at least
# 30 charges, ordered from highest to lowest rate.
# fips is CAST to INT, so the result carries integer locality codes.
# The GROUP BY lists every selected column and there is no aggregate, so it only
# removes duplicate rows.
Hisp_cr_query = """
SELECT race AS Race, CAST(fips AS INT) as FIPS, locality AS Jurisdiction, con_people AS Total_Convictions, charged_people AS Total_Charges, con_rate AS Conviction_Rate
FROM conviction_rate_without_codes
WHERE charged_people >= 30 AND race='Hispanic'
GROUP BY race, fips, con_people, locality, charged_people, con_rate
ORDER BY con_rate DESC

"""

hisp_conviction_rate = pd.read_sql(Hisp_cr_query, con=engine)

In [None]:
# Build the 5-digit county FIPS string used as the map location key: Virginia's state
# code (51) followed by the 3-digit locality code, e.g. 25 -> '51025'.
# Reducing the value modulo 1000 first makes the cell idempotent: re-running it neither
# adds 51000 a second time nor fails on the already-stringified column.
hisp_conviction_rate['fips'] = (
    (pd.to_numeric(hisp_conviction_rate['fips']) % 1000 + 51000)
    .astype('int')
    .astype('str')
)

In [None]:
# Round the rates to two decimals, then preview the ten highest-rate localities.
hisp_conviction_rate['conviction_rate'] = hisp_conviction_rate['conviction_rate'].round(2)
hisp_conviction_rate.head(10)

In [None]:
# Choropleth of conviction rates for Hispanic defendants by Virginia locality.
# `locations='fips'` is matched against the feature ids of the county GeoJSON.
fig2 = px.choropleth(
    hisp_conviction_rate,
    geojson=counties,
    locations='fips',
    color='conviction_rate',
    color_continuous_scale="Viridis",
    scope="usa",
    # Label the coloured column; the old mapping renamed 'jurisdiction' to
    # 'conviction_rate', which mislabelled the hover text and left the colour bar untitled.
    labels={'conviction_rate': 'Conviction rate'},
    hover_name='jurisdiction',
)
# Zoom to the localities that have data instead of showing the whole country.
fig2.update_geos(fitbounds="locations")
fig2.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})
# Plotly figures render through their own show(); the former plt.show(fig2) was a
# matplotlib call that does not display plotly figures, and the repeated
# `%matplotlib inline` had no effect here.
fig2.show()