In [None]:
import pandas as pd
import numpy as np
import json
import matplotlib.pyplot as plt
import seaborn as sns
import squarify
sns.set_theme()

In [None]:
# NOTE(review): turns off pandas' chained-assignment (SettingWithCopy) warning for the
# whole session, so assignments made through chained indexing are no longer flagged.
pd.options.mode.chained_assignment = None

In [None]:
# Notebook display limits: at most 25 rows per frame, up to 500 characters per cell value.
pd.set_option('display.max_rows',25)
pd.set_option('display.max_colwidth', 500)

In [None]:
# Load the JSON file holding the dtype specification that is handed to
# `pd.read_csv(..., dtype=dtypes)` when the dataset is read.
with open('../data/data_final_dtypes.json', 'r') as f:
    dtypes = json.load(f)

In [None]:
# Read the final dataset, applying the dtypes loaded from the JSON file.
df = pd.read_csv('../data/data_final.csv', dtype=dtypes)

In [None]:
# Convert the `date` column to datetimes (bracket access makes the column write explicit).
df['date'] = pd.to_datetime(df['date'])

In [None]:
# One fixed colour per race, reused by every chart. The trailing comments name the
# race each colour stands for, not the colour itself.
deep_palette = sns.color_palette("deep")
c_w = deep_palette[9]   # white
c_b = deep_palette[4]   # black
c_na = deep_palette[1]  # native american
c_a = deep_palette[2]   # asian
c_me = deep_palette[3]  # middle eastern
c_h = deep_palette[6]   # hispanic

In [None]:
def change_width(ax, new_value) :
    """Resize every patch in ``ax.patches`` to ``new_value`` wide, keeping it centred.

    Parameters
    ----------
    ax : object exposing a ``patches`` iterable
        Each patch must provide ``get_width``/``set_width`` and ``get_x``/``set_x``.
    new_value : float
        Width applied to every patch.

    Returns
    -------
    None
        The patches are modified in place.
    """
    for bar in ax.patches:
        width_change = bar.get_width() - new_value
        bar.set_width(new_value)
        # Shift the left edge by half the width change so the centre does not move.
        bar.set_x(bar.get_x() + width_change * .5)

In [None]:
# total deaths per race 2000-2021
plt.rcParams['figure.figsize'] = 10,8
sns.set(font_scale=1.5, style='dark')
plt.style.use("dark_background")

# Colour and tick label are looked up by race name, so a change in the ranking
# cannot silently pair a bar with another race's colour or label.
race_colors = {'white': c_w, 'black': c_b, 'hispanic': c_h,
               'asian/pacific islander': c_a, 'native american': c_na,
               'middle eastern': c_me}
race_tick_labels = {'asian/pacific islander': 'asian/\npacific islander',
                    'native american': 'native\n american'}

# value_counts() sorts descending; its index is the left-to-right bar order.
race_totals = df['race'].value_counts()
race_order = list(race_totals.index)

# seaborn returns a matplotlib Axes; the notebook's existing name `fig` is kept.
fig = sns.countplot(data=df, x='race',
                    order=race_order,
                    palette=[race_colors.get(race, 'grey') for race in race_order])

# Print each bar's count just above it (170 is a vertical offset in count units).
values = race_totals.tolist()
pos = range(len(values))
for x_pos, count in zip(pos, values):
    fig.text(x_pos, count+170, count, horizontalalignment='center', size=15, color=(1,1,1), weight='semibold')

change_width(fig, .7)

# Races without a custom multi-line label fall back to their own name.
fig.set(xticklabels=[race_tick_labels.get(race, race) for race in race_order])
plt.xlabel('', size=30)
plt.ylabel('', size=30)
plt.title('Total Deaths per race', size=20)

plt.savefig('../presentation/total_deaths_per__race.png', dpi=200)
plt.savefig('../presentation/total_deaths_per__race_transparent.png', dpi=200, transparent=True)

In [None]:
# Share of the US population attributed to each race, expressed as a fraction.
# Source: https://en.wikipedia.org/wiki/Demographics_of_the_United_States#Race
race_perc = {
    'white': 0.615,
    'black': 0.123,
    'hispanic': 0.176,
    'asian': 0.053,
    'native american': 0.007,
    'middle eastern': 0.003,
}

In [None]:
# Deaths per race, joined with each race's population share and a yearly per-million rate.
US_POPULATION = 310000000  # average total population 310m 2000-2020
# NOTE(review): other cells in this notebook treat the data as covering 2000-2021, and the
# counts below do not exclude 2021 rows, yet the divisor is 20 years. Confirm the period.
YEARS_COVERED = 20

race_counts = (
    df['race'].value_counts()
    # pandas >= 2.0 names this Series 'count'; pin the column name to 'race' so the
    # frame looks the same on every pandas version.
    .rename('race')
    .rename_axis(None)
    .to_frame()
    # Align the index with the keys of `race_perc`.
    .rename(index={'asian/pacific islander': 'asian'})
)
race_counts['pop_perc'] = race_counts.index.map(race_perc)
# Average per million of population killed every year by police per race
race_counts['death_perc'] = race_counts['race']*1000000/(US_POPULATION*race_counts['pop_perc']*YEARS_COVERED)

In [None]:
plt.rcParams['figure.figsize'] = 10,8
sns.set(font_scale=1.5, style='dark')
plt.style.use("dark_background")

# Colour and tick label are looked up by race name instead of relying on a
# hand-written list that has to match the sorted order of the rates.
race_colors = {'white': c_w, 'black': c_b, 'hispanic': c_h,
               'asian': c_a, 'native american': c_na, 'middle eastern': c_me}
race_tick_labels = {'asian': 'asian/\npacific islander',
                    'native american': 'native\n american'}

# Highest rate first; this single sorted Series drives bar order, colours and labels.
sorted_rates = race_counts['death_perc'].sort_values(ascending=False)
race_order = list(sorted_rates.index)

# seaborn returns a matplotlib Axes; the notebook's existing name `fig` is kept.
fig = sns.barplot(data=race_counts, x=race_counts.index, y='death_perc',
                  order=race_order,
                  palette=[race_colors.get(race, 'grey') for race in race_order])

# Print each rounded rate just above its bar.
values = sorted_rates.values.round(2).tolist()
pos = range(len(values))
for x_pos, rate in zip(pos, values):
    fig.text(x_pos, rate+0.1, rate, horizontalalignment='center', size=15, color=(1,1,1), weight='semibold')

plt.xlabel('', size=30)
plt.ylabel('Deaths per million', size=20)
# Races without a custom multi-line label fall back to their own name.
fig.set(xticklabels=[race_tick_labels.get(race, race) for race in race_order])
change_width(fig, .7)
plt.title('Average yearly deaths / population of each race', size=20)

plt.savefig('../presentation/average_yearly_deaths_per_population_each_race.png', dpi=200)
plt.savefig('../presentation/average_yearly_deaths_per_population_each_race_transparent.png', dpi=200, transparent=True)

In [None]:
# Gender
# The counts are typed in by hand rather than derived from `df`.
# NOTE(review): 29318 + 3048 = 32366, which is not the 32391 shown in the title;
# confirm whether the title is meant to include rows outside these two groups.
gender_counts = {'Male': 29318, 'Female': 3048}
data = list(gender_counts.values())
labels = list(gender_counts)

plt.rcParams['figure.figsize'] = (10, 10)
sns.set(font_scale=3, style='dark')
plt.style.use("dark_background")

plt.pie(data, labels=labels, colors=['b', 'r'], autopct='%.0f%%')
plt.title('Total deaths: 32391', size=30)

for transparent in (False, True):
    suffix = '_transparent' if transparent else ''
    if transparent:
        plt.savefig('../presentation/gender_pie' + suffix + '.png', dpi=200, transparent=True)
    else:
        plt.savefig('../presentation/gender_pie' + suffix + '.png', dpi=200)

In [None]:
# Age distribution of deaths of each race.
# Wide layout: one row per record, one column per race, holding that record's age.
race_columns = ['white','black','hispanic','asian/pacific islander','native american','middle eastern']
age_race = df.pivot_table(index=df.index, columns=['race'], values='age')
# Ages above 90 are blanked out (NaN), then the columns are put in display order.
age_race = age_race.where(age_race <= 90)[race_columns]

plt.rcParams['figure.figsize'] = (19, 10)
sns.set(font_scale=2.5, style='dark')
plt.style.use("dark_background")

# seaborn returns a matplotlib Axes; the notebook's existing name `fig` is kept.
fig = sns.violinplot(data=age_race, cut=1, palette=[c_w, c_b, c_h, c_a, c_na, c_me])

plt.title('Age distribution of deaths', size=30)
plt.xlabel('', size=30)
plt.ylabel('Age', size=30)
fig.set(yticks=range(0, 95, 10))

# Median age per race, written next to each violin at the median's height.
medians = age_race.median().values
# The leading spaces push the centred text to the right of the violin's midline.
nobs = ["       " + str(int(median)) for median in medians.tolist()]

pos = range(len(nobs))
for x_pos, _ in zip(pos, fig.get_xticklabels()):
    fig.text(
        x_pos,
        medians[x_pos] - 0.8,
        nobs[x_pos],
        horizontalalignment='center',
        size=20,
        color=(1, 1, 1),
        weight='semibold',
    )

fig.set(xticklabels=['white','black','hispanic','asian/\npacific islander','native\n american','middle eastern'])

plt.savefig('../presentation/age_destribution_per_race.png', dpi=200)
plt.savefig('../presentation/age_destribution_per_race_transparent.png', dpi=200, transparent=True)

In [None]:
# Preview of deaths per year (2021 excluded). Built from `df` directly so the cell
# runs on a fresh kernel; `yearly_deaths` is only assigned in the following cell.
df.assign(year=pd.DatetimeIndex(df['date']).year).groupby('year').count().race.drop(2021)

In [None]:
# Add a calendar-year column to `df` (later cells read it) and count rows with a
# non-null race per year; 2021 is dropped from the series.
df['year'] = pd.DatetimeIndex(df['date']).year
yearly_deaths = df.groupby('year')['race'].count().drop(2021)

plt.rcParams['figure.figsize'] = (19, 10)
sns.set(font_scale=2, style='dark')
plt.style.use("dark_background")

# seaborn returns a matplotlib Axes; the notebook's existing name `fig` is kept.
fig = sns.lineplot(data=yearly_deaths, linewidth=5)

# One tick per year, 2000 through 2020, rotated so the labels do not collide.
fig.set(xticks=range(2000, 2021, 1))
plt.xticks(rotation=90)
plt.xlabel(' ', size=30)
plt.ylabel(' ', size=30)
plt.title('Total deaths per year', size=30)

plt.savefig('../presentation/total_deaths_per_year.png', dpi=200)
plt.savefig('../presentation/total_deaths_per_year_transparent.png', dpi=200, transparent=True)

In [None]:
# Races ordered by total deaths (2021 excluded), largest first; used as `hue_order`
# by the per-race line chart. Built from `df` directly because the crosstab `a` is
# only assigned in the next cell, so referencing it here failed on a fresh kernel.
ho = list(
    pd.crosstab(df['year'], df['race'])
    .drop(2021)
    .sum()
    .sort_values(ascending=False)
    .index
)

In [None]:
# Deaths per year (rows) and race (columns); 2021 is dropped.
a = pd.crosstab(df['year'], df['race']).drop(2021)
# Hue order: races by total deaths, largest first. It is derived here from `a` so the
# cell does not rely on a value computed in an earlier cell that itself needed `a`.
ho = list(a.sum().sort_values(ascending=False).index)

# Colours are looked up by race name, so they stay correct whatever order `ho` has.
race_colors = {'white': c_w, 'black': c_b, 'hispanic': c_h,
               'asian/pacific islander': c_a, 'native american': c_na,
               'middle eastern': c_me}

plt.rcParams['figure.figsize'] = 19,10
sns.set(font_scale=2, style='dark')
plt.style.use("dark_background")

# seaborn returns a matplotlib Axes; the notebook's existing name `fig` is kept.
fig = sns.lineplot(data=a, linewidth = 6.5,
                   palette=[race_colors.get(race, 'grey') for race in ho],
                   hue_order = ho)

fig.set(xticks=range(2000,2021,1))
plt.xlabel('', size=30)
plt.xticks(rotation=90)
plt.ylabel('', size=30)
plt.title('Deaths per year per race', size=30)

plt.savefig('../presentation/deaths_per_race_per_year.png', dpi=200)
plt.savefig('../presentation/deaths_per_race_per_year_transparent.png', dpi=200, transparent=True)

In [None]:
# Show seaborn's "deep" palette as the cell output; colours are picked from it by index elsewhere.
sns.color_palette('deep')

In [None]:
# Five colours from the "deep" palette, selected by index, as a tuple.
color = tuple(sns.color_palette('deep')[i] for i in (7, 2, 9, 3, 8))

In [None]:
# What were the official dispositions of killings? (Were the officers prosecuted?)
data = df['disposition'].value_counts()

# Fold related outcomes into two umbrella rows, then remove the rows that were folded in.
data['justified'] = data['justified'] + data['criminal charges'] + data['civil suit']
data['accidental'] = data['accidental'] + data['suicide'] + data['medical emergency']
data = data.drop(labels=['suicide', 'criminal charges', 'civil suit', 'medical emergency'])

# NOTE(review): the display labels are assigned by position, so they assume the five
# remaining rows come out of value_counts() in exactly this order. Confirm against the data.
data.index = [
    'Pending investigation',
    'Justified  / Criminal charges acquitted',
    'Accidental /\n Suicide /\n Medical \nemergency',
    ' ',
    ' ',
]
color = tuple(sns.color_palette('deep')[i] for i in (7, 2, 9, 3, 8))

plt.rcParams['figure.figsize'] = (19, 10)
sns.set(font_scale=3)

squarify.plot(data, label=data.index, color=color)

plt.axis('off')
plt.tight_layout()
plt.savefig('../presentation/disposistions_boxplot.png', dpi=200)


In [None]:
# Percentage of deaths per cause-of-death category, largest share first.
#
# The categories are assigned with a single dict lookup instead of repeated
# `df2.death_cat[mask] = ...` statements: that chained-indexing form writes through an
# intermediate object and does not update the frame when pandas copy-on-write is active.
DEATH_CATEGORY_BY_CAUSE = {
    'gunshot': 'Gunshot',
    'vehicle': 'Vehicle',
    'taser': 'Taser/ Gas/\n Pepper spray',
    'medical emergency': 'Medical\n emergency \ne.g. heart attack',
    'asphyxia': 'Beaten/ Stabbed/\n Restrained',
    'drowned': 'Accidental',
    'beaten': 'Beaten/ Stabbed/\n Restrained',
    'drug overdose': 'Drug\n overdose',
    'fell from a height': 'Accidental',
    'stabbed': 'Beaten/ Stabbed/\n Restrained',
    'burned/smoke inhalation': 'Accidental',
    'chemical': 'Taser/ Gas/\n Pepper spray',
    'restraint': 'Beaten/ Stabbed/\n Restrained',
    # NOTE(review): beanbag and bomb are grouped under 'Gunshot', as in the original mapping.
    'beanbag': 'Gunshot',
    'bomb': 'Gunshot',
}

df2 = (
    df[['name']]
    # astype(object) keeps the lookup a plain value-by-value mapping whatever dtype
    # the column was loaded with; causes missing from the dict become NaN.
    .assign(death_cat=df['death_cause'].astype(object).map(DEATH_CATEGORY_BY_CAUSE))
    # Rows whose cause has no category are left out of the chart.
    .dropna(subset=['death_cat'])
    .groupby('death_cat')['name']
    .count()
)
df2 = 100*df2 / df2.sum()
# Two columns: 'death_cat' and 'name' (the latter now holds the percentage).
df2 = df2.reset_index().sort_values('name', ascending=False)

In [None]:
sns.set(font_scale=1.5, style='dark')
plt.style.use("dark_background")

# catplot is a figure-level function: it creates its own figure, sized by `height` and
# `aspect`. No plt.figure() call is made beforehand, because that figure was never
# drawn on and only showed up as an empty extra output.
g = sns.catplot(x='name', y='death_cat' , orient='h', kind="bar", palette=sns.color_palette('deep'), data=df2, aspect=1.5, height=8)

# x axis shows the percentage share; the category names on the y axis need no axis title.
g.set_axis_labels('%', '')

# The catplot figure is the current figure, so plt.savefig writes it.
plt.savefig('../presentation/cause_of_deaths_catplot.png', dpi=200)
plt.savefig('../presentation/cause_of_deaths_catplot_transparent.png', dpi=200, transparent=True)
