In [None]:
import numpy as np
import pandas as pd
from tqdm import tqdm_notebook
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
import seaborn as sns

In [None]:
# Read the officer roster CSV and preview its first five rows
roster = pd.read_csv(filepath_or_buffer='../final/roster.csv')
roster.head(n=5)

In [None]:
# Read the salary records CSV and preview its first five rows
salary = pd.read_csv(filepath_or_buffer='../final/salary.csv')
salary.head(n=5)

In [None]:
# Short display labels for position titles, keyed by the title as it appears
# in the position_description column. Insertion order is kept as listed.
positions_short = dict([
    ('POLICE OFFICER', 'Officer'),
    ('POLICE OFFICER (ASSIGNED AS DETECTIVE)', 'Detective'),
    ('SERGEANT', 'Sergeant'),
    ('LIEUTENANT', 'Lieutenant'),
    ('CAPTAIN', 'Captain'),
    ('COMMANDER', 'Commander'),
    ('DEPUTY CHIEF', 'Dep. Chief'),
    ('CHIEF', 'Chief'),
    ('DEPUTY SUPERINTENDENT', 'Dep. Sup.'),
    ('FIRST DEPUTY SUPERINTENDENT', '1st Dep. Sup.'),
])

# Short display labels for race categories. 'Other' maps to itself so that
# rows already collapsed into the catch-all group can be looked up as well.
races_short = dict([
    ('WHITE', 'White'),
    ('BLACK', 'Black'),
    ('BLACK HISPANIC', 'Bl. Hisp.'),
    ('WHITE HISPANIC', 'Wh. Hisp.'),
    ('ASIAN/PACIFIC ISLANDER', 'Asian/P.I.'),
    ('AMER IND/ALASKAN NATIVE', 'Indig.'),
    ('Other', 'Other'),
])

In [None]:
# Attach each officer's race and gender (taken from the roster) to the salary records.
traits = ['race', 'gender']

# One row of traits per UID. keep='last' matches a dict filled row by row, where a
# later roster row for the same UID overrides an earlier one.
roster_traits = roster.drop_duplicates(subset='uid', keep='last').set_index('uid')[traits]

# UID -> {'race': ..., 'gender': ...}
trait_map = roster_traits.to_dict('index')

# A salary record whose UID is absent from the roster is an error; report which
# UIDs are affected instead of failing somewhere inside a lookup.
unknown_uids = salary.loc[~salary.uid.isin(roster_traits.index), 'uid'].unique()
if len(unknown_uids) > 0:
    raise KeyError('{} salary UID(s) not found in roster, e.g. {}'.format(
        len(unknown_uids), list(unknown_uids[:5])))

# for each trait, add a column to the salary data by looking the UID up in the roster
for tr in traits:
    salary[tr] = salary.uid.map(roster_traits[tr])

# in the below plots, Asian/Pacific Islander, Indigenous, and Black Hispanic categories
# are too small to visualize individually, so group them into "Other"
small_race_groups = ['ASIAN/PACIFIC ISLANDER', 'AMER IND/ALASKAN NATIVE', 'BLACK HISPANIC']
salary['race'] = salary['race'].where(~salary['race'].isin(small_race_groups), 'Other')

# preview only; the full frame is too large to be a useful cell output
salary.head()

In [None]:
import warnings
# NOTE(review): this silences every warning for the rest of the kernel session.
# It is kept so the remaining cells behave as before; consider narrowing it to
# specific categories or removing it.
warnings.filterwarnings('ignore')

# Plot mean salary (with standard-deviation error bars) against the number of
# years an officer has held their current position, one line per position.
positions = ['POLICE OFFICER', 'POLICE OFFICER (ASSIGNED AS DETECTIVE)', 'SERGEANT', 'LIEUTENANT', 'COMMANDER']

plt.figure(figsize = (10,5.5))
for posn in positions:
    # restrict to the particular position; copy so the derived columns below are
    # added to an independent frame rather than to a slice of `salary`
    rows = salary[salary.position_description == posn].copy()

    # years spent in the current position at the time of each salary record
    rows['posn_start_year'] = pd.to_datetime(rows.present_posn_start_date).dt.year
    rows['years_in_posn'] = rows['year'] - rows['posn_start_year']

    # remove very old entries (rows with a missing start date also drop out here,
    # because a NaN tenure fails the comparison)
    # NOTE(review): negative tenures (start year after the record year) are not
    # filtered; confirm whether they occur in the data.
    rows = rows[rows.years_in_posn <= 30]

    # aggregate and compute stats per tenure value
    agg = rows.groupby('years_in_posn').salary.agg(['mean', 'std', 'count']).reset_index()
    # only plot when there were a large enough group of officers (at least 3)
    agg = agg[agg['count'] >= 3]

    # x is the tenure value itself. After reset_index() the frame's index is just
    # the row position, which differs from the tenure whenever tenures have gaps
    # or do not start at 0.
    plt.errorbar(agg['years_in_posn'], agg['mean'], agg['std'], label=positions_short[posn], lw = 3)
plt.xlabel('Years in Position', fontsize = 30)
plt.ylabel('Salary (USD)', fontsize = 30)
plt.xticks(fontsize = 25)
plt.yticks(fontsize = 25)

plt.legend(fontsize = 18)
plt.tight_layout()
plt.savefig('../doc/figs/salary.pdf', bbox_inches = 'tight', dpi = 1000)
plt.show()

In [None]:
# Box plot of salary for all positions, split by race category and gender.
sns.set_style('whitegrid')
plt.figure(figsize = (7,5))

# Plot from a relabelled copy. `salary` itself is never modified, so an error
# while plotting cannot leave the shared frame with short race labels.
salary_labelled = salary.assign(race=salary.race.map(races_short))
sns.boxplot(x='race', y='salary', hue='gender', data=salary_labelled)

plt.xlabel('CPD Race Category', fontsize = 20)
plt.ylabel('Salary (USD)', fontsize = 20)
plt.yticks(fontsize = 20)
plt.xticks(rotation=45, fontsize = 20, ha='right', rotation_mode="anchor")
plt.legend(fontsize = 20)
plt.tight_layout()
plt.savefig('../doc/figs/salary_by_race_gender.pdf')
plt.show()

In [None]:
# Build a table of (gender, position) counts and, per position, the fraction of
# salary records belonging to each gender.
# NOTE(review): counts are over salary records; if `salary` holds one row per
# officer per year, officers with more years weigh more. Confirm this is intended.

# restrict to the positions with a short name; copy so the relabelling below
# writes to an independent frame rather than to a slice
salary = salary[salary.position_description.isin(positions_short.keys())].copy()

# in the below plots, Lieutenant/Commander/Captain and (deputy) chief/superintendent get grouped
position_groups = {}
for title in ['COMMANDER', 'LIEUTENANT', 'CAPTAIN']:
    position_groups[title] = 'Cpt/Cmd/Lt'
for title in ['CHIEF', 'DEPUTY CHIEF', 'DEPUTY SUPERINTENDENT', 'FIRST DEPUTY SUPERINTENDENT']:
    position_groups[title] = 'Chief/Super'
salary['position_description'] = salary['position_description'].map(lambda x : position_groups.get(x, x))

# the grouped titles are their own short names
positions_short['Cpt/Cmd/Lt'] = 'Cpt/Cmd/Lt'
positions_short['Chief/Super'] = 'Chief/Super'
plot_positions = ['POLICE OFFICER',
                  'POLICE OFFICER (ASSIGNED AS DETECTIVE)',
                  'SERGEANT',
                  'Cpt/Cmd/Lt',
                  'Chief/Super']

# number of records per (gender, position)
gender_counts = salary.groupby(['gender', 'position_description'])['uid'].count()

# fill in missing (gender, position) pairs with count = 0
# (DataFrame.append was removed in pandas 2.0, so reindex over the full grid instead)
all_pairs = pd.MultiIndex.from_product(
    [gender_counts.index.get_level_values('gender').unique(), plot_positions],
    names=['gender', 'position_description'])
sal = gender_counts.reindex(all_pairs, fill_value=0).reset_index(name='count')

# fraction of each position's records held by each gender
# (NaN when a position has no records at all)
sal['frac'] = sal['count'] / sal.groupby('position_description')['count'].transform('sum')

# sort by rank; a stable sort keeps the row order deterministic within a rank
sorter = dict(zip(plot_positions, range(len(plot_positions))))
sal['rank'] = sal['position_description'].map(sorter)
sal = sal.sort_values('rank', kind='stable').reset_index(drop=True)

# rename some of the longer titles
sal['position_description'] = sal['position_description'].map(positions_short)
sal

In [None]:
# Line plot of the per-position fraction for each gender, female first.
plt.figure(figsize=(7, 5))
for gender_code in ('F', 'M'):
    gender_rows = sal[sal.gender == gender_code]
    plt.plot(gender_rows.position_description, gender_rows.frac, lw=3, label=gender_code)
plt.legend(loc='best', fontsize=20)
plt.xlabel('Position Title', fontsize=30)
plt.ylabel('Fraction of Officers', fontsize=20)
plt.xticks(rotation=45, ha='right', rotation_mode="anchor", fontsize=20)
plt.yticks(fontsize=20)
# fractions live in [0, 1]
plt.ylim([0, 1])
plt.tight_layout()
plt.savefig('../doc/figs/position_gender.pdf')
plt.show()

In [None]:
# Build a table of (race, position) counts and, per position, the fraction of
# salary records belonging to each race category.
# NOTE(review): counts are over salary records; if `salary` holds one row per
# officer per year, officers with more years weigh more. Confirm this is intended.

# number of records per (race, position)
race_counts = (salary.groupby(['race', 'position_description'])['uid']
               .count()
               .reset_index(name='count'))
# restrict to the plotted positions
race_counts = race_counts[race_counts.position_description.isin(plot_positions)]

# fill in missing (race, position) pairs with count = 0
# (DataFrame.append was removed in pandas 2.0, so reindex over the full grid instead)
all_pairs = pd.MultiIndex.from_product(
    [race_counts.race.unique(), plot_positions],
    names=['race', 'position_description'])
sal = (race_counts.set_index(['race', 'position_description'])['count']
       .reindex(all_pairs, fill_value=0)
       .reset_index())

# fraction of each position's records held by each race category
# (NaN when a position has no records at all)
sal['frac'] = sal['count'] / sal.groupby('position_description')['count'].transform('sum')

# sort by rank; a stable sort keeps the row order deterministic within a rank
sorter = dict(zip(plot_positions, range(len(plot_positions))))
sal['rank'] = sal['position_description'].map(sorter)
sal = sal.sort_values('rank', kind='stable').reset_index(drop=True)

# rename some of the longer titles
sal['position_description'] = sal['position_description'].map(positions_short)
sal

In [None]:
# Line plot of the per-position fraction for each race category, in the order
# the categories first appear in `sal`.
plt.figure(figsize=(7, 5))
for race in sal.race.unique():
    race_rows = sal[sal.race == race]
    plt.plot(race_rows.position_description, race_rows.frac, label=races_short[race], lw=3)
plt.legend(loc='upper center', fontsize=20, ncol=2)
plt.xlabel('Position Title', fontsize=30)
plt.ylabel('Fraction of Officers', fontsize=20)
plt.yticks(fontsize=20)
# upper limit above 1 leaves vertical space at the top of the axes
plt.ylim([0, 1.1])
plt.xticks(rotation=45, ha='right', rotation_mode="anchor", fontsize=20)
plt.tight_layout()
plt.savefig('../doc/figs/position_race.pdf')
plt.show()