In [None]:
import numpy as np
import pandas as pd
from tqdm import tqdm_notebook
import matplotlib.pyplot as plt
from matplotlib import rc as pltrc
from matplotlib.gridspec import GridSpec

def flat_list(list_of_lists):
    '''
    Concatenate the items of every inner sequence into a single list.

    Input:
        list_of_lists <list> an iterable whose elements are themselves iterable
    Output:
        A new list holding the inner items, in their original order
    '''
    flattened = []
    for sublist in list_of_lists:
        # extend() walks the inner iterable exactly like a nested for-loop would.
        flattened.extend(sublist)
    return flattened

def convert_to_year(range_time, low, high):
    '''
    Year of the date lying a given fraction of the way from low to high.

    Input:
        range_time  multiplier applied to the span high-low
                    (0 lands on low, 1 lands on high)
        low, high   date-like objects; their difference is scaled by
                    range_time and added back to low
    Output:
        The .year attribute of low + range_time*(high-low)
    '''
    return (low + range_time*(high - low)).year

In [None]:
# Load the officer roster; the cells below all read from this frame.
roster = pd.read_csv(filepath_or_buffer='../final/roster.csv')
# Preview the first five rows (head()'s default) as the cell output.
roster.head(n=5)

In [None]:
# Parse the appointment and resignation columns into datetime indexes.
appointment_dates = pd.to_datetime(roster['appointment_date'].values)
resignation_dates = pd.to_datetime(roster['resignation_date'].values)

# Same tick-label size on both axes (set through matplotlib's rc settings).
for tick_group in ('xtick', 'ytick'):
    pltrc(tick_group, labelsize=25)

plt.figure(figsize=(21, 5))

# Left and middle panels of the 1x3 figure: 20-bin histograms of the year in
# which officers were appointed / resigned.
histogram_panels = (
    (131, appointment_dates.year, '# Appointed Officers'),
    (132, resignation_dates.year, '# Resigned Officers'),
)
for panel_position, event_years, panel_ylabel in histogram_panels:
    ax = plt.subplot(panel_position)
    # Cap the number of major ticks so the large tick labels do not collide.
    ax.xaxis.set_major_locator(plt.MaxNLocator(5))
    ax.yaxis.set_major_locator(plt.MaxNLocator(6))

    ax.hist(event_years, bins=20)
    ax.set_xlabel('Year', fontsize=28)
    ax.set_ylabel(panel_ylabel, fontsize=26)
    
# Right-hand panel of the 1x3 figure: number of officers in service over time.
ax = plt.subplot(133)
ax.xaxis.set_major_locator(plt.MaxNLocator(5))
ax.yaxis.set_major_locator(plt.MaxNLocator(6))

# DatetimeIndex.min()/max() skip missing dates (NaT). The builtin min()/max()
# compare element by element, so their result depended on where a NaT sat in
# the column (NaT whenever it came first), and they loop in Python.
first_appointment_date = appointment_dates.min()
last_appointment_date = appointment_dates.max()
delta_time = last_appointment_date - first_appointment_date # largest delta time

# Express both dates as a fraction of the appointment window: 0 is the first
# appointment, 1 the last one. The indexes parsed at the top of this cell are
# reused instead of parsing the roster columns a second time.
actives = (appointment_dates - first_appointment_date)/delta_time
inactives = (resignation_dates - first_appointment_date)/delta_time
# A missing resignation date becomes 1, i.e. the end of the window
# (presumably officers who have not resigned -- confirm against the data).
inactives = np.nan_to_num(inactives, nan=1)

range_time = np.linspace(0,1,50)
years = [convert_to_year(r, first_appointment_date, last_appointment_date) for r in range_time]
# An officer counts as active at r when appointed strictly before r and
# resigned strictly after r.
how_many_active = [np.sum((actives < r) & (inactives > r)) for r in range_time]

# Both end points are left out of the plot: nothing is appointed before r = 0,
# and at r = 1 the fill value 1 no longer satisfies inactives > r.
plt.plot(years[1:-1], how_many_active[1:-1], lw = 3)
plt.ylabel('# Active officers', fontsize = 26)
plt.xlabel('Year', fontsize = 28)

plt.tight_layout()
plt.savefig('../doc/figs/history.pdf', dpi=1000, bbox_inches = 'tight')
plt.show()

In [None]:
# Remove rows whose birthyear is after 1998. Rows with a missing birthyear fail
# the comparison and are therefore kept, as before.
# NOTE(review): the 1998 cut-off presumably filters implausible entries -- confirm.
roster_by = roster[~(roster['birthyear'] > 1998)]

# Birth years as integers: np.bincount rejects float input, which is what the
# column holds whenever the CSV contains a missing birthyear.
yearz = roster_by['birthyear'].dropna().sort_values().astype(int)
min_year = yearz.min()
max_year = yearz.max()
width_year_windows = 5

# Number of officers per window of `width_year_windows` birth years.
binned = np.bincount(((yearz - min_year)//width_year_windows).values)
# One bin start per bincount entry. np.arange(min_year, max_year, width) came
# out one element short whenever max_year - min_year was a multiple of the
# width, which made the x and y arrays of the plot disagree in length.
bins = int(min_year) + width_year_windows*np.arange(len(binned), dtype = int)
# Tick labels such as '1940-44': bin start, then the last two digits of its end.
bins_text = ['{}-{}'.format(start, str(start + width_year_windows - 1)[-2:]) for start in bins]

# Officers not resigned before 2019-01-01: a missing resignation date is
# replaced by that day, then rows with an earlier resignation are filtered out.
# NOTE(review): the comparison is made against a string, so it assumes the
# column holds ISO-formatted (YYYY-MM-DD) text -- confirm.
act_roster = roster_by.fillna({'resignation_date':'2019-01-01'})
act_roster = act_roster[~(act_roster['resignation_date'] < '2019-01-01')]
act_yearz = act_roster['birthyear'].dropna().sort_values().astype(int)
# minlength keeps this array as long as `binned` even when the last windows
# contain no officer from act_roster.
act_binned = np.bincount(((act_yearz - min_year)//width_year_windows).values, minlength = len(binned))

In [None]:
# Frequency tables over the full roster, one per column of interest.
gender_count = roster['gender'].value_counts()
race_count = roster['race'].value_counts()
birthyear_count = roster['birthyear'].sort_values().value_counts()
status_count = roster['status'].value_counts()
# Cell output: the gender and race tables.
gender_count, race_count

In [None]:
# Frequency tables computed on act_roster, one per column of interest.
act_gender_count = act_roster['gender'].value_counts()
act_race_count = act_roster['race'].value_counts()
act_birthyear_count = act_roster['birthyear'].sort_values().value_counts()
act_status_count = act_roster['status'].value_counts()
# Cell output: the gender and race tables.
act_gender_count, act_race_count

In [None]:
plt.figure(figsize=(21, 2.5))
ax = plt.subplot(111)

# Two stem series per birth-year bin, shifted one year left/right of the bin
# start so they sit side by side: `binned` in blue, `act_binned` in red.
stem_series = (
    (bins - 1, binned, {'color': 'blue'}),
    (bins + 1, act_binned, {'color': 'r', 'label': 'Not resigned as of Jan 1st 2019'}),
)
stem_base = np.zeros(len(bins))
for stem_x, stem_top, marker_style in stem_series:
    # Marker at the count, plus a vertical line from zero up to it.
    plt.scatter(stem_x, stem_top, **marker_style)
    plt.vlines(x=stem_x, ymin=stem_base, ymax=stem_top, color=marker_style['color'])

# Label every bin with its year range and tilt the labels around their anchor.
plt.xticks(bins, bins_text)
plt.setp(ax.xaxis.get_majorticklabels(), rotation=25, ha="right", rotation_mode="anchor")
ax.set_ylabel('# Officers', fontsize=25)
ax.set_xlabel('Birthyear', fontsize=25)
for set_tick_font in (plt.xticks, plt.yticks):
    set_tick_font(fontsize=25)
ax.legend(fontsize=20, loc='upper left')
plt.savefig('../doc/figs/history_by.pdf', dpi=1000, bbox_inches='tight')
plt.show()