# Murder offenders: age, sex, race

Combines the yearly Supplemental Homicide Report (SHR) tables into a few CSV files for easier analysis. Source files are mapped for 2003-2013, but only the years listed in `YEARS` (currently 2006-2013) are processed.

## Notes

* Ethnicity is not used: footnote (1) states that "ethnicity totals are representative of those agencies that provided ethnicity breakdowns.  Not all agencies provide ethnicity data, therefore the race and ethnicity totals will not equal", and ethnicity totals are not reported for all years.

In [107]:
%matplotlib inline
from collections import OrderedDict
import textwrap
import pandas as pd
import seaborn as sns

In [108]:
# Maps a report year (as a string) to the path of the Excel workbook used for
# that year.  File names differ from release to release.
#
# Releases that are known but deliberately left out of the mapping:
#   2000: cius_data/2000/table2-5_vicage00.xls  (https://ucr.fbi.gov/crime-in-the-u.s/2000)
#   2001: cius_data/2001/table2-5_vicage01.xls  (https://ucr.fbi.gov/crime-in-the-u.s/2001)
#   2002: cius_data/2002/table2-5_vicage02.xls  (https://ucr.fbi.gov/crime-in-the-u.s/2002)
TABLE_3_FILE_MAPPING = {
    # https://ucr.fbi.gov/crime-in-the-u.s/2003/
    '2003': 'cius_data/2003/Copy of 03tbl2-3.xls',
    # https://www2.fbi.gov/ucr/cius_04/offenses_reported/violent_crime/murder.html
    '2004': 'cius_data/2004/04tbl2-3a.xls',
    # https://www2.fbi.gov/ucr/05cius/offenses/expanded_information/data/documents/05shrtab1.xls
    '2005': 'cius_data/2005/05shrtab1.xls',
    '2006': 'cius_data/CIUS2006datatables/06shrtable3.xls',
    '2007': 'cius_data/CIUS2007datatables/07shrtbl3.xls',
    '2008': 'cius_data/CIUS2008datatables/08shrtbl03.xls',
    '2009': 'cius_data/CIUS2009datatables/09shrtbl03.xls',
    '2010': 'cius_data/CIUS2010datatables/10shrtbl03.xls',
    '2011': 'cius_data/CIUS2011datatables/SHR Tables 2011/Expanded_Homicide_Data_Table_3_Murder_Offenders_by_Age_Sex_and_Race_2011.xls',
    '2012': 'cius_data/cius2012datatables/Expanded_Homicide_Data_Table_3_Murder_Offenders_by_Age_Sex_and_Race_2012.xls',
    '2013': 'cius_data/cius2013datatables/Expanded_Homicide_Data_Table_3_Murder_Offenders_by_Age_Sex_and_Race_2013.xls',
}

# Years actually processed by the rest of the notebook: 2006 through 2013.
# NOTE(review): 2003-2005 are mapped above but not processed -- presumably
# their sheet layout differs; confirm before adding them here.
YEARS = [str(year) for year in range(2006, 2014)]

In [109]:
# Workbook that the yearly US population figures are read from.
POPULATION_FILE = 'cius_data/cius2013datatables/Table_1_Crime_in_the_United_States_by_Volume_and_Rate_per_100000_Inhabitants_1994-2013.xls'

# Keep sheet rows 3..22 and the two columns holding the year and the population.
# NOTE(review): presumably one row per year for 1994-2013 -- confirm against the workbook.
us_population = pd.read_excel(POPULATION_FILE).loc[3:22, ['Table 1', ' ']]
us_population.columns = ['Year', 'US Population']

# Two year cells are overwritten with a plain year value.
# NOTE(review): presumably these cells carry a footnote marker in the sheet -- confirm.
us_population.loc[10, 'Year'] = 2001
us_population.loc[21, 'Year'] = 2012

# Years become strings so they can be looked up with the string entries of YEARS.
us_population['Year'] = us_population['Year'].apply(str)
us_population = us_population.set_index('Year')

# Persist the full range first, then narrow the in-memory frame to YEARS.
us_population.to_csv('cius_data/processed/us-population-1994-2013.csv')
us_population = us_population.loc[YEARS, :]

# Short README written next to the CSV describing where the data came from.
txt = textwrap.dedent("""\
# Population of the US

Generated from the Table_1_Crime_in_the_United_States_by_Volume_and_Rate_per_100000_Inhabitants_1994-2013.xls file in the 2013 Crime in the US report data table download
""")

with open('cius_data/processed/us-population-1994-2013.txt', 'w') as f:
    f.write(txt)


In [110]:
def offenders_by_age(df, columns):
    """Pick the per-age-bin counts out of one raw sheet.

    Rows from label 4 onward are kept and renumbered from 0; `columns` must
    name exactly two columns, which are treated as (age bin label, count).

    Args:
        df: raw sheet as a DataFrame.
        columns: two column labels of `df` -- label column first, count
            column second.

    Returns:
        OrderedDict mapping each age-bin name to the value found in the count
        column at that bin's fixed row offset.
    """
    # (output key, row offset after renumbering).  Offset 1 is not read.
    # NOTE(review): presumably offset 1 is a percent-distribution row -- confirm.
    bin_offsets = (
        ('Total', 0),
        ('Under 18', 2),
        ('Under 22', 3),
        ('18 and over', 4),
        ('Infant (under 1)', 5),
        ('1 to 4', 6),
        ('5 to 8', 7),
        ('9 to 12', 8),
        ('13 to 16', 9),
        ('17 to 19', 10),
        ('20 to 24', 11),
        ('25 to 29', 12),
        ('30 to 34', 13),
        ('35 to 39', 14),
        ('40 to 44', 15),
        ('45 to 49', 16),
        ('50 to 54', 17),
        ('55 to 59', 18),
        ('60 to 64', 19),
        ('65 to 69', 20),
        ('70 to 74', 21),
        ('75 and over', 22),
        ('Unknown age', 23),
    )

    table = df.loc[4:, columns].reset_index(drop=True)
    table.columns = ['Age bin', 'Count']

    counts = OrderedDict()
    for bin_name, offset in bin_offsets:
        counts[bin_name] = table.loc[offset, 'Count']
    return counts

def offenders_by_age_by_year(year, columns):
    """Load the workbook mapped to `year` and return its age breakdown.

    Args:
        year: key into TABLE_3_FILE_MAPPING (a year string).
        columns: the (label column, count column) pair handed on to
            offenders_by_age.

    Returns:
        A one-row DataFrame indexed by `year`, with one column per age bin.
    """
    sheet = pd.read_excel(TABLE_3_FILE_MAPPING[year])
    breakdown = offenders_by_age(sheet, columns)
    return pd.DataFrame([breakdown], index=[year], columns=list(breakdown.keys()))

def male_offenders_by_year(year):
    """Return the one-row age breakdown for `year` taken from sheet column 'Unnamed: 2'.

    NOTE(review): presumably 'Unnamed: 2' is the male-offender column -- confirm.
    """
    label_and_count = ['Expanded Homicide Data Table 3', 'Unnamed: 2']
    return offenders_by_age_by_year(year, label_and_count)

def female_offenders_by_year(year):
    """Return the one-row age breakdown for `year` taken from sheet column 'Unnamed: 3'.

    NOTE(review): presumably 'Unnamed: 3' is the female-offender column -- confirm.
    """
    label_and_count = ['Expanded Homicide Data Table 3', 'Unnamed: 3']
    return offenders_by_age_by_year(year, label_and_count)

def unknown_sex_offenders_by_year(year):
    """Return the one-row age breakdown for `year` taken from sheet column 'Unnamed: 4'.

    NOTE(review): presumably 'Unnamed: 4' is the unknown-sex column -- confirm.
    """
    label_and_count = ['Expanded Homicide Data Table 3', 'Unnamed: 4']
    return offenders_by_age_by_year(year, label_and_count)

def white_offenders_by_year(year):
    """Return the one-row age breakdown for `year` taken from sheet column 'Unnamed: 5'.

    NOTE(review): presumably 'Unnamed: 5' is the white-offender column -- confirm.
    """
    # The label column must be the Table 3 header, the same one the sex
    # helpers select from these workbooks.  Asking .loc for a column label
    # that is absent from the sheet raises KeyError in current pandas.
    return offenders_by_age_by_year(year, ['Expanded Homicide Data Table 3', 'Unnamed: 5'])

def black_offenders_by_year(year):
    """Return the one-row age breakdown for `year` taken from sheet column 'Unnamed: 6'.

    NOTE(review): presumably 'Unnamed: 6' is the black-offender column -- confirm.
    """
    # The label column must be the Table 3 header, the same one the sex
    # helpers select from these workbooks.  Asking .loc for a column label
    # that is absent from the sheet raises KeyError in current pandas.
    return offenders_by_age_by_year(year, ['Expanded Homicide Data Table 3', 'Unnamed: 6'])

def other_race_offenders_by_year(year):
    """Return the one-row age breakdown for `year` taken from sheet column 'Unnamed: 7'.

    NOTE(review): presumably 'Unnamed: 7' is the other-race column -- confirm.
    """
    # The label column must be the Table 3 header, the same one the sex
    # helpers select from these workbooks.  Asking .loc for a column label
    # that is absent from the sheet raises KeyError in current pandas.
    return offenders_by_age_by_year(year, ['Expanded Homicide Data Table 3', 'Unnamed: 7'])

def unknown_race_offenders_by_year(year):
    """Return the one-row age breakdown for `year` taken from sheet column 'Unnamed: 8'.

    NOTE(review): presumably 'Unnamed: 8' is the unknown-race column -- confirm.
    """
    # The label column must be the Table 3 header, the same one the sex
    # helpers select from these workbooks.  Asking .loc for a column label
    # that is absent from the sheet raises KeyError in current pandas.
    return offenders_by_age_by_year(year, ['Expanded Homicide Data Table 3', 'Unnamed: 8'])

In [111]:
def process(fn):
    """Build the multi-year table for one offender group.

    Args:
        fn: callable taking a year string and returning a one-row DataFrame
            indexed by that year.

    Returns:
        `us_population` joined with the rows produced by `fn` for every year
        in YEARS, stacked in YEARS order.
    """
    yearly_rows = [fn(year) for year in YEARS]
    stacked = pd.concat(yearly_rows)
    return us_population.join(stacked)


# One table per sex grouping ...
male = process(male_offenders_by_year)
female = process(female_offenders_by_year)
unknown_sex = process(unknown_sex_offenders_by_year)

# ... and one per race grouping.
white = process(white_offenders_by_year)
black = process(black_offenders_by_year)
other_race = process(other_race_offenders_by_year)
unknown_race = process(unknown_race_offenders_by_year)

In [112]:
# Summary by sex: population plus the 'Total' column of each per-sex table,
# side by side, with a grand total of the three groups.
by_sex = pd.concat(
    [us_population] + [group['Total'] for group in (male, female, unknown_sex)],
    axis=1,
)
by_sex.columns = ['US Population', 'Total male', 'Total female', 'Total unknown sex']
by_sex['Total'] = (
    by_sex['Total male']
    + by_sex['Total female']
    + by_sex['Total unknown sex']
)

# Summary by race: same layout, built from the four per-race tables.
by_race = pd.concat(
    [us_population] + [group['Total'] for group in (white, black, other_race, unknown_race)],
    axis=1,
)
by_race.columns = ['US Population', 'Total white', 'Total black', 'Total other race', 'Total unknown race']
by_race['Total'] = (
    by_race['Total white']
    + by_race['Total black']
    + by_race['Total other race']
    + by_race['Total unknown race']
)

In [113]:
# Caveats listed at the bottom of the generated README.
footnotes = ['Age bins such as under 18, under 22, and 18 and over do not include unknown ages']

# One "<year>: <workbook file name>" line per processed year (directory part dropped).
files = ['%s: %s' % (year, TABLE_3_FILE_MAPPING[year].split('/')[-1]) for year in YEARS]

# The template is filled in before dedent is applied.
txt = textwrap.dedent("""\
# Age, sex, and race for murder offenders from %s to %s

Generated from the following tables in the Supplemental Homicide Report (SHR) from the corresponding year's Crime in the US data table download:
%s

Footnotes/caveats:
%s
""" % (YEARS[0], YEARS[-1], '\n'.join(files), '\n'.join(footnotes)))

with open('cius_data/processed/shr-table-3-murder-offenders-asr/murder-offenders-age-sex-race.txt', 'w') as f:
    f.write(txt)

In [114]:
# Every table is written as <stem>.csv into the processed SHR Table 3 folder.
# NOTE(review): the stems say 2003-2013 while YEARS covers 2006-2013, and the
# per-sex stems say "by-age-and-race" although those tables hold age bins
# only; the names are kept as-is because other files may read them.
dest_path = 'cius_data/processed/shr-table-3-murder-offenders-asr/%s.csv'
for stem, table in (
    ('male-offenders-by-age-and-race-2003-2013', male),
    ('female-offenders-by-age-and-race-2003-2013', female),
    ('unknown-sex-offenders-by-age-and-race-2003-2013', unknown_sex),
    ('white-offenders-by-age-2003-2013', white),
    ('black-offenders-by-age-2003-2013', black),
    ('other-race-offenders-by-age-2003-2013', other_race),
    ('unknown-race-offenders-by-age-2003-2013', unknown_race),
    ('offenders-by-sex-2003-2013', by_sex),
    ('offenders-by-race-2003-2013', by_race),
):
    table.to_csv(dest_path % stem)