### Load and format gun deaths data

In [9]:
import csv
import pprint

# Load the gun-deaths CSV into a list of rows (each row is a list of strings).
# The context manager closes the file handle as soon as the rows are read, and
# newline="" is what the csv module documents for file objects given to csv.reader.
with open("data/guns.csv", "r", newline="") as guns_file:
    crimes = list(csv.reader(guns_file))

# Preview the first four rows (the first of which is the header).
print(crimes[:4])

[['', 'year', 'month', 'intent', 'police', 'sex', 'age', 'race', 'hispanic', 'place', 'education'], ['1', '2012', '01', 'Suicide', '0', 'M', '34', 'Asian/Pacific Islander', '100', 'Home', '4'], ['2', '2012', '01', 'Suicide', '0', 'F', '21', 'White', '100', 'Street', '3'], ['3', '2012', '01', 'Suicide', '0', 'M', '60', 'White', '100', 'Other specified', '4']]


In [10]:
# Drop the header row (position 0) and keep every data record after it.
# NOTE: this rebinds `crimes`, so re-running the cell without reloading the
# CSV discards one more row each time.
crimes = [record for position, record in enumerate(crimes) if position != 0]

print(crimes[:4])

[['1', '2012', '01', 'Suicide', '0', 'M', '34', 'Asian/Pacific Islander', '100', 'Home', '4'], ['2', '2012', '01', 'Suicide', '0', 'F', '21', 'White', '100', 'Street', '3'], ['3', '2012', '01', 'Suicide', '0', 'M', '60', 'White', '100', 'Other specified', '4'], ['4', '2012', '02', 'Suicide', '0', 'M', '64', 'White', '100', 'Home', '4']]


### Gun deaths by year

In [11]:
# Tally gun deaths per year; the year is the second column (index 1) of each record.
year_counts = {}
for record in crimes:
    death_year = record[1]
    # .get() supplies 0 the first time a year is seen.
    year_counts[death_year] = year_counts.get(death_year, 0) + 1

print(year_counts)

{'2012': 33563, '2013': 33636, '2014': 33599}


### Gun deaths by date

In [12]:
import datetime

# Group gun deaths by month. Each record is bucketed under the first day of its
# (year, month) because the rows carry a year and a month column but no day.
date_counts = {}
skipped_rows = 0
for crime in crimes:
    try:
        date = datetime.datetime(year=int(crime[1]), month=int(crime[2]), day=1)
    except (ValueError, IndexError, OverflowError):
        # Non-numeric or out-of-range year/month, or a row that is too short.
        # Count it instead of dropping it silently so gaps in the totals are visible.
        skipped_rows += 1
        continue
    date_counts[date] = date_counts.get(date, 0) + 1

print(date_counts)
if skipped_rows:
    print("Skipped rows with an unparseable year/month:", skipped_rows)

{datetime.datetime(2014, 7, 1, 0, 0): 2884, datetime.datetime(2014, 9, 1, 0, 0): 2914, datetime.datetime(2014, 12, 1, 0, 0): 2857, datetime.datetime(2014, 2, 1, 0, 0): 2361, datetime.datetime(2014, 1, 1, 0, 0): 2651, datetime.datetime(2012, 6, 1, 0, 0): 2826, datetime.datetime(2012, 5, 1, 0, 0): 2999, datetime.datetime(2012, 4, 1, 0, 0): 2795, datetime.datetime(2012, 8, 1, 0, 0): 2954, datetime.datetime(2013, 11, 1, 0, 0): 2758, datetime.datetime(2014, 10, 1, 0, 0): 2865, datetime.datetime(2013, 2, 1, 0, 0): 2375, datetime.datetime(2012, 3, 1, 0, 0): 2743, datetime.datetime(2014, 11, 1, 0, 0): 2756, datetime.datetime(2014, 6, 1, 0, 0): 2931, datetime.datetime(2013, 12, 1, 0, 0): 2765, datetime.datetime(2014, 8, 1, 0, 0): 2970, datetime.datetime(2013, 5, 1, 0, 0): 2806, datetime.datetime(2013, 8, 1, 0, 0): 2859, datetime.datetime(2012, 7, 1, 0, 0): 3026, datetime.datetime(2012, 12, 1, 0, 0): 2791, datetime.datetime(2012, 1, 1, 0, 0): 2758, datetime.datetime(2012, 11, 1, 0, 0): 2729, dat

### Sex and Race

In [13]:
# Single pass over the records, tallying deaths by sex, by race, and
# (for records whose intent is "Homicide") by race again.
sex_counts = {}
race_counts = {}
homicide_race_count = {}

for record in crimes:
    # Column positions: 3 = intent, 5 = sex, 7 = race.
    intent, sex, race = record[3], record[5], record[7]

    sex_counts[sex] = sex_counts.get(sex, 0) + 1
    race_counts[race] = race_counts.get(race, 0) + 1

    if intent != "Homicide":
        continue
    homicide_race_count[race] = homicide_race_count.get(race, 0) + 1

print(sex_counts)
print(race_counts)

{'F': 14449, 'M': 86349}
{'Native American/Native Alaskan': 917, 'Asian/Pacific Islander': 1326, 'White': 66237, 'Black': 23296, 'Hispanic': 9022}


### Load and format census population data

In [14]:
import csv
import pprint

# Load the census population CSV into a list of rows (each row is a list of strings).
# The context manager closes the file handle as soon as the rows are read, and
# newline="" is what the csv module documents for file objects given to csv.reader.
with open("data/census.csv", "r", newline="") as census_file:
    populations = list(csv.reader(census_file))

print(populations[:4])

[['Id', 'Year', 'Id', 'Sex', 'Id', 'Hispanic Origin', 'Id', 'Id2', 'Geography', 'Total', 'Race Alone - White', 'Race Alone - Hispanic', 'Race Alone - Black or African American', 'Race Alone - American Indian and Alaska Native', 'Race Alone - Asian', 'Race Alone - Native Hawaiian and Other Pacific Islander', 'Two or More Races'], ['cen42010', 'April 1, 2010 Census', 'totsex', 'Both Sexes', 'tothisp', 'Total', '0100000US', '', 'United States', '308745538', '197318956', '44618105', '40250635', '3739506', '15159516', '674625', '6984195']]


In [15]:
# Keep only the first data row (index 1); index 0 is the header row.
# Before this step `populations` is a list of rows; afterwards it is one row of
# strings. The isinstance guard makes the cell safe to re-run: without it a
# second run would index into the already-selected row and leave a single
# string behind, breaking the column lookups that follow.
if isinstance(populations[0], list):
    populations = populations[1]

print(populations)

['cen42010', 'April 1, 2010 Census', 'totsex', 'Both Sexes', 'tothisp', 'Total', '0100000US', '', 'United States', '308745538', '197318956', '44618105', '40250635', '3739506', '15159516', '674625', '6984195']


In [16]:
# Total population for each race label used in the gun-deaths data, read by
# column position from the selected census row.
race_columns = [
    ('Hispanic', [11]),
    ('White', [10]),
    # Two census columns (positions 14 and 15) are summed into this one label.
    ('Asian/Pacific Islander', [14, 15]),
    ('Native American/Native Alaskan', [13]),
    ('Black', [12]),
]
mapping = {
    label: sum(int(populations[column]) for column in columns)
    for label, columns in race_columns
}
print(mapping)

{'Hispanic': 44618105, 'Asian/Pacific Islander': 15834141, 'White': 197318956, 'Black': 40250635, 'Native American/Native Alaskan': 3739506}


### Gun deaths per 100,000 people for each racial category

In [17]:
# Gun deaths per 100,000 residents for each race that has a population in `mapping`.
# NOTE(review): the counts cover every year in the deaths data while `mapping`
# appears to hold a single census snapshot, so this is a whole-period rate,
# not an annual one — confirm before comparing with published figures.
race_per_hundredk = {}

for race, deaths in race_counts.items():
    if race not in mapping:
        continue  # no population figure for this category
    race_per_hundredk[race] = deaths / mapping[race] * 100000  # scale to per 100,000

print(race_per_hundredk)

{'Native American/Native Alaskan': 24.521955573811088, 'Asian/Pacific Islander': 8.374309664161762, 'White': 33.56849303419181, 'Black': 57.8773477735196, 'Hispanic': 20.220491210910907}


### Homicide gun deaths per 100,000 people for each racial category

In [18]:
# Homicide gun deaths per 100,000 residents for each race that has a population
# in `mapping`.
# NOTE(review): the counts cover every year in the deaths data while `mapping`
# appears to hold a single census snapshot, so this is a whole-period rate,
# not an annual one — confirm before comparing with published figures.
homicide_race_per_hundredk = {}

for race, homicides in homicide_race_count.items():
    if race not in mapping:
        continue  # no population figure for this category
    homicide_race_per_hundredk[race] = homicides / mapping[race] * 100000  # scale to per 100,000

print(homicide_race_per_hundredk)

{'Native American/Native Alaskan': 8.717729026240365, 'Asian/Pacific Islander': 3.530346230970155, 'White': 4.6356417981453335, 'Black': 48.471284987180944, 'Hispanic': 12.627161104219914}
