In [2]:
import csv
# converting data into a list of list
data = list(csv.reader(open('guns.csv','r')))

In [3]:
# keeping headers in a separate variable
headers = data[0]
print(headers)
# extracting headers
data = data[1:]

['', 'year', 'month', 'intent', 'police', 'sex', 'age', 'race', 'hispanic', 'place', 'education']


In [4]:
# counting gun deaths by year
years = [year[1] for year in data]
year_counts = dict()
for year in years:
    if year in year_counts:
        year_counts[year] += 1
    else:
        year_counts[year] = 1
year_counts

{'2012': 33563, '2013': 33636, '2014': 33599}

In [5]:
# explore monthly gun deaths by converting data into datetime objects
import datetime as dt
dates = [dt.datetime(year=int(date[1]), month=int(date[2]), day=1) for date in data]
date_counts = dict()
for date in dates:
    if date in date_counts:
        date_counts[date] += 1
    else:
        date_counts[date] = 1
date_counts

{datetime.datetime(2012, 1, 1, 0, 0): 2758,
 datetime.datetime(2012, 2, 1, 0, 0): 2357,
 datetime.datetime(2012, 3, 1, 0, 0): 2743,
 datetime.datetime(2012, 4, 1, 0, 0): 2795,
 datetime.datetime(2012, 5, 1, 0, 0): 2999,
 datetime.datetime(2012, 6, 1, 0, 0): 2826,
 datetime.datetime(2012, 7, 1, 0, 0): 3026,
 datetime.datetime(2012, 8, 1, 0, 0): 2954,
 datetime.datetime(2012, 9, 1, 0, 0): 2852,
 datetime.datetime(2012, 10, 1, 0, 0): 2733,
 datetime.datetime(2012, 11, 1, 0, 0): 2729,
 datetime.datetime(2012, 12, 1, 0, 0): 2791,
 datetime.datetime(2013, 1, 1, 0, 0): 2864,
 datetime.datetime(2013, 2, 1, 0, 0): 2375,
 datetime.datetime(2013, 3, 1, 0, 0): 2862,
 datetime.datetime(2013, 4, 1, 0, 0): 2798,
 datetime.datetime(2013, 5, 1, 0, 0): 2806,
 datetime.datetime(2013, 6, 1, 0, 0): 2920,
 datetime.datetime(2013, 7, 1, 0, 0): 3079,
 datetime.datetime(2013, 8, 1, 0, 0): 2859,
 datetime.datetime(2013, 9, 1, 0, 0): 2742,
 datetime.datetime(2013, 10, 1, 0, 0): 2808,
 datetime.datetime(2013, 11,

In [6]:
# creating unique counts for gun deaths by gender and race
sex_counts = dict()
race_counts = dict()
gender = [sex[5] for sex in data]
races = [race[7] for race in data]

for sex in gender:
    if sex in sex_counts:
        sex_counts[sex] += 1
    else:
        sex_counts[sex] = 1

for race in races:
    if race in race_counts:
        race_counts[race] += 1
    else:
        race_counts[race] = 1

print(race_counts)
print(sex_counts)
    

{'Black': 23296, 'White': 66237, 'Hispanic': 9022, 'Native American/Native Alaskan': 917, 'Asian/Pacific Islander': 1326}
{'F': 14449, 'M': 86349}


In [7]:
census = list(csv.reader(open("census.csv",'r')))
census

[['Id',
  'Year',
  'Id',
  'Sex',
  'Id',
  'Hispanic Origin',
  'Id',
  'Id2',
  'Geography',
  'Total',
  'Race Alone - White',
  'Race Alone - Hispanic',
  'Race Alone - Black or African American',
  'Race Alone - American Indian and Alaska Native',
  'Race Alone - Asian',
  'Race Alone - Native Hawaiian and Other Pacific Islander',
  'Two or More Races'],
 ['cen42010',
  'April 1, 2010 Census',
  'totsex',
  'Both Sexes',
  'tothisp',
  'Total',
  '0100000US',
  '',
  'United States',
  '308745538',
  '197318956',
  '44618105',
  '40250635',
  '3739506',
  '15159516',
  '674625',
  '6984195']]

In order for the gun death per race statistics to be meaning, we need to know the proportion of those deaths to the population of their respective race. Using data from census.csv, we can normalize the data and calculate deaths per hundred thousand

In [22]:
# data from census
race_dictionary = {
    'Asian/Pacific Islander' : 15159516+3739506,
    'Black' : 40250635,
    'Native American/Native Alaskan' : 3739506,
    'Hispanic' : 44618105,
    'White' : 197318956
}
# calculating deaths per 100,000
race_per_hundredk = dict()
for race in race_counts:
    race_per_hundredk[race] = (
        race_counts[race] / race_dictionary[race]) * 100000

# sanity check    
print(race_per_hundredk)
print('black: ',23296 / 40250635 * 100000 )    

{'Asian/Pacific Islander': 7.016236078247858, 'White': 33.56849303419181, 'Hispanic': 20.220491210910907, 'Native American/Native Alaskan': 24.521955573811088, 'Black': 57.8773477735196}
black:  57.8773477735196


In [65]:
intents = [ intent[3] for intent in data ]
races = [ race[7] for race in data ]
homicide_race_counts = dict()

for i in enumerate(intents):
    if i[1] == 'Homicide':
        if races[i[0]] in homicide_race_counts:
            homicide_race_counts[races[i[0]]] += 1
        else:
            homicide_race_counts[races[i[0]]] = 1

homicide_race_counts

{'Asian/Pacific Islander': 559,
 'Black': 19510,
 'Hispanic': 5634,
 'Native American/Native Alaskan': 326,
 'White': 9147}

In [68]:
# calculating deaths per 100,000
homicide_race_per_hundredk = dict()
for race in homicide_race_counts:
    homicide_race_per_hundredk[race] = (
        homicide_race_counts[race] / race_dictionary[race]) * 100000

# sanity check    
print(homicide_race_per_hundredk)
print('black: ',19510 / 40250635 * 100000 )   

{'White': 4.6356417981453335, 'Asian/Pacific Islander': 2.9578250133789994, 'Hispanic': 12.627161104219914, 'Native American/Native Alaskan': 8.717729026240365, 'Black': 48.471284987180944}
black:  48.471284987180944
