In [1]:
import csv
# Read the whole CSV into memory as a list of rows (each row is a list of
# strings). The context manager guarantees the file handle is closed once
# parsing is done; newline='' is what the csv module expects so that quoted
# fields containing line breaks are parsed correctly.
with open('guns.csv', 'r', newline='') as f:
    data_header = list(csv.reader(f))

# Skip the first row (presumably the column header) so that `data` holds
# only the records analysed in the cells below.
data = data_header[1:]


In [2]:
# Gun deaths by year

# Column 1 of every record is read as the year (kept as a string).
years = [row[1] for row in data]

# Tally how many records fall in each year.
year_counts = {}
for year in years:
    year_counts[year] = year_counts.get(year, 0) + 1

print(year_counts)

# There is little difference from one year to the next.

{'2014': 33599, '2013': 33636, '2012': 33563}


In [3]:
# Gun deaths by year and month

import datetime

# Columns 1 and 2 are read as year and month. The day is pinned to 1 so that
# all records from the same month map to the same datetime key.
dates = [datetime.datetime(int(row[1]), int(row[2]), 1) for row in data]

# Tally how many records fall in each (year, month) bucket.
date_counts = {}
for date in dates:
    date_counts[date] = date_counts.get(date, 0) + 1

print(date_counts)

{datetime.datetime(2012, 2, 1, 0, 0): 2357, datetime.datetime(2013, 1, 1, 0, 0): 2864, datetime.datetime(2012, 3, 1, 0, 0): 2743, datetime.datetime(2014, 7, 1, 0, 0): 2884, datetime.datetime(2014, 12, 1, 0, 0): 2857, datetime.datetime(2012, 12, 1, 0, 0): 2791, datetime.datetime(2014, 3, 1, 0, 0): 2684, datetime.datetime(2014, 8, 1, 0, 0): 2970, datetime.datetime(2014, 6, 1, 0, 0): 2931, datetime.datetime(2013, 12, 1, 0, 0): 2765, datetime.datetime(2014, 5, 1, 0, 0): 2864, datetime.datetime(2013, 8, 1, 0, 0): 2859, datetime.datetime(2013, 10, 1, 0, 0): 2808, datetime.datetime(2013, 11, 1, 0, 0): 2758, datetime.datetime(2013, 4, 1, 0, 0): 2798, datetime.datetime(2014, 9, 1, 0, 0): 2914, datetime.datetime(2012, 5, 1, 0, 0): 2999, datetime.datetime(2013, 6, 1, 0, 0): 2920, datetime.datetime(2012, 8, 1, 0, 0): 2954, datetime.datetime(2013, 2, 1, 0, 0): 2375, datetime.datetime(2012, 9, 1, 0, 0): 2852, datetime.datetime(2014, 10, 1, 0, 0): 2865, datetime.datetime(2012, 4, 1, 0, 0): 2795, date

In [4]:
# Gun deaths by sex and race

# Column 5 of every record is read as the sex code.
sexes = [row[5] for row in data]

# Tally records per sex code; setdefault seeds a zero the first time a
# code is seen so the increment below always has something to add to.
sex_counts = {}
for sex in sexes:
    sex_counts.setdefault(sex, 0)
    sex_counts[sex] += 1

print(sex_counts)

{'M': 86349, 'F': 14449}


In [5]:
# Column 7 of every record is read as the race label.
races = [row[7] for row in data]

# Tally records per race label.
race_counts = {}
for race in races:
    race_counts[race] = race_counts.get(race, 0) + 1

print(race_counts)

{'Black': 23296, 'Asian/Pacific Islander': 1326, 'White': 66237, 'Hispanic': 9022, 'Native American/Native Alaskan': 917}


In [6]:
# Gun deaths in the US appear to affect men far more than women. They also appear to disproportionately affect minorities, although we would need data on the share of each race in the overall US population to confirm this.
# There appears to be a minor seasonal pattern, with gun deaths peaking in the summer and declining in the winter. It might be useful to filter by intent, to see whether different categories of intent vary differently with season, race, or gender.

In [7]:
# We explored gun deaths by race, but that analysis only gives the total
# number of gun deaths per race in the US. Without knowing the size of each
# racial group, those totals cannot be compared meaningfully. What we really
# want is a rate of gun deaths per 100000 people of each race, so we read in
# census data on how the US population is split across racial categories.

# Parse the census CSV into a list of rows. The context manager closes the
# file handle as soon as parsing finishes; newline='' is what the csv module
# expects from the file object it reads.
with open('census.csv', 'r', newline='') as census_file:
    census = list(csv.reader(census_file))
print(census)

[['Id', 'Year', 'Id', 'Sex', 'Id', 'Hispanic Origin', 'Id', 'Id2', 'Geography', 'Total', 'Race Alone - White', 'Race Alone - Hispanic', 'Race Alone - Black or African American', 'Race Alone - American Indian and Alaska Native', 'Race Alone - Asian', 'Race Alone - Native Hawaiian and Other Pacific Islander', 'Two or More Races'], ['cen42010', 'April 1, 2010 Census', 'totsex', 'Both Sexes', 'tothisp', 'Total', '0100000US', '', 'United States', '308745538', '197318956', '44618105', '40250635', '3739506', '15159516', '674625', '6984195']]


In [8]:
# Earlier we tallied gun deaths per race in the dictionary race_counts. To
# turn those raw counts into a rate per 100,000 people, each count is divided
# by the population of that race (taken from the census dataset) and then
# multiplied by 100000.
# The two datasets name their racial categories differently, so the
# populations are mapped by hand onto the labels used in race_counts.

mapping = {
    "Native American/Native Alaskan": 3739506,
    # Census lists Asian and Pacific Islander separately; combine them.
    "Asian/Pacific Islander": 15159516 + 674625,
    "Black": 40250635,
    "Hispanic": 44618105,
    "White": 197318956
}

# Deaths per 100,000 people for each race label present in race_counts.
race_per_hundredk = {
    race: (deaths / mapping[race]) * 100000
    for race, deaths in race_counts.items()
}

print(race_per_hundredk)
    

{'Black': 57.8773477735196, 'Asian/Pacific Islander': 8.374309664161762, 'White': 33.56849303419181, 'Native American/Native Alaskan': 24.521955573811088, 'Hispanic': 20.220491210910907}


In [9]:
# To find the gun-related murder rate per 100000 people in each racial
# category, restrict the tally to records whose intent is "Homicide".

# Column 3 of every record is read as the intent label.
intents = [row[3] for row in data]
homicide_race_counts = {}

# races and intents are parallel lists, so walk them in lockstep.
for race, intent in zip(races, intents):
    # Every race seen gets an entry, even if it ends up with zero homicides.
    homicide_race_counts.setdefault(race, 0)
    if intent == "Homicide":
        homicide_race_counts[race] += 1

print(homicide_race_counts)

{'Black': 19510, 'Asian/Pacific Islander': 559, 'White': 9147, 'Hispanic': 5634, 'Native American/Native Alaskan': 326}


In [10]:
# Convert homicide counts into homicides per 100,000 people of each race.
# NOTE(review): this writes into race_per_hundredk in place, replacing the
# overall gun-death rates computed earlier with homicide-only rates.
race_per_hundredk.update(
    (race, (homicides / mapping[race]) * 100000)
    for race, homicides in homicide_race_counts.items()
)

print(race_per_hundredk)

{'Black': 48.471284987180944, 'Asian/Pacific Islander': 3.530346230970155, 'White': 4.6356417981453335, 'Native American/Native Alaskan': 8.717729026240365, 'Hispanic': 12.627161104219914}
