Reading and preprocessing "guns.csv" file

In [23]:
import csv

In [24]:
# Load every row of guns.csv into memory as a list of lists of strings.
# The `with` block closes the file handle as soon as the rows have been read,
# instead of leaving it open for the lifetime of the kernel.
# newline="" is what the csv module documentation requires for files passed
# to csv.reader, so that newlines embedded in quoted fields are parsed correctly.
with open("guns.csv", newline="") as f:
    guns = csv.reader(f)
    # Materialise the rows while the file is still open; the reader is lazy.
    data = list(guns)

In [25]:
# Peek at the first five parsed rows; at this point the header row is still
# the first element of `data`.
print(data[:5])

[['', 'year', 'month', 'intent', 'police', 'sex', 'age', 'race', 'hispanic', 'place', 'education'], ['1', '2012', '01', 'Suicide', '0', 'M', '34', 'Asian/Pacific Islander', '100', 'Home', '4'], ['2', '2012', '01', 'Suicide', '0', 'F', '21', 'White', '100', 'Street', '3'], ['3', '2012', '01', 'Suicide', '0', 'M', '60', 'White', '100', 'Other specified', '4'], ['4', '2012', '02', 'Suicide', '0', 'M', '64', 'White', '100', 'Home', '4']]


In [26]:
# Peel the first row (the column names) off the front of the table and keep
# the remaining rows as the records.
# NOTE: this rebinds `data`, so running the cell a second time would strip
# one more row; re-run the loading cell first if you need to repeat it.
headers, data = data[0], data[1:]

In [27]:
# Show the column names, then the first five records on the following line.
print(headers, data[:5], sep="\n")

['', 'year', 'month', 'intent', 'police', 'sex', 'age', 'race', 'hispanic', 'place', 'education']
[['1', '2012', '01', 'Suicide', '0', 'M', '34', 'Asian/Pacific Islander', '100', 'Home', '4'], ['2', '2012', '01', 'Suicide', '0', 'F', '21', 'White', '100', 'Street', '3'], ['3', '2012', '01', 'Suicide', '0', 'M', '60', 'White', '100', 'Other specified', '4'], ['4', '2012', '02', 'Suicide', '0', 'M', '64', 'White', '100', 'Home', '4'], ['5', '2012', '02', 'Suicide', '0', 'M', '31', 'White', '100', 'Other specified', '2']]


Creating a dictionary of death counts by year

In [28]:
# Index 1 of every record is the 'year' column (still a string).
years = [row[1] for row in data]

# Tally how many records fall in each year.
year_counts = {}
for year in years:
    # .get supplies 0 the first time a year is seen.
    year_counts[year] = year_counts.get(year, 0) + 1
print(year_counts)

{'2013': 33636, '2012': 33563, '2014': 33599}


Creating a dictionary of death counts by date, utilizing the datetime.datetime class

In [29]:
import datetime

# The dataset only records year (index 1) and month (index 2), so every
# record is pinned to the first day of its month.
dates = [
    datetime.datetime(year=int(row[1]), month=int(row[2]), day=1)
    for row in data
]
print(dates[:5])

[datetime.datetime(2012, 1, 1, 0, 0), datetime.datetime(2012, 1, 1, 0, 0), datetime.datetime(2012, 1, 1, 0, 0), datetime.datetime(2012, 2, 1, 0, 0), datetime.datetime(2012, 2, 1, 0, 0)]


In [30]:
# Tally how many records share each month-start datetime.
date_counts = {}
for date in dates:
    try:
        date_counts[date] += 1
    except KeyError:
        # First record seen for this month.
        date_counts[date] = 1
print(date_counts)

{datetime.datetime(2014, 5, 1, 0, 0): 2864, datetime.datetime(2013, 2, 1, 0, 0): 2375, datetime.datetime(2014, 11, 1, 0, 0): 2756, datetime.datetime(2012, 6, 1, 0, 0): 2826, datetime.datetime(2013, 4, 1, 0, 0): 2798, datetime.datetime(2014, 6, 1, 0, 0): 2931, datetime.datetime(2013, 1, 1, 0, 0): 2864, datetime.datetime(2012, 12, 1, 0, 0): 2791, datetime.datetime(2014, 2, 1, 0, 0): 2361, datetime.datetime(2012, 3, 1, 0, 0): 2743, datetime.datetime(2013, 10, 1, 0, 0): 2808, datetime.datetime(2012, 8, 1, 0, 0): 2954, datetime.datetime(2013, 5, 1, 0, 0): 2806, datetime.datetime(2013, 11, 1, 0, 0): 2758, datetime.datetime(2014, 3, 1, 0, 0): 2684, datetime.datetime(2012, 2, 1, 0, 0): 2357, datetime.datetime(2013, 6, 1, 0, 0): 2920, datetime.datetime(2014, 1, 1, 0, 0): 2651, datetime.datetime(2012, 7, 1, 0, 0): 3026, datetime.datetime(2012, 11, 1, 0, 0): 2729, datetime.datetime(2012, 10, 1, 0, 0): 2733, datetime.datetime(2014, 8, 1, 0, 0): 2970, datetime.datetime(2013, 8, 1, 0, 0): 2859, date

Creating dictionaries of death counts by race and by sex

In [37]:
# Tally records by the 'sex' column (index 5).
sex_counts = {}
for record in data:
    sex = record[5]
    sex_counts[sex] = sex_counts.get(sex, 0) + 1
print(sex_counts)

{'F': 14449, 'M': 86349}


It appears that the number of males killed by guns (86,349) far exceeds the number of females killed by guns (14,449).  This may suggest that males are disproportionately involved in dangerous activity compared to females, but confirming that would require further research.

In [36]:
# Tally records by the 'race' column (index 7).
race_counts = {}
for record in data:
    race = record[7]
    if race not in race_counts:
        # Start the tally for a race the first time it appears.
        race_counts[race] = 0
    race_counts[race] += 1
print(race_counts)

{'Black': 23296, 'White': 66237, 'Native American/Native Alaskan': 917, 'Hispanic': 9022, 'Asian/Pacific Islander': 1326}


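Whites and Blacks who are killed by guns far outnumber those of other ethnic groups.  For Whites this is expected, since they are by far the largest group in the US; for the other groups, raw counts alone cannot tell us whether a group is affected disproportionately, because the groups differ greatly in population size.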

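However, we have access to census and racial population data, which we can use to calculate rates of gun death within each racial population.  We will try this below by calculating the number of gun deaths per 100,000 people within each racial population.  Note that the death counts cover all three years in the dataset (2012-2014) while the population figures come from the 2010 census, so the results are three-year totals per 100,000 people rather than annual rates.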

In [42]:
# Load census.csv into memory as a list of rows (each row a list of strings).
# The `with` block closes the file handle once the rows have been read,
# instead of leaving it open for the lifetime of the kernel.
# newline="" is what the csv module documentation requires for files passed
# to csv.reader, so that newlines embedded in quoted fields are parsed correctly.
with open("census.csv", newline="") as f:
    # Materialise the rows while the file is still open; the reader is lazy.
    census = list(csv.reader(f))

In [43]:
# Show the header row and up to four data rows of the census table.
print(census[:5])

[['Id', 'Year', 'Id', 'Sex', 'Id', 'Hispanic Origin', 'Id', 'Id2', 'Geography', 'Total', 'Race Alone - White', 'Race Alone - Hispanic', 'Race Alone - Black or African American', 'Race Alone - American Indian and Alaska Native', 'Race Alone - Asian', 'Race Alone - Native Hawaiian and Other Pacific Islander', 'Two or More Races'], ['cen42010', 'April 1, 2010 Census', 'totsex', 'Both Sexes', 'tothisp', 'Total', '0100000US', '', 'United States', '308745538', '197318956', '44618105', '40250635', '3739506', '15159516', '674625', '6984195']]


In [47]:
# Population of each race category used in guns.csv, keyed by that
# category's name. The figures are copied from the census row printed above.
mapping = dict([
    # 'Race Alone - Asian' plus
    # 'Race Alone - Native Hawaiian and Other Pacific Islander'
    ('Asian/Pacific Islander', 15159516 + 674625),
    # 'Race Alone - Black or African American'
    ('Black', 40250635),
    # 'Race Alone - Hispanic'
    ('Hispanic', 44618105),
    # 'Race Alone - American Indian and Alaska Native'
    ('Native American/Native Alaskan', 3739506),
    # 'Race Alone - White'
    ('White', 197318956),
])

In [49]:
# Gun deaths per 100,000 people for each race: the death count divided by
# that race's census population, scaled by 100,000.
race_per_hundredk = dict(
    (race, (deaths / mapping[race]) * 100000)
    for race, deaths in race_counts.items()
)

In [50]:
# Display the gun-death rate per 100,000 people for each race.
print(race_per_hundredk)

{'Black': 57.8773477735196, 'White': 33.56849303419181, 'Native American/Native Alaskan': 24.521955573811088, 'Hispanic': 20.220491210910907, 'Asian/Pacific Islander': 8.374309664161762}


We can continue to analyze our data by limiting our counts by race to those deaths that were deemed homicides.

In [51]:
# Parallel lists: the 'intent' (index 3) and 'race' (index 7) of every record.
intents = [row[3] for row in data]
races = [row[7] for row in data]

# Tally by race, counting only the records whose intent is 'Homicide'.
homicide_race_counts = {}
for intent, race in zip(intents, races):
    if intent != 'Homicide':
        continue
    homicide_race_counts[race] = homicide_race_counts.get(race, 0) + 1
print(homicide_race_counts)

{'Black': 19510, 'White': 9147, 'Native American/Native Alaskan': 326, 'Hispanic': 5634, 'Asian/Pacific Islander': 559}


In [52]:
# Gun homicides per 100,000 people for each race: the homicide count divided
# by that race's census population, scaled by 100,000.
homicide_race_per_hundredk = dict(
    (race, (homicides / mapping[race]) * 100000)
    for race, homicides in homicide_race_counts.items()
)

In [53]:
# Display the gun-homicide rate per 100,000 people for each race.
print(homicide_race_per_hundredk)

{'Black': 48.471284987180944, 'White': 4.6356417981453335, 'Native American/Native Alaskan': 8.717729026240365, 'Hispanic': 12.627161104219914, 'Asian/Pacific Islander': 3.530346230970155}


By further analyzing the data in this way, we uncover some deeper insights regarding the situation of gun deaths in the US.  We see very clearly that specific races, namely Hispanics and in particular Blacks, suffer homicide by gun at a much greater rate than other races.