# Exploring Gun Deaths in the US

## contains information on gun deaths in the US from 2012 to 2014.

Here, we explore the dataset and try to find patterns in the demographics of the victims.

In [1]:
import csv
f = open("guns.csv", "r")
csvreader = csv.reader(f)
data = list(csvreader)
data[:5]

[['',
  'year',
  'month',
  'intent',
  'police',
  'sex',
  'age',
  'race',
  'hispanic',
  'place',
  'education'],
 ['1',
  '2012',
  '01',
  'Suicide',
  '0',
  'M',
  '34',
  'Asian/Pacific Islander',
  '100',
  'Home',
  '4'],
 ['2', '2012', '01', 'Suicide', '0', 'F', '21', 'White', '100', 'Street', '3'],
 ['3',
  '2012',
  '01',
  'Suicide',
  '0',
  'M',
  '60',
  'White',
  '100',
  'Other specified',
  '4'],
 ['4', '2012', '02', 'Suicide', '0', 'M', '64', 'White', '100', 'Home', '4']]

In [2]:
headers = data[0]
data = data[1:]
print(headers)
data[:5]

['', 'year', 'month', 'intent', 'police', 'sex', 'age', 'race', 'hispanic', 'place', 'education']


[['1',
  '2012',
  '01',
  'Suicide',
  '0',
  'M',
  '34',
  'Asian/Pacific Islander',
  '100',
  'Home',
  '4'],
 ['2', '2012', '01', 'Suicide', '0', 'F', '21', 'White', '100', 'Street', '3'],
 ['3',
  '2012',
  '01',
  'Suicide',
  '0',
  'M',
  '60',
  'White',
  '100',
  'Other specified',
  '4'],
 ['4', '2012', '02', 'Suicide', '0', 'M', '64', 'White', '100', 'Home', '4'],
 ['5',
  '2012',
  '02',
  'Suicide',
  '0',
  'M',
  '31',
  'White',
  '100',
  'Other specified',
  '2']]

In [3]:
years = [row[1] for row in data]
year_counts = dict()
for each in years:
    if each in year_counts:
        year_counts[each] += 1
    else:
        year_counts[each] = 1
year_counts

{'2012': 33563, '2013': 33636, '2014': 33599}

In [4]:
import datetime
dates = [ datetime.datetime(year=int(row[1]), month=int(row[2]), day=1) for row in data]
date_counts = dict()
for each in dates:
    if each in date_counts:
        date_counts[each] += 1
    else:
        date_counts[each] = 1
date_counts


{datetime.datetime(2012, 1, 1, 0, 0): 2758,
 datetime.datetime(2012, 2, 1, 0, 0): 2357,
 datetime.datetime(2012, 3, 1, 0, 0): 2743,
 datetime.datetime(2012, 4, 1, 0, 0): 2795,
 datetime.datetime(2012, 5, 1, 0, 0): 2999,
 datetime.datetime(2012, 6, 1, 0, 0): 2826,
 datetime.datetime(2012, 7, 1, 0, 0): 3026,
 datetime.datetime(2012, 8, 1, 0, 0): 2954,
 datetime.datetime(2012, 9, 1, 0, 0): 2852,
 datetime.datetime(2012, 10, 1, 0, 0): 2733,
 datetime.datetime(2012, 11, 1, 0, 0): 2729,
 datetime.datetime(2012, 12, 1, 0, 0): 2791,
 datetime.datetime(2013, 1, 1, 0, 0): 2864,
 datetime.datetime(2013, 2, 1, 0, 0): 2375,
 datetime.datetime(2013, 3, 1, 0, 0): 2862,
 datetime.datetime(2013, 4, 1, 0, 0): 2798,
 datetime.datetime(2013, 5, 1, 0, 0): 2806,
 datetime.datetime(2013, 6, 1, 0, 0): 2920,
 datetime.datetime(2013, 7, 1, 0, 0): 3079,
 datetime.datetime(2013, 8, 1, 0, 0): 2859,
 datetime.datetime(2013, 9, 1, 0, 0): 2742,
 datetime.datetime(2013, 10, 1, 0, 0): 2808,
 datetime.datetime(2013, 11,

In [5]:
sex = [row[5] for row in data]
sex_counts = dict()
for each in sex:
    if each in sex_counts:
        sex_counts[each] += 1
    else:
        sex_counts[each] = 1
sex_counts

{'F': 14449, 'M': 86349}

In [6]:
race = [row[7] for row in data]
race_counts = dict()
for each in race:
    if each in race_counts:
        race_counts[each] += 1
    else:
        race_counts[each] = 1
race_counts

{'Asian/Pacific Islander': 1326,
 'Black': 23296,
 'Hispanic': 9022,
 'Native American/Native Alaskan': 917,
 'White': 66237}

In this project, we loaded the guns.csv data using csv module csvreader. There is a header row, so we saved it in headers and sliced the list of data. By using list conprehension, we can create lists with the data in specific column, then we can parse in the data and count the frequencies. We can see that there are a lot more male than female (86349 vs 14449), and lots of white victims.

We should take a look at the education status and also see if there are correlations between race and age.

In [7]:
f = open("census.csv", "r")
csvreader = csv.reader(f)
census = list(csvreader)
census_headers = census[0]
census = census[1]
print(census_headers)
census

['Id', 'Year', 'Id', 'Sex', 'Id', 'Hispanic Origin', 'Id', 'Id2', 'Geography', 'Total', 'Race Alone - White', 'Race Alone - Hispanic', 'Race Alone - Black or African American', 'Race Alone - American Indian and Alaska Native', 'Race Alone - Asian', 'Race Alone - Native Hawaiian and Other Pacific Islander', 'Two or More Races']


['cen42010',
 'April 1, 2010 Census',
 'totsex',
 'Both Sexes',
 'tothisp',
 'Total',
 '0100000US',
 '',
 'United States',
 '308745538',
 '197318956',
 '44618105',
 '40250635',
 '3739506',
 '15159516',
 '674625',
 '6984195']

In [8]:
mapping = {"Asian/Pacific Islander": int(census[13]) + int(census[14]) , "Black": int(census[12]) , "Native American/Native Alaskan": int(census[13]) , "Hispanic": int(census[11]) , "White": int(census[10]) }
race_per_hundredk = dict()
for key, value in race_counts.items():
    race_per_hundredk[key] = value / mapping[key] * 100000 
print(race_per_hundredk)

{'Asian/Pacific Islander': 7.016236078247858, 'White': 33.56849303419181, 'Native American/Native Alaskan': 24.521955573811088, 'Black': 57.8773477735196, 'Hispanic': 20.220491210910907}


In [10]:
intents = [row[3] for row in data]
races = [row[7] for row in data]
homicide_race_counts = dict()
for i, race in enumerate(races):
    if intents[i] == "Homicide":
        if race in homicide_race_counts:
            homicide_race_counts[race] += 1
        else:
            homicide_race_counts[race] = 1
homicide_race_hk = dict()
for key, value in homicide_race_counts.items():
    homicide_race_counts[key] = value / mapping[key] * 100000
homicide_race_counts

{'Asian/Pacific Islander': 2.9578250133789994,
 'Black': 48.471284987180944,
 'Hispanic': 12.627161104219914,
 'Native American/Native Alaskan': 8.717729026240365,
 'White': 4.6356417981453335}

After accounting for only homicides, we can see that 48 blacks die from homicide per 100000. Earlier we see that White has the most people dead, but that's because there are more Whites in USA. Percentage wise, black people are more likely to be victims in a homicide from 2012-2014.

I would like to see if Asian/Pacific Islander are more likely to commit suicide because they are considered more reserved than other races. 

Figure out the link, if any, between month and homicide rate.
Explore the homicide rate by gender.
Explore the rates of other intents, like Accidental, by gender and race.
Find out if gun death rates correlate to location and education.

In [11]:
suicide_race_counts = dict()
for i, race in enumerate(races):
    if intents[i] == "Suicide":
        if race in suicide_race_counts:
            suicide_race_counts[race] += 1
        else:
            suicide_race_counts[race] = 1
suicide_race_hk = dict()
for key, value in suicide_race_counts.items():
    suicide_race_counts[key] = value / mapping[key] * 100000
suicide_race_counts

{'Asian/Pacific Islander': 3.942002924807432,
 'Black': 8.278130270491385,
 'Hispanic': 7.106980451097149,
 'Native American/Native Alaskan': 14.841532544673013,
 'White': 28.06217969245692}