# Importing and Prepping Data on Gun Deaths

In [3]:
import csv
# Read the whole gun-deaths CSV into memory as a list of rows, where each row
# is a list of strings. The context manager closes the file handle as soon as
# the rows have been materialised (the bare open() previously left it open for
# the life of the kernel). newline='' is what the csv module asks for so that
# newlines embedded in quoted fields are handled by the reader itself.
with open("guns.csv", 'r', newline='') as f:
    data = list(csv.reader(f))
data[:5]

[['',
  'year',
  'month',
  'intent',
  'police',
  'sex',
  'age',
  'race',
  'hispanic',
  'place',
  'education'],
 ['1',
  '2012',
  '01',
  'Suicide',
  '0',
  'M',
  '34',
  'Asian/Pacific Islander',
  '100',
  'Home',
  '4'],
 ['2', '2012', '01', 'Suicide', '0', 'F', '21', 'White', '100', 'Street', '3'],
 ['3',
  '2012',
  '01',
  'Suicide',
  '0',
  'M',
  '60',
  'White',
  '100',
  'Other specified',
  '4'],
 ['4', '2012', '02', 'Suicide', '0', 'M', '64', 'White', '100', 'Home', '4']]

In [4]:
# Peel the header row off the front of the table so that `data` holds only
# records. NOTE: `data` is rebound here, so running this cell a second time
# without re-running the load cell would drop the first record as well.
headers, data = data[0], data[1:]
print(headers)
data[:5]

['', 'year', 'month', 'intent', 'police', 'sex', 'age', 'race', 'hispanic', 'place', 'education']


[['1',
  '2012',
  '01',
  'Suicide',
  '0',
  'M',
  '34',
  'Asian/Pacific Islander',
  '100',
  'Home',
  '4'],
 ['2', '2012', '01', 'Suicide', '0', 'F', '21', 'White', '100', 'Street', '3'],
 ['3',
  '2012',
  '01',
  'Suicide',
  '0',
  'M',
  '60',
  'White',
  '100',
  'Other specified',
  '4'],
 ['4', '2012', '02', 'Suicide', '0', 'M', '64', 'White', '100', 'Home', '4'],
 ['5',
  '2012',
  '02',
  'Suicide',
  '0',
  'M',
  '31',
  'White',
  '100',
  'Other specified',
  '2']]

# Gun Deaths by Year

In [5]:
# Tally the number of records per year; the year is column 1 of each row
# and is kept as a string, so the dictionary keys are strings too.
years = [record[1] for record in data]
year_counts = {}
for year in years:
    year_counts[year] = year_counts.get(year, 0) + 1
year_counts


{'2012': 33563, '2013': 33636, '2014': 33599}

In [6]:
import datetime
# Build one datetime per record from its year (column 1) and month (column 2).
# Only year and month are read from the row; the day is pinned to the 1st, so
# every record from the same month maps to the same datetime value.
dates = [
    datetime.datetime(int(record[1]), int(record[2]), 1)
    for record in data
]
dates[:5]

[datetime.datetime(2012, 1, 1, 0, 0),
 datetime.datetime(2012, 1, 1, 0, 0),
 datetime.datetime(2012, 1, 1, 0, 0),
 datetime.datetime(2012, 2, 1, 0, 0),
 datetime.datetime(2012, 2, 1, 0, 0)]

# Gun Deaths by Month and Year

In [7]:
# Count records per month bucket, keyed by the first-of-month datetime
# built in the previous cell.
date_counts = {}
for date in dates:
    date_counts.setdefault(date, 0)
    date_counts[date] += 1
date_counts

{datetime.datetime(2012, 1, 1, 0, 0): 2758,
 datetime.datetime(2012, 2, 1, 0, 0): 2357,
 datetime.datetime(2012, 3, 1, 0, 0): 2743,
 datetime.datetime(2012, 4, 1, 0, 0): 2795,
 datetime.datetime(2012, 5, 1, 0, 0): 2999,
 datetime.datetime(2012, 6, 1, 0, 0): 2826,
 datetime.datetime(2012, 7, 1, 0, 0): 3026,
 datetime.datetime(2012, 8, 1, 0, 0): 2954,
 datetime.datetime(2012, 9, 1, 0, 0): 2852,
 datetime.datetime(2012, 10, 1, 0, 0): 2733,
 datetime.datetime(2012, 11, 1, 0, 0): 2729,
 datetime.datetime(2012, 12, 1, 0, 0): 2791,
 datetime.datetime(2013, 1, 1, 0, 0): 2864,
 datetime.datetime(2013, 2, 1, 0, 0): 2375,
 datetime.datetime(2013, 3, 1, 0, 0): 2862,
 datetime.datetime(2013, 4, 1, 0, 0): 2798,
 datetime.datetime(2013, 5, 1, 0, 0): 2806,
 datetime.datetime(2013, 6, 1, 0, 0): 2920,
 datetime.datetime(2013, 7, 1, 0, 0): 3079,
 datetime.datetime(2013, 8, 1, 0, 0): 2859,
 datetime.datetime(2013, 9, 1, 0, 0): 2742,
 datetime.datetime(2013, 10, 1, 0, 0): 2808,
 datetime.datetime(2013, 11,

# Gun Deaths by Sex and Race

In [8]:
# Count records per value of the sex column (column 5).
sex = [record[5] for record in data]
sex_counts = {}
for item in sex:
    try:
        sex_counts[item] += 1
    except KeyError:
        # First time this value is seen.
        sex_counts[item] = 1
sex_counts

{'F': 14449, 'M': 86349}

In [9]:
# Count records per value of the race column (column 7).
race = [record[7] for record in data]
# Seed every observed category with zero (keys keep first-seen order),
# then increment once per record.
race_counts = dict.fromkeys(race, 0)
for item in race:
    race_counts[item] += 1
race_counts

{'Asian/Pacific Islander': 1326,
 'Black': 23296,
 'Hispanic': 9022,
 'Native American/Native Alaskan': 917,
 'White': 66237}

## Findings so far

We've seen that gun deaths remained fairly steady by year from 2012 through 2014 with deaths being a bit higher in summer months.

The data indicates higher gun death rates among men and minorities relative to their representation in the total population. This point will be shown below.

# Import Census Data

In [10]:
# Load the census table as a list of rows (lists of strings). The context
# manager closes the file handle once the rows are read, instead of leaving
# it open for the life of the kernel; newline='' follows the csv module's
# guidance for files handed to csv.reader.
with open("census.csv", 'r', newline='') as f:
    census = list(csv.reader(f))
census

[['Id',
  'Year',
  'Id',
  'Sex',
  'Id',
  'Hispanic Origin',
  'Id',
  'Id2',
  'Geography',
  'Total',
  'Race Alone - White',
  'Race Alone - Hispanic',
  'Race Alone - Black or African American',
  'Race Alone - American Indian and Alaska Native',
  'Race Alone - Asian',
  'Race Alone - Native Hawaiian and Other Pacific Islander',
  'Two or More Races'],
 ['cen42010',
  'April 1, 2010 Census',
  'totsex',
  'Both Sexes',
  'tothisp',
  'Total',
  '0100000US',
  '',
  'United States',
  '308745538',
  '197318956',
  '44618105',
  '40250635',
  '3739506',
  '15159516',
  '674625',
  '6984195']]

# Calculating Gun Deaths Rates by Race

In [11]:
# Population denominators per race category used in the gun-deaths data.
# The figures are read from the census table loaded above (header row paired
# with its single data row) instead of being retyped by hand, so they cannot
# drift from the source file. The duplicated 'Id' headers collapse in the
# dict, which is harmless because only the uniquely named race columns are
# looked up.
census_totals = dict(zip(census[0], census[1]))
mapping = {
    # The deaths data has one combined Asian/Pacific Islander category, so the
    # two corresponding census columns are summed.
    "Asian/Pacific Islander": (
        int(census_totals["Race Alone - Asian"])
        + int(census_totals["Race Alone - Native Hawaiian and Other Pacific Islander"])
    ),
    "Native American/Native Alaskan": int(census_totals["Race Alone - American Indian and Alaska Native"]),
    "Black": int(census_totals["Race Alone - Black or African American"]),
    "Hispanic": int(census_totals["Race Alone - Hispanic"]),
    "White": int(census_totals["Race Alone - White"]),
}

# Deaths per 100,000 people for each race. `race_counts` covers every record
# in the dataset (all years together), so this is a cumulative rate over the
# whole period, not an annual one. A comprehension is used so that no
# module-level loop variable overwrites the `race` column list built earlier.
race_per_hundredk = {
    group: 100000*deaths/mapping[group]
    for group, deaths in race_counts.items()
}
race_per_hundredk

{'Asian/Pacific Islander': 8.374309664161762,
 'Black': 57.877347773519595,
 'Hispanic': 20.220491210910907,
 'Native American/Native Alaskan': 24.521955573811088,
 'White': 33.56849303419181}

# Homicide Rate Gun Deaths by Race

In [12]:
# Count homicide records per race, then divide each count by that race's
# population in `mapping` and scale to a per-100,000 figure.
intents = [record[3] for record in data]
races = [record[7] for record in data]
homicide_race_per_hundredk = {}
for intent, race in zip(intents, races):
    if intent != 'Homicide':
        continue
    homicide_race_per_hundredk[race] = homicide_race_per_hundredk.get(race, 0) + 1
# Replace each raw count with its rate; only existing keys are reassigned.
for race, killed in homicide_race_per_hundredk.items():
    homicide_race_per_hundredk[race] = (killed/mapping[race]) * 100000
homicide_race_per_hundredk

{'Asian/Pacific Islander': 3.530346230970155,
 'Black': 48.471284987180944,
 'Hispanic': 12.627161104219914,
 'Native American/Native Alaskan': 8.717729026240365,
 'White': 4.6356417981453335}

## Findings

The previous cell shows us that gun-related homicide rates are as much as 10 times higher in minority groups.

# Homicides by Month

In [13]:
# Raw homicide counts per calendar month (column 2), pooled across all years
# in the dataset. Keys are the two-character month strings from the file.
intents = [record[3] for record in data]
months = [record[2] for record in data]
homicides_per_month = {}
for month, intent in zip(months, intents):
    if intent == 'Homicide':
        homicides_per_month[month] = homicides_per_month.get(month, 0) + 1
homicides_per_month

{'01': 2829,
 '02': 2178,
 '03': 2780,
 '04': 2845,
 '05': 2976,
 '06': 3130,
 '07': 3269,
 '08': 3125,
 '09': 2966,
 '10': 2968,
 '11': 2919,
 '12': 3191}

## Finding
Homicide counts are fairly consistent from month to month, with a dip in February and a peak in July.

# Homicides by Gender

In [14]:
# Population by sex, from 2010 census data
sex_mapping = {'F': 156964212, 'M': 151781326}

# Count homicide records per sex, then convert each count into a rate per
# 100,000 people of that sex.
intents = [record[3] for record in data]
sexes = [record[5] for record in data]
homicides_by_gender = {}
for sex, intent in zip(sexes, intents):
    if intent != 'Homicide':
        continue
    homicides_by_gender[sex] = homicides_by_gender.get(sex, 0) + 1
for sex in homicides_by_gender:
    homicides_by_gender[sex] = (homicides_by_gender[sex]/sex_mapping[sex])*100000
homicides_by_gender

{'F': 3.423073279914277, 'M': 19.635485329730223}

## Finding
Men were murdered at nearly six times the rate of women over this three-year span.

In [15]:
# Show the distinct intent labels present in `intents` (built in the previous cell).
set(intents)

{'Accidental', 'Homicide', 'NA', 'Suicide', 'Undetermined'}

# Accidental Deaths

## By Gender

In [16]:
# Population by sex (same figures as in the homicide-by-gender cell).
sex_mapping = {'F': 156964212, 'M': 151781326}

# Count records whose intent is 'Accidental' per sex, then scale each count
# to a rate per 100,000 people of that sex.
intents = [record[3] for record in data]
sexes = [record[5] for record in data]
accidental_deaths_by_gender = {}
for sex, intent in zip(sexes, intents):
    if intent == 'Accidental':
        accidental_deaths_by_gender.setdefault(sex, 0)
        accidental_deaths_by_gender[sex] += 1
for sex in accidental_deaths_by_gender:
    accidental_deaths_by_gender[sex] = (accidental_deaths_by_gender[sex]/sex_mapping[sex]) * 100000
accidental_deaths_by_gender

{'F': 0.13888516192468128, 'M': 0.9362153022697931}

Men are over six times as likely as women to be killed in an accidental gun-related death.

## By Race

In [17]:
# Accidental deaths per 100,000 people, broken down by race. Counts are
# accumulated first and then divided by the population in `mapping`.
intents = [record[3] for record in data]
races = [record[7] for record in data]
ad_race_per_hundredk = {}
for intent, race in zip(intents, races):
    if intent != 'Accidental':
        continue
    ad_race_per_hundredk[race] = ad_race_per_hundredk.get(race, 0) + 1
for race, deaths in ad_race_per_hundredk.items():
    ad_race_per_hundredk[race] = (deaths/mapping[race]) * 100000
ad_race_per_hundredk

{'Asian/Pacific Islander': 0.07578560782046845,
 'Black': 0.814893976206835,
 'Hispanic': 0.3249801846133985,
 'Native American/Native Alaskan': 0.5883130017708221,
 'White': 0.5736904466492313}

## Findings
While the accidental gun-death rate does vary by race, the gaps are much smaller than for gun homicides, and it does not fall as heavily on the same minority groups.

# Suicide Deaths

## By Gender

In [18]:
# Population by sex (same figures as in the earlier by-gender cells).
sex_mapping = {'F': 156964212, 'M': 151781326}

# Suicide records per sex, converted to a rate per 100,000 people of that sex.
intents = [record[3] for record in data]
sexes = [record[5] for record in data]
deaths_by_gender = {}
for sex, intent in zip(sexes, intents):
    if intent != 'Suicide':
        continue
    try:
        deaths_by_gender[sex] += 1
    except KeyError:
        deaths_by_gender[sex] = 1
for sex in deaths_by_gender:
    deaths_by_gender[sex] = (deaths_by_gender[sex]/sex_mapping[sex]) * 100000
deaths_by_gender

{'F': 5.535656752126402, 'M': 35.8976966639493}

## By Race

In [19]:
# Suicide deaths per 100,000 people, by race.
# NOTE: this rebinds `race_per_hundredk`, so after this cell the name holds
# suicide-only rates rather than whatever it held before.
intents = [record[3] for record in data]
races = [record[7] for record in data]
race_per_hundredk = {}
for intent, race in zip(intents, races):
    if intent == 'Suicide':
        race_per_hundredk[race] = race_per_hundredk.get(race, 0) + 1
for race, deaths in race_per_hundredk.items():
    race_per_hundredk[race] = (deaths/mapping[race]) * 100000
race_per_hundredk

{'Asian/Pacific Islander': 4.705023152187416,
 'Black': 8.278130270491385,
 'Hispanic': 7.106980451097149,
 'Native American/Native Alaskan': 14.841532544673013,
 'White': 28.06217969245692}

## Findings
Suicide gun deaths are about six and a half times as common among men as among women, and they disproportionately affect White and Native American people.

# Undetermined and NA

## By Gender

In [20]:
# Population by sex (same figures as in the earlier by-gender cells).
sex_mapping = {'F': 156964212, 'M': 151781326}

# Records whose intent is 'Undetermined' or 'NA', counted per sex and then
# scaled to a rate per 100,000 people of that sex.
# NOTE: this rebinds `deaths_by_gender`.
intents = [record[3] for record in data]
sexes = [record[5] for record in data]
deaths_by_gender = {}
for sex, intent in zip(sexes, intents):
    if intent in ('Undetermined', 'NA'):
        deaths_by_gender[sex] = deaths_by_gender.get(sex, 0) + 1
for sex in deaths_by_gender:
    deaths_by_gender[sex] = (deaths_by_gender[sex]/sex_mapping[sex]) * 100000
deaths_by_gender

{'F': 0.10766785488656486, 'M': 0.42100040686164514}

## By Race

In [21]:
# Records whose intent is 'Undetermined' or 'NA', per 100,000 people, by race.
# NOTE: this rebinds `race_per_hundredk`.
intents = [record[3] for record in data]
races = [record[7] for record in data]
race_per_hundredk = {}
for intent, race in zip(intents, races):
    if intent not in ('Undetermined', 'NA'):
        continue
    race_per_hundredk.setdefault(race, 0)
    race_per_hundredk[race] += 1
for race, deaths in race_per_hundredk.items():
    race_per_hundredk[race] = (deaths/mapping[race]) * 100000
race_per_hundredk

{'Asian/Pacific Islander': 0.0631546731837237,
 'Black': 0.3130385396404305,
 'Hispanic': 0.16136947098044616,
 'Native American/Native Alaskan': 0.3743810011268868,
 'White': 0.29698109694032643}

## Findings
Nothing of particular note seeing as we're not exactly sure what types of death we are looking at.

# Other Correlations

## Deaths by Location

In [22]:
# Pull out the place column (column 9) and show which distinct values occur.
places = [record[9] for record in data]
set(places)

{'Farm',
 'Home',
 'Industrial/construction',
 'NA',
 'Other specified',
 'Other unspecified',
 'Residential institution',
 'School/instiution',
 'Sports',
 'Street',
 'Trade/service area'}

In [23]:
# Number of records per place value. Every observed place is seeded with
# zero (keys keep first-seen order) and then incremented once per record.
location_deaths = dict.fromkeys(places, 0)
for place in places:
    location_deaths[place] += 1
location_deaths

{'Farm': 470,
 'Home': 60486,
 'Industrial/construction': 248,
 'NA': 1384,
 'Other specified': 13751,
 'Other unspecified': 8867,
 'Residential institution': 203,
 'School/instiution': 671,
 'Sports': 128,
 'Street': 11151,
 'Trade/service area': 3439}

## Deaths by Education

In [24]:
# Human-readable labels for the education codes in column 10. Both '5' and
# 'NA' are folded into the same 'Not available' label.
edu_mapping = {
    '1': 'Less than High School',
    '2': 'Graduated from High School or equivalent',
    '3': 'Some College',
    '4': 'At least graduated from College',
    '5': 'Not available',
    'NA': 'Not available',
}
educations = [record[10] for record in data]
# Count records per label; a code missing from edu_mapping raises KeyError.
edu_deaths = {}
for education in educations:
    label = edu_mapping[education]
    edu_deaths[label] = edu_deaths.get(label, 0) + 1
edu_deaths

{'At least graduated from College': 12946,
 'Graduated from High School or equivalent': 42927,
 'Less than High School': 21823,
 'Not available': 1422,
 'Some College': 21680}

## Findings
Gun-related deaths seem to occur most frequently at home, with 'Other specified' locations and the street also quite high.

Gun-related deaths seem to occur more in lower-educated groups, but we cannot make a relative comparison without information on education levels in the total population. The most common survey covers ages 25 and over, so we would also need data for people under 25 to get accurate rates. This could be a next step.