## Exploring Gun Deaths in the US

In [2]:
import csv
# Load every row of the gun-deaths CSV into memory. The context manager closes
# the file handle as soon as the rows have been read (the original left it
# open), and newline="" is the mode the csv module asks for so that newlines
# embedded in quoted fields are parsed correctly.
with open("guns.csv", "r", newline="") as f:
    csvreader = csv.reader(f)
    data = list(csvreader)
print(data[:5])

[['', 'year', 'month', 'intent', 'police', 'sex', 'age', 'race', 'hispanic', 'place', 'education'], ['1', '2012', '01', 'Suicide', '0', 'M', '34', 'Asian/Pacific Islander', '100', 'Home', '4'], ['2', '2012', '01', 'Suicide', '0', 'F', '21', 'White', '100', 'Street', '3'], ['3', '2012', '01', 'Suicide', '0', 'M', '60', 'White', '100', 'Other specified', '4'], ['4', '2012', '02', 'Suicide', '0', 'M', '64', 'White', '100', 'Home', '4']]


### Separating out header row

In [3]:
# Split the header row from the records. The guard keeps this cell idempotent:
# once the header has been removed, re-running the cell must not strip a real
# record from `data` (and overwrite `headers` with it). The header row is
# recognised by its first two fields, an empty index label followed by 'year'.
if data and data[0][:2] == ["", "year"]:
    headers = data[0]
    data = data[1:]
print(headers)
print(data[:5])

['', 'year', 'month', 'intent', 'police', 'sex', 'age', 'race', 'hispanic', 'place', 'education']
[['1', '2012', '01', 'Suicide', '0', 'M', '34', 'Asian/Pacific Islander', '100', 'Home', '4'], ['2', '2012', '01', 'Suicide', '0', 'F', '21', 'White', '100', 'Street', '3'], ['3', '2012', '01', 'Suicide', '0', 'M', '60', 'White', '100', 'Other specified', '4'], ['4', '2012', '02', 'Suicide', '0', 'M', '64', 'White', '100', 'Home', '4'], ['5', '2012', '02', 'Suicide', '0', 'M', '31', 'White', '100', 'Other specified', '2']]


### Gun Deaths Per Year

In [4]:
# Tally deaths per calendar year; the year is the second field of each record.
years = [record[1] for record in data]

year_counts = {}
for year in years:
    # dict.get supplies 0 the first time a year is seen.
    year_counts[year] = year_counts.get(year, 0) + 1

print(year_counts)

{'2012': 33563, '2013': 33636, '2014': 33599}


### Gun Deaths Per Month

In [5]:
import datetime

# One datetime per record, built from the year and month fields. The day is
# fixed to 1 so that every record of the same month maps to the same value.
dates = [datetime.datetime(int(row[1]), int(row[2]), 1) for row in data]
print(dates[:5])

[datetime.datetime(2012, 1, 1, 0, 0), datetime.datetime(2012, 1, 1, 0, 0), datetime.datetime(2012, 1, 1, 0, 0), datetime.datetime(2012, 2, 1, 0, 0), datetime.datetime(2012, 2, 1, 0, 0)]


In [6]:
import pprint

# Count how many records fall on each (year, month) datetime.
date_counts = {}
for date in dates:
    date_counts[date] = date_counts.get(date, 0) + 1

pprint.pprint(date_counts)

{datetime.datetime(2012, 1, 1, 0, 0): 2758,
 datetime.datetime(2012, 2, 1, 0, 0): 2357,
 datetime.datetime(2012, 3, 1, 0, 0): 2743,
 datetime.datetime(2012, 4, 1, 0, 0): 2795,
 datetime.datetime(2012, 5, 1, 0, 0): 2999,
 datetime.datetime(2012, 6, 1, 0, 0): 2826,
 datetime.datetime(2012, 7, 1, 0, 0): 3026,
 datetime.datetime(2012, 8, 1, 0, 0): 2954,
 datetime.datetime(2012, 9, 1, 0, 0): 2852,
 datetime.datetime(2012, 10, 1, 0, 0): 2733,
 datetime.datetime(2012, 11, 1, 0, 0): 2729,
 datetime.datetime(2012, 12, 1, 0, 0): 2791,
 datetime.datetime(2013, 1, 1, 0, 0): 2864,
 datetime.datetime(2013, 2, 1, 0, 0): 2375,
 datetime.datetime(2013, 3, 1, 0, 0): 2862,
 datetime.datetime(2013, 4, 1, 0, 0): 2798,
 datetime.datetime(2013, 5, 1, 0, 0): 2806,
 datetime.datetime(2013, 6, 1, 0, 0): 2920,
 datetime.datetime(2013, 7, 1, 0, 0): 3079,
 datetime.datetime(2013, 8, 1, 0, 0): 2859,
 datetime.datetime(2013, 9, 1, 0, 0): 2742,
 datetime.datetime(2013, 10, 1, 0, 0): 2808,
 datetime.datetime(2013, 11,

### Gun Deaths by Sex and Race

In [7]:
# Tally deaths by the sex field (index 5 of each record).
sexes = [record[5] for record in data]

sex_counts = {}
for item in sexes:
    sex_counts[item] = sex_counts.get(item, 0) + 1

pprint.pprint(sex_counts)

{'F': 14449, 'M': 86349}


In [8]:
# Tally deaths by the race field (index 7 of each record).
races = [record[7] for record in data]

race_counts = {}
for item in races:
    race_counts[item] = race_counts.get(item, 0) + 1

pprint.pprint(race_counts)

{'Asian/Pacific Islander': 1326,
 'Black': 23296,
 'Hispanic': 9022,
 'Native American/Native Alaskan': 917,
 'White': 66237}


### Observations

 - Gun Deaths by Year: Not too much overall difference in the year counts from 2012 to 2014.
 - Gun Deaths by Month: Seems to peak around summertime; it could be useful to know the breakdown of intent during each month or season. Lowest in February across all three years. Further analysis is needed to determine whether there is a significant correlation between season and gun deaths.
 - Gun Deaths by Gender: Males are much more affected by gun deaths than females.
 - Gun Deaths by Race: Highest number is among Whites, with the other categories combined making up about half as many gun deaths. It could be useful to know the population percentage of each race category to see how exactly gun deaths are distributed among different races.

### Reading in Census Data

In [9]:
# Read the census table (a header row plus one row of national totals). The
# context manager closes the file handle once the rows are in memory (the
# original left it open); newline="" is the mode the csv module asks for.
with open("census.csv", "r", newline="") as f:
    csvreader = csv.reader(f)
    census = list(csvreader)
print(census)

[['Id', 'Year', 'Id', 'Sex', 'Id', 'Hispanic Origin', 'Id', 'Id2', 'Geography', 'Total', 'Race Alone - White', 'Race Alone - Hispanic', 'Race Alone - Black or African American', 'Race Alone - American Indian and Alaska Native', 'Race Alone - Asian', 'Race Alone - Native Hawaiian and Other Pacific Islander', 'Two or More Races'], ['cen42010', 'April 1, 2010 Census', 'totsex', 'Both Sexes', 'tothisp', 'Total', '0100000US', '', 'United States', '308745538', '197318956', '44618105', '40250635', '3739506', '15159516', '674625', '6984195']]


In [10]:
# Map each key of race_counts to the matching population total from the census.
# census[1] is the single data row; columns 10-15 hold the per-race totals in
# the order given by the census header row.
population_row = census[1]
Census_White = int(population_row[10])
Census_Hispanic = int(population_row[11])
Census_Black = int(population_row[12])
Census_Native = int(population_row[13])
Census_Asian = int(population_row[14])
Census_Islander = int(population_row[15])

# The gun data has one combined "Asian/Pacific Islander" category, so the two
# separate census columns are added together.
mapping = {}
mapping["Asian/Pacific Islander"] = Census_Asian + Census_Islander
mapping["Black"] = Census_Black
mapping["Hispanic"] = Census_Hispanic
mapping["Native American/Native Alaskan"] = Census_Native
mapping["White"] = Census_White

pprint.pprint(mapping)

{'Asian/Pacific Islander': 15834141,
 'Black': 40250635,
 'Hispanic': 44618105,
 'Native American/Native Alaskan': 3739506,
 'White': 197318956}


In [11]:
# Gun deaths per 100,000 people for each race category.
race_per_hunderdk = {
    category: (deaths / mapping[category]) * 100000
    for category, deaths in race_counts.items()
}

pprint.pprint(race_per_hunderdk)

{'Asian/Pacific Islander': 8.374309664161762,
 'Black': 57.8773477735196,
 'Hispanic': 20.220491210910907,
 'Native American/Native Alaskan': 24.521955573811088,
 'White': 33.56849303419181}


### Murder Rate per Race

In [12]:
# Count homicides per race by walking the intent and race columns in step.
intents = [record[3] for record in data]
races = [record[7] for record in data]

homicide_race_counts = {}
for intent_label, race_label in zip(intents, races):
    if intent_label != "Homicide":
        continue
    homicide_race_counts[race_label] = homicide_race_counts.get(race_label, 0) + 1

pprint.pprint(homicide_race_counts)

{'Asian/Pacific Islander': 559,
 'Black': 19510,
 'Hispanic': 5634,
 'Native American/Native Alaskan': 326,
 'White': 9147}


In [13]:
# Homicides per 100,000 people for each race category.
homicide_per_hunderdk = {
    category: (deaths / mapping[category]) * 100000
    for category, deaths in homicide_race_counts.items()
}

pprint.pprint(homicide_per_hunderdk)

{'Asian/Pacific Islander': 3.530346230970155,
 'Black': 48.471284987180944,
 'Hispanic': 12.627161104219914,
 'Native American/Native Alaskan': 8.717729026240365,
 'White': 4.6356417981453335}


### Observations - Homicide Rates per Race

 - Homicide rate most strongly affects the Black racial category
 - White racial category relatively unaffected compared to other minority racial categories

### Next Steps

 - Explore correlation between time of year and homicide rate
 - Explore homicide rate by gender
 - Explore rates of intents by gender and race
 - Explore gun deaths by locations and education

### Homicide Rate and Month

In [16]:
# Homicide counts per calendar month (1-12), pooled over all years in the data.
month_homicide_rate = {}
for row in data:
    month, intent = int(row[2]), row[3]
    if intent != "Homicide":
        continue
    month_homicide_rate[month] = month_homicide_rate.get(month, 0) + 1

pprint.pprint(month_homicide_rate)

{1: 2829,
 2: 2178,
 3: 2780,
 4: 2845,
 5: 2976,
 6: 3130,
 7: 3269,
 8: 3125,
 9: 2966,
 10: 2968,
 11: 2919,
 12: 3191}


In [34]:
# Month with the most homicides: compare keys by their stored count.
max_key = max(month_homicide_rate, key=month_homicide_rate.get)
print("Maximum Homicide Deaths- ", max_key, ": ", month_homicide_rate[max_key], sep="")

Maximum Homicide Deaths- 7: 3269


In [37]:
# Month with the fewest homicides: compare keys by their stored count.
min_key = min(month_homicide_rate, key=month_homicide_rate.get)
print("Minimum Homicide Deaths- ", min_key, ": ", month_homicide_rate[min_key], sep="")

Minimum Homicide Deaths- 2: 2178


 - Maximum homicide deaths occurred in July, with higher values from June until August
 - Also have higher value in December
 - Lowest value in February, much lower than values during the other months from 2012-2014
 - Would be useful to display this data graphically

### Homicide Rate and Gender

In [38]:
# Homicide counts split by the sex field.
gender_homicide_rate = {}
for row in data:
    gender, intent = row[5], row[3]
    if intent != "Homicide":
        continue
    gender_homicide_rate[gender] = gender_homicide_rate.get(gender, 0) + 1

pprint.pprint(gender_homicide_rate)

{'F': 5373, 'M': 29803}


 - Males are disproportionately affected by homicides compared to females

### Other Intents

In [39]:
# Distinct values that appear in the intent column.
unique_intents = {*intents}
print(unique_intents)

{'NA', 'Suicide', 'Accidental', 'Homicide', 'Undetermined'}


### Suicide Rates by Gender and Race

In [40]:
# Suicide counts split by the sex field.
gender_suicide_rate = {}
for row in data:
    gender, intent = row[5], row[3]
    if intent != "Suicide":
        continue
    gender_suicide_rate[gender] = gender_suicide_rate.get(gender, 0) + 1

pprint.pprint(gender_suicide_rate)

{'F': 8689, 'M': 54486}


In [43]:
# Suicide counts split by the race field.
suicide_race_counts = {}
for row in data:
    race, intent = row[7], row[3]
    if intent != "Suicide":
        continue
    suicide_race_counts[race] = suicide_race_counts.get(race, 0) + 1

pprint.pprint(suicide_race_counts)

{'Asian/Pacific Islander': 745,
 'Black': 3332,
 'Hispanic': 3171,
 'Native American/Native Alaskan': 555,
 'White': 55372}


In [44]:
# Suicides per 100,000 people for each race category.
suicide_per_hunderdk = {
    category: (deaths / mapping[category]) * 100000
    for category, deaths in suicide_race_counts.items()
}

pprint.pprint(suicide_per_hunderdk)

{'Asian/Pacific Islander': 4.705023152187416,
 'Black': 8.278130270491385,
 'Hispanic': 7.106980451097149,
 'Native American/Native Alaskan': 14.841532544673013,
 'White': 28.06217969245692}


 - Suicide is more common among Males than Females
 - The White racial category is the most affected by Suicide

### Accidental Death Rates by Gender and Race

In [45]:
# Accidental-death counts split by the sex field.
gender_accident_rate = {}
for row in data:
    gender, intent = row[5], row[3]
    if intent != "Accidental":
        continue
    gender_accident_rate[gender] = gender_accident_rate.get(gender, 0) + 1

pprint.pprint(gender_accident_rate)

{'F': 218, 'M': 1421}


In [46]:
# Accidental-death counts split by the race field.
accident_race_counts = {}
for row in data:
    race, intent = row[7], row[3]
    if intent != "Accidental":
        continue
    accident_race_counts[race] = accident_race_counts.get(race, 0) + 1

pprint.pprint(accident_race_counts)

{'Asian/Pacific Islander': 12,
 'Black': 328,
 'Hispanic': 145,
 'Native American/Native Alaskan': 22,
 'White': 1132}


In [47]:
# Accidental deaths per 100,000 people for each race category.
accident_per_hunderdk = {
    category: (deaths / mapping[category]) * 100000
    for category, deaths in accident_race_counts.items()
}

pprint.pprint(accident_per_hunderdk)

{'Asian/Pacific Islander': 0.07578560782046845,
 'Black': 0.814893976206835,
 'Hispanic': 0.3249801846133985,
 'Native American/Native Alaskan': 0.5883130017708221,
 'White': 0.5736904466492313}


 - Overall, fewer accidental deaths compared to Homicides or Suicides
 - Males more strongly affected than Females
 - Black racial category most affected by accidental deaths
 
##### Obs: 
 - Could be useful to write a function that takes an intent and a race/gender argument and returns a dictionary with the value by race/gender for the specified intent

### Function to Calculate Death Rate by Intent and Grouping

In [60]:
def death_rate(intent, race_or_gender, rows=None, population=None):
    """Print gun-death figures for one intent, grouped by race or by gender.

    Grouping by "race" prints deaths per 100,000 people, using the population
    totals in ``population``. Grouping by "gender" prints raw death counts,
    because no per-gender population totals are available in this notebook.

    Parameters
    ----------
    intent : str
        Value of the intent column to match exactly, e.g. "Homicide".
    race_or_gender : str
        Either "race" or "gender" (case-insensitive).
    rows : list of list of str, optional
        Records laid out like the notebook's ``data`` (intent at index 3,
        sex at index 5, race at index 7). Defaults to the global ``data``.
    population : dict, optional
        Population total per race category. Defaults to the global
        ``mapping``. Only used when grouping by race.

    Raises
    ------
    Exception
        If ``race_or_gender`` is neither "race" nor "gender".
    KeyError
        If a race found in ``rows`` has no entry in ``population``.
    """
    # Validate once, before touching the data, so that bad input is rejected
    # even when there are no rows to iterate over.
    grouping = race_or_gender.lower()
    if grouping == "race":
        column, per_hundredk = 7, True
    elif grouping == "gender":
        column, per_hundredk = 5, False
    else:
        raise Exception("Please input either 'race' or 'gender.'")

    if rows is None:
        rows = data

    deaths = {}
    for row in rows:
        if row[3] == intent:
            group = row[column]
            deaths[group] = deaths.get(group, 0) + 1

    if per_hundredk:
        if population is None:
            population = mapping
        # Normalise the raw counts to deaths per 100,000 people.
        deaths = {
            group: (count / population[group]) * 100000
            for group, count in deaths.items()
        }

    pprint.pprint(deaths)
    

In [62]:
# Sanity check: should print the same counts as gender_homicide_rate, which
# was tallied by hand earlier in the notebook.
death_rate("Homicide", "gender")

{'F': 5373, 'M': 29803}


In [63]:
# Check that an unsupported grouping is rejected. The error is caught and
# printed rather than left to propagate, so "Restart & Run All" can continue
# past this cell while still showing the message.
try:
    death_rate("Homicide", "Gum")
except Exception as error:
    print(type(error).__name__ + ": " + str(error))

Exception: Please input either 'race' or 'gender.'

### Gun Deaths by Location and Education

In [64]:
# Tally deaths by the place field (index 9 of each record).
location = [record[9] for record in data]

location_counts = {}
for item in location:
    location_counts[item] = location_counts.get(item, 0) + 1

pprint.pprint(location_counts)

{'Farm': 470,
 'Home': 60486,
 'Industrial/construction': 248,
 'NA': 1384,
 'Other specified': 13751,
 'Other unspecified': 8867,
 'Residential institution': 203,
 'School/instiution': 671,
 'Sports': 128,
 'Street': 11151,
 'Trade/service area': 3439}


In [65]:
# Tally deaths by the education field (index 10 of each record).
education = [record[10] for record in data]

education_counts = {}
for item in education:
    education_counts[item] = education_counts.get(item, 0) + 1

pprint.pprint(education_counts)

{'1': 21823, '2': 42927, '3': 21680, '4': 12946, '5': 1369, 'NA': 53}


 - Majority of gun deaths happened at home
 - The fewest gun deaths happened at sports locations
 - The largest share of gun deaths happened at education category level 2 (graduated from high school or equivalent)
 - Among the known education levels, the fewest gun deaths were at level 4 (at least graduated from college)
 - Correlation between education level and gun deaths?
 <br><br>
 - Should combine education category 5 (not available) with the NA category

In [67]:
# Cleaning data: fold the "NA" entries into category "5" before counting, so
# both ways of recording a missing education level end up under a single key.
education = [record[10] for record in data]

education_counts = {}
for item in education:
    level = "5" if item == "NA" else item
    education_counts[level] = education_counts.get(level, 0) + 1

pprint.pprint(education_counts)

{'1': 21823, '2': 42927, '3': 21680, '4': 12946, '5': 1422}
