## Exploring Gun Deaths in the US

In [2]:
import csv
# Load every row of the gun-deaths CSV into memory as a list of string lists.
# The context manager closes the file handle once the rows are materialised;
# newline="" lets the csv module handle line endings itself, as its
# documentation recommends for file objects passed to csv.reader.
with open("guns.csv", "r", newline="") as f:
    csvreader = csv.reader(f)
    data = list(csvreader)
print(data[:5])

[['', 'year', 'month', 'intent', 'police', 'sex', 'age', 'race', 'hispanic', 'place', 'education'], ['1', '2012', '01', 'Suicide', '0', 'M', '34', 'Asian/Pacific Islander', '100', 'Home', '4'], ['2', '2012', '01', 'Suicide', '0', 'F', '21', 'White', '100', 'Street', '3'], ['3', '2012', '01', 'Suicide', '0', 'M', '60', 'White', '100', 'Other specified', '4'], ['4', '2012', '02', 'Suicide', '0', 'M', '64', 'White', '100', 'Home', '4']]


### Separating out header row

In [3]:
# Peel the first row (column names) off the parsed rows; everything after it
# stays in `data`.
# NOTE: `data` is rebound here, so re-running this cell strips one more row.
headers, data = data[0], data[1:]
print(headers, data[:5], sep="\n")

['', 'year', 'month', 'intent', 'police', 'sex', 'age', 'race', 'hispanic', 'place', 'education']
[['1', '2012', '01', 'Suicide', '0', 'M', '34', 'Asian/Pacific Islander', '100', 'Home', '4'], ['2', '2012', '01', 'Suicide', '0', 'F', '21', 'White', '100', 'Street', '3'], ['3', '2012', '01', 'Suicide', '0', 'M', '60', 'White', '100', 'Other specified', '4'], ['4', '2012', '02', 'Suicide', '0', 'M', '64', 'White', '100', 'Home', '4'], ['5', '2012', '02', 'Suicide', '0', 'M', '31', 'White', '100', 'Other specified', '2']]


### Gun Deaths Per Year

In [4]:
# Column 1 is the year (as a string); tally how many rows fall in each year.
years = [record[1] for record in data]

year_counts = {}
for year in years:
    # .get supplies 0 the first time a year is seen.
    year_counts[year] = year_counts.get(year, 0) + 1

print(year_counts)

{'2013': 33636, '2014': 33599, '2012': 33563}


### Gun Deaths Per Month

In [12]:
import datetime

# Build one datetime per row from the year (column 1) and month (column 2),
# with the day fixed to 1 so that rows from the same month compare equal.
dates = [
    datetime.datetime(int(record[1]), int(record[2]), 1)
    for record in data
]
print(dates[:5])

[datetime.datetime(2012, 1, 1, 0, 0), datetime.datetime(2012, 1, 1, 0, 0), datetime.datetime(2012, 1, 1, 0, 0), datetime.datetime(2012, 2, 1, 0, 0), datetime.datetime(2012, 2, 1, 0, 0)]


In [18]:
import pprint

# Count rows per (year, month) by tallying the first-of-month datetimes.
date_counts = {}
for date in dates:
    # Make sure the key exists before incrementing it.
    date_counts.setdefault(date, 0)
    date_counts[date] += 1

pprint.pprint(date_counts)

{datetime.datetime(2012, 1, 1, 0, 0): 2758,
 datetime.datetime(2012, 2, 1, 0, 0): 2357,
 datetime.datetime(2012, 3, 1, 0, 0): 2743,
 datetime.datetime(2012, 4, 1, 0, 0): 2795,
 datetime.datetime(2012, 5, 1, 0, 0): 2999,
 datetime.datetime(2012, 6, 1, 0, 0): 2826,
 datetime.datetime(2012, 7, 1, 0, 0): 3026,
 datetime.datetime(2012, 8, 1, 0, 0): 2954,
 datetime.datetime(2012, 9, 1, 0, 0): 2852,
 datetime.datetime(2012, 10, 1, 0, 0): 2733,
 datetime.datetime(2012, 11, 1, 0, 0): 2729,
 datetime.datetime(2012, 12, 1, 0, 0): 2791,
 datetime.datetime(2013, 1, 1, 0, 0): 2864,
 datetime.datetime(2013, 2, 1, 0, 0): 2375,
 datetime.datetime(2013, 3, 1, 0, 0): 2862,
 datetime.datetime(2013, 4, 1, 0, 0): 2798,
 datetime.datetime(2013, 5, 1, 0, 0): 2806,
 datetime.datetime(2013, 6, 1, 0, 0): 2920,
 datetime.datetime(2013, 7, 1, 0, 0): 3079,
 datetime.datetime(2013, 8, 1, 0, 0): 2859,
 datetime.datetime(2013, 9, 1, 0, 0): 2742,
 datetime.datetime(2013, 10, 1, 0, 0): 2808,
 datetime.datetime(2013, 11,

### Gun Deaths by Sex and Race

In [20]:
# Column 5 is the sex code; count how many rows carry each value.
sexes = [record[5] for record in data]

sex_counts = {}
for item in sexes:
    try:
        sex_counts[item] += 1
    except KeyError:
        # First occurrence of this value.
        sex_counts[item] = 1

pprint.pprint(sex_counts)

{'F': 14449, 'M': 86349}


In [22]:
# Column 7 is the race category; count how many rows fall in each category.
races = [record[7] for record in data]

race_counts = {}
for item in races:
    race_counts[item] = race_counts.get(item, 0) + 1

pprint.pprint(race_counts)

{'Asian/Pacific Islander': 1326,
 'Black': 23296,
 'Hispanic': 9022,
 'Native American/Native Alaskan': 917,
 'White': 66237}


### Observations

 - Gun Deaths by Year: Not too much overall difference in the year counts from 2012 to 2014.
 - Gun Deaths by Month: Seems to peak around summertime; it could be useful to know the breakdown of intent during each month or season. Lowest in February across all three years. Further analysis is needed to determine whether there is a significant correlation between season and gun deaths.
 - Gun Deaths by Gender: Males are much more affected by gun deaths than females.
 - Gun Deaths by Race: Highest number is among Whites, with the other categories combined making up about half as many gun deaths. It could be useful to know the population percentage of each race category to see how exactly gun deaths are distributed among different races.

### Reading in Census Data

In [23]:
# Load the census CSV (a header row followed by data rows) into a list of
# string lists. The context manager closes the file handle once the rows are
# read; newline="" lets the csv module handle line endings itself, as its
# documentation recommends for file objects passed to csv.reader.
with open("census.csv", "r", newline="") as f:
    csvreader = csv.reader(f)
    census = list(csvreader)
print(census)

[['Id', 'Year', 'Id', 'Sex', 'Id', 'Hispanic Origin', 'Id', 'Id2', 'Geography', 'Total', 'Race Alone - White', 'Race Alone - Hispanic', 'Race Alone - Black or African American', 'Race Alone - American Indian and Alaska Native', 'Race Alone - Asian', 'Race Alone - Native Hawaiian and Other Pacific Islander', 'Two or More Races'], ['cen42010', 'April 1, 2010 Census', 'totsex', 'Both Sexes', 'tothisp', 'Total', '0100000US', '', 'United States', '308745538', '197318956', '44618105', '40250635', '3739506', '15159516', '674625', '6984195']]


In [30]:
# Map each key from race_counts to the population count of that race from the census.
# Columns are looked up by header name instead of by position, so a reordered
# or extended census file raises a KeyError rather than silently reading the
# wrong column. (The header row repeats "Id"; those duplicates collapse in the
# dict but are not used here.)
census_by_column = dict(zip(census[0], census[1]))

Census_Asian = int(census_by_column["Race Alone - Asian"])
Census_Islander = int(census_by_column["Race Alone - Native Hawaiian and Other Pacific Islander"])
Census_Black = int(census_by_column["Race Alone - Black or African American"])
Census_Hispanic = int(census_by_column["Race Alone - Hispanic"])
Census_Native = int(census_by_column["Race Alone - American Indian and Alaska Native"])
Census_White = int(census_by_column["Race Alone - White"])

# The gun data has a single "Asian/Pacific Islander" category, so the two
# corresponding census columns are summed.
mapping = {"Asian/Pacific Islander" : Census_Asian + Census_Islander,
          "Black" : Census_Black,
          "Hispanic" : Census_Hispanic,
          "Native American/Native Alaskan" : Census_Native,
          "White" : Census_White}

pprint.pprint(mapping)

{'Asian/Pacific Islander': 15834141,
 'Black': 40250635,
 'Hispanic': 44618105,
 'Native American/Native Alaskan': 3739506,
 'White': 197318956}


In [31]:
# Gun deaths per 100,000 people for each race category: the death count
# divided by the census population for that category, scaled by 100000.
race_per_hunderdk = {
    category: (deaths / mapping[category]) * 100000
    for category, deaths in race_counts.items()
}

pprint.pprint(race_per_hunderdk)

{'Asian/Pacific Islander': 8.374309664161762,
 'Black': 57.8773477735196,
 'Hispanic': 20.220491210910907,
 'Native American/Native Alaskan': 24.521955573811088,
 'White': 33.56849303419181}


### Murder Rate per Race

In [37]:
# Column 3 is the intent and column 7 the race category; count, per race,
# only the rows whose intent is exactly "Homicide".
intents = [record[3] for record in data]
races = [record[7] for record in data]

homicide_race_counts = {}
for intent, race in zip(intents, races):
    if intent != "Homicide":
        continue
    homicide_race_counts[race] = homicide_race_counts.get(race, 0) + 1

pprint.pprint(homicide_race_counts)

{'Asian/Pacific Islander': 559,
 'Black': 19510,
 'Hispanic': 5634,
 'Native American/Native Alaskan': 326,
 'White': 9147}


In [38]:
# Homicides per 100,000 people for each race category: the homicide count
# divided by the census population for that category, scaled by 100000.
homicide_per_hunderdk = {
    category: (homicide_race_counts[category] / mapping[category]) * 100000
    for category in homicide_race_counts
}

pprint.pprint(homicide_per_hunderdk)

{'Asian/Pacific Islander': 3.530346230970155,
 'Black': 48.471284987180944,
 'Hispanic': 12.627161104219914,
 'Native American/Native Alaskan': 8.717729026240365,
 'White': 4.6356417981453335}


### Observations - Homicide Rates per Race

 - Homicide rate most strongly affects the Black racial category
 - White racial category relatively unaffected compared to most minority racial categories (only Asian/Pacific Islander has a lower rate)

### Next Steps

 - Explore correlation between time of year and homicide rate
 - Explore homicide rate by gender
 - Explore rates of intents by gender and race
 - Explore gun deaths by locations and education