# Exploring Gun Deaths in the U.S.


## Loading the csv file

In [3]:
import csv  # standard-library CSV parser

# Read the file inside a context manager so the handle is closed as soon as
# parsing is done, instead of staying open for the lifetime of the kernel.
# newline="" lets the csv module handle line endings itself, as its
# documentation requires for file objects passed to csv.reader.
with open("guns.csv", newline="") as f:
    data = list(csv.reader(f))  # list of rows; every row is a list of strings

print(data[:5])  # preview: header row plus the first four records

[['', 'year', 'month', 'intent', 'police', 'sex', 'age', 'race', 'hispanic', 'place', 'education'], ['1', '2012', '01', 'Suicide', '0', 'M', '34', 'Asian/Pacific Islander', '100', 'Home', '4'], ['2', '2012', '01', 'Suicide', '0', 'F', '21', 'White', '100', 'Street', '3'], ['3', '2012', '01', 'Suicide', '0', 'M', '60', 'White', '100', 'Other specified', '4'], ['4', '2012', '02', 'Suicide', '0', 'M', '64', 'White', '100', 'Home', '4']]


## Removing the first row header

In [4]:
# Separate the column names from the records.
# NOTE: this cell rebinds `data`, so running it a second time without
# reloading the file would drop one more row.
headers = data[0]  # first row of the file holds the column names
data = data[1:]    # every remaining row is a record

print(data[:5])  # preview the first five records

[['1', '2012', '01', 'Suicide', '0', 'M', '34', 'Asian/Pacific Islander', '100', 'Home', '4'], ['2', '2012', '01', 'Suicide', '0', 'F', '21', 'White', '100', 'Street', '3'], ['3', '2012', '01', 'Suicide', '0', 'M', '60', 'White', '100', 'Other specified', '4'], ['4', '2012', '02', 'Suicide', '0', 'M', '64', 'White', '100', 'Home', '4'], ['5', '2012', '02', 'Suicide', '0', 'M', '31', 'White', '100', 'Other specified', '2']]


In [7]:
print(headers) #headers extracted: the first row of guns.csv, i.e. the column names

['', 'year', 'month', 'intent', 'police', 'sex', 'age', 'race', 'hispanic', 'place', 'education']


## Counting gun deaths by year

**Observations**
* Gun deaths did not change much from 2012 to 2014

In [16]:
# Column 1 of every record is the year, kept as a string.
years = [record[1] for record in data]
years[:5]

['2012', '2012', '2012', '2012', '2012']

In [18]:
# Tally how many records fall in each year.
year_counts = {}

for yr in years:
    # .get() supplies 0 the first time a year is seen
    year_counts[yr] = year_counts.get(yr, 0) + 1

year_counts

{'2012': 33563, '2013': 33636, '2014': 33599}

## Exploring gun deaths by months and years

**Observations**
* Broken down by month and year, gun deaths appear to be seasonal: they peak during the summer and decline in winter

In [20]:
# Display the column names; a name's position in this list is its index in each data row.
headers

['',
 'year',
 'month',
 'intent',
 'police',
 'sex',
 'age',
 'race',
 'hispanic',
 'place',
 'education']

In [25]:
import datetime

# One datetime per record, built from the year (column 1) and month (column 2).
# The day is pinned to 1 for every record, so all deaths in the same month
# map to the same datetime value.
dates = [
    datetime.datetime(int(record[1]), int(record[2]), 1)
    for record in data
]

dates[:5]  # datetime instances, not strings

[datetime.datetime(2012, 1, 1, 0, 0),
 datetime.datetime(2012, 1, 1, 0, 0),
 datetime.datetime(2012, 1, 1, 0, 0),
 datetime.datetime(2012, 2, 1, 0, 0),
 datetime.datetime(2012, 2, 1, 0, 0)]

In [31]:
# Tally the number of records for each distinct (year, month) datetime.
date_counts = {}

for month_start in dates:
    # .get() supplies 0 the first time a date is seen
    date_counts[month_start] = date_counts.get(month_start, 0) + 1

date_counts

{datetime.datetime(2012, 1, 1, 0, 0): 2758,
 datetime.datetime(2012, 2, 1, 0, 0): 2357,
 datetime.datetime(2012, 3, 1, 0, 0): 2743,
 datetime.datetime(2012, 4, 1, 0, 0): 2795,
 datetime.datetime(2012, 5, 1, 0, 0): 2999,
 datetime.datetime(2012, 6, 1, 0, 0): 2826,
 datetime.datetime(2012, 7, 1, 0, 0): 3026,
 datetime.datetime(2012, 8, 1, 0, 0): 2954,
 datetime.datetime(2012, 9, 1, 0, 0): 2852,
 datetime.datetime(2012, 10, 1, 0, 0): 2733,
 datetime.datetime(2012, 11, 1, 0, 0): 2729,
 datetime.datetime(2012, 12, 1, 0, 0): 2791,
 datetime.datetime(2013, 1, 1, 0, 0): 2864,
 datetime.datetime(2013, 2, 1, 0, 0): 2375,
 datetime.datetime(2013, 3, 1, 0, 0): 2862,
 datetime.datetime(2013, 4, 1, 0, 0): 2798,
 datetime.datetime(2013, 5, 1, 0, 0): 2806,
 datetime.datetime(2013, 6, 1, 0, 0): 2920,
 datetime.datetime(2013, 7, 1, 0, 0): 3079,
 datetime.datetime(2013, 8, 1, 0, 0): 2859,
 datetime.datetime(2013, 9, 1, 0, 0): 2742,
 datetime.datetime(2013, 10, 1, 0, 0): 2808,
 datetime.datetime(2013, 11,

# Exploring Race and Sex

**Observations**
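* Far more men than women are involved
* White victims account for the most deaths in absolute terms, followed by Black victims; these are raw counts and do not yet account for the size of each population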

In [32]:
# Display the column names; a name's position in this list is its index in each data row.
headers

['',
 'year',
 'month',
 'intent',
 'police',
 'sex',
 'age',
 'race',
 'hispanic',
 'place',
 'education']

In [33]:
# Tally deaths per sex; the sex code sits in column 5 of each record.
sex_counts = {}

for record in data:
    sex = record[5]
    sex_counts[sex] = sex_counts.get(sex, 0) + 1

sex_counts


{'F': 14449, 'M': 86349}

In [36]:
# Tally deaths per race; the race label sits in column 7 of each record.
race_counts = {}

for record in data:
    race_label = record[7]
    race_counts[race_label] = race_counts.get(race_label, 0) + 1

race_counts


{'Asian/Pacific Islander': 1326,
 'Black': 23296,
 'Hispanic': 9022,
 'Native American/Native Alaskan': 917,
 'White': 66237}

# Findings so far 
**Summary**
* Annual gun deaths do not change much
* Seasonal, peak during summer and decline in winter
* Far more men are involved than women
* By raw count, White victims are the largest group, followed by Black victims

**Next Steps**
* Filter by intent
* Normalize by population: raw counts by race can be misleading because we do not yet know what share of the population each race represents

# Improving Analysis with 2nd Dataset

The second dataset gives the population of each racial category in the US (April 1, 2010 Census). With it we can compute the rate of gun deaths per `100000` people for each race over the years 2012 to 2014.

## Loading the 2nd dataset

In [37]:
import csv  # also imported in the first cell; repeated so this cell runs on its own

# Read the census file inside a context manager so the handle is closed once
# parsing finishes. newline="" lets the csv module handle line endings itself,
# as its documentation requires for file objects passed to csv.reader.
with open("census.csv", newline="") as f:
    census = list(csv.reader(f))  # header row followed by the data row(s)

census

[['Id',
  'Year',
  'Id',
  'Sex',
  'Id',
  'Hispanic Origin',
  'Id',
  'Id2',
  'Geography',
  'Total',
  'Race Alone - White',
  'Race Alone - Hispanic',
  'Race Alone - Black or African American',
  'Race Alone - American Indian and Alaska Native',
  'Race Alone - Asian',
  'Race Alone - Native Hawaiian and Other Pacific Islander',
  'Two or More Races'],
 ['cen42010',
  'April 1, 2010 Census',
  'totsex',
  'Both Sexes',
  'tothisp',
  'Total',
  '0100000US',
  '',
  'United States',
  '308745538',
  '197318956',
  '44618105',
  '40250635',
  '3739506',
  '15159516',
  '674625',
  '6984195']]

## Computing rates of gun deaths per race

* Since race names are slightly different in the two datasets, a mapping dictionary needs to be created
* "Race Alone - Asian" and "Race Alone - Native Hawaiian and Other Pacific Islander" are added together to form "Asian/Pacific Islander"
* Gun deaths by race will be expressed per 100k people

In [39]:
# Population of each race category, keyed by the race labels used in the
# guns data. The figures are copied by hand from the census row shown above.
# The guns data has a single "Asian/Pacific Islander" label, so the two
# corresponding census columns are summed.
asian_pacific_islander = 15159516 + 674625

mapping = {
    "Asian/Pacific Islander": asian_pacific_islander,
    "Native American/Native Alaskan": 3739506,
    "Black": 40250635,
    "Hispanic": 44618105,
    "White": 197318956,
}

In [40]:
# Raw death counts per race, tallied earlier from column 7 of the guns data.
race_counts

{'Asian/Pacific Islander': 1326,
 'Black': 23296,
 'Hispanic': 9022,
 'Native American/Native Alaskan': 917,
 'White': 66237}

In [44]:
# Gun deaths per 100,000 people for each race:
# (death count for the race / population of the race) * 100000
race_per_hundredk = {
    race: race_counts[race] / mapping[race] * 100000
    for race in race_counts
}

print(race_per_hundredk)

{'White': 33.56849303419181, 'Asian/Pacific Islander': 8.374309664161762, 'Black': 57.8773477735196, 'Hispanic': 20.220491210910907, 'Native American/Native Alaskan': 24.521955573811088}


## Filtering by Intent
* restrict to homicide intent, excluding suicide and others
* tells us the gun related murder rate per `100000` people

In [50]:
# Column 3 of every record is the intent (e.g. 'Suicide', 'Homicide').
intents = [record[3] for record in data]

intents[0:5]

['Suicide', 'Suicide', 'Suicide', 'Suicide', 'Suicide']

In [51]:
# Column 7 of every record is the race label.
races = [record[7] for record in data]

races[0:5]

['Asian/Pacific Islander', 'White', 'White', 'White', 'White']

In [53]:
# Count homicide records per race.
# NOTE: despite the "_per_hundredk" suffix, this dict holds raw counts;
# the conversion to a per-100k rate happens in a later cell.
homicide_race_per_hundredk = {}

# `intents` and `races` are parallel lists (one entry per record), so they
# can be walked together.
for intent, race in zip(intents, races):
    if intent != "Homicide":
        continue  # only homicides are counted here
    homicide_race_per_hundredk[race] = homicide_race_per_hundredk.get(race, 0) + 1


In [55]:
# Despite the "_per_hundredk" suffix, these are raw homicide counts per race, not rates.
homicide_race_per_hundredk

{'Asian/Pacific Islander': 559,
 'Black': 19510,
 'Hispanic': 5634,
 'Native American/Native Alaskan': 326,
 'White': 9147}

In [56]:
# Population per race category, keyed by the race labels used in the guns data.
mapping

{'Asian/Pacific Islander': 15834141,
 'Black': 40250635,
 'Hispanic': 44618105,
 'Native American/Native Alaskan': 3739506,
 'White': 197318956}

In [57]:
# Gun homicides per 100,000 people for each race:
# (homicide count for the race / population of the race) * 100000
# NOTE: this rebinds `race_per_hundredk`, replacing the all-intent rates
# computed in an earlier cell.
race_per_hundredk = {
    race: homicide_race_per_hundredk[race] / mapping[race] * 100000
    for race in homicide_race_per_hundredk
}
    

In [58]:
# Homicide rates per 100,000 people by race (this name held the all-intent rates in an earlier cell).
race_per_hundredk

{'Asian/Pacific Islander': 3.530346230970155,
 'Black': 48.471284987180944,
 'Hispanic': 12.627161104219914,
 'Native American/Native Alaskan': 8.717729026240365,
 'White': 4.6356417981453335}

## Findings
It appears that gun-related homicides in the US disproportionately affect people in the Black and Hispanic racial categories.

**Some areas to investigate further:**
* The link between month and homicide rate.
* Homicide rate by gender.
* The rates of other intents by gender and race.
* Gun death rates by location and education.