In [14]:

## Introducing US Gun Deaths Data

import csv
import datetime
# Load every row of guns.csv into memory. The with-block closes the file
# handle as soon as reading finishes; newline="" lets the csv module do
# its own line-ending handling, as the csv documentation recommends.
with open("guns.csv", "r", newline="") as f:
    data = list(csv.reader(f))
headers = data[0]  # the first row holds the column names
data = data[1:]  # remove headers
data[0]

['1',
 '2012',
 '01',
 'Suicide',
 '0',
 'M',
 '34',
 'Asian/Pacific Islander',
 '100',
 'Home',
 'BA+']

In [11]:
## Count the gun deaths by each year

# Column 1 of every row holds the year (kept as a string, as read from the CSV).
years = [row[1] for row in data]

# Tally how many rows fall into each year.
year_counts = {}
for label in years:
    year_counts[label] = year_counts.get(label, 0) + 1
year_counts

{'2012': 33563, '2013': 33636, '2014': 33599}

In [15]:
## Count the gun deaths by each month

# The table has no day column, so every record is pinned to day 1 of its month.
dates = [
    datetime.datetime(year=int(row[1]), month=int(row[2]), day=1)
    for row in data
]

# String form of each datetime; these strings are the dictionary keys below.
str_dates = [str(moment) for moment in dates]

# Tally the gun deaths for each month of each year.
dates_counts = {}
for stamp in str_dates:
    dates_counts[stamp] = dates_counts.get(stamp, 0) + 1

dates_counts

{'2012-01-01 00:00:00': 2758,
 '2012-02-01 00:00:00': 2357,
 '2012-03-01 00:00:00': 2743,
 '2012-04-01 00:00:00': 2795,
 '2012-05-01 00:00:00': 2999,
 '2012-06-01 00:00:00': 2826,
 '2012-07-01 00:00:00': 3026,
 '2012-08-01 00:00:00': 2954,
 '2012-09-01 00:00:00': 2852,
 '2012-10-01 00:00:00': 2733,
 '2012-11-01 00:00:00': 2729,
 '2012-12-01 00:00:00': 2791,
 '2013-01-01 00:00:00': 2864,
 '2013-02-01 00:00:00': 2375,
 '2013-03-01 00:00:00': 2862,
 '2013-04-01 00:00:00': 2798,
 '2013-05-01 00:00:00': 2806,
 '2013-06-01 00:00:00': 2920,
 '2013-07-01 00:00:00': 3079,
 '2013-08-01 00:00:00': 2859,
 '2013-09-01 00:00:00': 2742,
 '2013-10-01 00:00:00': 2808,
 '2013-11-01 00:00:00': 2758,
 '2013-12-01 00:00:00': 2765,
 '2014-01-01 00:00:00': 2651,
 '2014-02-01 00:00:00': 2361,
 '2014-03-01 00:00:00': 2684,
 '2014-04-01 00:00:00': 2862,
 '2014-05-01 00:00:00': 2864,
 '2014-06-01 00:00:00': 2931,
 '2014-07-01 00:00:00': 2884,
 '2014-08-01 00:00:00': 2970,
 '2014-09-01 00:00:00': 2914,
 '2014-10-

In [18]:
## Analyzing data by sex

# Tally gun deaths by the sex code stored in column 5 of each row.
sex_counts = {}
for row in data:
    code = row[5]
    sex_counts[code] = sex_counts.get(code, 0) + 1

sex_counts
    

        

{'M': 86349, 'F': 14449}

In [17]:
## Analyzing data by race

# Tally gun deaths by the race label stored in column 7 of each row.
race_counts = {}
for row in data:
    label = row[7]
    race_counts.setdefault(label, 0)  # first sighting starts the tally at zero
    race_counts[label] += 1

race_counts

{'Asian/Pacific Islander': 1326,
 'White': 66237,
 'Native American/Native Alaskan': 917,
 'Black': 23296,
 'Hispanic': 9022}

In [22]:
## Let's count the number of people killed by guns out of every 100 000 people from 2012 to 2014

# Read the whole census file up front. The with-block closes the file
# handle once the rows are loaded; newline="" lets the csv module do its
# own line-ending handling, as the csv documentation recommends.
with open("census.csv", "r", newline="") as f:
    census = list(csv.reader(f))

# The dictionary is built by hand because racial categories are named
# slightly differently in census.csv than in guns.csv. Every value is read
# from row 1 of the census file (row 0 is presumably the header — confirm).
mapping = {
    # Two census columns are summed for this category — presumably separate
    # "Asian" and "Pacific Islander" counts; verify against the census header.
    "Asian/Pacific Islander": int(census[1][14]) + int(census[1][15]),
    "Black": int(census[1][12]),
    "Hispanic": int(census[1][11]),
    "Native American/Native Alaskan": int(census[1][13]),
    "White": int(census[1][10])
    }

mapping

{'Asian/Pacific Islander': 15834141,
 'Black': 40250635,
 'Hispanic': 44618105,
 'Native American/Native Alaskan': 3739506,
 'White': 197318956}

In [23]:
## Computing Rates Of Gun Deaths Per Race

# Pair each race's death count (race_counts) with its population (mapping)
# via .items(), and scale the ratio to deaths per 100 000 people.
race_per_hundredk = {
    group: (deaths / mapping[group]) * 100000
    for group, deaths in race_counts.items()
}

race_per_hundredk

{'Asian/Pacific Islander': 8.374309664161762,
 'White': 33.56849303419181,
 'Native American/Native Alaskan': 24.521955573811088,
 'Black': 57.8773477735196,
 'Hispanic': 20.220491210910907}

In [25]:
## Let's filter the results and restrict them to the Homicide intent

# Column 3 is the intent and column 7 is the race; both lists stay aligned
# with data row by row.
intents = [row[3] for row in data]
races = [row[7] for row in data]

# Count deaths per race, keeping only rows whose intent is "Homicide".
homicide_race_counts = {}
for intent, group in zip(intents, races):
    if intent != "Homicide":
        continue
    homicide_race_counts[group] = homicide_race_counts.get(group, 0) + 1

homicide_race_counts



{'White': 9147,
 'Asian/Pacific Islander': 559,
 'Black': 19510,
 'Native American/Native Alaskan': 326,
 'Hispanic': 5634}

In [26]:
# Gun homicides per 100 000 people for each race: the homicide count
# divided by that race's population in mapping, scaled by 100 000.
homicide_race_per_hundredk = {
    group: (killed / mapping[group] * 100000)
    for group, killed in homicide_race_counts.items()
}

homicide_race_per_hundredk



{'White': 4.6356417981453335,
 'Asian/Pacific Islander': 3.530346230970155,
 'Black': 48.471284987180944,
 'Native American/Native Alaskan': 8.717729026240365,
 'Hispanic': 12.627161104219914}

In [None]:
## As we can see, the highest gun homicide rates per 100 000 people are among the Black and Hispanic groups