# Exploring Gun Deaths in the US

## Data Preparation

In [7]:
import csv
from collections import defaultdict
class Dataset():
    """In-memory view of a CSV file: one header row plus the data rows.

    Every cell is kept as the string produced by ``csv.reader``; no type
    conversion is performed.
    """

    def __init__(self, file_name):
        """Read ``file_name`` and split it into header and data rows.

        Args:
            file_name: Path of the CSV file to load.

        Raises:
            OSError, UnicodeError, csv.Error: If the file cannot be opened,
                decoded or parsed. A short hint is printed before the
                original error is re-raised, so the instance is never left
                without ``header`` / ``data`` attributes.
        """
        try:
            # The context manager closes the handle even when parsing fails;
            # newline="" is what the csv module expects for file objects.
            with open(file_name, "r", newline="") as f:
                data_with_header = list(csv.reader(f))
        except (OSError, UnicodeError, csv.Error):
            print("check the file name and type")
            raise
        # An empty file yields no rows at all, so there is no header to take.
        self.header = data_with_header[0] if data_with_header else []
        self.data = data_with_header[1:]

    def get_data(self):
        """Return the list of data rows (everything after the header)."""
        return self.data

    def get_headers(self):
        """Return the list of column names read from the first row."""
        return self.header

    def _get_column_index(self, column_name):
        """Return the position of the first header equal to ``column_name``.

        Returns -1 when no header matches.
        """
        try:
            return self.header.index(column_name)
        except ValueError:
            return -1

    def extract_column(self, column_name):
        """Return the values of one column, in row order.

        Args:
            column_name: Header of the column to extract. When several
                headers share the name, the first one is used.

        Raises:
            KeyError: If ``column_name`` is not one of the headers.
        """
        column_index = self._get_column_index(column_name)
        # Without this check the -1 sentinel would index the LAST column of
        # every row and quietly return the wrong data.
        if column_index < 0:
            raise KeyError(column_name)
        return [row[column_index] for row in self.data]

    def column_value_count(self, column_name):
        """Count how many rows carry each distinct value of a column.

        Args:
            column_name: Header of the column to tally.

        Returns:
            ``defaultdict(int)`` mapping each value to its number of rows.

        Raises:
            KeyError: If ``column_name`` is not one of the headers.
        """
        column_dict = defaultdict(int)
        for value in self.extract_column(column_name):
            column_dict[value] += 1
        return column_dict

In [8]:
# Load the gun dataset and keep its raw rows in `data`.
guns_dataset = Dataset("guns.csv")
data = guns_dataset.get_data()

# Preview the column names followed by the first five records.
print("#### Headers:")
print(guns_dataset.get_headers())
print("#### Data:")
print(data[:5])

#### Headers:
['', 'year', 'month', 'intent', 'police', 'sex', 'age', 'race', 'hispanic', 'place', 'education']
#### Data:
[['1', '2012', '01', 'Suicide', '0', 'M', '34', 'Asian/Pacific Islander', '100', 'Home', '4'], ['2', '2012', '01', 'Suicide', '0', 'F', '21', 'White', '100', 'Street', '3'], ['3', '2012', '01', 'Suicide', '0', 'M', '60', 'White', '100', 'Other specified', '4'], ['4', '2012', '02', 'Suicide', '0', 'M', '64', 'White', '100', 'Home', '4'], ['5', '2012', '02', 'Suicide', '0', 'M', '31', 'White', '100', 'Other specified', '2']]


In [11]:
# Tally how many rows of the gun dataset carry each 'year' value.
year_counts = guns_dataset.column_value_count('year')

In [12]:
year_counts

defaultdict(int, {'2012': 33563, '2013': 33636, '2014': 33599})

## Adding dates

In [14]:
import datetime

def get_dates(data):
    """Turn each record into a ``datetime`` pinned to the first of its month.

    Args:
        data: Iterable of rows in which position 1 holds the year and
            position 2 holds the month, each convertible with ``int``.

    Returns:
        A list of ``datetime.datetime`` objects, one per row, in input order.
        The day is always 1.
    """
    return [
        datetime.datetime(year=int(record[1]), month=int(record[2]), day=1)
        for record in data
    ]

In [15]:
# One datetime per row of `data`; get_dates fixes the day to the 1st.
dates = get_dates(data)

In [16]:
print(dates[:5])

[datetime.datetime(2012, 1, 1, 0, 0), datetime.datetime(2012, 1, 1, 0, 0), datetime.datetime(2012, 1, 1, 0, 0), datetime.datetime(2012, 2, 1, 0, 0), datetime.datetime(2012, 2, 1, 0, 0)]


In [24]:
# Count how many rows share each month-start timestamp.
date_counts = {}
for date in dates:
    # A date not seen before starts from zero.
    date_counts[date] = date_counts.get(date, 0) + 1
date_counts

{datetime.datetime(2012, 1, 1, 0, 0): 2758,
 datetime.datetime(2012, 2, 1, 0, 0): 2357,
 datetime.datetime(2012, 3, 1, 0, 0): 2743,
 datetime.datetime(2012, 4, 1, 0, 0): 2795,
 datetime.datetime(2012, 5, 1, 0, 0): 2999,
 datetime.datetime(2012, 6, 1, 0, 0): 2826,
 datetime.datetime(2012, 7, 1, 0, 0): 3026,
 datetime.datetime(2012, 8, 1, 0, 0): 2954,
 datetime.datetime(2012, 9, 1, 0, 0): 2852,
 datetime.datetime(2012, 10, 1, 0, 0): 2733,
 datetime.datetime(2012, 11, 1, 0, 0): 2729,
 datetime.datetime(2012, 12, 1, 0, 0): 2791,
 datetime.datetime(2013, 1, 1, 0, 0): 2864,
 datetime.datetime(2013, 2, 1, 0, 0): 2375,
 datetime.datetime(2013, 3, 1, 0, 0): 2862,
 datetime.datetime(2013, 4, 1, 0, 0): 2798,
 datetime.datetime(2013, 5, 1, 0, 0): 2806,
 datetime.datetime(2013, 6, 1, 0, 0): 2920,
 datetime.datetime(2013, 7, 1, 0, 0): 3079,
 datetime.datetime(2013, 8, 1, 0, 0): 2859,
 datetime.datetime(2013, 9, 1, 0, 0): 2742,
 datetime.datetime(2013, 10, 1, 0, 0): 2808,
 datetime.datetime(2013, 11,

In [25]:
# Count rows per distinct value of the 'sex' column.
sex_counts = guns_dataset.column_value_count('sex')

In [26]:
sex_counts

defaultdict(int, {'F': 14449, 'M': 86349})

In [27]:
# Count rows per distinct value of the 'race' column.
race_counts = guns_dataset.column_value_count('race')

In [28]:
race_counts

defaultdict(int,
            {'Asian/Pacific Islander': 1326,
             'Black': 23296,
             'Hispanic': 9022,
             'Native American/Native Alaskan': 917,
             'White': 66237})

With the counting logic wrapped in reusable methods, we can easily compute the value counts for any column and analyze its distribution.

## Census Information

In [29]:
# Load the census file through the same Dataset wrapper used for guns.csv.
census_dataset = Dataset("census.csv")

In [30]:
print(census_dataset.get_headers())

['Id', 'Year', 'Id', 'Sex', 'Id', 'Hispanic Origin', 'Id', 'Id2', 'Geography', 'Total', 'Race Alone - White', 'Race Alone - Hispanic', 'Race Alone - Black or African American', 'Race Alone - American Indian and Alaska Native', 'Race Alone - Asian', 'Race Alone - Native Hawaiian and Other Pacific Islander', 'Two or More Races']


In [31]:
# Print every data row of the census file (the recorded output shows a single row).
print(census_dataset.get_data())

[['cen42010', 'April 1, 2010 Census', 'totsex', 'Both Sexes', 'tothisp', 'Total', '0100000US', '', 'United States', '308745538', '197318956', '44618105', '40250635', '3739506', '15159516', '674625', '6984195']]


In [32]:
# Show each census column name next to its position in the row.
for i, name in enumerate(census_dataset.get_headers()):
    print(i, name)

0 Id
1 Year
2 Id
3 Sex
4 Id
5 Hispanic Origin
6 Id
7 Id2
8 Geography
9 Total
10 Race Alone - White
11 Race Alone - Hispanic
12 Race Alone - Black or African American
13 Race Alone - American Indian and Alaska Native
14 Race Alone - Asian
15 Race Alone - Native Hawaiian and Other Pacific Islander
16 Two or More Races


In [34]:
# Take the first census data row and pull out the per-race totals by position.
# Positions follow the census header:
#   10 White, 11 Hispanic, 12 Black or African American,
#   13 American Indian and Alaska Native, 14 Asian,
#   15 Native Hawaiian and Other Pacific Islander.
population = census_dataset.get_data()[0]
population_count = {
    # Two separate census columns are summed for this one key.
    'Asian/Pacific Islander': int(population[14]) + int(population[15]),
    'Black': int(population[12]),
    'Hispanic': int(population[11]),
    'Native American/Native Alaskan': int(population[13]),
    'White': int(population[10]),
}
population_count

{'Asian/Pacific Islander': 15834141,
 'Black': 40250635,
 'Hispanic': 44618105,
 'Native American/Native Alaskan': 3739506,
 'White': 197318956}

In [35]:
# Express each race's row count as a rate per 100,000 of its census population.
race_per_hundredk = {}
for race, guns in race_counts.items():
    pop = population_count[race]
    race_per_hundredk[race] = float(guns) / float(pop) * 100000

In [36]:
race_per_hundredk

{'Asian/Pacific Islander': 8.374309664161762,
 'Black': 57.8773477735196,
 'Hispanic': 20.220491210910907,
 'Native American/Native Alaskan': 24.521955573811088,
 'White': 33.56849303419181}

## Focus on Homicide

In [38]:
# Tally, per race, only the rows whose intent is 'Homicide'.
intent_column = guns_dataset.extract_column('intent')
race_column = guns_dataset.extract_column('race')
homicide_race_counts = defaultdict(int)
for i, intent in enumerate(intent_column):
    if intent != 'Homicide':
        continue
    # Both columns come from the same rows, so index i lines them up.
    homicide_race_counts[race_column[i]] += 1

In [None]:
# Homicide rate per 100,000 of the census population, by race.
# This uses the homicide-only tallies (homicide_race_counts) rather than the
# all-intent race_counts, and stores the result in race_homicide_per_hundredk,
# so the overall rates held in race_per_hundredk are not overwritten.
race_homicide_per_hundredk = {}
for race in homicide_race_counts:
    guns = homicide_race_counts[race]
    pop = population_count[race]
    race_homicide_per_hundredk[race] = float(guns)/float(pop) * 100000