In [55]:
import csv
import datetime
import re

In [53]:
f_guns = open("guns.csv")
raw_data = csv.reader(f_guns)
data = list(raw_data)
data[:5]

[['',
  'year',
  'month',
  'intent',
  'police',
  'sex',
  'age',
  'race',
  'hispanic',
  'place',
  'education'],
 ['1',
  '2012',
  '01',
  'Suicide',
  '0',
  'M',
  '34',
  'Asian/Pacific Islander',
  '100',
  'Home',
  '4'],
 ['2', '2012', '01', 'Suicide', '0', 'F', '21', 'White', '100', 'Street', '3'],
 ['3',
  '2012',
  '01',
  'Suicide',
  '0',
  'M',
  '60',
  'White',
  '100',
  'Other specified',
  '4'],
 ['4', '2012', '02', 'Suicide', '0', 'M', '64', 'White', '100', 'Home', '4']]

In [3]:
headers = data[0]
data = data[1:]

In [4]:
headers

['',
 'year',
 'month',
 'intent',
 'police',
 'sex',
 'age',
 'race',
 'hispanic',
 'place',
 'education']

In [5]:
data[:5]

[['1',
  '2012',
  '01',
  'Suicide',
  '0',
  'M',
  '34',
  'Asian/Pacific Islander',
  '100',
  'Home',
  '4'],
 ['2', '2012', '01', 'Suicide', '0', 'F', '21', 'White', '100', 'Street', '3'],
 ['3',
  '2012',
  '01',
  'Suicide',
  '0',
  'M',
  '60',
  'White',
  '100',
  'Other specified',
  '4'],
 ['4', '2012', '02', 'Suicide', '0', 'M', '64', 'White', '100', 'Home', '4'],
 ['5',
  '2012',
  '02',
  'Suicide',
  '0',
  'M',
  '31',
  'White',
  '100',
  'Other specified',
  '2']]

In [6]:
#the year column 
years = [row[1] for row in data]

In [7]:
#to see how much gun death occur each year
year_counts = {}
for row in data:
    year = row[1]
    if year in year_counts:
        year_counts[year] = year_counts[year] + 1
    else:
        year_counts[year] = 1
year_counts

{'2012': 33563, '2013': 33636, '2014': 33599}

In [8]:
#a list of datetime object
dates = [datetime.datetime(year = int(row[1]), month = int(row[2]), day = 1) for row in data]

In [9]:
#display the first five object
dates[:5]

[datetime.datetime(2012, 1, 1, 0, 0),
 datetime.datetime(2012, 1, 1, 0, 0),
 datetime.datetime(2012, 1, 1, 0, 0),
 datetime.datetime(2012, 2, 1, 0, 0),
 datetime.datetime(2012, 2, 1, 0, 0)]

In [10]:
#to see num of unique dates
date_counts = {}
for item in dates:
    if item in date_counts:
        date_counts[item] = date_counts[item] + 1
    else:
        date_counts[item] = 1
date_counts

{datetime.datetime(2012, 1, 1, 0, 0): 2758,
 datetime.datetime(2012, 2, 1, 0, 0): 2357,
 datetime.datetime(2012, 3, 1, 0, 0): 2743,
 datetime.datetime(2012, 4, 1, 0, 0): 2795,
 datetime.datetime(2012, 5, 1, 0, 0): 2999,
 datetime.datetime(2012, 6, 1, 0, 0): 2826,
 datetime.datetime(2012, 7, 1, 0, 0): 3026,
 datetime.datetime(2012, 8, 1, 0, 0): 2954,
 datetime.datetime(2012, 9, 1, 0, 0): 2852,
 datetime.datetime(2012, 10, 1, 0, 0): 2733,
 datetime.datetime(2012, 11, 1, 0, 0): 2729,
 datetime.datetime(2012, 12, 1, 0, 0): 2791,
 datetime.datetime(2013, 1, 1, 0, 0): 2864,
 datetime.datetime(2013, 2, 1, 0, 0): 2375,
 datetime.datetime(2013, 3, 1, 0, 0): 2862,
 datetime.datetime(2013, 4, 1, 0, 0): 2798,
 datetime.datetime(2013, 5, 1, 0, 0): 2806,
 datetime.datetime(2013, 6, 1, 0, 0): 2920,
 datetime.datetime(2013, 7, 1, 0, 0): 3079,
 datetime.datetime(2013, 8, 1, 0, 0): 2859,
 datetime.datetime(2013, 9, 1, 0, 0): 2742,
 datetime.datetime(2013, 10, 1, 0, 0): 2808,
 datetime.datetime(2013, 11,

In [11]:
#to see num of male and female
sex_counts = {}
for row in data:
    sex = row[5]
    if sex in sex_counts:
        sex_counts[sex] = sex_counts[sex] + 1
    else:
        sex_counts[sex] = 1
sex_counts

{'F': 14449, 'M': 86349}

In [12]:
#num of race involved in gun violence
race_counts = {}
for row in data:
    race = row[7]
    if race in race_counts:
        race_counts[race] = race_counts[race] + 1
    else:
        race_counts[race] = 1
race_counts

{'Asian/Pacific Islander': 1326,
 'Black': 23296,
 'Hispanic': 9022,
 'Native American/Native Alaskan': 917,
 'White': 66237}

Looking at the numbers, white people and male have the highest death count due to guns. White alone makes up about 74% of the population in the US(https://en.wikipedia.org/wiki/Race_and_ethnicity_in_the_United_States), so it makes sense that there are more white gun death than any other race. However, this information alone is not enough for us to make any useful insight. We need to look at the proportions of gun death in each race in order to find out which race is more likely to fall victim to gun violence.

Although the number of female and male in the US are roughly the same(https://www.statista.com/statistics/301946/us-population-males-per-100-females-by-state/), there are more male death than female death due to guns. One possible reason may be that females are less likely to get involved in such situations, but we won't be able to get an answer without looking at more data. One important detail that might be helpful is how a person died to a gun. For example, women are less likely to use gun as a method because they tend to use less violent methods. Whereas men are more likely to use methods such as guns and sharp objects in their sucide attempt(https://www.verywellmind.com/gender-differences-in-suicide-methods-1067508). So, it is reasonable to assume that the majority of suicide due to gun in this data are male.

In [13]:
f_census = open("census.csv")
census = list(csv.reader(f_census))
census

[['Id',
  'Year',
  'Id',
  'Sex',
  'Id',
  'Hispanic Origin',
  'Id',
  'Id2',
  'Geography',
  'Total',
  'Race Alone - White',
  'Race Alone - Hispanic',
  'Race Alone - Black or African American',
  'Race Alone - American Indian and Alaska Native',
  'Race Alone - Asian',
  'Race Alone - Native Hawaiian and Other Pacific Islander',
  'Two or More Races'],
 ['cen42010',
  'April 1, 2010 Census',
  'totsex',
  'Both Sexes',
  'tothisp',
  'Total',
  '0100000US',
  '',
  'United States',
  '308745538',
  '197318956',
  '44618105',
  '40250635',
  '3739506',
  '15159516',
  '674625',
  '6984195']]

In [14]:
mapping = {
    "Asian/Pacific Islander": 15159516 + 674625,
    "Native American/Native Alaskan": 3739506,
    "Black": 40250635,
    "Hispanic": 44618105,
    "White": 197318956
}

In [15]:
race_per_hundredk = {}
for key, value in race_counts.items():
    race_per_hundredk[key] = (value/mapping[key])*100000

In [16]:
race_per_hundredk

{'Asian/Pacific Islander': 8.374309664161762,
 'Black': 57.8773477735196,
 'Hispanic': 20.220491210910907,
 'Native American/Native Alaskan': 24.521955573811088,
 'White': 33.56849303419181}

In [17]:
intents = [row[3] for row in data]
intents

['Suicide',
 'Suicide',
 'Suicide',
 'Suicide',
 'Suicide',
 'Suicide',
 'Undetermined',
 'Suicide',
 'Accidental',
 'Suicide',
 'Suicide',
 'Suicide',
 'Suicide',
 'Suicide',
 'Homicide',
 'Suicide',
 'Suicide',
 'Suicide',
 'Homicide',
 'Suicide',
 'Suicide',
 'Suicide',
 'Homicide',
 'Suicide',
 'Homicide',
 'Homicide',
 'Suicide',
 'Suicide',
 'Suicide',
 'Homicide',
 'Suicide',
 'Suicide',
 'Suicide',
 'Suicide',
 'Homicide',
 'Undetermined',
 'Suicide',
 'Suicide',
 'Suicide',
 'Suicide',
 'Suicide',
 'Homicide',
 'Suicide',
 'Suicide',
 'Suicide',
 'Suicide',
 'Suicide',
 'Homicide',
 'Homicide',
 'Homicide',
 'Suicide',
 'Suicide',
 'Suicide',
 'Undetermined',
 'Suicide',
 'Suicide',
 'Accidental',
 'Homicide',
 'Suicide',
 'Homicide',
 'Suicide',
 'Homicide',
 'Suicide',
 'Suicide',
 'Suicide',
 'Suicide',
 'Suicide',
 'Suicide',
 'Suicide',
 'Suicide',
 'Suicide',
 'Accidental',
 'Suicide',
 'Accidental',
 'Suicide',
 'Undetermined',
 'Suicide',
 'Suicide',
 'Suicide',
 'Homi

In [18]:
races = [row[7] for row in data]
races

['Asian/Pacific Islander',
 'White',
 'White',
 'White',
 'White',
 'Native American/Native Alaskan',
 'White',
 'Native American/Native Alaskan',
 'White',
 'Black',
 'White',
 'Native American/Native Alaskan',
 'White',
 'Native American/Native Alaskan',
 'White',
 'Native American/Native Alaskan',
 'White',
 'Native American/Native Alaskan',
 'Asian/Pacific Islander',
 'White',
 'Native American/Native Alaskan',
 'White',
 'Black',
 'White',
 'White',
 'White',
 'Native American/Native Alaskan',
 'White',
 'White',
 'White',
 'Native American/Native Alaskan',
 'White',
 'Native American/Native Alaskan',
 'White',
 'White',
 'White',
 'Native American/Native Alaskan',
 'Native American/Native Alaskan',
 'White',
 'White',
 'Native American/Native Alaskan',
 'Asian/Pacific Islander',
 'White',
 'Native American/Native Alaskan',
 'Native American/Native Alaskan',
 'White',
 'White',
 'White',
 'Native American/Native Alaskan',
 'Asian/Pacific Islander',
 'White',
 'White',
 'Native Ame

In [19]:
homicide_race_counts = {}
for i, race in enumerate(races):
    if intents[i] == "Homicide":
        if race in homicide_race_counts:
            homicide_race_counts[race] = homicide_race_counts[race] + 1
        else:
            homicide_race_counts[race] = 1
for key, value in mapping.items():
    homicide_race_counts[key] = (homicide_race_counts[key]/value)*100000
homicide_race_counts

{'Asian/Pacific Islander': 3.530346230970155,
 'Black': 48.471284987180944,
 'Hispanic': 12.627161104219914,
 'Native American/Native Alaskan': 8.717729026240365,
 'White': 4.6356417981453335}

About 48 Black out of every 100,000 died in a gun-related murder. Although white people had the most death due to guns, only 4 White out of every 100,000 died in a gun-related murder. This shows the importance of looking at proportions and not just the raw numbers.

In [31]:
#let's see if there is any link between the month and homicide rate
month = [date.month for date in dates]
homicide_month_counts = {}
for i, month in enumerate(month):
    if intents[i] == "Homicide":
        if month in homicide_month_counts:
            homicide_month_counts[month] = homicide_month_counts[month] + 1
        else:
            homicide_month_counts[month] = 1
homicide_month_counts

{1: 2829,
 2: 2178,
 3: 2780,
 4: 2845,
 5: 2976,
 6: 3130,
 7: 3269,
 8: 3125,
 9: 2966,
 10: 2968,
 11: 2919,
 12: 3191}

All of the months have roughly the same amount of gun-related murder except February. The number of gun-related murder in February is very different from that of the other months. To further explore this abnomally, we will take a look at the month and year to see if February is indeed a month where gun-related murder is at its lowest.

In [47]:
month_year = [date.strftime("%m/%Y") for date in dates]
month_year

['01/2012',
 '01/2012',
 '01/2012',
 '02/2012',
 '02/2012',
 '02/2012',
 '02/2012',
 '03/2012',
 '02/2012',
 '02/2012',
 '02/2012',
 '01/2012',
 '03/2012',
 '03/2012',
 '03/2012',
 '03/2012',
 '03/2012',
 '03/2012',
 '03/2012',
 '04/2012',
 '04/2012',
 '04/2012',
 '04/2012',
 '04/2012',
 '04/2012',
 '04/2012',
 '04/2012',
 '04/2012',
 '04/2012',
 '04/2012',
 '04/2012',
 '04/2012',
 '04/2012',
 '05/2012',
 '04/2012',
 '04/2012',
 '05/2012',
 '05/2012',
 '05/2012',
 '05/2012',
 '05/2012',
 '06/2012',
 '06/2012',
 '06/2012',
 '06/2012',
 '06/2012',
 '06/2012',
 '07/2012',
 '06/2012',
 '06/2012',
 '07/2012',
 '07/2012',
 '06/2012',
 '06/2012',
 '07/2012',
 '07/2012',
 '07/2012',
 '07/2012',
 '07/2012',
 '07/2012',
 '07/2012',
 '08/2012',
 '08/2012',
 '07/2012',
 '07/2012',
 '08/2012',
 '08/2012',
 '08/2012',
 '08/2012',
 '08/2012',
 '08/2012',
 '08/2012',
 '08/2012',
 '09/2012',
 '09/2012',
 '09/2012',
 '09/2012',
 '09/2012',
 '09/2012',
 '09/2012',
 '09/2012',
 '09/2012',
 '10/2012',
 '09

In [50]:
import re
re.findall("[2][0][1][2]", "this is the year 2012 and not and the year 2014")

['2012']

In [46]:
m_y_homicide_count = {}
for i, value in enumerate(month_year):
    if intents[i] == "Homicide":
        if value in m_y_homicide_count:
            m_y_homicide_count[value] = m_y_homicide_count[value] + 1
        else:
            m_y_homicide_count[value] = 1
m_y_homicide_count

{'01/2012': 972,
 '01/2013': 986,
 '01/2014': 871,
 '02/2012': 749,
 '02/2013': 721,
 '02/2014': 708,
 '03/2012': 966,
 '03/2013': 923,
 '03/2014': 891,
 '04/2012': 999,
 '04/2013': 916,
 '04/2014': 930,
 '05/2012': 1003,
 '05/2013': 955,
 '05/2014': 1018,
 '06/2012': 1044,
 '06/2013': 1066,
 '06/2014': 1020,
 '07/2012': 1160,
 '07/2013': 1137,
 '07/2014': 972,
 '08/2012': 1090,
 '08/2013': 1000,
 '08/2014': 1035,
 '09/2012': 1070,
 '09/2013': 954,
 '09/2014': 942,
 '10/2012': 979,
 '10/2013': 1009,
 '10/2014': 980,
 '11/2012': 978,
 '11/2013': 979,
 '11/2014': 962,
 '12/2012': 1083,
 '12/2013': 1028,
 '12/2014': 1080}

We will seperate the data by year so it will be easier to read

In [63]:
homicide_2012_count = {}
homicide_2013_count = {}
homicide_2014_count = {}

for i, value in m_y_homicide_count.items():
    if re.search("[2][0][1][2]", i) is not None:
        homicide_2012_count[i] = value
    if re.search("[2][0][1][3]", i) is not None:
        homicide_2013_count[i] = value
    if re.search("[2][0][1][4]", i) is not None:
        homicide_2014_count[i] = value

In [64]:
homicide_2012_count

{'01/2012': 972,
 '02/2012': 749,
 '03/2012': 966,
 '04/2012': 999,
 '05/2012': 1003,
 '06/2012': 1044,
 '07/2012': 1160,
 '08/2012': 1090,
 '09/2012': 1070,
 '10/2012': 979,
 '11/2012': 978,
 '12/2012': 1083}

In [65]:
homicide_2013_count

{'01/2013': 986,
 '02/2013': 721,
 '03/2013': 923,
 '04/2013': 916,
 '05/2013': 955,
 '06/2013': 1066,
 '07/2013': 1137,
 '08/2013': 1000,
 '09/2013': 954,
 '10/2013': 1009,
 '11/2013': 979,
 '12/2013': 1028}

In [66]:
homicide_2014_count

{'01/2014': 871,
 '02/2014': 708,
 '03/2014': 891,
 '04/2014': 930,
 '05/2014': 1018,
 '06/2014': 1020,
 '07/2014': 972,
 '08/2014': 1035,
 '09/2014': 942,
 '10/2014': 980,
 '11/2014': 962,
 '12/2014': 1080}

From looking at the number of gun-related murder by month in the three years, February is the month where there is significantly less gun-related murders than in other months. 