In [1]:
import csv
def open_csv(name):
    """Read a CSV file and return all of its records.

    Args:
        name: Path of the CSV file to read.

    Returns:
        A list with one entry per CSV record, in file order (a header
        record, if present, is returned like any other). Each entry is the
        list of that record's fields as strings.

    Raises:
        OSError: If the file cannot be opened.
        csv.Error: If the file content cannot be parsed as CSV.
    """
    # `with` closes the file even when parsing raises. newline="" is what the
    # csv module asks for, so that line breaks inside quoted fields reach the
    # parser untouched instead of being rewritten by universal-newline mode.
    with open(name, "r", newline="") as f:
        return list(csv.reader(f))

In [2]:
# Load the raw gun-deaths table. Every field comes back as a string and
# row 0 is the header row.
data = open_csv("guns.csv")
data[:5]

[['',
  'year',
  'month',
  'intent',
  'police',
  'sex',
  'age',
  'race',
  'hispanic',
  'place',
  'education'],
 ['1',
  '2012',
  '01',
  'Suicide',
  '0',
  'M',
  '34',
  'Asian/Pacific Islander',
  '100',
  'Home',
  '4'],
 ['2', '2012', '01', 'Suicide', '0', 'F', '21', 'White', '100', 'Street', '3'],
 ['3',
  '2012',
  '01',
  'Suicide',
  '0',
  'M',
  '60',
  'White',
  '100',
  'Other specified',
  '4'],
 ['4', '2012', '02', 'Suicide', '0', 'M', '64', 'White', '100', 'Home', '4']]

In [3]:
# Split the header row off the records. The check on the header's 'year'
# column makes the cell safe to re-run: without it, a second execution would
# silently discard the first real record and store it in `headers`.
if data and data[0][1:2] == ["year"]:
    headers = data[0]
    data = data[1:]

In [4]:
# Column 1 is 'year' (see the header row).
years = [row[1] for row in data]

In [5]:
def count_occurences(items):
    """Count how many times each distinct value appears in `items`.

    Args:
        items: Any iterable of hashable values.

    Returns:
        A plain dict mapping each distinct value to its number of
        occurrences, with keys in order of first appearance.

    Raises:
        TypeError: If an item is not hashable.
    """
    from collections import Counter

    # Counter does the tallying; converting back to dict keeps the return
    # type (and its notebook repr) the same as a hand-built dictionary.
    return dict(Counter(items))

# Number of records per year.
year_counts = count_occurences(years)
year_counts

{'2012': 33563, '2013': 33636, '2014': 33599}

### No notable yearly patterns/trends detected

In [6]:
import datetime

In [7]:
# One datetime per record, built from the 'year' (column 1) and 'month'
# (column 2) fields. The table has no day column, so the day is fixed to 1,
# which buckets every record into its calendar month.
dates = [
    datetime.datetime(int(record[1]), int(record[2]), 1)
    for record in data
]

In [8]:
# Number of records per first-of-month datetime, i.e. per calendar month.
date_counts = count_occurences(dates)
date_counts

{datetime.datetime(2012, 1, 1, 0, 0): 2758,
 datetime.datetime(2012, 2, 1, 0, 0): 2357,
 datetime.datetime(2012, 3, 1, 0, 0): 2743,
 datetime.datetime(2012, 4, 1, 0, 0): 2795,
 datetime.datetime(2012, 5, 1, 0, 0): 2999,
 datetime.datetime(2012, 6, 1, 0, 0): 2826,
 datetime.datetime(2012, 7, 1, 0, 0): 3026,
 datetime.datetime(2012, 8, 1, 0, 0): 2954,
 datetime.datetime(2012, 9, 1, 0, 0): 2852,
 datetime.datetime(2012, 10, 1, 0, 0): 2733,
 datetime.datetime(2012, 11, 1, 0, 0): 2729,
 datetime.datetime(2012, 12, 1, 0, 0): 2791,
 datetime.datetime(2013, 1, 1, 0, 0): 2864,
 datetime.datetime(2013, 2, 1, 0, 0): 2375,
 datetime.datetime(2013, 3, 1, 0, 0): 2862,
 datetime.datetime(2013, 4, 1, 0, 0): 2798,
 datetime.datetime(2013, 5, 1, 0, 0): 2806,
 datetime.datetime(2013, 6, 1, 0, 0): 2920,
 datetime.datetime(2013, 7, 1, 0, 0): 3079,
 datetime.datetime(2013, 8, 1, 0, 0): 2859,
 datetime.datetime(2013, 9, 1, 0, 0): 2742,
 datetime.datetime(2013, 10, 1, 0, 0): 2808,
 datetime.datetime(2013, 11,

### No notable monthly patterns/trends detected

In [9]:
# Column 5 is 'sex' (see the header row).
sex_counts = count_occurences([row[5] for row in data])
sex_counts

{'F': 14449, 'M': 86349}

### Notable: male gun deaths are about 6 times more frequent than female ones (86,349 vs. 14,449)

In [11]:
# Column 7 is 'race' (see the header row).
race_counts = count_occurences([row[7] for row in data])
race_counts

{'Asian/Pacific Islander': 1326,
 'Black': 23296,
 'Hispanic': 9022,
 'Native American/Native Alaskan': 917,
 'White': 66237}

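## Notable:
* White gun deaths are the most frequent in absolute terms
* Black gun deaths are almost 3 times less frequent than White
* Hispanic gun deaths are more than 7 times less frequent than White
* Asian/Pacific Islander and Native American/Native Alaskan gun deaths are roughly 50 and 70 times less frequent than White, respectively

To be meaningful, the analysis should compare these death counts against the size of each population.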

In [12]:
# Load the census table. Row 0 is the header; row 1 holds the
# "United States" totals from the April 1, 2010 Census (all fields are strings).
census = open_csv("census.csv")
census[:5]

[['Id',
  'Year',
  'Id',
  'Sex',
  'Id',
  'Hispanic Origin',
  'Id',
  'Id2',
  'Geography',
  'Total',
  'Race Alone - White',
  'Race Alone - Hispanic',
  'Race Alone - Black or African American',
  'Race Alone - American Indian and Alaska Native',
  'Race Alone - Asian',
  'Race Alone - Native Hawaiian and Other Pacific Islander',
  'Two or More Races'],
 ['cen42010',
  'April 1, 2010 Census',
  'totsex',
  'Both Sexes',
  'tothisp',
  'Total',
  '0100000US',
  '',
  'United States',
  '308745538',
  '197318956',
  '44618105',
  '40250635',
  '3739506',
  '15159516',
  '674625',
  '6984195']]

In [13]:
# Population per race, keyed by the race labels used in guns.csv. The figures
# are copied from the "United States" row of census.csv ("Race Alone - ..."
# columns).
race_count_mapping = {
    "White": 197318956,
    "Hispanic": 44618105,
    "Black": 40250635,
    "Native American/Native Alaskan": 3739506,
    # guns.csv has a single combined label, so add the two census columns:
    # Asian + Native Hawaiian and Other Pacific Islander.
    "Asian/Pacific Islander": 15159516 + 674625,
}

In [17]:
# Gun deaths per 100,000 people for each race: death count divided by that
# race's population, scaled by 100000.
race_per_hundredk = {
    label: total / race_count_mapping[label] * 100000
    for label, total in race_counts.items()
}
race_per_hundredk

{'Asian/Pacific Islander': 8.374309664161762,
 'Black': 57.8773477735196,
 'Hispanic': 20.220491210910907,
 'Native American/Native Alaskan': 24.521955573811088,
 'White': 33.56849303419181}

### Notable:
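When adjusted by population count, the gun death rate in the Black population is the largest: about 1.7 times the White rate and about 2.9 times the Hispanic rate. Note that the deaths cover 2012–2014 while the populations are 2010 Census counts, so these figures are three-year totals per 100,000 people, not annual rates.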

In [18]:
# Column 3 is 'intent' (see the header row).
intents = [row[3] for row in data]
intents[:5]

['Suicide', 'Suicide', 'Suicide', 'Suicide', 'Suicide']

In [19]:
# Column 7 is 'race'; this list is index-aligned with `intents`, since both
# are built from the same rows in the same order.
races = [row[7] for row in data]

In [21]:
# Homicides per race. `races` and `intents` are index-aligned, so zip pairs
# each record's race with its intent; the tallying itself is delegated to
# count_occurences instead of repeating its counting loop here.
homicide_race_counts = count_occurences(
    [race for race, intent in zip(races, intents) if intent == "Homicide"]
)

homicide_race_counts

{'Asian/Pacific Islander': 559,
 'Black': 19510,
 'Hispanic': 5634,
 'Native American/Native Alaskan': 326,
 'White': 9147}

In [22]:
# Gun homicides per 100,000 people for each race: homicide count divided by
# that race's population, scaled by 100000.
homicide_race_per_hundredk = {
    label: total / race_count_mapping[label] * 100000
    for label, total in homicide_race_counts.items()
}
homicide_race_per_hundredk

{'Asian/Pacific Islander': 3.530346230970155,
 'Black': 48.471284987180944,
 'Hispanic': 12.627161104219914,
 'Native American/Native Alaskan': 8.717729026240365,
 'White': 4.6356417981453335}

### Notable!
When filtering for gun deaths marked as Homicide, the Black population has the highest rate per 100,000. The Hispanic population comes second (at a rate almost 4 times lower, though), while the White population drops to fourth.

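Homicides account for only a small share of White gun deaths (about 4.6 of 33.6 per 100,000), so suicide rates in the White population appear to be really high (perhaps worth investigating).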

Interestingly, the homicide rate per 100,000 is higher in the Native American/Native Alaskan population than in the White or Asian/Pacific Islander populations.

Worth investigating next: other intents (causes of death) and other features (place, education). Also clarify the purpose of the separate `hispanic` column.