## This project is about analyzing data on gun deaths in the US from 2012 to 2014.

## Import packages

In [20]:
import csv
import datetime

## Load data

In [21]:
# Read every row of guns.csv (header row included) into a list of lists.
# The context manager closes the file as soon as the rows are loaded, and
# newline='' leaves line-ending handling to the csv module, as its docs advise.
with open('guns.csv', 'r', newline='') as guns_file:
    data = list(csv.reader(guns_file))

## Display the first 5 rows to verify that the data loaded correctly

In [22]:
# Peek at the first five rows; at this point the first row is still the header.
data[:5]

[['',
  'year',
  'month',
  'intent',
  'police',
  'sex',
  'age',
  'race',
  'hispanic',
  'place',
  'education'],
 ['1',
  '2012',
  '01',
  'Suicide',
  '0',
  'M',
  '34',
  'Asian/Pacific Islander',
  '100',
  'Home',
  '4'],
 ['2', '2012', '01', 'Suicide', '0', 'F', '21', 'White', '100', 'Street', '3'],
 ['3',
  '2012',
  '01',
  'Suicide',
  '0',
  'M',
  '60',
  'White',
  '100',
  'Other specified',
  '4'],
 ['4', '2012', '02', 'Suicide', '0', 'M', '64', 'White', '100', 'Home', '4']]

## Separate the header from the data

In [23]:
# Split the first row off as the column names; everything after it is a record.
# NOTE: `data` is rebound here, so running this cell a second time would also
# drop the first record.
headers, data = data[0], data[1:]


In [24]:
# Column names taken from the first row of the file.
headers

['',
 'year',
 'month',
 'intent',
 'police',
 'sex',
 'age',
 'race',
 'hispanic',
 'place',
 'education']

In [25]:
# First five records, now that the header row has been split off.
data[:5]

[['1',
  '2012',
  '01',
  'Suicide',
  '0',
  'M',
  '34',
  'Asian/Pacific Islander',
  '100',
  'Home',
  '4'],
 ['2', '2012', '01', 'Suicide', '0', 'F', '21', 'White', '100', 'Street', '3'],
 ['3',
  '2012',
  '01',
  'Suicide',
  '0',
  'M',
  '60',
  'White',
  '100',
  'Other specified',
  '4'],
 ['4', '2012', '02', 'Suicide', '0', 'M', '64', 'White', '100', 'Home', '4'],
 ['5',
  '2012',
  '02',
  'Suicide',
  '0',
  'M',
  '31',
  'White',
  '100',
  'Other specified',
  '2']]

## Calculate how many gun deaths happened in each year.

In [28]:
# Year of every death as an integer (column 1 stores the year as text).
years = list(map(int, (record[1] for record in data)))

In [32]:
# Tally deaths per year; keys are the year strings exactly as they appear in column 1.
year_counts = {}
for row in data:
    year = row[1]
    # .get() supplies 0 the first time a year is seen.
    year_counts[year] = year_counts.get(year, 0) + 1
year_counts



{'2012': 33563, '2013': 33636, '2014': 33599}

## Gun deaths didn't change much from year to year between 2012 and 2014, so next we group the data by month and year.

In [36]:
# One datetime per death, pinned to the first day of its month: each row has
# a year (column 1) and a month (column 2) but no day column.
# `datetime` is imported in the notebook's import cell at the top.
dates = [
    datetime.datetime(year=int(row[1]), month=int(row[2]), day=1)
    for row in data
]

In [40]:
# Tally deaths per (year, month), keyed by the first-of-month datetime.
date_counts = {}
for date in dates:
    date_counts[date] = date_counts.get(date, 0) + 1

date_counts

{datetime.datetime(2012, 1, 1, 0, 0): 2758,
 datetime.datetime(2012, 2, 1, 0, 0): 2357,
 datetime.datetime(2012, 3, 1, 0, 0): 2743,
 datetime.datetime(2012, 4, 1, 0, 0): 2795,
 datetime.datetime(2012, 5, 1, 0, 0): 2999,
 datetime.datetime(2012, 6, 1, 0, 0): 2826,
 datetime.datetime(2012, 7, 1, 0, 0): 3026,
 datetime.datetime(2012, 8, 1, 0, 0): 2954,
 datetime.datetime(2012, 9, 1, 0, 0): 2852,
 datetime.datetime(2012, 10, 1, 0, 0): 2733,
 datetime.datetime(2012, 11, 1, 0, 0): 2729,
 datetime.datetime(2012, 12, 1, 0, 0): 2791,
 datetime.datetime(2013, 1, 1, 0, 0): 2864,
 datetime.datetime(2013, 2, 1, 0, 0): 2375,
 datetime.datetime(2013, 3, 1, 0, 0): 2862,
 datetime.datetime(2013, 4, 1, 0, 0): 2798,
 datetime.datetime(2013, 5, 1, 0, 0): 2806,
 datetime.datetime(2013, 6, 1, 0, 0): 2920,
 datetime.datetime(2013, 7, 1, 0, 0): 3079,
 datetime.datetime(2013, 8, 1, 0, 0): 2859,
 datetime.datetime(2013, 9, 1, 0, 0): 2742,
 datetime.datetime(2013, 10, 1, 0, 0): 2808,
 datetime.datetime(2013, 11,

## Calculate sex counts

In [41]:
# Distinct values found in the sex column (index 5), plus an empty tally
# that the counting loop fills in.
sex = {row[5] for row in data}
sex_counts = dict()

In [43]:
# Tally gun deaths by the victim's sex (column 5 of each row).
# The tally is reset here so that re-running this cell on its own does not
# add a second pass of counts on top of the first.
sex_counts = {}
for row in data:
    # A distinct loop name keeps the `sex` set of unique values intact
    # instead of overwriting it with the last row's value.
    sex_value = row[5]
    sex_counts[sex_value] = sex_counts.get(sex_value, 0) + 1
sex_counts

{'F': 14449, 'M': 86349}

In [79]:
# Distinct values found in the race column (index 7), plus an empty tally
# that the counting loop fills in.
race = {row[7] for row in data}
race_counts = dict()

In [80]:
# Tally gun deaths by the victim's race (column 7 of each row).
# The tally is reset here so that re-running this cell on its own does not
# add a second pass of counts on top of the first.
race_counts = {}
for row in data:
    # A distinct loop name keeps the `race` set of unique values intact
    # instead of overwriting it with the last row's value.
    race_value = row[7]
    race_counts[race_value] = race_counts.get(race_value, 0) + 1

race_counts

{'Asian/Pacific Islander': 1326,
 'Black': 23296,
 'Hispanic': 9022,
 'Native American/Native Alaskan': 917,
 'White': 66237}

## According to the above analysis, gun deaths in the US from 2012 to 2014 didn't change much from year to year or month to month. However, females account for only about 14.33% of deaths; White victims make up about 65.71%, Black victims about 23.11%, and the remaining 11.18% are Asian/Pacific Islander, Hispanic, and Native American/Native Alaskan.


## The difference in gun deaths among races may be due to differences in population size, but the difference between females and males is unexpected.

### Luckily we have data on population of the US to dig further

### Load population data

In [81]:
# Read every row of census.csv (header row included) into a list of lists.
# The context manager closes the file as soon as the rows are loaded, and
# newline='' leaves line-ending handling to the csv module, as its docs advise.
with open('census.csv', 'r', newline='') as census_file:
    census = list(csv.reader(census_file))

In [82]:
# Header row plus the first census record.
census[:2]

[['Id',
  'Year',
  'Id',
  'Sex',
  'Id',
  'Hispanic Origin',
  'Id',
  'Id2',
  'Geography',
  'Total',
  'Race Alone - White',
  'Race Alone - Hispanic',
  'Race Alone - Black or African American',
  'Race Alone - American Indian and Alaska Native',
  'Race Alone - Asian',
  'Race Alone - Native Hawaiian and Other Pacific Islander',
  'Two or More Races'],
 ['cen42010',
  'April 1, 2010 Census',
  'totsex',
  'Both Sexes',
  'tothisp',
  'Total',
  '0100000US',
  '',
  'United States',
  '308745538',
  '197318956',
  '44618105',
  '40250635',
  '3739506',
  '15159516',
  '674625',
  '6984195']]

In [83]:
# Spot-check indexing: the first column name in the census header row.
census[0][0]


'Id'

### Map each key in race_counts to that race's population count from the census

In [84]:
# Recall the per-race death counts; their keys are the ones that need a population figure.
race_counts

{'Asian/Pacific Islander': 1326,
 'Black': 23296,
 'Hispanic': 9022,
 'Native American/Native Alaskan': 917,
 'White': 66237}

In [85]:
# Population of each race category, keyed by the race labels used in
# race_counts. Figures come from the first census record (census[1]).
# Columns are located by header name rather than by numeric position, so the
# lookup is self-describing and raises ValueError if the census layout changes.
census_header, census_row = census[0], census[1]

# Census column(s) that make up each race label; the gun-death data merges
# Asian and Pacific Islander, so that label sums two census columns.
race_to_census_columns = {
    'Asian/Pacific Islander': [
        'Race Alone - Asian',
        'Race Alone - Native Hawaiian and Other Pacific Islander',
    ],
    'Black': ['Race Alone - Black or African American'],
    'Hispanic': ['Race Alone - Hispanic'],
    'Native American/Native Alaskan': ['Race Alone - American Indian and Alaska Native'],
    'White': ['Race Alone - White'],
}

mapping = {
    race_label: sum(int(census_row[census_header.index(column)]) for column in columns)
    for race_label, columns in race_to_census_columns.items()
}

mapping

{'Asian/Pacific Islander': 15834141,
 'Black': 40250635,
 'Hispanic': 44618105,
 'Native American/Native Alaskan': 3739506,
 'White': 197318956}

In [86]:
# Gun deaths per 100,000 residents of each race. The death counts cover the
# whole dataset (all years together), so these rates are not annualized.
race_per_hundredk = {
    race_label: death_count / mapping[race_label] * 100000
    for race_label, death_count in race_counts.items()
}

race_per_hundredk

{'Asian/Pacific Islander': 8.374309664161762,
 'Black': 57.8773477735196,
 'Hispanic': 20.220491210910907,
 'Native American/Native Alaskan': 24.521955573811088,
 'White': 33.56849303419181}

In [107]:
### Explore data by homicide and race

In [104]:
# Parallel lists: intent (column 3) and race (column 7) for every death.
intents = [row[3] for row in data]
races = [row[7] for row in data]

# Tally deaths per race, counting only rows whose intent is 'Homicide'.
homicide_race_counts = {}
for idx, race in enumerate(races):
    if intents[idx] != 'Homicide':
        continue
    homicide_race_counts[race] = homicide_race_counts.get(race, 0) + 1
homicide_race_counts

{'Asian/Pacific Islander': 559,
 'Black': 19510,
 'Hispanic': 5634,
 'Native American/Native Alaskan': 326,
 'White': 9147}

In [106]:
# Gun homicides per 100,000 residents of each race. The homicide counts cover
# the whole dataset (all years together), so these rates are not annualized.
homeicide_race_per_hundredk = {
    race_label: homicide_count / mapping[race_label] * 100000
    for race_label, homicide_count in homicide_race_counts.items()
}

homeicide_race_per_hundredk

{'Asian/Pacific Islander': 3.530346230970155,
 'Black': 48.471284987180944,
 'Hispanic': 12.627161104219914,
 'Native American/Native Alaskan': 8.717729026240365,
 'White': 4.6356417981453335}