### Read Dataset: Gun Deaths in the US

In [1]:
import csv
# Read every row of guns.csv into memory as a list of string lists.
# The context manager closes the file handle as soon as the rows have been
# materialised; newline="" is what the csv module asks for so that it can
# handle line endings inside quoted fields itself.
with open("guns.csv", "r", newline="") as f:
    data = list(csv.reader(f))



In [2]:
# Peek at the header row plus the first four records to sanity-check the parse.
print(data[:5])

[['', 'year', 'month', 'intent', 'police', 'sex', 'age', 'race', 'hispanic', 'place', 'education'], ['1', '2012', '01', 'Suicide', '0', 'M', '34', 'Asian/Pacific Islander', '100', 'Home', '4'], ['2', '2012', '01', 'Suicide', '0', 'F', '21', 'White', '100', 'Street', '3'], ['3', '2012', '01', 'Suicide', '0', 'M', '60', 'White', '100', 'Other specified', '4'], ['4', '2012', '02', 'Suicide', '0', 'M', '64', 'White', '100', 'Home', '4']]


### Remove Header from Dataset

In [3]:
# Split the header row off the records in one step. `headers` stays a
# one-element list wrapping the header row, exactly as the slice produces it.
headers, data = data[:1], data[1:]

In [4]:
# Confirm the split: `headers` wraps the header row, `data` now starts at the
# first real record.
print(headers)
print(data[:5])

[['', 'year', 'month', 'intent', 'police', 'sex', 'age', 'race', 'hispanic', 'place', 'education']]
[['1', '2012', '01', 'Suicide', '0', 'M', '34', 'Asian/Pacific Islander', '100', 'Home', '4'], ['2', '2012', '01', 'Suicide', '0', 'F', '21', 'White', '100', 'Street', '3'], ['3', '2012', '01', 'Suicide', '0', 'M', '60', 'White', '100', 'Other specified', '4'], ['4', '2012', '02', 'Suicide', '0', 'M', '64', 'White', '100', 'Home', '4'], ['5', '2012', '02', 'Suicide', '0', 'M', '31', 'White', '100', 'Other specified', '2']]


### List Comprehension to Extract Yearly Counts

In [5]:
# Column 1 of every record holds the year of death (as a string).
years = [record[1] for record in data]

# Tally deaths per year; dict.get supplies 0 the first time a year is seen.
year_counts = {}
for year in years:
    year_counts[year] = year_counts.get(year, 0) + 1

year_counts


{'2012': 33563, '2013': 33636, '2014': 33599}

### List Comprehension to Explore by Month & Year

In [6]:
import datetime

# Collapse each record to the first day of its (year, month) so that deaths
# can be grouped per calendar month. Columns 1 and 2 hold year and month.
dates = [
    datetime.datetime(int(record[1]), int(record[2]), 1)
    for record in data
]
dates[:5]



[datetime.datetime(2012, 1, 1, 0, 0),
 datetime.datetime(2012, 1, 1, 0, 0),
 datetime.datetime(2012, 1, 1, 0, 0),
 datetime.datetime(2012, 2, 1, 0, 0),
 datetime.datetime(2012, 2, 1, 0, 0)]

In [7]:
# Count deaths per month; every key is the first day of that month.
date_counts = {}
for month_start in dates:
    date_counts[month_start] = date_counts.get(month_start, 0) + 1

date_counts

{datetime.datetime(2012, 1, 1, 0, 0): 2758,
 datetime.datetime(2012, 2, 1, 0, 0): 2357,
 datetime.datetime(2012, 3, 1, 0, 0): 2743,
 datetime.datetime(2012, 4, 1, 0, 0): 2795,
 datetime.datetime(2012, 5, 1, 0, 0): 2999,
 datetime.datetime(2012, 6, 1, 0, 0): 2826,
 datetime.datetime(2012, 7, 1, 0, 0): 3026,
 datetime.datetime(2012, 8, 1, 0, 0): 2954,
 datetime.datetime(2012, 9, 1, 0, 0): 2852,
 datetime.datetime(2012, 10, 1, 0, 0): 2733,
 datetime.datetime(2012, 11, 1, 0, 0): 2729,
 datetime.datetime(2012, 12, 1, 0, 0): 2791,
 datetime.datetime(2013, 1, 1, 0, 0): 2864,
 datetime.datetime(2013, 2, 1, 0, 0): 2375,
 datetime.datetime(2013, 3, 1, 0, 0): 2862,
 datetime.datetime(2013, 4, 1, 0, 0): 2798,
 datetime.datetime(2013, 5, 1, 0, 0): 2806,
 datetime.datetime(2013, 6, 1, 0, 0): 2920,
 datetime.datetime(2013, 7, 1, 0, 0): 3079,
 datetime.datetime(2013, 8, 1, 0, 0): 2859,
 datetime.datetime(2013, 9, 1, 0, 0): 2742,
 datetime.datetime(2013, 10, 1, 0, 0): 2808,
 datetime.datetime(2013, 11,

### Identifying Gender & Race Counts

In [8]:
# Column 5 holds the victim's sex.
sex_gender = [record[5] for record in data]

# Tally deaths per sex label.
sex_counts = {}
for label in sex_gender:
    sex_counts[label] = sex_counts.get(label, 0) + 1

sex_counts

{'F': 14449, 'M': 86349}

In [9]:
# Column 7 holds the victim's race; `races` is reused by later cells.
races = [record[7] for record in data]

# Tally deaths per race label.
race_counts = {}
for label in races:
    race_counts[label] = race_counts.get(label, 0) + 1

race_counts

{'Asian/Pacific Islander': 1326,
 'Black': 23296,
 'Hispanic': 9022,
 'Native American/Native Alaskan': 917,
 'White': 66237}

### Reading Second Dataset

In [10]:
# Load the census table (a header row plus one data row) as lists of strings.
# The context manager closes the file handle once the rows are read;
# newline="" lets the csv module do its own line-ending handling.
with open("census.csv", "r", newline="") as c:
    census = list(csv.reader(c))
print(census)

[['Id', 'Year', 'Id', 'Sex', 'Id', 'Hispanic Origin', 'Id', 'Id2', 'Geography', 'Total', 'Race Alone - White', 'Race Alone - Hispanic', 'Race Alone - Black or African American', 'Race Alone - American Indian and Alaska Native', 'Race Alone - Asian', 'Race Alone - Native Hawaiian and Other Pacific Islander', 'Two or More Races'], ['cen42010', 'April 1, 2010 Census', 'totsex', 'Both Sexes', 'tothisp', 'Total', '0100000US', '', 'United States', '308745538', '197318956', '44618105', '40250635', '3739506', '15159516', '674625', '6984195']]


### Rates of Gun Deaths per Race

In [11]:
# Population of each race label used in guns.csv, copied from the census row
# printed above. The census lists 'Asian' and 'Native Hawaiian and Other
# Pacific Islander' separately, so the two are summed to match the single
# 'Asian/Pacific Islander' label.
mapping = {
    'Asian/Pacific Islander': 15159516 + 674625,
    'Black': 40250635,
    'Hispanic': 44618105,
    'Native American/Native Alaskan': 3739506,
    'White': 197318956,
}

# Gun deaths per 100,000 people of each race.
race_per_hundredk = {
    race: (deaths / mapping[race]) * 100000
    for race, deaths in race_counts.items()
}

race_per_hundredk


{'Asian/Pacific Islander': 8.374309664161762,
 'Black': 57.8773477735196,
 'Hispanic': 20.220491210910907,
 'Native American/Native Alaskan': 24.521955573811088,
 'White': 33.56849303419181}

### Rate of Homicides per Race 

In [12]:
# Column 3 holds the intent of each death; `intents` is reused by later cells.
intents = [record[3] for record in data]

# Count homicides per race. Every race gets an entry (possibly 0) so that the
# rate table below covers all races, not only those with homicides.
homicide_race_counts = {}
for position, race in enumerate(races):
    homicide_race_counts.setdefault(race, 0)
    if intents[position] == "Homicide":
        homicide_race_counts[race] += 1

# Homicides per 100,000 people of each race.
homicide_per_hundredk = {
    race: (count / mapping[race]) * 100000
    for race, count in homicide_race_counts.items()
}

homicide_per_hundredk


{'Asian/Pacific Islander': 3.530346230970155,
 'Black': 48.471284987180944,
 'Hispanic': 12.627161104219914,
 'Native American/Native Alaskan': 8.717729026240365,
 'White': 4.6356417981453335}

### Key Findings

By raw count, most victims of gun-related fatalities are White (66,237), followed by Black (23,296). However, when homicides are expressed as a rate per 100,000 population of each race, Black victims have by far the highest rate (48.5), followed by Hispanic victims (12.6).

## Filtered by Intent Rate Calculator

In [13]:
# Combined population of all races listed in `mapping`; used as the
# denominator for rates that are not broken down by race.
total_population = sum(mapping.values())

def rate_calculator(column, intent_name, rows=None, population=None):
    """Rate per 100,000 people of deaths with a given intent, grouped by a column.

    Parameters
    ----------
    column : int
        Index of the column whose values become the keys of the result.
    intent_name : str
        Intent to count (compared against column 3 of each row).
    rows : list of list of str, optional
        Records to analyse. Defaults to the notebook-level ``data``.
    population : int or float, optional
        Denominator for the rate. Defaults to the notebook-level
        ``total_population``.

    Returns
    -------
    dict
        Maps every distinct value found in ``column`` to
        ``matching_deaths / population * 100000``. Values that never occur
        with ``intent_name`` are present with a rate of 0.0.
    """
    if rows is None:
        rows = data
    if population is None:
        population = total_population

    intent_column = 3  # position of 'intent' in each record

    # Read the intent straight from each row instead of from a separately
    # built list, so the result cannot go wrong if that list is stale or out
    # of step with the records.
    intent_count = {}
    for row in rows:
        key = row[column]
        intent_count.setdefault(key, 0)
        if row[intent_column] == intent_name:
            intent_count[key] += 1

    return {
        key: (count / population) * 100000
        for key, count in intent_count.items()
    }
    
# Column 2 is the month, column 5 the victim's sex.
homicide_monthly_rate = rate_calculator(2, "Homicide")
homicide_gender_rate = rate_calculator(5, "Homicide")
accidental_monthly_rate = rate_calculator(2, "Accidental")
accidental_gender_rate = rate_calculator(5, "Accidental")

# Print each table followed by a blank line, in the order computed above.
for rates in (
    homicide_monthly_rate,
    homicide_gender_rate,
    accidental_monthly_rate,
    accidental_gender_rate,
):
    print(rates, "\n")


{'02': 0.721762429324819, '01': 0.9374958276216315, '11': 0.9673207213953843, '12': 1.0574581781338372, '07': 1.083306419404423, '09': 0.9828959436994553, '06': 1.037243527909405, '03': 0.9212578299003661, '05': 0.9862098207854278, '10': 0.9835587191166497, '04': 0.9427980309591875, '08': 1.0355865893664187} 

{'F': 1.7805461582930453, 'M': 9.87634787932396} 

{'02': 0.04208623899185125, '01': 0.0503709317067826, '11': 0.053022033375560634, '12': 0.05931839983890846, '07': 0.04937676858099083, '09': 0.03910374961447597, '06': 0.03711542336289245, '03': 0.04440595295203203, '05': 0.038109586488684205, '10': 0.04341178982624027, '04': 0.03247599544253089, '08': 0.05434758420994965} 

{'F': 0.07224252047420136, 'M': 0.47090193391669793} 



## Filtered by Intent Rate Calculator (Valid for Race)

In [14]:
def race_rate_calc(column, intent_name, rows=None, populations=None):
    """Rate per 100,000 of deaths with a given intent, using per-group populations.

    Unlike a single-denominator rate, each group is divided by its own
    population, so ``column`` must hold values that are keys of
    ``populations`` (column 7, the race, in this notebook).

    Parameters
    ----------
    column : int
        Index of the column whose values become the keys of the result.
    intent_name : str
        Intent to count (compared against column 3 of each row).
    rows : list of list of str, optional
        Records to analyse. Defaults to the notebook-level ``data``.
    populations : dict, optional
        Maps each group label to its population. Defaults to the
        notebook-level ``mapping``.

    Returns
    -------
    dict
        Maps every distinct value found in ``column`` to
        ``matching_deaths / populations[value] * 100000``.

    Raises
    ------
    KeyError
        If a value found in ``column`` has no entry in ``populations``.
    """
    if rows is None:
        rows = data
    if populations is None:
        populations = mapping

    intent_column = 3  # position of 'intent' in each record

    # Read the intent straight from each row instead of from a separately
    # built list, so the result cannot go wrong if that list is stale or out
    # of step with the records.
    intent_racial_count = {}
    for row in rows:
        group = row[column]
        intent_racial_count.setdefault(group, 0)
        if row[intent_column] == intent_name:
            intent_racial_count[group] += 1

    return {
        group: (count / populations[group]) * 100000
        for group, count in intent_racial_count.items()
    }

# Column 7 is the victim's race; each call yields a per-race rate table.
accidental_race_rate = race_rate_calc(7, "Accidental")
print(accidental_race_rate, "\n")

suicide_race_rate = race_rate_calc(7, "Suicide")
print(suicide_race_rate, "\n")

undetermined_race_rate = race_rate_calc(7, "Undetermined")
# Backward-compatible alias for the original, misspelled variable name.
undertermined_race_rate = undetermined_race_rate
print(undetermined_race_rate, "\n")

{'Hispanic': 0.3249801846133985, 'White': 0.5736904466492313, 'Asian/Pacific Islander': 0.07578560782046845, 'Native American/Native Alaskan': 0.5883130017708221, 'Black': 0.814893976206835} 

{'Hispanic': 7.106980451097149, 'White': 28.06217969245692, 'Asian/Pacific Islander': 4.705023152187416, 'Native American/Native Alaskan': 14.841532544673013, 'Black': 8.278130270491385} 

{'Hispanic': 0.16136947098044616, 'White': 0.2964743032595409, 'Asian/Pacific Islander': 0.0631546731837237, 'Native American/Native Alaskan': 0.3743810011268868, 'Black': 0.3130385396404305} 



### Gun Death Rate Correlation (Location & Education)

In [15]:
# Columns 9 and 10 hold the place of death and the education code.
location = [row[9] for row in data]
education = [row[10] for row in data]

# Give every distinct place an integer code, starting at 1.
# The places are sorted before numbering: iterating a bare set of strings
# follows hash order, which can change from one interpreter run to the next,
# so unsorted codes (and anything computed from them) would not be
# reproducible after a kernel restart.
# NOTE(review): the codes are still arbitrary labels for a nominal variable,
# so any correlation computed from them depends on this ordering.
location_types = set(location)
location_dict = {
    place: code
    for code, place in enumerate(sorted(location_types), start=1)
}
location_int = [location_dict[place] for place in location]

# Missing education values ('NA') are recoded as '5' so that every entry can
# be parsed as an integer.
education = ['5' if level == 'NA' else level for level in education]
education_int = [int(level) for level in education]

In [16]:
import pandas as pd
# Pair each record's education code with its location code, row by row.
corr_lst1 = [(edu, loc) for edu, loc in zip(education_int, location_int)]
corr_lst = pd.DataFrame(data=corr_lst1, columns=['Education', 'Location'])

# Pairwise correlation matrix of the two coded columns.
corr_lst.corr()

Unnamed: 0,Education,Location
Education,1.0,0.082676
Location,0.082676,1.0
