### Analyzing Gun Deaths in the US
The dataset can be found at <https://github.com/fivethirtyeight/guns-data>.
It contains information on gun deaths in the US from 2012 to 2014.
Each row in the dataset represents a single fatality. The columns contain demographic and other information about the victim. 

In [68]:
import csv
import datetime

# Load guns.csv into memory as a list of rows (each row is a list of strings).
# The file is opened in a `with` block so the handle is closed as soon as the
# rows have been read; newline="" is the mode the csv module documentation
# recommends for files handed to csv.reader().
with open("guns.csv", "r", newline="") as f:
    # Use the csv.reader() function to parse the opened file.
    csvreader = csv.reader(f)
    # Materialize every parsed row while the file is still open.
    data = list(csvreader)
# Extract the header row. Note that the slice keeps it wrapped in an outer
# list, so `headers` is a one-element list containing the list of column names.
headers = data[:1]
# Remove the header row so `data` holds only the fatality records.
data = data[1:]
# Display headers.
print(headers)
# Display first 5 rows of data.
print(data[:5])

[['', 'year', 'month', 'intent', 'police', 'sex', 'age', 'race', 'hispanic', 'place', 'education']]
[['1', '2012', '01', 'Suicide', '0', 'M', '34', 'Asian/Pacific Islander', '100', 'Home', '4'], ['2', '2012', '01', 'Suicide', '0', 'F', '21', 'White', '100', 'Street', '3'], ['3', '2012', '01', 'Suicide', '0', 'M', '60', 'White', '100', 'Other specified', '4'], ['4', '2012', '02', 'Suicide', '0', 'M', '64', 'White', '100', 'Home', '4'], ['5', '2012', '02', 'Suicide', '0', 'M', '31', 'White', '100', 'Other specified', '2']]


In [69]:
# Tally how many rows fall in each year; the year is the string in column 1.
year_counts = {}
for record in data:
    year = record[1]
    # .get() supplies 0 the first time a year is encountered.
    year_counts[year] = year_counts.get(year, 0) + 1

In [70]:
# Build one datetime per row from the year (column 1) and month (column 2).
# The day is fixed to 1, so every row in the same month maps to the same value.
dates = [
    datetime.datetime(year=int(record[1]), month=int(record[2]), day=1)
    for record in data
]

# Count how many rows share each month-start datetime.
date_counts = {}
for month_start in dates:
    date_counts[month_start] = date_counts.get(month_start, 0) + 1

print(date_counts)
   

{datetime.datetime(2014, 9, 1, 0, 0): 2914, datetime.datetime(2012, 3, 1, 0, 0): 2743, datetime.datetime(2012, 7, 1, 0, 0): 3026, datetime.datetime(2014, 2, 1, 0, 0): 2361, datetime.datetime(2012, 12, 1, 0, 0): 2791, datetime.datetime(2012, 2, 1, 0, 0): 2357, datetime.datetime(2012, 9, 1, 0, 0): 2852, datetime.datetime(2012, 5, 1, 0, 0): 2999, datetime.datetime(2014, 12, 1, 0, 0): 2857, datetime.datetime(2013, 12, 1, 0, 0): 2765, datetime.datetime(2013, 9, 1, 0, 0): 2742, datetime.datetime(2013, 6, 1, 0, 0): 2920, datetime.datetime(2012, 1, 1, 0, 0): 2758, datetime.datetime(2014, 5, 1, 0, 0): 2864, datetime.datetime(2013, 4, 1, 0, 0): 2798, datetime.datetime(2014, 3, 1, 0, 0): 2684, datetime.datetime(2013, 11, 1, 0, 0): 2758, datetime.datetime(2012, 11, 1, 0, 0): 2729, datetime.datetime(2013, 5, 1, 0, 0): 2806, datetime.datetime(2012, 10, 1, 0, 0): 2733, datetime.datetime(2014, 10, 1, 0, 0): 2865, datetime.datetime(2014, 4, 1, 0, 0): 2862, datetime.datetime(2012, 6, 1, 0, 0): 2826, dat

In [71]:
# Tally rows by sex (column 5) and by race (column 7) in a single pass.
sex_counts = {}
race_counts = {}
for record in data:
    sex_value, race_value = record[5], record[7]
    # .get() supplies 0 the first time a category is encountered.
    sex_counts[sex_value] = sex_counts.get(sex_value, 0) + 1
    race_counts[race_value] = race_counts.get(race_value, 0) + 1


### Learnings
- Reading a file and loading it into a list
- Extracting
- Dictionary
- Grouping or categorizing, and finding their values
- datetime

In [72]:
# Load census.csv into a list of rows (header row first, then the data rows).
# The `with` block closes the file handle once the rows are read; newline=""
# is the mode the csv module documentation recommends for csv.reader().
with open("census.csv", "r", newline="") as f:
    csvreader = csv.reader(f)
    # Materialize every parsed row while the file is still open.
    census = list(csvreader)

print(census)

[['Id', 'Year', 'Id', 'Sex', 'Id', 'Hispanic Origin', 'Id', 'Id2', 'Geography', 'Total', 'Race Alone - White', 'Race Alone - Hispanic', 'Race Alone - Black or African American', 'Race Alone - American Indian and Alaska Native', 'Race Alone - Asian', 'Race Alone - Native Hawaiian and Other Pacific Islander', 'Two or More Races'], ['cen42010', 'April 1, 2010 Census', 'totsex', 'Both Sexes', 'tothisp', 'Total', '0100000US', '', 'United States', '308745538', '197318956', '44618105', '40250635', '3739506', '15159516', '674625', '6984195']]


In [73]:
# Population for each race label used in the gun-deaths data, read from the
# single data row of the census table (census[1]). The numeric positions are
# the "Race Alone - ..." columns of that table; the Asian and the Native
# Hawaiian / Other Pacific Islander columns are summed to match the combined
# "Asian/Pacific Islander" label.
census_row = census[1]
mapping = {
    "Asian/Pacific Islander": float(census_row[14]) + float(census_row[15]),
    "Black": float(census_row[12]),
    "Native American/Native Alaskan": float(census_row[13]),
    "Hispanic": float(census_row[11]),
    "White": float(census_row[10]),
}

# Deaths per 100,000 people for every race that has a population entry.
# race_counts covers every row of the dataset, so these are totals over the
# whole period rather than per-year rates.
race_per_hundredk = {
    race_label: death_count / mapping[race_label] * 100000
    for race_label, death_count in race_counts.items()
    if race_label in mapping
}

print(race_per_hundredk)

{'Black': 57.8773477735196, 'Native American/Native Alaskan': 24.521955573811088, 'Asian/Pacific Islander': 8.374309664161762, 'White': 33.56849303419181, 'Hispanic': 20.220491210910907}


In [92]:
# Pull out the intent (column 3) and race (column 7) of every row as two
# parallel lists.
intents = [record[3] for record in data]
races = [record[7] for record in data]

# Count rows per race, considering only those whose intent is "Homicide".
homicide_race_counts = {}
for intent, race in zip(intents, races):
    if intent != "Homicide":
        continue
    homicide_race_counts[race] = homicide_race_counts.get(race, 0) + 1

print(homicide_race_counts)
        
        

{'Black': 19510, 'Native American/Native Alaskan': 326, 'Asian/Pacific Islander': 559, 'Hispanic': 5634, 'White': 9147}


#### Findings
- Compare and count intersected parts of two columns
- Extracting columns