In [1]:
#!pip install sodapy

In [2]:
import pandas as pd
import numpy as np
import requests
from tqdm import tqdm
from sodapy import Socrata

## API Key
Uncomment to prompt for an API key to access [CDC](https://www.cdc.gov/nchs/pressroom/sosmap/firearm_mortality/firearm.htm) data using the [Socrata Open Data API](https://dev.socrata.com).

In [3]:
# import getpass
# api_key = getpass.getpass(prompt='Please enter your Socrata API Key: ')

In [4]:
# Unauthenticated client only works with public data sets. Note 'None'
# in place of application token, and no username or password:
client = Socrata("data.cdc.gov", None)

# Example authenticated client (needed for non-public datasets):
# client = Socrata("data.cdc.gov",
#                  MyAppToken,
#                  username="user@example.com",
#                  password="AFakePassword")

# Firearm-related injury

# Fetch at most 2000 rows for this cause of death. The row cap is passed
# explicitly so the request does not silently fall back to the API's
# default page size. Results are returned as JSON from the API and
# converted to a Python list of dictionaries by sodapy.
results = client.get("489q-934x", limit=2000, cause_of_death="Firearm-related injury")

# Convert to pandas DataFrame
results_df = pd.DataFrame.from_records(results)



Verify that only <b>Firearm-related injury</b> data was retrieved.

In [5]:
# Every retrieved row must carry the requested cause of death. Assert it so a
# fresh run fails loudly instead of relying on someone eyeballing the output.
causes_of_death = set(results_df['cause_of_death'])
assert causes_of_death == {'Firearm-related injury'}, f"Unexpected cause_of_death values: {causes_of_death}"
causes_of_death

{'Firearm-related injury'}

In [6]:
# Persist the untouched API response so development can continue
# without calling the API again.
raw_csv_path = '489q-934x-firearm-related-injury.csv'
results_df.to_csv(raw_csv_path, index=False)

# Tidy

We are interested in overall (disregarding age and sex) firearm mortality rates for each state. Drop rows and columns that are not required.

In [7]:
# Keep only the 12-month periods (dropping all 3-month periods) and,
# of those, only the rows holding crude rates.
is_12_month_period = results_df['time_period'] == '12 months ending with quarter'
is_crude_rate = results_df['rate_type'] == 'Crude'
results_12M_df = results_df[is_12_month_period & is_crude_rate]
results_12M_df.head()

Unnamed: 0,year_and_quarter,time_period,cause_of_death,rate_type,unit,rate_overall,rate_sex_female,rate_sex_male,rate_alaska,rate_alabama,...,rate_age_1_4,rate_age_5_14,rate_age_15_24,rate_age_25_34,rate_age_35_44,rate_age_45_54,rate_age_55_64,rate_65_74,rate_age_75_84,rate_age_85_plus
24,2020 Q1,12 months ending with quarter,Firearm-related injury,Crude,"Deaths per 100,000",12.3,3.4,21.5,23.4,21.6,...,0.5,1.0,18.0,18.6,14.7,12.8,12.2,11.1,15.2,15.9
26,2020 Q2,12 months ending with quarter,Firearm-related injury,Crude,"Deaths per 100,000",12.6,3.5,21.9,23.3,22.5,...,0.6,1.1,19.0,19.5,15.0,12.7,12.0,11.1,15.3,16.2
28,2020 Q3,12 months ending with quarter,Firearm-related injury,Crude,"Deaths per 100,000",13.1,3.6,23.0,25.3,22.8,...,0.7,1.3,20.7,21.0,15.7,13.2,12.0,10.9,15.3,15.8
30,2020 Q4,12 months ending with quarter,Firearm-related injury,Crude,"Deaths per 100,000",13.7,3.7,24.0,23.9,23.2,...,0.8,1.5,22.2,22.7,16.7,13.3,11.8,10.9,15.3,16.2
32,2021 Q1,12 months ending with quarter,Firearm-related injury,Crude,"Deaths per 100,000",14.1,3.9,24.5,23.1,24.2,...,0.8,1.7,23.0,23.6,17.3,13.6,11.7,10.9,15.1,16.0


In [8]:
# 'rate_*' columns that are not per-state rates and must be left out.
non_state_rate_columns = ['rate_type', 'rate_overall', 'rate_sex_female', 'rate_sex_male']

# Keep every 'rate_*' column except the age-group breakdowns and the
# non-state columns listed above.
state_rate_columns = [
    column
    for column in results_12M_df.columns
    if 'rate_' in column
    and not ('rate_65_74' in column or 'rate_age_' in column)
    and column not in non_state_rate_columns
]

# Rows with any missing value are dropped.
tidy = results_12M_df[['year_and_quarter', *state_rate_columns]].dropna()
tidy.head()

Unnamed: 0,year_and_quarter,rate_alaska,rate_alabama,rate_arkansas,rate_arizona,rate_california,rate_colorado,rate_connecticut,rate_district_of_columbia,rate_delaware,...,rate_south_dakota,rate_tennessee,rate_texas,rate_utah,rate_virginia,rate_vermont,rate_washington,rate_wisconsin,rate_west_virginia,rate_wyoming
24,2020 Q1,23.4,21.6,19.5,15.7,7.6,14.8,5.5,20.6,9.8,...,12.0,19.0,12.9,12.1,12.7,11.2,11.1,10.7,17.7,25.0
26,2020 Q2,23.3,22.5,21.7,15.2,7.7,15.2,5.4,21.5,11.5,...,11.9,19.0,13.3,12.8,12.5,11.5,11.2,10.9,17.5,26.0
28,2020 Q3,25.3,22.8,21.2,16.2,8.0,16.0,5.7,21.9,12.6,...,12.3,20.0,13.6,12.9,13.1,11.5,11.3,11.8,16.3,28.7
30,2020 Q4,23.9,23.2,22.2,17.0,8.8,15.9,6.2,23.4,13.7,...,13.4,21.4,14.2,13.2,13.7,12.2,11.2,12.3,18.2,26.4
32,2021 Q1,23.1,24.2,22.6,17.1,9.0,16.7,6.3,23.7,13.9,...,15.2,22.2,14.5,13.4,13.4,12.5,11.0,12.3,17.8,25.2


Dataset is not in [tidy](https://vita.had.co.nz/papers/tidy-data.pdf) format. Specifically, each state is a column in this table. Pivot-long - i.e. melt in Python - to tidy the dataset.

In [9]:
# Pivot long: one row per (quarter, state) instead of one column per state,
# ordered by quarter and then state, with a fresh 0..n-1 index.
tidy = (
    tidy
    .melt(id_vars='year_and_quarter', var_name='state', value_name='mortality_per_100k')
    .sort_values(by=['year_and_quarter', 'state'])
    .reset_index(drop=True)
)

# NOTE(review): the JSON API is assumed to deliver numeric fields as strings -
# confirm against the raw response. Coercing here guarantees the rate column
# is numeric for any later arithmetic; it is a no-op if it already is, and it
# raises on a value that cannot be parsed.
tidy['mortality_per_100k'] = pd.to_numeric(tidy['mortality_per_100k'])
tidy.head()

Unnamed: 0,year_and_quarter,state,mortality_per_100k
0,2020 Q1,rate_alabama,21.6
1,2020 Q1,rate_alaska,23.4
2,2020 Q1,rate_arizona,15.7
3,2020 Q1,rate_arkansas,19.5
4,2020 Q1,rate_california,7.6


Change values in the <b>state</b> column to the actual <b>state name</b> by removing the 'rate_' prefix, replacing underscores with spaces, and capitalizing.

In [10]:
# Turn column-style labels such as 'rate_west_virginia' into display
# names such as 'West Virginia'.
tidy['state'] = (
    tidy['state']
    .str.replace('rate_', '', regex=False)
    .str.replace('_', ' ', regex=False)
    .str.title()
)
tidy.head()

Unnamed: 0,year_and_quarter,state,mortality_per_100k
0,2020 Q1,Alabama,21.6
1,2020 Q1,Alaska,23.4
2,2020 Q1,Arizona,15.7
3,2020 Q1,Arkansas,19.5
4,2020 Q1,California,7.6


Finally, add 2-letter state abbreviations.

In [11]:
# Two-letter abbreviation -> state name. Names are spelled the way
# str.title() produces them (note 'District Of Columbia'), because they are
# matched against title-cased state names.
abbrev_to_us_state = {
    'AK': 'Alaska',
    'AL': 'Alabama',
    'AR': 'Arkansas',
    'AZ': 'Arizona',
    'CA': 'California',
    'CO': 'Colorado',
    'CT': 'Connecticut',
    'DC': 'District Of Columbia',
    'DE': 'Delaware',
    'FL': 'Florida',
    'GA': 'Georgia',
    'HI': 'Hawaii',
    'IA': 'Iowa',
    'ID': 'Idaho',
    'IL': 'Illinois',
    'IN': 'Indiana',
    'KS': 'Kansas',
    'KY': 'Kentucky',
    'LA': 'Louisiana',
    'MA': 'Massachusetts',
    'MD': 'Maryland',
    'ME': 'Maine',
    'MI': 'Michigan',
    'MN': 'Minnesota',
    'MO': 'Missouri',
    'MS': 'Mississippi',
    'MT': 'Montana',
    'NC': 'North Carolina',
    'ND': 'North Dakota',
    'NE': 'Nebraska',
    'NH': 'New Hampshire',
    'NJ': 'New Jersey',
    'NM': 'New Mexico',
    'NV': 'Nevada',
    'NY': 'New York',
    'OH': 'Ohio',
    'OK': 'Oklahoma',
    'OR': 'Oregon',
    'PA': 'Pennsylvania',
    'RI': 'Rhode Island',
    'SC': 'South Carolina',
    'SD': 'South Dakota',
    'TN': 'Tennessee',
    'TX': 'Texas',
    'UT': 'Utah',
    'VA': 'Virginia',
    'VT': 'Vermont',
    'WA': 'Washington',
    'WI': 'Wisconsin',
    'WV': 'West Virginia',
    'WY': 'Wyoming'
}

# State name -> two-letter abbreviation (the direction the name promises).
us_state_to_abbrev = {name: abbrev for abbrev, name in abbrev_to_us_state.items()}

# A duplicated state name above would silently collapse during inversion.
assert len(us_state_to_abbrev) == len(abbrev_to_us_state), "duplicate state name in abbrev_to_us_state"
# Look up the abbreviation for every row (an unknown state name raises
# KeyError), then arrange the columns in their final order.
state_abbrevs = tidy['state'].map(lambda state_name: us_state_to_abbrev[state_name])
tidy = tidy.assign(state_abbrev=state_abbrevs)[
    ['year_and_quarter', 'state', 'state_abbrev', 'mortality_per_100k']
]
tidy.head()

Unnamed: 0,year_and_quarter,state,state_abbrev,mortality_per_100k
0,2020 Q1,Alabama,AL,21.6
1,2020 Q1,Alaska,AK,23.4
2,2020 Q1,Arizona,AZ,15.7
3,2020 Q1,Arkansas,AR,19.5
4,2020 Q1,California,CA,7.6


Giffords Gun Law Score data (see below) is not available for the District of Columbia. Eliminate DC from the dataset.

In [12]:
# Keep every row whose abbreviation is anything other than 'DC'.
is_not_dc = tidy['state_abbrev'].ne('DC')
tidy = tidy.loc[is_not_dc]
tidy.head(10)

Unnamed: 0,year_and_quarter,state,state_abbrev,mortality_per_100k
0,2020 Q1,Alabama,AL,21.6
1,2020 Q1,Alaska,AK,23.4
2,2020 Q1,Arizona,AZ,15.7
3,2020 Q1,Arkansas,AR,19.5
4,2020 Q1,California,CA,7.6
5,2020 Q1,Colorado,CO,14.8
6,2020 Q1,Connecticut,CT,5.5
7,2020 Q1,Delaware,DE,9.8
9,2020 Q1,Florida,FL,13.5
10,2020 Q1,Georgia,GA,16.1


In [13]:
# Write the tidy state-level table to disk without the row index.
tidy_csv_path = 'tidy-489q-934x-firearm-related-injury.csv'
tidy.to_csv(tidy_csv_path, index=False)

## Giffords Gun Law Scorecard

Data from the [Giffords](https://giffords.org/lawcenter/resources/scorecard/) Annual Gun Law Scorecard is shown below. This dataset is not available (as far as I can tell) via an API.

In [14]:
# Giffords letter grade per state, keyed by two-letter abbreviation.
# Transcribed by hand from the Giffords Annual Gun Law Scorecard.
giffords_gun_law_scorecard = {
    'AK': 'F',
    'AL': 'F',
    'AR': 'F',
    'AZ': 'F',
    'CA': 'A',
    'CO': 'B',
    'CT': 'A-',
    #'DC': 'C',       # data not available for Washington DC
    'DE': 'B',
    'FL': 'C-',
    'GA': 'F',
    'HI': 'A-',
    'IA': 'F',
    'ID': 'F',
    'IL': 'A-',
    'IN': 'F',
    'KS': 'F',
    'KY': 'F',
    'LA': 'F',
    'MA': 'A-',
    'MD': 'A-',
    'ME': 'F',
    'MI': 'C+',
    'MN': 'C+',
    'MO': 'F',
    'MS': 'F',
    'MT': 'F',
    'NC': 'C',
    'ND': 'F',
    'NE': 'C',
    'NH': 'F',
    'NJ': 'A',
    'NM': 'C+',
    'NV': 'C+',
    'NY': 'A-',
    'OH': 'F',
    'OK': 'F',
    'OR': 'B+',
    'PA': 'B-',
    'RI': 'B+',
    'SC': 'F',
    'SD': 'F',
    'TN': 'F',
    'TX': 'F',
    'UT': 'F',
    'VA': 'B',
    'VT': 'C-',
    'WA': 'B+',
    'WI': 'D+',
    'WV': 'F',
    'WY': 'F'
}

# Letter grade -> grade points on a 4.0 scale. All values are floats so the
# mapping has a single numeric type, and 'D-' is included so every letter
# with a plus/minus variant can be looked up without a KeyError.
letter_grade_to_numeric = {
    'A+' : 4.0,   # capped at 4.0, same as a plain 'A'
    'A' : 4.0,
    'A-' : 3.7,

    'B+' : 3.3,
    'B' : 3.0,
    'B-' : 2.7,

    'C+' : 2.3,
    'C' : 2.0,
    'C-' : 1.7,

    'D+' : 1.3,
    'D' : 1.0,
    'D-' : 0.7,

    'F' : 0.0

}

Squish Giffords letter scores and corresponding numeric grades into 5 bins (as required for assignment): {A: 4, B: 3, C: 2, D: 1, F: 0}

In [15]:
# Strip the '+' / '-' modifiers so each grade collapses to its bare letter.
giffords_gun_law_scorecard_compact = {
    state_abbrev: letter_grade.replace('+', '').replace('-', '')
    for state_abbrev, letter_grade in giffords_gun_law_scorecard.items()
}
giffords_gun_law_scorecard_compact

{'AK': 'F',
 'AL': 'F',
 'AR': 'F',
 'AZ': 'F',
 'CA': 'A',
 'CO': 'B',
 'CT': 'A',
 'DE': 'B',
 'FL': 'C',
 'GA': 'F',
 'HI': 'A',
 'IA': 'F',
 'ID': 'F',
 'IL': 'A',
 'IN': 'F',
 'KS': 'F',
 'KY': 'F',
 'LA': 'F',
 'MA': 'A',
 'MD': 'A',
 'ME': 'F',
 'MI': 'C',
 'MN': 'C',
 'MO': 'F',
 'MS': 'F',
 'MT': 'F',
 'NC': 'C',
 'ND': 'F',
 'NE': 'C',
 'NH': 'F',
 'NJ': 'A',
 'NM': 'C',
 'NV': 'C',
 'NY': 'A',
 'OH': 'F',
 'OK': 'F',
 'OR': 'B',
 'PA': 'B',
 'RI': 'B',
 'SC': 'F',
 'SD': 'F',
 'TN': 'F',
 'TX': 'F',
 'UT': 'F',
 'VA': 'B',
 'VT': 'C',
 'WA': 'B',
 'WI': 'D',
 'WV': 'F',
 'WY': 'F'}

In [16]:
# One row per state: its abbreviation and its compacted letter grade.
giffords_gun_law_scorecard_ds = pd.DataFrame(
    list(giffords_gun_law_scorecard_compact.items()),
    columns=['state_abbrev', 'giffords_grade'],
)
giffords_gun_law_scorecard_ds.head()

Unnamed: 0,state_abbrev,giffords_grade
0,AK,F
1,AL,F
2,AR,F
3,AZ,F
4,CA,A


In [17]:
# Translate each letter grade to its numeric value, then order the table
# from best to worst grade (ties broken by abbreviation, also descending)
# and renumber the rows.
numeric_grades = [
    letter_grade_to_numeric[letter]
    for letter in giffords_gun_law_scorecard_ds['giffords_grade']
]
giffords_gun_law_scorecard_ds = (
    giffords_gun_law_scorecard_ds
    .assign(grade=numeric_grades)
    .sort_values(by=['grade', 'state_abbrev'], ascending=False)
    .reset_index(drop=True)
)
giffords_gun_law_scorecard_ds.head()

Unnamed: 0,state_abbrev,giffords_grade,grade
0,NY,A,4.0
1,NJ,A,4.0
2,MD,A,4.0
3,MA,A,4.0
4,IL,A,4.0


In [18]:
# Write the graded scorecard to disk without the row index.
scorecard_csv_path = 'giffords_gun_law_scorecard.csv'
giffords_gun_law_scorecard_ds.to_csv(scorecard_csv_path, index=False)