## NRA Web Scraping
This is a a quick scraping script which scrapes data from the NRA website to create a dataset containing gun laws for all 50 states and the district of columbia. It works well and there are no missing values as of 10/27/2016. The section of data cleaning may need to be tweaked down the line as data on their site is modified, but as of now it works perfectly. The script produces the 3 csv files listed below:<br>

- 'NRA_Gun_Laws_by_State_CHAR.csv' - This is a table with the gun laws by state and their character description.
- 'NRA_Gun_Laws_by_State_NUM.csv' - This is a table with the gun laws by state with a numeric value in place of the character value.
- 'NRA_Gun_Laws_by_State_LEGEND.csv' - This table describes what numeric values correspond to the character values
<br> <br>
Both the Numeric and Character version have the states clustered on strictness of gun laws

In [17]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re

In [18]:
#Create List of States
states = ['Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California', 'Colorado', 'Connecticut', 'Delaware', \
          'district-of-columbia', 'Florida', 'Georgia', 'Hawaii', 'Idaho', 'Illinois', 'Indiana', 'Iowa', \
          'Kansas', 'Kentucky', 'Louisiana', 'Maine', 'Maryland', 'Massachusetts', 'Michigan', 'Minnesota', \
          'Mississippi', 'Missouri', 'Montana', 'Nebraska', 'Nevada', 'New-Hampshire', 'New-Jersey', 'New-Mexico', \
          'New-York', 'North-Carolina', 'North-Dakota', 'Ohio', 'Oklahoma', 'Oregon', 'Pennsylvania', 'Rhode-Island', \
          'South-Carolina', 'South-Dakota', 'Tennessee', 'Texas', 'Utah', 'Vermont', 'Virginia', 'Washington', \
          'West-Virginia', 'Wisconsin', 'Wyoming']

states_dict = {
        'AK': 'Alaska',
        'AL': 'Alabama',
        'AR': 'Arkansas',
        'AZ': 'Arizona',
        'CA': 'California',
        'CO': 'Colorado',
        'CT': 'Connecticut',
        'DC': 'District of Columbia',
        'DE': 'Delaware',
        'FL': 'Florida',
        'GA': 'Georgia',
        'HI': 'Hawaii',
        'IA': 'Iowa',
        'ID': 'Idaho',
        'IL': 'Illinois',
        'IN': 'Indiana',
        'KS': 'Kansas',
        'KY': 'Kentucky',
        'LA': 'Louisiana',
        'MA': 'Massachusetts',
        'MD': 'Maryland',
        'ME': 'Maine',
        'MI': 'Michigan',
        'MN': 'Minnesota',
        'MO': 'Missouri',
        'MS': 'Mississippi',
        'MT': 'Montana',
        'NC': 'North Carolina',
        'ND': 'North Dakota',
        'NE': 'Nebraska',
        'NH': 'New Hampshire',
        'NJ': 'New Jersey',
        'NM': 'New Mexico',
        'NV': 'Nevada',
        'NY': 'New York',
        'OH': 'Ohio',
        'OK': 'Oklahoma',
        'OR': 'Oregon',
        'PA': 'Pennsylvania',
        'RI': 'Rhode Island',
        'SC': 'South Carolina',
        'SD': 'South Dakota',
        'TN': 'Tennessee',
        'TX': 'Texas',
        'UT': 'Utah',
        'VA': 'Virginia',
        'VT': 'Vermont',
        'WA': 'Washington',
        'WI': 'Wisconsin',
        'WV': 'West-Virginia',
        'WY': 'Wyoming'
}

#Lowercase all values in list
states = [x.lower() for x in states]

#Innitialize dict
gun_laws = dict.fromkeys(states)

In [19]:
#Set Base URL
url = "https://www.nraila.org/gun-laws/state-gun-laws/{}/"

In [20]:
#Create Sub-dict for each state
for x in gun_laws.keys(): gun_laws[x]=dict()

In [21]:
for s in range(len(states)):
    #Pull Site
    result = requests.get(url.format(states[s]))

    #Parse
    c = result.content
    soup = BeautifulSoup(result.text,"lxml")

    #first table names
    rifle = [x.get_text()+" (Rifle-Shotgun):" for x in soup.find_all("tbody")[0].find_all("td",{"class":"title"})]
    handgun = [x.get_text()+": (Handgun):" for x in soup.find_all("tbody")[0].find_all("td",{"class":"title"})]
    riflehandgun = []
    for i in range(len(rifle)):
        riflehandgun.append(rifle[i])
        riflehandgun.append(handgun[i])
    #make the first column
    table_r1 = riflehandgun+[x.get_text() for x in soup.find_all("tbody")[1].find_all("td",{"class":"title"})]

    #Get the first table values
    t1 = soup.find_all("tbody")[0].find_all("td",{"class":None})
    #Get the Text
    t1 = [x.get_text() for x in t1]
    #Get the first table values
    t2 = soup.find_all("tbody")[1].find_all("td",{"class":None})
    #Get the Text
    t2 = [x.get_text() for x in t2]
    #Full List of Answers
    t3 = t1+t2
    #Put it into the State Dict
    for i in range(len(t3)):
        gun_laws[states[s]][table_r1[i]] = t3[i]

In [22]:
#Create DataFrame
gun_laws_df = pd.DataFrame(gun_laws).T
#gun_laws_df.to_csv("gun_laws_by_state_scrape_dirty.csv")

In [23]:
#Summary of DataFrame
gun_laws_df.describe().T.sort_values(by="count", ascending=False)

Unnamed: 0,count,unique,top,freq
Castle Doctrine,51,3,Enacted,33
Permit to Purchase (Rifle-Shotgun):,51,8,No,42
Permit to Purchase: (Handgun):,51,7,No,35
Right to Carry in Restaurants,50,3,Legal,26
Right to Carry Confidentiality,50,2,Provisions Enacted,39
Right To Carry Reciprocity and Recognition,50,4,Outright Recognition,17
Registration of Firearms: (Handgun):,50,4,No,41
Registration of Firearms (Rifle-Shotgun):,50,4,No,45
Right to Keep & Bear Arms State Constitutional Provisions,50,3,With Provisions,42
Permit to Carry: (Handgun):,50,6,Yes,35


In [24]:
#Clean Up the Data

#Fix Alaska
gun_laws_df.iloc[1]["Permit to Carry: (Handgun):"] = "No"
gun_laws_df.iloc[1]["Permit to Carry (Rifle-Shotgun):"] = "No"

#Fix Connecticut
gun_laws_df.iloc[6]["No-Net Loss"] = "Yes"

#Fix Mass
gun_laws_df.iloc[21]["Registration of Firearms: (Handgun):"] = "Yes"
gun_laws_df.iloc[21]["Registration of Firearms (Rifle-Shotgun):"] = "Yes"
gun_laws_df.iloc[21]["Licensing of Owners: (Handgun):"] = "Yes"
gun_laws_df.iloc[21]["Licensing of Owners (Rifle-Shotgun):"] = "Yes"

#Fix California
gun_laws_df.iloc[4]["Licensing of Owners: (Handgun):"] = "No"
gun_laws_df.iloc[4]["Licensing of Owners (Rifle-Shotgun):"] = "No"

#Fix Kansas
gun_laws_df.iloc[16]["Licensing of Owners: (Handgun):"] = "No"
gun_laws_df.iloc[16]["Licensing of Owners (Rifle-Shotgun):"] = "No"

#Fix Missouri
gun_laws_df.iloc[25]["Right to Keep & Bear Arms State Constitutional Provisions"] = "With Provisions"
gun_laws_df.iloc[25]["Right to Carry Confidentiality"] = "Provisions Enacted"
gun_laws_df["Right to Carry in Restaurants"] = "Partial Ban"

#Fix Alabama
gun_laws_df.iloc[0]["Right To Carry Laws"] = "Shall Issue"
gun_laws_df.iloc[0]["Right To Carry Reciprocity and Recognition"] = "Outright Recognition"

#Fix Arizona
gun_laws_df.iloc[2]["Right To Carry Laws"] = "No Permit Required"

#Remove Unnecessary Columns
to_drop = ["Registration of Firearms*: (Handgun):", "Registration of Firearms* (Rifle-Shotgun):",\
"Licensing of Owner: (Handgun):", "Licensing of Owner (Rifle-Shotgun):",\
"Right To Keep & Bear Arms State Constitutional Provisions", "Right to Carry Laws", "Right To Carry Confidentiality",\
"Right To Carry in Restaurants", "Right to Carry Reciprocity and Recognition",\
'Permit to possess registered "Assault Weapons": (Handgun):', 'Permit to possess registered \
"Assault Weapons" (Rifle-Shotgun):']
gun_laws_df = gun_laws_df.drop(to_drop,axis=1)

#create state column
state_dict_exch = {val.lower().replace(' ','-'):key.lower() for (key, val) in states_dict.items()}
gun_laws_df['State'] = [state_dict_exch[x] for x in gun_laws_df.index]

#Correcting Inconsistent Values
toreplace = ['\*',
'FOID Required',
'FID Required',
'ID Card Required',
'Yes \(in incorporated areas and concealed anywhere\)',
'Not Available',
'']

replacement = ['',
'Yes',
'Yes',
'Yes',
'Yes',
'Yes',
'No']

gun_laws_df = gun_laws_df.replace(toreplace, replacement, regex = True)

#Standardize column names
gun_laws_df.columns = [re.sub('&','and',re.sub(':','',x.lower()) )for x in list(gun_laws_df.columns)]

#Fixing CT no-net loss missing values True as of 2009
gun_laws_df['no-net loss'][6] = 'Enacted'

In [25]:
gun_laws_df.describe().T

Unnamed: 0,count,unique,top,freq
castle doctrine,51,3,Enacted,33
licensing of owners (rifle-shotgun),51,2,No,47
licensing of owners (handgun),51,2,No,45
no-net loss,51,2,No Legislation,37
permit to carry (rifle-shotgun),51,2,No,48
permit to carry (handgun),51,2,Yes,42
permit to purchase (rifle-shotgun),51,2,No,45
permit to purchase (handgun),51,2,No,38
registration of firearms (rifle-shotgun),51,2,No,46
registration of firearms (handgun),51,2,No,44


In [26]:
#Creating a new df with numeric values instead of character values
gun_laws_num = gun_laws_df.replace(['No','Yes','Enacted', 'No Law', 'No Legislation', 'None', 
'True Reciprocity', 'Outright Recognition','Conditional Recognition', 'No Permit Required', 
'Shall Issue', 'Discretionary/Reasonable Issue', 'Rights Restricted-Very Limited Issue',
'No Provisions', 'Provisions Enacted', 'Partial Ban', 'With Provisions',
'Partial (duty to retreat from co-habitants)'], 
[1, 4, 1, 4, 4, 4, 1, 2, 3, 1, 2, 3, 4, 1, 4, 1, 2.5, 2.5])

#Creates a Legend for What Numeric Values Mean
legend = pd.DataFrame({'Char':['No','Yes','Enacted', 'No Law', 'No Legislation', 'None', 
'True Reciprocity', 'Outright Recognition','Conditional Recognition', 'No Permit Required', 
'Shall Issue', 'Discretionary/Reasonable Issue', 'Rights Restricted-Very Limited Issue',
'No Provisions', 'Provisions Enacted', 'Partial Ban', 'With Provisions',
'Partial (duty to retreat from co-habitants)'],
'Num':[1, 4, 1, 4, 4, 4, 1, 2, 3, 1, 2, 3, 4, 1, 4, 1, 2.5, 2.5]})

In [11]:
#Save Clean
# gun_laws_df.to_csv("gun_laws_by_state_scrape_clean.csv")
# gun_laws_df.describe().T.sort_values(by="count", ascending=False)

In [29]:
gun_laws_num

Unnamed: 0,castle doctrine,licensing of owners (rifle-shotgun),licensing of owners (handgun),no-net loss,permit to carry (rifle-shotgun),permit to carry (handgun),permit to purchase (rifle-shotgun),permit to purchase (handgun),registration of firearms (rifle-shotgun),registration of firearms (handgun),right to carry laws,right to carry reciprocity and recognition,right to carry confidentiality,right to carry in restaurants,right to keep and bear arms state constitutional provisions,state,sum,rank,category
massachusetts,4.0,4,4,4,4,4,4,4,4,4,4,4,1,1,2.5,ma,40,0,Strict
new-jersey,2.5,4,4,4,4,4,4,4,1,1,4,4,4,1,1.0,nj,34,1,Strict
connecticut,1.0,4,4,1,1,4,4,4,4,1,3,4,4,1,2.5,ct,33,2,Strict
california,4.0,1,1,4,1,4,4,4,4,4,4,4,1,1,1.0,ca,31,3,Strict
hawaii,4.0,1,1,4,1,4,4,4,4,4,4,4,4,1,2.5,hi,31,4,Strict
illinois,4.0,4,4,1,1,4,4,4,1,1,2,4,1,1,2.5,il,29,5,Strict
new-york,4.0,1,4,4,1,4,1,4,1,4,4,4,4,1,1.0,ny,28,6,Strict
district-of-columbia,4.0,1,1,4,4,4,1,1,4,4,4,4,1,1,1.0,dc,28,7,Strict
maryland,1.0,1,4,1,1,4,1,4,1,4,4,4,1,1,1.0,md,28,8,Strict
michigan,1.0,1,1,4,1,4,1,4,1,4,2,2,4,1,2.5,mi,21,9,Strict


In [28]:
#Add Rank and Sum Columns
gun_laws_num['sum'] = gun_laws_num[[gun_laws_num.columns[i] for i in [1,2,4,5,6,7,8,9,10,-5]]].sum(axis = 1)
gun_laws_num = gun_laws_num.sort_values(by = 'sum', ascending = False)
gun_laws_num['rank'] = list(range(51))

#Break Into Categories and Add Column
gun_laws_num['category'] = ['Strict']*17 + ['Average']*17 + ['Lenient']*17

#Create df with character values and categories
final_gun_laws = pd.concat([gun_laws_df.sort_values(by = 'state'), 
gun_laws_num.iloc[:,-4:].sort_values(by = 'state').iloc[:,-3:]], axis =1)

In [13]:
#Test Print Statements
#gun_laws_num['category']
#gun_laws_num.iloc[:,-4:].sort_values(by = 'rank').iloc[:,-3:]

In [16]:
#Output to CSV
final_gun_laws.to_csv('data/NRA_Gun_Laws_by_State_CHAR.csv', index = False)
gun_laws_num.to_csv('data/NRA_Gun_Laws_by_State_NUM.csv', index = False)
legend.to_csv('data/NRA_Gun_Laws_by_State_LEGEND.csv', index = False)