## Obtaining other features

### Opioid prescribing map, 2016
https://www.cdc.gov/drugoverdose/maps/rxcounty2016.html

In [1]:
import requests
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import re
import pickle
import time
from bs4 import BeautifulSoup

In [2]:
# CDC page for the 2016 opioid prescribing map; passed to get_rx_map below.
prescription_map_2016_url = "https://www.cdc.gov/drugoverdose/maps/rxcounty2016.html"

def get_rx_map(url, timeout=30):
    """Scrape the first HTML table at *url* into a list of row records.

    The page is downloaded, parsed with BeautifulSoup's ``html5lib`` parser,
    and the text of the ``<td>`` cells of the first ``<table>`` is collected
    row by row.

    Args:
        url: Address of the page that holds the table.
        timeout: Seconds to wait for the server before giving up; handed
            straight to ``requests.get``.

    Returns:
        A list of three-element lists holding the text of the second, third
        and fourth ``<td>`` cell of every row after the first one.  Values
        are returned as raw strings; no stripping or conversion is done.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        ValueError: If the page contains no ``<table>`` element.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error page instead of trying to parse it.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html5lib")

    table = soup.find("table")
    if table is None:
        raise ValueError("no <table> element found at {}".format(url))

    records = []
    # The first row is skipped by position (presumably the header row).
    for row in table.find_all("tr")[1:]:
        cells = [cell.text for cell in row.find_all("td")]
        # A row with fewer than four cells cannot supply the three kept
        # columns; skip it rather than abort the whole scrape.
        if len(cells) < 4:
            continue
        records.append(cells[1:4])

    return records

In [3]:
# Download the 2016 page and pull the rows out of its table.
rx_2016 = get_rx_map(prescription_map_2016_url)

In [4]:
# Label the scraped 2016 rows; the values are still text at this point.
opioid_rx_df = pd.DataFrame(
    data=rx_2016,
    columns=["state", "INCITS", "opioid_rx_rate_2016"],
)

In [5]:
# 2015 rates: same scrape-and-label steps as for 2016, kept in a separate frame.
prescription_map_2015_url = "https://www.cdc.gov/drugoverdose/maps/rxcounty2015.html"
rx_2015 = get_rx_map(url=prescription_map_2015_url)
opioid_rx_2015 = pd.DataFrame(
    data=rx_2015,
    columns=["state", "INCITS", "opioid_rx_rate_2015"],
)

In [6]:
# Attach the 2015 rate column, matching rows on their INCITS code.
# Only rows whose code appears in both frames survive (inner join).
opioid_rx_df = opioid_rx_df.merge(
    opioid_rx_2015[['INCITS', 'opioid_rx_rate_2015']],
    how='inner',
    on='INCITS',
)

In [7]:
# Display (rows, columns) to check the table after the 2015 merge.
opioid_rx_df.shape

(3143, 4)

In [8]:
# 2014 rates: scrape, label, then join onto the running table by INCITS.
prescription_map_2014_url = "https://www.cdc.gov/drugoverdose/maps/rxcounty2014.html"
rx_2014 = get_rx_map(url=prescription_map_2014_url)
opioid_rx_2014 = pd.DataFrame(
    data=rx_2014,
    columns=["state", "INCITS", "opioid_rx_rate_2014"],
)

# Inner join: only rows whose INCITS code appears in both frames are kept.
opioid_rx_df = opioid_rx_df.merge(
    opioid_rx_2014[['INCITS', 'opioid_rx_rate_2014']],
    how='inner',
    on='INCITS',
)

In [10]:
# Preview the first rows of the merged table.
opioid_rx_df.head()

Unnamed: 0,state,INCITS,opioid_rx_rate_2016,opioid_rx_rate_2015,opioid_rx_rate_2014
0,AK,2013,–,–,–
1,AK,2016,–,–,–
2,AK,2020,66.3,68.2,68.0
3,AK,2050,–,–,–
4,AK,2060,–,–,–


In [13]:
# The scraped rates are text; convert them to numbers.  Anything that does
# not parse (such as the "–" placeholder) becomes NaN.
year_cols = ['opioid_rx_rate_2016', 'opioid_rx_rate_2015', 'opioid_rx_rate_2014']
opioid_rx_df[year_cols] = opioid_rx_df[year_cols].apply(
    lambda column: pd.to_numeric(column, errors='coerce')
)

In [18]:
# Per-state mean rate for each year.  Only the numeric year columns are
# averaged: INCITS still holds strings, and pandas >= 2.0 no longer drops
# non-numeric columns from mean() silently (it raises TypeError instead).
state_avg = opioid_rx_df.groupby('state')[year_cols].mean()

# Replace every missing rate with the mean of its state for that year.
# Approach taken from
# https://stackoverflow.com/questions/19966018/pandas-filling-missing-values-by-mean-in-each-group
for year in year_cols:
    opioid_rx_df[year] = opioid_rx_df.groupby('state')[year].transform(lambda x: x.fillna(x.mean()))

In [19]:
# Preview the first rows after missing rates were filled with state means.
opioid_rx_df.head()

Unnamed: 0,state,INCITS,opioid_rx_rate_2016,opioid_rx_rate_2015,opioid_rx_rate_2014
0,AK,2013,56.366667,59.425,63.141667
1,AK,2016,56.366667,59.425,63.141667
2,AK,2020,66.3,68.2,68.0
3,AK,2050,56.366667,59.425,63.141667
4,AK,2060,56.366667,59.425,63.141667


In [20]:
# Write the finished table to opioid_rx_df.pkl in the working directory.
with open('opioid_rx_df.pkl', mode='wb') as out_file:
    pickle.dump(opioid_rx_df, out_file)

(3143, 5)