In [29]:
from requests import get
from io import BytesIO
import pandas as pd
import pdfplumber
from tabula import read_pdf# ===================== CALIFORNIA =======================
def get_california_2015():
    """Download California's 2015-16 kindergarten vaccination spreadsheet.

    The workbook is fetched from eziz.org, the first four rows are skipped,
    rows containing any missing value are dropped, and the remaining 32
    columns are renamed to the snake_case names listed below.

    Returns:
        pandas.DataFrame: the cleaned sheet with two extra constant columns,
        ``year`` (2015) and ``state`` ('CA').

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    print('2015 - Downloading California Vaccination Data')
    response = get('https://eziz.org/assets/docs/shotsforschool/2015-16CAKindergartenData.xls')
    # Fail fast on an HTTP error instead of handing an error page to pandas,
    # which would surface as a confusing spreadsheet-parsing failure.
    response.raise_for_status()
    data = pd.read_excel(BytesIO(response.content), skiprows=4).dropna()
    # NOTE: spellings such as 'varicell_n' and 'hcp_counceled_*' are kept
    # unchanged because they are part of the returned schema.
    data.columns = ['school_code', 'county', 'public/private',
                    'district', 'city', 'school_name', 'enrollment',
                    'vaccinated_n', 'vaccinated_perc', 'temp_unvax_n',
                    'temp_unvax_perc', 'perm_med_exemp_n', 'perm_med_exemp_perc',
                    'pbe_n', 'pbe_perc', 'hcp_counceled_pbe_n',
                    'hcp_counceled_pbe_perc', 'religious_exemp_n',
                    'religious_exemp_perc', 'over_due_n', 'over_due_perc',
                    'five_dose_DTP_n', 'five_dose_DTP_perc',
                    'polio_n', 'polio_perc', 'mmr_n', 'mmr_perc',
                    'hepb_n', 'hepb_perc', 'varicell_n', 'varicella_perc', 'reported']
    data['year'] = 2015
    data['state'] = 'CA'
    return data
def get_california_2018():
    """Download California's 2018-19 kindergarten vaccination spreadsheet.

    Reads the 'Enrollment 20 or More' sheet, skipping the first three rows,
    keeps only the first 28 columns, drops rows containing any missing value
    and renames the columns to the snake_case names listed below.

    Returns:
        pandas.DataFrame: the cleaned sheet with two extra constant columns,
        ``year`` (2018) and ``state`` ('CA').

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    print('2018 - Downloading California Vaccination Data')
    response = get('https://eziz.org/assets/docs/shotsforschool/2018-19CAKindergartenDataLetter.xlsx')
    # Fail fast on an HTTP error instead of handing an error page to pandas.
    response.raise_for_status()
    sheet = pd.read_excel(BytesIO(response.content), skiprows=3,
                          sheet_name='Enrollment 20 or More')
    # Only the first 28 columns are named below; anything to their right is discarded.
    data = sheet.iloc[:, :28].dropna()
    data.columns = ['school_code', 'county', 'public/private', 'district', 'city',
                    'school_name', 'enrollment_n', 'vaccinated_n',
                    'vaccinated_perc', 'temp_unvax_n', 'temp_unvax_perc',
                    'perm_med_exempt_n', 'perm_med_exempt_perc', 'other_n',
                    'other_perc', 'over_due_n', 'over_due_perc',
                    'five_dose_DTP_n', 'five_dose_DTP_perc',
                    'polio_n', 'polio_perc', 'mmr_n', 'mmr_perc',
                    'hepb_n', 'hepb_perc', 'varicell_n', 'varicella_perc', 'reported']
    data['year'] = 2018
    data['state'] = 'CA'
    return data
# ===================== ILLINOIS =======================
def get_illinois_2018():
    """Download the 'Measles' sheet of Illinois' 2018-19 immunization workbook.

    The first two rows of the sheet are skipped; the column names are left
    as they appear in the spreadsheet.

    Returns:
        pandas.DataFrame: the sheet with two extra constant columns,
        ``year`` (2018) and ``state`` ('IL').

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    print('2018 - Downloading Illinois Vaccination Data')
    response = get('https://www.isbe.net/_layouts/Download.aspx?SourceUrl=/Documents/Immunization_18-19.xlsx')
    # Fail fast on an HTTP error instead of handing an error page to pandas.
    response.raise_for_status()
    data = pd.read_excel(BytesIO(response.content), skiprows=2, sheet_name='Measles')
    data['year'] = 2018
    data['state'] = 'IL'
    return data
def get_illinois_2015():
    """Download the 'Measles' sheet of Illinois' 2015-16 immunization workbook.

    The first two rows of the sheet are skipped; the column names are left
    as they appear in the spreadsheet.

    Returns:
        pandas.DataFrame: the sheet with two extra constant columns,
        ``year`` (2015) and ``state`` ('IL').

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    print('2015 - Downloading Illinois Vaccination Data')
    response = get('https://www.isbe.net/_layouts/Download.aspx?SourceUrl=/Documents/Immunization_15-16.xlsx')
    # Fail fast on an HTTP error instead of handing an error page to pandas.
    response.raise_for_status()
    data = pd.read_excel(BytesIO(response.content), skiprows=2, sheet_name='Measles')
    data['year'] = 2015
    data['state'] = 'IL'
    return data
# ===================== TEXAS =======================
def get_texas_2015():
    """Download Texas' 2015 vaccination spreadsheet from dshs.texas.gov.

    The workbook's default sheet is read without skipping rows or renaming
    columns.

    Returns:
        pandas.DataFrame: the sheet with two extra constant columns,
        ``year`` (2015) and ``state`` ('TX').

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    print('2015 - Downloading Texas Vaccination Data')
    response = get('https://www.dshs.texas.gov/WorkArea/linkit.aspx?LinkIdentifier=id&ItemID=12884905305')
    # Fail fast on an HTTP error instead of handing an error page to pandas.
    response.raise_for_status()
    data = pd.read_excel(BytesIO(response.content))
    data['year'] = 2015
    data['state'] = 'TX'
    return data
def get_texas_2018():
    """Download Texas' 2018-19 kindergarten vaccination coverage spreadsheet.

    The first two rows of the default sheet are skipped; the column names
    are left as they appear in the file.

    Returns:
        pandas.DataFrame: the sheet with two extra constant columns,
        ``year`` (2018) and ``state`` ('TX').

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    print('2018 - Downloading Texas Vaccination Data')
    # NOTE(review): the URL ends in '.pdf' although the payload is parsed with
    # pd.read_excel; the '(XLS)' in the file name suggests it really is a
    # spreadsheet -- confirm against the live download.
    response = get('https://www.dshs.texas.gov/immunize/coverage/docs/2018-2019-School-Vaccination-Coverage-Levels---Kindergarten-(XLS).pdf')
    # Fail fast on an HTTP error instead of handing an error page to pandas.
    response.raise_for_status()
    data = pd.read_excel(BytesIO(response.content), skiprows=2)
    data['year'] = 2018
    data['state'] = 'TX'
    return data
# ===================== MICHIGAN =======================
def get_michigan_2019():
    """Download Michigan's 2019 kindergarten vaccination spreadsheet.

    The first seven rows of the default sheet are skipped and the 17 columns
    are renamed to the snake_case names listed below.

    Returns:
        pandas.DataFrame: the sheet with two extra constant columns,
        ``year`` (2019) and ``state`` ('MI').

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    print('2019 - Downloading Michigan Vaccination Data')
    response = get('https://www.michigan.gov/documents/mdhhs/Kind_2019_For_Website_690426_7.xlsx')
    # Fail fast on an HTTP error instead of handing an error page to pandas.
    response.raise_for_status()
    data = pd.read_excel(BytesIO(response.content), skiprows=7)
    # NOTE: spellings such as 'unvacinated' and 'religion_wavers_perc' are kept
    # unchanged because they are part of the returned schema.
    data.columns = ['school', 'district', 'type', 'county',
                    'total_students', 'total_vaccinated',
                    'vaccinated_perc', 'in_progress',
                    'unvacinated', 'total_waivers',
                    'waivers_perc', 'medical_waivers_n',
                    'medical_waivers_perc', 'religion_waivers_n',
                    'religion_wavers_perc',
                    'phil_waivers_n', 'phil_waivers_perc']
    data['year'] = 2019
    data['state'] = 'MI'
    return data
# ===================== FLORIDA =======================
def get_florida_2019():
    """Download Florida's 2019-20 status report PDF and extract its tables.

    Two pages of the PDF (``pdf.pages[4]`` and ``pdf.pages[5]``; pdfplumber
    pages are zero-indexed) are cropped to the region below y=140 and the
    ruled table on each is extracted. The two tables are stacked, ``year``
    and ``state`` columns are appended, and all 15 columns are renamed.

    Returns:
        pandas.DataFrame: the stacked tables. Cell values are whatever
        pdfplumber extracted (no type conversion is done here), and the row
        index restarts at 0 for the second page.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    print('2019 - Downloading Florida Vaccination Data')
    response = get('http://www.floridahealth.gov/programs-and-services/immunization/resources/surveys/_documents/k-7-status2019-20.pdf')
    # Fail fast on an HTTP error instead of handing an error page to pdfplumber.
    response.raise_for_status()

    # Detect cell boundaries from the ruling lines drawn in the PDF.
    table_settings = {
        "vertical_strategy": "lines",
        "horizontal_strategy": "lines",
    }
    crop_box = (0, 140, 612, 792)

    frames = []
    # The context manager closes the PDF handle once both pages are read.
    with pdfplumber.open(BytesIO(response.content)) as pdf:
        for page_index in (4, 5):
            page = pdf.pages[page_index].crop(crop_box)
            frames.append(pd.DataFrame(page.extract_table(table_settings=table_settings)))

    # DataFrame.append was removed in pandas 2.0; concat stacks the frames the
    # same way (row labels are kept, not renumbered).
    data = pd.concat(frames)

    # year/state are added first so that the 15-name list below covers them.
    data['year'] = 2019
    data['state'] = 'FL'
    data.columns = ['county', 'num_schools', 'num_students', 'full_immunization_count',
                    'full_immunization_perc', 'temp_medical_exemp_count',
                    'temp_medical_exemp_perc', 'perm_medical_exemp_count',
                    'perm_medical_exemp_perc', 'religious_exemp_count',
                    'religious_exemp_perc', '30_day_exemp_count', '30_day_exemp_perc',
                    'year', 'state']
    return data
def get_florida_2015():
    """Extract Florida's 2015-16 status tables from the published PDF.

    tabula reads pages 4 and 5 of the PDF within a fixed rectangular area.
    The first two tables returned have their 'Unnamed: 1' and 'Unnamed: 2'
    columns removed, are given the 13 column names below, and are stacked.

    Returns:
        pandas.DataFrame: the stacked tables with two extra constant columns,
        ``year`` (2015) and ``state`` ('FL'). Row labels are not renumbered
        after stacking.
    """
    print('2015 - Downloading Florida Vaccination Data')

    # tabula's area is (top, left, bottom, right).
    top, left = 152.17, 34.76
    extracted = read_pdf(
        'http://www.floridahealth.gov/programs-and-services/immunization/resources/surveys/_documents/k-7-status2015-16.pdf',
        pages=[4,5],
        area=(top, left, top + 574.21, left + 543.98),
    )

    column_names = ['county', 'num_schools', 'num_students', 'full_immunization_count',
                    'full_immunization_perc', 'temp_medical_exemp_count',
                    'temp_medical_exemp_perc', 'perm_medical_exemp_count',
                    'perm_medical_exemp_perc', 'religious_exemp_count',
                    'religious_exemp_perc', '30_day_exemp_count', '30_day_exemp_perc']

    cleaned = []
    for raw_table in (extracted[0], extracted[1]):
        # Remove the two unnamed columns before applying the 13 column names.
        trimmed = raw_table.drop(['Unnamed: 1', 'Unnamed: 2'], axis = 1)
        trimmed.columns = column_names
        cleaned.append(trimmed)

    combined = pd.concat(cleaned)
    combined['year'] = 2015
    combined['state'] = 'FL'
    return combined

def get_data():
    """Download every dataset and group the frames by year, then state.

    The loaders run in the order 2015 (CA, IL, FL, TX), 2018 (CA, IL, TX),
    2019 (MI, FL).

    Returns:
        dict: ``{year_string: {state_code: DataFrame}}``.
    """
    datasets = {}
    datasets['2015'] = {
        'CA': get_california_2015(),
        'IL': get_illinois_2015(),
        'FL': get_florida_2015(),
        'TX': get_texas_2015(),
    }
    datasets['2018'] = {
        'CA': get_california_2018(),
        'IL': get_illinois_2018(),
        'TX': get_texas_2018(),
    }
    datasets['2019'] = {
        'MI': get_michigan_2019(),
        'FL': get_florida_2019(),
    }
    return datasets

In [59]:
# Fetch the Florida 2015-16 tables on their own so they can be cleaned up before export.
fl_2015 = get_florida_2015()

2015 - Downloading Florida Vaccination Data


In [60]:
# get_florida_2015 stacks two tables without renumbering, so row labels repeat;
# reset_index gives unique 0..n-1 labels and keeps the old ones in an 'index' column.
fl_2015 = fl_2015.reset_index()

In [61]:
# Remove rows labelled 67 through 73 (presumably non-county rows picked up by
# the PDF extraction -- TODO confirm against the source tables).
fl_2015 = fl_2015.drop(index=list(range(67, 74)))

In [62]:
# Drop the old-index column left by reset_index and the constant year/state columns.
fl_2015 = fl_2015.drop(columns=['index', 'year', 'state'])

In [29]:
# Download every state/year dataset; the result is a dict keyed by year string, then state code.
data = get_data()

2015 - Downloading California Vaccination Data
2015 - Downloading Illinois Vaccination Data
2015 - Downloading Florida Vaccination Data
2015 - Downloading Texas Vaccination Data
2018 - Downloading California Vaccination Data
2018 - Downloading Illinois Vaccination Data
2018 - Downloading Texas Vaccination Data
2019 - Downloading Michigan Vaccination Data
2019 - Downloading Florida Vaccination Data


In [30]:
!pwd

/Users/johnsimmons/lecture_material_folder/projects/pres_election/notebooks/jc


In [31]:
# Save the Florida 2019 frame as CSV (path is relative to the notebook's working directory); the row index is not written.
data['2019']['FL'].to_csv('../../data/florida_2019.csv', index=False)

In [64]:
# Save the cleaned Florida 2015 frame as CSV (path is relative to the notebook's working directory); the row index is not written.
fl_2015.to_csv('../../data/florida_2015.csv', index=False)

In [63]:
# Display the cleaned Florida 2015 frame for a visual check of the extracted values.
fl_2015

Unnamed: 0,county,num_schools,num_students,full_immunization_count,full_immunization_perc,temp_medical_exemp_count,temp_medical_exemp_perc,perm_medical_exemp_count,perm_medical_exemp_perc,religious_exemp_count,religious_exemp_perc,30_day_exemp_count,30_day_exemp_perc
0,Alachua,52,2667,2502,93.8%,90,3.4%,6.0,0.2%,45,1.7%,23,0.9%
1,Baker,2,358,353,98.6%,2,0.6%,0.0,0.0%,3,0.8%,0,"0.0""/o"
2,Bay,33,2305,2221,96.4%,50,2.2%,1.0,0.0%,30,1.3%,2,0.1%
3,Bradford,10,324,311,96.0%,13,4.0%,0.0,0.0%,0,0.0%,0,"0.0""/o"
4,Brevard,110,5850,5597,95.7%,84,1.4%,12.0,0.2%,144,2.5%,13,0.2%
...,...,...,...,...,...,...,...,...,...,...,...,...,...
62,Union,2,198,194,98.0%,1,0.5%,1.0,0.5%,2,1.0%,0,0.0%
63,Vol usia,96,5066,4776,94.3%,94,1.9%,12.0,0.2%,157,3.1%,22,0.4%
64,Wakulla,6,366,359,98.1%,3,0.8%,2.0,0.5%,2,0.5%,0,0.0%
65,Walton,13,734,694,94.6%,8,1.1%,0.0,0.0%,24,3.3%,8,1.1%
