In [1]:
import zipfile
from pathlib import Path
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

In [12]:
# Root URL of the Medicaid site.
MDCD_ROOT = 'https://www.medicaid.gov'
# Expenditure-reports page under that root; built from adjacent literals so
# each path segment sits on its own line.
fmr_homepage = (
    f'{MDCD_ROOT}'
    '/medicaid/financial-management'
    '/state-expenditure-reporting-for-medicaid-chip'
    '/expenditure-reports-mbescbes/index.html'
)

In [20]:
# Fetch the expenditure-reports page. requests applies no timeout by default,
# so set one to keep this cell from hanging if the server never answers.
resp = requests.get(fmr_homepage, timeout=30)
print(f'homepage status: {resp.status_code}')
# Stop on a 4xx/5xx reply so the following cells never parse an error page.
resp.raise_for_status()

homepage status: 200


In [14]:
# Parse the fetched page with the standard-library HTML parser backend.
doc = BeautifulSoup(markup=resp.text, features='html.parser')

In [15]:
# Phrase looked for in anchor text to pick out report links; it is also
# removed from the matching text when deriving each link's label.
fmr_text = 'Financial Management Report'

In [16]:
# Map a normalized fiscal-year label (e.g. 'FY 2021') to the href of every
# anchor whose visible text mentions fmr_text.
fmr_links = {}
for link in doc.find_all('a'):
    link_text = link.text
    if fmr_text not in link_text:
        continue
    # Anchors without an href (named anchors, JS-only links) cannot be
    # downloaded; link['href'] would raise KeyError on them.
    href = link.get('href')
    if not href:
        continue
    fy_label = link_text.replace(fmr_text, '').replace('for', '')
    # Drop non-ASCII characters (presumably non-breaking spaces or dashes in
    # the page markup), then strip, so whitespace left at the edges by the
    # removal does not end up in the label.
    fy_label = fy_label.encode('ascii', 'ignore').decode().strip()
    # Removing a non-ASCII separator can fuse 'FY' with the year; re-split it.
    fy_label = fy_label.replace('FY2', 'FY 2').replace('FY1', 'FY 1')
    fmr_links[fy_label] = {'url': href}
print(f'FMR downloads found: {len(fmr_links)}')

FMR downloads found: 11


In [17]:
# Downloads live in a DATA/fmr folder next to the notebook's working directory.
data_dir = Path.cwd().parent / 'DATA'
fmr_dir = data_dir / 'fmr'
# parents=True also creates DATA itself on a fresh checkout; without it mkdir
# raises FileNotFoundError when the parent folder is missing.
fmr_dir.mkdir(parents=True, exist_ok=True)

In [18]:
# Value passed as chunk_size to iter_content when writing each download to disk.
CHUNK_SZ = 256

In [19]:
# Download each fiscal year's archive into fmr_dir and unpack it there.
for fyr, file_refs in fmr_links.items():
    print(f'Downloading {fyr}...')
    # Resolve the href against the page it was scraped from, so site-relative
    # ('/medicaid/...'), page-relative and absolute hrefs all yield a valid URL.
    file_url = urljoin(fmr_homepage, file_refs['url'])
    print(file_url)
    fy_label = fyr.replace(' ', '_')
    filename = fmr_dir / (fy_label + '.zip')
    ## download
    # stream=True defers the body so iter_content reads it in CHUNK_SZ pieces
    # instead of holding the whole archive in memory first; the with-block
    # releases the connection even if writing fails.
    with requests.get(file_url, stream=True, timeout=60) as r:
        # Fail before writing so an HTML error page is never saved as a .zip
        # (which would only surface later as a BadZipFile error).
        r.raise_for_status()
        with open(filename, 'wb') as fd:
            for chunk in r.iter_content(chunk_size=CHUNK_SZ):
                fd.write(chunk)
    print('-- downloaded')
    ## extract
    with zipfile.ZipFile(filename, 'r') as zip_ref:
        zip_ref.extractall(fmr_dir)
    print('-- extracted')

Downloading FY 2021...
https://www.medicaid.gov/medicaid/financial-management/downloads/financial-management-report-fy2021.zip
-- downloaded
-- extracted
Downloading FY 2020...
https://www.medicaid.gov/medicaid/financial-management/downloads/financial-management-report-fy2020.zip
-- downloaded
-- extracted
Downloading FY 2019...
https://www.medicaid.gov/medicaid/financial-management/downloads/financial-management-report-fy2019.zip
-- downloaded
-- extracted
Downloading FY 2018...
https://www.medicaid.gov/medicaid/financial-management/downloads/financial-management-report-fy2018.zip
-- downloaded
-- extracted
Downloading FY 2017...
https://www.medicaid.gov/medicaid/downloads/financial-management-report-fy2017.zip
-- downloaded
-- extracted
Downloading FY 2016...
https://www.medicaid.gov/medicaid/downloads/financial-management-report-fy2016.zip
-- downloaded
-- extracted
Downloading FY 2015...
https://www.medicaid.gov/medicaid/downloads/financial-management-report-fy2015.zip
-- downloade

In [10]:
# Index the entries in fmr_dir whose names start with 'FY ' by the second
# whitespace-separated token of the name, with one slot per program.
excel_files = {}
for xlsx in fmr_dir.glob('FY *'):
    fyr = xlsx.name.split()[1]
    # Names containing 'CHIP' go in the CHIP slot; everything else is Medicaid.
    program = 'CHIP' if 'CHIP' in xlsx.name else 'Medicaid'
    excel_files.setdefault(fyr, {})[program] = xlsx
print(f'Medicaid & CHIP fiscal years collected: {len(excel_files)}')

Medicaid & CHIP fiscal years collected: 0
