In [2]:
from pathlib import Path
import requests
from bs4 import BeautifulSoup

## Hospital Provider Cost Report (CMS)

ref: [API Documentation](https://data.cms.gov/provider-compliance/cost-report/hospital-provider-cost-report/api-docs)

In [3]:
# Base URL of data.cms.gov and the dataset API path built on top of it.
CMS_DATA = 'http://data.cms.gov/'
CMS_DATA_API = f'{CMS_DATA}data-api/v1/dataset'
# NOTE: the full API URL requires a dataset UUID followed by '/data.json' for a downloadable file

In [5]:
# API documentation page for the Hospital Provider Cost Report dataset.
HPCR_DOCS = f'{CMS_DATA}provider-compliance/cost-report/hospital-provider-cost-report/api-docs'

In [31]:

# Fetch the dataset metadata. requests applies no timeout by default, so an
# explicit one keeps this cell from hanging if the server stops responding.
meta_resp = requests.get(enrl_meta_api, timeout=30)
meta_resp.status_code

200

In [32]:
# Top-level metadata fields to keep from the response.
meta_keys = ['title', 'description', 'modified']
# Parse the body of meta_resp once and use it for both the field filter and the
# distribution list, so the cell does not depend on a leftover `resp` variable.
meta_json = meta_resp.json()
meta_data = {k: v for k, v in meta_json.items() if k in meta_keys}
# One download URL per entry in the 'distribution' list.
meta_data['downloads'] = [dist['downloadURL'] for dist in meta_json['distribution']]
meta_data

{'title': 'State Medicaid and CHIP Applications, Eligibility Determinations, and Enrollment Data',
 'description': 'All states (including the District of Columbia) are required to provide data to The Centers for Medicare &amp; Medicaid Services (CMS) on a range of indicators related to key application, eligibility, and enrollment processes within the state Medicaid and Children’s Health Insurance Programs (CHIP). These data reflect enrollment activity for all populations receiving comprehensive Medicaid and CHIP benefits in all states, as well as state program performance.',
 'modified': '2022-11-29T12:30:00+00:00',
 'downloads': ['https://data.medicaid.gov/sites/default/files/uploaded_resources/StateMedicaidandCHIPApplicationsEligibilityDeterminationsandEnrollmentDataAugust2022.csv']}

To retrieve data, append the specific distribution index to the base URL. In this case, the dataset distribution has only one entry (index = 0).

In [33]:
# Index of the last entry in the downloads list, kept as a string
# because it is later joined into the data URL.
ds_indx = f"{len(meta_data['downloads']) - 1}"
print(f'Selected index: {ds_indx}')

Selected index: 0


In [38]:
# Record-query URL: base, endpoint, dataset id and distribution index,
# joined with '/' in that order.
enrl_data_api = '/'.join((
    MDCD_DATA,
    data_endpoint,
    ENRL_DATA_ID,
    ds_indx,
))

In [39]:
# Query-string parameters sent with the data request: first 10 records.
payload = dict(limit=10, offset=0)

In [43]:
# Request the records selected by `payload`. requests applies no timeout by
# default, so an explicit one keeps the cell from blocking on a stalled server.
data_resp = requests.get(enrl_data_api, params=payload, timeout=30)
data_resp.status_code

200

In [47]:
# Record fields to show when previewing the query results.
data_keys = [
    'state_name',
    'report_date',
    'preliminary_updated',
    'final_report',
    'total_medicaid_and_chip_enrollment',
]

In [66]:
# Print each returned record, numbered from 1 and reduced to the fields
# listed in data_keys.
records = data_resp.json()['results']
for n, rslt in enumerate(records, start=1):
    data = {key: rslt[key] for key in rslt if key in data_keys}
    print(f'{n}: {data}')

1: {'state_name': 'Alabama', 'report_date': '2013-09-01', 'preliminary_updated': 'U', 'final_report': 'Y', 'total_medicaid_and_chip_enrollment': '799176'}
2: {'state_name': 'Alabama', 'report_date': '2017-06-01', 'preliminary_updated': 'P', 'final_report': 'N', 'total_medicaid_and_chip_enrollment': '888314'}
3: {'state_name': 'Alabama', 'report_date': '2017-06-01', 'preliminary_updated': 'U', 'final_report': 'Y', 'total_medicaid_and_chip_enrollment': '888314'}
4: {'state_name': 'Alabama', 'report_date': '2017-07-01', 'preliminary_updated': 'P', 'final_report': 'N', 'total_medicaid_and_chip_enrollment': '893193'}
5: {'state_name': 'Alabama', 'report_date': '2017-07-01', 'preliminary_updated': 'U', 'final_report': 'Y', 'total_medicaid_and_chip_enrollment': '893193'}
6: {'state_name': 'Alabama', 'report_date': '2017-08-01', 'preliminary_updated': 'P', 'final_report': 'N', 'total_medicaid_and_chip_enrollment': '893261'}
7: {'state_name': 'Alabama', 'report_date': '2017-08-01', 'preliminary

In [54]:
# Download target: <parent of the working directory>/DATA/enrl
data_dir = Path.cwd().parent / 'DATA'
enrl_dir = data_dir / 'enrl'
# parents=True also creates DATA when it is missing; without it mkdir raises
# FileNotFoundError on a fresh checkout. exist_ok=True keeps re-runs harmless.
enrl_dir.mkdir(parents=True, exist_ok=True)

In [53]:
# Download URL of the selected distribution (ds_indx is stored as a string).
csv_endpoint = meta_data['downloads'][int(ds_indx)]
# requests applies no timeout by default; set one so a stalled server
# cannot block the notebook indefinitely.
csv_resp = requests.get(csv_endpoint, timeout=30)
csv_resp.status_code

200

In [60]:
# Local file name: last path segment of the download URL with the long
# dataset-title text stripped out.
csv_name = (
    csv_endpoint
    .rsplit('/', 1)[-1]
    .replace('StateMedicaidandCHIPApplicationsEligibilityDeterminationsand', '')
)

In [61]:
# Chunk size passed to csv_resp.iter_content() when writing the download to disk.
CHUNK_SZ = 256

In [64]:
filename = enrl_dir / csv_name
## download
# Stop on an HTTP error instead of saving the error body as if it were the CSV.
csv_resp.raise_for_status()
with open(filename, 'wb') as fd:
    for chunk in csv_resp.iter_content(chunk_size=CHUNK_SZ):
        fd.write(chunk)
# Report only after the file is closed: buffered bytes are flushed by then,
# so the size read from disk is the final one.
print(f'downloaded to {csv_name}')
fl_sz = round(filename.stat().st_size / 1024)
print(f'-- filesize: {fl_sz:,} kB')

downloaded to EnrollmentDataAugust2022.csv
-- filesize: 1,256 kB
