# imports

In [1]:
from pathlib import Path

import pandas as pd
import requests

# acquire data

In [2]:
# Fetch the views listing from data.cdc.gov and decode the JSON payload.
api_url = "https://data.cdc.gov/api/views.json"
# Without a timeout, requests waits indefinitely on an unresponsive server.
response = requests.get(api_url, timeout=30)
# Stop here on a 4xx/5xx status instead of parsing an error body as data.
response.raise_for_status()
data = response.json()

In [3]:
# Peek at the first four records of the decoded response.
data[:4]

[{'id': 'vqex-u5kb',
  'name': 'Decay rate of gas-phase peracetic acid in a polyvinyl fluoride sample bag',
  'assetType': 'file',
  'averageRating': 0,
  'blobFilename': 'MS-25-076 dataset.zip',
  'blobFileSize': 299364,
  'blobId': 'edcf7db1-0f27-4c79-a5b7-b43761fbac30',
  'blobMimeType': 'application/x-zip-compressed',
  'category': 'National Institute for Occupational Safety and Health',
  'createdAt': 1754487333,
  'description': 'In this study the temperature dependent decay rate of gas-phase peracetic acid (PAA) concentrations in polyvinyl fluoride (PVF) sample bags was measured. Headspace from a PAA solution was used to dynamically generate atmospheres of gas-phase PAA which were transferred to PVF bags. The concentration of gas-phase PAA in PVF bags, in a temperature-controlled environment, was measured versus time using both selected ion flow-tube mass spectrometry (SIFT-MS) and impinger based colorimetric measurements. A review of PAA decomposition reactions, kinetics, and r

In [4]:
# Load the decoded records into a DataFrame for tabular work.
df = pd.DataFrame(data=data)

# clean data

In [5]:
# Keep an independent snapshot so the frame can be restored if later steps modify df.
backup_df = df.copy(deep=True)

In [6]:
# Show the dtype pandas assigned to each column.
df.dtypes

id                           object
name                         object
assetType                    object
averageRating                 int64
blobFilename                 object
blobFileSize                float64
blobId                       object
blobMimeType                 object
category                     object
createdAt                     int64
description                  object
diciBackend                    bool
displayType                  object
downloadCount                 int64
hideFromCatalog                bool
hideFromDataJson               bool
locked                         bool
newBackend                     bool
numberOfComments              int64
oid                           int64
provenance                   object
publicationAppendEnabled       bool
publicationDate               int64
publicationGroup              int64
publicationStage             object
rowsUpdatedAt                 int64
tableId                       int64
totalTimesRated             

# analyze data

In [11]:
# Tally rows per asset type, then append a grand-total entry labeled 'Total'.
asset_type_count = df['assetType'].value_counts()
grand_total = asset_type_count.sum()
asset_type_count.loc['Total'] = grand_total

In [9]:
# Narrow the frame to the identifier and title columns.
just_names = df.loc[:, ['id', 'name']]

In [11]:
# Order the id/name pairs by id, ascending.
just_names = just_names.sort_values(by='id', ascending=True)

In [12]:
# Display the id/name pairs, ordered by id.
just_names

Unnamed: 0,id,name
44,235m-gsry,Pulmonary evaluation of whole-body inhalation ...
1170,2389-pvg9,Table of Smokefree Indoor Air for Private Work...
232,23ai-eekx,Sex at Birth Related Disparities in Cigarette ...
1296,23gt-ssfe,NNDSS - Table II. Invasive Pneumococcal to Leg...
851,247v-f7n9,"NNDSS - TABLE 1Z. Pertussis to Poliomyelitis, ..."
...,...,...
1092,yt7u-eiyg,NCHS - Birth Rates for Females by Age Group: U...
46,yu68-juzt,Anthropometry of Law Enforcement Officers
1110,yvdd-g9b2,Table of States with Laws Prohibiting Sales of...
376,yviw-z6j5,Weekly United States COVID-19 Cases and Deaths...


# export data

In [13]:
# Write the full frame, the id/name pairs, and the asset-type tally to exported_data/.
export_dir = Path('exported_data')
# pandas writers do not create missing directories, so make sure the target exists.
export_dir.mkdir(parents=True, exist_ok=True)

df.to_csv(export_dir / 'cdc-data.csv')
df.to_json(export_dir / 'cdc-data.json')
just_names.to_csv(export_dir / 'cdc-data-names.csv', index=False)
# index=False is rejected with the default 'columns' orient; 'records' writes one
# object per row and leaves the index out.
just_names.to_json(export_dir / 'cdc-data-names.json', orient='records')
# Keep the index here: it holds the asset-type labels (and 'Total'); dropping it
# would leave a column of unlabeled counts.
asset_type_count.to_csv(export_dir / 'asset_type_count.csv')