In [3]:
# URL of the JSON-LD document to fetch (the data.json endpoint on api.census.gov)
url = "https://api.census.gov/data.json"

In [4]:
from utils import fetch_jsonld_data 
import pyld
import pandas as pd
# Retrieve the document at `url` through the project's fetch_jsonld_data helper.
# NOTE(review): presumably returns the parsed JSON-LD document (it is handed to
# pyld.jsonld.compact further down) — confirm against utils.
jsonld_data = fetch_jsonld_data(url)

In [5]:

# Compact the fetched catalog against the Project Open Data context, then
# frame it so that only nodes typed "Dataset" remain.
context = "https://project-open-data.cio.gov/v1.1/schema/catalog.jsonld"
options = {
    # Loader pyld uses to dereference the remote @context URL.
    'documentLoader': pyld.jsonld.requests_document_loader(),
    # NOTE(review): 'remote_contexts' does not appear among pyld's documented
    # options — confirm it is actually needed.
    'remote_contexts': {}
}

# JSON-LD frame that selects only objects whose @type is "Dataset".
# It reuses `context` so the frame and the compaction cannot drift apart.
frame = {
    "@context": context,
    "@type": "Dataset"
}

# Compact first so that keys take the short names defined by the context.
parsed_data = pyld.jsonld.compact(jsonld_data, context, options=options)

# Keep only the Dataset nodes. The same `options` are passed to every pyld call
# so all of them resolve the remote context through the same document loader.
filtered_jsonld = pyld.jsonld.frame(parsed_data, frame, options=options)

# Flattened form of the framed document.
flattened_data = pyld.jsonld.flatten(filtered_jsonld, options=options)

In [6]:
# Display the compacted document produced by pyld.jsonld.compact
parsed_data

{'@context': 'https://project-open-data.cio.gov/v1.1/schema/catalog.jsonld',
 '@id': 'http://api.census.gov/data/1994/cps/basic/jun.json',
 '@type': 'Catalog',
 'conformsTo': 'https://project-open-data.cio.gov/v1.1/schema',
 'describedBy': 'https://project-open-data.cio.gov/v1.1/schema/catalog.json',
 'dataset': [{'@type': 'Dataset',
   'description': 'To provide estimates of employment, unemployment, and other characteristics of the general labor force, of the population as a whole, and of various subgroups of the population. Monthly labor force data for the country are used by the Bureau of Labor Statistics (BLS) to determine the distribution of funds under the Job Training Partnership Act. These data are collected through combined computer-assisted personal interviewing (CAPI) and computer-assisted telephone interviewing (CATI). In addition to the labor force data, the CPS basic funding provides annual data on work experience, income, and migration from the March Annual Demographic 

In [7]:
# Build a DataFrame from the framed nodes stored under "@graph".
dataset = pd.DataFrame.from_records(data=filtered_jsonld["@graph"])

In [8]:
# Retain only the rows whose accessLevel is exactly "public".
dataset = dataset.loc[dataset["accessLevel"].eq("public")]

In [9]:
# Order rows by the "temporal" column, descending, and renumber the index from 0.
dataset = (
    dataset
    .sort_values(by="temporal", ascending=False)
    .reset_index(drop=True)
)

In [10]:
# Move 'title' to the front; every other column keeps its relative order.
dataset = dataset[
    ['title', *(name for name in dataset.columns if name != 'title')]
]

In [11]:
# List the column labels of `dataset`
dataset.columns

Index(['title', '@type', 'description', 'identifier', 'license', 'modified',
       'publisher', 'c_dataset', 'c_documentationLink', 'c_examplesLink',
       'c_geographyLink', 'c_groupsLink', 'c_isAvailable', 'c_isCube',
       'c_isMicrodata', 'c_sorts_url', 'c_tagsLink', 'c_variablesLink',
       'c_vintage', 'contactPoint', 'distribution', 'keyword', 'references',
       'accessLevel', 'bureauCode', 'programCode', 'spatial', 'temporal',
       'c_isAggregate', 'c_isTimeseries'],
      dtype='object')

In [12]:

# Render the DataFrame as an HTML <table> string: cell text is HTML-escaped and
# URL values are turned into links.
html_table = dataset.to_html(
    escape=True,
    render_links=True,
)


In [13]:
# Show the raw HTML string returned by DataFrame.to_html
html_table

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>title</th>\n      <th>@type</th>\n      <th>description</th>\n      <th>identifier</th>\n      <th>license</th>\n      <th>modified</th>\n      <th>publisher</th>\n      <th>c_dataset</th>\n      <th>c_documentationLink</th>\n      <th>c_examplesLink</th>\n      <th>c_geographyLink</th>\n      <th>c_groupsLink</th>\n      <th>c_isAvailable</th>\n      <th>c_isCube</th>\n      <th>c_isMicrodata</th>\n      <th>c_sorts_url</th>\n      <th>c_tagsLink</th>\n      <th>c_variablesLink</th>\n      <th>c_vintage</th>\n      <th>contactPoint</th>\n      <th>distribution</th>\n      <th>keyword</th>\n      <th>references</th>\n      <th>accessLevel</th>\n      <th>bureauCode</th>\n      <th>programCode</th>\n      <th>spatial</th>\n      <th>temporal</th>\n      <th>c_isAggregate</th>\n      <th>c_isTimeseries</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      

In [14]:

# Write the HTML table to table.html in the current working directory.
# The encoding is pinned to UTF-8 so the output does not depend on the platform's
# default encoding, which can raise UnicodeEncodeError or garble non-ASCII text.
with open('table.html', 'w', encoding='utf-8') as f:
    f.write(html_table)