In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
from facets_overview.feature_statistics_generator import FeatureStatisticsGenerator
from IPython.core.display import display, HTML
import base64


# Column names, in file order, that are passed to `pd.read_csv(names=...)`
# when the census CSV is loaded below.
COLUMNS = [
    "age",
    "workclass",
    "fnlwgt",
    "education",
    "education_num",
    "marital_status",
    "occupation",
    "relationship",
    "race",
    "gender",
    "capital_gain",
    "capital_loss",
    "hours_per_week",
    "native_country",
    "income_bracket",
]

# Fetch the census CSV via the Keras download helper. `census_csv` holds
# whatever `get_file` returns (per the Keras docs, the local file path) and is
# passed directly to pandas.
census_csv = tf.keras.utils.get_file(
    fname="adult.data",
    origin="https://download.mlcc.google.com/mledu-datasets/adult_census_train.csv",
)

# Parse the file into a DataFrame whose columns are named by COLUMNS.
census_df = pd.read_csv(
    census_csv,
    # The separator is a regex that also consumes whitespace around each
    # comma; the python engine is selected explicitly to go with it.
    engine="python",
    sep=r"\s*,\s*",
    # "?" cells are read as missing values.
    na_values="?",
    names=COLUMNS,
)

# Build the input for the Facets Overview widget in three stages:
#   1. describe the dataset(s) as a list of {"table", "name"} entries,
#   2. let the statistics generator turn them into a protocol buffer,
#   3. serialize that buffer and base64-encode it into a UTF-8 string.
fsg = FeatureStatisticsGenerator()
dataframes = [dict(table=census_df, name="census_data")]
census_proto = fsg.ProtoFromDataFrames(dataframes)

# Base64 text is what gets embedded in the rendered HTML further down.
proto_bytes = census_proto.SerializeToString()
protostr = base64.b64encode(proto_bytes).decode("utf-8")

# Markup for the Facets Overview widget, written as adjacent literals so the
# line breaks and leading spaces of the template are explicit. The page:
#   - loads the webcomponents-lite script,
#   - imports the facets-jupyter HTML bundle,
#   - declares a <facets-overview> element with id "elem",
#   - assigns the base64 proto string to that element's `protoInput`.
# `{protostr}` is the only placeholder and is filled in by `str.format`.
HTML_TEMPLATE = (
    '<script src="https://cdnjs.cloudflare.com/ajax/libs/webcomponentsjs/1.3.3/webcomponents-lite.js"></script>\n'
    '        <link rel="import" href="https://raw.githubusercontent.com/PAIR-code/facets/1.0.0/facets-dist/facets-jupyter.html">\n'
    '        <facets-overview id="elem"></facets-overview>\n'
    '        <script>\n'
    '          document.querySelector("#elem").protoInput = "{protostr}";\n'
    '        </script>'
)

# Substitute the encoded proto and show the result in the notebook output.
html = HTML_TEMPLATE.format(protostr=protostr)
display(HTML(html))