# Sample Notebook: BigQuery table stats

## Imports

In [None]:
%load_ext google.cloud.bigquery

!pip install --user facets-overview

## Load data

In [None]:
%%bigquery total_births
-- Pull a 100,000-row slice of the public natality sample table; the result is
-- bound to the notebook variable `total_births` named on the magic line above.
-- NOTE(review): there is no ORDER BY, so which rows LIMIT returns is presumably
-- not stable between runs -- confirm that is acceptable for this sample.
SELECT *
FROM `bigquery-public-data.samples.natality`
LIMIT 100000

In [17]:
# Summary statistics for the loaded table, flipped so each column becomes a row.
total_births.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
source_year,100000.0,1998.47958,11.912372,1969.0,1991.0,2005.0,2007.0,2008.0
year,100000.0,1998.47958,11.912372,1969.0,1991.0,2005.0,2007.0,2008.0
month,100000.0,6.57425,3.454615,1.0,4.0,7.0,10.0,12.0
day,22241.0,15.779731,8.802341,1.0,8.0,16.0,23.0,99.0
wday,77759.0,4.069805,1.885353,1.0,3.0,4.0,6.0,7.0
child_race,38386.0,4.404366,4.650257,1.0,1.0,2.0,9.0,58.0
weight_pounds,99286.0,7.146141,1.354057,0.500449,6.461749,7.242185,7.993962,16.935911
plurality,97802.0,1.023721,0.158693,1.0,1.0,1.0,1.0,4.0
apgar_1min,28281.0,62.915279,44.527775,1.0,9.0,99.0,99.0,99.0
apgar_5min,84652.0,20.807754,30.645352,0.0,9.0,9.0,9.0,99.0


## Visualize statistics

In [15]:
# Build the facets-overview feature statistics for the loaded table and turn
# the resulting proto into a base64 text string.
import base64
from facets_overview.generic_feature_statistics_generator import GenericFeatureStatisticsGenerator

gfsg = GenericFeatureStatisticsGenerator()

# ProtoFromDataFrames takes a list of {'name', 'table'} entries; only one
# dataset is summarized here.
datasets = [
    {'name': 'total_births', 'table': total_births},
]
proto = gfsg.ProtoFromDataFrames(datasets)

# b64encode yields bytes; convert to str so it can be substituted into the
# HTML template later in the notebook.
serialized = proto.SerializeToString()
protostr = str(base64.b64encode(serialized), "utf-8")



In [16]:
# Display the facets overview visualization for this data.
# `display` and `HTML` come from the public `IPython.display` module; importing
# `display` from `IPython.core.display` is deprecated and fails on recent
# IPython releases.
from IPython.display import display, HTML

# HTML snippet that loads the webcomponents-lite script, imports the
# facets-jupyter bundle from the URLs below, and assigns the base64 proto
# string to the <facets-overview> element's `protoInput` property.
HTML_TEMPLATE = """
        <script src="https://cdnjs.cloudflare.com/ajax/libs/webcomponentsjs/1.3.3/webcomponents-lite.js"></script>
        <link rel="import" href="https://raw.githubusercontent.com/PAIR-code/facets/1.0.0/facets-dist/facets-jupyter.html" >
        <facets-overview id="elem"></facets-overview>
        <script>
          document.querySelector("#elem").protoInput = "{protostr}";
        </script>"""

# `protostr` is the base64 string produced by the preceding statistics cell.
html = HTML_TEMPLATE.format(protostr=protostr)
display(HTML(html))