# Tutorial for NGWB Ingest API with Brunel visualizations
Brunel is a highly succinct and novel language for defining interactive data visualizations based on tabular data. The language is well suited for both data scientists and more aggressive business users. The system interprets the language and produces visualizations using the user's choice of existing lower-level visualization technologies typically used by application engineers, such as RAVE or D3.

In [None]:
#!pip install brunel

In [None]:
import pandas as pd
import brunel

In [None]:
# Imports first: the Spark SQL entry point and the project helper class.
from pyspark.sql import SQLContext
from extension_utils import ExtensionUtils

# `sc` is not created in this notebook; presumably the kernel provides the
# SparkContext under that name -- confirm for your environment.
sqlContext = SQLContext(sc)

# Helper object used by the later inferTypes / profile / render calls.
eu = ExtensionUtils(sqlContext)

In [None]:
# Fetch the sample dataset archive into /resources/data (quiet mode, no progress output).
!wget --quiet  --output-document /resources/data/sparklingdataset.zip https://ibm.box.com/shared/static/9nxnsf6xwmuczjea911xjxp8l21yyd2x.zip
# Extract into /resources/data/sparklingdata/; -o overwrites existing files without prompting.
!unzip -o /resources/data/sparklingdataset.zip -d /resources/data/sparklingdata/
# Remove the archive after extraction.
!rm /resources/data/sparklingdataset.zip

In [None]:
# Read the sample data directory through the "com.ibm.spark.discover" source.
df = (
    sqlContext.read
    .format("com.ibm.spark.discover")
    .load("/resources/data/sparklingdata/data/sampleDataDir/")
)

In [None]:
# Run type inference with the "extractFields" option enabled and print the
# schema of the result.
options = dict(extractFields=True)
dfInfered = eu.inferTypes(df, options)
dfInfered.printSchema()

In [None]:
# Run type inference with the "revealNA" option and display the rows.
# NOTE(review): the bracket pair presumably marks the values that could not
# be typed -- confirm against the ExtensionUtils documentation.
options = dict(
    revealNA=dict(mode="any", brackets=(">[", "]<")),
)
dfBaddata = eu.inferTypes(df, options)
dfBaddata.show()

In [None]:
# Re-run type inference on the raw frame with locale 'es' and grouping
# enabled, then print the inferred types.
options = dict(locale='es', groupingUsed=True)
dfFixedBaddata = eu.inferTypes(df, options)
eu.printTypes(dfFixedBaddata)

In [None]:
# Combine field extraction with the locale settings. Note the input here is
# dfFixedBaddata (the previous inference result), not the raw `df`.
options = dict(extractFields=True, locale='es', groupingUsed=True)
dfConverted = eu.inferTypes(dfFixedBaddata, options)

# Inspect the result: schema first, then the first five rows.
dfConverted.printSchema()
dfConverted.show(5)

In [None]:
# Profile the converted frame; dfProfiled feeds every eu.render(...) call below.
dfProfiled = eu.profile(dfConverted)
# Print the profile that was just computed.
eu.printProfile(dfProfiled)

In [None]:
# Build the three per-column views of the profile, each reduced to the
# name / value / column fields and restricted to a single column.
discovered_labels_BP = (
    eu.render(dfProfiled, "labels")
    .select("name", "value", "column")
    .filter("column = 'BP'")
)
discovered_types_Cost = (
    eu.render(dfProfiled, 'types')
    .select("name", "value", "column")
    .filter("column = 'Cost'")
)
# NOTE(review): despite "types" in the variable name, this view renders
# "labels" -- confirm which one is intended.
discovered_types_Drug = (
    eu.render(dfProfiled, "labels")
    .select("name", "value", "column")
    .filter("column = 'Drug'")
)

# Convert each view to pandas so the Brunel cells can reference it by name.
pd_discovered_labels_BP = discovered_labels_BP.toPandas()
pd_discovered_types_Cost = discovered_types_Cost.toPandas()
pd_discovered_types_Drug = discovered_types_Drug.toPandas()

In [None]:
# Two chart definitions separated by "|": the first reads pd_discovered_labels_BP,
# the second pd_discovered_types_Cost. Both are stacked polar bars on `value`,
# coloured and labelled by `name`, with tooltips showing all fields.
# NOTE(review): "|" presumably places the charts side by side and percent(value)
# presumably scales value to a share of the total -- confirm in the Brunel docs.
%brunel data('pd_discovered_labels_BP') stack polar bar y(value) polar color(name) label(name) percent(value) tooltip(#all) | data('pd_discovered_types_Cost') stack polar bar y(value) polar color(name) label(name) percent(value) tooltip(#all)

In [None]:
%%brunel data('pd_discovered_types_Drug') stack polar bar y(value) polar color(name) label(name) percent(value) tooltip(#all)
 :: width=400, height=300

In [None]:
# Histogram rows from the profile, restricted to the Cost column.
df_histogram = (
    eu.render(dfProfiled, "histogram")
    .select("name", "value", "column")
    .filter("column = 'Cost'")
)
df_histogram.show()

# pandas copy consumed by the Brunel chart.
pd_histogram = df_histogram.toPandas()

In [None]:
# Bar chart over pd_histogram: `name` on x, `value` on y, coloured by `name`,
# rendered at 900x600.
# NOTE(review): filter(column) presumably adds an interactive filter on
# `column` -- confirm in the Brunel docs.
%brunel data('pd_histogram') bar x(name) y(value) filter(column) color(name) tooltip(#all) :: width=900, height=600

In [None]:
# Histogram rows from the profile, restricted to the Age column.
df_age = (
    eu.render(dfProfiled, "histogram")
    .select("name", "value", "column")
    .filter("column = 'Age'")
)
# pandas copy consumed by the Brunel chart.
pd_age = df_age.toPandas()

In [None]:
# Bar chart over pd_age: `name` on x, `value` on y, coloured by `name`.
# NOTE(review): filter(column) presumably adds an interactive filter on
# `column` -- confirm in the Brunel docs.
%brunel data('pd_age') bar x(name) y(value) filter(column) color(name) tooltip(#all)

In [None]:
# "stats" rows from the profile, restricted to the Cost column.
df_stats = (
    eu.render(dfProfiled, "stats")
    .select("name", "value", "column")
    .filter("column = 'Cost'")
)
# pandas copy consumed by the Brunel chart.
pd_stats = df_stats.toPandas()

In [None]:
# Area chart over pd_stats with `name` on x, sorted by `value`.
# NOTE(review): yrange(0,value) presumably draws the area from 0 up to
# `value` -- confirm in the Brunel docs.
%brunel data('pd_stats') area x(name) yrange(0,value) sort(value)

In [None]:
# "datetime" rows from the profile, restricted to the DateTested column.
df_datetime = (
    eu.render(dfProfiled, "datetime")
    .select("name", "value", "column")
    .filter("column = 'DateTested'")
)
# pandas copy consumed by the Brunel chart.
pd_datetime = df_datetime.toPandas()

In [None]:
# Area chart over pd_datetime with `name` on x.
# NOTE(review): yrange(0,value) presumably draws the area from 0 up to
# `value` -- confirm in the Brunel docs.
%brunel data('pd_datetime') area x(name) yrange(0,value)

In [None]:
# Read only customers.csv through the "com.ibm.spark.discover" source.
dfCustomers = (
    sqlContext.read
    .format("com.ibm.spark.discover")
    .load("/resources/data/sparklingdata/data/sampleDataDir/customers.csv")
)

# Inspect what was loaded: schema first, then the rows.
dfCustomers.printSchema()
dfCustomers.show()

In [None]:
# Type inference on the customers frame with both field extraction and
# semantic types enabled; show the resulting schema and rows.
options = dict(extractFields=True, semanticTypes=True)
dfCustomersInferred = eu.inferTypes(dfCustomers, options)
dfCustomersInferred.printSchema()
dfCustomersInferred.show()

In [None]:
# Semantic-type inference limited to column "C2", with the "revealNA" option.
# NOTE(review): the bracket pair presumably marks values that do not fit the
# inferred type -- confirm against the ExtensionUtils documentation.
options = dict(
    semanticTypes=True,
    columns=["C2"],
    revealNA=dict(mode="any", brackets=(">[", "]<")),
)
dfCustomersForAnalysis = eu.inferTypes(dfCustomers, options)
dfCustomersForAnalysis.show()

In [None]:
# Rebind dfCustomersInferred, this time with field extraction turned off but
# semantic types still on, and profile that result.
options = dict(extractFields=False, semanticTypes=True)
dfCustomersInferred = eu.inferTypes(dfCustomers, options)

dfCustProfiled = eu.profile(dfCustomersInferred)
eu.printProfile(dfCustProfiled)

In [None]:
# "labels" rows from the customer profile, restricted to column C1.
dfC1Labels = eu.render(dfCustProfiled, "labels").select("name", "value", "column").filter("column = 'C1'")
# Call show(): the bare attribute `dfC1Labels.show` only looked up the bound
# method and displayed nothing.
dfC1Labels.show()
pd_dfC1Labels = dfC1Labels.toPandas()

# "types" rows from the customer profile, restricted to column C1.
dfC1Types = eu.render(dfCustProfiled, "types").select("name", "value", "column").filter("column = 'C1'")
# Same fix as above: show must be called to print the rows.
dfC1Types.show()
pd_dfC1Types = dfC1Types.toPandas()

In [None]:
# Two chart definitions separated by "|": a bar chart over pd_dfC1Labels
# (`name` on x, `value` on y, sorted by `value`) and a stacked polar bar over
# pd_dfC1Types coloured and labelled by `name`.
# NOTE(review): "name:3" in the label presumably shortens `name` to three
# characters, and "|" presumably places the charts side by side -- confirm
# in the Brunel docs.
%brunel data('pd_dfC1Labels') bar x(name) y(value) sort(value) label(name:3, ": ", value) | data('pd_dfC1Types')stack polar bar y(value) polar color(name) label(name) percent(value) tooltip(#all)

## Want to learn more?

<a href="http://bigdatauniversity.com/courses/introduction-to-python/?utm_source=tutorial-sparkling-python3&utm_medium=dswb&utm_campaign=bdu"><img src = "https://ibm.box.com/shared/static/l8yxiek0fg4e15lwz0ikgunj338nrrtd.png"> </a>

Created by: <a href="https://bigdatauniversity.com/?utm_source=bducreatedbylink&utm_medium=dswb&utm_campaign=bdu">The Cognitive Class Team</a>