In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

In [None]:
# Notebook-wide plotting defaults: 10x5 figures on seaborn's 'whitegrid' style.
plt.rcParams.update({'figure.figsize': (10, 5)})
sns.set_style('whitegrid')

# Data Loading

In [None]:
# Choose where to read the preprocessed dataset from.
# `google.colab` is imported purely as an environment probe: when the import
# succeeds the file is fetched from the GitHub raw URL, otherwise the
# repository-relative path is used.
try:
    import google.colab  # noqa: F401  (imported only to detect Colab)
except ImportError:
    # Only a missing module means "not on Colab"; any other error should
    # surface instead of being silently swallowed by a bare `except:`.
    data_file = '../data/preprocessed/adult.feather'
else:
    data_file = 'https://raw.githubusercontent.com/sesise0307/pydata2021-eda/main/data/preprocessed/adult.feather'

df = pd.read_feather(data_file)

In [None]:
# Bucket `hours_per_week` into 5-hour bins (edges 0, 5, ..., 100) and label
# every bin with its midpoint (2.5, 7.5, ..., 97.5).
hpw_bins = np.arange(0, 101, 5)
hpw_labels = (hpw_bins[:-1] + hpw_bins[1:]) / 2
df['hpw_cut'] = pd.cut(
    x=df['hours_per_week'],
    bins=hpw_bins,
    labels=hpw_labels,
)

In [None]:
# Interactive figures get sluggish with many data points, so they are drawn
# from a fixed-seed random subset of 1000 rows rather than the full frame.
df_sample = df.sample(n=1000, random_state=47)

In [None]:
# Preview the first rows of the loaded frame.
df.head()

# Ipywidgets

- [GitHub](https://github.com/jupyter-widgets/ipywidgets)
- [Documentation](https://ipywidgets.readthedocs.io/)

`ipywidgets` are interactive HTML widgets for Jupyter notebooks and the IPython kernel.

Notebooks come alive when interactive widgets are used.

Users gain control of their data and can visualize changes in the data.

In [None]:
import ipywidgets as widgets

## Basic `interact`

In [None]:
def square(x):
    """Return `x` raised to the power of two."""
    return pow(x, 2)

In [None]:
# Plain call: evaluate `square` for x = 10.
square(10)

In [None]:
# Hand `square` to interact with 10 as the starting value for `x`.
# The trailing semicolon suppresses the repr of the call's return value.
widgets.interact(square, x=10);

## With Barplot

In [None]:
def plot_bar(x='age_group', y='fake_income', hue='sex'):
    """Draw a seaborn bar plot from the notebook-level `df`.

    Parameters
    ----------
    x, y, hue : str
        Column names forwarded to `sns.barplot` under the same keywords.

    The plot is requested with ``ci='sd'``. Nothing is returned.
    """
    encodings = dict(x=x, y=y, hue=hue)
    sns.barplot(data=df, ci='sd', **encodings)

In [None]:
# Defaults only: `age_group` on x, `fake_income` on y, split by `sex`.
plot_bar();

In [None]:
# Same plot with the binned hours-per-week column on the x axis.
plot_bar(x='hpw_cut');

In [None]:
# Column choices for the widgets: everything non-numeric for the grouping
# arguments, everything numeric for the plotted value.
str_columns = df.select_dtypes(exclude='number').columns
numeric_columns = df.select_dtypes(include='number').columns

# Each keyword supplies the choices for the plot_bar argument of the same name.
widgets.interact(plot_bar, x=str_columns, y=numeric_columns, hue=str_columns);

## As a Decorator

In [None]:
@widgets.interact(x=str_columns, y=numeric_columns, hue=str_columns)
def plot_bar(x='age_group', y='fake_income', hue='sex'):
    """Draw a seaborn bar plot from the notebook-level `df`.

    Same body as the earlier `plot_bar` cell; here the widget choices are
    attached with the decorator instead of a separate `interact` call.

    Parameters
    ----------
    x, y, hue : str
        Column names forwarded to `sns.barplot` under the same keywords.
    """
    encodings = dict(x=x, y=y, hue=hue)
    sns.barplot(data=df, ci='sd', **encodings)

In [None]:
@widgets.interact(
    age_group=sorted(df['age_group'].unique()),
    x=numeric_columns,
    y=numeric_columns,
)
def wrapper(age_group, x='age', y='fake_income'):
    """Draw a seaborn regression plot of `y` against `x` for one age group.

    Parameters
    ----------
    age_group
        Value of the `age_group` column whose rows are kept.
    x, y : str
        Column names forwarded to `sns.regplot`.
    """
    in_group = df['age_group'] == age_group
    sns.regplot(x=x, y=y, data=df.loc[in_group])

## [Supported Widgets](https://ipywidgets.readthedocs.io/en/stable/examples/Widget%20List.html#)

The examples below are taken from the ipywidgets documentation.

In [None]:
# Integer slider labelled 'Test:' -- range 0..10 in steps of 1, starting at 7.
widgets.IntSlider(
    description='Test:',
    value=7, min=0, max=10, step=1,
    orientation='horizontal',
    readout=True, readout_format='d',
    continuous_update=False,
    disabled=False,
)

In [None]:
# Float slider labelled 'Test:' -- range 0..10.0 in steps of 0.1, starting at 7.5.
widgets.FloatSlider(
    description='Test:',
    value=7.5, min=0, max=10.0, step=0.1,
    orientation='horizontal',
    readout=True, readout_format='.1f',
    continuous_update=False,
    disabled=False,
)

In [None]:
# Integer range slider labelled 'Test:' -- bounds 0..10, initial selection [5, 7].
widgets.IntRangeSlider(
    description='Test:',
    value=[5, 7], min=0, max=10, step=1,
    orientation='horizontal',
    readout=True, readout_format='d',
    continuous_update=False,
    disabled=False,
)

In [None]:
# Integer progress bar labelled 'Loading:' -- at 7 on a 0..10 scale.
widgets.IntProgress(
    description='Loading:',
    value=7, min=0, max=10, step=1,
    orientation='horizontal',
)

In [None]:
# Integer text box labelled 'Any:' with an initial value of 7.
widgets.IntText(description='Any:', value=7, disabled=False)

In [None]:
# Checkbox labelled 'Check me', initially unchecked.
widgets.Checkbox(description='Check me', value=False, disabled=False)

In [None]:
# Dropdown labelled 'Number:' offering '1', '2', '3' with '2' preselected.
widgets.Dropdown(
    description='Number:',
    options=['1', '2', '3'],
    value='2',
    disabled=False,
)

In [None]:
# Radio buttons labelled 'Pizza topping:' with three options.
widgets.RadioButtons(
    description='Pizza topping:',
    options=['pepperoni', 'pineapple', 'anchovies'],
    disabled=False,
)

In [None]:
# List selector labelled 'OS:' with 'OSX' preselected.
# The original example also carries a disabled `rows=10` argument; it stays off here.
widgets.Select(
    description='OS:',
    options=['Linux', 'Windows', 'OSX'],
    value='OSX',
    disabled=False,
)

In [None]:
# Text box labelled 'String:' pre-filled with 'Hello World'.
widgets.Text(
    description='String:',
    value='Hello World',
    placeholder='Type something',
    disabled=False,
)

In [None]:
# Button captioned 'Click me' with a 'check' icon and a matching tooltip.
widgets.Button(
    description='Click me',
    tooltip='Click me',
    icon='check',
    button_style='',  # 'success', 'info', 'warning', 'danger' or ''
    disabled=False,
)

In [None]:
# Date picker labelled 'Pick a Date'.
widgets.DatePicker(description='Pick a Date', disabled=False)

# Plotly and Plotly Express

![Plotly](../image/plotly.png)

- [Plotly in Python GitHub](https://github.com/plotly/plotly.py)
- [Plotly in Python Documentation](https://plotly.com/python/)
- [Plotly Express in Python Documentation](https://plotly.com/python/plotly-express/)


Plotly in Python is an interactive, open-source, and browser-based graphing library for Python.

Built on top of `plotly.js`, `plotly.py` is a high-level, declarative charting library.

The Plotly Express module contains functions that can create entire figures at once.

Plotly Express is a built-in part of the plotly library,
and is the recommended starting point for creating most common figures.

> The Plotly Express API closely resembles Seaborn's.


In [None]:
import plotly.graph_objects as go
import plotly.express as px

## Scatter

In [None]:
# Plotly Version
# Build the figure by hand with graph objects: one marker per sampled row,
# coloured by `age`, with a colour bar next to the plot.
fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x=df_sample['hours_per_week'],
        y=df_sample['fake_income'],
        mode='markers',
        # All marker settings live in one dict instead of being split between
        # `marker=` and `marker_color=`.
        marker=dict(
            color=df_sample['age'],
            showscale=True,
            colorbar=dict(title=dict(text='age')),
        ),
    )
)
# Axis titles are set explicitly here so the figure carries the same labels
# as the Plotly Express and Seaborn versions it is compared with.
# `update_layout` returns the figure, so the cell still displays it.
fig.update_layout(
    xaxis_title='hours_per_week',
    yaxis_title='fake_income',
)

In [None]:
# Plotly Express Version
px.scatter(x=df_sample['hours_per_week'],
           y=df_sample['fake_income'],
           color=df_sample['age'])

In [None]:
# C.f., Seaborn Version
sns.scatterplot(x=df_sample['hours_per_week'],
                y=df_sample['fake_income'],
                hue=df_sample['age']);

> I usually start with Plotly Express and drop down to Plotly only when necessary.
>
> This mirrors the previous chapter, where we started with Seaborn and then used Matplotlib for fine-tuning.

## Scatter Matrix

In [None]:
# Pairwise scatter plots of four numeric columns from the sample, coloured by `sex`.
px.scatter_matrix(
    data_frame=df_sample,
    color='sex',
    dimensions=['age', 'capital_gain', 'hours_per_week', 'fake_income'],
)

In [None]:
# C.f., Seaborn Version
sns.pairplot(
    data=df,
    hue='sex',
    vars=['age', 'capital_gain', 'hours_per_week', 'fake_income'],
    plot_kws=dict(alpha=0.3, s=10),
);

## Parallel Categories

In [None]:
# Parallel-categories diagram over six categorical columns of the sample,
# coloured by `hours_per_week`.
px.parallel_categories(
    data_frame=df_sample,
    color='hours_per_week',
    dimensions=['age_group', 'education', 'race', 'workclass', 'sex', 'income'],
)

## Parallel Coordinates

In [None]:
# Parallel-coordinates plot of the sample, coloured by `fake_income`.
# No `dimensions` are given, so the column selection is left to Plotly Express.
px.parallel_coordinates(
    data_frame=df_sample,
    color='fake_income',
)

> [HiPlot](https://github.com/facebookresearch/hiplot) is another nice parallel coordinates plotting library from Facebook.
>
> It is much more responsive with lots of data.

![HiPlot Example](../image/hiplot.png)

# Bokeh

- [GitHub](https://github.com/bokeh/bokeh)
- [Documentation](https://docs.bokeh.org/)

Bokeh is an interactive visualization library for modern web browsers.

It provides elegant, concise construction of versatile graphics, and affords **high-performance interactivity over large or streaming datasets**.

Bokeh can help anyone who would like to quickly and easily make interactive plots, dashboards, and data applications.

In [None]:
from bokeh.plotting import figure, show
from bokeh.io import output_notebook

# Configure Bokeh so that `show(...)` renders inline in this notebook.
output_notebook()

In [None]:
from bokeh.palettes import Viridis256
from bokeh.transform import linear_cmap

# Passing `color='age'` makes Bokeh read the `age` column as literal colour
# values. Ages are plain numbers, so they have to be mapped onto a palette
# explicitly to get the age-based colouring the other libraries produce.
age_colors = linear_cmap(
    field_name='age',
    palette=Viridis256,
    low=float(df_sample['age'].min()),
    high=float(df_sample['age'].max()),
)

# Label the axes so the figure can be read on its own.
p = figure(
    tools='hover,crosshair',
    x_axis_label='hours_per_week',
    y_axis_label='fake_income',
)

p.scatter(
    x='hours_per_week',
    y='fake_income',
    color=age_colors,
    source=df_sample,
)

show(p)

# Altair

- [GitHub](https://github.com/altair-viz/altair)
- [Documentation](https://altair-viz.github.io/)

Altair is a **declarative statistical visualization** library for Python, based on `Vega` and `Vega-Lite`.

With Altair, you can spend more time understanding your data and its meaning.

Altair’s API is simple, friendly and consistent and built on top of the powerful `Vega-Lite` visualization grammar.

This elegant simplicity produces beautiful and effective visualizations with a **minimal amount of code**.

In [None]:
import altair as alt

In [None]:
# Circle-mark scatter of the sample: hours vs. fake income, coloured by age,
# with a five-field tooltip and Altair's interactive mode switched on.
(
    alt.Chart(df_sample)
    .mark_circle()
    .encode(
        x='hours_per_week',
        y='fake_income',
        color='age',
        tooltip=['hours_per_week', 'fake_income', 'age', 'education', 'income'],
    )
    .interactive()  # Add interactivity
)