# Exploratory visualization in `pandas`

For exploring data, `pandas` actually has pretty decent visualization capabilities.

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

In [None]:
import pandas as pd

In [None]:
import seaborn as sns

In [None]:
# Load seaborn's bundled "iris" example dataset into `df`.
df = sns.load_dataset(name="iris")

In [None]:
# Preview the first five rows (rich display as the cell's last expression).
df.head(n=5)

In [None]:
# Route DataFrame.plot through matplotlib for the cells that follow.
pd.set_option("plotting.backend", "matplotlib")

In [None]:
# Kernel density estimates drawn as separate subplots on a 2x2 grid with a
# shared y-axis. The trailing semicolon suppresses the returned axes repr.
df.plot.kde(subplots=True, layout=(2, 2), sharey=True);

In [None]:
from pandas.plotting import boxplot_frame_groupby

In [None]:
# Box plots for each `species` group, arranged in one row of three panels
# with tick labels rotated 45 degrees and the grid turned off.
boxplot_frame_groupby(
    df.groupby("species"),
    rot=45,
    grid=False,
    layout=(1, 3),
);

In [None]:
# Switch DataFrame.plot over to the Plotly backend for the next cell.
pd.set_option("plotting.backend", "plotly")

In [None]:
# Sepal vs. petal length, coloured by species, with an OLS trend line and
# marginal box (x) / violin (y) plots. Everything beyond x and y is passed
# through as keyword arguments to the active plotting backend.
df.plot.scatter(
    x="sepal_length",
    y="petal_length",
    color="species",
    trendline="ols",
    marginal_x="box",
    marginal_y="violin",
)

## Using `pandas-bokeh`

In [None]:
import pandas_bokeh
from bokeh.io import output_notebook

# Make pandas-bokeh the plotting backend for DataFrame.plot.
pd.set_option("plotting.backend", "pandas_bokeh")

In [None]:
# Configure Bokeh to render its figures inline in the notebook output.
output_notebook()

The following example is taken from the official [pandas-bokeh](https://github.com/PatrikHlobil/Pandas-Bokeh) documentation.

In [None]:
# Read the cities table and derive the marker size: `pop_max` in millions.
df_mapplot = pd.read_csv(r"https://bit.ly/325W5Yy").assign(
    size=lambda cities: cities["pop_max"] / 1000000
)

# Plot each city at its longitude/latitude on a tiled map; the hover tooltip
# shows the city name and its `pop_max` value.
df_mapplot.plot_bokeh.map(
    x="longitude",
    y="latitude",
    size="size",
    hovertool_string="<h2> @{name} </h2> <h3> Population: @{pop_max} </h3>",
    tile_provider="STAMEN_TERRAIN_RETINA",
    figsize=(900, 600),
    title="World cities with more than 1.000.000 inhabitants",
)

## More controlled visualizations

## Grammar of graphics in Python

If you love `ggplot2` and just want to stick with it, `plotnine` brings the same grammar of graphics to Python.

In [None]:
import warnings
from plotnine import *
from plotnine.exceptions import PlotnineWarning
from plotnine.data import meat

# Silence FutureWarning and plotnine's own warning category so the plot
# cells below are not cluttered with warning text.
warnings.simplefilter(action="ignore", category=FutureWarning)
warnings.simplefilter(action="ignore", category=PlotnineWarning)

In [None]:
# Peek at three random rows. The fixed seed keeps the displayed sample the
# same on every Restart & Run All instead of changing on each execution.
meat.sample(3, random_state=0)

In [None]:
# Reshape `meat` from wide to long: keep `date`, move the former column names
# into a `meat` column and their values into a `price` column.
# NOTE: this rebinds `df`, which held the iris data earlier in the notebook.
df = meat.melt(id_vars=["date"], var_name="meat", value_name="price")

In [None]:
# Build (but do not yet render) a line chart of price over time, one coloured
# line per meat type, using plotnine's xkcd theme.
p = (
    ggplot(data=df, mapping=aes(x="date", y="price", color="meat"))
    + geom_line()
    + theme_xkcd()
    + labs(title="Lorem ipsum bacon")
)

In [None]:
# Render the plot; the trailing semicolon hides the return value's repr.
p.draw();

In [None]:
# Write the plot to a PNG file in the current working directory.
p.save(filename="meat.png")

In [None]:
from IPython.display import Image

In [None]:
# Display the PNG file that the plot was saved to.
Image('meat.png')

## Similar plot in `seaborn`

In [None]:
# Everything is drawn inside the xkcd context so the sketch-style rcParams
# apply to the lines, the title and the legend alike.
with plt.xkcd():
    g = sns.lineplot(x="date", y="price", hue="meat", data=df)
    g.set_title("Lorem ipsum bacon")
    # Anchor the legend just outside the right edge of the axes.
    g.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0)

### Show as heatmap

In [None]:
# Standardise every numeric column (subtract its mean, divide by its standard
# deviation) so that all columns share one colour scale, then draw the
# heatmap. The trailing semicolon suppresses the returned axes repr.
sns.heatmap(
    meat.select_dtypes("number").pipe(
        lambda cols: (cols - cols.mean()) / cols.std()
    )
);