![Königsweg Logo](../img/koenigsweg_150.png)

<span style="font-size: small;float: right;">&copy; 2015-2017 Alexander C.S. Hendorf, <a href="http://koenigsweg.com">Königsweg GmbH</a>, Mannheim </span>

---

# Visualisation

---

In [None]:
# Core stack for this notebook: pandas for data handling, numpy for numerics,
# matplotlib.pyplot for plotting.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Select matplotlib's 'default' style sheet for all following plots.
plt.style.use('default')

In [None]:
%matplotlib inline
# With this magic the plots are embedded in the notebook itself;
# without it they would be opened in a new window (not in the browser).

In [None]:
# Set the inline backend's figure format to 'retina'.
%config InlineBackend.figure_format = 'retina'

In [None]:
# Load the cleaned sales data from the Excel workbook.
sales_data = pd.read_excel('../data/blooth_sales_data_clean.xlsx')
# Derive the turnover of every row as unit price times number of units.
sales_data['turnover'] = sales_data['unitprice'] * sales_data['units']

# Show the first five rows, now including the new 'turnover' column.
# (Only the last expression of a cell is displayed, so a single preview
# at the end is sufficient.)
sales_data.head(5)

In [None]:
sales_data['units'].plot()
# pandas hands the drawing over to matplotlib - the default plot kind is a line chart

In [None]:
# Histogram of the 'units' column with 15 bins; the trailing ';' keeps the
# returned object's repr out of the cell output.
sales_data['units'].plot.hist(bins=15);

In [None]:
# Same 15-bin histogram, drawn on a figure of size (10, 5) for this plot only.
units = sales_data['units']
units.plot.hist(bins=15, figsize=(10, 5));

In [None]:
import matplotlib
# Set the global default figure size to (10, 5) through matplotlib's rcParams.
matplotlib.rcParams['figure.figsize'] = (10, 5)

In [None]:
# Display the current matplotlib rcParams settings.
matplotlib.rcParams

Matplotlib colormaps [Webseite](https://matplotlib.org/examples/color/colormaps_reference.html)

In [None]:
# Histogram of 'units' with title, legend, font size, colormap, grid and
# custom bar outlines; the options are collected in one keyword dictionary.
hist_options = {
    'bins': 15,
    'title': "Verteilung Bestellmenge",
    'legend': True,
    'fontsize': 14,
    'colormap': 'Pastel1',
    'grid': True,
    'linestyle': '--',
    'edgecolor': 'black',
    'linewidth': 1.2,
}
sales_data['units'].plot.hist(**hist_options);

In [None]:
# Assign every order to a bin number via ceil(units * 15 / 50), computed on the
# whole column at once instead of element by element.
# The built-in int is used as the target dtype: the former np.int alias has
# been removed from NumPy and raises AttributeError on current versions.
sales_data['units_bin'] = np.ceil(sales_data['units'] * 15 / 50).astype(int)

In [None]:
# Draw the styled histogram again, this time keeping the returned axes object
# so that further elements can be added to the same plot.
ax = sales_data['units'].plot.hist(
    bins=15, 
    title="Verteilung Bestellmenge",
    legend=True,
    fontsize=14,
    colormap='Pastel1',
    grid=True,
    linestyle='--',
    edgecolor='black', 
    linewidth=1.2
);
# Add a solid red horizontal line at y = maximum of 'units'.
# NOTE(review): the y axis of a histogram is a frequency while the line position
# is a 'units' value - presumably this only demonstrates axhline; confirm intent.
ax.axhline(sales_data['units'].max(), color='red', linestyle='-');

In [None]:
# Number of rows per precomputed bin, drawn as a bar chart.
bin_counts = sales_data.groupby('units_bin').count()['units']
ax = bin_counts.plot.bar(
    title="Verteilung Bestellmenge",
    colormap='Pastel2',
    grid=True,
    linestyle='--',
    edgecolor='black',
    linewidth=1.2,
)

# Axis labels.
ax.set_xlabel('Bins')
ax.set_ylabel('Frequency')

# Free text placed at data coordinates (1, 75).
ax.text(1, 75, r'equation: y=ceil(x*15/50)', fontsize=15)

# Annotation with an arrow pointing from the text position (3, 30) to (7, 26).
arrow_style = dict(facecolor='black', shrink=0.1)
ax.annotate(
    'Schauen Sie dort!',
    xy=(7, 26),
    xytext=(3, 30),
    arrowprops=arrow_style,
    fontsize=11,
);

In [None]:
# Total turnover per product (one row per product, single 'turnover' column).
tt = sales_data.groupby('product')[['turnover']].sum()
tt

In [None]:
# Bar chart of the values held in tt.
tt.plot.bar();

In [None]:
# Same bar chart with logy=True (logarithmic y axis) and a font size of 14.
tt.plot.bar(logy=True, fontsize=14);

---

#### Exercise

In [None]:
# Total number of units per product (one row per product, single 'units' column).
product_sum = sales_data.groupby('product')[['units']].sum()
product_sum

Create a bar chart and customize three elements of your choice.

In [None]:
# your code here


In [None]:
# Load the contents of ../solutions/401.py into this cell.
%load ../solutions/401.py

Weiterführendes:

* [Text in matplotlib](https://matplotlib.org/users/text_intro.html)
* [Text properties and layout](https://matplotlib.org/users/text_props.html)


---

### Seaborn

Seaborn is based on matplotlib and provides a more *modern* style. 

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

Histogram

In [None]:
# Distribution plot of the 'units' column.
# NOTE(review): distplot is deprecated in newer seaborn releases - consider
# histplot/displot once the installed seaborn version is confirmed.
sns.distplot(sales_data['units']);

In [None]:
# Add the weekday of every order date as its own column.
sales_data['weekday'] = sales_data['orderdate'].map(lambda order_date: order_date.weekday())

# For demonstration purposes continue with a subset of the dataset:
# only rows with a unit price below 50, reduced to four columns.
below_50 = sales_data['unitprice'] < 50
subset_columns = ['units', 'turnover', 'product', 'weekday']
sales_50 = sales_data.loc[below_50, subset_columns]
sales_50

Box Plot and Density Plot

In [None]:
# Box plot of the 'units' column of the sales_50 subset.
sns.boxplot(sales_50['units']);

In [None]:
# Switch seaborn to the "dark" style, then draw a box plot of 'turnover'.
sns.set_style("dark")
sns.boxplot(sales_50['turnover']);

In [None]:
# Switch seaborn to the "ticks" style and draw the 'turnover' box plot again.
sns.set_style("ticks")
sns.boxplot(sales_50['turnover']);
# Call despine with offset=10 and trim=True on the current plot.
sns.despine(offset=10, trim=True);

In [None]:
# Switch seaborn to the "darkgrid" style, then draw a density plot from the
# two series 'weekday' and 'turnover'.
sns.set_style("darkgrid")
sns.kdeplot(sales_50['weekday'], sales_50['turnover']);

In [None]:
# Use the "darkgrid" style with larger major ticks on both axes.
# The rc overrides are style settings and therefore belong to set_style;
# handed to kdeplot as a third positional argument they would be interpreted
# as a plot parameter rather than as tick settings.
sns.set_style("darkgrid", {"xtick.major.size": 8, "ytick.major.size": 8})
# Density plot from the two series 'weekday' and 'turnover'.
sns.kdeplot(sales_50['weekday'], sales_50['turnover']);

Clustermap

In [None]:
# Turnover summed per (weekday, product) for rows with a unit price below 50,
# with the products unstacked into columns, drawn as a cluster map.
below_50_rows = sales_data[sales_data['unitprice'] < 50]
weekday_product_turnover = (
    below_50_rows[['weekday', 'product', 'turnover']]
    .groupby(['weekday', 'product'])
    .sum()
    .unstack()
)
sns.clustermap(weekday_product_turnover);

In [None]:
# Show the "YlGnBu" colour palette with 10 colours.
sns.palplot(sns.color_palette("YlGnBu", 10))

In [None]:
# Custom palette given as six hex colour codes.
myplatte = [
    "#9b59b6",
    "#3498db",
    "#95a5a6",
    "#e74c3c",
    "#34495e",
    "#2ecc71",
]
# Make it the active seaborn palette and display the palette now in effect.
sns.set_palette(myplatte)
active_palette = sns.color_palette()
sns.palplot(active_palette)

In [None]:
# Preview the first rows of the sales_50 subset.
sales_50.head()

In [None]:
# One facet per product (wrapping after five columns, coloured by product),
# each showing a bar plot of turnover over weekday. Rows with missing values
# are dropped beforehand.
sales_50_complete = sales_50.dropna()
g = sns.FacetGrid(
    data=sales_50_complete,
    col='product',
    col_wrap=5,
    hue='product',
)
g.map(sns.barplot, "weekday", "turnover");

---

### Bokeh

Bokeh ist unabhängig von matplotlib und für interaktive Datenvisualisierungen in modernen Browsern gemacht.

In [None]:
from bokeh.plotting import figure, output_notebook, output_file, show

In [None]:
# Direct Bokeh's output to the notebook.
output_notebook()

In [None]:
import numpy as np

from bokeh.plotting import figure, output_file, show

# --- data -------------------------------------------------------------------
# N random points in the square [0, 100) x [0, 100), each with a random radius
# below 1.5.
N = 4000
x = np.random.random(size=N) * 100
y = np.random.random(size=N) * 100
radii = np.random.random(size=N) * 1.5

# One hex colour per point: red derived from x, green derived from y,
# blue fixed at 150 (hex 96).
colors = []
for r, g in zip(50 + 2 * x, 30 + 2 * y):
    colors.append(f"#{int(r):02x}{int(g):02x}{150:02x}")

# Optional: write to a static HTML file (with CDN resources) instead
# output_file("color_scatter.html", title="color_scatter.py example", mode="cdn")

# --- figure -----------------------------------------------------------------
TOOLS = "crosshair,pan,wheel_zoom,box_zoom,reset,box_select,lasso_select"

# New plot with the tools listed above and explicit axis ranges.
p = figure(tools=TOOLS, x_range=(0, 100), y_range=(0, 100))

# Circle renderer with per-point colours and radii.
p.circle(
    x,
    y,
    radius=radii,
    fill_color=colors,
    fill_alpha=0.6,
    line_color=None,
)

# Display the result.
show(p)