In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
# sns.set(style="ticks")

# Load the example dataset for Anscombe's quartet
df = sns.load_dataset("anscombe")

# Data visualization in python

### There are several excellent visualization packages in Python, including:
- matplotlib
- seaborn
- bokeh
- plotly
- pygg / plotnine
- pandas built-in plotting

and others...

Each one has a slightly different grammar philosophy, and each one does different things better. It is very common for data scientists to use more than one plotting package in a single report.

This great variety means there is a lot to learn before you can create the absolute best visualization of your data.



# 1. Introduction to matplotlib

In [None]:
import matplotlib
print(matplotlib.__version__)
print(matplotlib.get_backend())

## Anatomy of a "plot"

<img align="center" src='figure_axes_axis_labeled.png'>

## Getting started

In [None]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
%matplotlib notebook

### Figures

In [None]:
fig = plt.figure()

In [None]:
# Twice as tall as it is wide:
fig = plt.figure(figsize=plt.figaspect(2))

### Axes

In [None]:
fig = plt.figure()
ax = fig.add_subplot(111) # Basically, 1 row and 1 column.
ax.set(xlim=[0.5, 4.5], 
       ylim=[-2, 8], 
       title='An Example Axes',
       ylabel='Y-Axis', 
       xlabel='X-Axis')
plt.show()

In [None]:
ax.set_xlim([0.5, 4.5])
ax.set_ylim([-2, 8])
ax.set_title('An Example Axes')
ax.set_ylabel('Y-Axis')
ax.set_xlabel('X-Axis')

### Basic Plotting

In [None]:
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot([1, 2, 3, 4], [10, 20, 25, 30], color='lightblue', linewidth=3)
ax.scatter([0.3, 3.8, 1.2, 2.5], [11, 25, 9, 26], color='darkgreen', marker='^')
ax.set_xlim(0.5, 4.5)
# plt.show()

### Axes methods vs. pyplot

In [None]:
plt.plot([1, 2, 3, 4], [10, 20, 25, 30], color='lightblue', linewidth=3)
plt.scatter([0.3, 3.8, 1.2, 2.5], [11, 25, 9, 26], color='darkgreen', marker='^')
plt.xlim(0.5, 4.5)
plt.show()

### Multiple Axes

In [None]:
fig, axes = plt.subplots(nrows=2, ncols=2)
plt.show()

In [None]:
fig, axes = plt.subplots(nrows=2, ncols=2)
axes[0,0].set(title='Upper Left')
axes[0,1].set(title='Upper Right')
axes[1,0].set(title='Lower Left')
axes[1,1].set(title='Lower Right')

# To iterate over all items in a multidimensional numpy array, use the `flat` attribute
for ax in axes.flat:
    # Remove all xticks and yticks...
    ax.set(xticks=[], yticks=[])
    
plt.show()

### A brief exercise - can you reproduce this plot?

<img align='center' src='ex_1_1.png'>

In [None]:
# Sample grid shared by all three signals: 100 evenly spaced points on [0, 10].
x = np.linspace(0, 10, 100)
# Three cosine curves, each shifted by one more radian than the previous one.
y1, y2, y3 = (np.cos(x + phase) for phase in (0, 1, 2))
# Panel titles, in the same order as the signals above.
names = [
    'Signal 1',
    'Signal 2',
    'Signal 3',
]

In [None]:
# %load snippets/excercise_1_1.py
# NOTE: plt.style.use() updates matplotlib's global rcParams, so the 'classic'
# look stays active for every later figure until another style is applied.
plt.style.use('classic')
# One row per signal, stacked vertically in a single column.
fig, axes = plt.subplots(nrows=3, ncols=1)
# Pair each Axes with its signal and title.
for ax, y, name in zip(axes, [y1, y2, y3], names):
    ax.plot(x, y, 'k')  # 'k' is the format shorthand for a black line
    # Empty tick lists hide the ticks on both axes.
    ax.set(xticks=[], yticks=[] ,title=name)
plt.show()

### What about plotting them on the same axis?

In [None]:
# %load snippets/excercise_1_1_2.py

# 2. Visual overview of plotting functions 

### What we've mentioned so far
<a  href="AnatomyOfMatplotlib/examples/plot_example.py"><img src="AnatomyOfMatplotlib/images/plot_example.png"></a>
<a href="AnatomyOfMatplotlib/examples/scatter_example.py"><img src="AnatomyOfMatplotlib/images/scatter_example.png"></a>
### Other common plot types
<a href="AnatomyOfMatplotlib/examples/bar_example.py"><img src="AnatomyOfMatplotlib/images/bar_example.png"></a>
<a href="AnatomyOfMatplotlib/examples/fill_example.py"><img src="AnatomyOfMatplotlib/images/fill_example.png"></a>

## 2D Arrays and Images
<a href="AnatomyOfMatplotlib/examples/imshow_example.py"><img src="AnatomyOfMatplotlib/images/imshow_example.png"></a>
<a href="AnatomyOfMatplotlib/examples/pcolor_example.py"><img src="AnatomyOfMatplotlib/images/pcolor_example.png"></a>
<a href="AnatomyOfMatplotlib/examples/contour_example.py"><img src="AnatomyOfMatplotlib/images/contour_example.png"></a>

## Vector Fields
<a href="AnatomyOfMatplotlib/examples/vector_example.py"><img src="AnatomyOfMatplotlib/images/vector_example.png"></a>

## Data Distributions
<a href="AnatomyOfMatplotlib/examples/statistical_example.py"><img src="AnatomyOfMatplotlib/images/statistical_example.png"></a>

## Detailed examples:

### 1. Scatter plots:

In [None]:
# Build the demo data for the scatter plots.
# Fixed seed so the same points are drawn on every run.
np.random.seed(1874)
# Three independent standard-normal samples of 100 values each.
x, y, z = np.random.normal(loc=0, scale=1, size=(3, 100))
# Polar angle of each (x, y) point.
t = np.arctan2(y, x)
# Marker area varies with the angle, between 10 and 60.
size = 10 + 50 * np.square(np.cos(2 * t))

In [None]:
fig, axes = plt.subplots(ncols=3, figsize=(12,4))
axes[0].scatter(x, y, marker='o', facecolor='white', s=80)
axes[1].scatter(x, y, s=size, marker='s', color='darkblue')
axes[2].scatter(x, y, c=z, s=size, cmap='gist_ncar')

### 2. Distributions:

In [None]:
# Parameters of five normal distributions (one entry per distribution).
means = [0, -1, 2.5, 4.3, -3.6]
sigmas = [1.2, 5, 3, 1.5, 2]
# Sample counts differ from one distribution to the next.
nums = [150, 1000, 100, 200, 500]
# Draw one sample array per (mean, sigma, count) triple.
dists = [
    np.random.normal(mu, sigma, count)
    for mu, sigma, count in zip(means, sigmas, nums)
]

In [None]:
# Three side-by-side views of the same samples: histograms, box plots, violins.
fig, axes = plt.subplots(ncols=3, figsize=(15,5))

# One colour per distribution, reused in all three panels.
colors = ['cyan', 'red', 'blue', 'green', 'purple']
# Successive hist() calls on this Axes pick colours from the cycle in order.
axes[0].set_prop_cycle('color', colors)

# histograms
for dist in dists:
    # density=True normalises each histogram to unit area, so samples of
    # different sizes are comparable (it replaces the removed `normed` keyword).
    axes[0].hist(dist, bins=20, density=True, edgecolor='none', alpha=0.5)
axes[0].margins(y=0.05)
axes[0].set_ylim(bottom=0)
axes[0].set_title('Histograms')

# box plots
# patch_artist=True makes the boxes fillable patches so they can be coloured.
result = axes[1].boxplot(dists, patch_artist=True, notch=True, vert=False)
for box, color in zip(result['boxes'], colors):
    box.set(facecolor=color, alpha=0.5)
for item in ['whiskers', 'caps', 'medians']:
    plt.setp(result[item], color='gray', linewidth=1.5)
plt.setp(result['fliers'], markeredgecolor='gray', markeredgewidth=1.5)
# Medians are recoloured last so they stand out against the gray lines.
plt.setp(result['medians'], color='black')
axes[1].margins(0.05)
axes[1].set(yticks=[], ylim=[0, 6])
# Title goes on the box-plot panel itself (it used to overwrite panel 0's title).
axes[1].set_title('Boxplots')

# violin plots
result = axes[2].violinplot(dists, vert=False, showmedians=True)
for body, color in zip(result['bodies'], colors):
    body.set(facecolor=color, alpha=0.5)
for item in ['cbars', 'cmaxes', 'cmins', 'cmedians']:
    plt.setp(result[item], edgecolor='gray', linewidth=1.5)
plt.setp(result['cmedians'], edgecolor='black')
axes[2].margins(0.05)
axes[2].set(ylim=[0, 6])
# Title goes on the violin panel itself (it used to overwrite panel 0's title).
axes[2].set_title('Violinplots')

### Exercise: can you reproduce this figure?

<img src='data/plt_exercise.png'>



In [None]:
import numpy as np
import matplotlib.pyplot as plt
# Fixed seed so the exercise data is identical on every run.
np.random.seed(1)

# Generate data: a 1000-step random walk shifted up by 15, paired with
# evenly spaced x positions on [0, 24].
y_raw = np.random.randn(1000).cumsum() + 15
x_raw = np.linspace(0, 24, y_raw.size)

# Summarise every block of 100 consecutive samples:
# left edge of the block, block mean, and block peak-to-peak range.
x_pos = x_raw.reshape(-1, 100).min(axis=1)
y_avg = y_raw.reshape(-1, 100).mean(axis=1)
# np.ptp() is used instead of the ndarray.ptp() method, which NumPy 2.0 removed.
y_err = np.ptp(y_raw.reshape(-1, 100), axis=1)

# Blocks are evenly spaced, so the gap between the first two is the bar width.
bar_width = x_pos[1] - x_pos[0]

# Make a made up future prediction with a fake confidence band:
# two straight lines starting from the first block's mean -/+ its range.
x_pred = np.linspace(0, 30)
y_max_pred = y_avg[0] + y_err[0] + 2.3 * x_pred
y_min_pred = y_avg[0] - y_err[0] + 1.2 * x_pred

# Just so you don't have to guess at the colors...
barcolor, linecolor, fillcolor = 'wheat', 'salmon', 'lightblue'

# Now you're on your own!

In [None]:
# %load snippets/exercise_2_1_1.py

# 3. Useful plotting tools for data science

## pandas built-in plotting

In [None]:
import pandas as pd

In [None]:
# Matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8*" and
# later dropped the bare "seaborn" name, so use whichever this install provides.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')

In [None]:
df = pd.read_csv('data/gapminder.tsv', delimiter='\t')

In [None]:
df.head(15)

In [None]:
# Bar chart of row counts per continent, scaled down by 12.
# NOTE(review): presumably each country contributes 12 rows (one per surveyed
# year), which would make the bars "countries per continent" — confirm against
# the data file.
(df.continent.value_counts()/12).plot.bar(figsize=[5,5])

In [None]:
# Histogram of GDP per capita for the 2007 rows only
# (row mask and column picked in a single .loc call).
df.loc[df['year'] == 2007, 'gdpPercap'].plot.hist(bins=30)

In [None]:
# Kernel density estimate of GDP per capita for the 2007 rows only.
gdp_2007 = df.loc[df['year'] == 2007, 'gdpPercap']
gdp_2007.plot.kde()
# Clip the x axis at zero on the left; the right limit stays automatic.
plt.xlim([0, None])

In [None]:
df.groupby('continent').plot.scatter(x='year', y='gdpPercap')

## Seaborn

In [None]:
import seaborn as sns
# Matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8*" and
# later dropped the bare "seaborn" name, so use whichever this install provides.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')

[seaborn gallery](https://seaborn.pydata.org/examples/index.html)

In [None]:
df = sns.load_dataset('tips')

In [None]:
df.head()

In [None]:
# Number of rows for each value of the 'size' column.
# The column is passed by keyword: in recent seaborn releases the first
# positional argument is `data`, not `x`, so a bare Series is misread.
sns.countplot(x='size', data=df)

In [None]:
sns.kdeplot(df['total_bill'])

In [None]:
# Side-by-side distributions of the bill and the tip.
# histplot(kde=True, stat='density') is the replacement seaborn recommends for
# the deprecated distplot(): a density-normalised histogram with a KDE overlay.
fig, axes = plt.subplots(ncols=2)
for ax, column in zip(axes, ['total_bill', 'tip']):
    sns.histplot(data=df, x=column, kde=True, stat='density', ax=ax)

In [None]:
# Bivariate KDE of bill vs. tip. The two variables are given as x/y columns;
# the old `data2=` keyword is no longer accepted by current seaborn.
sns.kdeplot(x='total_bill', y='tip', data=df)

In [None]:
sns.jointplot(x='total_bill', y='tip', data=df)

In [None]:
sns.jointplot(x='total_bill', y='tip', data=df, kind='hex')

In [None]:
sns.boxplot(
    x='time',
    y='tip',
    data=df
)

In [None]:
df.columns

In [None]:
df.loc[(df['time'] == 'Dinner')&(df['tip']>6)]

In [None]:
df.loc[(df['time'] == 'Lunch')&(df['tip']>5)]

In [None]:
sns.violinplot(x='size', y='tip', hue='sex', data=df, split=True)

In [None]:
sns.lmplot(x="total_bill", y="tip",
           col="sex", 
#            hue="size", 
           data=df,
           ci=99, 
           palette="muted", 
           scatter_kws={"s": 50, "alpha": 1}
          )

### Faceting with seaborn

In [None]:
# One facet per (sex, size) combination; every facet shares the x range 0-10.
g = sns.FacetGrid(data=df, col='size', row='sex', xlim=[0, 10])
# map_dataframe hands each facet's sub-frame to violinplot as `data=` and lets
# us name the column explicitly as `x`, so the violins lie along the x axis
# (matching xlim above) regardless of how seaborn orders positional arguments.
g.map_dataframe(sns.violinplot, x='tip')

In [None]:
sns.pairplot(df)

## Interactive plot parameters with ipywidgets

In [None]:
from ipywidgets import interact, FloatRangeSlider, FloatSlider

In [None]:
def myfig(xrange, yrange, w):
    """Plot sin(w * x) on a new figure with the given axis limits.

    The curve is sampled on x from -10 up to (not including) 10 in steps of
    1e-3; the limits only control which part of it is visible.

    Parameters
    ----------
    xrange : pair of numbers
        Passed to the Axes as its x limits.
    yrange : pair of numbers
        Passed to the Axes as its y limits.
    w : number
        Multiplier applied to x inside the sine.
    """
    grid = np.arange(-10, 10, step=1e-3)
    wave = np.sin(w * grid)

    ax = plt.figure().add_subplot(111)
    ax.plot(grid, wave)
    ax.set_xlim(xrange)
    ax.set_ylim(yrange)

In [None]:
myfig(xrange=[0,np.pi], yrange=[-1.1, 1.1], w=6)

In [None]:
interact(myfig, 
         xrange=FloatRangeSlider(value=[0, 10], min=-10, max=10, readout=True), 
         yrange=FloatRangeSlider(value=[-1, 1], min=-10, max=10, readout=True),
         w=FloatSlider(value=1, min=0, max=10, step=0.5)
        )

# Other libraries to explore:
- Interactive plotting with plotly
- "grammar of graphics" (R ggplot) libraries such as plotnine
- Interactive ggplotting with bokeh