# Plotting and Visualization

In [155]:
# Imports used throughout the chapter.
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Remember the current pandas row limit, then cap displayed rows at 20.
PREVIOUS_MAX_ROWS = pd.get_option('display.max_rows')
pd.set_option('display.max_rows', 20)

# Seed NumPy's global generator and shorten array printing.
np.random.seed(12345)
np.set_printoptions(precision=4, suppress=True)

# Default figure size.
plt.rc('figure', figsize=(10, 6))

The simplest way to follow the code examples in the chapter is to use interactive plotting
in the Jupyter notebook.

In [156]:
# IPython magic: select matplotlib's interactive 'notebook' backend for the figures below.
%matplotlib notebook

## A Brief matplotlib API Primer

In [157]:
import matplotlib.pyplot as plt

In [158]:
import numpy as np

# Integers 0 through 9, used as sample data for the first plot.
data = np.arange(0, 10)
data

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [159]:
# Draw the sample array as a line plot.
plt.plot(data)

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x203e4212250>]

### Figures and Subplots

In [160]:
# Create a new, empty figure with plt.figure(); subplots are added to it below.
fig = plt.figure()

<IPython.core.display.Javascript object>

The figure should be 2 × 2 (so up to four plots in total), 
and we’re selecting the first of four subplots (numbered from 1):

In [161]:
# First cell of the 2 x 2 grid (subplot positions are numbered from 1).
ax1 = fig.add_subplot(2, 2, 1)

In [162]:
# Second and third cells of the same 2 x 2 grid.
ax2 = fig.add_subplot(2, 2, 2)
ax3 = fig.add_subplot(2, 2, 3)

In [163]:
# An empty matplotlib figure with three subplots.
# For more complex plots you must put all of the plotting commands in a single notebook cell.
fig = plt.figure()
ax1 = fig.add_subplot(2, 2, 1)
ax2 = fig.add_subplot(2, 2, 2)
ax3 = fig.add_subplot(2, 2, 3)

# Plot on ax3 explicitly. plt.plot would also draw here, but only because ax3 is
# the most recently created subplot; naming the axes removes that hidden dependency.
# 'k--' is a style option instructing matplotlib to plot a black dashed line.
ax3.plot(np.random.randn(50).cumsum(), 'k--')

# Histogram on the first subplot, scatter plot on the second.
_ = ax1.hist(np.random.randn(100), bins=20, color='k', alpha=0.3)
ax2.scatter(np.arange(30), np.arange(30) + 3 * np.random.randn(30))

<IPython.core.display.Javascript object>

<matplotlib.collections.PathCollection at 0x203edff2910>

In [164]:
# Close every open figure before the next example.
plt.close('all')

In [165]:
# Build a figure and a 2 x 3 array of subplot axes in a single call.
fig, axes = plt.subplots(nrows=2, ncols=3)
axes

<IPython.core.display.Javascript object>

array([[<AxesSubplot:>, <AxesSubplot:>, <AxesSubplot:>],
       [<AxesSubplot:>, <AxesSubplot:>, <AxesSubplot:>]], dtype=object)

options: pyplot.subplots

nrows # Number of rows of subplots

ncols # Number of columns of subplots

sharex # All subplots should use the same x-axis ticks (adjusting the xlim will affect all subplots)

sharey # All subplots should use the same y-axis ticks (adjusting the ylim will affect all subplots)

subplot_kw # Dict of keywords passed to add_subplot call used to create each subplot

**fig_kw # Additional keywords to subplots are used when creating the figure, such as plt.subplots(2, 2,
figsize=(8, 6))


#### Adjusting the spacing around subplots

In [None]:
# Full set of spacing options. The function lives in pyplot, so it is called
# as plt.subplots_adjust; the bare name is not defined in this notebook.
plt.subplots_adjust(left=None, bottom=None, right=None, top=None,
                    wspace=None, hspace=None)

# wspace and hspace control the percent of the figure width and figure height,
# respectively, to use as spacing between subplots.

In [167]:
# 2 x 2 grid of histograms sharing both axes, with no spacing between panels.
fig, axes = plt.subplots(nrows=2, ncols=2, sharex=True, sharey=True)
for panel in axes.flat:  # row-major order: [0, 0], [0, 1], [1, 0], [1, 1]
    panel.hist(np.random.randn(500), bins=50, color='k', alpha=0.5)
plt.subplots_adjust(wspace=0, hspace=0)

<IPython.core.display.Javascript object>

In [168]:
# Same 2 x 2 grid of histograms, lighter bars and a small gap between panels.
fig, axes = plt.subplots(nrows=2, ncols=2, sharex=True, sharey=True)
for position in np.ndindex(axes.shape):  # (0, 0), (0, 1), (1, 0), (1, 1)
    axes[position].hist(np.random.randn(500), bins=50, color='k', alpha=0.3)
plt.subplots_adjust(wspace=0.1, hspace=0.1)

<IPython.core.display.Javascript object>

### Colors, Markers, and Line Styles

In [None]:
# plot x versus y with green dashes
# (sample data and an axes are created here so that the cell runs on its own)
x = np.arange(10)
y = x ** 2
fig, ax = plt.subplots()
ax.plot(x, y, 'g--')

In [None]:
# more explicitly as:
# (sample data and an axes are created here so that the cell runs on its own)
x = np.arange(10)
y = x ** 2
fig, ax = plt.subplots()
ax.plot(x, y, linestyle='--', color='g')

In [171]:
from numpy.random import randn

# Start a fresh figure, then draw a random walk as a black dashed line.
# Line plots can additionally have markers to highlight the actual data points;
# the 'o' in the style string requests them.
plt.figure()
plt.plot(randn(30).cumsum(), 'ko--')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x203f32d2ca0>]

In [172]:
# The same line style spelled out with keyword arguments instead of the
# 'ko--' shorthand: black color, circle markers, dashed line.
plt.plot(randn(30).cumsum(), color='k', marker='o', linestyle='dashed')

[<matplotlib.lines.Line2D at 0x203f3d912e0>]

In [173]:
# Close every open figure before the next example.
plt.close('all')

In [174]:
# Draw one random walk twice: as a dashed line with the default drawstyle,
# and as a solid line with drawstyle='steps-post'.
data = np.random.randn(30).cumsum()
plt.plot(data, 'k--', label='Default')
plt.plot(data, 'k-', drawstyle='steps-post', label='steps-post')
# The legend entries come from the label= arguments above.
plt.legend(loc='best')

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x203f3d918b0>

### Ticks, Labels, and Legends

#### Setting the title, axis labels, ticks, and ticklabels

In [175]:
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)

# Random walk of 1000 steps.
walk = np.random.randn(1000).cumsum()
ax.plot(walk)

# Put x ticks at five positions and give them text labels;
# the rotation option sets the x tick labels at a 30-degree rotation.
tick_positions = [0, 250, 500, 750, 1000]
tick_names = ['one', 'two', 'three', 'four', 'five']
ticks = ax.set_xticks(tick_positions)
labels = ax.set_xticklabels(tick_names, rotation=30, fontsize='small')

ax.set_title('My first matplotlib plot')
ax.set_xlabel('Stages')

<IPython.core.display.Javascript object>

Text(0.5, 0, 'Stages')

In [176]:
# Could also have set the title and x-axis label together through ax.set:
props = dict(title='My first matplotlib plot', xlabel='Stages')
ax.set(**props)

[Text(0.5, 1.0, 'My first matplotlib plot'),
 Text(0.5, 28.98301732809565, 'Stages')]

#### Adding legends

In [177]:
from numpy.random import randn

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)

# Three random walks, each drawn with its own style string and legend label.
for line_style, legend_name in [('k', 'one'), ('k--', 'two'), ('k.', 'three')]:
    ax.plot(randn(1000).cumsum(), line_style, label=legend_name)

# Build the legend from the label= values passed to ax.plot.
ax.legend(loc='best')

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x203f3df3820>

### Annotations and Drawing on a Subplot

In [None]:
# Draw text at the data coordinates (x, y) on ax.
# x and y are given concrete values here so that the cell runs on its own.
x, y = 0.5, 0.5
ax.text(x, y, 'Hello world!',
        family='monospace', fontsize=10)

In [180]:
from datetime import datetime

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)

# Read examples/spx.csv with its first column as a parsed date index,
# then plot the 'SPX' column as a solid black line.
data = pd.read_csv('examples/spx.csv', index_col=0, parse_dates=True)
spx = data['SPX']
spx.plot(ax=ax, style='k-')

crisis_data = [
    (datetime(2007, 10, 11), 'Peak of bull market'),
    (datetime(2008, 3, 12), 'Bear Stearns Fails'),
    (datetime(2008, 9, 15), 'Lehman Bankruptcy'),
]

# ax.annotate draws each label at the indicated x and y coordinates: the annotated
# point is 75 above the series value at that date (spx.asof) and the text is 225 above it.
for date, label in crisis_data:
    level = spx.asof(date)
    ax.annotate(
        label,
        xy=(date, level + 75),
        xytext=(date, level + 225),
        arrowprops={'facecolor': 'black', 'headwidth': 4, 'width': 2,
                    'headlength': 4},
        horizontalalignment='left',
        verticalalignment='top',
    )

# Zoom in on 2007-2010
ax.set_xlim(['1/1/2007', '1/1/2011'])
ax.set_ylim([600, 1800])

ax.set_title('Important dates in the 2008-2009 financial crisis')

<IPython.core.display.Javascript object>

Text(0.5, 1.0, 'Important dates in the 2008-2009 financial crisis')

In [181]:
# Shapes (patches) are created first and then attached to a subplot
# by calling ax.add_patch(shp).

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)

# A gray rectangle, a blue circle and a green triangle, all partly transparent.
rect = plt.Rectangle(xy=(0.2, 0.75), width=0.4, height=0.15,
                     color='k', alpha=0.3)
circ = plt.Circle(xy=(0.7, 0.2), radius=0.15, color='b', alpha=0.3)
triangle_corners = [[0.15, 0.15], [0.35, 0.4], [0.2, 0.6]]
pgon = plt.Polygon(triangle_corners, color='g', alpha=0.5)

ax.add_patch(rect)
ax.add_patch(circ)
ax.add_patch(pgon)

<IPython.core.display.Javascript object>

<matplotlib.patches.Polygon at 0x203f3f8fd60>

In [96]:
# Same three patches as the previous cell, drawn on a wider 12 x 6 figure.
fig = plt.figure(figsize=(12, 6))
ax = fig.add_subplot(1, 1, 1)

# Rectangle: lower-left corner, width, height. Circle: center, radius.
# Polygon: list of vertex coordinates.
rect = plt.Rectangle((0.2, 0.75), 0.4, 0.15, color='k', alpha=0.3)
circ = plt.Circle((0.7, 0.2), 0.15, color='b', alpha=0.3)
pgon = plt.Polygon([[0.15, 0.15], [0.35, 0.4], [0.2, 0.6]],
                   color='g', alpha=0.5)

# Attach each shape to the subplot.
ax.add_patch(rect)
ax.add_patch(circ)
ax.add_patch(pgon)

<IPython.core.display.Javascript object>

<matplotlib.patches.Polygon at 0x203c5c77e20>

### Saving Plots to File

In [None]:
# Save the active figure as SVG; the format is inferred from the file extension.
plt.savefig('figpath.svg')

dpi controls the dots-per-inch resolution of the saved figure.

bbox_inches can trim the whitespace around the actual figure.

In [None]:
# Save a 400 DPI PNG with the whitespace around the figure trimmed.
plt.savefig('figpath.png', dpi=400, bbox_inches='tight')

In [None]:
# savefig can also write to any file-like object, such as a BytesIO:

from io import BytesIO
buffer = BytesIO()
plt.savefig(buffer)
# Raw bytes that savefig wrote into the in-memory buffer.
plot_data = buffer.getvalue()

Figure.savefig options

fname # String containing a filepath or a Python file-like object. The figure format is inferred from the file
extension (e.g., .pdf for PDF or .png for PNG)

dpi # The figure resolution in dots per inch; defaults to 100 out of the box but can be configured

facecolor, edgecolor # The color of the figure background outside of the subplots; 'w' (white), by default
format # The explicit file format to use ('png', 'pdf', 'svg', 'ps', 'eps', ...)

bbox_inches # The portion of the figure to save; if 'tight' is passed, will attempt to trim the empty space around
the figure

### matplotlib Configuration

In [182]:
# plt.rc changes matplotlib's configuration programmatically:
# the first argument names the component, the keywords give its new settings.
# Here the default figure size is set to 10 x 6.
plt.rc('figure', figsize=(10, 6))

In [None]:
# Customize the default font through the 'font' rc group.
# font.size must be numeric (a point size); a relative name such as 'small'
# is rejected when matplotlib validates the setting.
font_options = {'family' : 'monospace',
                'weight' : 'bold',
                'size'   : 8}
plt.rc('font', **font_options)

## Plotting with pandas and seaborn

### Line Plots

In [200]:
# Close every open figure before the next example.
plt.close('all')

In [198]:
# Set the default figure size to 10 x 6, the same value used in the setup cell.
plt.rc('figure', figsize=(10, 6))

In [201]:
# Random walk of 10 steps indexed by 0, 10, ..., 90, drawn as a line plot.
walk_index = np.arange(0, 100, 10)
s = pd.Series(np.random.randn(10).cumsum(), index=walk_index)
s.plot()

<IPython.core.display.Javascript object>

<AxesSubplot:>

In [202]:
# Simple DataFrame plot of four cumulative-sum columns.
column_names = ['A', 'B', 'C', 'D']
row_labels = np.arange(0, 100, 10)
df = pd.DataFrame(np.random.randn(10, 4).cumsum(axis=0),
                  columns=column_names, index=row_labels)
df.plot()

<IPython.core.display.Javascript object>

<AxesSubplot:>

#### Table 9-3. Series.plot method arguments

label # Label for plot legend

ax # matplotlib subplot object to plot on; if nothing passed, uses active matplotlib subplot
style # Style string, like 'ko--', to be passed to matplotlib

alpha # The plot fill opacity (from 0 to 1)
kind # Can be 'area', 'bar', 'barh', 'density', 'hist', 'kde', 'line', 'pie'

logy # Use logarithmic scaling on the y-axis

use_index # Use the object index for tick labels

rot # Rotation of tick labels (0 through 360)

xticks # Values to use for x-axis ticks

yticks # Values to use for y-axis ticks

xlim # x-axis limits (e.g., [0, 10])

ylim # y-axis limits

grid # Display axis grid (off by default)

#### Table 9-4. DataFrame-specific plot arguments

subplots # Plot each DataFrame column in a separate subplot

sharex # If subplots=True, share the same x-axis, linking ticks and limits

sharey # If subplots=True, share the same y-axis

figsize # Size of figure to create as tuple

title # Plot title as string


legend # Add a subplot legend (True by default)

sort_columns # Plot columns in alphabetical order; by default uses existing column order

### Bar Plots

In [203]:
# Vertical and horizontal bar plots of the same Series on two stacked subplots.
fig, axes = plt.subplots(nrows=2, ncols=1)
data = pd.Series(np.random.rand(16), index=list('abcdefghijklmnop'))

# color='k' sets the color of the bars to black;
# alpha=0.7 makes them partially transparent.
bar_style = {'color': 'k', 'alpha': 0.7}
upper_ax, lower_ax = axes
data.plot.bar(ax=upper_ax, **bar_style)
data.plot.barh(ax=lower_ax, **bar_style)

<IPython.core.display.Javascript object>

<AxesSubplot:>

In [107]:
# Re-seed NumPy's global random generator before building the next example frame.
np.random.seed(12348)

In [204]:
# Random 6 x 4 frame whose columns Index is named 'Genus'.
df = pd.DataFrame(np.random.rand(6, 4),
                  index=['one', 'two', 'three', 'four', 'five', 'six'],
                  columns=pd.Index(['A', 'B', 'C', 'D'], name='Genus'))

# Only the last expression of a cell is echoed, so the plot call is the
# single trailing expression here.
df.plot.bar()

# Note that the name “Genus” on the DataFrame’s columns is used to title the legend.

<IPython.core.display.Javascript object>

<AxesSubplot:>

In [None]:
# Open a new, empty figure.
plt.figure()

In [205]:
# Create a stacked horizontal bar plot by passing stacked=True;
# alpha=0.5 makes the bars half transparent.
df.plot.barh(stacked=True, alpha=0.5)

<IPython.core.display.Javascript object>

<AxesSubplot:>

A useful recipe for bar plots is to visualize a Series’s value frequency
using value_counts: s.value_counts().plot.bar().

In [206]:
# Close every open figure before the next example.
plt.close('all')

In [207]:
# Load the tipping data and count parties by day (rows) and party size (columns).
tips = pd.read_csv('examples/tips.csv')
party_counts = pd.crosstab(index=tips['day'], columns=tips['size'])
party_counts

size,1,2,3,4,5,6
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Fri,1,16,1,1,0,0
Sat,2,53,18,13,1,0
Sun,0,39,15,18,3,1
Thur,1,48,4,5,1,3


In [208]:
# Not many 1- and 6-person parties, so keep only party sizes 2 through 5.
# .loc slices by column label and includes both endpoints.
party_counts = party_counts.loc[:, 2:5]

In [209]:
# Normalize so that each row sums to 1: divide every row by its own total.
# axis=0 aligns the row totals with the row index.
row_totals = party_counts.sum(axis=1)
party_pcts = party_counts.div(row_totals, axis=0)
party_pcts

size,2,3,4,5
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Fri,0.888889,0.055556,0.055556,0.0
Sat,0.623529,0.211765,0.152941,0.011765
Sun,0.52,0.2,0.24,0.04
Thur,0.827586,0.068966,0.086207,0.017241


In [210]:
# Bar plot of the per-day party-size fractions.
party_pcts.plot.bar()

<IPython.core.display.Javascript object>

<AxesSubplot:xlabel='day'>

So you can see that party sizes appear to increase on the weekend in this dataset.

In [211]:
# Close every open figure before the next example.
plt.close('all')

In [212]:
import seaborn as sns

# Tip as a fraction of the bill with the tip amount subtracted.
bill_without_tip = tips['total_bill'] - tips['tip']
tips['tip_pct'] = tips['tip'] / bill_without_tip
tips.head()

Unnamed: 0,total_bill,tip,smoker,day,time,size,tip_pct
0,16.99,1.01,No,Sun,Dinner,2,0.063204
1,10.34,1.66,No,Sun,Dinner,3,0.191244
2,21.01,3.5,No,Sun,Dinner,3,0.199886
3,23.68,3.31,No,Sun,Dinner,2,0.162494
4,24.59,3.61,No,Sun,Dinner,4,0.172069


In [213]:
# Horizontal seaborn bar plot of tip_pct for each day.
sns.barplot(x='tip_pct', y='day', data=tips, orient='h')

<IPython.core.display.Javascript object>

<AxesSubplot:xlabel='tip_pct', ylabel='day'>

In [214]:
# Close every open figure before the next example.
plt.close('all')

In [215]:
# The hue option splits each day's bar by an additional categorical value (here, time).
sns.barplot(x='tip_pct', y='day', hue='time', data=tips, orient='h')

<IPython.core.display.Javascript object>

<AxesSubplot:xlabel='tip_pct', ylabel='day'>

In [216]:
# Close every open figure before the next example.
plt.close('all')

In [129]:
# Switch between different plot appearances using seaborn.set:
sns.set(style="whitegrid")

### Histograms and Density Plots

In [217]:
# Histogram of tip_pct with 50 bins, drawn after opening a new figure.
plt.figure()
tips['tip_pct'].plot.hist(bins=50)

<IPython.core.display.Javascript object>

<AxesSubplot:ylabel='Frequency'>

In [218]:
# Density plot of tip_pct, drawn after opening a new figure.
# Density plots are also known as kernel density estimate (KDE) plots.
plt.figure()
tips['tip_pct'].plot.density()

<IPython.core.display.Javascript object>

<AxesSubplot:ylabel='Density'>

In [219]:
# Seaborn makes histograms and density plots even easier through its histplot method.
# (distplot is deprecated; histplot with kde=True and stat='density' draws the same
# histogram-plus-density view.)
plt.figure()

# Sample made of two normal components: mean 0 / sd 1 and mean 10 / sd 2.
comp1 = np.random.normal(0, 1, size=200)
comp2 = np.random.normal(10, 2, size=200)
values = pd.Series(np.concatenate([comp1, comp2]))
sns.histplot(values, bins=100, color='k', kde=True, stat='density')

<IPython.core.display.Javascript object>



<AxesSubplot:ylabel='Density'>

### Scatter or Point Plots

In [220]:
# Load the macro data and keep the four series of interest.
macro = pd.read_csv('examples/macrodata.csv')
selected_columns = ['cpi', 'm1', 'tbilrate', 'unemp']
data = macro[selected_columns]

# Compute log differences and drop the rows that contain missing values.
log_data = np.log(data)
trans_data = log_data.diff().dropna()
trans_data.tail(5)

Unnamed: 0,cpi,m1,tbilrate,unemp
198,-0.007904,0.045361,-0.396881,0.105361
199,-0.021979,0.066753,-2.277267,0.139762
200,0.00234,0.010286,0.606136,0.160343
201,0.008419,0.037461,-0.200671,0.127339
202,0.008894,0.012202,-0.405465,0.04256


In [221]:
plt.figure()

# regplot makes a scatter plot and fits a linear regression line.
# x and y are passed by keyword: seaborn deprecated positional data variables
# in 0.11 and made them keyword-only in 0.12.
sns.regplot(x='m1', y='unemp', data=trans_data)
plt.title('Changes in log %s versus log %s' % ('m1', 'unemp'))

<IPython.core.display.Javascript object>



Text(0.5, 1.0, 'Changes in log m1 versus log unemp')

In [222]:
# pairplot draws what is known as a pairs plot or scatter plot matrix.
# diag_kind : {'auto', 'hist', 'kde'} selects what is drawn on the diagonal.
# plot_kws={'alpha': 0.2} is forwarded to the off-diagonal plotting calls.
sns.pairplot(trans_data, diag_kind='kde', plot_kws={'alpha': 0.2})

<IPython.core.display.Javascript object>

<seaborn.axisgrid.PairGrid at 0x2038473c6a0>

### Facet Grids and Categorical Data

In [223]:
# Bar plots of tip_pct by day, split by time (hue), one facet column per smoker value.
# factorplot was renamed to catplot in seaborn 0.9 and later removed;
# the arguments used here are unchanged.
sns.catplot(x='day', y='tip_pct', hue='time', col='smoker',
            kind='bar', data=tips[tips.tip_pct < 1])



<IPython.core.display.Javascript object>

<seaborn.axisgrid.FacetGrid at 0x20383203ca0>

In [224]:
# Same data faceted on two variables: one row per time value, one column per smoker value.
# factorplot was renamed to catplot in seaborn 0.9 and later removed;
# the arguments used here are unchanged.
sns.catplot(x='day', y='tip_pct', row='time',
            col='smoker',
            kind='bar', data=tips[tips.tip_pct < 1])



<IPython.core.display.Javascript object>

<seaborn.axisgrid.FacetGrid at 0x20384e7a0d0>

In [225]:
# Box plots of tip_pct for each day, restricted to rows with tip_pct below 0.5.
# factorplot was renamed to catplot in seaborn 0.9 and later removed;
# the arguments used here are unchanged.
sns.catplot(x='tip_pct', y='day', kind='box',
            data=tips[tips.tip_pct < 0.5])



<IPython.core.display.Javascript object>

<seaborn.axisgrid.FacetGrid at 0x203ee0db7c0>