# Plotting and Visualization

In [None]:
# True division for `/`; already the default on Python 3, kept for Python 2.
from __future__ import division
from numpy.random import randn
import numpy as np
import os
import matplotlib.pyplot as plt
# Fixed seed so the random-number-based examples below are repeatable.
np.random.seed(12345)
# Default size, in inches, for every figure created from here on.
plt.rc('figure', figsize=(10, 6))
from pandas import Series, DataFrame
import pandas as pd
# Show NumPy floats with 4 digits of precision when printed.
np.set_printoptions(precision=4)

In [None]:
%matplotlib inline

In [None]:
%pwd

## A brief matplotlib API primer

In [None]:
import matplotlib.pyplot as plt

### Figures and Subplots

In [None]:
# Create a new, empty figure; the following cells add subplots to it.
fig = plt.figure()

In [None]:
# Slot 1 of a 2-row by 2-column subplot grid on `fig`.
ax1 = fig.add_subplot(2, 2, 1)

In [None]:
# Slots 2 and 3 of the same 2x2 grid on `fig`.
ax2 = fig.add_subplot(2, 2, 2)
ax3 = fig.add_subplot(2, 2, 3)

In [None]:
from numpy.random import randn

# Draw the dashed black random walk on ax3, the most recently added subplot.
# Naming the axes (instead of a bare plt.plot) avoids relying on pyplot's
# implicit "current axes": under the inline notebook backend figures are
# closed after each cell, so plt.plot here would land on a brand-new figure.
ax3.plot(randn(50).cumsum(), 'k--')

In [None]:
# Histogram of 100 standard-normal draws on the first subplot; the return
# value is bound to `_` so the notebook does not echo it.
_ = ax1.hist(randn(100), bins=20, color='k', alpha=0.3)

# Noisy diagonal on the second subplot: y = x + 3 * noise for x = 0..29.
steps = np.arange(30)
ax2.scatter(steps, steps + 3 * randn(30))

In [None]:
# Close every open figure before the next example.
plt.close('all')

In [None]:
# Create the figure and a 2-row by 3-column grid of subplots in one call.
fig, axes = plt.subplots(nrows=2, ncols=3)
# Echo the returned axes container in the notebook.
axes

#### Adjusting the spacing around subplots

In [None]:
# Signature demo: every spacing parameter is passed as None here.
# left/bottom/right/top are the figure-edge margins; wspace/hspace are the
# horizontal/vertical gaps between subplots.
plt.subplots_adjust(left=None, bottom=None, right=None, top=None,
                wspace=None, hspace=None)

In [None]:
# 2x2 grid of histograms that share both axes, with no gaps between panels.
fig, axes = plt.subplots(2, 2, sharex=True, sharey=True)
# axes.flat walks the grid row by row, the same order as nested i/j loops.
for panel in axes.flat:
    panel.hist(randn(500), bins=50, color='k', alpha=0.5)
fig.subplots_adjust(wspace=0, hspace=0)

In [None]:
# Same 2x2 shared-axis histogram grid as the preceding cell, drawn with
# fresh random samples.
fig, axes = plt.subplots(2, 2, sharex=True, sharey=True)
for grid_row in axes:
    for panel in grid_row:
        panel.hist(randn(500), bins=50, color='k', alpha=0.5)
# Zero width/height spacing so the panels touch.
plt.subplots_adjust(wspace=0, hspace=0)

### Colors, markers, and line styles

In [None]:
# Open a new figure for the line-style example in the next cell.
plt.figure()

In [None]:
# Spelled-out form of the 'ko--' format string: black, circle markers, dashed.
plt.plot(randn(30).cumsum(), color='k', linestyle='--', marker='o')

In [None]:
# Close every open figure before the next example.
plt.close('all')

In [None]:
# One random walk drawn twice: with default interpolation between points
# (dashed) and as a step function (solid).
data = randn(30).cumsum()
plt.plot(data, color='k', linestyle='--', label='Default')
plt.plot(data, color='k', linestyle='-',
         drawstyle='steps-post', label='steps-post')
# The legend picks up the `label` of each line.
plt.legend(loc='best')

### Ticks, labels, and legends

#### Setting the title, axis labels, ticks, and ticklabels

In [None]:
# Single-subplot figure holding a 1000-step random walk.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.plot(randn(1000).cumsum())

ax.set_title('My first matplotlib plot')

# Place five ticks along x, then replace their numeric labels with words.
ticks = ax.set_xticks([0, 250, 500, 750, 1000])
labels = ax.set_xticklabels(
    ['one', 'two', 'three', 'four', 'five'],
    rotation=30,
    fontsize='small',
)
ax.set_xlabel('Stages')

#### Adding legends

In [None]:
# Three random walks on one subplot, each with its own line style and label.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
for fmt, name in [('k', 'one'), ('k--', 'two'), ('k.', 'three')]:
    ax.plot(randn(1000).cumsum(), fmt, label=name)

# Build the legend from the labels given above.
ax.legend(loc='best')

### Annotations and drawing on a subplot

In [None]:
from datetime import datetime

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)

# Load the CSV with its first column parsed as a date index and plot the
# 'SPX' column as a solid black line.
data = pd.read_csv('spx.csv', index_col=0, parse_dates=True)
spx = data['SPX']
spx.plot(ax=ax, style='k-')

# (date, caption) pairs to call out on the chart.
crisis_data = [
    (datetime(2007, 10, 11), 'Peak of bull market'),
    (datetime(2008, 3, 12), 'Bear Stearns Fails'),
    (datetime(2008, 9, 15), 'Lehman Bankruptcy')
]

for date, label in crisis_data:
    # asof() gives the series value at the latest index entry on or before
    # `date`; the arrow tip sits 50 above it and the caption 200 above it.
    level = spx.asof(date)
    ax.annotate(
        label,
        xy=(date, level + 50),
        xytext=(date, level + 200),
        arrowprops={'facecolor': 'black'},
        horizontalalignment='left',
        verticalalignment='top',
    )

# Zoom in on 2007-2010
ax.set_xlim(['1/1/2007', '1/1/2011'])
ax.set_ylim([600, 1800])

ax.set_title('Important dates in 2008-2009 financial crisis')

In [None]:
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)

# Each shape is a patch object; it only appears once added to the axes.

# Semi-transparent black rectangle anchored at (0.2, 0.75).
rect = plt.Rectangle(xy=(0.2, 0.75), width=0.4, height=0.15,
                     color='k', alpha=0.3)
ax.add_patch(rect)

# Semi-transparent blue circle centred at (0.7, 0.2).
circ = plt.Circle(xy=(0.7, 0.2), radius=0.15, color='b', alpha=0.3)
ax.add_patch(circ)

# Green triangle given by its three vertices.
pgon = plt.Polygon(xy=[[0.15, 0.15], [0.35, 0.4], [0.2, 0.6]],
                   color='g', alpha=0.5)
ax.add_patch(pgon)

### Saving plots to file

In [None]:
# Bare expression as the last line of the cell: the notebook echoes `fig`.
fig

In [None]:
# Save `fig` to disk; matplotlib infers the SVG format from the extension.
fig.savefig('figpath.svg')

In [None]:
# PNG at 400 dots per inch; bbox_inches='tight' trims the whitespace
# around the drawn content.
fig.savefig('figpath.png', dpi=400, bbox_inches='tight')

In [None]:
from io import BytesIO

# Render into an in-memory buffer instead of a file on disk.
# Save through `fig` (as the preceding cells do) rather than plt.savefig:
# pyplot saves whichever figure is "current", and under the inline notebook
# backend figures are closed after each cell, so a bare plt.savefig in its
# own cell would capture a new, blank figure.
buffer = BytesIO()
fig.savefig(buffer)
plot_data = buffer.getvalue()  # raw bytes of the rendered image

### matplotlib configuration

In [None]:
# Change the global default figure size to 10x10 inches; this replaces the
# (10, 6) default set in the first cell for figures created from here on.
plt.rc('figure', figsize=(10, 10))

## Plotting functions in pandas

### Line plots

In [None]:
# Close every open figure before the pandas plotting examples.
plt.close('all')

In [None]:
# Ten-step random walk indexed 0, 10, ..., 90; Series.plot() draws a line
# with the index on the x-axis.
walk = np.random.randn(10).cumsum()
s = Series(walk, index=np.arange(0, 100, 10))
s.plot()

In [None]:
# Four independent random walks (cumulative sum down the rows), one per
# column; DataFrame.plot() draws one line per column.
walks = np.random.randn(10, 4).cumsum(axis=0)
df = DataFrame(
    walks,
    columns=['A', 'B', 'C', 'D'],
    index=np.arange(0, 100, 10),
)
df.plot()

### Bar plots

In [None]:
# Two stacked subplots: the same 16 uniform random values as vertical bars
# (top) and as horizontal bars (bottom).
fig, axes = plt.subplots(2, 1)
data = Series(np.random.rand(16), index=list('abcdefghijklmnop'))
data.plot.bar(ax=axes[0], color='k', alpha=0.7)
data.plot.barh(ax=axes[1], color='k', alpha=0.7)

In [None]:
# 6x4 frame of uniform random values; the columns Index is named 'Genus'.
df = DataFrame(
    np.random.rand(6, 4),
    index=['one', 'two', 'three', 'four', 'five', 'six'],
    columns=pd.Index(['A', 'B', 'C', 'D'], name='Genus'),
)
# NOTE(review): a bare expression that is not the last line of a cell is not
# echoed by the notebook -- confirm whether this was meant to display `df`.
df
# Grouped bars: one group per row, one bar per column.
df.plot.bar()

In [None]:
# Open a new figure ahead of the stacked-bar example.
# NOTE(review): DataFrame.plot without ax= appears to create its own figure,
# so this one may stay empty -- TODO confirm it is needed.
plt.figure()

In [None]:
# Horizontal bars with each row's column values stacked end to end.
df.plot.barh(stacked=True, alpha=0.5)

### Histograms and density plots

In [None]:
# Open a new figure for the histogram in the next cell.
plt.figure()

In [None]:
# Add the tip as a fraction of the total bill, then histogram it in 50 bins.
tips = pd.read_csv('tips.csv')
tips['tip_pct'] = tips['tip'].div(tips['total_bill'])
tips['tip_pct'].hist(bins=50)

In [None]:
# Open a new figure for the density plot in the next cell.
plt.figure()

In [None]:
# Kernel density estimate of the tip fraction.
tips['tip_pct'].plot.kde()

In [None]:
# Open a new figure for the bimodal example in the next cell.
plt.figure()

In [None]:
# Bimodal sample: 200 draws from each of two normal distributions.
comp1 = np.random.normal(loc=0, scale=1, size=200)   # mean 0, std 1
comp2 = np.random.normal(loc=10, scale=2, size=200)  # mean 10, std 2 (variance 4)
values = Series(np.concatenate((comp1, comp2)))

# Normalised histogram with the density estimate overlaid as a dashed red line.
values.hist(bins=100, alpha=0.3, color='k', density=True)
values.plot.kde(style='r--')

### Scatter plots

In [None]:
# Keep four columns, then take the first difference of their logs; the
# first row has no predecessor and is dropped as NaN.
macro = pd.read_csv('macrodata.csv')
data = macro[['cpi', 'm1', 'tbilrate', 'unemp']]
log_levels = np.log(data)
trans_data = log_levels.diff().dropna()
# Show the last five rows.
trans_data.tail(5)

In [None]:
# Scatter of the raw m1 and unemp columns from `macro`. No log/diff transform
# is applied to this data, so the title must not say "Changes in log"; that
# wording belongs to the trans_data scatter in the following cell.
plt.scatter(macro['m1'], macro['unemp'])
plt.title('%s vs. %s' % ('m1', 'unemp'))

In [None]:
# Scatter of the log-differenced m1 against the log-differenced unemp.
x_col, y_col = 'm1', 'unemp'
plt.scatter(trans_data[x_col], trans_data[y_col])
plt.title('Changes in log %s vs. log %s' % (x_col, y_col))

In [None]:
# Pairwise scatter plots of every column of trans_data against every other;
# diagonal='kde' puts a density estimate of each column on the diagonal.
pd.plotting.scatter_matrix(trans_data, diagonal='kde', color='k', alpha=0.3, figsize=(8,8))

In [None]:
# 10x4 frame of uniform random values drawn as a stacked area chart
# (stacked=True is what the .plot.area() accessor passes by default).
df = pd.DataFrame(np.random.rand(10, 4), columns=['a', 'b', 'c', 'd'])
df.plot(kind='area', stacked=True)

In [None]:
# Same frame, with the areas overlaid rather than stacked.
df.plot(kind='area', stacked=False)

In [None]:
# Reload the macro data and bin m1 vs. unemp into hexagons on a grid of
# size 15.
macro = pd.read_csv("macrodata.csv")
data = macro[['cpi', 'm1', 'tbilrate', 'unemp']]
data.plot(kind='hexbin', x='m1', y='unemp', gridsize=15, figsize=(8, 8))

In [None]:
# Same hexagonal binning, with the 'cpi' column supplied as the value (C)
# for each point.
data.plot(kind='hexbin', x='m1', y='unemp', C='cpi',
          gridsize=15, figsize=(8, 8))

In [None]:
# Four random values in [0, 3) labelled a-d, drawn as a pie chart.
series = pd.Series(3 * np.random.rand(4),
                   index=['a', 'b', 'c', 'd'],
                   name='series')
series.plot(kind='pie', figsize=(6, 6))

In [None]:
# Two columns of random values; subplots=True draws one pie per column.
df = pd.DataFrame(3 * np.random.rand(4, 2),
                  index=['a', 'b', 'c', 'd'],
                  columns=['x', 'y'])
df.plot(kind='pie', subplots=True, figsize=(8, 4))

In [None]:
# Pie with custom wedge labels and colours; autopct prints each wedge's
# percentage with two decimals.
series.plot(
    kind='pie',
    labels=['AA', 'BB', 'CC', 'DD'],
    colors=['r', 'g', 'b', 'c'],
    autopct='%.2f',
    fontsize=20,
    figsize=(6, 6),
)

In [None]:
# Four equal values of 0.1, so the total is 0.4 (less than 1).
series = pd.Series([0.1] * 4, index=['a', 'b', 'c', 'd'], name='series2')
series.plot(kind='pie', figsize=(6, 6))

In [None]:
from pandas.plotting import parallel_coordinates

# Load the iris data set and preview its first five rows.
data = pd.read_csv('iris.csv')
data.head(n=5)

In [None]:
# One line per row across the numeric columns, coloured by the 'Name' column.
parallel_coordinates(data, class_column='Name')