# Plotting and Visualization

## Description

### GitHub Link

[GitHub link](https://github.com/wesm/pydata-book)

### Required Dataset

[SPX](https://raw.githubusercontent.com/wesm/pydata-book/2nd-edition/examples/spx.csv)  
[tips](https://raw.githubusercontent.com/wesm/pydata-book/2nd-edition/examples/tips.csv)  
[macro](https://raw.githubusercontent.com/wesm/pydata-book/2nd-edition/examples/macrodata.csv)

## Required Packages

In [1]:
# Use matplotlib's interactive `notebook` backend so that later cells can keep drawing on an existing figure.
# Comment this line out if your front end does not support that backend.
%matplotlib notebook

In [2]:
import numpy as np
import pandas as pd
import matplotlib
from matplotlib import pyplot as plt
import seaborn as sns
from datetime import datetime
from io import BytesIO

In [3]:
np.random.seed(12345)

## 9.1 A Brief matplotlib API Primer

In [4]:
data = np.arange(10)
data

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [5]:
plt.plot(data)

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x161cbca30>]

### Figures and Subplots

In [6]:
fig = plt.figure()

<IPython.core.display.Javascript object>

In [7]:
ax1 = fig.add_subplot(2, 2, 1)

In [8]:
ax2 = fig.add_subplot(2, 2, 2)

In [9]:
ax3 = fig.add_subplot(2, 2, 3)

In [10]:
# Draw explicitly on ax3 (the most recently added subplot) rather than relying on
# pyplot's implicit "current axes" state, so the target does not depend on which
# figure or subplot happened to be touched last.
ax3.plot(np.random.randn(50).cumsum(), 'k--')

[<matplotlib.lines.Line2D at 0x161d82bb0>]

In [11]:
_ = ax1.hist(np.random.randn(100), bins=20, color='k', alpha=0.3)

In [12]:
ax2.scatter(np.arange(30), np.arange(30) + 3 * np.random.randn(30))

<matplotlib.collections.PathCollection at 0x161dc14c0>

In [13]:
fig, axes = plt.subplots(2, 3)

<IPython.core.display.Javascript object>

In [14]:
axes

array([[<AxesSubplot:>, <AxesSubplot:>, <AxesSubplot:>],
       [<AxesSubplot:>, <AxesSubplot:>, <AxesSubplot:>]], dtype=object)

#### Adjusting the spacing around subplots

In [15]:
# subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=None)

In [16]:
# 2x2 grid of histograms that share both axes, packed with no gaps between panels.
fig, axes = plt.subplots(2, 2, sharex=True, sharey=True)
for panel in axes.ravel():  # row-major order: (0,0), (0,1), (1,0), (1,1)
    panel.hist(np.random.randn(500), bins=50, color='k', alpha=0.5)
plt.subplots_adjust(wspace=0, hspace=0)

<IPython.core.display.Javascript object>

### Colors, Markers, and Line Styles

In [17]:
# Two equivalent ways to request a green dashed line:
# ax.plot(x, y, 'g--')
# ax.plot(x, y, linestyle='--', color='g')

In [18]:
fig = plt.figure()

<IPython.core.display.Javascript object>

In [19]:
plt.plot(np.random.randn(30).cumsum(), 'ko--')

[<matplotlib.lines.Line2D at 0x1621d38e0>]

In [20]:
plt.plot(np.random.randn(30).cumsum(), color='k', linestyle='dashed', marker='o')

[<matplotlib.lines.Line2D at 0x1621e2220>]

In [21]:
fig = plt.figure()

<IPython.core.display.Javascript object>

In [22]:
data = np.random.randn(30).cumsum()

In [23]:
plt.plot(data, 'k--', label='Default')

[<matplotlib.lines.Line2D at 0x162218f10>]

In [24]:
plt.plot(data, 'k-', drawstyle='steps-post', label='steps-post')

[<matplotlib.lines.Line2D at 0x162224340>]

In [25]:
plt.legend(loc='best')

<matplotlib.legend.Legend at 0x162224790>

### Ticks, Labels, and Legends

#### Setting the title, axis labels, ticks, and ticklabels

In [26]:
fig = plt.figure()

<IPython.core.display.Javascript object>

In [27]:
ax = fig.add_subplot(1, 1, 1)

In [28]:
ax.plot(np.random.randn(1000).cumsum())

[<matplotlib.lines.Line2D at 0x162277c40>]

In [29]:
ticks = ax.set_xticks([0, 250, 500, 750, 1000])

In [30]:
labels = ax.set_xticklabels(['one', 'two', 'three', 'four', 'five'], rotation=30, fontsize='small')

In [31]:
ax.set_title('My first matplotlib plot')

Text(0.5, 1.0, 'My first matplotlib plot')

In [32]:
ax.set_xlabel('Stages')

Text(0.5, 0, 'Stages')

In [33]:
props = {
    'title': 'My first matplotlib plot',
    'xlabel': 'Stages'
}
ax.set(**props)

[Text(0.5, 1.0, 'My first matplotlib plot'), Text(0.5, 0, 'Stages')]

#### Adding legends

In [34]:
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)

<IPython.core.display.Javascript object>

In [35]:
ax.plot(np.random.randn(1000).cumsum(), 'k', label='one')

[<matplotlib.lines.Line2D at 0x1622a93d0>]

In [36]:
ax.plot(np.random.randn(1000).cumsum(), 'k--', label='two')

[<matplotlib.lines.Line2D at 0x1622e5580>]

In [37]:
ax.plot(np.random.randn(1000).cumsum(), 'k.', label='three')

[<matplotlib.lines.Line2D at 0x1622e5a60>]

In [38]:
ax.legend(loc='best')

<matplotlib.legend.Legend at 0x1622e58b0>

### Annotations and Drawing on a Subplot

In [39]:
# ax.text(x, y, 'Hello, world!', family='monospace', fontsize=10)

In [40]:
fig = plt.figure()

<IPython.core.display.Javascript object>

In [41]:
ax = fig.add_subplot(1, 1, 1)

In [42]:
data = pd.read_csv('src/csv/SPX.csv', index_col=0, parse_dates=True)
spx = data['SPX']

In [43]:
spx.plot(ax=ax, style='k-')

<AxesSubplot:xlabel='Date'>

In [44]:
crisis_data = [
    (datetime(2007, 10, 11), 'Peak of bull market'),
    (datetime(2008, 3, 12), 'Bear Stearns Fails'),
    (datetime(2008, 9, 15), 'Lehman Bankruptcy')
]

In [45]:
# Label every crisis event: the arrow tip sits 75 points above the SPX level in
# effect on that date and the text starts 225 points above it.
for date, label in crisis_data:
    level = spx.asof(date)  # last available SPX value at or before `date`
    ax.annotate(
        label,
        xy=(date, level + 75),
        xytext=(date, level + 225),
        arrowprops={'facecolor': 'black', 'headwidth': 4, 'width': 2, 'headlength': 4},
        horizontalalignment='left',
        verticalalignment='top',
    )

In [46]:
# Zoom in on 2007-2010.
# The x-limits are given as datetime objects (like the dates in crisis_data) instead
# of 'M/D/YYYY' strings, so they do not depend on string parsing by the axis
# converter and cannot be misread as day/month.
ax.set_xlim([datetime(2007, 1, 1), datetime(2011, 1, 1)])
ax.set_ylim([600, 1800])
ax.set_title('Important dates in the 2008-2009 financial crisis')

Text(0.5, 1.0, 'Important dates in the 2008-2009 financial crisis')

In [47]:
fig = plt.figure()

<IPython.core.display.Javascript object>

In [48]:
ax = fig.add_subplot(1, 1, 1)

In [49]:
rect = plt.Rectangle((0.2, 0.75), 0.4, 0.15, color='k', alpha=0.3)
circ = plt.Circle((0.7, 0.2), 0.15, color='b', alpha=0.3)
pgon = plt.Polygon([[0.15, 0.15], [0.35, 0.4], [0.2, 0.6]], color='g', alpha=0.5)

In [50]:
ax.add_patch(rect)
ax.add_patch(circ)
ax.add_patch(pgon)

<matplotlib.patches.Polygon at 0x1624d2250>

### Saving Plots to File

In [51]:
# plt.savefig('figpath.svg')
# plt.savefig('figpath.svg', dpi=400, bbox_inches='tight')

In [52]:
"""
buffer = BytesIO()
plt.savefig(buffer)
plot_data = buffer.getvalue()
"""

'\nbuffer = BytesIO()\nplt.savefig(buffer)\nplot_data = buffer.getvalue()\n'

### matplotlib Configuration

In [53]:
# plt.rc('figure', figsize=(10, 10))

In [54]:
"""
font_options = {
    'family': 'monospace',
    'weight': 'bold',
    'size': 'small',
}

plt.rc('font', **font_options)
"""

"\nfont_options = {\n    'family': 'monospace',\n    'weight': 'bold',\n    'size': 'small',\n}\n\nplt.rc('font', **font_options)\n"

## 9.2 Plotting with pandas and seaborn

### Line Plots

In [55]:
fig = plt.figure()

<IPython.core.display.Javascript object>

In [56]:
s = pd.Series(np.random.randn(10).cumsum(), index=np.arange(0, 100, 10))

In [57]:
s.plot()

<AxesSubplot:>

In [58]:
# Four independent random walks (cumulative sums down the rows), one per column,
# indexed 0, 10, ..., 90.
df = pd.DataFrame(
    np.random.randn(10, 4).cumsum(axis=0),
    columns=list('ABCD'),
    index=np.arange(0, 100, 10),
)

In [59]:
df.plot()

<IPython.core.display.Javascript object>

<AxesSubplot:>

##### Series.plot method arguments

| Argument | Description |
| - | - |
| label | Label for plot legend |
| ax | matplotlib subplot object to plot on; if nothing passed, uses active matplotlib subplot |
| style | Style string, like 'ko--', to be passed to matplotlib |
| alpha | The plot fill opacity (from 0 to 1) |
| kind | Can be 'area', 'bar', 'barh', 'density', 'hist', 'kde', 'line', 'pie' |
| logy | Use logarithmic scaling on the y-axis |
| use_index | Use the object index for tick labels |
| rot | Rotation of tick labels (0 through 360) |
| xticks | Values to use for x-axis ticks |
| yticks | Values to use for y-axis ticks |
| xlim | x-axis limits (e.g., [0, 10]) |
| ylim | y-axis limits (e.g., [0, 10]) |
| grid | Display axis grid (off by default) |

##### DataFrame-specific plot arguments

| Argument | Description |
| - | - |
| subplots | Plot each DataFrame column in a separate subplot |
| sharex | If subplots=True, share the same x-axis, linking ticks and limits |
| sharey | If subplots=True, share the same y-axis |
| figsize | Size of figure to create as tuple |
| title | Plot title as string |
| legend | Add a subplot legend (True by default) |
| sort_columns | Plot columns in alphabetical order; by default uses existing column order |

### Bar Plots

In [60]:
fig, axes = plt.subplots(2, 1)

<IPython.core.display.Javascript object>

In [61]:
data = pd.Series(np.random.rand(16), index=list('abcdefghijklmnop'))

In [62]:
data.plot.bar(ax=axes[0], color='k', alpha=0.7)
data.plot.barh(ax=axes[1], color='k', alpha=0.7)

<AxesSubplot:>

In [63]:
df = pd.DataFrame(
    np.random.rand(6, 4),
    index=['one', 'two', 'three', 'four', 'five', 'six'],
    columns=pd.Index(['A', 'B', 'C', 'D'], name='Genus')
)
df

Genus,A,B,C,D
one,0.815522,0.984996,0.789212,0.984949
two,0.359175,0.264507,0.373024,0.254887
three,0.950684,0.696543,0.462334,0.902073
four,0.693608,0.028276,0.981006,0.988238
five,0.283653,0.95279,0.391232,0.160211
six,0.524785,0.214279,0.6893,0.820608


In [64]:
df.plot.bar()

<IPython.core.display.Javascript object>

<AxesSubplot:>

In [65]:
df.plot.barh(stacked=True, alpha=0.5)

<IPython.core.display.Javascript object>

<AxesSubplot:>

In [66]:
tips = pd.read_csv('src/csv/tips.csv')

In [67]:
party_counts = pd.crosstab(tips['day'], tips['size'])
party_counts

size,1,2,3,4,5,6
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Fri,1,16,1,1,0,0
Sat,2,53,18,13,1,0
Sun,0,39,15,18,3,1
Thur,1,48,4,5,1,3


In [68]:
# Keep only the party-size columns labelled 2 through 5 (.loc slices include both endpoints).
party_counts = party_counts.loc[:, 2:5]

In [69]:
# Normalize each row (day) by its own total so the party-size shares in a row sum to 1.
party_pcts = party_counts.div(party_counts.sum(axis=1), axis=0)
party_pcts

size,2,3,4,5
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Fri,0.888889,0.055556,0.055556,0.0
Sat,0.623529,0.211765,0.152941,0.011765
Sun,0.52,0.2,0.24,0.04
Thur,0.827586,0.068966,0.086207,0.017241


In [70]:
party_pcts.plot.bar()

<IPython.core.display.Javascript object>

<AxesSubplot:xlabel='day'>

In [71]:
# Tip as a fraction of the bill excluding the tip itself: tip / (total_bill - tip).
tips['tip_pct'] = tips['tip'] / (tips['total_bill'] - tips['tip'])
tips.head()

Unnamed: 0,total_bill,tip,smoker,day,time,size,tip_pct
0,16.99,1.01,No,Sun,Dinner,2,0.063204
1,10.34,1.66,No,Sun,Dinner,3,0.191244
2,21.01,3.5,No,Sun,Dinner,3,0.199886
3,23.68,3.31,No,Sun,Dinner,2,0.162494
4,24.59,3.61,No,Sun,Dinner,4,0.172069


In [72]:
fig = plt.figure()

<IPython.core.display.Javascript object>

In [73]:
sns.barplot(x='tip_pct', y='day', data=tips, orient='h')

<AxesSubplot:xlabel='tip_pct', ylabel='day'>

In [74]:
fig = plt.figure()

<IPython.core.display.Javascript object>

In [75]:
sns.barplot(x='tip_pct', y='day', hue='time', data=tips, orient='h')

<AxesSubplot:xlabel='tip_pct', ylabel='day'>

In [76]:
sns.set(style="whitegrid")

### Histograms and Density Plots

In [77]:
fig = plt.figure()

<IPython.core.display.Javascript object>

In [78]:
tips['tip_pct'].plot.hist(bins=50)

<AxesSubplot:ylabel='Frequency'>

In [79]:
fig = plt.figure()

<IPython.core.display.Javascript object>

In [80]:
tips['tip_pct'].plot.density()

<AxesSubplot:ylabel='Density'>

In [81]:
fig = plt.figure()

  fig = plt.figure()


<IPython.core.display.Javascript object>

In [82]:
comp1 = np.random.normal(0, 1, size=200)
comp2 = np.random.normal(10, 2, size=200)
values = pd.Series(np.concatenate([comp1, comp2]))

In [83]:
# sns.distplot is deprecated; histplot with stat='density' and kde=True draws the
# equivalent normalized histogram with a kernel density estimate overlaid.
sns.histplot(values, bins=100, color='k', kde=True, stat='density')



<AxesSubplot:ylabel='Density'>

### Scatter or Point Plots

In [84]:
macro = pd.read_csv('src/csv/macrodata.csv')

In [85]:
data = macro[['cpi', 'm1', 'tbilrate', 'unemp']]

In [86]:
trans_data = np.log(data).diff().dropna()

In [87]:
trans_data[-5:]

Unnamed: 0,cpi,m1,tbilrate,unemp
198,-0.007904,0.045361,-0.396881,0.105361
199,-0.021979,0.066753,-2.277267,0.139762
200,0.00234,0.010286,0.606136,0.160343
201,0.008419,0.037461,-0.200671,0.127339
202,0.008894,0.012202,-0.405465,0.04256


In [88]:
fig = plt.figure()

<IPython.core.display.Javascript object>

In [89]:
# Scatter plot of m1 vs. unemp with a fitted linear regression line.
# x and y are passed by keyword: positional data arguments trigger a FutureWarning
# in seaborn 0.11 and fail in seaborn 0.12+.
sns.regplot(x='m1', y='unemp', data=trans_data)



<AxesSubplot:xlabel='m1', ylabel='unemp'>

In [90]:
plt.title('Changes in log %s versus log %s' % ('m1', 'unemp'))

Text(0.5, 1.0, 'Changes in log m1 versus log unemp')

In [91]:
sns.pairplot(trans_data, diag_kind='kde', plot_kws={'alpha': 0.2})

<IPython.core.display.Javascript object>

<seaborn.axisgrid.PairGrid at 0x162c2a970>

In [93]:
# Tip percentage by day, coloured by meal time, with one facet column per smoker value.
# sns.factorplot is a deprecated alias (renamed to catplot in seaborn 0.9 and missing
# from later releases); catplot takes the same arguments.
sns.catplot(
    x='day',
    y='tip_pct',
    hue='time',
    col='smoker',
    kind='bar',
    data=tips[tips.tip_pct < 1]  # drop rows whose tip_pct is 1 or more
)

<IPython.core.display.Javascript object>

<seaborn.axisgrid.FacetGrid at 0x16338e5e0>

In [94]:
# Tip percentage by day on a facet grid: one row per meal time, one column per smoker value.
# sns.factorplot is a deprecated alias (renamed to catplot in seaborn 0.9 and missing
# from later releases); catplot takes the same arguments.
sns.catplot(
    x='day',
    y='tip_pct',
    row='time',
    col='smoker',
    kind='bar',
    data=tips[tips.tip_pct < 1]  # drop rows whose tip_pct is 1 or more
)



<IPython.core.display.Javascript object>

<seaborn.axisgrid.FacetGrid at 0x16341af70>

In [95]:
# Horizontal box plots of tip percentage for each day.
# sns.factorplot is a deprecated alias (renamed to catplot in seaborn 0.9 and missing
# from later releases); catplot takes the same arguments.
sns.catplot(
    x='tip_pct',
    y='day',
    kind='box',
    data=tips[tips.tip_pct < 0.5]  # keep only rows with tip_pct below 0.5
)



<IPython.core.display.Javascript object>

<seaborn.axisgrid.FacetGrid at 0x163935040>

## 9.3 Other Python Visualization Tools

[Bokeh](https://docs.bokeh.org/en/latest/)  
[Plotly](https://github.com/plotly/plotly.py)