# Chapter 9. Plotting and Visualization

In [2]:
%matplotlib notebook

In [3]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt

In [97]:
# 簡単な線形グラフ
data=np.arange(10)
plt.plot(data)

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x119490cf8>]

In [90]:
# figureを使うことで、グラフのプロットのレイアウトを操作できる
fig=plt.figure()

ax1=fig.add_subplot(2,2,1)
ax2=fig.add_subplot(2,2,2)
ax3=fig.add_subplot(2,2,3)

# 細かく分割したfigそれぞれにplot
ax1.plot(np.random.randn(50).cumsum(),'k--')
ax2.hist(np.random.randn(50),bins=20,color='k',alpha=0.3)
ax3.scatter(np.arange(30),np.arange(30)+3*np.random.randn(30))

<IPython.core.display.Javascript object>

<matplotlib.collections.PathCollection at 0x118ad9400>

datetime.datetime(2018, 4, 27, 20, 34, 55, 473633)

In [98]:
# axesでnumpyのndarrayのようにfigを指定可能

fig,axes=plt.subplots(2,2,sharex=True,sharey=True)
for i in range(2):
    for j in range(2):
         axes[i,j].hist(np.random.rand(500),bins=50,color='k',alpha=0.5)
# subplot_adjustで空白スペースをzeroに
plt.subplots_adjust(wspace=0,hspace=0)

<IPython.core.display.Javascript object>

datetime.datetime(2018, 4, 25, 11, 12, 37, 534152)

In [92]:
data=np.random.randn(50).cumsum()
# 'ko-はlinestyle='-',color='k',maker='o'と同じ意味
plt.plot(data,'k-',label='Default')
plt.plot(data,'k--',drawstyle='steps-post',label='steps-post')
# __no_legend__と指定すると、ラベルが表示されない
plt.plot(data,'k.',drawstyle='steps-post',label='__no_lengend__')
plt.legend(loc='best')
plt.show()

In [99]:
# rcでグローバル設定を変更可能
# font_options={'family':'monospace','weight':'bold'}
# size={'figsize':(10,10)}
# plt.rc('font',**font_options)
# plt.rc('figure',figsize=(3,3))

data2=np.random.randn(500).cumsum()
plt.plot(data2,'k-',label='Default')
plt.plot(data2,'k--',drawstyle='steps-post',label='steps-post')
plt.legend(loc='best')
plt.show()

In [6]:
from datetime import datetime


fig=plt.figure()
ax=fig.add_subplot(1,1,1)

# 日付をindexとして読み込む
data=pd.read_csv('./examples/spx.csv',index_col=0,parse_dates=True)
spx=data['SPX']
spx.plot(ax=ax,style='ko-')
# plotデータに注釈を追加
crisis_points=[
    (datetime(2007,10,11),'PointA'),
    (datetime(2008,3,12),'PointB'),
    (datetime(2008,9,13),'PointC')
]

for date,label in crisis_points:
    ax.annotate(label,xy=(date,spx.asof(date)+75),
    xytext=(date,spx.asof(date)+225),
    arrowprops=dict(facecolor='black',headwidth=6,width=2,headlength=4),
    ) 
    
ax.set_xlim(['1/1/2007','1/1/2009'])
ax.set_ylim([600,1800])
spx.plot(ax=ax,style='k-')

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x113e70278>

In [4]:
# pandsのオブジェクトをそのままplot
data=pd.Series(np.arange(0,10),index=np.arange(0,100,10))
data.plot()

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x112bd0b70>

In [15]:
# 一つのfigにplot
df=pd.DataFrame(np.random.randn(1000,4).cumsum(0),
                columns=list('ABCD'),index=np.arange(0,10000,10))

df.plot()

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x11437bc18>

In [18]:
# figを分けてplot
df.plot(subplots=True,sharex=True)

<IPython.core.display.Javascript object>

array([<matplotlib.axes._subplots.AxesSubplot object at 0x114b29d68>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x114e97ac8>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x114ebeef0>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x114eed4a8>],
      dtype=object)

In [45]:
# 垂直グラフと水平グラフ
fig,axes=plt.subplots(2,1)
data=pd.Series(np.random.rand(16),index=list('abcdefghijklmnop'))
data.plot.bar(ax=axes[0])
data.plot.barh(ax=axes[1])

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x117c81400>

In [42]:
# 頻度分布
fig=plt.figure()
ax=fig.add_subplot()
se=pd.Series(np.random.randint(0,10,1000))
se.value_counts().plot.bar(ax=ax)

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x115a3c048>

In [49]:
# クロス集計の結果を可視化
tips=pd.read_csv('examples/tips.csv')
party_counts=pd.crosstab(tips['day'],tips['size'])
party_counts

size,1,2,3,4,5,6
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Fri,1,16,1,1,0,0
Sat,2,53,18,13,1,0
Sun,0,39,15,18,3,1
Thur,1,48,4,5,1,3


In [86]:
party_pcts=party_counts.div(party_counts.sum(axis=1),axis=0)
party_pcts.cumsum(1) # 1.0になる
party_pcts.plot.bar(stacked=True)


<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x11bf823c8>

In [90]:
# seabornライブラリで可視化
import seaborn as sns

tips['tip_pct']=tips['tip']/(tips['total_bill']-tips['tip'])
tips.head()
tips.describe()

Unnamed: 0,total_bill,tip,size,tips_pict,tips_pct,tip_pct
count,244.0,244.0,244.0,244.0,244.0,244.0
mean,19.785943,2.998279,2.569672,0.202123,0.202123,0.202123
std,8.902412,1.383638,0.9511,0.163385,0.163385,0.163385
min,3.07,1.0,1.0,0.036955,0.036955,0.036955
25%,13.3475,2.0,2.0,0.148274,0.148274,0.148274
50%,17.795,2.9,2.0,0.18311,0.18311,0.18311
75%,24.1275,3.5625,3.0,0.236821,0.236821,0.236821
max,50.81,10.0,6.0,2.452381,2.452381,2.452381


In [95]:
fig=plt.figure()
ax=fig.add_subplot()
sns.barplot(ax=ax,x='tip_pct',y='day',hue='time',data=tips,orient='h')

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x11ca99198>

In [100]:
# 密度分布
fig=plt.figure()
ax=fig.add_subplot(1,1,1)
comp1=np.random.normal(0,1,1000)
comp2=np.random.normal(10,2,1000)
comp=np.concatenate([comp1,comp2])
values=pd.Series(comp)
sns.distplot(ax=ax,a=value,bins=100)

<IPython.core.display.Javascript object>



<matplotlib.axes._subplots.AxesSubplot at 0x11d4c66d8>

In [106]:
# 散布図で関係性を考える
fig=plt.figure()
ax=fig.add_subplot(1,1,1)
macro=pd.read_csv('examples/macrodata.csv')
data=macro[['cpi','m1','tbilrate','unemp']]
trans_data=np.log(data).diff().dropna()
sns.regplot(ax=ax,x='m1',y='unemp',data=trans_data)

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x11d682dd8>

In [108]:
# すべてのcolumとの散布図とヒストグラム
sns.pairplot(data=trans_data,diag_kind='kde')

<IPython.core.display.Javascript object>

<seaborn.axisgrid.PairGrid at 0x11e6d7c88>

In [114]:
# データをカテゴライズする
sns.factorplot(x='day',y='tip_pct',row='time',col='smoker',kind='bar',data=tips)

<IPython.core.display.Javascript object>

<seaborn.axisgrid.FacetGrid at 0x11feb5128>