# 데이터 시각화

- Pandas로 분석한 결과를 Matplotlib으로 시각화
- line plot, bar plot, histogram, 산점도가 주로 사용됨

## 선 그래프
- 독립변수 X가 변함에 따라 종속변수 Y가 어떻게 변화하는지 나타내는 데 사용

In [1]:
# Load modules. nbagg: notebook backend that lets a plot be modified after it is drawn.
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
%matplotlib nbagg

In [2]:
# Line plot data for a Series: the cumulative sum of 10 standard-normal draws,
# indexed by 0, 10, ..., 90.
s = pd.Series(
    data=np.random.randn(10).cumsum(),
    index=np.arange(0, 100, 10),
)
s

0    -0.227827
10   -0.383714
20    0.792916
30   -0.379770
40   -0.750899
50   -0.270916
60   -2.166485
70   -0.731587
80   -2.097287
90   -1.874708
dtype: float64

In [38]:
# Draw the Series as a line plot (the default plot kind); the index is used for the x-axis.
s.plot()

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1192fb9d0>

In [43]:
# Line plot data for a DataFrame: 10 rows x 4 columns of standard-normal draws,
# cumulatively summed down each column (axis=0), indexed by 0, 10, ..., 90.
df = pd.DataFrame(
    data=np.random.randn(10, 4).cumsum(axis=0),
    index=np.arange(0, 100, 10),
    columns=['A', 'B', 'C', 'D'],
)

In [44]:
# Line plot of the whole DataFrame: one line per column, plotted against the index.
df.plot()

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x11963deb0>

In [45]:
# Line plot of column 'B' only.
# NOTE(review): the recorded output shows no new figure for this cell, so the line
# was presumably drawn onto the already-open axes — confirm.
df['B'].plot()

<matplotlib.axes._subplots.AxesSubplot at 0x119558220>

In [46]:
# Bar chart data: 16 uniform [0, 1) samples labelled 'a' through 'p'.
s2 = pd.Series(
    data=np.random.rand(16),
    index=list('abcdefghijklmnop'),
)
s2

a    0.975427
b    0.795619
c    0.835191
d    0.968799
e    0.745725
f    0.033783
g    0.727095
h    0.091307
i    0.075855
j    0.392117
k    0.491288
l    0.672192
m    0.588830
n    0.834817
o    0.286639
p    0.176985
dtype: float64

In [50]:
# Vertical bar chart: one bar per index label.
s2.plot(kind='bar')

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1197eedf0>

In [51]:
# Horizontal bar chart
s2.plot(kind="barh")

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x11d11c2b0>

In [23]:
# Bar chart data for a DataFrame: 6 labelled rows x 4 columns of uniform [0, 1)
# samples. The column index carries the name 'BoxRing'.
df2 = pd.DataFrame(
    data=np.random.rand(6, 4),
    index=['one', 'two', 'three', 'four', 'five', 'six'],
    columns=pd.Index(['A', 'B', 'C', 'D'], name='BoxRing'),
)
df2

BoxRing,A,B,C,D
one,0.473981,0.78819,0.678297,0.799126
two,0.150106,0.035162,0.57629,0.608035
three,0.644129,0.376533,0.550046,0.862406
four,0.520333,0.918848,0.996545,0.439274
five,0.466755,0.860341,0.210761,0.075226
six,0.46176,0.445267,0.960182,0.060723


In [52]:
# Grouped vertical bar chart: one group per row label, one bar per column.
df2.plot(kind='bar')

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x11dba87c0>

In [53]:
# Grouped horizontal bar chart: one group per row label, one bar per column.
df2.plot(kind='barh')

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x11e0e3b50>

In [54]:
# Stacked vertical bar chart: kind='bar' draws vertical bars, and stacked=True
# stacks each row's column values into a single bar (kind='barh' would be the
# horizontal variant).
df2.plot(kind='bar', stacked=True)

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x11e6a7700>

### 히스토그램

In [30]:
# Histogram data: 200 samples from a normal distribution with mean 0 and
# standard deviation 1.
s3 = pd.Series(np.random.normal(loc=0, scale=1, size=200))
s3

0     -0.925481
1      0.754606
2      0.633663
3      0.075529
4     -0.646226
         ...   
195    0.331938
196   -0.097694
197   -1.150791
198   -0.828315
199    1.157734
Length: 200, dtype: float64

In [55]:
# Histogram of the Series values, using the default bin count.
s3.hist()

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x11e7a34f0>

### 산점도

In [34]:
# Scatter plot data: two Gaussian columns, each with 100 rows and 1 column.
#   x1: mean 1, standard deviation 1
#   x2: mean -2, standard deviation 4
x1 = np.random.normal(loc=1, scale=1, size=(100, 1))
x2 = np.random.normal(loc=-2, scale=4, size=(100, 1))
# Place the columns side by side to get a (100, 2) array, then preview five rows.
x = np.hstack((x1, x2))
x[:5]

array([[ 2.02372505, -1.85610507],
       [ 0.80625711,  3.24227166],
       [ 0.36789966,  3.99663616],
       [ 2.61018548,  1.90141477],
       [ 1.16285746, -5.66930162]])

In [56]:
# Wrap the two-column array x in a DataFrame with named columns.
df3 = pd.DataFrame(
    data=x,
    columns=['x1', 'x2'],
)
df3

Unnamed: 0,x1,x2
0,2.023725,-1.856105
1,0.806257,3.242272
2,0.367900,3.996636
3,2.610185,1.901415
4,1.162857,-5.669302
...,...,...
95,0.275606,-0.032659
96,1.367716,-2.086926
97,1.873162,3.503374
98,-0.075836,-7.694530


In [57]:
# Scatter plot: column x1 on the horizontal axis, column x2 on the vertical axis.
plt.scatter(df3['x1'], df3['x2'])

<IPython.core.display.Javascript object>

<matplotlib.collections.PathCollection at 0x1196bf9d0>