### Plot express 사용한 시각화
* cufflinks 보다 좀더 다양하며 사용방법은 seaborn과 비슷함
* plotly_express이용. plotly 4.1부터는 별도의 설치 없어도 됨.

In [39]:
import pandas as pd
import numpy as np
import seaborn as sns

import plotly
import cufflinks as cf

from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
cf.go_offline()

import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go

In [40]:
print(sys.__version__)
print(plotly.__version__)
print(pd.__version__)

NameError: name 'sys' is not defined

#### iris 데이터 셋

In [41]:
print(px.data.iris.__doc__)


    Each row represents a flower.

    https://en.wikipedia.org/wiki/Iris_flower_data_set

    Returns:
        A `pandas.DataFrame` with 150 rows and the following columns:
        `['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species', 'species_id']`.


In [42]:
# iris데이터 불러오기
iris = px.data.iris()
print(iris.shape)
iris.head()

(150, 6)


Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,species_id
0,5.1,3.5,1.4,0.2,setosa,1
1,4.9,3.0,1.4,0.2,setosa,1
2,4.7,3.2,1.3,0.2,setosa,1
3,4.6,3.1,1.5,0.2,setosa,1
4,5.0,3.6,1.4,0.2,setosa,1


In [44]:
# 시각화
fig = px.scatter(iris, x='sepal_width', y='sepal_length')
fig.show()

In [45]:
fig_color = px.scatter(iris, x='sepal_width', y='sepal_length', color = 'species')
fig_color.show()

In [46]:
fig_margin = px.scatter(iris, x = 'sepal_width', y = 'sepal_length',
                        color      = 'species', # 컬럼명 species의 값에 따라 다른 색으로 표시
                        marginal_x = 'box',
                        marginal_y = 'violin',
                        trendline  = 'ols'
                       )
fig_margin.show()

In [47]:
# 산점도 행렬
fig_mat = px.scatter_matrix(iris, dimensions=['sepal_width','sepal_length',
                                            'petal_width','petal_length'],
                                            color = 'species')
fig_mat.show()

### 다차원 범주형 데이터 시각화
* 데이터 집합의 각 변수는 직사각형 열로 표시
* 직사각형은 해당 변수에 의해 취해지는 이산형 값에 해당

In [49]:
tips = px.data.tips()
tips['size'].unique()

array([2, 3, 4, 1, 6, 5])

In [50]:
tips.columns

Index(['total_bill', 'tip', 'sex', 'smoker', 'day', 'time', 'size'], dtype='object')

In [51]:
fig_tips = px.parallel_categories(tips)
fig_tips.show()

In [52]:
fig_tips_color = px.parallel_categories(tips, color='size')
fig_tips_color.show()

### gapminder 데이터 셋 시각화
* gapminder 데이터 셋?
   * 국가별 경제 수준과 의료 동향 수준을 정리한 DataSet이다.

#### 1인당 국민소득과 기대수명은 어떤 관계를 가질까

In [55]:
gapminder = px.data.gapminder()
gapminder.describe()

Unnamed: 0,year,lifeExp,pop,gdpPercap,iso_num
count,1704.0,1704.0,1704.0,1704.0,1704.0
mean,1979.5,59.474439,29601210.0,7215.327081,425.880282
std,17.26533,12.917107,106157900.0,9857.454543,248.305709
min,1952.0,23.599,60011.0,241.165876,4.0
25%,1965.75,48.198,2793664.0,1202.060309,208.0
50%,1979.5,60.7125,7023596.0,3531.846989,410.0
75%,1993.25,70.8455,19585220.0,9325.462346,638.0
max,2007.0,82.603,1318683000.0,113523.1329,894.0


In [26]:
gapminder = px.data.gapminder()

fig = px.scatter(gapminder, x='gdpPercap', y='lifeExp')
fig.show()

In [27]:
gapminder = px.data.gapminder()

fig = px.scatter(gapminder, x='gdpPercap', y='lifeExp', trendline='ols', 
                 marginal_y='violin',
                 marginal_x='box')
fig.show()  

In [28]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

In [29]:
fig = make_subplots(rows=2, cols=2, shared_yaxes=True)

In [30]:
gap_1952 = gapminder.query("year==1952")
gap_2007 = gapminder.query("year==2007")

gap_1952_line = go.Scatter(x=gap_1952["gdpPercap"], 
                           y=gapminder["lifeExp"], 
                           mode='markers')

gap_2007_line = go.Scatter(x=gap_2007["gdpPercap"], 
                           y=gapminder["lifeExp"], 
                           mode='markers')

fig.add_trace(gap_1952_line, row=1, col=1)
fig.add_trace(gap_2007_line, row=2, col=1)

In [31]:
layout = {
    "xaxis": {
        "title": "1인당 국민소득",
    },
    "yaxis": {
        "title": "기대수명"
    },
    
    "xaxis2": {
        "title": "1인당 국민소득",
    },
    "yaxis2": {
        "title": "기대수명"
    }
}

fig.update_layout(layout)

In [32]:
gapminder = px.data.gapminder()
fig = px.scatter(gapminder.query("year==2007"),
         x="gdpPercap",
         y="lifeExp",
         size="pop",
         color="continent",     # 색 구분 : 대륙
         hover_name="country",  # 마우스 클릭 나라명
         log_x=True, size_max=60)

fig.show()

In [33]:
import plotly.express as px
px.defaults.width = 700
px.defaults.height = 600

fig = px.scatter(gapminder,
         animation_frame="year",
         animation_group="country",
         x="gdpPercap",
         y="lifeExp",
         size="pop",
         color="continent",
         hover_name="country", 
         facet_col="continent",
         log_x=True, size_max=60)

fig

In [34]:
fig = px.scatter(gapminder, x="gdpPercap", y="lifeExp",
         animation_frame="year",
         animation_group="country",
         size="pop",
         color="continent",
         hover_name="country",
         facet_col="continent",
         log_x=True, size_max=45, range_x=[100,100000], range_y=[20,90])
fig.show()

In [35]:
import plotly.express as px
tips = px.data.tips()
fig = px.bar(tips, x="sex", y="total_bill", color="smoker", barmode="group")
fig.show()

In [36]:
df = px.data.election()
print(df.shape)
print(df.head())
print(df.columns)
print(px.data.election.__doc__)

(58, 8)
                district  Coderre  Bergeron  Joly  total    winner     result  \
0     101-Bois-de-Liesse     2481      1829  3024   7334      Joly  plurality   
1  102-Cap-Saint-Jacques     2525      1163  2675   6363      Joly  plurality   
2   11-Sault-au-Récollet     3348      2770  2532   8650   Coderre  plurality   
3           111-Mile-End     1734      4782  2514   9030  Bergeron   majority   
4         112-DeLorimier     1770      5933  3044  10747  Bergeron   majority   

   district_id  
0          101  
1          102  
2           11  
3          111  
4          112  
Index(['district', 'Coderre', 'Bergeron', 'Joly', 'total', 'winner', 'result',
       'district_id'],
      dtype='object')

    Each row represents voting results for an electoral district in the 2013 Montreal
    mayoral election.

    Returns:
        A `pandas.DataFrame` with 58 rows and the following columns:
        `['district', 'Coderre', 'Bergeron', 'Joly', 'total', 'winner', 'result', 'dist

In [37]:
import plotly.express as px
election = px.data.election()
fig = px.line_3d(election, 
                 x="Joly", y="Coderre", z="Bergeron", 
                 color="winner", line_dash="winner")
fig.show()

In [38]:
fig = px.scatter_geo(gapminder,
             locations="iso_alpha",
             color="continent",
             hover_name="country",
             size="pop",
            animation_frame="year", projection="natural earth")
fig.show()