## 7.3 plotly

reference: https://plotly.com/python/

In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
from plotly.subplots import make_subplots

### Bubble Charts
**px**

```python
# Bubble chart of the 2007 gapminder rows: GDP per capita (log x axis)
# against life expectancy, bubble size by population, color by continent.
df = px.data.gapminder()

fig = px.scatter(
    df.query("year==2007"),
    x="gdpPercap",
    y="lifeExp",
    size="pop",
    color="continent",
    hover_name="country",
    log_x=True,
    size_max=60,
)
fig.show()
```

**go**

```python
# Bubble chart from graph objects: marker size and color are given per point,
# and showscale adds the color bar.
bubble_marker = dict(
    color=[120, 125, 130, 135, 140, 145],
    size=[15, 30, 55, 70, 90, 110],
    showscale=True,
)
bubble_trace = go.Scatter(
    x=[1, 3.2, 5.4, 7.6, 9.8, 12.5],
    y=[1, 3.2, 5.4, 7.6, 9.8, 12.5],
    mode='markers',
    marker=bubble_marker,
)
fig = go.Figure(data=[bubble_trace])

fig.show()
```

In [2]:
# Load the gapminder sample dataset shipped with plotly express.
data = px.data.gapminder()

In [3]:
# Display the DataFrame as the cell output.
data

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap,iso_alpha,iso_num
0,Afghanistan,Asia,1952,28.801,8425333,779.445314,AFG,4
1,Afghanistan,Asia,1957,30.332,9240934,820.853030,AFG,4
2,Afghanistan,Asia,1962,31.997,10267083,853.100710,AFG,4
3,Afghanistan,Asia,1967,34.020,11537966,836.197138,AFG,4
4,Afghanistan,Asia,1972,36.088,13079460,739.981106,AFG,4
...,...,...,...,...,...,...,...,...
1699,Zimbabwe,Africa,1987,62.351,9216418,706.157306,ZWE,716
1700,Zimbabwe,Africa,1992,60.377,10704340,693.420786,ZWE,716
1701,Zimbabwe,Africa,1997,46.809,11404948,792.449960,ZWE,716
1702,Zimbabwe,Africa,2002,39.989,11926563,672.038623,ZWE,716


In [5]:
# Bubble chart over every gapminder row: size by population, color by
# continent, x axis on a log scale.
px.scatter(data, x='gdpPercap', y='lifeExp', size='pop', color='continent', log_x=True)

In [16]:
fig = go.Figure()

# First trace: six points on the diagonal with per-point color and size.
diagonal_points = [1, 3.2, 5.4, 7.6, 9.8, 12.5]
diagonal_marker = dict(
    color=list(range(120, 150, 5)),
    size=list(range(15, 105, 15)),
)
fig.add_trace(
    go.Scatter(x=diagonal_points, y=diagonal_points, mode='markers', marker=diagonal_marker)
)

# Second trace: ten uniformly random points with default markers.
fig.add_trace(
    go.Scatter(x=np.random.random(10), y=np.random.random(10), mode='markers')
)

fig.show()

In [20]:
# One marker trace per continent for the 2007 rows, with log-scaled GDP on x
# and log-scaled population as the marker size.
fig = go.Figure()
for continent in data.continent.unique():
    # Filter once per continent instead of re-running the same query
    # for x, y and the marker size.
    subset = data.query(f'year==2007 and continent=="{continent}"')
    fig.add_trace(
        go.Scatter(
            x=np.log(subset.gdpPercap),
            y=subset.lifeExp,
            mode='markers',
            name=continent,
            marker=dict(size=np.log(subset['pop'])),
        )
    )
fig.show()

### Scatter Plots

**px**
  - color: 그룹 별 다른 색상으로 plot
  - marginal_x: 지정된 컬럼의 x축 주변부 분포 출력
  - marginal_y: 지정된 컬럼의 y축 주변부 분포 출력
  - facet_col: 지정된 컬럼별 그래프 column 출력
  - facet_row: 지정된 컬럼별 그래프 row 출력
  - trendline: plot의 모델링 결과 표시
    - ols: 회귀직선

```python
# Iris scatter: color by species, marker size by petal length, and petal
# width added to the hover tooltip.
df = px.data.iris()

fig = px.scatter(
    df,
    x="sepal_width",
    y="sepal_length",
    color="species",
    size='petal_length',
    hover_data=['petal_width'],
)
fig.show()
```

**go**
```python
import plotly.graph_objects as go

# Four markers whose size and color are set individually.
marker_style = dict(size=[40, 60, 80, 100], color=[0, 1, 2, 3])
fig = go.Figure(
    data=go.Scatter(
        x=[1, 2, 3, 4],
        y=[10, 11, 12, 13],
        mode='markers',
        marker=marker_style,
    )
)

fig.show()
```

In [21]:
# Rebind `data` to the iris sample dataset shipped with plotly express.
data = px.data.iris()

In [23]:
# Sepal length vs. width with an OLS regression line drawn over the points.
px.scatter(data, x='sepal_length', y='sepal_width', trendline='ols')

In [25]:
# Tips scatter split into a grid of panels: one column per sex, one row per
# time, colored by smoker.
tips_facet_options = dict(
    x='total_bill',
    y='tip',
    color='smoker',
    facet_col='sex',
    facet_row='time',
)
px.scatter(px.data.tips(), **tips_facet_options)

### Line Charts

**px**

args
  - markers: bool
    - True: line 상에 점 표시
  - color: 그룹 별 다른 색상으로 plot

```python
# Life expectancy over time for the Oceania rows, one line per country.
df = px.data.gapminder()
df = df.query("continent == 'Oceania'")

fig = px.line(
    df,
    x='year',
    y='lifeExp',
    color='country',
)
fig.show()
```
<br>

**go**

args
  - markers: bool
    - True: line 상에 점 표시
  - color: 그룹 별 다른 색상으로 plot
  - name: legend에 표기될 이름

```python
N = 100
random_x = np.linspace(0, 1, N)
# Three noisy series, shifted apart vertically so they do not overlap.
random_y0 = np.random.randn(N) + 5
random_y1 = np.random.randn(N)
random_y2 = np.random.randn(N) - 5

fig = go.Figure()

# One trace per drawing mode; the mode string doubles as the legend name.
for series, mode in (
    (random_y0, 'markers'),
    (random_y1, 'lines+markers'),
    (random_y2, 'lines'),
):
    fig.add_trace(go.Scatter(x=random_x, y=series, mode=mode, name=mode))

fig.show()
```

In [26]:
# Rebind `data` to the gapminder sample dataset for the line-chart cells.
data = px.data.gapminder()

In [27]:
# Life expectancy by year, one colored line per country.
px.line(data, x='year', y='lifeExp', color='country')

In [30]:
# Life expectancy over time for two countries, drawn as lines with markers.
fig = go.Figure()

for country in ('Cambodia', 'Rwanda'):
    # Filter once per country instead of querying separately for x and y.
    rows = data.query(f'country=="{country}"')
    fig.add_trace(
        go.Scatter(
            x=rows.year,
            y=rows.lifeExp,
            mode='markers+lines',
            name=country,
        )
    )

# Show the figure explicitly, as the other graph-object cells do.
fig.show()

### Bar Charts

**px**

args
  - color: bar에 입력한 column별로 plot
  - barmode
    - stack: 누적
    - group: 분할
  - title: plot의 제목 입력
  - text_auto
    - True: 수치 표기
  - facet_col: 지정된 컬럼별 그래프 column 출력
  - facet_row: 지정된 컬럼별 그래프 row 출력
  
```python
# Stacked bar chart from the long-form medals table: one bar per nation,
# segments colored by medal type.
long_df = px.data.medals_long()

fig = px.bar(
    long_df,
    x="nation",
    y="count",
    color="medal",
    title="Long-Form Input",
)
fig.show()
```
<br>

**go**
args
  - barmode
    - stack: 누적
    - group: 분할


```python
# Grouped bars: two products side by side for every month.
months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
          'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

fig = go.Figure()
fig.add_trace(
    go.Bar(
        x=months,
        y=[20, 14, 25, 16, 18, 22, 19, 15, 12, 16, 14, 17],
        name='Primary Product',
        marker_color='indianred',
    )
)
fig.add_trace(
    go.Bar(
        x=months,
        y=[19, 14, 22, 14, 16, 19, 15, 14, 10, 12, 12, 16],
        name='Secondary Product',
        marker_color='lightsalmon',
    )
)

fig.update_layout(barmode='group', xaxis_tickangle=-45)
fig.show()

# ----------
# highlight: every bar is gray except the second one

colors = ['lightslategray'] * 5
colors[1] = 'crimson'

fig = go.Figure(data=[go.Bar(
    x=['Feature A', 'Feature B', 'Feature C',
       'Feature D', 'Feature E'],
    y=[20, 14, 23, 25, 22],
    marker_color=colors,
)])
fig.show()

# -------------
# compare: expenses start from a negative base so they hang below zero,
# revenue starts from zero

years = ['2016', '2017', '2018']

fig = go.Figure()
fig.add_trace(
    go.Bar(
        x=years, y=[500, 600, 700],
        base=[-500, -600, -700],
        marker_color='crimson',
        name='expenses',
    )
)
fig.add_trace(
    go.Bar(
        x=years, y=[300, 400, 700],
        base=0,
        marker_color='lightslategrey',
        name='revenue',
    )
)

fig.show()
```

In [33]:
# Display the wide-form medals table (one column per medal type).
px.data.medals_wide()

Unnamed: 0,nation,gold,silver,bronze
0,South Korea,24,13,11
1,China,10,15,8
2,Canada,9,12,12


In [34]:
# Rebind `data` to the long-form medals table (one row per nation/medal pair).
data = px.data.medals_long()

In [35]:
# Display the DataFrame as the cell output.
data

Unnamed: 0,nation,medal,count
0,South Korea,gold,24
1,China,gold,10
2,Canada,gold,9
3,South Korea,silver,13
4,China,silver,15
5,Canada,silver,12
6,South Korea,bronze,11
7,China,bronze,8
8,Canada,bronze,12


In [38]:
# Medal counts per nation, bars grouped by medal type with the value printed
# on each bar.
px.bar(data, x='nation', y='count', color='medal', barmode='group', text_auto=True)

In [41]:
months = [
    'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
    'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'
]

fig = go.Figure()

# One bar trace per product, each with twelve random integer heights drawn
# from [1, 100) and starting at base 0.
for product in ('Primary Product', 'Secondary Product'):
    fig.add_trace(
        go.Bar(x=months, y=np.random.randint(1, 100, 12), base=0, name=product)
    )

fig.show()

### Box Plots


**px**

args
  - points
    - all: box plot과 데이터 분포를 같이 출력
  - color: 지정한 컬럼을 그룹으로 출력

```python
# Box plot of the total bill per day, with separate boxes per smoker value.
df = px.data.tips()

fig = px.box(
    df,
    x="day",
    y="total_bill",
    color="smoker",
)
fig.show()
```
<br>

**go**

args
  - name: legend에 표기될 이름
  - marker_color: box에 색을 입힘

```python
# Two random samples shifted to -1 and +1.
y0 = np.random.randn(50) - 1
y1 = np.random.randn(50) + 1

fig = go.Figure()
# Plain boxes with default names and colors.
fig.add_trace(go.Box(y=y0))
fig.add_trace(go.Box(y=y1))

# Named, colored boxes.
# To draw them horizontally, pass the values as x instead of y.
fig.add_trace(
    go.Box(y=y0, name='Sample A', marker_color='indianred')
)
fig.add_trace(
    go.Box(y=y1, name='Sample B', marker_color='lightseagreen')
)

fig.show()

# --------------------------------------------------------
# grouped boxplot

x = ['day 1', 'day 1', 'day 1', 'day 1', 'day 1', 'day 1',
     'day 2', 'day 2', 'day 2', 'day 2', 'day 2', 'day 2']

fig = go.Figure()

fig.add_trace(
    go.Box(
        y=[0.2, 0.2, 0.6, 1.0, 0.5, 0.4, 0.2, 0.7, 0.9, 0.1, 0.5, 0.3],
        x=x,
        name='kale',
        marker_color='#3D9970',
    )
)
fig.add_trace(
    go.Box(
        y=[0.6, 0.7, 0.3, 0.6, 0.0, 0.5, 0.7, 0.9, 0.5, 0.8, 0.7, 0.2],
        x=x,
        name='radishes',
        marker_color='#FF4136',
    )
)
fig.add_trace(
    go.Box(
        y=[0.1, 0.3, 0.1, 0.9, 0.6, 0.6, 0.9, 1.0, 0.3, 0.6, 0.8, 0.5],
        x=x,
        name='carrots',
        marker_color='#FF851B',
    )
)

# The closing parenthesis of update_layout was missing in the original.
fig.update_layout(
    yaxis_title='normalized moisture',
    boxmode='group',
)
# Horizontal box plots. NOTE: as stated above, a horizontal layout expects the
# numeric values in x (and the groups in y), so swap x and y when using this.
fig.update_traces(orientation='h')

fig.show()
```

In [42]:
# Rebind `data` to the tips sample dataset shipped with plotly express.
data = px.data.tips()

In [43]:
# Total bill distribution per day, split by smoker.
px.box(data, x='day', y='total_bill', color='smoker')

go

In [49]:
fig = go.Figure()

# One horizontal box per day; the bill amounts go on the x axis.
for day in ('Sun', 'Sat'):
    fig.add_trace(
        go.Box(x=data.query(f'day=="{day}"').total_bill, name=day)
    )

fig.update_traces(orientation='h')

fig.show()

### Pie Charts

**px**

args
  - names: 어떤 범주로 pie chart를 그릴 건지 설정
  - values: 비율을 산정할 컬럼

```python
# European populations in 2007; countries under two million inhabitants are
# relabelled so they share a single slice.
df = px.data.gapminder().query("year == 2007").query("continent == 'Europe'")
is_small = df['pop'] < 2.e6
df.loc[is_small, 'country'] = 'Other countries'
fig = px.pie(
    df,
    values='pop',
    names='country',
    title='Population of European continent',
)
fig.show()
```
<br>

**go**

args
  - hole: 도넛 모양을 만들 때 얼마나 구멍을 만들지 비율
  - pull: 일정 부분을 강조
  - scalegroup: 파이 크기 만큼 그래프를 조정

```python
# Donut chart (hole=.3) with the third slice pulled out of the pie.
labels = ['Oxygen', 'Hydrogen', 'Carbon_Dioxide', 'Nitrogen']
values = [4500, 2500, 1053, 500]

fig = go.Figure(
    data=[go.Pie(labels=labels, values=values, hole=.3, pull=[0, 0, 0.2, 0])]
)
fig.show()

# --------------
# Drawing several pie charts side by side
labels = ["US", "China", "European Union", "Russian Federation", "Brazil", "India", "Rest of World"]

# Pie traces need 'domain'-type subplot cells.
fig = make_subplots(rows=1, cols=2, specs=[[{'type': 'domain'}, {'type': 'domain'}]])
fig.add_trace(
    go.Pie(labels=labels, values=[16, 15, 12, 6, 5, 4, 42], scalegroup='one', name="GHG Emissions"),
    1, 1,
)
fig.add_trace(
    go.Pie(labels=labels, values=[27, 11, 25, 8, 1, 3, 25], scalegroup='one', name="CO2 Emissions"),
    1, 2,
)

fig.update_traces(hole=.4, hoverinfo="label+percent+name")
fig.show()
```

In [50]:
# Rebind `data` to the gapminder rows for Europe in 2007.
data = px.data.gapminder().query('year==2007 and continent=="Europe"')

In [52]:
# Share of the population per country.
px.pie(data, values='pop', names='country')

go

In [55]:
fig = go.Figure()

# NOTE: the column must be selected with data['pop']. Attribute access
# (`data.pop`) resolves to the DataFrame.pop method instead of the column and
# fails with:
#   TypeError: pop() missing 1 required positional argument: 'item'
fig.add_trace(
    go.Pie(
        labels=data.country,
        values=data['pop'],
        hole=0.3,
        # Pull only the first slice out of the pie.
        pull=[0.3] + [0] * (len(data.country) - 1),
    )
)

fig.show()

In [58]:
# Load and filter the 2002 rows once instead of repeating the same
# px.data.gapminder().query(...) call for labels, values and pull.
europe_2002 = px.data.gapminder().query('year==2002 and continent=="Europe"')

# Pie traces need 'domain'-type subplot cells.
fig = make_subplots(rows=1, cols=2, specs=[[{'type': 'domain'}, {'type': 'domain'}]])

# Left: 2007 (`data`), right: 2002. The first slice of each pie is pulled out.
for col, (frame, label) in enumerate(((data, '2007'), (europe_2002, '2002')), start=1):
    fig.add_trace(
        go.Pie(
            labels=frame.country,
            values=frame['pop'],
            hole=0.3,
            pull=[0.3] + [0] * (len(frame.country) - 1),
            name=label,
        ),
        1, col,
    )

fig.show()

### Histograms

**px**

args
  - color: 그룹 별 다른 색상으로 plot
  - category_orders: 범주 순서 설정
  - log_y
    - True: log scale
  - nbins: 구간 수
  - bargap: bar간 간격
  - marginal: 데이터 분포 출력
    - rug
    - box
    - violin
  - text_auto
    - True: 값 출력

```python
# Histogram of the total bill with a gap between the bars.
df = px.data.tips()
# The closing parenthesis of px.histogram was missing in the original.
fig = px.histogram(
    df,
    x="total_bill",
    category_orders=dict(day=["Thur", "Fri", "Sat", "Sun"]),
)
fig.update_layout(bargap=0.2)
fig.show()
```
<br>

**go**

args
  - histnorm
    - probability: normalized
  - barmode
    - overlay: 그래프를 겹쳐서 plot
    - stack: 그래프를 쌓아서 plot

```python
# Two normal samples, the second shifted by +1.
x0 = np.random.randn(500)
x1 = np.random.randn(500) + 1

fig = go.Figure()
for sample in (x0, x1):
    fig.add_trace(go.Histogram(x=sample))

# Draw the histograms on top of each other, semi-transparent so both stay visible.
fig.update_layout(barmode='overlay')
fig.update_traces(opacity=0.75)
fig.show()
```

### Distplots
**ff**

args
  - curve_type
    - kde: kernel density estimation
    - normal: 정규분포
  - bin_size: 그룹별 bin 크기 조절 
  - show_hist
    - False: 막대그래프를 출력하지 않음
  - show_curve
    - False: curve 그래프를 출력하지 않음
  - show_rug
    - False: 분포 그래프를 출력하지 않음

```python
# Four normal samples centered at -2, 0, 2 and 4.
x1 = np.random.randn(200) - 2
x2 = np.random.randn(200)
x3 = np.random.randn(200) + 2
x4 = np.random.randn(200) + 4

hist_data = [x1, x2, x3, x4]

group_labels = ['Group 1', 'Group 2', 'Group 3', 'Group 4']
# bin_size is given as a list here: one bin width per group.
fig = ff.create_distplot(hist_data, group_labels, bin_size=[.1, .25, .5, 1])
fig.show()

# --------------------
# pandas: every DataFrame column becomes one group
df = pd.DataFrame({
    '2012': np.random.randn(200),
    '2013': np.random.randn(200) + 1,
})
fig = ff.create_distplot([df[c] for c in df.columns], df.columns, bin_size=.25)
fig.show()
```


### Heatmaps

**px**
```python
import plotly.express as px

# Heatmap of the wide-form medals table, indexed by nation.
df = px.data.medals_wide(indexed=True)
# text_auto is an argument of px.imshow (it prints the cell values), not of
# the dataset loader, so it is passed here.
fig = px.imshow(df, text_auto=True)
fig.show()
```
<br>

**go**
```python
import plotly.graph_objects as go

# Heatmap over weekday (x) and time of day (y); the first cell is missing
# (None) and hoverongaps=False is set for such gaps.
heatmap_trace = go.Heatmap(
    z=[[1, None, 30, 50, 1], [20, 1, 60, 80, 30], [30, 60, 1, -10, 20]],
    x=['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday'],
    y=['Morning', 'Afternoon', 'Evening'],
    hoverongaps=False,
)
fig = go.Figure(data=heatmap_trace)
fig.show()
```

### Time Series and Date Axes

```python
df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/finance-charts-apple.csv')

fig = px.line(
    df,
    x='Date',
    y='AAPL.High',
    title='Time Series with Range Slider and Selectors',
)

# Range-selector buttons shown with the x axis: fixed look-back windows,
# year-to-date, and the full range.
range_buttons = [
    dict(count=1, label="1m", step="month", stepmode="backward"),
    dict(count=6, label="6m", step="month", stepmode="backward"),
    dict(count=1, label="YTD", step="year", stepmode="todate"),
    dict(count=1, label="1y", step="year", stepmode="backward"),
    dict(step="all"),
]
fig.update_xaxes(
    rangeslider_visible=True,
    rangeselector=dict(buttons=range_buttons),
)
fig.show()
```