# Bar chart with Plotly Express

- Description
  - 가로축에 일반적으로 범주
  - 세로축에 값


- library : plotly
  - Module : express
- library : pandas
  - Class : DataFrame
  - Attribute : shape, size

- dataset : gapminder


In [1]:
# 라이브러리 임포트
import plotly.express as px
import pandas as pd


In [2]:
# Load the gapminder sample dataset that ships with Plotly Express.
data = px.data.gapminder()

# Inspect the DataFrame: column summary, dimensions, element count, first rows.
print("info")
data.info()
print(f"shape {data.shape}")
print(f"size {data.size}")
print(data.head(3))
 

info
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1704 entries, 0 to 1703
Data columns (total 8 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   country    1704 non-null   object 
 1   continent  1704 non-null   object 
 2   year       1704 non-null   int64  
 3   lifeExp    1704 non-null   float64
 4   pop        1704 non-null   int64  
 5   gdpPercap  1704 non-null   float64
 6   iso_alpha  1704 non-null   object 
 7   iso_num    1704 non-null   int64  
dtypes: float64(2), int64(3), object(3)
memory usage: 106.6+ KB
shape (1704, 8)
size 13632
       country continent  year  lifeExp       pop   gdpPercap iso_alpha  \
0  Afghanistan      Asia  1952   28.801   8425333  779.445314       AFG   
1  Afghanistan      Asia  1957   30.332   9240934  820.853030       AFG   
2  Afghanistan      Asia  1962   31.997  10267083  853.100710       AFG   

   iso_num  
0        4  
1        4  
2        4  


In [24]:
# Keep only the rows whose country is Canada.
data_canada = px.data.gapminder().loc[lambda frame: frame["country"] == "Canada"]

In [25]:
# Bar chart of Canada's population per year; each bar is annotated with its value.
fig1 = px.bar(
    data_canada,
    x="year",
    y="pop",
    text="pop",
    opacity=0.4,
    labels=dict(pop="population of Canada"),
    title="Bar chart : Canada's Population Over Time",
)

fig1.show()

In [26]:
# Keep only the rows whose continent is Oceania.
data_Oceania = px.data.gapminder().loc[lambda frame: frame["continent"] == "Oceania"]

In [27]:
# Grouped bar chart: one bar per country and year for the Oceania subset.
# hover_data adds life expectancy and GDP per capita to the hover tooltip.
fig2 = px.bar(
    data_Oceania,
    x='year',
    y='pop',
    color='country',
    labels={'pop': 'Population of Oceania'},
    hover_data=['lifeExp', 'gdpPercap'],
    barmode='group',
    # Title spelling fixed: "Austraila" -> "Australia".
    title="Bar chart : Australia and New Zealand's Population Over Time",
    opacity=0.3,
    text="pop",
)

fig2.show()

In [13]:
# make_subplot practice

from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Stack vs Grouped Bar

In [32]:
# Same Oceania data as the grouped chart, but barmode is left unset so that
# Plotly Express falls back to its default bar layout instead of 'group'.
fig3 = px.bar(
    data_Oceania,
    x='year',
    y='pop',
    color='country',
    # pattern_shape_sequence is only applied when pattern_shape names a column;
    # without it the sequence below was silently ignored.
    pattern_shape='country',
    pattern_shape_sequence=[".", '+'],
    labels={'pop': 'Population of Oceania'},
    hover_data=['lifeExp', 'gdpPercap'],
    opacity=0.4,
    title='Stack vs Grouped Bar : Population of Oceania',
)

fig3.show()


# library / module : plotly.graph_objs


In [None]:
# 라이브러리 임포트 
import plotly.graph_objs as go

In [45]:
# Bar chart built directly from graph objects, using literal x/y lists.

# One width entry per bar, so the last two bars are drawn wider than the rest.
fig4 = go.Figure(
    data=go.Bar(
        x=[1, 2, 3, 5.5, 10],
        y=[10, 8, 6, 4, 2],
        width=[0.8, 0.8, 0.8, 3.5, 4],
    )
)

# Figure title plus x-axis and y-axis labels.
fig4.update_layout(
    title="Practice1 : plotly.graph_objs",
    xaxis_title="x_label",
    yaxis_title="y_label",
)

fig4.show()

----

# Line Plots

- 가로축 : 시간
- 세로축 : 값 (대표값)

In [47]:
# Rebuild the Canada subset and preview its first three rows.
data_canada = px.data.gapminder().loc[lambda frame: frame["country"] == "Canada"]
data_canada.head(n=3)

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap,iso_alpha,iso_num
240,Canada,Americas,1952,68.75,14785584,11367.16112,CAN,124
241,Canada,Americas,1957,69.96,17010154,12489.95006,CAN,124
242,Canada,Americas,1962,71.3,18985849,13462.48555,CAN,124


In [49]:
# Line chart of life expectancy in Canada over the survey years.
fig5 = px.line(
    data_canada,
    x='year',
    y='lifeExp',
    # Title spelling fixed: "expectacy" -> "expectancy".
    title='Line Plots : Life expectancy in Canada',
)

fig5.show()

In [31]:
# Preview the first five rows of the Oceania subset.

data_Oceania.head(5)

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap,iso_alpha,iso_num
60,Australia,Oceania,1952,69.12,8691212,10039.59564,AUS,36
61,Australia,Oceania,1957,70.33,9712569,10949.64959,AUS,36
62,Australia,Oceania,1962,70.93,10794968,12217.22686,AUS,36
63,Australia,Oceania,1967,71.1,11872264,14526.12465,AUS,36
64,Australia,Oceania,1972,71.93,13177000,16788.62948,AUS,36


In [52]:
# Line chart of life expectancy for each Oceania country; colour and marker
# symbol both distinguish the countries.
fig6 = px.line(
    data_Oceania,
    x='year',
    # Was 'pop', which contradicted the title; the chart is about life expectancy.
    y='lifeExp',
    color='country',
    symbol='country',
    # Title spelling fixed: "expectacy" -> "expectancy", "Austrailia" -> "Australia".
    title='Line Plots : Life expectancy in Australia and New Zealand',
)
fig6.show()

In [75]:
# Combine the six figures built above into a single 2 x 3 grid of subplots.
# The figures fill the grid row by row, left to right, in this order.
source_figures = [fig1, fig2, fig3, fig4, fig5, fig6]
n_cols = 3

# Take each subplot title from its source figure instead of retyping it, so the
# grid titles cannot drift away from the titles of the individual charts.
fig = make_subplots(
    rows=2,
    cols=n_cols,
    subplot_titles=[source.layout.title.text for source in source_figures],
)

# Copy every trace of each source figure into its grid cell (row/col are 1-based).
for position, source in enumerate(source_figures):
    row, col = divmod(position, n_cols)
    for trace in source.data:
        fig.add_trace(trace, row=row + 1, col=col + 1)

# NOTE(review): only traces are copied; layout-level settings of the source
# figures (e.g. fig2's barmode='group') are presumably not carried over, so the
# bar subplots may not look identical to the standalone charts. Confirm.

# Overall title; the legend is hidden.
fig.update_layout(title="Canada and Oceania Population Comparison", showlegend=False)
fig.show()

---
# Pie Charts
- 넣을 수 있는 정보가 제한적
- 표현되는 정보가 5개 이상이다 -> 다른 차트 권장

In [58]:
# European countries in the 2007 survey year.
data_Europe_2007 = px.data.gapminder().query("year == 2007 and continent == 'Europe'")

In [60]:
# Pie chart: each European country's share of the 2007 population.
fig7 = px.pie(
    data_Europe_2007,
    values='pop',
    names='country',
    # Title spelling fixed: "contintent" -> "continent".
    title='Pie Charts : population of European continent',
)
fig7.show()

# Donut chart

In [70]:
# Toy data for the donut examples: four categories and their sizes.
labels = list("ABCD")
values = [300, 200, 100, 500]

In [64]:
# Donut chart: a pie trace with a hole of 0.3; the title is set at construction.
fig8 = go.Figure(
    data=[go.Pie(labels=labels, values=values, hole=.3)],
    layout_title_text="Donut Chart Example",
)
fig8.show()

In [72]:
# Pie chart using the pull option: only the third slice gets a non-zero pull (0.3).
fig9 = go.Figure(
    data=[go.Pie(labels=labels, values=values, pull=[0, 0, 0.3, 0])],
    layout_title_text="Donut Chart Example : Pull option",
)

fig9.show()


In [44]:
# Donut chart (hole of 0.3) combined with the pull option on the third slice.
fig = go.Figure(
    go.Pie(labels=labels, values=values, hole=.3, pull=[0, 0, 0.3, 0])
)
fig.show()

In [None]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Show the pie/donut variants side by side in one figure.
# The first two charts reuse `labels` / `values` defined in an earlier cell; the
# unused example lists that used to be declared here were removed.

# 1 row x 3 columns; every cell is declared with type 'pie' in specs.
fig = make_subplots(
    rows=1, cols=3,
    specs=[[{'type': 'pie'}, {'type': 'pie'}, {'type': 'pie'}]],
    # Stray comma removed from "Donut, Chart 2".
    subplot_titles=("Donut Chart 1", "Donut Chart 2", "Donut Chart 3")
)

# First chart: plain donut
fig.add_trace(
    go.Pie(labels=labels, values=values, hole=0.3),
    row=1, col=1
)

# Second chart: donut with the pull option on the third slice
fig.add_trace(
    go.Pie(labels=labels, values=values, hole=0.3, pull=[0, 0, 0.3, 0]),
    row=1, col=2
)

# Third chart: 2007 European population share, built with Plotly Express.
# Uses its own name so the bar chart stored in `fig3` is not overwritten.
europe_pie = px.pie(data_Europe_2007, names='country', values='pop')
for trace in europe_pie.data:
    fig.add_trace(trace, row=1, col=3)

# Overall title; the legend is hidden.
fig.update_layout(
    title_text="Multiple Donut Charts",
    showlegend=False
)

fig.show()


---
# Heatmap
- 들어간 값들의 크기에 따라서 색의 농도로 크기를 표현


In [103]:
data = px.data.gapminder()

# Pivot the data: rows are 'year', columns are 'continent', and each cell is the
# sum of 'pop' over the countries in that continent for that year.
# (Previously pivoted 'lifeExp' by 'country', which contradicted this description
# and the "Population Heatmap by Continent and Year" chart built from it.)
heatmap_data = data.pivot_table(values='pop', index='year', columns='continent', aggfunc='sum')


In [102]:
# Preview the data, then list the distinct continents and survey years.
print(data.head())
for column in ('continent', 'year'):
    print(data[column].unique())


       country continent  year  lifeExp       pop   gdpPercap iso_alpha  \
0  Afghanistan      Asia  1952   28.801   8425333  779.445314       AFG   
1  Afghanistan      Asia  1957   30.332   9240934  820.853030       AFG   
2  Afghanistan      Asia  1962   31.997  10267083  853.100710       AFG   
3  Afghanistan      Asia  1967   34.020  11537966  836.197138       AFG   
4  Afghanistan      Asia  1972   36.088  13079460  739.981106       AFG   

   iso_num  
0        4  
1        4  
2        4  
3        4  
4        4  
['Asia' 'Europe' 'Africa' 'Americas' 'Oceania']
[1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 2002 2007]


In [104]:
# Draw the pivoted table as a heatmap: columns on the x axis, rows (years) on the y axis.
fig = px.imshow(
    heatmap_data,
    # The colour scale encodes the cell value, not a country; the label now
    # matches the population named in the title.
    labels={'x': 'Continent', 'y': 'Year', 'color': 'Population'},
    title="Population Heatmap by Continent and Year"
)

fig.show()

In [101]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns
data.describe()

Unnamed: 0,year,lifeExp,pop,gdpPercap,iso_num
count,1704.0,1704.0,1704.0,1704.0,1704.0
mean,1979.5,59.474439,29601210.0,7215.327081,425.880282
std,17.26533,12.917107,106157900.0,9857.454543,248.305709
min,1952.0,23.599,60011.0,241.165876,4.0
25%,1965.75,48.198,2793664.0,1202.060309,208.0
50%,1979.5,60.7125,7023596.0,3531.846989,410.0
75%,1993.25,70.8455,19585220.0,9325.462346,638.0
max,2007.0,82.603,1318683000.0,113523.1329,894.0


---
# Box plot
- 많은 통계정보를 담고있음
- IQR, 4분위수
- 전통적으로 이상치를 체크하는데 사용


In [105]:
# Load the tips sample dataset, then show its column summary and first three rows.
data_tips = px.data.tips()
data_tips.info()
data_tips.head(n=3)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 244 entries, 0 to 243
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   total_bill  244 non-null    float64
 1   tip         244 non-null    float64
 2   sex         244 non-null    object 
 3   smoker      244 non-null    object 
 4   day         244 non-null    object 
 5   time        244 non-null    object 
 6   size        244 non-null    int64  
dtypes: float64(2), int64(1), object(4)
memory usage: 13.5+ KB


Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3


In [106]:
# Box plot of the total bill per meal time, split by smoker status;
# points='all' also draws every individual observation.
fig = px.box(
    data_tips,
    x="time",
    y="total_bill",
    color="smoker",
    points="all",
    title="Distributions of total bill per each clients who smoke or not on lunch / dinner time",
)

fig.show()

In [None]:
# Box plot of the total bill per day, split by smoker status;
# points='all' also draws every individual observation.
fig = px.box(
    data_tips,
    x="day",
    y="total_bill",
    color="smoker",
    points="all",
    title="Distributions of total bill per each clients who smoke or not on each days",
)
fig.show()


# Bubble charts
- 산점도랑 비슷
- 버블의 크기에 따라서 정보의 크기를 직관적으로 파악

In [107]:
# Load the data
# data : gapminder()
data = px.data.gapminder()

In [108]:
# Bubble chart for 2007: GDP per capita vs life expectancy, with bubble size
# taken from population and colour from continent.
fig = px.scatter(
    data[data["year"] == 2007],
    x="gdpPercap",
    y="lifeExp",
    color="continent",
    size="pop",
)
fig.show()

# Treemap
- 입력되는 값들의 비중을 나타냄
- 정보가 계층적으로 들어감 - 세분화되어 표현됨

In [63]:
# 2007 rows of the gapminder dataset.
data = px.data.gapminder().loc[lambda frame: frame["year"] == 2007]

# Treemap hierarchy: a constant 'World' root, then continent, then country.
hierarchy = [px.Constant('World'), 'continent', 'country']

# Tile size comes from population, tile colour from life expectancy.
fig = px.treemap(data, path=hierarchy, values="pop", color="lifeExp")

fig.show()