In [120]:
import os

import chart_studio
import cufflinks as cf
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
from plotly.offline import iplot, init_notebook_mode
from plotly.subplots import make_subplots

cf.go_offline(connected=True)

In [121]:
# Read the raw claims table, discard the stray 'Unnamed: 0' column that the
# CSV carries, and preview the first rows.
df = pd.read_csv('raw_data.csv').drop(columns=['Unnamed: 0'])
df.head()

Unnamed: 0,항목,성별구분,연령구분5세,환자수,내원일수,청구건수,요양급여비용총액,보험자부담금,본인부담금,연도
0,난청,2,17,548913,1159130,1108228,76405654,50910783,25494871,2017
1,난청,0,18,255128,541714,518350,36019274,23825287,12193987,2017
2,난청,0,0,4944,12132,11721,3783566,3151499,632067,2017
3,난청,0,1,4119,8540,8239,1076442,726551,349891,2017
4,난청,0,2,4885,9151,8701,1108291,735925,372366,2017


In [122]:
# Some rows have a total care-benefit cost (요양급여비용총액) of 0; they are
# removed first so the ratio below never divides by zero.
# Coverage ratio (공제비율) = insurer-paid amount (보험자부담금) / total cost, in percent.
# Only the columns needed for the analysis are kept afterwards.
df = (
    df.loc[df['요양급여비용총액'] != 0]
      .assign(**{'공제비율': lambda d: d['보험자부담금'] / d['요양급여비용총액'] * 100})
      [['항목', '성별구분', '연령구분5세', '환자수', '공제비율', '연도']]
)

# Split by sex code: the notebook treats 0 as male (M) and 1 as female (F).
# NOTE(review): the coding is not documented in this file - confirm it against the data dictionary.
M = df.loc[df['성별구분'].eq(0)]
F = df.loc[df['성별구분'].eq(1)]

In [123]:
# Sanity check: list the age-code-18 rows of the male subset for the disease
# '자궁부속기종양_악성' (malignant tumour of the uterine adnexa).
# The filter is built from `M` directly because `Mf` is only created in a
# later cell; referencing it here fails on a fresh kernel (Restart & Run All).
# NOTE(review): a uterine diagnosis showing up under the "male" code suggests
# either dirty source data or a swapped sex coding - verify.
M[(M['연령구분5세'] == 18) & (M['항목'] == '자궁부속기종양_악성')]

Unnamed: 0,항목,성별구분,연령구분5세,환자수,공제비율,연도
6268,자궁부속기종양_악성,0,18,685,91,2017
14570,자궁부속기종양_악성,0,18,700,91,2018
22872,자궁부속기종양_악성,0,18,783,90,2019
31174,자궁부속기종양_악성,0,18,772,90,2020
39476,자궁부속기종양_악성,0,18,887,89,2021


## 남/여 연도별 공제비율 top 10 / down 10

In [124]:
# Keep only the age-code-18 rows, which the original author labels as the
# per-sex subtotal ("소계") rows.
Mf = M[M['연령구분5세'] == 18]   # male subtotal
Ff = F[F['연령구분5세'] == 18]   # female subtotal

# '고환암' (testicular cancer) is treated as an outlier in the female subset.
# Filter on the disease name instead of hard-coded index labels, so the cell
# keeps working (and keeps removing the right rows) if the CSV changes.
Ff = Ff[Ff['항목'] != '고환암']

### 남자

In [125]:
# Male subtotal rows, split once per year (dict keeps the 2017..2021 order).
_male_by_year = {yr: Mf[Mf['연도'] == yr] for yr in (2017, 2018, 2019, 2020, 2021)}

# Male top 10: the ten highest coverage ratios of each year.
Mt2017, Mt2018, Mt2019, Mt2020, Mt2021 = (
    rows.sort_values(by='공제비율', ascending=False).head(10)
    for rows in _male_by_year.values()
)

# Male down 10: the ten lowest coverage ratios of each year.
Md2017, Md2018, Md2019, Md2020, Md2021 = (
    rows.sort_values(by='공제비율', ascending=True).head(10)
    for rows in _male_by_year.values()
)

In [126]:
# Male top 10: one bar panel per year (2017-2021) on a 2x3 grid.
# The sixth grid cell (row 2, col 3) stays empty.
fig1_panels = [
    # (trace name, ranking frame, bar colour, grid row, grid col)
    ('2017', Mt2017, '#80489C', 1, 1),
    ('2018', Mt2018, '#FFD372', 1, 2),
    ('2019', Mt2019, '#4682B4', 1, 3),
    ('2020', Mt2020, '#AACB73', 2, 1),
    ('2021', Mt2021, '#FD8A8A', 2, 2),
]

fig1 = make_subplots(rows=2, cols=3,
                     subplot_titles=("2017", "2018", "2019", "2020", "2021"),  # one title per panel
                     column_widths=[0.1, 0.1, 0.1])

for trace_name, ranked, colour, grid_row, grid_col in fig1_panels:
    fig1.add_trace(
        go.Bar(
            x=ranked['항목'], y=ranked['공제비율'],
            name=trace_name,
            # The ratio is an unrounded float; round only the label so the bar
            # text stays readable while the bar height keeps full precision.
            text=ranked['공제비율'].round(1),
            textposition='auto',
            marker={'color': colour},
        ), row=grid_row, col=grid_col
    )

fig1.update_yaxes(range=[60, 100])
fig1.update_layout(title=dict(text = '<b>공제비율이 높은 질병(남성)</b><br><sup>공제비율 = 보험자부담금 / 요양급여비용총액</sup>', x=0.5, y=0.87),
    yaxis_title=dict(text = "<b>공제비율(%)</b>"))
fig1.update_layout(template='plotly_white')

fig1.show()

In [127]:
# Male down 10: one bar panel per year (2017-2021) on a 2x3 grid.
# The sixth grid cell (row 2, col 3) stays empty.
fig2_panels = [
    # (trace name, ranking frame, bar colour, grid row, grid col)
    ('2017', Md2017, '#80489C', 1, 1),
    ('2018', Md2018, '#FFD372', 1, 2),
    ('2019', Md2019, '#4682B4', 1, 3),
    ('2020', Md2020, '#AACB73', 2, 1),
    ('2021', Md2021, '#FD8A8A', 2, 2),
]

fig2 = make_subplots(rows=2, cols=3,
                     subplot_titles=("2017", "2018", "2019", "2020", "2021"),  # one title per panel
                     column_widths=[0.1, 0.1, 0.1])

for trace_name, ranked, colour, grid_row, grid_col in fig2_panels:
    fig2.add_trace(
        go.Bar(
            x=ranked['항목'], y=ranked['공제비율'],
            name=trace_name,
            # Round only the label; the bar height keeps the exact ratio.
            text=ranked['공제비율'].round(1),
            textposition='auto',
            marker={'color': colour},
        ), row=grid_row, col=grid_col
    )

fig2.update_yaxes(range=[30, 70])
fig2.update_layout(title=dict(text = '<b>공제비율이 낮은 질병(남성)</b><br><sup>공제비율 = 보험자부담금 / 요양급여비용총액</sup>', x=0.5, y=0.87),
    yaxis_title=dict(text = "<b>공제비율(%)</b>"))
fig2.update_layout(template='plotly_white')

fig2.show()

### 여자

In [128]:
# Female subtotal rows, split once per year (dict keeps the 2017..2021 order).
_female_by_year = {yr: Ff[Ff['연도'] == yr] for yr in (2017, 2018, 2019, 2020, 2021)}

# Female top 10: the ten highest coverage ratios of each year.
Ft2017, Ft2018, Ft2019, Ft2020, Ft2021 = (
    rows.sort_values(by='공제비율', ascending=False).head(10)
    for rows in _female_by_year.values()
)

# Female down 10: the ten lowest coverage ratios of each year.
Fd2017, Fd2018, Fd2019, Fd2020, Fd2021 = (
    rows.sort_values(by='공제비율', ascending=True).head(10)
    for rows in _female_by_year.values()
)

In [129]:
# Female top 10: one bar panel per year (2017-2021) on a 2x3 grid.
# The sixth grid cell (row 2, col 3) stays empty.
fig3_panels = [
    # (trace name, ranking frame, bar colour, grid row, grid col)
    ('2017', Ft2017, '#80489C', 1, 1),
    ('2018', Ft2018, '#FFD372', 1, 2),
    ('2019', Ft2019, '#4682B4', 1, 3),
    ('2020', Ft2020, '#AACB73', 2, 1),
    ('2021', Ft2021, '#FD8A8A', 2, 2),
]

fig3 = make_subplots(rows=2, cols=3,
                     subplot_titles=("2017", "2018", "2019", "2020", "2021"),  # one title per panel
                     column_widths=[0.1, 0.1, 0.1])

for trace_name, ranked, colour, grid_row, grid_col in fig3_panels:
    fig3.add_trace(
        go.Bar(
            x=ranked['항목'], y=ranked['공제비율'],
            name=trace_name,
            # Round only the label; the bar height keeps the exact ratio.
            text=ranked['공제비율'].round(1),
            textposition='auto',
            marker={'color': colour},
        ), row=grid_row, col=grid_col
    )

fig3.update_yaxes(range=[60, 100])
fig3.update_layout(title=dict(text = '<b>공제비율이 높은 질병(여성)</b><br><sup>공제비율 = 보험자부담금 / 요양급여비용총액</sup>', x=0.5, y=0.87),
    yaxis_title=dict(text = "<b>공제비율(%)</b>"))
fig3.update_layout(template='plotly_white')

fig3.show()

In [130]:
# Female down 10: one bar panel per year (2017-2021) on a 2x3 grid.
# The sixth grid cell (row 2, col 3) stays empty.
fig4_panels = [
    # (trace name, ranking frame, bar colour, grid row, grid col)
    ('2017', Fd2017, '#80489C', 1, 1),
    ('2018', Fd2018, '#FFD372', 1, 2),
    ('2019', Fd2019, '#4682B4', 1, 3),
    ('2020', Fd2020, '#AACB73', 2, 1),
    ('2021', Fd2021, '#FD8A8A', 2, 2),
]

fig4 = make_subplots(rows=2, cols=3,
                     subplot_titles=("2017", "2018", "2019", "2020", "2021"),  # one title per panel
                     column_widths=[0.1, 0.1, 0.1])

for trace_name, ranked, colour, grid_row, grid_col in fig4_panels:
    fig4.add_trace(
        go.Bar(
            x=ranked['항목'], y=ranked['공제비율'],
            name=trace_name,
            # Round only the label; the bar height keeps the exact ratio.
            text=ranked['공제비율'].round(1),
            textposition='auto',
            marker={'color': colour},
        ), row=grid_row, col=grid_col
    )

fig4.update_yaxes(range=[30, 70])
fig4.update_layout(title=dict(text = '<b>공제비율이 낮은 질병(여성)</b><br><sup>공제비율 = 보험자부담금 / 요양급여비용총액</sup>', x=0.5, y=0.87),
    yaxis_title=dict(text = "<b>공제비율(%)</b>"))
fig4.update_layout(template='plotly_white')

fig4.show()

### 확인용

In [131]:
# Mt2021
# Md2021
# Mt2017['항목'] == '자궁부속기종양_악성'

In [132]:
# Ft2021
# Fd2017

## 질병별 공제비율 top 10 / down 10 (남/여)

In [133]:
# Per-disease average over the available years (2017-2021).
# `mean` divides by the number of rows each disease actually has; the previous
# `sum() / 5` understated the average for any disease missing a year (rows with
# a zero total cost are dropped during cleaning, so that can happen).
# All numeric columns are averaged, so the column layout matches the old result;
# only '공제비율' is meaningful downstream.

# Male
Mall = Mf.groupby('항목').mean(numeric_only=True)

# Female
Fall = Ff.groupby('항목').mean(numeric_only=True)

In [134]:
# Overall rankings by averaged coverage ratio (M = male, F = female).
# reset_index() turns the disease name ('항목') back into a regular column
# so it can be used as the x-axis of the charts.

# Top 10: highest ratios first.
Mtall = Mall.sort_values(by='공제비율', ascending=False).head(10).reset_index()
Ftall = Fall.sort_values(by='공제비율', ascending=False).head(10).reset_index()

# Down 10: lowest ratios first.
Mdall = Mall.sort_values(by='공제비율', ascending=True).head(10).reset_index()
Fdall = Fall.sort_values(by='공제비율', ascending=True).head(10).reset_index()


In [135]:
# Male / female top 10 by averaged coverage ratio, side by side.
fig5_panels = [
    # (trace name, ranking frame, bar colour, grid col)
    ('남자', Mtall, '#6495ED', 1),
    ('여자', Ftall, '#F08080', 2),
]

fig5 = make_subplots(rows=1, cols=2,
                     subplot_titles=("남성", "여성"),  # one title per panel
                     column_widths=[0.3, 0.3])

for trace_name, ranked, colour, grid_col in fig5_panels:
    fig5.add_trace(
        go.Bar(
            x=ranked['항목'], y=ranked['공제비율'],
            name=trace_name,
            # Labels come from the same frame as the bars. The male panel used
            # to be labelled with the female values (copy-paste slip).
            # Round only the label; the bar height keeps the exact ratio.
            text=ranked['공제비율'].round(1),
            textposition='auto',
            marker={'color': colour},
        ), row=1, col=grid_col
    )

fig5.update_yaxes(range=[60, 100])
fig5.update_layout(title=dict(text = '<b>질병별 공제비율이 높은 질병 순위(남/여)</b><br><sup>공제비율 = 보험자부담금 / 요양급여비용총액</sup>', x=0.5, y=0.87),
    yaxis_title=dict(text = "<b>공제비율(%)</b>"))
fig5.update_layout(template='plotly_white')

fig5.show()

In [136]:
# Male / female down 10 by averaged coverage ratio, side by side.
fig6_panels = [
    # (trace name, ranking frame, bar colour, grid col)
    ('남자', Mdall, '#6495ED', 1),
    ('여자', Fdall, '#F08080', 2),
]

fig6 = make_subplots(rows=1, cols=2,
                     subplot_titles=("남성", "여성"),  # one title per panel
                     column_widths=[0.3, 0.3])

for trace_name, ranked, colour, grid_col in fig6_panels:
    fig6.add_trace(
        go.Bar(
            x=ranked['항목'], y=ranked['공제비율'],
            name=trace_name,
            # Round only the label; the bar height keeps the exact ratio.
            text=ranked['공제비율'].round(1),
            textposition='auto',
            marker={'color': colour},
        ), row=1, col=grid_col
    )

fig6.update_yaxes(range=[30, 70])
fig6.update_layout(title=dict(text = '<b>질병별 공제비율이 낮은 질병 순위(남/여)</b><br><sup>공제비율 = 보험자부담금 / 요양급여비용총액</sup>', x=0.5, y=0.87),
    yaxis_title=dict(text = "<b>공제비율(%)</b>"))
fig6.update_layout(template='plotly_white')

fig6.show()

# 연령별 분석 (TODO: 팀 회의 후 진행 예정)

In [114]:
# Chart Studio credentials are read from the environment so that no secret is
# stored in the notebook. Set CHART_STUDIO_API_KEY (and optionally
# CHART_STUDIO_USERNAME) before running this cell; a missing key raises KeyError.
# NOTE(review): an API key used to be hard-coded here - treat it as leaked and rotate it.
username = os.environ.get('CHART_STUDIO_USERNAME', 'chaeyoungsss')
api_key = os.environ['CHART_STUDIO_API_KEY']
chart_studio.tools.set_credentials_file(username=username, api_key=api_key)

In [115]:
# Publish figures through chart_studio's `py.plot`, one call per figure, each
# under the given file name. The value of the last call is what the cell
# displays (a plotly.com URL in the saved output).
# NOTE(review): fig1 / fig2 are the male-only per-year charts, but the file
# names do not mention sex - confirm the intended titles.
import chart_studio.plotly as py
py.plot(fig1, filename = '연도별 공제비율이 높은 질병 순위', auto_open=True)
py.plot(fig2, filename = '연도별 공제비율이 낮은 질병 순위', auto_open=True)
# Disabled uploads, kept for reference.
# NOTE(review): these file names say "per disease" (질병별), yet in this notebook
# fig3 / fig4 are the female per-year charts; the per-disease charts are fig5 / fig6.
# py.plot(fig3, filename = '질병별 공제비율이 높은 질병 순위', auto_open=True)
# py.plot(fig4, filename = '질병별 공제비율이 낮은 질병 순위', auto_open=True)

'https://plotly.com/~chaeyoungsss/24/'