# 의약품처방정보
* https://www.data.go.kr/dataset/15007117/fileData.do
* 의약품처방정보는 2002년부터 국민건강보험 가입자 중 의약품처방이력이 있는 각 연도별 수진자 100만 명에 대한 기본정보(성, 연령대, 시도코드 등)와 의약품처방전별 개별 의약품에 대한 처방내역(요양개시일자, 1회투약량, 1일투약량, 총투여일수 등)으로 구성된 개방데이터이다.
* 약품일반성분명코드 : http://www.hira.or.kr/rf/medicine/getHistoryList.do?pgmid=HIRAA030035020000
* <img src="https://i.imgur.com/hsrpJp4.png">

* 우루사(대웅제약) 의약품 검색 결과 : http://medinavi.co.kr/search_medicine.asp?keyword1=&keyword2=%EC%9A%B0%EB%A3%A8%EC%82%AC&keyword3=%EB%8C%80%EC%9B%85%EC%A0%9C%EC%95%BD

```
우루사정100밀리그램 246501ATB
우루사정200밀리그램 246502ATB
우루사정300밀리그램 246506ATB
우루사100mg연질캅셀 246501ACS
우루사캅셀200mg 246502ACH
우루사캡슐250밀리그램 246503ACH
고덱스캡슐 427800ACH
```


<img src="https://i.imgur.com/4dEl5Dl.jpg">

출처 : http://www.docdocdoc.co.kr/news/articleView.html?idxno=1053667

## 라이브러리 로드

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns

%matplotlib inline

## 데이터 로드

In [2]:
# List the raw prescription CSV files stored under open_drug/.
%ls open_drug/

NHIS_OPEN_T60_2011_part1.CSV*
NHIS_OPEN_T60_2011_part2.CSV*
NHIS_OPEN_T60_2012_part1.CSV*
NHIS_OPEN_T60_2012_part2.CSV*
NHIS_OPEN_T60_2013_part1.CSV*
NHIS_OPEN_T60_2013_part2.CSV*
NHIS_OPEN_T60_2014_part1.CSV*
NHIS_OPEN_T60_2014_part2.CSV*
NHIS_OPEN_T60_2015_part1.CSV*
NHIS_OPEN_T60_2015_part2.CSV*
NHIS_OPEN_T60_2016_PART1.csv*
NHIS_OPEN_T60_2016_PART2.csv*
NHIS_OPEN_T60_2018_PART1.csv
NHIS_OPEN_T60_2018_PART2.csv
NHIS_OPEN_T60_2018_PART3.csv
국민건강보험공단_의약품처방정보_2017(1).CSV
국민건강보험공단_의약품처방정보_2017(2).CSV
국민건강보험공단_의약품처방정보_2017(3).CSV


In [11]:
# List the filtered extracts (data/ursa_*.csv) that have already been saved.
%ls data/ursa*

data/ursa_2011_1.csv     data/ursa_2014_2.csv     data/ursa_2017_2016.csv
data/ursa_2011_2.csv     data/ursa_2015_1.csv     data/ursa_2017_3.csv
data/ursa_2012_1.csv     data/ursa_2015_2.csv     data/ursa_2018_1.csv
data/ursa_2012_2.csv     data/ursa_2016_1.csv     data/ursa_2018_2.csv
data/ursa_2013_1.csv     data/ursa_2016_2.csv     data/ursa_2018_3.csv
data/ursa_2013_2.csv     data/ursa_2017_1.csv
data/ursa_2014_1.csv     data/ursa_2017_2.csv


In [4]:
# Pick the yearly file to load by setting `year` and `part`.
# NOTE(review): the original note listed 2015, 2014, 2013, 2012, 2011 here —
# presumably the other years this cell is re-run for; confirm.
year, part = 2017, 3

# The 2017 release uses a Korean file name. For the releases named
# NHIS_OPEN_T60_<year>_PART<part>, swap the path for
#   f"open_drug/NHIS_OPEN_T60_{year}_PART{part}.CSV"
source_csv = f"open_drug/국민건강보험공단_의약품처방정보_{year}({part}).CSV"

# The source files are cp949-encoded, so the encoding is passed explicitly.
nhis = pd.read_csv(source_csv, encoding="cp949")

# Show (rows, columns) of the loaded file.
nhis.shape

(9203451, 15)

In [12]:
# General ingredient codes of the products to keep:
#   246501ATB  Ursa tablet 100 mg
#   246502ATB  Ursa tablet 200 mg
#   246506ATB  Ursa tablet 300 mg
#   246501ACS  Ursa soft capsule 100 mg
#   246502ACH  Ursa capsule 200 mg
#   246503ACH  Ursa capsule 250 mg
#   427800ACH  Godex capsule
target_codes = [
    "246501ATB", "246502ATB", "246506ATB",
    "246501ACS", "246502ACH", "246503ACH", "427800ACH",
]

# Boolean mask: True for rows whose ingredient code is one of the targets.
is_target = nhis["약품일반성분명코드"].isin(target_codes)

# Keep only the matching prescription rows and display them.
ursa = nhis[is_target]
ursa

Unnamed: 0,기준년도,가입자일련번호,처방내역일련번호,일련번호,성별코드,연령대코드(5세단위),시도코드,요양개시일자,약품일반성분명코드,1회 투약량,1일투약량,총투여일수,단가,금액,데이터 공개일자
2069,2017,764717,275265,1,1,14,11,20170524,246502ATB,1.0,3,30,181.0,16290,20181126
2074,2017,764717,4572945,1,1,14,11,20170425,246502ATB,1.0,3,30,181.0,16290,20181126
2096,2017,764717,20045648,1,1,14,11,20170123,246502ATB,1.0,3,30,181.0,16290,20181126
2105,2017,764717,28098469,1,1,14,11,20170223,246502ATB,1.0,3,30,181.0,16290,20181126
2135,2017,764717,41002118,1,1,14,11,20170327,246502ATB,1.0,3,30,181.0,16290,20181126
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9199737,2017,999901,33008472,1,1,11,28,20170302,246501ATB,1.0,2,70,90.0,12600,20181126
9199744,2017,999901,34238938,1,1,11,28,20170913,246501ATB,1.0,2,70,90.0,12600,20181126
9199755,2017,999901,47457167,1,1,11,28,20170717,246501ATB,1.0,2,56,90.0,10080,20181126
9200941,2017,999939,19460073,3,1,7,43,20170501,246501ATB,2.0,3,7,90.0,3780,20181126


In [6]:
# Select only the Godex rows (code 427800ACH) and report how many there are
# via the (rows, columns) shape.
is_godex = ursa["약품일반성분명코드"] == "427800ACH"
ursa[is_godex].shape

(4703, 15)

In [7]:
# (rows, columns) of the full filtered selection.
ursa.shape

(17042, 15)

In [8]:
# Count how many rows each general ingredient code has in the selection.
ingredient_codes = ursa["약품일반성분명코드"]
ingredient_codes.value_counts()

246501ATB    6242
246502ATB    5958
427800ACH    4703
246506ATB     134
246503ACH       5
Name: 약품일반성분명코드, dtype: int64

In [9]:
# Write the filtered rows to a per-year, per-part CSV file; the DataFrame
# index is not written.
output_csv = f"data/ursa_{year}_{part}.csv"
ursa.to_csv(output_csv, index=False)

In [10]:
# Read the saved file back and show its shape to confirm it was written
# correctly.
saved = pd.read_csv(f"data/ursa_{year}_{part}.csv")
saved.shape

(17042, 15)