In [1]:
from sqlalchemy import create_engine
import pandas as pd
import os
from dotenv import load_dotenv

load_dotenv(verbose=True,
            dotenv_path='./.env')
engine = create_engine(f'postgresql://{os.getenv("USERNAME")}:{os.getenv("PASSWORD")}@{os.getenv("HOST")}/{os.getenv("DATABASE")}', 
                       connect_args={'options': '-csearch_path={}'.format('de')})

In [2]:
drug_exposure = pd.read_sql_table('drug_exposure', engine)

# No.3
drug_exposure 테이블은 환자가 병원에서 처방받은 약의 종류와 처방시작일과 종료일에 대한 정보를 포함하고 있습니다.  
환자번호 ‘1891866’ 환자의 약 처방 데이터에서 처방된 약의 종류별로 처음 시작일, 마지막 종료일, 복용일(마지막 종료일과 처음시작일의 차이)을 구하고 복용일이 긴 순으로 정렬하여 테이블을
생성합니다.  
- 환자번호 : person_id, 약의 종류 : drug_concept_id, 처방시작일 : drug_exposure_start_date, 처방종료일 : drug_exposure_end_date

In [3]:
drug_exposure.head()

Unnamed: 0,drug_exposure_id,person_id,drug_concept_id,drug_exposure_start_date,drug_exposure_start_datetime,drug_exposure_end_date,drug_exposure_end_datetime,verbatim_end_date,drug_type_concept_id,stop_reason,...,sig,route_concept_id,lot_number,provider_id,visit_occurrence_id,visit_detail_id,drug_source_value,drug_source_concept_id,route_source_value,dose_unit_source_value
0,40900862,26922,19073183,2017-05-04,2017-05-04 01:41:54,2017-05-18,2017-05-18 01:41:54,2017-05-18,38000177,,...,,0,0,0,99499216,0,308182,19073183,,
1,40757313,2955,40231925,2016-07-24,2016-07-24 13:28:53,2016-09-23,2016-09-23 13:28:53,2016-09-23,38000177,,...,,0,0,0,9251642,0,1049221,40231925,,
2,52808614,2955,40229134,2015-04-02,2015-04-02 13:28:53,2015-04-16,2015-04-16 13:28:53,2015-04-16,38000177,,...,,0,0,0,57618650,0,1043400,40229134,,
3,52808615,2955,1115171,2016-07-24,2016-07-24 13:28:53,2016-08-23,2016-08-23 13:28:53,2016-08-23,38000177,,...,,0,0,0,9251642,0,849574,1115171,,
4,111107864,2955,40213154,2011-04-04,2011-04-04 13:28:53,2011-04-04,2011-04-04 13:28:53,2011-04-04,581452,,...,,0,0,0,57618654,0,140,40213154,,


In [4]:
from datetime import timedelta

In [5]:
# 환자의 복용 내역과 각 복용 케이스 별 복용 기간을 구한다
patient = drug_exposure[drug_exposure.person_id == 1891866][['person_id', 'drug_concept_id', 'drug_exposure_start_date', 'drug_exposure_end_date']]
patient['period'] = patient['drug_exposure_end_date'] - patient['drug_exposure_start_date'] + timedelta(days=1)

In [6]:
patient.head()

Unnamed: 0,person_id,drug_concept_id,drug_exposure_start_date,drug_exposure_end_date,period
29761,1891866,19009384,1959-12-01,1959-12-01,1 days
29762,1891866,19009384,1965-02-02,1965-04-20,78 days
29763,1891866,19009384,1965-04-20,1965-04-27,8 days
29764,1891866,19009384,1965-04-27,1965-11-16,204 days
29765,1891866,19009384,1965-11-16,1966-02-15,92 days


In [7]:
# groupby를 이용해 약의 종류 별로 총 복용 기간의 합을 구한다
grouped = patient['period'].groupby(patient['drug_concept_id'])
answer3 = grouped.sum().sort_values(ascending=False)

In [8]:
answer3.reset_index()

Unnamed: 0,drug_concept_id,period
0,19009384,14424 days
1,1539463,5484 days
2,19030765,1214 days
3,40213154,10 days
4,40213227,1 days
