### Clinical Manual Querying
* py2sas(saspy)를 통해 임상시험 간 발생할 수 있는 query listing을 수행하고자 함
* 가능한 한 SAS 및 SAS SQL 래퍼(proc sql)를 사용함

#### 환경
* 임상시험 [SAS Datasets (dmisimportant)](https://cafe.naver.com/dmisimportant/104)
* SAS ODA
* miniconda, saspy

#### SAS 세션 연결 및 설정
* SAS 세션 설정 및 SAS 오브젝트 획득
* SAS 라이브러리 설정

In [20]:
import saspy
import pandas as pd

# Locations of the saspy configuration file and the database specification
# workbook used throughout the notebook.
sascfgfile = "c:/code/pub/ct/sascfg.py"
dbsfile = "c:/code/CUBEDEMO2017/spec.xlsx"

# Open the SAS session first, then load the specification into a DataFrame.
sas = saspy.SASsession(cfgfile=sascfgfile)
dbs = pd.read_excel(dbsfile)

# Assign the CUBEDEMO library on the SAS side; the submit result (log/listing)
# is discarded on purpose.
_ = sas.submit("""
%let PATH=/home/u63544628/cubedemo;
libname CUBEDEMO "&PATH.";
""")

Using SAS Config named: oda
SAS Connection established. Subprocess id is 5108



#### Dataset 확인
* 라이브러리가 로드됐는지 확인하기 위함

In [2]:
# One SASdata handle per domain listed in the specification workbook.
datasets = {q: sas.sasdata(q, "CUBEDEMO") for q in dbs.DOMAIN.unique()}

# Snapshot the column dictionary of the CUBEDEMO library into WORK.FMT so it
# can be pulled into pandas. PROC SQL is ended with QUIT (RUN has no effect
# inside PROC SQL and would leave the procedure running).
_ = sas.submit("""
proc sql;
create table FMT as
    select * from DICTIONARY.COLUMNS where libname="CUBEDEMO"
;
quit;
""")
fmt = sas.sd2df("FMT")
# Display only the columns that contain no missing values.
fmt.dropna(axis=1)

Unnamed: 0,libname,memname,memtype,name,type,length,npos,varnum,sortedby,xtype,notnull,precision,transcode
0,CUBEDEMO,AE,DATA,SUBJID,char,24.0,160.0,1.0,0.0,char,no,0.0,yes
1,CUBEDEMO,AE,DATA,VISIT,num,8.0,0.0,2.0,0.0,num,no,0.0,yes
2,CUBEDEMO,AE,DATA,SEQ,num,8.0,8.0,3.0,0.0,num,no,0.0,yes
3,CUBEDEMO,AE,DATA,AETERM,char,765.0,184.0,4.0,0.0,char,no,0.0,yes
4,CUBEDEMO,AE,DATA,AESTDTC,char,30.0,949.0,5.0,0.0,char,no,0.0,yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...
286,CUBEDEMO,VS,DATA,SYSBP,num,8.0,32.0,7.0,0.0,num,no,0.0,yes
287,CUBEDEMO,VS,DATA,DIABP,num,8.0,40.0,8.0,0.0,num,no,0.0,yes
288,CUBEDEMO,VS,DATA,PULSE,num,8.0,48.0,9.0,0.0,num,no,0.0,yes
289,CUBEDEMO,VS,DATA,RESP,num,8.0,56.0,10.0,0.0,num,no,0.0,yes


#### SAE Listing
* SAE Reconciliation을 위한 listing
* CUBEDEMO 데이터셋에는 **특정 도메인에만 존재하는 USUBJID나 대부분의 값이 null인 row가 있음**
    * 쿼리 결과에 인덱스나 변수값 대부분이 null인 row가 있을 수 있음

In [15]:
# Build the SAE reconciliation listing (WORK.SAEL) and load it into pandas.
#
# Steps performed on the SAS side:
#   1. Create small code -> label lookup tables (LBL*) from inline datalines.
#      LBLYN is created but not referenced by the listing query below.
#   2. Create IDXDM from CUBEDEMO.DM: USUBJID keeps the full subject id,
#      STUDYID is its first character, SITEID the two characters after the
#      first hyphen, and SUBJID is reduced to the last three characters.
#   3. Right-join IDXDM to CUBEDEMO.AE (every AE row is kept, even without a
#      matching DM row) and decode the coded AE/DM variables via the lookups.
#      SERIOUS is "NO" when AESER is 1 or missing, otherwise "YES".
#
# Changes from the previous revision:
#   * label typo "POSSIBILY RELATED" corrected to "POSSIBLY RELATED";
#   * PROC SQL is terminated with QUIT instead of RUN.
_ = sas.submit("""
data LBLYN;
infile datalines dlm="," dsd;
input CD LBL $50.;
datalines;
1,YES
2,NO
;
run;

data LBLSEX;
infile datalines dlm="," dsd;
input CD SEX $50.;
datalines;
1,MALE
2,FEMALE
;
run;

data LBLOUT;
infile datalines dlm="," dsd;
input CD OUTCOME $50.;
datalines;
1,FATAL
2,NOT RECOVERED/NOT RESOLVED
3,RECOVERED/RESOLVED
4,RECOVERED/RESOLVED WITH SEQUELAE
5,RECOVERING/RESOLVING
6,UNKNOWN
;
run;

data LBLSER;
infile datalines dlm="," dsd;
input CD SAE_CATEGORY $50.;
datalines;
1,NO
2,DEATH
3,HOSPITALIZATION
4,LIFE THREATENING
5,CONGENITAL ANOMALY OR BIRTH DEFECT
6,SIGNIFICANT DISABILITY
7,OTHER MEDICALLY IMPORTANT EVENT
;
run;

data LBLSEV;
infile datalines dlm="," dsd;
input CD SEVERITY $50.;
datalines;
1,MILD
2,MODERATE
3,SEVERE
;
run;

data LBLREL;
infile datalines dlm="," dsd;
input CD RELATIONSHIP $50.;
datalines;
1,NOT RELATED
2,UNLIKELY RELATED
3,POSSIBLY RELATED
4,RELATED
;
run;

data LBLACN;
infile datalines dlm="," dsd;
input CD ACTION_TAKEN $50.;
datalines;
1,DOSE INCREASED
2,DOSE NOT CHANGED
3,DOSE REDUCED
4,DRUG INTERRUPTED
5,DRUG WITHDRAWN
6,NOT APPLICABLE
7,UNKNOWN
;
run;

data IDXDM;
set CUBEDEMO.DM;
drop FERTILE BRTHDTC;
USUBJID=SUBJID;
STUDYID=substr(SUBJID,1,1);
SITEID=substr(SUBJID,find(SUBJID,"-")+1,2);
SUBJID=substr(SUBJID,length(SUBJID)-2,3);
run;

proc sql;
create table SAEL as
select 
USUBJID,STUDYID,SITEID,SUBJID,SEX,AGE,SEQ,AETERM,AESTDTC,AEENDTC,
SEVERITY,RELATIONSHIP,ACTION_TAKEN,OUTCOME,SERIOUS,SAE_CATEGORY
from (
    select IDXDM.USUBJID,IDXDM.STUDYID,IDXDM.SITEID,IDXDM.SUBJID,IDXDM.AGE,
    AE.SEQ,AE.AETERM,AE.AESTDTC,AE.AEENDTC,
    IDXDM.SEX as SEX_,
    AE.AESEV as SEV_,
    AE.AEREL as REL_,
    AE.AEACN as ACN_,
    AE.AEOUT as OUT_,
    AE.AESER as SER_,
    case when (AE.AESER=1 or AE.AESER is null) then "NO" else "YES" end as SERIOUS
    from IDXDM
    right join CUBEDEMO.AE as AE on IDXDM.USUBJID=AE.SUBJID
    )
left join LBLSEX on SEX_=LBLSEX.CD
left join LBLSEV on SEV_=LBLSEV.CD
left join LBLREL on REL_=LBLREL.CD
left join LBLACN on ACN_=LBLACN.CD
left join LBLOUT on OUT_=LBLOUT.CD
left join LBLSER on SER_=LBLSER.CD
order by SITEID,SUBJID
;
quit;
""")
sael = sas.sd2df("SAEL").set_index("USUBJID")
sael.head()

Unnamed: 0_level_0,STUDYID,SITEID,SUBJID,SEX,AGE,SEQ,AETERM,AESTDTC,AEENDTC,SEVERITY,RELATIONSHIP,ACTION_TAKEN,OUTCOME,SERIOUS,SAE_CATEGORY
USUBJID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
,,,,,,1.0,TEST,2017-02-01,2017-02-16,SEVERE,UNLIKELY RELATED,DOSE REDUCED,NOT RECOVERED/NOT RESOLVED,YES,LIFE THREATENING
,,,,,,1.0,asdf,2017-02-08,,,,,RECOVERING/RESOLVING,NO,
,,,,,,2.0,Cold,2017-02-02,2017-02-14,MILD,RELATED,DRUG INTERRUPTED,UNKNOWN,YES,DEATH
S-1Z-005,S,1Z,5.0,FEMALE,33.0,1.0,Limb discomfort,2016-02-10,2016-07-02,,,DOSE INCREASED,FATAL,YES,DEATH
S-1Z-005,S,1Z,5.0,FEMALE,33.0,2.0,Angina pectoris,2016-06-15,2016-06-15,,,DOSE REDUCED,,NO,


#### AE Action Taken이 약물 투여이나 관련 병용약물 없음
* AE의 AEACNOTH에 따라 CM에 해당 AE indicating 약물이 없는 경우를 확인함

In [4]:
# List AEs whose "other action taken" code (AEACNOTH) is 2 or 4 but for which
# no CM record of the same subject names that AE term as its indication.
#
# The CM subquery drops the first two characters of CMINDCAE before matching
# it against AETERM (presumably a sequence/prefix marker -- TODO confirm).
# The LEFT JOIN plus "CMINDCAE is null" keeps only AE rows without a match.
# PROC SQL is terminated with QUIT (RUN has no effect inside PROC SQL).
_ = sas.submit("""
proc sql;
create table AECM as
select distinct SUBJID as USUBJID,AETERM,
""||trim(SUBJID)||": having AECM action for AE ("||trim(AETERM)||") without CM" as MSG
from CUBEDEMO.AE as AE
left join (
    select SUBJID as USUBJID,
    substr(CMINDCAE,3) as CMINDCAE,
    CMTRT from CUBEDEMO.CM ) as CM
on (AE.AETERM=CM.CMINDCAE and AE.SUBJID=CM.USUBJID)
where (AE.AEACNOTH in (2,4) and CMINDCAE is null)
order by USUBJID
;
quit;
""")
aecm = sas.sd2df("AECM").set_index("USUBJID")
aecm

Unnamed: 0_level_0,AETERM,MSG
USUBJID,Unnamed: 1_level_1,Unnamed: 2_level_1
S-1Z-005,Asthma,S-1Z-005: having AECM action for AE (Asthma) w...
S-1Z-005,Limb discomfort,S-1Z-005: having AECM action for AE (Limb disc...
S-1Z-005,Liver damage,S-1Z-005: having AECM action for AE (Liver dam...
S-1Z-024,Syncope,S-1Z-024: having AECM action for AE (Syncope) ...
S-1Z-034,Dizziness,S-1Z-034: having AECM action for AE (Dizziness...
S-1Z-036,Fatigue extreme,S-1Z-036: having AECM action for AE (Fatigue e...
S-2Z-004,Nausea,S-2Z-004: having AECM action for AE (Nausea) w...
S-2Z-008,Astigmatism,S-2Z-008: having AECM action for AE (Astigmati...
S-2Z-009,AETEST01,S-2Z-009: having AECM action for AE (AETEST01)...
S-2Z-020,other SAE,S-2Z-020: having AECM action for AE (other SAE...


#### Concomitant Medication vs. Administration Route
* 비정형 데이터인 CMTRT에 대해 adm. route가 특이한 경우를 확인함

In [22]:
# List CM records whose free-text treatment name (CMTRT) contains one of the
# listed dosage-form tokens (CAP, SC, TAB, SYR, SOLN) while the recorded route
# (CMROUTE) is anything other than 'Oral'.
#
# The PROC SQL step was previously left unterminated; it is now closed with
# QUIT so the procedure does not stay active after the query.
_ = sas.submit("""
proc sql;
create table CMPO as
select SUBJID as USUBJID,CMTRT,CMROUTE,
""||trim(SUBJID)||": unusual adm. route: "||trim(CMTRT)||": "||trim(CMROUTE)||"" as MSG
from CUBEDEMO.CM
where CMROUTE<>'Oral' and (
CMTRT like '%CAP%' or
CMTRT like '%SC%' or
CMTRT like '%TAB%' or
CMTRT like '%SYR%' or
CMTRT like '%SOLN%')
order by USUBJID
;
quit;
""")
cmpo = sas.sd2df("CMPO").set_index("USUBJID")
cmpo

Unnamed: 0_level_0,CMTRT,CMROUTE,MSG
USUBJID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
S-1Z-007,DESARTAN TAB 16mg,,S-1Z-007: unusual adm. route: DESARTAN TAB 16mg:
S-1Z-007,ATENOLOL KYEUNG IN TAB 100mg,,S-1Z-007: unusual adm. route: ATENOLOL KYEUNG ...
S-1Z-012,DESARTAN TAB 16mg,Intramuscular,S-1Z-012: unusual adm. route: DESARTAN TAB 16m...
S-1Z-012,NEWROSTIN TAB,,S-1Z-012: unusual adm. route: NEWROSTIN TAB:
S-1Z-036,ERDOS CAP,dasd,S-1Z-036: unusual adm. route: ERDOS CAP: dasd
S-1Z-037,ESPROLE TAB 20mg,,S-1Z-037: unusual adm. route: ESPROLE TAB 20mg:
S-2Z-003,KANGJUNGBING TAB,Intramuscular,S-2Z-003: unusual adm. route: KANGJUNGBING TAB...
S-2Z-005,ROUTINES TAB,Topical,S-2Z-005: unusual adm. route: ROUTINES TAB: To...
S-2Z-005,ROUTINES TAB,Intravenous,S-2Z-005: unusual adm. route: ROUTINES TAB: In...
S-2Z-020,TYLENOL ER TAB 325mg,Intramuscular,S-2Z-020: unusual adm. route: TYLENOL ER TAB 3...


#### CM Administration Deviation per AE Term
* CM으로 대응하는 AE에 대해, 해당 AE가 끝났는데도 대응 CM이 지속 투여되는 것으로 짐작되는 경우
    * 해당 AE가 CM으로 대응되는 경우 (AEACNOTH in (2,4))
        * CM이 지속 투여중인데 AE 발현종료일이 있는 경우 (CMONGO==1 and notna(AEENDTC))
        * CM이 지속 투여중인데 CM 투여종료일이 있는 경우 (CMONGO==1 and notna(CMENDTC))
        * CM 투여종료일이 AE 발현종료일의 이후인 경우 (CMENDTC > AEENDTC)
* 결과는 구문적으로 확인이 필요한 경우이며 의미적으로 틀린 경우도 있음
    * Anxiety Aggravated에 도파민이 투약됨
    * Death 발현종료일 이후 로수바스타틴이 투약됨
    * Cold 발현종료일 이후 질정이 투약됨
    * Eye abcess에 대해 에스시탈로프람이 투약됨

In [6]:
# List AE/CM pairs (same subject, CM indication equal to the AE term) where
# the AE is handled by medication (AEACNOTH in (2,4)) AND at least one of:
#   * the CM is flagged ongoing (CMONGO=1) although the AE has an end date;
#   * the CM is flagged ongoing (CMONGO=1) although the CM has an end date;
#   * the CM end date sorts after the AE end date.
#
# Fix: the three deviation checks are now wrapped in one parenthesised group.
# Previously the WHERE clause read "A and B1 or B2 or B3"; because AND binds
# tighter than OR, the AEACNOTH filter applied only to the first check and the
# other two matched AEs regardless of AEACNOTH.
#
# Columns in the WHERE clause are table-qualified so it is explicit that they
# are the source columns; the translate() results in the SELECT list (which
# map the characters U->0 and K->1) are only used for display.
# PROC SQL is terminated with QUIT (RUN has no effect inside PROC SQL).
_ = sas.submit("""
proc sql;
create table AECMWD as
select distinct AE.SUBJID as USUBJID,
AE.AETERM,CM.CMINDCAE,CM.CMTRT,
translate(CM.CMENDTC,"01","UK") as CMENDTC,
translate(AE.AEENDTC,"01","UK") as AEENDTC,
CM.CMONGO,
""||trim(AE.SUBJID)||": CM adm. date / status deviates from AE" as MSG
from CUBEDEMO.AE as AE
inner join ( 
    select CM.SUBJID,CM.CMTRT,CM.CMENDTC,CM.CMONGO,
    substr(CM.CMINDCAE,3) as CMINDCAE
    from CUBEDEMO.CM ) as CM
on AE.SUBJID=CM.SUBJID and 
(AE.AETERM is not null and CM.CMINDCAE is not null and AE.AETERM=CM.CMINDCAE)
where AE.AEACNOTH in (2,4) and (
    (CM.CMONGO=1 and AE.AEENDTC is not null) or
    (CM.CMONGO=1 and CM.CMENDTC is not null) or
    (CM.CMENDTC > AE.AEENDTC)
)
order by USUBJID
;
quit;
""")
aecmwd = sas.sd2df("AECMWD")
aecmwd.set_index("USUBJID")

Unnamed: 0_level_0,AETERM,CMINDCAE,CMTRT,CMENDTC,AEENDTC,CMONGO,MSG
USUBJID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
S-1Z-018,Localized itching,Localized itching,PLAKON POWD 3mg,2016-12-04,2016-12-02,,S-1Z-018: CM adm. date / status deviates from AE
S-1Z-022,Anxiety aggravated,Anxiety aggravated,DOPAMINE DAEWOO INJ 200mg/5ml,2017-02-17,,,S-1Z-022: CM adm. date / status deviates from AE
S-1Z-022,Anxiety aggravated,Anxiety aggravated,XANAX XR TAB 0.5mg,2017-02-02,,,S-1Z-022: CM adm. date / status deviates from AE
S-1Z-026,Vomiting,Vomiting,BUSCOPAN TAB,2016-02-10,2016-02-08,,S-1Z-026: CM adm. date / status deviates from AE
S-1Z-029,Drug allergy,Drug allergy,TALLERGY TAB 10mg,2017-02-02,2017-02-01,,S-1Z-029: CM adm. date / status deviates from AE
S-1Z-032,Fever,Fever,TYLENOL TAB 500mg,2016-10-28,,,S-1Z-032: CM adm. date / status deviates from AE
S-1Z-033,Fever,Fever,TYLENOL TAB 500mg,2016-10-28,,,S-1Z-033: CM adm. date / status deviates from AE
S-1Z-033,Fever,Fever,TYLENOL TAB 500mg,2016-10-28,2016-10-21,,S-1Z-033: CM adm. date / status deviates from AE
S-1Z-033,Fever,Fever,TYLENOL TAB 500mg,2017-02-19,,,S-1Z-033: CM adm. date / status deviates from AE
S-1Z-033,Fever,Fever,TYLENOL TAB 500mg,2017-02-19,2016-10-21,,S-1Z-033: CM adm. date / status deviates from AE


#### Informed Consent Date vs. Baseline Visit Date
* ICDTC가 Baseline SVDTC보다 최근인 경우를 확인함

In [7]:
# List subjects whose informed-consent date (EN.ICDTC) sorts after the date of
# visit 1 (SV.SVDTC). The LEFT JOIN keeps EN rows without a visit-1 record, so
# those appear with an empty SVDTC.
#
# Fix: the listing is built in a single query. Previously a second
# "create table ENSV ... from ENSV" statement read from the very table it was
# replacing, which SAS reports as a recursive reference to the target table.
# PROC SQL is terminated with QUIT (RUN has no effect inside PROC SQL).
_ = sas.submit("""
proc sql;
create table ENSV as
select EN.SUBJID as USUBJID,EN.ICDTC,SV.SVDTC,
""||trim(EN.SUBJID)||": earlier or no visit date: "||trim(EN.ICDTC)||", "||trim(SV.SVDTC)||"" as MSG
from CUBEDEMO.EN as EN
left join CUBEDEMO.SV as SV on (EN.SUBJID=SV.SUBJID and SV.VISIT=1)
where EN.ICDTC > SV.SVDTC
order by USUBJID
;
quit;
""")
ensv = sas.sd2df("ENSV").set_index("USUBJID")
ensv

Unnamed: 0_level_0,ICDTC,SVDTC,MSG
USUBJID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
S-1Z-004,2017-02-02,,"S-1Z-004: earlier or no visit date: 2017-02-02,"
S-1Z-008,2017-02-08,2017-02-07,S-1Z-008: earlier or no visit date: 2017-02-08...
S-1Z-009,2017-02-07,2017-02-01,S-1Z-009: earlier or no visit date: 2017-02-07...
S-1Z-011,2017-02-08,2017-02-01,S-1Z-011: earlier or no visit date: 2017-02-08...
S-1Z-012,2017-02-17,2016-02-01,S-1Z-012: earlier or no visit date: 2017-02-17...
S-1Z-013,2017-02-05,2017-02-01,S-1Z-013: earlier or no visit date: 2017-02-05...
S-2Z-001,2017-02-17,2017-02-01,S-2Z-001: earlier or no visit date: 2017-02-17...
S-2Z-026,2017-02-02,,"S-2Z-026: earlier or no visit date: 2017-02-02,"
S-2Z-035,2017-02-03,2017-02-02,S-2Z-035: earlier or no visit date: 2017-02-03...
S-3Z-019,2017-02-02,2017-01-16,S-3Z-019: earlier or no visit date: 2017-02-02...


#### Extraneous Medical History
* 프로토콜에 180일 이전 병력은 입력하지 않는 것으로 명시됨을 가정, 해당 경우를 확인함

In [8]:
# List medical-history entries whose end date lies 180 days or more before the
# subject's visit-1 date.
#
# Steps performed on the SAS side:
#   1. MHSV_BASE: MH rows joined to the subject's visit-1 record. The WHERE
#      clause on SV columns discards MH rows without a dated visit 1.
#      translate() maps the characters U->0 and K->1 in MHENDTC.
#   2. DATA step: DTDELTA = MHENDTC - SVDTC in days (both read as yymmdd10.).
#   3. MHSV: rows with a non-missing DTDELTA <= -180, plus a message column.
#
# Fix: the intermediate result now lives in MHSV_BASE. Previously the final
# "create table MHSV ... from MHSV" read from the table it was replacing,
# which SAS reports as a recursive reference to the target table.
# PROC SQL steps are terminated with QUIT (RUN has no effect inside PROC SQL).
_ = sas.submit("""
proc sql;
create table MHSV_BASE as 
    select MH.SUBJID,MH.MHTERM,
    translate(MH.MHENDTC,"01","UK") as MHENDTC,
    SV.SVDTC 
    from CUBEDEMO.MH as MH 
    left join CUBEDEMO.SV as SV
    on MH.SUBJID=SV.SUBJID
    where (SV.VISIT=1 and SV.SVDTC is not null)
    ;
quit;

data MHSV_BASE;
    set MHSV_BASE;
    DTDELTA=input(MHENDTC,yymmdd10.)-input(SVDTC,yymmdd10.);
run;

proc sql;
create table MHSV as 
    select *,""||trim(SUBJID)||": extraneous MH as per protocol: "||put(DTDELTA,8.)||" days" as MSG
    from MHSV_BASE 
    where (DTDELTA<=-180 and DTDELTA is not null)
    ;
quit;
""")
mhsv = sas.sd2df("MHSV").set_index("SUBJID")
mhsv

Unnamed: 0_level_0,MHTERM,MHENDTC,SVDTC,DTDELTA,MSG
SUBJID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
S-1Z-012,Benign tumor of liver,2015-02-04,2016-02-01,-362.0,S-1Z-012: extraneous MH as per protocol: -...
S-1Z-030,Hyper HDL cholesterolaemia,2016-01-01,2016-07-21,-202.0,S-1Z-030: extraneous MH as per protocol: -...
S-1Z-036,Angina unstable,2016-03-17,2017-03-02,-350.0,S-1Z-036: extraneous MH as per protocol: -...
S-2Z-010,Pneumonia,2016-07-01,2017-01-29,-212.0,S-2Z-010: extraneous MH as per protocol: -...
S-2Z-014,Ache breast,2009-02-03,2016-02-02,-2555.0,S-2Z-014: extraneous MH as per protocol: -2...
S-2Z-015,Head banging,2012-10-14,2016-03-03,-1236.0,S-2Z-015: extraneous MH as per protocol: -1...
S-2Z-022,Ache breast,2016-01-01,2016-08-08,-220.0,S-2Z-022: extraneous MH as per protocol: -...
S-2Z-038,Pain biliary,2014-09-14,2015-03-15,-182.0,S-2Z-038: extraneous MH as per protocol: -...
S-2Z-038,Pain,2014-09-15,2015-03-15,-181.0,S-2Z-038: extraneous MH as per protocol: -...
S-3Z-003,Dorsal slit of prepuce,2004-02-17,2015-03-04,-4033.0,S-3Z-003: extraneous MH as per protocol: -4...


In [9]:
# Terminate the SAS session opened at the top of the notebook.
sas.endsas()

SAS Connection terminated. Subprocess id was 14452


### 후기
* Listing에서 발견되는 의미적 오류를 통해 clinical manual query의 필요성을 알 수 있음
    * 비정형 데이터와 관련된 문제들: 이상한 적응증의 CM과 administration route 등
* 구문적 쿼리는 최대한 edit check로 구현해야 함
* 의미적 쿼리는 가능한 구문적 쿼리에 가깝게 만들어 listing해야 함
    * Listing 전에 AE, CM, MH 코딩이 이루어져야 함
* Data Validation Specification의 Query Specification, Manual Query의 내용을 파싱하고 자동화하기