### Clinical Manual Querying
* py2sas(saspy)를 통해 임상시험 간 발생할 수 있는 query listing을 수행하고자 함
* 가능한 SAS 및 SAS SQL 래퍼(proc sql)를 사용함

#### 환경
* 임상시험 [SAS Datasets (dmisimportant)](https://cafe.naver.com/dmisimportant/104)
* SAS ODA
* miniconda, saspy

#### SAS 세션 연결 및 설정
* SAS 세션 설정 및 SAS 오브젝트 획득
* SAS 라이브러리 설정

In [38]:
import saspy
import pandas as pd

sascfgfile="c:/code/pub/ct/sascfg.py"
sas=saspy.SASsession(cfgfile=sascfgfile)
dbsfile="c:/code/CUBEDEMO2017/spec.xlsx"
dbs=pd.read_excel(dbsfile)
_=sas.submit("""
%let ROOT=/home/u63544628;
%let CUBE=/home/u63544628/cubedemo;
%let EXTN=/home/u63544628/extern;
libname CUBEDEMO "&CUBE.";
""")

Using SAS Config named: oda
SAS Connection established. Subprocess id is 16616



#### Dataset 확인
* 라이브러리가 로드됐는지 확인함

In [14]:
# datasets={q:sas.sasdata(q,"CUBEDEMO") for q in dbs.DOMAIN.unique()}
_=sas.submit("""
proc sql;
create table FMT as
    select * from DICTIONARY.COLUMNS where libname="CUBEDEMO"
;
run;
""")
fmt=sas.sd2df("FMT")
fmt.dropna(axis=1).sample(5)

Unnamed: 0,libname,memname,memtype,name,type,length,npos,varnum,sortedby,xtype,notnull,precision,transcode
20,CUBEDEMO,AE,DATA,INV_SOCCD,num,8.0,104.0,21.0,0.0,num,no,0.0,yes
273,CUBEDEMO,SN,DATA,SNNAME,char,30.0,32.0,3.0,0.0,char,no,0.0,yes
69,CUBEDEMO,CY,DATA,SUBJID,char,24.0,16.0,1.0,0.0,char,no,0.0,yes
205,CUBEDEMO,PD,DATA,PDTJRE,num,8.0,80.0,13.0,0.0,num,no,0.0,yes
281,CUBEDEMO,SV,DATA,SVNVDES,num,8.0,8.0,4.0,0.0,num,no,0.0,yes


#### SAE Listing #1
* SAE Reconciliation을 위한 listing
* CUBEDEMO 데이터셋에는 **특정 도메인에만 존재하는 USUBJID나 대부분의 값이 null인 row가 있음**
    * 쿼리 결과에 인덱스나 변수값 대부분이 null인 row가 있을 수 있음

In [15]:
_=sas.submit("""
data LBLYN;
infile datalines dlm="," dsd;
input CD LBL $50.;
datalines;
1,YES
2,NO
;
run;

data LBLSEX;
infile datalines dlm="," dsd;
input CD SEX $50.;
datalines;
1,MALE
2,FEMALE
;
run;

data LBLOUT;
infile datalines dlm="," dsd;
input CD OUTCOME $50.;
datalines;
1,FATAL
2,NOT RECOVERED/NOT RESOLVED
3,RECOVERED/RESOLVED
4,RECOVERED/RESOLVED WITH SEQUELAE
5,RECOVERING/RESOLVING
6,UNKNOWN
;
run;

data LBLSER;
infile datalines dlm="," dsd;
input CD SAE_CATEGORY $50.;
datalines;
1,NO
2,DEATH
3,HOSPITALIZATION
4,LIFE THREATENING
5,CONGENITAL ANOMALY OR BIRTH DEFECT
6,SIGNIFICANT DISABILITY
7,OTHER MEDICALLY IMPORTANT EVENT
;
run;

data LBLSEV;
infile datalines dlm="," dsd;
input CD SEVERITY $50.;
datalines;
1,MILD
2,MODERATE
3,SEVERE
;
run;

data LBLREL;
infile datalines dlm="," dsd;
input CD RELATIONSHIP $50.;
datalines;
1,NOT RELATED
2,UNLIKELY RELATED
3,POSSIBILY RELATED
4,RELATED
;
run;

data LBLACN;
infile datalines dlm="," dsd;
input CD ACTION_TAKEN $50.;
datalines;
1,DOSE INCREASED
2,DOSE NOT CHANGED
3,DOSE REDUCED
4,DRUG INTERRUPTED
5,DRUG WITHDRAWN
6,NOT APPLICABLE
7,UNKNOWN
;
run;

data IDXDM;
set CUBEDEMO.DM;
drop FERTILE BRTHDTC;
USUBJID=SUBJID;
STUDYID=substr(SUBJID,1,1);
SITEID=substr(SUBJID,find(SUBJID,"-")+1,2);
SUBJID=substr(SUBJID,length(SUBJID)-2,3);
run;

proc sql;
create table SAEL as
select 
USUBJID,STUDYID,SITEID,SUBJID,SEX,AGE,SEQ,AETERM,AESTDTC,AEENDTC,
SEVERITY,RELATIONSHIP,ACTION_TAKEN,OUTCOME,SERIOUS,SAE_CATEGORY
from (
    select IDXDM.USUBJID,IDXDM.STUDYID,IDXDM.SITEID,IDXDM.SUBJID,IDXDM.AGE,
    AE.SEQ,AE.AETERM,AE.AESTDTC,AE.AEENDTC,
    IDXDM.SEX as SEX_,
    AE.AESEV as SEV_,
    AE.AEREL as REL_,
    AE.AEACN as ACN_,
    AE.AEOUT as OUT_,
    AE.AESER as SER_,
    case when (AE.AESER=1 or AE.AESER is null) then "NO" else "YES" end as SERIOUS
    from IDXDM
    right join CUBEDEMO.AE as AE on IDXDM.USUBJID=AE.SUBJID
    )
left join LBLSEX on SEX_=LBLSEX.CD
left join LBLSEV on SEV_=LBLSEV.CD
left join LBLREL on REL_=LBLREL.CD
left join LBLACN on ACN_=LBLACN.CD
left join LBLOUT on OUT_=LBLOUT.CD
left join LBLSER on SER_=LBLSER.CD
order by SITEID,SUBJID
;
run;
""")
sael=sas.sd2df("SAEL").set_index("USUBJID")
sael.sample(5)

Unnamed: 0_level_0,STUDYID,SITEID,SUBJID,SEX,AGE,SEQ,AETERM,AESTDTC,AEENDTC,SEVERITY,RELATIONSHIP,ACTION_TAKEN,OUTCOME,SERIOUS,SAE_CATEGORY
USUBJID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
S-2Z-039,S,2Z,39,MALE,10.0,3.0,,,,,,DOSE REDUCED,,NO,
S-MJ-004,S,MJ,4,FEMALE,34.0,2.0,,,,,,,,YES,DEATH
S-2Z-022,S,2Z,22,MALE,29.0,1.0,Accident at work,2016-12-01,2016-12-01,SEVERE,RELATED,NOT APPLICABLE,FATAL,YES,DEATH
S-2Z-011,S,2Z,11,FEMALE,30.0,1.0,Head cold,2017-02-02,,MILD,NOT RELATED,NOT APPLICABLE,RECOVERING/RESOLVING,NO,NO
S-1Z-012,S,1Z,12,FEMALE,43.0,1.0,,,,,,,,YES,SIGNIFICANT DISABILITY


#### SAE Listing #2
* SAS의 proc format을 이용한 listing
    * Proc format과 variable label(column label)은 view에만 적용딤
    * Proc report, proc import로 세이브, 로드 필요

In [16]:
sas.submit("""
proc format;
value
yn 0="none" 1="Yes" 2="No";
value 
sex 0="none" 1="Male" 2="Female";
value
fertile 0="none" 1="Fertile" 2="Sterile";
value
aerel 0="none" 1="Not Related" 2="Unlikely Related" 3="Possibily Related" 4="Related";
value 
aeacn 0="none" 1="Dose Increased" 2="Dose Not Changed" 3="Dose Reduced" 4="Drug Interrupted" 5="Drug Withdrawn" 6="Not Applicable" 7="Unknown";
value
aeout 0="none" 1="Fatal" 2="Not Recovered/Not Resolved" 3="Recovered/Resolved" 4="Recovered/Resolved with Sequelae" 5="Recovering/Resolving" 6="Unknown";
value
aeser 0="none" 1="No" 2="Death" 3="Hospitalization" 4="Life Threatening" 5="Congenital Anomaly or Birth Defect" 6="Significant Disability" 7="Other Medically Important Event";
value
aesev 0="none" 1="Mild" 2="Moderate" 3="Severe";
run;

data SAEL1;
merge CUBEDEMO.DM (in=x) CUBEDEMO.AE (in=y);
by SUBJID;
format sex sex.
fertile fertile.
aerel aerel.
aeacn aeacn.
aeout aeout.
aeser aeser.
aesev aesev.
aeteae yn.;
length IsSerious $3.;
if y;
USUBJID=SUBJID;
STUDYID=substr(SUBJID,1,1);
SITEID =substr(SUBJID,find(SUBJID,"-")+1,2);
SUBJID =substr(SUBJID,6);
if AESER=. then IsSerious=.;
else if AESER=1 then IsSerious="No";
else IsSerious="Yes";
keep USUBJID STUDYID SITEID SUBJID SEX AGE AETERM AESTDTC AEENDTC AESEV AEREL AEACN AEOUT AESER AETEAE IsSerious;
run;

data SAEL1;
retain USUBJID STUDYID SITEID SUBJID;
set SAEL1;
run;

proc import datafile="&CUBE./spec.xlsx"
dbms=xlsx replace
out=CUBEDEMO.DBS(keep=ITEMID ITEM_LABEL);
sheet="DBSPEC";
getnames=yes;
run;

proc sort data=CUBEDEMO.DBS out=CUBEDEMO.DBL nodupkey; by ITEMID;
run;

%macro label(libname,ds,dict,dict_col,dict_lbl);
data _null_;
set &dict;
call symput("var"||left(_N_),left(&dict_col));
call symput("lbl"||left(_N_),left(&dict_lbl));
call symput("end",left(_N_));
run;

proc datasets library=&libname memtype=data nolist;
modify &ds;
label
/* for &q in range(&stop) -> &(&var[&q])=&(&lbl[&q]) */
%do q=1 %to &end;
&&var&q=&&lbl&q
%end;
;
quit;
run;
%mend;

%label(WORK,SAEL1,CUBEDEMO.DBL,ITEMID,ITEM_LABEL);

ods csv file="&EXTN./SAEL1.csv";
proc report data=SAEL1;
columns _all_;
run;
ods excel close;

proc import datafile="&EXTN./SAEL1.csv"
dbms=csv replace
out=SAEL1;
getnames=yes;
run;
""")

sael=sas.sd2df("SAEL1")
sael.set_index(["USUBJID"]).sample(5)

Unnamed: 0_level_0,STUDYID,SITEID,Screening Number,Age,Sex,Adverse event,Start date,TEAE,Outcome,End date,Serious,Severity,Relationship to study treatment,Action taken with study treatme,IsSerious
USUBJID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
S-1Z-033,S,1Z,33.0,36,Male,Fever,2016-10-17,No,Recovered/Resolved,2016-10-21,No,Mild,Not Related,Not Applicable,No
S-2Z-008,S,2Z,8.0,66,Male,Astigmatism,2017-01-24,No,Not Recovered/Not Resolved,,Other Medically Importan,Moderate,Possibily Related,Dose Not Changed,Yes
S-1Z-009,S,1Z,9.0,97,Male,,2017-01-03,No,,,,,,,
S-3Z-035,S,3Z,35.0,30,Male,Broken ankle,2017-02-02,Yes,Recovered/Resolved with Sequelae,2017-02-16,Congenital Anomaly or Bi,Severe,Not Related,Dose Reduced,Yes
S-2Z-030,S,2Z,30.0,40,Female,Hand and foot skin reacti,2017-02-08,No,Recovered/Resolved,2017-02-15,Hospitalization,Moderate,Not Related,Dose Not Changed,Yes


#### AE Action Taken이 약물 투여이나 관련 병용약물 없음
* AE의 AEACNOTH에 따라 CM에 해당 AE indicating 약물이 없는 경우를 확인함

In [23]:
_=sas.submit("""
proc sql;
create view _AECM as
	select distinct SUBJID as USUBJID,
	AETERM,
	cats(SUBJID,"_",AETERM) as IDX
	from CUBEDEMO.AE
	where AEACNOTH in (2,4)
	order by IDX
;
create view _CMAE as
    select distinct SUBJID as USUBJID,
    substr(CMINDCAE,3) as AETERM,
    cats(SUBJID,"_",substr(CMINDCAE,3)) as IDX
    from CUBEDEMO.CM
    where CMINDCAE is not null and CMTRT is not null
    order by IDX
;
create view AEWOCM as
	select USUBJID,AETERM,
	""||USUBJID||": having AE ("||AETERM||") with CM taken without corresponding CM" as MSG
	from _AECM
	where IDX not in (select IDX from _CMAE)
;
run;
""")
aewocm=sas.sd2df("AEWOCM").set_index("USUBJID")
aewocm.sample(5)

Unnamed: 0_level_0,AETERM,MSG
USUBJID,Unnamed: 1_level_1,Unnamed: 2_level_1
S-1Z-005,Asthma,S-1Z-005 : having AE (Asthma ...
S-3Z-004,Tooth abscess,S-3Z-004 : having AE (Tooth abs...
S-2Z-020,other SAE,S-2Z-020 : having AE (other SAE...
S-2Z-008,Astigmatism,S-2Z-008 : having AE (Astigmati...
S-2Z-004,Nausea,S-2Z-004 : having AE (Nausea ...


#### Concomitant Medication vs. Administration Route
* 비정형 데이터인 CMTRT에 대해 adm. route가 특이한 경우를 확인함

In [None]:
_=sas.submit("""
proc sql;
create table CMPO as
    select SUBJID as USUBJID,CMTRT,CMROUTE,
    ""||trim(SUBJID)||": unusual adm. route: "||trim(CMTRT)||": "||trim(CMROUTE)||"" as MSG
    from CUBEDEMO.CM
    where CMROUTE<>'Oral' and (
    lower(CMTRT) like '%cap%' or
    lower(CMTRT) like '% sc %' or
    lower(CMTRT) like '%tab%' or
    lower(CMTRT) like '%syr%' or
    lower(CMTRT) like '%soln%' )
    order by USUBJID
;
run;
""")
cmpo=sas.sd2df("CMPO").set_index("USUBJID")
cmpo.sample(5)

Unnamed: 0_level_0,CMTRT,CMROUTE,MSG
USUBJID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
S-JP-001,GEWORIN I TAB,,S-JP-001: unusual adm. route: GEWORIN I TAB:
S-2Z-005,ROUTINES TAB,Topical,S-2Z-005: unusual adm. route: ROUTINES TAB: To...
S-2Z-027,LITEC COMP CAP,Intravenous,S-2Z-027: unusual adm. route: LITEC COMP CAP: ...
S-2Z-020,TYLENOL ER TAB 325mg,Intramuscular,S-2Z-020: unusual adm. route: TYLENOL ER TAB 3...
S-1Z-012,DESARTAN TAB 16mg,Intramuscular,S-1Z-012: unusual adm. route: DESARTAN TAB 16m...


#### CM Administration Deviation per AE Term
* CM로 대응하는 AE에 대해, 해당 AE가 끝났는데도 대응 CM이 지속 투여되는 것으로 짐작되는 경우
    * 해당 AE가 CM으로 대응되는 경우 (AEACNOTH in (2,4) and AETERM==CMINDCAE)
        * CM이 지속 투여중인데 AE 발현종료일이 있는 경우 (CMONGO==1 and notna(AEENDTC))
        * CM이 지속 투여중인데 CM 투여종료일이 있는 경우 (CMONGO==1 and notna(CMENDTC))
        * CM 투여종료일이 AE 발현종료일의 이전인 경우 (CMENDTC > AEENDTC)
* 맥락적으로 틀린 경우도 발견됨
    * Death 발현종료일 이후 로수바스타틴 투약
    * Eye abscess에 에스시탈로프람 투약

In [42]:
_=sas.submit("""
proc sql;
create table AECMWD as
	select distinct AE.SUBJID as USUBJID,
	AE.AETERM,CM.CMINDCAE,CM.CMTRT,
	translate(CM.CMENDTC,"01","UK") as CMENDTC,
	translate(AE.AEENDTC,"01","UK") as AEENDTC,
	CM.CMONGO,
	""||trim(AE.SUBJID)||": CM adm. date / status deviates from AE cond." as MSG
	from CUBEDEMO.AE as AE
	inner join (
		select SUBJID,CMTRT,CMENDTC,CMONGO,
		substr(CMINDCAE,3) as CMINDCAE
		from CUBEDEMO.CM where CMINDCAE is not null ) as CM
	on AE.SUBJID=CM.SUBJID and AE.AETERM=CM.CMINDCAE and AE.AETERM is not null
	where AEACNOTH in (2,4) and
	(CMONGO=1 and AEENDTC is not null) or
	(CMONGO=1 and CMENDTC is not null) or
	(CMENDTC > AEENDTC)
	order by USUBJID
;
run;
""")
aecmwd=sas.sd2df("AECMWD")
aecmwd.set_index("USUBJID").sample(5)

Unnamed: 0_level_0,AETERM,CMINDCAE,CMTRT,CMENDTC,AEENDTC,CMONGO,MSG
USUBJID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
S-1Z-018,Localized itching,Localized itching,PLAKON POWD 3mg,2016-12-04,2016-12-02,,S-1Z-018: CM adm. date / status deviates from ...
S-1Z-022,Anxiety aggravated,Anxiety aggravated,XANAX XR TAB 0.5mg,2017-02-02,,,S-1Z-022: CM adm. date / status deviates from ...
S-1Z-033,Fever,Fever,TYLENOL TAB 500mg,2016-10-28,2016-10-21,,S-1Z-033: CM adm. date / status deviates from ...
S-2Z-017,Ache stomach,Ache stomach,IBOPROFEN KANGNAM TAB 200mg,2016-09-02,,,S-2Z-017: CM adm. date / status deviates from ...
S-2Z-035,Haemangioma,Haemangioma,cefazolin,,2017-02-10,1.0,S-2Z-035: CM adm. date / status deviates from ...


#### Lipid-lowering Medication with Primary Endpoint
* 프로토콜에 lipid-lowering medication을 복용하는 경우 LC(센트럴 랩) LDL-C가 범위내 있어야 하는 것으로 명시됨을 가정, 해당 경우를 확인함

In [25]:
_=sas.submit("""
proc sql;
create view _LLM as
	select SUBJID,CMTRT from CUBEDEMO.CM
	where (
	lower(CMTRT) like '%statin%' or
	lower(CMTRT) like '%velam%' or
	lower(CMTRT) like '%mibe%' or
	lower(CMTRT) like '%cumab%')
;
create table _LDL as
select _LLM.SUBJID as USUBJID,CMTRT,LCLDL,
""||trim(_LLM.SUBJID)||": having a lipid-lowering agent ("||trim(CMTRT)||")" as MSG
from _LLM
left join (
	select SUBJID,LCLDL,LCDTC
	from CUBEDEMO.LC
	where LCLDL is not null) as LC
on _LLM.SUBJID=LC.SUBJID
order by CMTRT
;
run;
""")
sas.sd2df("_LDL").set_index("USUBJID").sample(5)

Unnamed: 0_level_0,CMTRT,LCLDL,MSG
USUBJID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
S-4Z-001,ROSUVAMIBE TAB 10/10mg,LC-7-LCLDL,S-4Z-001: having a lipid-lowering agent (ROSUV...
S-4Z-001,rosuvastatin and ezetimibe,LC-2-LCLDL,S-4Z-001: having a lipid-lowering agent (rosuv...
S-4Z-001,rosuvastatin and ezetimibe,LC-5-LCLDL,S-4Z-001: having a lipid-lowering agent (rosuv...
S-4Z-001,simvastatin,LC-2-LCLDL,S-4Z-001: having a lipid-lowering agent (simva...
S-4Z-001,simvastatin,LC-7-LCLDL,S-4Z-001: having a lipid-lowering agent (simva...


#### Informed Consented Date vs. Screening Visit Date
* ICDTC가 Screening SVDTC보다 최근인 경우를 확인함
    * CUBEDEMO 프로토콜을 알 수 없으므로 SV에서 VISIT==1이면 Screening Visit으로 가정함
    * 이 데이터셋에서는 사실 ICDTC=Screening Visit Date인 것으로 추측됨

In [None]:
_=sas.submit("""
proc sql;
create table ENSV as
    select EN.SUBJID as USUBJID,EN.ICDTC,SV.SVDTC from CUBEDEMO.EN as EN
    left join CUBEDEMO.SV as SV
    on (EN.SUBJID=SV.SUBJID and SV.VISIT=1)
;
create table ENSV as
    select *,""||trim(USUBJID)||": earlier or no visit date: "||trim(ICDTC)||", "||trim(SVDTC)||"" as MSG
    from ENSV where ICDTC > SVDTC
    order by USUBJID
;
run;
""")
ensv=sas.sd2df("ENSV").set_index("USUBJID")
ensv.sample(5)

Unnamed: 0_level_0,ICDTC,SVDTC,MSG
USUBJID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
S1Z006,2017-02-08,,"S1Z006: earlier or no visit date: 2017-02-08,"
S-MJ-016,2017-06-28,,"S-MJ-016: earlier or no visit date: 2017-06-28,"
S-1Z-013,2017-02-05,2017-02-01,S-1Z-013: earlier or no visit date: 2017-02-05...
S-3Z-022,2017-02-14,,"S-3Z-022: earlier or no visit date: 2017-02-14,"
S-1Z-008,2017-02-08,2017-02-07,S-1Z-008: earlier or no visit date: 2017-02-08...


#### Extraneous Medical History
* 프로토콜에 180일 이전 병력은 입력하지 않는 것으로 명시됨을 가정, 해당 경우를 확인함

In [None]:
_=sas.submit("""
proc sql;
create table MHSV as 
    select MH.SUBJID,MH.MHTERM,
    translate(MH.MHENDTC,"01","UK") as MHENDTC,
    SV.SVDTC 
    from CUBEDEMO.MH as MH 
    left join CUBEDEMO.SV as SV
    on MH.SUBJID=SV.SUBJID
    where (SV.VISIT=1 and SV.SVDTC is not null)
    ;
run;

data MHSV;
    set MHSV;
    DTDELTA=input(MHENDTC,yymmdd10.)-input(SVDTC,yymmdd10.);
run;

proc sql;
create table MHSV as 
    select *,""||trim(SUBJID)||": extraneous MH as per protocol: "||put(DTDELTA,8.)||" days" as MSG
	from MHSV 
    where (DTDELTA<-180 and DTDELTA is not null)
	;
run;
""")
mhsv=sas.sd2df("MHSV").set_index("SUBJID")
mhsv.sample(5)

Unnamed: 0_level_0,MHTERM,MHENDTC,SVDTC,DTDELTA,MSG
SUBJID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
S-2Z-010,Pneumonia,2016-07-01,2017-01-29,-212.0,S-2Z-010: extraneous MH as per protocol: -...
S-2Z-014,Ache breast,2009-02-03,2016-02-02,-2555.0,S-2Z-014: extraneous MH as per protocol: -2...
S-MJ-019,Headache,2017-01-01,2017-07-06,-186.0,S-MJ-019: extraneous MH as per protocol: -...
S-2Z-022,Ache breast,2016-01-01,2016-08-08,-220.0,S-2Z-022: extraneous MH as per protocol: -...
S-1Z-012,Benign tumor of liver,2015-02-04,2016-02-01,-362.0,S-1Z-012: extraneous MH as per protocol: -...


#### Python Equivalents of Above

In [None]:
# IO
csvfile=[csvfile for csvfile in os.scandir("c:/code/cubedemo2017/sasset") if csvfile.path.lower().endswith(".csv")]
csvdata={os.path.splitext(csvfile.name)[0].upper():pd.read_csv(csvfile.path) for csvfile in csvfile}

# AE with CM taken without CM history
_aecm=csvdata["AE"].query("AEACNOTH in [2,4]")
_aecm=_aecm.assign(IDX=_aecm.SUBJID+"_"+_aecm.AETERM)
_cmae=csvdata["CM"].query("CMTRT.notna() and CMINDCAE.notna()")
_cmae=_cmae.assign(IDX=_cmae.SUBJID+"_"+_cmae.CMINDCAE.apply(lambda q:q[2:]))
aewocm=_aecm[~_aecm.IDX.isin(_cmae.IDX)][["SUBJID","AETERM"]].drop_duplicates()
aewocm=aewocm.assign(MSG=aewocm.apply(lambda q:f"{q.SUBJID} having AE ({q.AETERM}) with med. taken without corresponding CM",axis=1))

# Unusual Adm. Route
cmpo=csvdata["CM"].query("CMTRT.notna() and CMTRT.str.contains('cap|tab|syr|soln|sc',case=False,na=False) and CMROUTE!='Oral'")
cmpo=cmpo[["SUBJID","CMTRT","CMROUTE"]].assign(MSG=cmpo.apply(lambda q:f"{q.SUBJID} having CM ({q.CMTRT}) with unusual adm. route ({q.CMROUTE})",axis=1))

# AE with CM administered deviates from
aecmwd=_aecm.merge(_cmae,how="inner",on=["SUBJID","IDX"]).query("(CMONGO==1 and (AEENDTC.notna() or CMENDTC.notna())) or (CMENDTC > AEENDTC)")
aecmwd=aecmwd[["SUBJID","AETERM","CMINDCAE","CMTRT","CMENDTC","AEENDTC","CMONGO"]]
aecmwd=aecmwd.assign(MSG=aecmwd.apply(lambda q:f"{q.SUBJID} having CM ({q.CMTRT}) adm. deviates from AE ({q.AETERM}) cond.",axis=1))

# SUBJID with LLAs
_cmlla=csvdata["CM"].query("CMTRT.str.contains('statin|mibe|velam|cumab',case=False,na=False)")
_lcldl=csvdata["LC"]
cmldl=_cmlla.merge(_lcldl,how="left",on="SUBJID")[["SUBJID","CMTRT","LCLDL"]]
cmldl=_cmldl.assign(MSG=_cmldl.apply(lambda q:f"{q.SUBJID} having LLAs ({q.CMTRT})",axis=1))

# Premature ICDTC
ensv=csvdata["EN"].query("ICDTC.notna()").merge(csvdata["SV"].query("VISIT==1"),how="left",on="SUBJID").query("ICDTC>SVDTC or SVDTC.isna()")

# Extraneous MH
mhsv=csvdata["MH"].query("MHTERM.notna()").merge(csvdata["SV"].query("VISIT==1 and SVDTC.notna()"),how="left",on="SUBJID")
mhsv.MHENDTC=mhsv.MHENDTC.str.replace("UK","01")
mhsv=mhsv.assign(DTDELTA=(pd.to_datetime(mhsv.MHENDTC)-pd.to_datetime(mhsv.SVDTC)).apply(lambda q:q.days)).query("DTDELTA<=-180")

### 후기
* 구문적 쿼리는 최대한 edit check로 구현해야 함
    * 값 범위와 형, 빈 값 등 논리적 오류
* 맥락적 쿼리는 가능한 구문적 쿼리에 가깝게 만들어 listing해야 함
    * Listing 전에 AE, CM, MH 코딩이 이루어져야 함
* Listing에서 발견되는 맥락적 오류를 통해 manual query, medical review의 필요성을 알 수 있음
    * 틀린 적응증의 CM과 administration route 등
* Data Validation Specification의 Query Specification, Manual Query의 내용을 자동화하기