#### 0. saspy

In [1]:
import saspy
from IPython.display import HTML

#### 1. Config

In [2]:
sascfg="c:/code/pub/ct/sascfg.py"
sas=saspy.SASsession(cfgfile=sascfg)
sas.HTML_Style="PowerPointDark"
_=sas.submit("""
%let ROOT=/home/u63544628;
libname CUBE "/home/u63544628/cubedemo";
""")

Using SAS Config named: oda
SAS Connection established. Subprocess id is 18212



#### 2. BL - Manipulating

In [3]:
resp=sas.submit("""
title font="Consolas" "BL";

proc import datafile="&ROOT./bl.csv" out=bl dbms=csv replace;
    getnames=yes; guessingrows=300;
    run;

data bl_; set bl;
    rename RESULT=SCORE;
    format TYPE $3. RESULT 1. TYPEX $10.;
    INPUT=trim(OUTPUT);
    OUTPUT=trim(OUTPUT);
    RESULT=put(substr(RESULT,1,1),1.);

    NAN=cmiss(of BATCH--RESULT);

    if BATCH="MAS" then TYPE="MAC";
    else if BATCH="AAS" then TYPE=.;
    else TYPE="#NA";

    TYPEX=put(cats(BATCH,"-",LOCALE,"-",TYPE),$10.);

    SINCE=intck("day",DATE,today());

    if cmiss(of _all_) then delete;

    run;

data bl_; set bl_; keep BATCH INPUT OUTPUT SCORE DATE NAN SINCE; run;

proc sort data=bl_ out=bl_; by INPUT; run;
proc sort data=bl out=bl; by INPUT; run;

data bl_; merge bl_(in=x) bl(in=y); by INPUT; if x; run;

data bl_; set bl_; drop BATCH; IDX="bl"||left(put(_N_,10.)); run;

proc print data=bl_(obs=3) noobs; run;
""")
HTML(resp["LST"])

INPUT,OUTPUT,DATE,SCORE,NAN,SINCE,LOCALE,RESULT,IDX
#1 Companion For Fortnit,#1 Companion For Fortnite,2023-04-15,4,0,145,,,bl1
#DRIVE,#DRIVE,2023-05-12,2,0,118,,,bl2
#DRIVE,#DRIVE,2022-07-18,5,0,416,,,bl3


#### 3. CUBE - CMDM - Manipulating

In [4]:
resp=sas.submit("""
title font="Consolas" "DMCM with CMINDCAE";

/* 포맷 딕셔너리 구성 */
proc format;
	value BOOL .="False" 1="True";
	value YN .="none" 1="Yes" 2="No";
	value SEX .="none" 1="Nam" 2="Yeo";
	run;

/* DM 딕셔너리 구성, SUBJID 축약 */
data DM; set CUBE.DM; keep SUBJID AGE SEX;
	SUBJID=compress(SUBJID,"-");
    DMIDX="DM"||left(_N_);
	run;

/* CM 정리 */
data CM; set CUBE.CM;
    format CMONGO BOOL. CMSTATUS $10.;
    keep CMIDX SUBJID SEQ CMTRT CMINDCAE CMONGO CMSTATUS;
    where not missing(CMINDCAE);
    
    SUBJID=compress(SUBJID,"-");
    CMIDX="CM"||left(_N_);
    CMINDCAE=substr(CMINDCAE,find(CMINDCAE,"^","i")+1);

    if CMONGO=. then do; CMSTATUS="none"; end;
    if CMONGO=1 then do; CMSTATUS="having"; end;

    array varchar{*} _character_;
        do q=1 to dim(varchar);
        varchar{q}=trim(upcase(varchar{q}));
        end;
    run;

/* CM_에 DM 딕셔너리 left join */
data CM_; retain CMIDX SUBJID SEX AGE SEQ CMTRT CMINDCAE; set CM;
    format SEX SEX.;
    merge CM(in=x) DM(in=y); by SUBJID; if x;
    run;

/* sort_values(by="SUBJID") */
proc sort data=CM_ out=CM_; by SUBJID SEQ CMTRT CMINDCAE; run;

proc print data=CM_(obs=10) noobs; run;
""")
HTML(resp["LST"])

CMIDX,SUBJID,SEX,AGE,SEQ,CMTRT,CMINDCAE,CMONGO,CMSTATUS
CM1,S1Z018,Yeo,39,1,PLAKON POWD 3MG,LOCALIZED ITCHING,False,NONE
CM2,S1Z018,Yeo,39,2,COUGH SYR 20ML,PNEUMONIA,False,NONE
CM3,S1Z020,Nam,48,2,COUGH SYR 20ML,COMMON COLD,False,NONE
CM4,S1Z022,Yeo,38,1,XANAX XR TAB 0.5MG,ANXIETY AGGRAVATED,False,NONE
CM5,S1Z022,Yeo,38,2,DOPAMINE DAEWOO INJ 200MG/5ML,ANXIETY AGGRAVATED,False,NONE
CM6,S1Z025,Yeo,49,1,COMP URSA SC,AST INCREASED,True,HAVING
CM7,S1Z026,Yeo,50,1,BUSCOPAN TAB,VOMITING,False,NONE
CM8,S1Z029,Nam,45,1,TALLERGY TAB 10MG,DRUG ALLERGY,False,NONE
CM9,S1Z032,Nam,35,1,TYLENOL TAB 500MG,FEVER,False,NONE
CM10,S1Z032,Nam,35,2,EZN6 DOUBLE SC,FEVER,True,HAVING


#### 4. CUBE - CMDMLB - Manipulating

In [5]:
resp=sas.submit("""
title font="Consolas" "LB without LBTEST value";

data LB; rename SEQ=LBSEQ VISIT=LBVISIT; set CUBE.LB;
	SUBJID=compress(SUBJID,"-");
	LBTEST=trim(upcase(LBTEST));
	run;

proc print data=LB(where=(LBTEST="")) noobs; run;
""")
HTML(resp["LST"])

SUBJID,LBVISIT,LBSEQ,LBTEST,LBORRES,LBNOR,LBCLSIG
SHW001,1,14,,9.68,.,.


In [6]:
resp=sas.submit("""
title font="Consolas" "Unstacking LB";

proc sort data=LB out=LB_(drop=LBNOR LBCLSIG);
	by SUBJID LBVISIT LBSEQ;
	run;

proc transpose data=LB_ out=LB_(drop=_NAME_);
	by SUBJID LBVISIT LBSEQ; id LBTEST; var LBORRES;
	run;

proc print data=LB_(obs=5) noobs; run;
""")
HTML(resp["LST"])

SUBJID,LBVISIT,LBSEQ,ERYTHROCYTES,HEMOGLOBIN,HEMATOCRIT,PLATELETS,LEUKOCYTES,PROTEIN,ALBUMIN,ASPARTATE AMINOTRANSFERASE,ALANINE AMINOTRANSFERASE,BILIRUBIN,HEMOGLOBIN A1C,SPECIFIC GRAVITY[U],PH[U],ALBUMIN [U],OCCULT BLOOD[U],PROTEIN[U],CREATININE,"GLOMERULAR FILTRATION RATE, ESTI"
S1Z005,1,1,10,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.
S1Z005,1,2,.,18,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.
S1Z005,1,3,.,.,50,.,.,.,.,.,.,.,.,.,.,.,.,.,.,.
S1Z005,1,4,.,.,.,500,.,.,.,.,.,.,.,.,.,.,.,.,.,.
S1Z005,1,5,.,.,.,.,12,.,.,.,.,.,.,.,.,.,.,.,.,.


#### 5. CUBE - 
* Read sets

In [77]:
import os
import pandas as pd
cube=[q for q in os.scandir("c:/code/CUBEDEMO2017/SASSET") if q.name.lower().endswith(".csv")]
data={os.path.splitext(q.name)[0].upper():pd.read_csv(q.path) for q in cube}
dm=data["DM"]
cm=data["CM"]
lb=data["LB"]

* Columns with smallest NaNs are index (key)

In [69]:
cm.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 195 entries, 0 to 194
Data columns (total 24 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   SUBJID     195 non-null    object 
 1   VISIT      195 non-null    int64  
 2   SEQ        195 non-null    int64  
 3   CMTRT      185 non-null    object 
 4   CMDOSTOT   157 non-null    float64
 5   CMDOSU     170 non-null    object 
 6   CMROUTE    169 non-null    object 
 7   CMSTDTC    179 non-null    object 
 8   CMONGO     105 non-null    float64
 9   CMENDTC    78 non-null     object 
 10  CMINDC     177 non-null    float64
 11  CMINDCMH   38 non-null     object 
 12  CMINDCAE   70 non-null     object 
 13  CMINDCO    14 non-null     object 
 14  ATCVER     99 non-null     float64
 15  LV2        99 non-null     object 
 16  LV4        91 non-null     object 
 17  GENERIC    149 non-null    object 
 18  CODINGCMT  0 non-null      float64
 19  ATCCD      3 non-null      object 
 20  LV1       

In [70]:
keys=["SUBJID","VISIT","SEQ"]
cm[keys].isna().sum()

SUBJID    0
VISIT     0
SEQ       0
dtype: int64

* Dictionary for NaNs in each columns: For preventing exceptions in using high-level methods

In [80]:
nanDict={
    "CMTRT":"#medName",
    "CMDOSTOT":"#doseTotal",
    "CMDOSU":"#doseUnit",
    "CMROUTE":"#txRoute",
    "CMINDCAE":"#indcatedAE",
    "CMINDC":"#indication"
    }
cm=cm.fillna(nanDict)
cm[cm.CMTRT==nanDict["CMTRT"]]

Unnamed: 0,SUBJID,VISIT,SEQ,CMTRT,CMDOSTOT,CMDOSU,CMROUTE,CMSTDTC,CMONGO,CMENDTC,...,ATCVER,LV2,LV4,GENERIC,CODINGCMT,ATCCD,LV1,LV3,LV5,INV_ATCCD
5,S-1Z-010,5004,1,#medName,#doseTotal,#doseUnit,#txRoute,,1.0,,...,,,,,,,,,,
116,S-2Z-038,5004,2,#medName,#doseTotal,#doseUnit,#txRoute,2015-04-20,,2015-02-14,...,,,,,,,,,,
154,S-4Z-001,3004,4,#medName,#doseTotal,Tablet,Intramuscular,,,,...,,,,,,,,,,
155,S-4Z-001,3004,5,#medName,#doseTotal,Capsule,Topical,,,,...,,,,,,,,,,
156,S-4Z-001,3004,6,#medName,#doseTotal,#doseUnit,#txRoute,,,,...,,,,,,,,,,
160,S-4Z-001,5004,4,#medName,#doseTotal,Tablet,Intramuscular,,,,...,,,,,,,,,,
161,S-4Z-001,5004,5,#medName,#doseTotal,Capsule,Topical,,,,...,,,,,,,,,,
162,S-4Z-001,5004,6,#medName,#doseTotal,#doseUnit,#txRoute,,,,...,,,,,,,,,,
192,S1Z008,3004,1,#medName,#doseTotal,#doseUnit,#txRoute,,,,...,,,,,,,,,,
193,S1Z008,5004,1,#medName,#doseTotal,#doseUnit,#txRoute,2017-02-08,,,...,,,,,,,,,,


* Aggregating: Per-column values

In [81]:
cm=cm.assign(_ctnt=cm.apply(lambda q:f"{q.CMTRT},{q.CMINDCAE}")

TypeError: sequence item 1: expected str instance, float found