# About the notebook

In [1]:
### import libraries
suppressMessages(suppressWarnings(library(tidyverse)))
suppressMessages(suppressWarnings(library(MPSK)))

###
dat_dir = "/data/clintko/SMPK"

**helper function**

In [2]:
allclose = function(mat1, mat2, tol = 10^(-5)){
    dim1 = dim(mat1)
    dim2 = dim(mat2)
    stopifnot(all(dim1 == dim2))
    
    x = sum(mat1 - mat2)
    return(x < tol)
} # end func

check the function

In [3]:
### read "ep8cs_mpsk_prior05.RDS"
prior = "05"
fn  = paste0("ep8cs_mpsk_prior", prior, ".RDS")
lst = readRDS(file.path(dat_dir, fn))

In [4]:
print(names(lst[[1]]))
cat("=============\n")
print(names(lst[[2]]))
cat("=============\n")
print(names(lst[[3]]))

[1] "chain" "data"  "prior" "pmc"  
[1] "Y_cal"                    "calibration_distribution"
[3] "calibration_median"      
 [1] "xi0"      "psi"      "alpha"    "W"        "xi"       "Omega"   
 [7] "Sigma"    "E"        "meanvec"  "meanvec0" "t"        "S"       
[13] "varphi"   "a0"      


In [5]:
matrix(1:10, 2, 5)

0,1,2,3,4
1,3,5,7,9
2,4,6,8,10


In [6]:
allclose(matrix(1:10, 1, 10), matrix(1:10, 2, 5))

ERROR: Error: all(dim1 == dim2) is not TRUE


In [7]:
allclose(matrix(1:10, 2, 5), matrix(1:10, 2, 5))

# Get Raw data

Marker

In [8]:
### read markers data and arrange markers
markers = read_table(file.path("/data/SMPK", "markers.txt"), col_names = FALSE)
markers = separate(
    markers, 
    col  = "X1", 
    into = c("label", "color", "measure"), 
    sep  = " ")

### assign scatter names
markers$color[1:2]   = c("FSC-A", "SSC-A")
markers$measure[1:2] = c("FSC-A", "SSC-A")

### print the results
markers

Parsed with column specification:
cols(
  X1 = col_character()
)
“Expected 3 pieces. Missing pieces filled with `NA` in 2 rows [1, 2].”

label,color,measure
FSC-A,FSC-A,FSC-A
SSC-A,SSC-A,SSC-A
Aqua,Amine,FLR-A
CD3,APC-H7,FLR-A
CD4,PE-Cy7,FLR-A
CD8,PerCP-Cy55,FLR-A
TNFa,FITC,FLR-A
IL2,BV421,FLR-A
IFNg,APC,FLR-A
CD107a,PE,FLR-A


read two MPSK results

In [34]:
prior = "05"
fn    = paste0("ep8cs_mpsk_prior", prior, ".RDS")
lst1  = readRDS(file.path(dat_dir, fn))

prior = "06"
fn    = paste0("ep8cs_mpsk_prior", prior, ".RDS")
lst2  = readRDS(file.path(dat_dir, fn))

In [35]:
allclose(lst1[[1]]$data$Y, lst2[[2]]$data$Y)

Raw data

In [36]:
### init
tmp = lst1[[1]]$data

### get the raw data
Y_raw = tmp$Y
colnames(Y_raw) = markers$label

### get the sample id 
C0    = tmp$C %>% as.vector %>% str_pad(., 2, pad = "0")
dat = cbind(C0, Y_raw) %>% as.data.frame
colnames(dat)[1] = "sample"

### store the results
dat_mpsk_raw = dat

### check the result
head(dat_mpsk_raw)

sample,FSC-A,SSC-A,Aqua,CD3,CD4,CD8,TNFa,IL2,IFNg,CD107a
1,-1.14163633466038,-0.0998663297424269,-0.421067346083575,0.776101389553108,2.27313461740812,0.644449819581802,-0.193782896321195,-0.0201573743059543,-0.273448665686352,0.607103389533675
1,-0.840589282014768,-0.738580789812436,1.24079568244088,-0.422247218863449,-0.565880069839085,0.655720949526245,-0.611851812386504,-0.0237740919025737,-0.349648560256141,-0.0375702726874197
1,0.510717913710444,-0.846093843932749,-1.05982488841118,-0.366178880979045,-0.588924663736387,-1.85273535637385,-1.1840413301604,-0.0344726425982347,-0.425906878914202,-0.0267224278715797
1,-1.56818018038748,-0.579969789551865,-0.558816545209534,0.231985045771453,1.84152051476522,-0.150230219784546,-0.559565415643016,0.0214669603191592,-0.51009832198181,-0.0139190201834252
1,-1.53712285656791,0.324638432758144,-0.253565063213273,-0.677212987074833,2.08721883541798,-1.31663162011636,-0.645374923534049,-0.221684093395212,-0.888716670500201,0.150536943444253
1,-0.29597369902642,-0.297036369707201,-0.200570062232991,-0.630230673248019,0.491917617809735,-0.845420011596455,-0.264651458023937,2.44124150050734,0.0264719492124281,0.338990913722236


In [37]:
write_delim(dat_mpsk_raw, file.path(dat_dir, "ep8cs_dat_raw.txt"), delim = "\t")

# Extract Calibration results

In [31]:
grep("ep8cs_mpsk_prior", dir(dat_dir), value = TRUE)

In [32]:
fnames = grep("ep8cs_mpsk_prior", dir(dat_dir), value = TRUE)
priors = str_extract(fnames, "([0-9][0-9])")
print(priors)

 [1] "05" "06" "07" "08" "09" "10" "11" "12" "13" "14" "15" "20" "25" "30" "35"
[16] "40" "45" "50"


In [33]:
for (prior in priors) {
    print(paste("res_p", prior))
    
    ### get the results
    fn  = paste0("ep8cs_mpsk_prior", prior, ".RDS")
    lst = readRDS(file.path(dat_dir, fn))
    resRelab      = lst[[1]]
    resCalibrated = lst[[2]]
    chainSummary  = lst[[3]]
    
    ### extract needed data
    clust = chainSummary$t
    Y_cal = resCalibrated$Y_cal
    C0    = resRelab$data$C %>% as.vector %>% str_pad(., 2, pad = "0")
    
    ### assign column names
    colnames(Y_cal) = markers$label

    ### combine MPSK cluster label with calibrated data
    dat = cbind(C0, clust, Y_cal) %>% as.data.frame
    colnames(dat)[1:2] = c("sample", "cluster")
    
    ### store the results
    fn  = paste0("ep8cs_dat_cal_prior", prior, ".txt")
    write_delim(dat, file.path(dat_dir, fn), delim = "\t")
} # end for loop

[1] "res_p 05"
[1] "res_p 06"
[1] "res_p 07"
[1] "res_p 08"
[1] "res_p 09"
[1] "res_p 10"
[1] "res_p 11"
[1] "res_p 12"
[1] "res_p 13"
[1] "res_p 14"
[1] "res_p 15"
[1] "res_p 20"
[1] "res_p 25"
[1] "res_p 30"
[1] "res_p 35"
[1] "res_p 40"
[1] "res_p 45"
[1] "res_p 50"


In [26]:
fn

In [25]:
head(dat)

sample,cluster,FSC-A,SSC-A,Aqua,CD3,CD4,CD8,TNFa,IL2,IFNg,CD107a
1,3,-0.98423150292396,-0.16179342298508,-0.795520988622445,0.713207215414791,2.01062749661674,0.564377519812175,-0.290899686381339,-0.392517277473855,-0.209602530629948,0.400671774743342
1,5,-0.806028562483723,-0.721101037252474,1.09392118884391,-0.75723963706419,-0.933950298336434,0.745650932812885,-0.546633644246002,-0.619454385253928,-0.294870043265272,-0.262249440731844
1,4,0.75269105527197,-0.797409392676305,-1.39024611271943,-0.57610314126629,-0.97506550355159,-2.11399200191819,-0.854901812582802,-0.749277340384801,-0.306363607198003,-0.0913008538079166
1,3,-1.41077534865106,-0.641896882794518,-0.933270187748404,0.169090871633136,1.57901339397384,-0.230302519554173,-0.65668220570316,-0.350892942848741,-0.446252186925406,-0.220350634973757
1,3,-1.37971802483149,0.262711339515491,-0.628018705752143,-0.74010716121315,1.8247117146266,-1.39670391988598,-0.742491713594193,-0.594043996563113,-0.824870535443797,-0.0558946713460795
1,3,-0.123036655415598,-0.344019718473991,-0.562671117410012,-0.708774259895499,0.217465050249512,-0.944924910166226,-0.346659100168537,2.04986332617732,0.105045316671014,0.151658991872607
