# About the notebook

Setting Python environment

In [1]:
### libraries
import numpy as np
import pickle
import glob
import os
import re

from rpy2.robjects import r
import matplotlib.pyplot as plt

### helper function
head = lambda x, n = 6: x[:n]
tail = lambda x, n = 6: x[-n:]

### set directories
dat_dir01 = "/data/SMPK"
dat_dir02 = "/data/clintko/SMPK"

In [2]:
%matplotlib inline
%load_ext rpy2.ipython

Setting R environment

In [3]:
%%R -i dat_dir01,dat_dir02

suppressMessages(suppressWarnings(library(tidyverse)))
library(MPSK)
cat("Set Directories\n")
print(dat_dir01)
print(dat_dir02)

Set Directories
[1] "/data/SMPK"
[1] "/data/clintko/SMPK"


result files

In [4]:
fnames = !ls /data/SMPK/res_*_GEN070XT_CMVpp65.pic
for fn in fnames:
    print(fn)

/data/SMPK/res_05_GEN070XT_CMVpp65.pic
/data/SMPK/res_06_GEN070XT_CMVpp65.pic
/data/SMPK/res_07_GEN070XT_CMVpp65.pic
/data/SMPK/res_08_GEN070XT_CMVpp65.pic
/data/SMPK/res_09_GEN070XT_CMVpp65.pic
/data/SMPK/res_10_GEN070XT_CMVpp65.pic
/data/SMPK/res_11_GEN070XT_CMVpp65.pic
/data/SMPK/res_12_GEN070XT_CMVpp65.pic
/data/SMPK/res_13_GEN070XT_CMVpp65.pic
/data/SMPK/res_14_GEN070XT_CMVpp65.pic
/data/SMPK/res_15_GEN070XT_CMVpp65.pic
/data/SMPK/res_20_GEN070XT_CMVpp65.pic
/data/SMPK/res_25_GEN070XT_CMVpp65.pic
/data/SMPK/res_30_GEN070XT_CMVpp65.pic
/data/SMPK/res_35_GEN070XT_CMVpp65.pic
/data/SMPK/res_40_GEN070XT_CMVpp65.pic
/data/SMPK/res_45_GEN070XT_CMVpp65.pic
/data/SMPK/res_50_GEN070XT_CMVpp65.pic


In [5]:
priors = [re.search("res_(.*)_GEN070XT_CMVpp65.pic", fn).group(1) for fn in fnames]
r.assign("priors", priors)
print(priors)

['05', '06', '07', '08', '09', '10', '11', '12', '13', '14', '15', '20', '25', '30', '35', '40', '45', '50']


# import data

In [6]:
for fn in fnames:
    ### subtract the prior number of each results 
    prior = re.search("res_(.*)_GEN070XT_CMVpp65.pic", fn).group(1)
    
    ### read in the 
    print(fn)
    print(prior)
    with open(fn, 'rb') as f:
        res = pickle.load(f)
        r.assign("res_p" + prior, res)

/data/SMPK/res_05_GEN070XT_CMVpp65.pic
05
/data/SMPK/res_06_GEN070XT_CMVpp65.pic
06
/data/SMPK/res_07_GEN070XT_CMVpp65.pic
07
/data/SMPK/res_08_GEN070XT_CMVpp65.pic
08
/data/SMPK/res_09_GEN070XT_CMVpp65.pic
09
/data/SMPK/res_10_GEN070XT_CMVpp65.pic
10
/data/SMPK/res_11_GEN070XT_CMVpp65.pic
11
/data/SMPK/res_12_GEN070XT_CMVpp65.pic
12
/data/SMPK/res_13_GEN070XT_CMVpp65.pic
13
/data/SMPK/res_14_GEN070XT_CMVpp65.pic
14
/data/SMPK/res_15_GEN070XT_CMVpp65.pic
15
/data/SMPK/res_20_GEN070XT_CMVpp65.pic
20
/data/SMPK/res_25_GEN070XT_CMVpp65.pic
25
/data/SMPK/res_30_GEN070XT_CMVpp65.pic
30
/data/SMPK/res_35_GEN070XT_CMVpp65.pic
35
/data/SMPK/res_40_GEN070XT_CMVpp65.pic
40
/data/SMPK/res_45_GEN070XT_CMVpp65.pic
45
/data/SMPK/res_50_GEN070XT_CMVpp65.pic
50


In [7]:
%%R
print(grep("res", ls(), value = TRUE))
cat("=======================\n")
print(names(res_p05))
print(names(res_p06))
print(class(res_p05))
print(class(res_p06))

 [1] "res_p05" "res_p06" "res_p07" "res_p08" "res_p09" "res_p10" "res_p11"
 [8] "res_p12" "res_p13" "res_p14" "res_p15" "res_p20" "res_p25" "res_p30"
[15] "res_p35" "res_p40" "res_p45" "res_p50"
[1] "chain" "data"  "prior" "pmc"  
[1] "chain" "data"  "prior" "pmc"  
[1] "MPSK"
[1] "MPSK"


Markers

In [8]:
%%R -i dat_dir01

### read markers data and arrange markers
markers = read_table(file.path(dat_dir01, "markers.txt"), col_names = FALSE)
markers = separate(
    markers, 
    col  = "X1", 
    into = c("label", "color", "measure"), 
    sep  = " ")
### assign scatter names
markers$color[1:2]   = c("FSC-A", "SSC-A")
markers$measure[1:2] = c("FSC-A", "SSC-A")
### print the results
markers

cols(
  X1 = col_character()
)



# A tibble: 10 x 3
   label  color      measure
   <chr>  <chr>      <chr>  
 1 FSC-A  FSC-A      FSC-A  
 2 SSC-A  SSC-A      SSC-A  
 3 Aqua   Amine      FLR-A  
 4 CD3    APC-H7     FLR-A  
 5 CD4    PE-Cy7     FLR-A  
 6 CD8    PerCP-Cy55 FLR-A  
 7 TNFa   FITC       FLR-A  
 8 IL2    BV421      FLR-A  
 9 IFNg   APC        FLR-A  
10 CD107a PE         FLR-A  


# Calibration and Summarize

The code for calibration and summarizing
```
resRelab      = relabelChain(res)
resCalibrated = calibrate(resRelab)
chainSummary  = summarizeChain(resRelab)
```

Reference for the code below
- [Converting a String to a Variable Name On-The-Fly and Vice-versa in R](https://stackoverflow.com/questions/6034655/convert-string-to-a-variable-name)
- [Convert string to a variable name
](https://www.r-bloggers.com/converting-a-string-to-a-variable-name-on-the-fly-and-vice-versa-in-r/)

testing the MPSK results

```
%%R
varname = "res_p05"
tmp = eval(parse(text = varname))
print(class(tmp))
print(names(tmp))
cat("===================\n")
tmp_relab     = relabelChain(tmp)
print(class(tmp_relab))
print(names(tmp_relab))
cat("===================\n")
tmp_cal       = calibrate(tmp_relab)
print(class(tmp_cal))
print(names(tmp_cal))
cat("===================\n")
tmp_chain_sum = summarizeChain(tmp_relab)
print(class(tmp_chain_sum))
print(names(tmp_chain_sum))
```

```
%%R
### initialization
lst_relab        = list()
lst_calibrated   = list()
lst_chainSummary = list()

for (idx in 1:2) {
    ### set prior and variable name
    prior = priors[idx]
    varname = paste0("res_p", prior)
    #print(varname)
    
    ### MPSK calibration and summarization
    res = eval(parse(text = varname))
    resRelab      = relabelChain(res)
    resCalibrated = calibrate(resRelab)
    chainSummary  = summarizeChain(resRelab)
    
    ### observe the results
    cat("###################\n")
    cat(varname, "res\n")
    print(class(res))
    print(names(res))
    cat("===================\n")
    cat("resRelab\n")
    print(class(resRelab))
    print(names(resRelab))
    cat("===================\n")
    cat("resCalibrated\n")
    print(class(resCalibrated))
    print(names(resCalibrated))
    cat("===================\n")
    cat("chainSummary\n")
    print(class(chainSummary))
    print(names(chainSummary))
   
    
    ### store
    lst_relab[[idx]]        = resRelab
    lst_calibrated[[idx]]   = resCalibrated
    lst_chainSummary[[idx]] = chainSummary
    
    ### Recheck the results
    cat("+++++++++++++++++++\n")
    cat("tmp_relab\n")
    tmp_relab     = lst_relab[[idx]]
    print(class(tmp_relab))
    print(names(tmp_relab))
    cat("===================\n")
    cat("tmp_cal\n")
    tmp_cal       = lst_calibrated[[idx]]
    print(class(tmp_cal))
    print(names(tmp_cal))
    cat("===================\n")
    cat("tmp_chain_sum\n")
    tmp_chain_sum = lst_chainSummary[[idx]]
    print(class(tmp_chain_sum))
    print(names(tmp_chain_sum))
    
    ###
    #varname = paste0("res_p", prior, "_calibrate")
    #assign(varname, resCalibrated)
    ###
    #varname = paste0("res_p", prior, "_chain_sum")
    #assign(varname, chainSummary)
} # end for loop
```

Calibrate and summarize all the files

In [9]:
%%R

### initialization
lst_relab        = list()
lst_calibrated   = list()
lst_chainSummary = list()

for (idx in 1:length(priors)) {
    ### set prior and variable name
    prior = priors[idx]
    varname = paste0("res_p", prior)
    print(varname)
    
    ### MPSK calibration and summarization
    res = eval(parse(text = varname))
    resRelab      = relabelChain(res)
    resCalibrated = calibrate(resRelab)
    chainSummary  = summarizeChain(resRelab)
    
    ### store
    lst_relab[[idx]]        = resRelab
    lst_calibrated[[idx]]   = resCalibrated
    lst_chainSummary[[idx]] = chainSummary
} # end for loop

### reset the name of lists
names(lst_relab)        = priors
names(lst_calibrated)   = priors
names(lst_chainSummary) = priors

[1] "res_p05"
[1] "res_p06"
[1] "res_p07"
[1] "res_p08"
[1] "res_p09"
[1] "res_p10"
[1] "res_p11"
[1] "res_p12"
[1] "res_p13"
[1] "res_p14"
[1] "res_p15"
[1] "res_p20"
Error in calib(x$data$Y, matrix(C, ncol = 1), Z, x$chain$xi, dim(x$chain$xi),  : 
  std::bad_alloc


error: 

  std::bad_alloc





In [19]:
%%R
varname = "res_p06"
tmp = eval(parse(text = varname))
print(class(tmp))
print(names(tmp))
cat("===================\n")
tmp_relab     = relabelChain(tmp)
print(class(tmp_relab))
print(names(tmp_relab))
cat("===================\n")
tmp_cal       = calibrate(tmp_relab)
print(class(tmp_cal))
print(names(tmp_cal))
cat("===================\n")
tmp_chain_sum = summarizeChain(tmp_relab)
print(class(tmp_chain_sum))
print(names(tmp_chain_sum))

[1] "MPSK"
[1] "chain" "data"  "prior" "pmc"  
[1] "MPSK"
[1] "chain" "data"  "prior" "pmc"  
Error in calib(x$data$Y, matrix(C, ncol = 1), Z, x$chain$xi, dim(x$chain$xi),  : 
  std::bad_alloc


check the #{samples} of the results

In [10]:
%%R
print(length(lst_relab))
print(length(lst_calibrated))
print(length(lst_chainSummary))

[1] 11
[1] 11
[1] 11


In [11]:
%%R
### init
lst_dat_mpsk_raw = list()
lst_dat_mpsk_cal = list()

### loop through each prior
for (prior in priors) {
    ### get 
    print(paste("Prior:", prior))
    resRelab      = lst_relab[[prior]]
    resCalibrated = lst_calibrated[[prior]]
    chainSummary  = lst_chainSummary[[prior]]
    
    ### extract needed data
    clust = chainSummary$t
    C     = resRelab$data$C
    Y_raw = resRelab$data$Y
    Y_cal = resCalibrated$Y_cal
    C0    = resRelab$data$C %>% as.vector %>% str_pad(., 2, pad = "0")

    ### assign column names
    colnames(Y_raw) = markers$label
    colnames(Y_cal) = markers$label

    ### combine MPSK cluster label with raw data
    dat = cbind(C, clust, Y_raw) %>% as.data.frame
    dat$V1 = C0
    colnames(dat)[1:2] = c("sample", "cluster")
    lst_dat_mpsk_raw[[prior]] = dat
    dat_mpsk_raw = dat

    ### combine MPSK cluster label with calibrated data
    dat = cbind(C, clust, Y_cal) %>% as.data.frame
    dat$V1 = C0
    colnames(dat)[1:2] = c("sample", "cluster")
    lst_dat_mpsk_cal[[prior]] = dat
} # end for loop

[1] "Prior: 05"
Error in `colnames<-`(`*tmp*`, value = markers$label) : 
  attempt to set 'colnames' on an object with less than two dimensions


  attempt to set 'colnames' on an object with less than two dimensions



check the results

In [12]:
%%R
print(length(lst_dat_mpsk_raw))
print(length(lst_dat_mpsk_cal))

[1] 0
[1] 0


In [13]:
%%R
print(class(lst_dat_mpsk_raw[["05"]]))
print( head(lst_dat_mpsk_raw[["05"]]))

[1] "NULL"
NULL


In [14]:
%%R
print(class(lst_dat_mpsk_cal[["05"]]))
print( head(lst_dat_mpsk_cal[["05"]]))

[1] "NULL"
NULL


# Store the results

store the results into RDS file

In [15]:
%%R
saveRDS(dat_mpsk_raw,  file.path(dat_dir02, "ep8cs_priors_dat_mpsk_raw.RDS"))
saveRDS(dat_mpsk_cal,  file.path(dat_dir02, "ep8cs_priors_dat_mpsk_cal.RDS"))


Error in saveRDS(dat_mpsk_raw, file.path(dat_dir02, "ep8cs_priors_dat_mpsk_raw.RDS")) : 
  object 'dat_mpsk_raw' not found


  object 'dat_mpsk_raw' not found



Check if the files really exists

In [16]:
!ls /data/clintko/SMPK

OSError: [Errno 12] Cannot allocate memory