# About the notebook

In [2]:
### import libraries
suppressMessages(suppressWarnings(library(tidyverse)))
suppressMessages(suppressWarnings(library(MPSK)))
suppressMessages(suppressWarnings(library(tsne)))
suppressMessages(suppressWarnings(library(pheatmap)))
suppressMessages(suppressWarnings(library(RColorBrewer)))
suppressMessages(suppressWarnings(library(flowCore)))
suppressMessages(suppressWarnings(library(flowStats)))
suppressMessages(suppressWarnings(library(flowViz)))
suppressMessages(suppressWarnings(library(flowMatch)))

### set directories
dat_dir01 = "/data/SMPK"
dat_dir02 = "/data/clintko/SMPK"

# Import data

previous code

```
%%Python
target_dir = "/data/SMPK"
with open(os.path.join(target_dir, "res0_GEN070XT_CMVpp65.pic"), 'rb') as f:
    res = pickle.load(f)

%%R -i res
resRelab      = relabelChain(res)
resCalibrated = calibrate(resRelab)
chainSummary  = summarizeChain(resRelab)
```

import results from MPSK

In [27]:
res           = readRDS(file = file.path(dat_dir02, "ep8cs_mpsk_raw.RDS"))
resRelab      = readRDS(file = file.path(dat_dir02, "ep8cs_mpsk_relab.RDS")) 
resCalibrated = readRDS(file = file.path(dat_dir02, "ep8cs_mpsk_cal.RDS"))
chainSummary  = readRDS(file = file.path(dat_dir02, "ep8cs_mpsk_chainSummary.RDS")) 

In [28]:
print(names(res))
print("=====================")
print(names(resRelab))
print("=====================")
print(names(resCalibrated))
print("=====================")
print(names(chainSummary))

[1] "chain" "data"  "prior" "pmc"  
[1] "chain" "data"  "prior" "pmc"  
[1] "Y_cal"                    "calibration_distribution"
[3] "calibration_median"      
 [1] "xi0"      "psi"      "alpha"    "W"        "xi"       "Omega"   
 [7] "Sigma"    "E"        "meanvec"  "meanvec0" "t"        "S"       
[13] "varphi"   "a0"      


the cluster label assigned by MPSK is `chainSummary$t`

In [32]:
### shape of data
print(dim(res$data$Y))
print(dim(resCalibrated$Y_cal))

### size of label should be equal to the number of observations
print(length(chainSummary$t))

[1] 180000     10
[1] 180000     10
[1] 180000


Import markers

In [33]:
markers = read_table(file.path(dat_dir01, "markers.txt"), col_names = FALSE)
markers = separate(
    markers, 
    col  = "X1", 
    into = c("label", "color", "measure"), 
    sep  = " ")
markers$color[1:2]   = c("FSC-A", "SSC-A")
markers$measure[1:2] = c("FSC-A", "SSC-A")

markers

Parsed with column specification:
cols(
  X1 = col_character()
)
“Expected 3 pieces. Missing pieces filled with `NA` in 2 rows [1, 2].”

label,color,measure
FSC-A,FSC-A,FSC-A
SSC-A,SSC-A,SSC-A
Aqua,Amine,FLR-A
CD3,APC-H7,FLR-A
CD4,PE-Cy7,FLR-A
CD8,PerCP-Cy55,FLR-A
TNFa,FITC,FLR-A
IL2,BV421,FLR-A
IFNg,APC,FLR-A
CD107a,PE,FLR-A


# Rearrange the results
combine the MPSK cluster label and sample label

In [40]:
### extract needed data
clust = chainSummary$t
C     = resRelab$data$C
Y     = resRelab$data$Y
Y_cal = resCalibrated$Y_cal

### assign column names
colnames(Y)     = markers$label
colnames(Y_cal) = paste(markers$label, "Cal")

### raw and calibrated data
dat_mpsk = cbind(C, clust, Y) %>% as.data.frame
colnames(dat_mpsk)[1:2] = c("sample", "cluster")

dat_mpsk_cal = cbind(C, clust, Y_cal) %>% as.data.frame
colnames(dat_mpsk_cal)[1:2] = c("sample", "cluster")

### observe the results
cat("=======================\n")
head(dat_mpsk)

cat("=======================\n")
head(dat_mpsk_cal)

cat("===========================")
print(table(dat_mpsk$sample))



sample,cluster,FSC-A,SSC-A,Aqua,CD3,CD4,CD8,TNFa,IL2,IFNg,CD107a
1,4,-1.1416363,-0.09986633,-0.4210673,0.7761014,2.2731346,0.6444498,-0.1937829,-0.02015737,-0.27344867,0.60710339
1,3,-0.8405893,-0.73858079,1.2407957,-0.4222472,-0.5658801,0.6557209,-0.6118518,-0.02377409,-0.34964856,-0.03757027
1,1,0.5107179,-0.84609384,-1.0598249,-0.3661789,-0.5889247,-1.8527354,-1.1840413,-0.03447264,-0.42590688,-0.02672243
1,4,-1.5681802,-0.57996979,-0.5588165,0.231985,1.8415205,-0.1502302,-0.5595654,0.02146696,-0.51009832,-0.01391902
1,8,-1.5371229,0.32463843,-0.2535651,-0.677213,2.0872188,-1.3166316,-0.6453749,-0.22168409,-0.88871667,0.15053694
1,1,-0.2959737,-0.29703637,-0.2005701,-0.6302307,0.4919176,-0.84542,-0.2646515,2.4412415,0.02647195,0.33899091




sample,cluster,FSC-A Cal,SSC-A Cal,Aqua Cal,CD3 Cal,CD4 Cal,CD8 Cal,TNFa Cal,IL2 Cal,IFNg Cal,CD107a Cal
1,4,-0.96173656,-0.2160766,-0.862618,0.85745,2.0318348,0.6358294,-0.46653128,-0.417213,-0.3461938,0.39110337
1,3,-0.82900217,-0.7012622,1.1034122,-0.741483,-0.9433545,0.7592594,-0.5265506,-0.6186187,-0.2796178,-0.12884297
1,1,0.75371023,-0.7951407,-1.3776832,-0.6027146,-0.9719115,-2.1713238,-0.84220419,-0.7038922,-0.309145,-0.08001407
1,4,-1.39160671,-0.6938895,-1.0015626,0.3112666,1.6011798,-0.1616408,-0.83067807,-0.3725543,-0.5801865,-0.22684893
1,8,-1.40720126,0.2935883,-0.7124403,-0.8000352,1.8639001,-1.5923484,-0.72045281,-0.5326612,-0.9030198,-0.02101916
1,1,-0.05520724,-0.2477739,-0.5175293,-0.8658211,0.1091478,-1.160491,0.07482237,1.7730925,0.141918,0.28623963


    1     2     3     4     5     6     7     8     9    10    11    12    13 
10000 10000 10000 10000 10000 10000 10000 10000 10000 10000 10000 10000 10000 
   14    15    16    17    18 
10000 10000 10000 10000 10000 


# 