# About the notebook

In [1]:
import numpy as np
import os
import pickle

def head(x, n=6):
    """Return the first ``n`` items of the sliceable ``x`` (default 6).

    A negative ``n`` follows ordinary slice semantics and drops the last
    ``|n|`` items instead.
    """
    return x[:n]


def tail(x, n=6):
    """Return the last ``n`` items of the sliceable ``x`` (default 6).

    ``n == 0`` yields an empty slice of ``x``; a bare ``x[-n:]`` would return
    all of ``x`` in that case because ``-0 == 0``. A negative ``n`` drops the
    first ``|n|`` items instead.
    """
    if n == 0:
        return x[:0]
    return x[-n:]

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Load the rpy2 IPython extension; the %%R cells in this notebook depend on it.
%load_ext rpy2.ipython

In [2]:
%%R
# Attach the tidyverse with its startup messages and warnings silenced.
suppressMessages(suppressWarnings(library(tidyverse)))
# Attach MPSK -- presumably the source of relabelChain(), calibrate() and
# summarizeChain() called in a later cell (TODO confirm).
library(MPSK)

# Import data

In [3]:
### data locations
# dat_dir01: directory the inputs are read from
# dat_dir02: directory the result tables are written to
dat_dir01, dat_dir02 = "/data/SMPK", "/data/clintko/SMPK"

### pickled result file opened by the next cell
file_path = os.path.join(dat_dir01, "res1_GEN070XT_CMVpp65.pic")

In [4]:
# Load the pickled object stored at file_path into `res`; a later %%R cell
# pulls `res` into the R session with `-i res`.
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so
# only open files that come from a trusted source.
with open(file_path, 'rb') as f:
    res = pickle.load(f)

In [5]:
%%R -i dat_dir01

### load the marker table: each line of markers.txt arrives as one text
### column (X1), which is split on single spaces into three fields
markers = file.path(dat_dir01, "markers.txt") %>%
    read_table(col_names = FALSE) %>%
    separate(col = "X1", into = c("label", "color", "measure"), sep = " ")

### the first two rows are the scatter channels: name them in both columns
markers$measure[1:2] = c("FSC-A", "SSC-A")
markers$color[1:2]   = c("FSC-A", "SSC-A")

### display the tidied table
markers

cols(
  X1 = col_character()
)



# A tibble: 10 x 3
   label  color      measure
   <chr>  <chr>      <chr>  
 1 FSC-A  FSC-A      FSC-A  
 2 SSC-A  SSC-A      SSC-A  
 3 Aqua   Amine      FLR-A  
 4 CD3    APC-H7     FLR-A  
 5 CD4    PE-Cy7     FLR-A  
 6 CD8    PerCP-Cy55 FLR-A  
 7 TNFa   FITC       FLR-A  
 8 IL2    BV421      FLR-A  
 9 IFNg   APC        FLR-A  
10 CD107a PE         FLR-A  


# Calibrate and summarize

In [6]:
%%R -i res
# `res` is pushed from the Python session into R by the -i flag.
# The chain is relabelled first; both the calibrated data and the chain
# summary are then computed from the relabelled result, not from `res`.
resRelab      = relabelChain(res)
resCalibrated = calibrate(resRelab)
chainSummary  = summarizeChain(resRelab)

Check the output: print the component names of each result object.

In [7]:
%%R
### for each object: print its title, an underline, then its component names
invisible(mapply(
    function(title, obj) {
        cat(title)
        cat("=======================\n")
        print(names(obj))
    },
    c("Results\n", "\nRelabel\n", "\nCalibrated\n", "\nChain Summary\n"),
    list(res, resRelab, resCalibrated, chainSummary)
))

Results
[1] "chain" "data"  "prior" "pmc"  

Relabel
[1] "chain" "data"  "prior" "pmc"  

Calibrated
[1] "Y_cal"                    "calibration_distribution"
[3] "calibration_median"      

Chain Summary
 [1] "xi0"      "psi"      "alpha"    "W"        "xi"       "Omega"   
 [7] "Sigma"    "E"        "meanvec"  "meanvec0" "t"        "S"       
[13] "varphi"   "a0"      


# Arrange the results

In [8]:
%%R

### extract needed data
clust = chainSummary$t       # becomes the "cluster" column
C     = resRelab$data$C      # becomes the "sample" column
Y     = resRelab$data$Y      # raw measurements, one column per marker
Y_cal = resCalibrated$Y_cal  # calibrated measurements, same columns as Y
C2    = C %>% as.vector %>% str_pad(., 2, pad = "0")  # C as zero-padded text, e.g. "01"

### assign column names
colnames(Y)     = markers$label
colnames(Y_cal) = markers$label

### helper: prepend the sample and cluster columns to a measurement matrix.
### The first column is replaced by position rather than by the auto-generated
### name "V1", so the result does not depend on whether C carries a column name.
bind_mpsk_labels = function(values) {
    labelled = cbind(C, clust, values) %>% as.data.frame
    labelled[[1]] = C2
    colnames(labelled)[1:2] = c("sample", "cluster")
    labelled
}

### combine MPSK cluster label with raw data
dat_mpsk_raw = bind_mpsk_labels(Y)

### combine MPSK cluster label with calibrated data
dat_mpsk_cal = bind_mpsk_labels(Y_cal)

### observe the results
cat("=======================\n")
print(head(dat_mpsk_raw))
cat("=======================\n")
print(head(dat_mpsk_cal))
cat("===========================\n")
print(table(dat_mpsk_raw$cluster))
cat("===========================\n")
print(table(dat_mpsk_raw$sample))

  sample cluster      FSC-A       SSC-A       Aqua        CD3        CD4
1     01       8 -1.1416363 -0.09986633 -0.4210673  0.7761014  2.2731346
2     01      21 -0.8405893 -0.73858079  1.2407957 -0.4222472 -0.5658801
3     01      37  0.5107179 -0.84609384 -1.0598249 -0.3661789 -0.5889247
4     01       8 -1.5681802 -0.57996979 -0.5588165  0.2319850  1.8415205
5     01       8 -1.5371229  0.32463843 -0.2535651 -0.6772130  2.0872188
6     01      45 -0.2959737 -0.29703637 -0.2005701 -0.6302307  0.4919176
         CD8       TNFa         IL2        IFNg      CD107a
1  0.6444498 -0.1937829 -0.02015737 -0.27344867  0.60710339
2  0.6557209 -0.6118518 -0.02377409 -0.34964856 -0.03757027
3 -1.8527354 -1.1840413 -0.03447264 -0.42590688 -0.02672243
4 -0.1502302 -0.5595654  0.02146696 -0.51009832 -0.01391902
5 -1.3166316 -0.6453749 -0.22168409 -0.88871667  0.15053694
6 -0.8454200 -0.2646515  2.44124150  0.02647195  0.33899091
  sample cluster      FSC-A       SSC-A       Aqua        CD3        

# Store the results

In [9]:
%%R -i dat_dir02
### write both tables as tab-delimited text into dat_dir02, which is pushed
### from the Python session by the -i flag
write_delim(dat_mpsk_raw,  file.path(dat_dir02, "ep8cs_prior50_dat_mpsk_raw.txt"),  delim = "\t")
write_delim(dat_mpsk_cal,  file.path(dat_dir02, "ep8cs_prior50_dat_mpsk_cal.txt"),  delim = "\t")

In [12]:
!ls -1 "/data/clintko/SMPK"

C.txt
ep8cs_dat_gaussnorm.txt
ep8cs_dat_mpsk_cal.txt
ep8cs_dat_mpsk_raw.txt
ep8cs_mpsk_cal.RDS
ep8cs_mpsk_chainSummary.RDS
ep8cs_mpsk_raw_prior50.RDS
ep8cs_mpsk_raw.RDS
ep8cs_mpsk_relab.RDS
ep8cs_prior50_dat_mpsk_cal.txt
ep8cs_prior50_dat_mpsk_raw.txt
ep8cs_tsne_gaussnorm.txt
ep8cs_tsne_idx.txt
ep8cs_tsne_mpsk_cal.txt
ep8cs_tsne_raw.txt
ep8cs_tsne_test.txt
Y_raw.txt


In [14]:
%%R
# show the marker labels, which were used as the data column names
markers$label

 [1] "FSC-A"  "SSC-A"  "Aqua"   "CD3"    "CD4"    "CD8"    "TNFa"   "IL2"   
 [9] "IFNg"   "CD107a"
