# Tutorial 1

NSCLC PBMCs Single Cell RNA-Seq (Fig. 2a,b):
* This example builds a signature matrix from single-cell RNA sequencing data from NSCLC PBMCs and enumerates the proportions of the different cell types in an RNA-seq dataset profiled from whole blood, using S-mode batch correction.


## set up logging

In [None]:
import logging

In [None]:
# Configure the root logger with logging's defaults
# (this is a no-op if the root logger already has handlers installed).
logging.basicConfig()

## download data

In [None]:
%%bash

# Download the Tirosh melanoma mixture file into the shared example directory
# and list the directory contents afterwards.
export BASE_URL="https://cibersortx.stanford.edu/inc/inc.download.page.handler.php"

# Stop here if the target directory is missing, so curl does not write the
# download into whatever the current directory happens to be.
pushd /mnt/liulab/csx_example_files/ || exit 1

# The URL is quoted so that bash does not treat "?" as a glob character, and
# it is built with ${BASE_URL} instead of relying on curl's {…} URL globbing
# to strip stray braces.
# curl -O -J -L "${BASE_URL}?file=NSCLC_PBMCs_Single_Cell_RNA-Seq_Fig2ab.zip"
# unzip NSCLC_PBMCs_Single_Cell_RNA-Seq_Fig2ab.zip
curl -O -J -L "${BASE_URL}?file=RNA-Seq_mixture_melanoma_Tirosh_Fig2b-d.txt"
tree -h
popd

### read data into dataframes

In [None]:
import pandas as pd
# Let DEBUG-level (and higher) records from the "pandas" logger through.
logging.getLogger("pandas").setLevel(logging.DEBUG)

In [None]:
# Single-cell reference sample table from the NSCLC PBMC example files.
path = "/mnt/liulab/csx_example_files/Fig2ab-NSCLC_PBMCs/Fig2ab-NSCLC_PBMCs_scRNAseq_refsample.txt"

# Tab-separated file; the first column is used as the row index.
nsclc_pbmc_sc = pd.read_csv(path, sep="\t", index_col=0)

In [None]:
# Sum down the rows: one total per column of the reference sample table.
nsclc_pbmc_sc.sum(axis=0)

In [None]:
# Check that the mixture file fetched in the download cell is present.
!ls -l /mnt/liulab/csx_example_files/RNA-Seq_mixture_melanoma_Tirosh_Fig2b-d.txt

In [None]:
# Mixture table downloaded into the shared example directory.
path = "/mnt/liulab/csx_example_files/RNA-Seq_mixture_melanoma_Tirosh_Fig2b-d.txt"

# Tab-separated file; the first column is used as the row index.
tirosh_tumor_mixtures = pd.read_csv(path, sep="\t", index_col=0)

tirosh_tumor_mixtures

## run csx with docker

```
docker run \
    -v absolute/path/to/input/dir:/src/data \
    -v absolute/path/to/output/dir:/src/outdir \
    cibersortx/fractions \
    --username email_address_registered_on_CIBERSORTx_website \
    --token token_obtained_from_CIBERSORTx_website \
    --single_cell TRUE \
    --refsample Fig2ab-NSCLC_PBMCs_scRNAseq_refsample.txt \
    --mixture Fig2b-WholeBlood_RNAseq.txt \
    --fraction 0 \
    --rmbatchSmode TRUE 

```

In [None]:
# Long listing of the shared example-file directory.
!ls -l /mnt/liulab/csx_example_files

In [None]:
%%bash

# Stage the mixture and single-cell reference files into the CIBERSORTx input
# directory, then run the cibersortx/fractions container on them with S-mode
# batch correction enabled.
#
# Credentials are read from the CSX_USERNAME and CSX_TOKEN environment
# variables (inherited from the notebook kernel) instead of being hardcoded.
# NOTE(review): any token that was ever committed in this cell should be
# treated as leaked and regenerated on the CIBERSORTx website.
: "${CSX_USERNAME:?set CSX_USERNAME to the email registered on the CIBERSORTx website}"
: "${CSX_TOKEN:?set CSX_TOKEN to the token obtained from the CIBERSORTx website}"

export CSX_INPUT_DIR="/home/jupyter/csx/input"
export CSX_OUTPUT_DIR="/home/jupyter/csx/output"

export MIXTURE_FILE="RNA-Seq_mixture_melanoma_Tirosh_Fig2b-d.txt"
export REFSAMPLE_FILE="Fig2ab-NSCLC_PBMCs_scRNAseq_refsample.txt"

# Both bind-mount sources must exist before rsync and docker use them.
mkdir -p "$CSX_INPUT_DIR" "$CSX_OUTPUT_DIR"

# Copy the first file named $1 found under /mnt/liulab/ to $CSX_INPUT_DIR/$2.
# Taking exactly one match keeps rsync from receiving zero or several sources.
stage_input() {
    local src
    src="$(find /mnt/liulab/ -name "$1" -print -quit)"
    if [ -z "$src" ]; then
        echo "could not find $1 under /mnt/liulab/" >&2
        return 1
    fi
    rsync -v "$src" "$CSX_INPUT_DIR/$2"
}

# Abort before calling docker if either input could not be staged.
stage_input "$MIXTURE_FILE" mixture.txt || exit 1
stage_input "$REFSAMPLE_FILE" refsample.txt || exit 1

ls -hl "$CSX_INPUT_DIR"

# --user runs the container with the caller's uid:gid.
docker run \
    --rm \
    -v "$CSX_INPUT_DIR":/src/data \
    -v "$CSX_OUTPUT_DIR":/src/outdir \
    --user "$(id -u):$(id -g)" \
    cibersortx/fractions:latest \
    --username "$CSX_USERNAME" \
    --token "$CSX_TOKEN" \
    --single_cell TRUE \
    --refsample refsample.txt \
    --mixture mixture.txt \
    --rmbatchSmode TRUE \
    --verbose TRUE

# Options that are currently not passed to the container:
#     --perm 10 \
#     --fraction 0 \
#     --sourceGEPs signature_matrix.txt

In [None]:
# Show the CIBERSORTx working directory tree with human-readable file sizes.
!tree -h /home/jupyter/csx/

In [None]:
# Signature matrix file found in the CIBERSORTx output directory.
path = "/home/jupyter/csx/output/CIBERSORTx_sigmatrix_Adjusted.txt"

# Tab-separated file; the first column is used as the row index.
learned_sigmatrix = pd.read_csv(path, sep="\t", index_col=0)

In [None]:
# Display the signature matrix loaded from the CIBERSORTx output.
learned_sigmatrix

In [None]:
# Select the mixture column labelled '53' (a string label).
tirosh_tumor_mixtures['53']

In [None]:
# Inner-join the signature matrix with mixture column '53' on their row indexes.
learned_sigmatrix.merge(
    tirosh_tumor_mixtures['53'],
    left_index=True,
    right_index=True,
)

In [None]:
# NOTE(review): this cell repeats the index-on-index merge of the signature
# matrix with mixture column '53' shown in the preceding cell; consider removing it.
pd.merge(learned_sigmatrix, tirosh_tumor_mixtures['53'], left_index=True, right_index=True)

# attempt to infer fractions myself from the sigmatrix and mixture

In [None]:
from sklearn.svm import NuSVR
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

In [None]:
# Align the signature matrix and mixture '53' on their shared index labels.
_combined_data = learned_sigmatrix.merge(
    tirosh_tumor_mixtures['53'],
    left_index=True,
    right_index=True,
)

# The mixture column is merged in last, so it becomes the target y;
# every preceding (signature matrix) column is a feature in X.
X, y = _combined_data.values[:, :-1], _combined_data.values[:, -1]

y.shape, X.shape

In [None]:
# Pipeline: standardise each feature column, then fit a linear-kernel NuSVR
# (default hyperparameters) of the mixture values on the signature matrix columns.
regr = make_pipeline(StandardScaler(), NuSVR(kernel='linear'))
regr.fit(X, y)

In [None]:
import numpy as np

# Weights of the fitted linear NuSVR step, one per signature matrix column.
# Stored under a descriptive name rather than `_`, which IPython uses for the
# most recent cell output; assigning to it shadows that variable.
svr_coefs = regr.named_steps['nusvr'].coef_

# Rescale the weights so they sum to 1.
# NOTE(review): SVR weights can be negative, so these ratios are not guaranteed
# to be valid proportions — confirm whether negatives should be zeroed first.
svr_coefs / np.sum(svr_coefs)

# check fractions inferred by csx

In [None]:
# List every file under the CIBERSORTx output directory whose name ends in "txt".
!find /home/jupyter/csx/output -name '*txt'

In [None]:
# Results table found in the CIBERSORTx output directory.
path = "/home/jupyter/csx/output/CIBERSORTx_Adjusted.txt"

# Tab-separated file indexed by its first column; show the row labelled 53.
# NOTE(review): the label here is the integer 53, whereas the mixture column
# selected earlier is the string '53' — presumably read_csv parsed the sample
# IDs in this file as integers; confirm.
pd.read_csv(path, sep="\t", index_col=0).loc[53]

# extra: peek at the example signature matrix and whole-blood mixture files

In [None]:
# Signature matrix file from the example directory
# (tab-separated, first column as the row index).
example_sigmatrix_path = "/mnt/liulab/csx_example_files/Fig2ab-NSCLC_PBMCs/Fig2ab-NSCLC_PBMCs_scRNAseq_sigmatrix.txt"
pd.read_csv(example_sigmatrix_path, sep="\t", index_col=0)

In [None]:
# Whole-blood RNA-seq mixture file from the example directory
# (tab-separated, first column as the row index).
whole_blood_path = "/mnt/liulab/csx_example_files/Fig2ab-NSCLC_PBMCs/Fig2b-WholeBlood_RNAseq.txt"
pd.read_csv(whole_blood_path, sep="\t", index_col=0)