# Tutorial 1

Single Cell RNA-Seq Melanoma (Fig. 3c-e) (Tutorial 1):

This example builds a signature matrix from single cell RNA sequencing data from melanoma (Tirosh et al., Science, 2016) and enumerates the proportions of the different cell types in bulk melanoma tumors reconstituted from single cell RNA-Seq data.

```
docker run \
    -v absolute/path/to/input/dir:/src/data \
    -v absolute/path/to/output/dir:/src/outdir \
    cibersortx/fractions \
    --username email_address_registered_on_CIBERSORTx_website \
    --token token_obtained_from_CIBERSORTx_website \
    --single_cell TRUE \
    --refsample scRNA-Seq_reference_melanoma_Tirosh_SuppFig_3b-d.txt \
    --mixture mixture_melanoma_Tirosh_SuppFig_3b-d.txt

```

## import stuff

In [None]:
import os

In [None]:
import pandas as pd

In [None]:
import numpy as np

np.set_printoptions(precision=6, suppress=True)  # don't use scientific notation

In [None]:
import plotly.express as px
import plotly.graph_objects as go
import plotly.io

plotly.io.renderers.default = 'jupyterlab+png'  # makes plots visible on github

### find input files

In [None]:
# List every regular file under the lab bucket whose name contains
# "tirosh" or "melanoma" (case-insensitive match on the file name).
!find /mnt/buckets/liulab/ -type f -iname '*tirosh*' -o -type f -iname '*melanoma*'

### read data into dataframes

#### single cell

In [None]:
# Single-cell reference file from the CIBERSORTx example data
# (the same file name the docker command passes as --refsample).
path = (
    "/mnt/buckets/liulab/csx_example_files/Single_Cell_RNA-Seq_Melanoma_SuppFig_3b-d/"
    "scRNA-Seq_reference_melanoma_Tirosh_SuppFig_3b-d.txt"
)

# Tab-separated table; the first column becomes the row index.
tirosh_sc = pd.read_csv(path, sep='\t', index_col=0)

tirosh_sc

Figure out where Derek's single-cell data is stored by searching the bucket for the string `CY116CD45neg`:

In [None]:
# Recursively (-R) and case-insensitively (-i) search the lab bucket and print
# only the names (-l) of files that contain the string "CY116CD45neg".
!grep -Ril "CY116CD45neg" /mnt/buckets/liulab/

In [None]:
# Metadata table found by the search for Derek's single-cell data.
path = "/mnt/buckets/liulab/derek/scRNAseq/melanoma/metadata_all.txt"

# Tab-separated; keep the default integer index. skiprows=[1] drops the file's
# second line (the first line after the header).
# NOTE(review): presumably that line is an annotation row rather than data -- confirm.
tirosh_sc_derek_metadata = pd.read_csv(path, sep='\t', skiprows=[1])

#### figuring out this data...
It looks like `CLINIC == TRUE` means the row is a cell (and `FALSE` means it is a fragment?).

In [None]:
# Tally how many metadata rows carry each distinct CLINIC value.
clinic_flags = tirosh_sc_derek_metadata['CLINIC']
clinic_flags.value_counts()

In [None]:
# Count rows whose LABEL contains the substring "Mel" (True) versus rows that do not (False).
label_has_mel = tirosh_sc_derek_metadata['LABEL'].str.contains("Mel")
label_has_mel.value_counts()

In [None]:
# Peek at ten randomly chosen metadata rows; the fixed seed keeps the selection
# the same on every run.
tirosh_sc_derek_metadata.sample(n=10, random_state=0)

In [None]:
# Show only the rows flagged CLINIC == TRUE.
# The comparison is made on an upper-cased string view of the column because
# pandas may parse a TRUE/FALSE column as booleans while loading; comparing such
# a column against the string "TRUE" would silently match nothing. This form
# works whether the column holds strings or booleans.
is_clinic = tirosh_sc_derek_metadata['CLINIC'].astype(str).str.upper() == "TRUE"
tirosh_sc_derek_metadata[is_clinic]

In [None]:
# Mixture file from the CIBERSORTx example data
# (the same file name the docker command passes as --mixture).
path = (
    "/mnt/buckets/liulab/csx_example_files/Single_Cell_RNA-Seq_Melanoma_SuppFig_3b-d/"
    "mixture_melanoma_Tirosh_SuppFig_3b-d.txt"
)

# Tab-separated table; the first column becomes the row index.
tirosh_mixtures = pd.read_csv(path, sep='\t', index_col=0)

tirosh_mixtures

## run csx with docker

In [None]:
# Run the tutorial's shell script; the path is relative to the notebook's
# working directory.
# NOTE(review): presumably this wraps the `docker run cibersortx/fractions ...`
# command shown at the top of the notebook -- confirm against the script.
!./scripts/tut1-csx_fractions_melanoma.sh

## inspect csx results

### find output files

In [None]:
# List, in sorted order, every CIBERSORTx_Adjusted.txt found under the csx-runs directory.
!find /mnt/buckets/liulab/csx-runs/ -name CIBERSORTx_Adjusted.txt | sort

In [None]:
# Directory of the single run inspected in the rest of the notebook.
# NOTE(review): the folder name looks like a timestamp and was presumably picked
# by hand from the listing of runs -- update it to inspect a different run.
csx_run_dir = "/mnt/buckets/liulab/csx-runs/20210715_160345/"

### look at sig matrix, mixtures

In [None]:
# Signature matrix file in the run's "out" folder.
path = os.path.join(csx_run_dir, "out", "CIBERSORTx_sigmatrix_Adjusted.txt")
print(path)

# Tab-separated table; the first column becomes the row index.
learned_sigmatrix = pd.read_csv(path, sep='\t', index_col=0)

In [None]:
# Peek at ten random rows of the signature matrix. The seed is fixed (as in the
# metadata preview earlier in the notebook) so that re-running the notebook
# shows the same rows.
learned_sigmatrix.sample(10, random_state=0)

In [None]:
# Mixture file in the run's "in" folder.
path = os.path.join(csx_run_dir, "in", "mymixture.txt")
print(path)

# Tab-separated table; the first column becomes the row index.
mixtures = pd.read_csv(path, sep='\t', index_col=0)

In [None]:
# Put mixture column '53' next to the signature-matrix columns, keeping only the
# row labels present in both tables (index-on-index inner join).
learned_sigmatrix.merge(
    mixtures['53'],
    left_index=True,
    right_index=True,
)

### attempt inferring fractions myself with sigmatrix, mixture

In [None]:
import numpy as np

from sklearn.svm import NuSVR
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

In [None]:
# Print floats with 6 decimals and without scientific notation
# (the same options already set in the import section at the top of the notebook).
np.set_printoptions(precision=6, suppress=True)

In [None]:
# Join the signature matrix with mixture column '53' on their shared row labels,
# then split the joined table: every column except the last becomes X, and the
# last column (the mixture) becomes y.
_combined_data = learned_sigmatrix.merge(mixtures['53'], left_index=True, right_index=True)
_combined_matrix = _combined_data.values
X, y = _combined_matrix[:, :-1], _combined_matrix[:, -1]
y.shape, X.shape

In [None]:
# Fit a linear nu-SVR of the mixture column (y) on the signature-matrix columns
# (X), which the pipeline standardizes first, and read the fitted weights.
regr = make_pipeline(StandardScaler(), NuSVR(nu=0.5, kernel='linear'))
regr.fit(X, y)

# Use a descriptive name rather than `_`: assigning to `_` overrides IPython's
# "last output" variable, which then stops being updated for the session.
svr_coefficients = regr.named_steps['nusvr'].coef_

# Rescale the weights so they sum to 1, to read them as fractions.
# NOTE(review): negative weights are not clipped before normalising -- confirm
# whether that is intended for this comparison.
svr_coefficients / np.sum(svr_coefficients)

## check fractions inferred by csx

In [None]:
# Fractions file in the run's "out" folder.
path = os.path.join(csx_run_dir, "out", "CIBERSORTx_Adjusted.txt")
print(path)

# Tab-separated table; the first column becomes the row index.
inferred_fractions = pd.read_csv(path, sep='\t', index_col=0)

inferred_fractions

## check groundtruth fractions

did they have data for this?

In [None]:
# Ground-truth fractions file from the CIBERSORTx example data.
path = "/mnt/buckets/liulab/csx_example_files/groundtruth_Melanoma_Tirosh_et_al_SuppFig3b-d.txt"
print(path)

# Tab-separated table; the first column becomes the row index.
gt_fractions = pd.read_csv(path, sep='\t', index_col=0)

gt_fractions

In [None]:
# Restrict the inferred fractions to the columns that also appear in the ground
# truth, in the ground truth's column order.
inferred_fractions.loc[:, gt_fractions.columns]

In [None]:
# Reshape the ground truth from wide (rows x columns) to long form: a Series
# named 'proportion' whose two index levels are named 'Mixture' (former row
# label) and 'cell_type' (former column label).
gt_fractions_stacked = (
    gt_fractions
    .stack()
    .rename('proportion')
    .rename_axis(['Mixture', 'cell_type'])
)
gt_fractions_stacked

In [None]:
# Reshape the inferred fractions (ground-truth columns only) from wide to long
# form: a Series named 'proportion' with one entry per (row label, column label).
# Both index levels are named explicitly -- not only 'cell_type' -- so the index
# names are identical to those of the stacked ground truth, whatever header the
# results file used for its first column. The index-on-index merge of the two
# series relies on those names agreeing.
inferred_fractions_stacked = (
    inferred_fractions[gt_fractions.columns]
    .stack()
    .rename('proportion')
    .rename_axis(['Mixture', 'cell_type'])
)
inferred_fractions_stacked

In [None]:
# Pair each ground-truth proportion with the inferred proportion that has the
# same index entry. Both inputs are named 'proportion', so pandas applies its
# default suffixes: 'proportion_x' is the ground truth (left) and
# 'proportion_y' is the inferred value (right).
inferred_and_truth = pd.merge(
    left=gt_fractions_stacked,
    right=inferred_fractions_stacked,
    left_index=True,
    right_index=True,
)

In [None]:
# Scatter ground truth (x) against inferred proportion (y), one point per row of
# the merged table.
# Passing the bare 2-D array made plotly express treat it as wide-form data and
# plot each column against the row number, instead of plotting the two columns
# against each other.
# Columns are taken by position: the merge puts the ground truth first (left
# input) and the inferred values second (right input).
truth_column = inferred_and_truth.columns[0]
inferred_column = inferred_and_truth.columns[1]

px.scatter(
    # Flatten the MultiIndex into ordinary columns before handing the frame to plotly express.
    inferred_and_truth.reset_index(),
    x=truth_column,
    y=inferred_column,
    labels={
        truth_column: 'ground truth proportion',
        inferred_column: 'inferred proportion',
    },
)