In [1]:
import json
import os

import pandas as pd

# set up env vars

In [None]:
# Locations used by every later cell (Python and shell alike): two local
# scratch directories plus the example mixture and signature-matrix files.
csx_env_vars = {
    "CIBERSORTX_INPUT_DATA_DIR": os.path.abspath("/tmp/csx/input"),
    "CIBERSORTX_OUTPUT_DATA_DIR": os.path.abspath("/tmp/csx/output"),
    "CIBERSORTX_MIXTURES": os.path.expanduser(
        "/mnt/buckets/liulab/csx_example_files/Single_Cell_RNA-Seq_Melanoma_SuppFig_3b-d/mixture_melanoma_Tirosh_SuppFig_3b-d.txt"
    ),
    "CIBERSORTX_SIGMATRIX": os.path.expanduser(
        "/mnt/buckets/liulab/csx_example_files/LM22.txt"
    ),
}

# Export them so that `!` shell commands can refer to them as $NAME.
for _name, _value in csx_env_vars.items():
    os.environ[_name] = _value

# verify input data

In [None]:
# Show the source mixture table (tab-separated, first column as index),
# ordered by index.
pd.read_csv(os.environ["CIBERSORTX_MIXTURES"], index_col=0, sep="\t").sort_index()

In [None]:
# Show the signature matrix (tab-separated, first column as index),
# ordered by index.
pd.read_csv(
    os.environ["CIBERSORTX_SIGMATRIX"],
    index_col=0,
    sep="\t",
).sort_index()

# run CIBERSORTx

## set up files and folders

In [None]:
# Start from a clean slate: delete any previous input and output directories.
!rm -rf $CIBERSORTX_INPUT_DATA_DIR
!rm -rf $CIBERSORTX_OUTPUT_DATA_DIR

# Recreate both directories (empty).
!mkdir -p $CIBERSORTX_INPUT_DATA_DIR
!mkdir -p $CIBERSORTX_OUTPUT_DATA_DIR

# Copy the mixture file into the input directory under the name mixtures.txt.
!rsync \
  $CIBERSORTX_MIXTURES \
  $CIBERSORTX_INPUT_DATA_DIR/mixtures.txt

# Copy the signature matrix into the input directory as signature_matrix.txt.
!rsync \
  $CIBERSORTX_SIGMATRIX \
  $CIBERSORTX_INPUT_DATA_DIR/signature_matrix.txt

# Show what has been staged.
!tree -h $CIBERSORTX_INPUT_DATA_DIR

In [None]:
import numpy as np

# Location of the staged mixture file inside the input directory.
_staging_dir = os.environ["CIBERSORTX_INPUT_DATA_DIR"]
input_staging_path = os.path.join(_staging_dir, "mixtures.txt")


def make_more_samples(mixtures_path=None, seed=0, scale=1.0):
    """Return the mixture table with a noisy copy of every sample appended.

    The table is read as tab-separated text with its first column as index.
    For every original column ``name`` a column ``name_noisy`` is added that
    holds the original values plus exponentially distributed noise.

    Columns whose name already ends in ``_noisy`` are discarded before the
    noise is generated. The notebook writes the result back over the file it
    was read from, so without this step every re-run of the cell would stack
    another layer of ``_noisy`` columns on top of the previous one.

    Parameters
    ----------
    mixtures_path : str or path-like, optional
        File to read. Defaults to the module-level ``input_staging_path``.
    seed : int, default 0
        Seed for ``numpy.random.default_rng``; a fixed seed makes the noise
        reproducible.
    scale : float, default 1.0
        Scale parameter of the exponential noise.

    Returns
    -------
    pandas.DataFrame
        Original columns followed by their ``_noisy`` counterparts.
    """
    noisy_suffix = "_noisy"
    if mixtures_path is None:
        mixtures_path = input_staging_path

    mixtures = pd.read_csv(mixtures_path, sep="\t", index_col=0)

    # Keep only the original samples so that repeated calls are idempotent.
    already_generated = mixtures.columns.astype(str).str.endswith(noisy_suffix)
    originals = mixtures.loc[:, ~already_generated]

    rng = np.random.default_rng(seed=seed)
    noise = rng.exponential(scale=scale, size=originals.shape)
    noisy = (originals + noise).add_suffix(noisy_suffix)
    return pd.concat([originals, noisy], axis=1)


# Overwrite the staged mixture file with the augmented table.
augmented_mixtures = make_more_samples()
augmented_mixtures.to_csv(input_staging_path, sep="\t")

In [None]:
# Re-read the staged mixture file to check what was written, ordered by index.
pd.read_csv(input_staging_path, index_col=0, sep="\t").sort_index()

## infer fractions


### run

In [None]:
!docker run \
  --rm \
  -v $CIBERSORTX_INPUT_DATA_DIR:/src/data \
  -v $CIBERSORTX_OUTPUT_DATA_DIR:/src/outdir \
  --user "$(id -u):$(id -g)" \
  cibersortx/fractions:latest \
  --username lyronctk@stanford.edu \
  --token dfeba2c8b9d61daebee5fa87026b8e56 \
  --mixture mixtures.txt \
  --sigmatrix signature_matrix.txt \
  --perm 10 \
  --verbose TRUE
  # --rmbatchSmode TRUE \
  # --sourceGEPs signature_matrix.txt


### check results

In [6]:
!tree -L 2 /mnt/buckets/liulab/csx-runs/20210715_160345/

[01;34m/mnt/buckets/liulab/csx-runs/20210715_160345/[00m
├── [01;34min[00m
│   ├── mymixture.txt
│   └── myrefsample.txt
└── [01;34mout[00m
    ├── CIBERSORTx_Adjusted.txt
    ├── CIBERSORTx_Mixtures_Adjusted.txt
    ├── CIBERSORTx_cell_type_sourceGEP.txt
    ├── CIBERSORTx_myrefsample_inferred_phenoclasses.CIBERSORTx_myrefsample_inferred_refsample.bm.K999.pdf
    ├── CIBERSORTx_myrefsample_inferred_phenoclasses.CIBERSORTx_myrefsample_inferred_refsample.bm.K999.txt
    ├── CIBERSORTx_myrefsample_inferred_phenoclasses.txt
    ├── CIBERSORTx_myrefsample_inferred_refsample.txt
    └── CIBERSORTx_sigmatrix_Adjusted.txt

2 directories, 10 files


In [None]:
# Number of leading result rows to keep.
# NOTE(review): presumably the original (non-noisy) samples, matching the
# ground-truth table used further down — confirm.
N_RESULT_ROWS = 19

fractions_inferred = (
    pd.read_csv(
        os.path.join(os.environ["CIBERSORTX_OUTPUT_DATA_DIR"], "CIBERSORTx_Results.txt"),
        sep="\t",
        index_col=0,
    )
    .iloc[:N_RESULT_ROWS]
    # assign() builds a new frame, so the extra column is never written into
    # a slice of the freshly read table (avoids SettingWithCopyWarning).
    .assign(
        **{
            "B cells total": lambda frame: frame["B cells naive"]
            + frame["B cells memory"]
        }
    )
)

fractions_inferred

In [None]:
from pprint import pprint

# Alphabetical list of the columns in the inferred-fractions table.
pprint(sorted(fractions_inferred.columns))

# compare fractions with groundtruth

In [None]:
# Ground-truth table (tab-separated, first column as index).
fractions_groundtruth = pd.read_csv(
    "/mnt/buckets/liulab/csx_example_files/groundtruth_Melanoma_Tirosh_et_al_SuppFig3b-d.txt",
    index_col=0,
    sep="\t",
)

fractions_groundtruth

In [None]:
from pprint import pprint

# Alphabetical list of the columns in the ground-truth table.
pprint(sorted(fractions_groundtruth.columns))

In [None]:
import plotly.express as px

In [None]:
# Stack both tables into long form, one value per (row, column) pair.
# The two stacks are paired by position, exactly as the raw value arrays were
# before; "B cells" in the ground truth is compared against "B cells total".
groundtruth_long = fractions_groundtruth[["T cells CD8", "B cells"]].stack()
inferred_long = fractions_inferred[["T cells CD8", "B cells total"]].stack()

# Named columns give the figure readable axis and legend titles.
fraction_comparison = pd.DataFrame(
    {
        "ground truth fraction": groundtruth_long.values,
        "inferred fraction": inferred_long.values,
        "cell type": inferred_long.index.get_level_values(-1),
    }
)

fig = px.scatter(
    fraction_comparison,
    x="ground truth fraction",
    y="inferred fraction",
    color="cell type",
    title="Inferred vs. ground-truth fractions",
)
fig.update_layout(
    width=600,
    height=500,
)
fig.update_xaxes(range=(0, 1))
fig.update_yaxes(
    range=(0, 1),
)

## infer GEPs

In [None]:
!docker run \
  --rm \
  -v $CIBERSORTX_INPUT_DATA_DIR:/src/data \
  -v $CIBERSORTX_OUTPUT_DATA_DIR:/src/outdir \
  --user "$(id -u):$(id -g)" \
  cibersortx/gep:latest \
  --username lyronctk@stanford.edu \
  --token dfeba2c8b9d61daebee5fa87026b8e56 \
  --mixture mixtures.txt \
  --sigmatrix signature_matrix.txt \
  --cibresults CIBERSORTx_Results.txt \
  --threads 4


### check results

In [None]:
# Recursive long listing of the output directory, sorted by modification time.
!ls -lRt $CIBERSORTX_OUTPUT_DATA_DIR

In [None]:
# Read the filtered GEP table written to the output directory
# (tab-separated, first column as index).
_geps_path = os.path.join(
    os.environ["CIBERSORTX_OUTPUT_DATA_DIR"],
    "CIBERSORTxGEP_GEPs_Filtered.txt",
)
inferred_geps = pd.read_csv(_geps_path, index_col=0, sep="\t")

In [None]:
inferred_geps  # append .clip(upper=100000) to cap extreme values while inspecting

In [None]:
import plotly.graph_objects as go

# Values above this cap are clipped before the log transform.
GEP_DISPLAY_CAP = 10000

# Missing values are shown as 0; log1p(x) equals log(x + 1).
heatmap_values = np.log1p(
    inferred_geps.fillna(0).clip(upper=GEP_DISPLAY_CAP).values
)

fig = go.Figure(
    data=go.Heatmap(
        z=heatmap_values,
        x=inferred_geps.columns,
        y=inferred_geps.index,
        hoverongaps=False,
        colorbar={"title": "log(value + 1)"},
    )
)
fig.update_layout(
    title=f"Inferred GEPs (missing values as 0, capped at {GEP_DISPLAY_CAP})",
)
fig.show()

# CLI instructions

In [None]:
# Run the container without any tool arguments.
# NOTE(review): presumably this prints the tool's command-line usage — confirm.
!docker run \
  --rm \
  -v $CIBERSORTX_INPUT_DATA_DIR:/src/data \
  -v $CIBERSORTX_OUTPUT_DATA_DIR:/src/outdir \
  cibersortx/gep:latest  # image names: gep, fractions, hires

* With batch correction: usually S-mode (`--rmbatchSmode TRUE`)? To be confirmed.
* Add permutations for significance testing (`--perm`).
* Quantile normalization (QN): FALSE.

```
--rmbatchSmode TRUE \
--perm 100 \
```

