In [1]:
import os
import shutil

import numpy as np
import pandas as pd

In [2]:
# rng = np.random.default_rng(seed=0)

In [3]:
# Number of synthetic mixture samples to generate.
s = 101
# One label per sample; the index is zero-padded to at least two digits.
samples = [f"fake_sample_{idx:02d}" for idx in range(s)]

In [4]:
# Number of synthetic cell types.
c = 2
# One label per cell type; the index is zero-padded to at least two digits.
cell_types = [f"fake_cell_type_{idx:02d}" for idx in range(c)]
cell_types

['fake_cell_type_00', 'fake_cell_type_01']

In [5]:
# Ground-truth cell type fractions: one row per sample (index named
# "Mixture"), one column per cell type. The first cell type falls linearly
# from 1 to 0 across the samples and the second is the remainder, so every
# row sums to 1 up to floating-point rounding.
fractions = (
    pd.DataFrame(index=pd.Index(samples, name="Mixture"))
    .assign(**{cell_types[0]: np.linspace(1, 0, s)})
    .assign(**{cell_types[1]: lambda frame: 1.0 - frame[cell_types[0]]})
)

fractions

Unnamed: 0_level_0,fake_cell_type_00,fake_cell_type_01
Mixture,Unnamed: 1_level_1,Unnamed: 2_level_1
fake_sample_00,1.00,0.00
fake_sample_01,0.99,0.01
fake_sample_02,0.98,0.02
fake_sample_03,0.97,0.03
fake_sample_04,0.96,0.04
...,...,...
fake_sample_96,0.04,0.96
fake_sample_97,0.03,0.97
fake_sample_98,0.02,0.98
fake_sample_99,0.01,0.99


In [18]:
g = 1  # number of genes
# NOTE(review): `g` is not referenced by any other cell visible here, and the
# expression table built further down defines three genes by name rather than
# generating `g` of them — confirm whether `g` (and the generator below) is
# still needed.
# genes = list(f"fake_gene_{i:02d}" for i in range(g))
# genes

In [7]:
# Known expression of each gene in each pure cell type: rows are genes (index
# named "GeneSymbol"), columns follow the order of `cell_types`.
# The table is built in one constructor call with an explicit float dtype,
# instead of enlarging an empty frame one row at a time, so the column dtype
# does not depend on how the installed pandas version handles row enlargement.
cell_type_geps = pd.DataFrame.from_dict(
    {
        "fake_gene_foo": [30.0, 80.0],
        "fake_gene_bar": [40.0, 50.0],
        "fake_gene_baz": [100.0, 20.0],
    },
    orient="index",
    columns=cell_types,
    dtype="float64",
).rename_axis("GeneSymbol")
cell_type_geps

Unnamed: 0_level_0,fake_cell_type_00,fake_cell_type_01
GeneSymbol,Unnamed: 1_level_1,Unnamed: 2_level_1
fake_gene_foo,30.0,80.0
fake_gene_bar,40.0,50.0
fake_gene_baz,100.0,20.0


In [8]:
# Simulate the bulk expression of every sample as a matrix product:
#   (genes x cell types) @ (cell types x samples) -> (genes x samples)
#    cell_type_geps         fractions.T
mixtures = cell_type_geps @ fractions.T

mixtures

Mixture,fake_sample_00,fake_sample_01,fake_sample_02,fake_sample_03,fake_sample_04,fake_sample_05,fake_sample_06,fake_sample_07,fake_sample_08,fake_sample_09,...,fake_sample_91,fake_sample_92,fake_sample_93,fake_sample_94,fake_sample_95,fake_sample_96,fake_sample_97,fake_sample_98,fake_sample_99,fake_sample_100
GeneSymbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
fake_gene_foo,30.0,30.5,31.0,31.5,32.0,32.5,33.0,33.5,34.0,34.5,...,75.5,76.0,76.5,77.0,77.5,78.0,78.5,79.0,79.5,80.0
fake_gene_bar,40.0,40.1,40.2,40.3,40.4,40.5,40.6,40.7,40.8,40.9,...,49.1,49.2,49.3,49.4,49.5,49.6,49.7,49.8,49.9,50.0
fake_gene_baz,100.0,99.2,98.4,97.6,96.8,96.0,95.2,94.4,93.6,92.8,...,27.2,26.4,25.6,24.8,24.0,23.2,22.4,21.6,20.8,20.0


# Set up CIBERSORTx GEP imputation

In [9]:
import os

In [10]:
base_path = os.path.abspath("./5_results")
!rm -r $base_path
os.makedirs(base_path, exist_ok=True)
os.makedirs(os.path.join(base_path, "in"), exist_ok=True)
!tree $base_path

[01;34m/home/jupyter/deconv/5_results[00m
└── [01;34min[00m

1 directory, 0 files


In [11]:
# Save the simulated bulk mixtures (genes x samples) as a tab-separated file
# inside the "in" subdirectory of the results folder.
path = os.path.join(base_path, "in", "mixtures.txt")
mixtures.to_csv(path_or_buf=path, sep="\t")
print(path)
# !head -5 $path

/home/jupyter/deconv/5_results/in/mixtures.txt


In [12]:
path = os.path.join(base_path, "fractions.txt")
fractions.to_csv(
    path,
    sep="\t"
)
print(path)
!head -5 $path

/home/jupyter/deconv/5_results/fractions.txt
Mixture	fake_cell_type_00	fake_cell_type_01
fake_sample_00	1.0	0.0
fake_sample_01	0.99	0.010000000000000009
fake_sample_02	0.98	0.020000000000000018
fake_sample_03	0.97	0.030000000000000027


In [13]:
path = os.path.join(base_path, "in", "cell_type_geps_known.txt")

cell_type_geps.to_csv(
    path,
    sep="\t"
)
print(path)
!head -5 $path

/home/jupyter/deconv/5_results/in/cell_type_geps_known.txt
GeneSymbol	fake_cell_type_00	fake_cell_type_01
fake_gene_foo	30.0	80.0
fake_gene_bar	40.0	50.0
fake_gene_baz	100.0	20.0


In [14]:
# Run the CIBERSORTx hires image without any tool options; as the recorded
# output shows, it responds by printing its usage text.
# The two -v flags bind-mount the "in" subdirectory at /src/data and the
# results directory at /src/outdir, matching the layout the usage text asks for.
!docker run \
    --rm \
    -it \
    -v $base_path/in:/src/data \
    -v $base_path:/src/outdir \
    cibersortx/hires:latest


CIBERSORTx - impute high resolution cell type-specific expression from bulk genomic profiles

For instructions and terms of use, see cibersort.stanford.edu

Usage:
docker run <bind_mounts> cibersortxhires [Options] 

Bind Mounting:
> 2 directories must be bind mounted to be accessed within the docker container: 
    1) Input file dir 
        > Format: -v {dir_path}:/src/data 
        > contains (if applicable): {mixture, sigmatrix, classes, sourceGEPs 
                                     groundtruth, degclasses, qvalues,  
                                     subsetgenes}  
        > if rmbatchBmode or useadjustedmixtures set to FALSE, contains <mixture> 
    2) Outdir 
        > Format: -v {dir_path}:/src/outdir 
        > contains (if applicable): {cibresults, filtered} 
        > if rmbatchBmode and useadjustedmixtures set to TRUE, contains <mixture> 
> Note: Absolute paths required

Authorization Parameters:
--username      <string>  Email used for login to cibersortx.stanford.e

In [15]:
!rm $base_path/CIBERSORT*

!docker run \
    --rm \
    -it \
    -v $base_path/in:/src/data \
    -v $base_path:/src/outdir \
    cibersortx/hires:latest \
    --username lyronctk@stanford.edu \
    --token dfeba2c8b9d61daebee5fa87026b8e56 \
    --username lyronctk@stanford.edu \
    --mixture mixtures.txt \
    --cibresults fractions.txt

'''
    --sigmatrix cell_type_geps_known.txt
'''

!sudo chown -R jupyter:jupyter $base_path

rm: cannot remove '/home/jupyter/deconv/5_results/CIBERSORT*': No such file or directory
>Running CIBERSORTx high-resolution GEP imputation...
>[Options] username: lyronctk@stanford.edu
>[Options] token: dfeba2c8b9d61daebee5fa87026b8e56
>[Options] username: lyronctk@stanford.edu
>[Options] mixture: mixtures.txt
>[Options] cibresults: fractions.txt
>Loaded 101 mixture samples, 3 genes, and 2 cell subsets...
>Window size adaptively set to 8
>Imputing high-resolution cell type GEPs...done.
>Writing output to disk ...done.
>Running time (sec): 1


In [16]:
# List the results directory after the run; -h adds human-readable file sizes.
!tree -h $base_path

[01;34m/home/jupyter/deconv/5_results[00m
├── [7.5K]  CIBERSORTxHiRes_NA_fake_cell_type_00_Window8.txt
├── [7.5K]  CIBERSORTxHiRes_NA_fake_cell_type_01_Window8.txt
├── [3.7K]  fractions.txt
└── [4.0K]  [01;34min[00m
    ├── [ 120]  cell_type_geps_known.txt
    └── [3.5K]  mixtures.txt

1 directory, 5 files


In [19]:
# Load the imputed per-sample expression table written for the first cell
# type, using the gene names in the first column as the index.
# NOTE(review): every value in the recorded preview is 1.0, which does not
# resemble the known profiles defined earlier — worth investigating.
pd.read_csv(
    filepath_or_buffer=os.path.join(
        base_path, "CIBERSORTxHiRes_NA_fake_cell_type_00_Window8.txt"
    ),
    index_col=0,
    sep="\t",
)

Unnamed: 0_level_0,fake_sample_00,fake_sample_01,fake_sample_02,fake_sample_03,fake_sample_04,fake_sample_05,fake_sample_06,fake_sample_07,fake_sample_08,fake_sample_09,...,fake_sample_91,fake_sample_92,fake_sample_93,fake_sample_94,fake_sample_95,fake_sample_96,fake_sample_97,fake_sample_98,fake_sample_99,fake_sample_100
GeneSymbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
fake_gene_foo,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fake_gene_bar,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
fake_gene_baz,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
