In [1]:
import shutil

import numpy as np
import pandas as pd

In [2]:
# Show NumPy floats in fixed-point notation instead of scientific notation.
np.set_printoptions(suppress=True)

In [3]:
# Single seeded generator shared by every random draw below, so that the
# simulated data is reproducible when the cells are run in order.
rng = np.random.default_rng(0)

In [4]:
# Number of simulated mixture samples and their zero-padded labels.
s = 101
samples = [f"sample_{j:03d}" for j in range(s)]

In [5]:
# Number of simulated cell types and their zero-padded labels.
c = 2
cell_types = [f"cell_{i:02d}" for i in range(c)]
cell_types

['cell_00', 'cell_01']

In [6]:
# Number of simulated genes and their zero-padded placeholder symbols.
g = 200
genes = [f"FAKE{i:03d}" for i in range(g)]

In [7]:
# Known cell-type fractions per sample: the first cell type falls linearly
# from 1 to 0 across the samples and the second is its complement, so every
# row sums to 1.
fractions = (
    pd.DataFrame({cell_types[0]: np.linspace(1, 0, s)}, index=samples)
    .assign(**{cell_types[1]: lambda frame: 1.0 - frame[cell_types[0]]})
    .rename_axis('Mixture')
)

fractions

Unnamed: 0_level_0,cell_00,cell_01
Mixture,Unnamed: 1_level_1,Unnamed: 2_level_1
sample_000,1.00,0.00
sample_001,0.99,0.01
sample_002,0.98,0.02
sample_003,0.97,0.03
sample_004,0.96,0.04
...,...,...
sample_096,0.04,0.96
sample_097,0.03,0.97
sample_098,0.02,0.98
sample_099,0.01,0.99


In [8]:
# Ground-truth gene expression profile (GEP) of each cell type: one value per
# (gene, cell type), drawn uniformly from [20, 200).
cell_type_geps = pd.DataFrame(
    data=rng.uniform(low=20.0, high=200.0, size=(g, c)),
    index=pd.Index(genes, name='GeneSymbol'),
    columns=cell_types,
)

cell_type_geps

Unnamed: 0_level_0,cell_00,cell_01
GeneSymbol,Unnamed: 1_level_1,Unnamed: 2_level_1
FAKE000,134.653104,68.561608
FAKE001,27.375234,22.974974
FAKE002,166.388643,184.296004
FAKE003,129.194440,151.309381
FAKE004,117.852498,188.313036
...,...,...
FAKE195,170.649873,156.367385
FAKE196,144.428866,184.335339
FAKE197,168.105284,52.231284
FAKE198,154.680370,35.602638


In [9]:
# Simulate bulk mixtures as a matrix product plus noise:
#   (genes x cell types) @ (cell types x samples) -> (genes x samples)
#    cell_type_geps      @  fractions.T
# Each entry is then perturbed by independent uniform noise in [-1, 1).
mixture_noise = rng.uniform(low=-1.0, high=1.0, size=(g, s))

mixtures = (cell_type_geps @ fractions.T) + mixture_noise

mixtures

Mixture,sample_000,sample_001,sample_002,sample_003,sample_004,sample_005,sample_006,sample_007,sample_008,sample_009,...,sample_091,sample_092,sample_093,sample_094,sample_095,sample_096,sample_097,sample_098,sample_099,sample_100
GeneSymbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
FAKE000,134.057440,134.867999,132.520827,131.680158,131.655286,132.330018,130.217004,130.688088,128.712011,128.877626,...,74.116245,73.940426,73.412697,72.748695,71.631859,71.336816,71.515893,69.739487,69.908553,67.724256
FAKE001,28.125691,28.214644,26.810957,26.267429,27.165241,26.520646,28.054481,27.862613,27.944543,27.186950,...,24.161946,23.246345,23.793229,23.209244,23.612392,22.785343,23.886713,22.594396,22.031331,23.417306
FAKE002,166.741852,166.881520,167.121620,167.098392,166.335495,167.622419,166.476282,167.007844,167.662989,167.757044,...,183.388056,183.163810,182.590903,183.733440,183.271516,184.545237,183.616237,184.612249,183.146014,184.732446
FAKE003,128.991397,129.413608,129.034390,130.716911,129.478327,130.423360,130.716027,131.459372,130.896968,131.844567,...,149.752246,149.284668,149.184105,149.800987,150.081770,151.415383,151.362803,151.108900,150.476067,151.685210
FAKE004,118.370496,117.707881,119.020682,119.620010,120.811783,121.681632,121.442906,122.724055,124.473677,123.225652,...,182.678208,182.035104,183.331248,184.250409,185.329650,186.376569,186.300436,187.747062,187.281549,188.841676
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
FAKE195,169.944376,169.951569,170.067371,170.087322,169.202699,169.479284,170.783015,169.755240,169.905255,170.101374,...,158.625332,157.998984,157.402573,156.320802,157.613435,156.531422,157.601515,157.300847,156.174198,156.561594
FAKE196,144.847848,145.820019,145.228259,145.815023,145.427479,146.067131,147.039130,147.466818,147.075901,147.858507,...,179.766013,180.370093,181.655064,182.808203,182.553507,183.142752,182.937720,184.423022,183.316285,185.123124
FAKE197,167.533250,167.766147,164.959946,163.825281,163.119605,162.640280,160.234174,160.762430,159.310305,157.954253,...,61.797920,62.394482,60.901263,59.104962,58.165690,56.860397,56.138237,54.754392,53.385341,52.133430
FAKE198,155.275945,152.606270,152.766743,151.443956,149.923550,149.539899,148.064437,147.193798,145.384113,144.159819,...,46.248190,45.735166,43.175705,41.974796,40.643691,40.682768,38.904444,37.419579,36.983603,34.785333


In [10]:
# Sanity check: True would mean at least one simulated GEP value is negative.
np.any(cell_type_geps.to_numpy() < 0)

False

In [11]:
# Sanity check: True would mean the added noise pushed a mixture value below zero.
np.any(mixtures.to_numpy() < 0)

False

# Set up CIBERSORTx GEP imputation

In [12]:
import os

In [13]:
base_path = os.path.abspath("./5_results")
!rm -r $base_path
os.makedirs(base_path, exist_ok=True)
os.makedirs(os.path.join(base_path, "in"), exist_ok=True)
!tree $base_path

[01;34m/home/jupyter/deconv/5_results[00m
└── [01;34min[00m

1 directory, 0 files


In [14]:
# Write the simulated mixtures as a tab-separated file into the "in" directory.
path = os.path.join(base_path, "in", "mixtures.txt")
mixtures.to_csv(path_or_buf=path, sep="\t")
print(path)

/home/jupyter/deconv/5_results/in/mixtures.txt


In [15]:
path = os.path.join(base_path, "fractions.txt")
fractions.to_csv(
    path,
    sep="\t"
)
print(path)
!head -5 $path

/home/jupyter/deconv/5_results/fractions.txt
Mixture	cell_00	cell_01
sample_000	1.0	0.0
sample_001	0.99	0.010000000000000009
sample_002	0.98	0.020000000000000018
sample_003	0.97	0.030000000000000027


In [16]:
path = os.path.join(base_path, "in", "cell_type_geps_known.txt")

cell_type_geps.to_csv(
    path,
    sep="\t"
)
print(path)
!head -5 $path

/home/jupyter/deconv/5_results/in/cell_type_geps_known.txt
GeneSymbol	cell_00	cell_01
FAKE000	134.65310371786177	68.56160847749666
FAKE001	27.375234308515044	22.974974395135238
FAKE002	166.38864305604903	184.2960039099899
FAKE003	129.19443963809238	151.3093809771197


In [17]:
!docker run \
    --rm \
    -it \
    -v $base_path/in:/src/data \
    -v $base_path:/src/outdir \
    cibersortx/hires:latest


CIBERSORTx - impute high resolution cell type-specific expression from bulk genomic profiles

For instructions and terms of use, see cibersort.stanford.edu

Usage:
docker run <bind_mounts> cibersortxhires [Options] 

Bind Mounting:
> 2 directories must be bind mounted to be accessed within the docker container: 
    1) Input file dir 
        > Format: -v {dir_path}:/src/data 
        > contains (if applicable): {mixture, sigmatrix, classes, sourceGEPs 
                                     groundtruth, degclasses, qvalues,  
                                     subsetgenes}  
        > if rmbatchBmode or useadjustedmixtures set to FALSE, contains <mixture> 
    2) Outdir 
        > Format: -v {dir_path}:/src/outdir 
        > contains (if applicable): {cibresults, filtered} 
        > if rmbatchBmode and useadjustedmixtures set to TRUE, contains <mixture> 
> Note: Absolute paths required

Authorization Parameters:
--username      <string>  Email used for login to cibersortx.stanford.e

In [18]:
!rm $base_path/CIBERSORT*

!docker run \
    --rm \
    -it \
    -v $base_path/in:/src/data \
    -v $base_path:/src/outdir \
    cibersortx/hires:latest \
    --username lyronctk@stanford.edu \
    --token dfeba2c8b9d61daebee5fa87026b8e56 \
    --username lyronctk@stanford.edu \
    --mixture mixtures.txt \
    --cibresults fractions.txt

'''
    --sigmatrix cell_type_geps_known.txt
'''

!sudo chown -R jupyter:jupyter $base_path

rm: cannot remove '/home/jupyter/deconv/5_results/CIBERSORT*': No such file or directory
>Running CIBERSORTx high-resolution GEP imputation...
>[Options] username: lyronctk@stanford.edu
>[Options] token: dfeba2c8b9d61daebee5fa87026b8e56
>[Options] username: lyronctk@stanford.edu
>[Options] mixture: mixtures.txt
>[Options] cibresults: fractions.txt
>Loaded 101 mixture samples, 200 genes, and 2 cell subsets...
>Window size adaptively set to 8
>Imputing high-resolution cell type GEPs...done.
>Writing output to disk ...done.
>Running time (sec): 3


In [19]:
!tree -h $base_path

[01;34m/home/jupyter/deconv/5_results[00m
├── [ 37K]  CIBERSORTxHiRes_NA_Heatmap_cell_00_Window8.png
├── [ 40K]  CIBERSORTxHiRes_NA_Heatmap_cell_01_Window8.png
├── [429K]  CIBERSORTxHiRes_NA_cell_00_Window8.txt
├── [428K]  CIBERSORTxHiRes_NA_cell_01_Window8.txt
├── [3.2K]  fractions.txt
└── [4.0K]  [01;34min[00m
    ├── [8.8K]  cell_type_geps_known.txt
    └── [367K]  mixtures.txt

1 directory, 7 files


In [20]:
# pd.set_option('display.precision', 3)

In [30]:
# Load the imputed per-sample expression for cell_00 and average each gene
# across its columns, shown as a single wide row for comparison with the
# ground-truth profile.
imputed_path = os.path.join(base_path, "CIBERSORTxHiRes_NA_cell_00_Window8.txt")
pd.read_csv(imputed_path, sep="\t", index_col=0).mean(axis="columns").to_frame().T

GeneSymbol,FAKE000,FAKE001,FAKE002,FAKE003,FAKE004,FAKE005,FAKE006,FAKE007,FAKE008,FAKE009,...,FAKE190,FAKE191,FAKE192,FAKE193,FAKE194,FAKE195,FAKE196,FAKE197,FAKE198,FAKE199
0,134.801225,27.112119,166.833867,129.893131,118.025047,166.827573,174.160086,151.016928,175.318942,74.165154,...,28.059972,190.357232,173.049113,90.671692,168.468571,169.824184,144.355822,168.104639,155.256798,96.359932


In [22]:
# Ground-truth cell_00 profile as a single wide row, laid out like the imputed means.
cell_type_geps[["cell_00"]].T

GeneSymbol,FAKE000,FAKE001,FAKE002,FAKE003,FAKE004,FAKE005,FAKE006,FAKE007,FAKE008,FAKE009,...,FAKE190,FAKE191,FAKE192,FAKE193,FAKE194,FAKE195,FAKE196,FAKE197,FAKE198,FAKE199
cell_00,134.653104,27.375234,166.388643,129.19444,117.852498,166.85364,174.33277,151.33798,175.372206,73.94814,...,27.311161,190.104437,173.36942,90.432876,168.320325,170.649873,144.428866,168.105284,154.68037,96.654123
