In [1]:
import os
import shutil
import tempfile

import pandas as pd

from helpers.cell_type_naming import nice_to_weirds, weird_to_nice
from helpers.running_cibersortx.old_running_csx import DockerJob, Experiment, InputFile

In [2]:
# Parent GCS location under which this group of experiments is stored.
experiments_root = "gs://liulab/csx_experiments/cell_type_grouping"
# Handle for this notebook's experiment, "combine_lymphocytes"; later cells read
# its `local_path` and `gcs_uri` attributes.
experiment = Experiment(experiments_root, "combine_lymphocytes")

In [3]:
# List the parent directory of this experiment's local path.
!tree -h {experiment.local_path}/..

/mnt/buckets/liulab/csx_experiments/cell_type_grouping/combine_lymphocytes/.. [error opening dir]

0 directories, 0 files


## set up input files

### make version of sc refsample with lymphocytes (T CD8, T CD4, B, NK cells) combined

In [4]:
# Single-cell reference sample taken from the CIBERSORTx example files bucket.
# NOTE(review): presumably the first argument is the file name used inside the
# job's input directory and the second is the source URI — confirm against
# InputFile.
original_sc_refsample = InputFile(
    "screfsampletirosh.txt",
    "gs://liulab/csx_example_files/Single_Cell_RNA-Seq_Melanoma_SuppFig_3b-d/scRNA-Seq_reference_melanoma_Tirosh_SuppFig_3b-d.txt",
)

# Relabelled copy of the reference sample; its contents are written by a later
# cell.
# NOTE(review): the source URI is directly under `experiments_root`, not under
# this experiment's own directory, so any other experiment in the group that
# writes a modified refsample to the same path would overwrite this file —
# confirm that sharing the path is intended.
modified_sc_refsample = InputFile(
    "screfsamplemodified.txt", os.path.join(experiments_root, "screfsamplemodified.txt")
)

In [5]:
# Not necessary: the cell that rewrites the cell type labels writes the entire modified file.
# !gsutil cp {original_sc_refsample.source_uri} {modified_sc_refsample.source_uri}

In [6]:
# Cell types whose labels are merged in the reference sample, and the single
# label that replaces all of them.
cell_types_to_combine = ["T CD8", "T CD4", "B", "NK"]
combined_type = "Lymphocytes"

# Alternative grouping (T cells only):
# cell_types_to_combine = ["T CD8", "T CD4"]
# combined_type = "T"

In [7]:
# Make sure the local directory that will hold the modified reference sample
# exists (no error if it already does), then list what is in it.
thing = os.path.dirname(modified_sc_refsample.source_local_path)
os.makedirs(thing, exist_ok=True)
!tree -h {thing}

[01;34m/mnt/buckets/liulab/csx_experiments/cell_type_grouping[00m
├── [   0]  [01;34mcombine_t_cells[00m
│   ├── [3.9K]  CIBERSORTx_Adjusted.txt
│   ├── [854K]  CIBERSORTx_Mixtures_Adjusted.txt
│   ├── [3.4M]  CIBERSORTx_cell_type_sourceGEP.txt
│   ├── [132K]  CIBERSORTx_screfsamplemodified_inferred_phenoclasses.CIBERSORTx_screfsamplemodified_inferred_refsample.bm.K999.pdf
│   ├── [388K]  CIBERSORTx_screfsamplemodified_inferred_phenoclasses.CIBERSORTx_screfsamplemodified_inferred_refsample.bm.K999.txt
│   ├── [ 553]  CIBERSORTx_screfsamplemodified_inferred_phenoclasses.txt
│   ├── [ 16M]  CIBERSORTx_screfsamplemodified_inferred_refsample.txt
│   └── [   0]  [01;34min[00m
│       ├── [6.0M]  mixturestirosh.txt
│       └── [ 88M]  screfsamplemodified.txt
├── [   0]  [01;34mnormal[00m
│   ├── [4.3K]  CIBERSORTx_Adjusted.txt
│   ├── [988K]  CIBERSORTx_Mixtures_Adjusted.txt
│   ├── [3.8M]  CIBERSORTx_cell_type_sourceGEP.txt
│   ├── [172K]  CIBERSORTx_screfsampletirosh_inferred_pheno

In [8]:
# Rewrite only the header line of the reference sample: every "weird" spelling
# of a cell type listed in `cell_types_to_combine` is replaced by
# `combined_type`. The remaining lines are streamed through unchanged.
with open(original_sc_refsample.source_local_path, "r") as source_file:
    header = source_file.readline()
    # Lazily walk every weird spelling of every cell type being merged.
    weird_spellings = (
        spelling
        for nice_name in cell_types_to_combine
        for spelling in nice_to_weirds[nice_name]
    )
    for spelling in weird_spellings:
        n_matches = header.count(spelling)
        print(f"replacing {n_matches} occurences of {spelling} with {combined_type}")
        header = header.replace(spelling, combined_type)
    # The output file is opened only after the header has been fully relabelled.
    with open(modified_sc_refsample.source_local_path, "w") as target_file:
        target_file.write(header)
        # `source_file` is positioned just past the header, so this copies the rest.
        shutil.copyfileobj(source_file, target_file)

replacing 0 occurences of T.CD8 with Lymphocytes
replacing 114 occurences of T cells CD8 with Lymphocytes
replacing 0 occurences of T.CD4 with Lymphocytes
replacing 209 occurences of T cells CD4 with Lymphocytes
replacing 0 occurences of B.cell with Lymphocytes
replacing 65 occurences of B cells with Lymphocytes
replacing 13 occurences of NK cells with Lymphocytes


#### check modified sc refsample...

In [9]:
# Load both reference samples as tab-separated tables, using the first column
# as the index. The original is read from its source URI, the modified copy
# from its local path.
df_original = pd.read_csv(
    original_sc_refsample.source_uri,
    index_col=0,
    sep="\t",
)
df_modified = pd.read_csv(
    modified_sc_refsample.source_local_path,
    index_col=0,
    sep="\t",
)

In [10]:
def get_unique_counts(cols):
    """Count how many column names share each base label.

    pandas de-duplicates repeated column names on read by appending ``.1``,
    ``.2``, ... to the repeats. Only such a trailing all-digit suffix is
    stripped here, so labels that themselves contain a dot (for example
    ``T.CD8`` or ``B.cell``) are counted under their full name instead of
    being cut at the first dot.

    Args:
        cols: iterable of column-name strings.

    Returns:
        dict mapping each base label to the number of columns carrying it,
        in order of first appearance.
    """
    counts = {}
    for col in cols:
        base, dot, suffix = col.rpartition(".")
        # Treat the tail as a de-duplication suffix only when it is purely
        # ASCII digits and something precedes the dot.
        has_dedup_suffix = bool(dot and base) and suffix.isascii() and suffix.isdigit()
        label = base if has_dedup_suffix else col
        counts[label] = counts.get(label, 0) + 1
    return counts

In [11]:
# Per-label column counts in the original reference sample.
get_unique_counts(df_original.columns)

{'Malignant': 242,
 'Endothelial cells': 29,
 'CAF': 25,
 'T cells CD8': 114,
 'NK cells': 13,
 'Macrophages': 40,
 'T cells CD4': 209,
 'B cells': 65}

In [12]:
# Per-label column counts after relabelling; the merged types should now appear
# under the single combined label.
get_unique_counts(df_modified.columns)

{'Malignant': 242,
 'Endothelial cells': 29,
 'CAF': 25,
 'Lymphocytes': 401,
 'Macrophages': 40}

## configure args

In [13]:
# Input files handed to the job, keyed by argument name.
input_file_args = dict(
    mixture=InputFile(
        "mixturestirosh.txt",
        "gs://liulab/csx_example_files/Single_Cell_RNA-Seq_Melanoma_SuppFig_3b-d/mixture_melanoma_Tirosh_SuppFig_3b-d.txt",
    ),
    # Use `original_sc_refsample` here instead to run with the unmodified labels.
    refsample=modified_sc_refsample,
)

In [14]:
# Non-file arguments passed to the job alongside the input files; values are
# the literal strings given here.
other_args = dict(
    rmbatchBmode="TRUE",
    single_cell="TRUE",
)

## configure and run cibersortx

In [15]:
# Stage the inputs, run the container, and upload the results, all inside a
# scratch directory that is removed when the `with` block exits.
with tempfile.TemporaryDirectory() as temp_dir:
    job = DockerJob(temp_dir, input_file_args, other_args)
    # Execute each copy command produced by the job as a shell command.
    for copy_command in job.make_copy_commands():
        !{copy_command}
    # Show the job directory before the run.
    !tree -h {job.path}
    docker_command = job.make_docker_command()
    # Print the exact command for the record before executing it.
    # NOTE(review): the printed command ends up in the saved cell output and
    # includes the account credentials — redact before sharing the notebook.
    print(docker_command)
    !{docker_command}
    # Show the job directory again, after the run.
    !tree -h {job.path}
    # Mirror the job directory to the experiment's GCS location.
    # NOTE(review): `-d` deletes destination objects that are absent from the
    # source, and nothing above checks that the docker run succeeded — a failed
    # run could wipe previously uploaded results. Confirm this is acceptable.
    !gsutil -m rsync -r -d {job.path} {experiment.gcs_uri}

Copying gs://liulab/csx_example_files/Single_Cell_RNA-Seq_Melanoma_SuppFig_3b-d/mixture_melanoma_Tirosh_SuppFig_3b-d.txt...
/ [1 files][  6.0 MiB/  6.0 MiB]                                                
Operation completed over 1 objects/6.0 MiB.                                      
Copying gs://liulab/csx_experiments/cell_type_grouping/screfsamplemodified.txt...
- [1 files][ 87.8 MiB/ 87.8 MiB]                                                
Operation completed over 1 objects/87.8 MiB.                                     
[01;34m/tmp/tmp1u67flvu[00m
└── [4.0K]  [01;34min[00m
    ├── [6.0M]  mixturestirosh.txt
    └── [ 88M]  screfsamplemodified.txt

1 directory, 2 files
docker run \
    --rm \
    -v /tmp/tmp1u67flvu/in:/src/data \
    -v /tmp/tmp1u67flvu:/src/outdir \
    --user "$(id -u):$(id -g)" \
    cibersortx/fractions:latest \
    --username lyronctk@stanford.edu \
    --token <REDACTED> \
    --replicates 5 \
    --sampling 0.5 \
    --fraction 0.

In [16]:
# List the experiment's local directory to confirm the uploaded results.
!tree -h {experiment.local_path}

[01;34m/mnt/buckets/liulab/csx_experiments/cell_type_grouping/combine_lymphocytes[00m
├── [3.2K]  CIBERSORTx_Adjusted.txt
├── [516K]  CIBERSORTx_Mixtures_Adjusted.txt
├── [2.5M]  CIBERSORTx_cell_type_sourceGEP.txt
├── [ 67K]  CIBERSORTx_screfsamplemodified_inferred_phenoclasses.CIBERSORTx_screfsamplemodified_inferred_refsample.bm.K999.pdf
├── [171K]  CIBERSORTx_screfsamplemodified_inferred_phenoclasses.CIBERSORTx_screfsamplemodified_inferred_refsample.bm.K999.txt
├── [ 306]  CIBERSORTx_screfsamplemodified_inferred_phenoclasses.txt
├── [ 12M]  CIBERSORTx_screfsamplemodified_inferred_refsample.txt
└── [   0]  [01;34min[00m
    ├── [6.0M]  mixturestirosh.txt
    └── [ 88M]  screfsamplemodified.txt

1 directory, 9 files


In [19]:
# Recursively list the experiment's objects in GCS.
!gsutil ls -R {experiment.gcs_uri}

gs://liulab/csx_experiments/cell_type_grouping/combine_lymphocytes/:
gs://liulab/csx_experiments/cell_type_grouping/combine_lymphocytes/CIBERSORTx_Adjusted.txt
gs://liulab/csx_experiments/cell_type_grouping/combine_lymphocytes/CIBERSORTx_Mixtures_Adjusted.txt
gs://liulab/csx_experiments/cell_type_grouping/combine_lymphocytes/CIBERSORTx_cell_type_sourceGEP.txt
gs://liulab/csx_experiments/cell_type_grouping/combine_lymphocytes/CIBERSORTx_screfsamplemodified_inferred_phenoclasses.CIBERSORTx_screfsamplemodified_inferred_refsample.bm.K999.pdf
gs://liulab/csx_experiments/cell_type_grouping/combine_lymphocytes/CIBERSORTx_screfsamplemodified_inferred_phenoclasses.CIBERSORTx_screfsamplemodified_inferred_refsample.bm.K999.txt
gs://liulab/csx_experiments/cell_type_grouping/combine_lymphocytes/CIBERSORTx_screfsamplemodified_inferred_phenoclasses.txt
gs://liulab/csx_experiments/cell_type_grouping/combine_lymphocytes/CIBERSORTx_screfsamplemodified_inferred_refsample.txt

gs://liulab/csx_experiments/