# Writing out Compass Files

This notebook generates subsampled scRNA-seq input files for the PANC dataset and writes the SLURM job scripts that run Compass on them.

In [2]:
import os
import anndata
import numpy as np
import pandas as pd
import textwrap 
import scanpy as sc
import seaborn as sns
import matplotlib.pyplot as plt
import gseapy as gp
from gseapy import gseaplot
from gseapy import barplot, dotplot
import scipy
from scipy import io
import scanpy.external as sce

# Set how much scanpy logs; 3 is the most verbose level on the scale listed in the trailing comment.
sc.settings.verbosity = 3             # verbosity: errors (0), warnings (1), info (2), hints (3)

In [3]:
# Figure resolution used by scanpy when it configures matplotlib.
sc.settings.set_figure_params(dpi=80)

# Eight named colours, listed as four dark/light pairs.
palettecolors = [
    'rebeccapurple', 'thistle',
    'mediumvioletred', 'pink',
    'royalblue', 'lightsteelblue',
    'teal', 'turquoise',
]

Record the versions of the imported packages, then load the previously processed data:

In [1]:
# Checks the current version of env's packages
import pkg_resources
import types
def get_imports():
    """Yield the root package name behind each module or class in the notebook globals.

    Modules contribute the first component of their ``__name__`` and classes
    the first component of their ``__module__``. Globals of any other kind
    (plain variables, functions, ...) are skipped, so a variable's own name is
    never reported as if it were a package. The same package name may be
    yielded more than once; callers are expected to de-duplicate.

    Yields:
        str: importable root package name, translated to its pip name when it
        appears in ``poorly_named_packages``.
    """
    # Some packages have different imported names vs. system/pip names.
    # There is no systematic way to get pip names from a package's imported
    # name, so exceptions have to be added to this mapping manually.
    poorly_named_packages = {
        "PIL": "Pillow",
        "sklearn": "scikit-learn"
    }

    # Iterate over a snapshot: this is a generator, and the namespace may be
    # modified while it is suspended between two yields.
    for val in list(globals().values()):
        if isinstance(val, types.ModuleType):
            # Split ensures you get the root package, not a submodule.
            name = val.__name__.split(".")[0]
        elif isinstance(val, type):
            name = val.__module__.split(".")[0]
        else:
            # Neither a module nor a class: nothing to attribute to a package.
            continue

        yield poorly_named_packages.get(name, name)
# Unique root-package names referenced from the notebook namespace.
imports = list(set(get_imports()))

# There is no direct lookup from an import name to its installed version, so
# walk every installed distribution and keep the ones whose project name was
# imported here (pip itself is excluded).
requirements = [
    (dist.project_name, dist.version)
    for dist in pkg_resources.working_set
    if dist.project_name != "pip" and dist.project_name in imports
]

# Print in requirements.txt style: name==version.
for project, version in requirements:
    print("{}=={}".format(project, version))


In [5]:
# Processed AnnData files: the full dataset and a second object whose file name
# carries a `_g1` suffix (presumably the G1-phase subset -- TODO confirm).
results_file = '/home/dzhang/PANC_scRNAseq/PANC_02.h5ad'
results_file_g1 = '/home/dzhang/PANC_scRNAseq/PANC_02_g1.h5ad'

adata = sc.read_h5ad(results_file)
adata_g1 = sc.read_h5ad(results_file_g1)

# Reset the stored log1p base on both objects.
# NOTE(review): looks like the usual workaround for scanpy expecting a 'base'
# key in uns['log1p'] after an h5ad round trip -- confirm.
for loaded in (adata, adata_g1):
    loaded.uns['log1p']['base'] = None

In [7]:
# Generate SLURM submission scripts for Compass runs that use gene-specific
# penalty files: one script per (model, group, penalised gene).

ANALYSIS_ROOT_DIR = '/home/dzhang/Penalties/'

# Resource and notification directives shared by every generated job.
# NOTE(review): jobs request 12 CPUs while compass is launched with
# --num-processes 20 -- confirm that this oversubscription is intended.
SBATCH_RESOURCES = [
    "#SBATCH --time=10-00:00:00",
    "#SBATCH --cpus-per-task=12",
    "#SBATCH --mem-per-cpu=16G",
    "#SBATCH --mail-user=dzhang@calicolabs.com",
    "#SBATCH --mail-type=ALL",
]

# Shell lines that initialise conda inside the batch shell and activate the
# `compass_test` environment before compass is called.
CONDA_ACTIVATION = [
    "__conda_setup=\"$('/home/dzhang/miniconda3/bin/conda' 'shell.bash' 'hook' 2> /dev/null)\"",
    "if [ $? -eq 0 ]; then",
    "    eval \"$__conda_setup\"",
    "else",
    "    if [ -f \"/home/dzhang/miniconda3/etc/profile.d/conda.sh\" ]; then",
    "        . \"/home/dzhang/miniconda3/etc/profile.d/conda.sh\"",
    "    else",
    "        export PATH=\"/home/dzhang/miniconda3/bin:$PATH\"",
    "    fi",
    "fi",
    "unset __conda_setup",
    "conda activate compass_test",
]

for model in ['ASPC1', 'MIA', 'PANC1', 'PSN1']:
    for group in ['Parental', 'DTP']:
        # The loop variable is deliberately not called `type`: at notebook
        # scope that name would replace the builtin for every other cell
        # (e.g. the `isinstance(val, type)` check in get_imports).
        for penalty_gene in ['MVK', 'ELOVL1', 'ACSL5']:

            # Directory names use the first three letters of the group: PAR / DTP.
            group_code = group[:3].upper()

            # Common prefix of the Compass input files for this model/group.
            path_model = 'compass_files/' + model + f"/{group_code}/" + model

            results_path = f'compass_results/HumanGEM/{penalty_gene}/{model}/{group_code}/'
            # Temp dir is "<results dir>/tmp<model>" (no separator after "tmp").
            results_path_tmp_model = results_path + 'tmp' + model

            # Input file paths, relative to ANALYSIS_ROOT_DIR.
            path_umap = (path_model + '_umap.tsv').replace(" ", "_")
            path_genes = (path_model + '_genes.tsv').replace(" ", "_")
            path_barcodes = (path_model + '_barcodes.tsv').replace(" ", "_")
            path_matrix = (path_model + '_matrix.mtx').replace(" ", "_")
            path_penalty = f"penalty_files/HumanGEM/{model}/{group_code}/{penalty_gene}_penalties.tsv"

            # Scripts are grouped by penalised gene, so the file name itself
            # only needs the model and group.
            script_dir = os.path.join(ANALYSIS_ROOT_DIR, f'compass_scripts/HumanGEM/{penalty_gene}/')
            os.makedirs(script_dir, exist_ok=True)

            script_filename = (model + '_' + group + '.sh').replace(" ", "_")
            script_path = os.path.join(script_dir, script_filename)

            job_name = f"{model}_{group}_{penalty_gene}_compass_job"

            compass_args = [
                f"compass --data-mtx {ANALYSIS_ROOT_DIR}{path_matrix}",
                f"{ANALYSIS_ROOT_DIR}{path_genes}",
                f"{ANALYSIS_ROOT_DIR}{path_barcodes}",
                "--model HumanGEM --num-processes 20",
                f"--latent-space {ANALYSIS_ROOT_DIR}{path_umap}",
                "--species homo_sapiens --lambda 0.25 --calc-metabolites",
                f"--penalties-file {ANALYSIS_ROOT_DIR}{path_penalty}",
                f"--output-dir {ANALYSIS_ROOT_DIR}{results_path}",
                f"--temp-dir {ANALYSIS_ROOT_DIR}{results_path_tmp_model}",
            ]

            script_lines = [
                "#!/bin/bash",
                f"#SBATCH --job-name={job_name}",
                f"#SBATCH --output={job_name}.out",
                *SBATCH_RESOURCES,
                *CONDA_ACTIVATION,
                "",
                # Join with an explicit backslash-newline so the generated
                # script contains real bash line continuations. A bare `\` at
                # the end of a line in a Python triple-quoted string is eaten
                # by Python and never reaches the file.
                " \\\n    ".join(compass_args),
            ]

            with open(script_path, 'w') as script_file:
                script_file.write("\n".join(script_lines) + "\n")

            print(f"Created bash script: {script_filename}")

Created bash script: ASPC1_Parental.sh
Created bash script: ASPC1_Parental.sh
Created bash script: ASPC1_Parental.sh
Created bash script: ASPC1_DTP.sh
Created bash script: ASPC1_DTP.sh
Created bash script: ASPC1_DTP.sh
Created bash script: MIA_Parental.sh
Created bash script: MIA_Parental.sh
Created bash script: MIA_Parental.sh
Created bash script: MIA_DTP.sh
Created bash script: MIA_DTP.sh
Created bash script: MIA_DTP.sh
Created bash script: PANC1_Parental.sh
Created bash script: PANC1_Parental.sh
Created bash script: PANC1_Parental.sh
Created bash script: PANC1_DTP.sh
Created bash script: PANC1_DTP.sh
Created bash script: PANC1_DTP.sh
Created bash script: PSN1_Parental.sh
Created bash script: PSN1_Parental.sh
Created bash script: PSN1_Parental.sh
Created bash script: PSN1_DTP.sh
Created bash script: PSN1_DTP.sh
Created bash script: PSN1_DTP.sh


In [5]:
# Generate SLURM submission scripts for Compass runs with the `new_penalties`
# files: one script per (model, group, metabolic model).

ANALYSIS_ROOT_DIR = '/home/dzhang/Penalties/'

# Resource and notification directives shared by every generated job.
# NOTE(review): jobs request 12 CPUs while compass is launched with
# --num-processes 20 -- confirm that this oversubscription is intended.
SBATCH_RESOURCES = [
    "#SBATCH --time=10-00:00:00",
    "#SBATCH --cpus-per-task=12",
    "#SBATCH --mem-per-cpu=16G",
    "#SBATCH --mail-user=dzhang@calicolabs.com",
    "#SBATCH --mail-type=ALL",
]

# Shell lines that initialise conda inside the batch shell and activate the
# `compass_test` environment before compass is called.
CONDA_ACTIVATION = [
    "__conda_setup=\"$('/home/dzhang/miniconda3/bin/conda' 'shell.bash' 'hook' 2> /dev/null)\"",
    "if [ $? -eq 0 ]; then",
    "    eval \"$__conda_setup\"",
    "else",
    "    if [ -f \"/home/dzhang/miniconda3/etc/profile.d/conda.sh\" ]; then",
    "        . \"/home/dzhang/miniconda3/etc/profile.d/conda.sh\"",
    "    else",
    "        export PATH=\"/home/dzhang/miniconda3/bin:$PATH\"",
    "    fi",
    "fi",
    "unset __conda_setup",
    "conda activate compass_test",
]

for model in ['ASPC1', 'MIA', 'PANC1', 'PSN1']:
    for group in ['Parental', 'DTP']:
        # The loop variable is deliberately not called `type`: at notebook
        # scope that name would replace the builtin for every other cell
        # (e.g. the `isinstance(val, type)` check in get_imports).
        for metabolic_model in ['HumanGEM', 'RECON']:

            # Directory names use the first three letters of the group: PAR / DTP.
            group_code = group[:3].upper()

            # Common prefix of the Compass input files for this model/group.
            path_model = 'compass_files/' + model + f"/{group_code}/" + model

            results_path = f'compass_results/{metabolic_model}/{model}/{group_code}/'
            # Temp dir is "<results dir>/tmp<model>" (no separator after "tmp").
            results_path_tmp_model = results_path + 'tmp' + model

            # Input file paths, relative to ANALYSIS_ROOT_DIR.
            path_umap = (path_model + '_umap.tsv').replace(" ", "_")
            path_genes = (path_model + '_genes.tsv').replace(" ", "_")
            path_barcodes = (path_model + '_barcodes.tsv').replace(" ", "_")
            path_matrix = (path_model + '_matrix.mtx').replace(" ", "_")
            path_penalty = f"penalty_files/{metabolic_model}/{model}/{group_code}/new_penalties.tsv"

            # Scripts are grouped by metabolic model, so the file name itself
            # only needs the cell-line model and group.
            script_dir = os.path.join(ANALYSIS_ROOT_DIR, f'compass_scripts/{metabolic_model}/')
            os.makedirs(script_dir, exist_ok=True)

            script_filename = (model + '_' + group + '.sh').replace(" ", "_")
            script_path = os.path.join(script_dir, script_filename)

            job_name = f"{model}_{group}_{metabolic_model}_compass_job"

            # Only HumanGEM runs pass --model; RECON runs leave the flag out
            # (presumably relying on Compass's default model -- confirm).
            model_flag = "--model HumanGEM " if metabolic_model == "HumanGEM" else ""

            compass_args = [
                f"compass --data-mtx {ANALYSIS_ROOT_DIR}{path_matrix}",
                f"{ANALYSIS_ROOT_DIR}{path_genes}",
                f"{ANALYSIS_ROOT_DIR}{path_barcodes}",
                f"{model_flag}--num-processes 20",
                f"--latent-space {ANALYSIS_ROOT_DIR}{path_umap}",
                "--species homo_sapiens --lambda 0.25 --calc-metabolites",
                f"--penalties-file {ANALYSIS_ROOT_DIR}{path_penalty}",
                f"--output-dir {ANALYSIS_ROOT_DIR}{results_path}",
                f"--temp-dir {ANALYSIS_ROOT_DIR}{results_path_tmp_model}",
            ]

            script_lines = [
                "#!/bin/bash",
                f"#SBATCH --job-name={job_name}",
                f"#SBATCH --output={job_name}.out",
                *SBATCH_RESOURCES,
                *CONDA_ACTIVATION,
                "",
                # Join with an explicit backslash-newline so the generated
                # script contains real bash line continuations. A bare `\` at
                # the end of a line in a Python triple-quoted string is eaten
                # by Python and never reaches the file.
                " \\\n    ".join(compass_args),
            ]

            with open(script_path, 'w') as script_file:
                script_file.write("\n".join(script_lines) + "\n")

            print(f"Created bash script: {script_filename}")

Created bash script: ASPC1_Parental.sh
Created bash script: ASPC1_Parental.sh
Created bash script: ASPC1_DTP.sh
Created bash script: ASPC1_DTP.sh
Created bash script: MIA_Parental.sh
Created bash script: MIA_Parental.sh
Created bash script: MIA_DTP.sh
Created bash script: MIA_DTP.sh
Created bash script: PANC1_Parental.sh
Created bash script: PANC1_Parental.sh
Created bash script: PANC1_DTP.sh
Created bash script: PANC1_DTP.sh
Created bash script: PSN1_Parental.sh
Created bash script: PSN1_Parental.sh
Created bash script: PSN1_DTP.sh
Created bash script: PSN1_DTP.sh


In [6]:
# Generate SLURM submission scripts for subsystem-restricted Compass runs:
# one script per (model, group, metabolic model).

ANALYSIS_ROOT_DIR = '/home/dzhang/Compass_Results/PANC_scRNAseq/'

# File passed to --select-subsystems for each metabolic model.
SUBSYSTEM_FILES = {
    'RECON': ANALYSIS_ROOT_DIR + 'compass_scripts/subsystems/RECON/RECON_subsystems.txt',
    'HumanGEM': ANALYSIS_ROOT_DIR + 'compass_scripts/subsystems/HumanGEM/HG_subsystems.txt',
}

# Resource and notification directives shared by every generated job.
# NOTE(review): jobs request 12 CPUs while compass is launched with
# --num-processes 20 -- confirm that this oversubscription is intended.
SBATCH_RESOURCES = [
    "#SBATCH --time=10-00:00:00",
    "#SBATCH --cpus-per-task=12",
    "#SBATCH --mem-per-cpu=16G",
    "#SBATCH --mail-user=dzhang@calicolabs.com",
    "#SBATCH --mail-type=ALL",
]

# Shell lines that initialise conda inside the batch shell and activate the
# `compass_test` environment before compass is called.
CONDA_ACTIVATION = [
    "__conda_setup=\"$('/home/dzhang/miniconda3/bin/conda' 'shell.bash' 'hook' 2> /dev/null)\"",
    "if [ $? -eq 0 ]; then",
    "    eval \"$__conda_setup\"",
    "else",
    "    if [ -f \"/home/dzhang/miniconda3/etc/profile.d/conda.sh\" ]; then",
    "        . \"/home/dzhang/miniconda3/etc/profile.d/conda.sh\"",
    "    else",
    "        export PATH=\"/home/dzhang/miniconda3/bin:$PATH\"",
    "    fi",
    "fi",
    "unset __conda_setup",
    "conda activate compass_test",
]

for model in ['ASPC1', 'MIA', 'PANC1', 'PSN1']:
    for group in ['Parental', 'DTP']:
        # The loop variable is deliberately not called `type`: at notebook
        # scope that name would replace the builtin for every other cell
        # (e.g. the `isinstance(val, type)` check in get_imports).
        for metabolic_model in ['HumanGEM', 'RECON']:

            # Directory names use the first three letters of the group: PAR / DTP.
            group_code = group[:3].upper()

            # Common prefix of the Compass input files for this model/group.
            path_model = 'compass_files/' + model + f"/{group_code}/" + model

            results_path = f'compass_results/subsystems/{metabolic_model}/{model}/{group_code}/'
            # Temp dir is "<results dir>/tmp<model>" (no separator after "tmp").
            results_path_tmp_model = results_path + 'tmp' + model

            # Input file paths, relative to ANALYSIS_ROOT_DIR.
            path_umap = (path_model + '_umap.tsv').replace(" ", "_")
            path_genes = (path_model + '_genes.tsv').replace(" ", "_")
            path_barcodes = (path_model + '_barcodes.tsv').replace(" ", "_")
            path_matrix = (path_model + '_matrix.mtx').replace(" ", "_")

            script_dir = os.path.join(ANALYSIS_ROOT_DIR, f'compass_scripts/subsystems/{metabolic_model}/')
            os.makedirs(script_dir, exist_ok=True)

            script_filename = (model + '_' + group + '.sh').replace(" ", "_")
            script_path = os.path.join(script_dir, script_filename)

            job_name = f"{model}_{group}_{metabolic_model}_subsystem_compass_job"

            subsystem_path = SUBSYSTEM_FILES[metabolic_model]

            # Pass --model only for HumanGEM. Previously the flag was
            # hard-coded, so RECON scripts selected RECON subsystems while
            # running the HumanGEM model. RECON now omits the flag, as in the
            # HumanGEM/RECON penalties cell of this notebook (presumably
            # falling back to Compass's default model -- confirm).
            model_flag = "--model HumanGEM " if metabolic_model == "HumanGEM" else ""

            compass_args = [
                f"compass --data-mtx {ANALYSIS_ROOT_DIR}{path_matrix}",
                f"{ANALYSIS_ROOT_DIR}{path_genes}",
                f"{ANALYSIS_ROOT_DIR}{path_barcodes}",
                f"{model_flag}--num-processes 20",
                f"--latent-space {ANALYSIS_ROOT_DIR}{path_umap}",
                "--species homo_sapiens --lambda 0.25 --calc-metabolites",
                f"--select-subsystems {subsystem_path}",
                f"--output-dir {ANALYSIS_ROOT_DIR}{results_path}",
                f"--temp-dir {ANALYSIS_ROOT_DIR}{results_path_tmp_model}",
            ]

            script_lines = [
                "#!/bin/bash",
                f"#SBATCH --job-name={job_name}",
                f"#SBATCH --output={job_name}.out",
                *SBATCH_RESOURCES,
                *CONDA_ACTIVATION,
                "",
                # Join with an explicit backslash-newline so the generated
                # script contains real bash line continuations. A bare `\` at
                # the end of a line in a Python triple-quoted string is eaten
                # by Python and never reaches the file.
                " \\\n    ".join(compass_args),
            ]

            with open(script_path, 'w') as script_file:
                script_file.write("\n".join(script_lines) + "\n")

            print(f"Created bash script: {script_filename}")

Created bash script: ASPC1_Parental.sh
Created bash script: ASPC1_Parental.sh
Created bash script: ASPC1_DTP.sh
Created bash script: ASPC1_DTP.sh
Created bash script: MIA_Parental.sh
Created bash script: MIA_Parental.sh
Created bash script: MIA_DTP.sh
Created bash script: MIA_DTP.sh
Created bash script: PANC1_Parental.sh
Created bash script: PANC1_Parental.sh
Created bash script: PANC1_DTP.sh
Created bash script: PANC1_DTP.sh
Created bash script: PSN1_Parental.sh
Created bash script: PSN1_Parental.sh
Created bash script: PSN1_DTP.sh
Created bash script: PSN1_DTP.sh


In [10]:
# Generate SLURM submission scripts for Compass gene-knockout runs:
# one script per (model, group, knocked-out gene), knockout type "both".

ANALYSIS_ROOT_DIR = '/home/dzhang/Compass_Results/PANC_scRNAseq/'

KO_genes = ["SMPD1", "MVD", "PMVK", "ACSL5", "ELOVL1"]

# Resource and notification directives shared by every generated job.
# NOTE(review): jobs request 12 CPUs while compass is launched with
# --num-processes 20 -- confirm that this oversubscription is intended.
SBATCH_RESOURCES = [
    "#SBATCH --time=10-00:00:00",
    "#SBATCH --cpus-per-task=12",
    "#SBATCH --mem-per-cpu=16G",
    "#SBATCH --mail-user=dzhang@calicolabs.com",
    "#SBATCH --mail-type=ALL",
]

# Shell lines that initialise conda inside the batch shell and activate the
# `compass` environment before compass is called.
CONDA_ACTIVATION = [
    "__conda_setup=\"$('/home/dzhang/miniconda3/bin/conda' 'shell.bash' 'hook' 2> /dev/null)\"",
    "if [ $? -eq 0 ]; then",
    "    eval \"$__conda_setup\"",
    "else",
    "    if [ -f \"/home/dzhang/miniconda3/etc/profile.d/conda.sh\" ]; then",
    "        . \"/home/dzhang/miniconda3/etc/profile.d/conda.sh\"",
    "    else",
    "        export PATH=\"/home/dzhang/miniconda3/bin:$PATH\"",
    "    fi",
    "fi",
    "unset __conda_setup",
    "conda activate compass",
]

for model in ['ASPC1', 'MIA', 'PANC1', 'PSN1']:
    for group in ['Parental', 'DTP']:
        for gene in KO_genes:

            # Directory names use the first three letters of the group: PAR / DTP.
            group_code = group[:3].upper()

            # Common prefix of the Compass input files for this model/group.
            path_model = 'compass_files/' + model + f"/{group_code}/" + model

            results_path = f'compass_results/KO_tests/{gene}/{model}/{group_code}/'
            # Temp dir is "<results dir>/tmp<model>" (no separator after "tmp").
            results_path_tmp_model = results_path + 'tmp' + model

            # Input file paths, relative to ANALYSIS_ROOT_DIR.
            path_umap = (path_model + '_umap.tsv').replace(" ", "_")
            path_genes = (path_model + '_genes.tsv').replace(" ", "_")
            path_barcodes = (path_model + '_barcodes.tsv').replace(" ", "_")
            path_matrix = (path_model + '_matrix.mtx').replace(" ", "_")

            # Scripts are grouped by knocked-out gene, so the file name itself
            # only needs the model and group.
            script_dir = os.path.join(ANALYSIS_ROOT_DIR, f'compass_scripts/KO_tests/{gene}/')
            os.makedirs(script_dir, exist_ok=True)

            script_filename = (model + '_' + group + '.sh').replace(" ", "_")
            script_path = os.path.join(script_dir, script_filename)

            job_name = f"{model}_{group}_{gene}_compass_job"

            compass_args = [
                f"compass --data-mtx {ANALYSIS_ROOT_DIR}{path_matrix}",
                f"{ANALYSIS_ROOT_DIR}{path_genes}",
                f"{ANALYSIS_ROOT_DIR}{path_barcodes}",
                "--model HumanGEM --num-processes 20",
                f"--latent-space {ANALYSIS_ROOT_DIR}{path_umap}",
                "--species homo_sapiens --lambda 0.25 --calc-metabolites",
                f"--knockout {gene} --knockout-type both",
                f"--output-dir {ANALYSIS_ROOT_DIR}{results_path}",
                f"--temp-dir {ANALYSIS_ROOT_DIR}{results_path_tmp_model}",
            ]

            script_lines = [
                "#!/bin/bash",
                f"#SBATCH --job-name={job_name}",
                f"#SBATCH --output={job_name}.out",
                *SBATCH_RESOURCES,
                *CONDA_ACTIVATION,
                "",
                # Join with an explicit backslash-newline so the generated
                # script contains real bash line continuations. A bare `\` at
                # the end of a line in a Python triple-quoted string is eaten
                # by Python and never reaches the file.
                " \\\n    ".join(compass_args),
            ]

            with open(script_path, 'w') as script_file:
                script_file.write("\n".join(script_lines) + "\n")

            print(f"Created bash script: {script_filename}")

Created bash script: ASPC1_Parental.sh
Created bash script: ASPC1_Parental.sh
Created bash script: ASPC1_Parental.sh
Created bash script: ASPC1_Parental.sh
Created bash script: ASPC1_Parental.sh
Created bash script: ASPC1_DTP.sh
Created bash script: ASPC1_DTP.sh
Created bash script: ASPC1_DTP.sh
Created bash script: ASPC1_DTP.sh
Created bash script: ASPC1_DTP.sh
Created bash script: MIA_Parental.sh
Created bash script: MIA_Parental.sh
Created bash script: MIA_Parental.sh
Created bash script: MIA_Parental.sh
Created bash script: MIA_Parental.sh
Created bash script: MIA_DTP.sh
Created bash script: MIA_DTP.sh
Created bash script: MIA_DTP.sh
Created bash script: MIA_DTP.sh
Created bash script: MIA_DTP.sh
Created bash script: PANC1_Parental.sh
Created bash script: PANC1_Parental.sh
Created bash script: PANC1_Parental.sh
Created bash script: PANC1_Parental.sh
Created bash script: PANC1_Parental.sh
Created bash script: PANC1_DTP.sh
Created bash script: PANC1_DTP.sh
Created bash script: PANC1_

In [11]:
# Generate SLURM submission scripts for MVK knockouts using the three
# knockout types (flux, exp, both): one script per (model, group, gene, type).

ANALYSIS_ROOT_DIR = '/home/dzhang/Compass_Results/PANC_scRNAseq/'

KO_genes = ["MVK"]

# Resource and notification directives shared by every generated job.
# NOTE(review): jobs request 12 CPUs while compass is launched with
# --num-processes 20 -- confirm that this oversubscription is intended.
SBATCH_RESOURCES = [
    "#SBATCH --time=10-00:00:00",
    "#SBATCH --cpus-per-task=12",
    "#SBATCH --mem-per-cpu=16G",
    "#SBATCH --mail-user=dzhang@calicolabs.com",
    "#SBATCH --mail-type=ALL",
]

# Shell lines that initialise conda inside the batch shell and activate the
# `compass` environment before compass is called.
CONDA_ACTIVATION = [
    "__conda_setup=\"$('/home/dzhang/miniconda3/bin/conda' 'shell.bash' 'hook' 2> /dev/null)\"",
    "if [ $? -eq 0 ]; then",
    "    eval \"$__conda_setup\"",
    "else",
    "    if [ -f \"/home/dzhang/miniconda3/etc/profile.d/conda.sh\" ]; then",
    "        . \"/home/dzhang/miniconda3/etc/profile.d/conda.sh\"",
    "    else",
    "        export PATH=\"/home/dzhang/miniconda3/bin:$PATH\"",
    "    fi",
    "fi",
    "unset __conda_setup",
    "conda activate compass",
]

for model in ['ASPC1', 'MIA', 'PANC1', 'PSN1']:
    for group in ['Parental', 'DTP']:
        for gene in KO_genes:
            for KO in ['flux', 'exp', 'both']:

                # Directory names use the first three letters of the group: PAR / DTP.
                group_code = group[:3].upper()

                # Common prefix of the Compass input files for this model/group.
                path_model = 'compass_files/' + model + f"/{group_code}/" + model

                results_path = f'compass_results/KO_tests/{gene}/{KO}/{model}/{group_code}/'
                # Temp dir is "<results dir>/tmp<model>" (no separator after "tmp").
                results_path_tmp_model = results_path + 'tmp' + model

                # Input file paths, relative to ANALYSIS_ROOT_DIR.
                path_umap = (path_model + '_umap.tsv').replace(" ", "_")
                path_genes = (path_model + '_genes.tsv').replace(" ", "_")
                path_barcodes = (path_model + '_barcodes.tsv').replace(" ", "_")
                path_matrix = (path_model + '_matrix.mtx').replace(" ", "_")

                script_dir = os.path.join(ANALYSIS_ROOT_DIR, f'compass_scripts/KO_tests/{gene}/{KO}/')
                os.makedirs(script_dir, exist_ok=True)

                script_filename = (model + '_' + group + '.sh').replace(" ", "_")
                script_path = os.path.join(script_dir, script_filename)

                # The knockout type is part of the job name. Without it the
                # flux/exp/both jobs for one model and group share the same
                # job name and the same --output log file.
                job_name = f"{model}_{group}_{gene}_{KO}_compass_job"

                compass_args = [
                    f"compass --data-mtx {ANALYSIS_ROOT_DIR}{path_matrix}",
                    f"{ANALYSIS_ROOT_DIR}{path_genes}",
                    f"{ANALYSIS_ROOT_DIR}{path_barcodes}",
                    "--model HumanGEM --num-processes 20",
                    f"--latent-space {ANALYSIS_ROOT_DIR}{path_umap}",
                    "--species homo_sapiens --lambda 0.25 --calc-metabolites",
                    f"--knockout {gene} --knockout-type {KO}",
                    f"--output-dir {ANALYSIS_ROOT_DIR}{results_path}",
                    f"--temp-dir {ANALYSIS_ROOT_DIR}{results_path_tmp_model}",
                ]

                script_lines = [
                    "#!/bin/bash",
                    f"#SBATCH --job-name={job_name}",
                    f"#SBATCH --output={job_name}.out",
                    *SBATCH_RESOURCES,
                    *CONDA_ACTIVATION,
                    "",
                    # Join with an explicit backslash-newline so the generated
                    # script contains real bash line continuations. A bare `\`
                    # at the end of a line in a Python triple-quoted string is
                    # eaten by Python and never reaches the file.
                    " \\\n    ".join(compass_args),
                ]

                with open(script_path, 'w') as script_file:
                    script_file.write("\n".join(script_lines) + "\n")

                print(f"Created bash script: {script_filename}")

Created bash script: ASPC1_Parental.sh
Created bash script: ASPC1_Parental.sh
Created bash script: ASPC1_Parental.sh
Created bash script: ASPC1_DTP.sh
Created bash script: ASPC1_DTP.sh
Created bash script: ASPC1_DTP.sh
Created bash script: MIA_Parental.sh
Created bash script: MIA_Parental.sh
Created bash script: MIA_Parental.sh
Created bash script: MIA_DTP.sh
Created bash script: MIA_DTP.sh
Created bash script: MIA_DTP.sh
Created bash script: PANC1_Parental.sh
Created bash script: PANC1_Parental.sh
Created bash script: PANC1_Parental.sh
Created bash script: PANC1_DTP.sh
Created bash script: PANC1_DTP.sh
Created bash script: PANC1_DTP.sh
Created bash script: PSN1_Parental.sh
Created bash script: PSN1_Parental.sh
Created bash script: PSN1_Parental.sh
Created bash script: PSN1_DTP.sh
Created bash script: PSN1_DTP.sh
Created bash script: PSN1_DTP.sh


### Sample cells proportionally to cell-cycle phase and generate bash scripts

In [6]:
# Fraction of cells in each cell-cycle phase, one column per model/condition
# (columns are added as <model>_DTP followed by <model>_Parental).
cellcycle_df = pd.DataFrame()
for model in ['ASPC1', 'MIA', 'PANC1','PSN1']:
    is_model = adata.obs['Model'].isin([model])
    for condition in ('DTP', 'Parental'):
        in_condition = adata.obs['DTP'].isin([condition])
        # Subset the AnnData object itself (and copy it) before counting phases.
        cells = adata[is_model & in_condition].copy()
        cellcycle_df[f'{model}_{condition}'] = cells.obs['phase'].value_counts(normalize=True)

In [7]:
# Scale the phase fractions to a budget of 50 cells per model/condition.
# astype(int) truncates, so a column can sum to slightly less than 50.
cell_number = (cellcycle_df * 50).astype(int)

In [8]:
# Display the number of cells per phase (rows) for each model/condition (columns).
cell_number

Unnamed: 0_level_0,ASPC1_DTP,ASPC1_Parental,MIA_DTP,MIA_Parental,PANC1_DTP,PANC1_Parental,PSN1_DTP,PSN1_Parental
phase,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
G1,38,33,45,32,22,22,33,3
G2M,6,9,2,3,9,16,7,31
S,5,6,2,13,18,11,9,15


In [44]:
# Find the model XML file(s) to use for custom Compass jobs
import re
PATH_TO_MODELS = "/home/dzhang/Compass_Integration/model_xml/0_1_models"

def find_file(model, group, models_dir=None):
    """Return the names of the model XML files matching a model and group.

    A file matches when its whole name has the form
    ``<model>..._<group>....xml``. ``model`` and ``group`` are interpolated
    into the regular expression as-is, so regex metacharacters in them keep
    their special meaning.

    Only the top level of the directory is inspected; sub-directories are not
    searched.

    Args:
        model: Text the file name must start with.
        group: Text that must follow an underscore later in the file name.
        models_dir: Directory to search. Defaults to ``PATH_TO_MODELS``.

    Returns:
        list[str]: Sorted matching file names (without the directory). The
        list is empty when nothing matches or the directory cannot be read.
    """
    if models_dir is None:
        models_dir = PATH_TO_MODELS

    # Raw f-string keeps `\.` as a regex escape, not a Python string escape.
    pattern = re.compile(rf"{model}.*_{group}.*\.xml")

    # os.walk yields nothing for a missing/unreadable directory; fall back to
    # an empty listing so the function always returns a list.
    _, _, files = next(os.walk(models_dir), (models_dir, [], []))

    # fullmatch: the name must end in ".xml" (rejects e.g. "*.xml.bak").
    # Sorting makes the result independent of filesystem listing order.
    return sorted(name for name in files if pattern.fullmatch(name))

In [45]:
# Generating subsampled scRNA-seq files and bash submission scripts.
#
# For every model/condition pair this cell:
#   1. draws a fixed number of cells per cell-cycle phase (counts from `cell_number`),
#   2. writes the UMAP, gene list, barcodes, metadata and matrix under compass_files/,
#   3. writes one SLURM script per Compass model type ('full' and 'custom').

ANALYSIS_ROOT_DIR = '/home/dzhang/PANC_scRNAseq/'

# SLURM resource directives and conda activation shared by every generated script.
# NOTE(review): --cpus-per-task=12 here, but compass is launched with
# --num-processes 15 below - confirm the oversubscription is intended.
SCRIPT_PREAMBLE = (
    "#SBATCH --time=10-00:00:00\n"
    "#SBATCH --cpus-per-task=12\n"
    "#SBATCH --mem-per-cpu=16G\n"
    "#SBATCH --mail-user=dzhang@calicolabs.com\n"
    "#SBATCH --mail-type=ALL\n"
    "__conda_setup=\"$('/home/dzhang/miniconda3/bin/conda' 'shell.bash' 'hook' 2> /dev/null)\"\n"
    "if [ $? -eq 0 ]; then\n"
    "    eval \"$__conda_setup\"\n"
    "else\n"
    "    if [ -f \"/home/dzhang/miniconda3/etc/profile.d/conda.sh\" ]; then\n"
    "        . \"/home/dzhang/miniconda3/etc/profile.d/conda.sh\"\n"
    "    else\n"
    "        export PATH=\"/home/dzhang/miniconda3/bin:$PATH\"\n"
    "    fi\n"
    "fi\n"
    "unset __conda_setup\n"
    "conda activate compass_test\n"
)

cellcycle_ss_df = pd.DataFrame()
for model in ['ASPC1', 'MIA', 'PANC1', 'PSN1']:
    for group in ['Parental', 'DTP']:
        group_code = group[:3].upper()  # 'PAR' / 'DTP', used in directory and model file names
        Group = adata[adata.obs['Model'].isin([model]) & adata.obs['DTP'].isin([group])]

        # Only subsample groups with at least 50 G1 cells. Skip the group otherwise,
        # so that a previous group's subsample is never written under this group's name.
        n_g1 = Group[Group.obs['phase'].isin(['G1'])].shape[0]
        if n_g1 < 50:
            print(f"Skipping {model} {group}: only {n_g1} G1 cells (need at least 50)")
            continue

        # NOTE(review): both Parental and DTP are sampled with the model's *Parental*
        # per-phase counts, so the two conditions get the same phase composition.
        # Confirm this is intended rather than cell_number[model + '_' + group].
        Group_ss = anndata.concat(
            [
                sc.pp.subsample(
                    Group[Group.obs['phase'].isin([phase])],
                    n_obs=cell_number[model + '_Parental'][phase],
                    copy=True,
                )
                for phase in ['G1', 'S', 'G2M']
            ],
            axis=0,
        )

        # Number of cells/phase
        print(model)
        print(group, Group_ss.shape[0])
        display(Group_ss.obs['phase'].value_counts())
        # Key the column by group so Parental and DTP do not overwrite each other
        cellcycle_ss_df[model + '_' + group] = Group_ss.obs['phase'].value_counts(normalize=True)

        # Initializing files
        path_model = 'compass_files/' + model + f"/{group_code}/" + model
        # NOTE(review): the 'full' and 'custom' scripts below share this output
        # directory and temp directory - confirm the two runs may write to the same place.
        results_path = 'compass_results/' + model + f"/{group_code}/"
        results_path_tmp_model = 'compass_results/' + model + f"/{group_code}/tmp" + model

        subset = Group_ss

        # Generate file paths (relative to ANALYSIS_ROOT_DIR)
        path_umap = (path_model + '_umap.tsv').replace(" ", "_")
        path_genes = (path_model + '_genes.tsv').replace(" ", "_")
        path_barcodes = (path_model + '_barcodes.tsv').replace(" ", "_")
        path_metadata = (path_model + '_metadata.tsv').replace(" ", "_")
        path_matrix = (path_model + '_matrix.mtx').replace(" ", "_")

        # Write to files; the target directory may not exist on a fresh checkout
        os.makedirs(os.path.dirname(os.path.join(ANALYSIS_ROOT_DIR, path_matrix)), exist_ok=True)
        pd.DataFrame(subset.obsm["X_umap"], index=subset.obs_names).to_csv(
            os.path.join(ANALYSIS_ROOT_DIR, path_umap), sep="\t", index=True, header=True)
        pd.DataFrame(subset.raw.var.index).to_csv(
            os.path.join(ANALYSIS_ROOT_DIR, path_genes), sep="\t", index=False, header=False)
        pd.DataFrame(subset.obs.index).to_csv(
            os.path.join(ANALYSIS_ROOT_DIR, path_barcodes), sep="\t", index=False, header=False)
        subset.obs.to_csv(os.path.join(ANALYSIS_ROOT_DIR, path_metadata), sep="\t", index=True)
        # The matrix comes from `subset.raw` and is transposed before writing
        scipy.io.mmwrite(os.path.join(ANALYSIS_ROOT_DIR, path_matrix), subset.raw.X.T)

        # Generate both HumanGEM and custom HumanGEM model scripts - each cell line, each condition
        # (`model_type` rather than `type`, so the builtin is not shadowed in the notebook)
        for model_type in ['full', 'custom']:
            compass_args = [
                f"compass --data-mtx {os.path.join(ANALYSIS_ROOT_DIR, path_matrix)}",
                os.path.join(ANALYSIS_ROOT_DIR, path_genes),
                os.path.join(ANALYSIS_ROOT_DIR, path_barcodes),
                "--model HumanGEM --num-processes 15",
            ]
            if model_type == 'custom':
                # Resolve the model file before creating the script, so that a
                # missing model does not leave a half-written script behind.
                matches = find_file(model, group_code)
                if not matches:
                    raise FileNotFoundError(
                        f"No custom model XML for {model} {group_code} in {PATH_TO_MODELS}")
                compass_args.append(f"--custom {os.path.join(PATH_TO_MODELS, matches[0])}")
            compass_args += [
                f"--latent-space {os.path.join(ANALYSIS_ROOT_DIR, path_umap)}",
                "--species homo_sapiens --lambda 0.25 --calc-metabolites",
                f"--output-dir {os.path.join(ANALYSIS_ROOT_DIR, results_path)}",
                f"--temp-dir {os.path.join(ANALYSIS_ROOT_DIR, results_path_tmp_model)}",
            ]

            script_dir = os.path.join(ANALYSIS_ROOT_DIR, f'compass_scripts/{model_type}/')
            os.makedirs(script_dir, exist_ok=True)

            script_filename = (model + '_' + group + '.sh').replace(" ", "_")
            script_path = os.path.join(script_dir, script_filename)
            job_name = f"{model}_{group}_{model_type}_compass_job"

            with open(script_path, 'w') as script_file:
                script_file.write("#!/bin/bash\n")
                script_file.write(f"#SBATCH --job-name={job_name}\n")
                script_file.write(f"#SBATCH --output={job_name}.out\n")
                script_file.write(SCRIPT_PREAMBLE)
                # Emit real bash line continuations (backslash immediately before
                # the newline) so the command is readable in the generated script.
                script_file.write("\n" + " \\\n    ".join(compass_args) + "\n")

            print(f"Created bash script: {script_filename}")

ASPC1
Parental 48


phase
G1     33
G2M     9
S       6
Name: count, dtype: int64

Created bash script: ASPC1_Parental.sh
['PANC1_PAR_0_75.xml', 'PSN1_DTP_0_75.xml', 'MIAPaca_PAR_0_75.xml', 'PANC1_DTP_0_75.xml', 'MIAPaca_DTP_0_75.xml', 'PSN1_PAR_0_75.xml', 'ASPC1_DTP_0_75.xml', 'ASPC1_PAR_0_75.xml']
Created bash script: ASPC1_Parental.sh
ASPC1
DTP 48


phase
G1     33
G2M     9
S       6
Name: count, dtype: int64

Created bash script: ASPC1_DTP.sh
['PANC1_PAR_0_75.xml', 'PSN1_DTP_0_75.xml', 'MIAPaca_PAR_0_75.xml', 'PANC1_DTP_0_75.xml', 'MIAPaca_DTP_0_75.xml', 'PSN1_PAR_0_75.xml', 'ASPC1_DTP_0_75.xml', 'ASPC1_PAR_0_75.xml']
Created bash script: ASPC1_DTP.sh
MIA
Parental 48


phase
G1     32
S      13
G2M     3
Name: count, dtype: int64

Created bash script: MIA_Parental.sh
['PANC1_PAR_0_75.xml', 'PSN1_DTP_0_75.xml', 'MIAPaca_PAR_0_75.xml', 'PANC1_DTP_0_75.xml', 'MIAPaca_DTP_0_75.xml', 'PSN1_PAR_0_75.xml', 'ASPC1_DTP_0_75.xml', 'ASPC1_PAR_0_75.xml']
Created bash script: MIA_Parental.sh
MIA
DTP 48


phase
G1     32
S      13
G2M     3
Name: count, dtype: int64

Created bash script: MIA_DTP.sh
['PANC1_PAR_0_75.xml', 'PSN1_DTP_0_75.xml', 'MIAPaca_PAR_0_75.xml', 'PANC1_DTP_0_75.xml', 'MIAPaca_DTP_0_75.xml', 'PSN1_PAR_0_75.xml', 'ASPC1_DTP_0_75.xml', 'ASPC1_PAR_0_75.xml']
Created bash script: MIA_DTP.sh
PANC1
Parental 49


phase
G1     22
G2M    16
S      11
Name: count, dtype: int64

Created bash script: PANC1_Parental.sh
['PANC1_PAR_0_75.xml', 'PSN1_DTP_0_75.xml', 'MIAPaca_PAR_0_75.xml', 'PANC1_DTP_0_75.xml', 'MIAPaca_DTP_0_75.xml', 'PSN1_PAR_0_75.xml', 'ASPC1_DTP_0_75.xml', 'ASPC1_PAR_0_75.xml']
Created bash script: PANC1_Parental.sh
PANC1
DTP 49


phase
G1     22
G2M    16
S      11
Name: count, dtype: int64

Created bash script: PANC1_DTP.sh
['PANC1_PAR_0_75.xml', 'PSN1_DTP_0_75.xml', 'MIAPaca_PAR_0_75.xml', 'PANC1_DTP_0_75.xml', 'MIAPaca_DTP_0_75.xml', 'PSN1_PAR_0_75.xml', 'ASPC1_DTP_0_75.xml', 'ASPC1_PAR_0_75.xml']
Created bash script: PANC1_DTP.sh
PSN1
Parental 49


phase
G2M    31
S      15
G1      3
Name: count, dtype: int64

Created bash script: PSN1_Parental.sh
['PANC1_PAR_0_75.xml', 'PSN1_DTP_0_75.xml', 'MIAPaca_PAR_0_75.xml', 'PANC1_DTP_0_75.xml', 'MIAPaca_DTP_0_75.xml', 'PSN1_PAR_0_75.xml', 'ASPC1_DTP_0_75.xml', 'ASPC1_PAR_0_75.xml']
Created bash script: PSN1_Parental.sh
PSN1
DTP 49


phase
G2M    31
S      15
G1      3
Name: count, dtype: int64

Created bash script: PSN1_DTP.sh
['PANC1_PAR_0_75.xml', 'PSN1_DTP_0_75.xml', 'MIAPaca_PAR_0_75.xml', 'PANC1_DTP_0_75.xml', 'MIAPaca_DTP_0_75.xml', 'PSN1_PAR_0_75.xml', 'ASPC1_DTP_0_75.xml', 'ASPC1_PAR_0_75.xml']
Created bash script: PSN1_DTP.sh
