This notebook runs consensus module detection (CAMI) on the modules predicted by all methods.

Inputs:
1) the raw seed genes of each combination
2) the PPI network for each tissue
3) the list of modules predicted by each method


In [1]:
import os
import pandas as pd
from pathlib import Path
import subprocess

In [2]:
# Parent of the directory the notebook kernel is running in.
workdir = os.path.split(os.getcwd())[0]

# Root of the module-expansion results; the processing cell iterates over its entries.
input_dir = Path(workdir + "/3_module_expansion/results_full_PPI")

# NOTE: despite the `_dir` suffix this points at a single CSV file (the PPI network).
ppi_dir = Path("/home/bbc8731/diseasemodulediscovery/tests/uniprot_ppi.csv")

# Destination for the CAMI runs; created (with parents) if it does not exist yet.
output_dir = Path("/home/bbc8731/HSV/4_consensus_module_detection/cami_results")
os.makedirs(output_dir, exist_ok=True)

In [4]:
# Run one CAMI consensus call per seed file found under `input_dir`.
#
# For every entry of `input_dir` and every seed file below its `input/seeds`
# folder, this cell:
#   1. collects the module files in `modules/tsv_nodes` whose name contains the
#      first two dot-separated fields of the seed file name (files whose name
#      contains "firstneighbor" are excluded),
#   2. writes seeds, PPI and modules into a per-run folder below `output_dir`
#      in the plain-text layout handed to CAMI,
#   3. calls CAMI on those external modules and prints its stdout/stderr.
#
# Relies on `input_dir`, `ppi_dir` and `output_dir` from the configuration cell.

# Prefix put in front of the run name for the two background-control result
# folders; every other folder gets no prefix.
run_prefix_by_dir = {
    "CC_background_control": "CC_",
    "BP_background_control": "BP_",
}

# The PPI file is identical for every run, so it is parsed once here instead of
# once per seed file. (A missing/unreadable PPI file now fails before any run.)
ppi_df = pd.read_csv(ppi_dir)

# (run name, return code) for every CAMI call that exited with a non-zero code.
failed_runs = []

# The loop variable is intentionally not named `dir`: in a notebook that would
# shadow the builtin `dir()` for the rest of the kernel session.
# `sorted(...)` makes the processing order independent of filesystem listing order.
for result_dir in sorted(input_dir.glob('*')):
    seeds_dirs = result_dir / 'input' / 'seeds'
    module_dir = result_dir / 'modules' / 'tsv_nodes'

    for seed_path in sorted(seeds_dirs.glob('*.tsv')):  # seed_path: one seed file
        # Files with these suffixes are not used as seed sets.
        if seed_path.name.endswith(('.removed.tsv', '.multiqc.tsv')):
            continue

        # First two dot-separated fields of the seed file name; used to match
        # module files and (optionally prefixed) to name the run folder.
        dir_tissue = ".".join(seed_path.name.split(".", 2)[:2])

        # ===============================
        # find matching modules
        # ===============================
        # Sorted so the order of the --external_results arguments is reproducible.
        module_paths = sorted(
            m for m in module_dir.glob("*.tsv")
            if (dir_tissue in m.name) and ("firstneighbor" not in m.name)
        )

        if not module_paths:
            continue

        # ===============================
        # Convert formats
        # ===============================
        # Matching above uses the unprefixed name; only the run folder is prefixed.
        run_name = run_prefix_by_dir.get(result_dir.name, "") + dir_tissue
        run_dir = output_dir / run_name
        run_dir.mkdir(exist_ok=True)

        # seeds: tsv -> txt (first column only, no header, no index)
        seeds_txt = run_dir / "seeds.txt"
        seeds_df = pd.read_csv(seed_path, sep="\t", header=None)
        seeds_df.iloc[:, 0].to_csv(seeds_txt, index=False, header=False)

        # ppi: csv -> tsv (written into every run folder, parsed only once above)
        ppi_tsv = run_dir / "ppi.tsv"
        ppi_df.to_csv(ppi_tsv, sep="\t", index=False)

        # modules: tsv -> txt
        module_args = []
        for m in module_paths:
            # The module file has a header row; the node IDs are in column 'name'.
            mod_df = pd.read_csv(m, sep="\t")

            # Swap only the trailing ".tsv" for ".txt" (the glob guarantees that
            # suffix); a str.replace would also rewrite ".tsv" inside the name.
            mod_out = run_dir / f"{m.stem}.txt"

            # Third dot-separated field of the file name, e.g. "domino".
            method_name = m.stem.split(".")[2]

            with open(mod_out, "w") as f:
                # first line = method name
                f.write(method_name + "\n")
                # remaining lines = node IDs
                mod_df["name"].to_csv(f, index=False, header=False)

            module_args.append(str(mod_out))

        # ===============================
        # Run CAMI
        # ===============================
        cmd = [
            "/home/bbc8731/miniconda3/envs/cami/bin/python",
            "/home/bbc8731/cami/src/cami.py",
            "-n", str(ppi_tsv),
            "-o", str(run_dir),
            "--save_temps",
            "--external_results", *module_args,
            "--external_only",
            "--consensus",
            "--seeds", str(seeds_txt),
        ]

        # No check=True: a failing run must not abort the remaining runs.
        # Failures are collected and summarised after the loop instead.
        proc = subprocess.run(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True
        )

        # Label the output so each block can be attributed to its run.
        print(f"\n===== RUN {run_name} =====")

        print("\n===== CAMI STDOUT =====")
        print(proc.stdout)

        print("\n===== CAMI STDERR =====")
        print(proc.stderr)

        print("\nReturn code:", proc.returncode)

        if proc.returncode != 0:
            failed_runs.append((run_name, proc.returncode))

# Surface failed runs in one place rather than leaving them buried in the log.
if failed_runs:
    print("\n===== CAMI FAILURES =====")
    for failed_name, failed_code in failed_runs:
        print(f"{failed_name}: return code {failed_code}")
                


===== CAMI STDOUT =====
CAMI started
Creating the PPI network graph and seed list...
With the 29 seed genes CAMI (union) proposes 291 genes to add to the Active Module
With the 29 seed genes CAMI (intersection) proposes 1 genes to add to the Active Module
With the 29 seed genes CAMI (first_neighbours) proposes 3996 genes to add to the Active Module
With the 29 seed genes CAMI (cami_v2_trustrank) proposes 59 genes to add to the Active Module
With the 29 seed genes CAMI (cami_v3_trustrank) proposes 102 genes to add to the Active Module
With the 29 seed genes the module predicted by no_tool contains 30 genes
With the 29 seed genes the module predicted by robust contains 57 genes
With the 29 seed genes the module predicted by robust_bias_aware contains 55 genes
With the 29 seed genes the module predicted by diamond contains 230 genes
With the 29 seed genes the module predicted by rwr contains 65 genes
With the 29 seed genes the module predicted by domino contains 78 genes
With the 29 seed