# Description

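Generates a Manubot table with the pathways enriched (according to the MultiPLIER models) for a given LV, whose name is set in the Settings section below, and inserts it into the manuscript's supplementary material file.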

# Modules loading

In [1]:
# Reload modules automatically before running code, so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import re
from pathlib import Path

import pandas as pd

from entity import Trait
import conf

# Settings

In [3]:
# Name of the latent variable (LV) whose enriched pathways are exported; the
# text after its first two characters is what gets looked up in "LV index".
LV_NAME = "LV847"

In [4]:
# Fail early if the manuscript location has not been configured.
assert (
    conf.MANUSCRIPT["BASE_DIR"] is not None
), "The manuscript directory was not configured"

# Markdown file of the manuscript that will receive the generated table.
OUTPUT_FILE_PATH = conf.MANUSCRIPT["CONTENT_DIR"] / "50.00.supplementary_material.md"
display(OUTPUT_FILE_PATH)
# The file is read and rewritten later on, so it must already exist.
assert OUTPUT_FILE_PATH.exists(), f"Output file does not exist: {OUTPUT_FILE_PATH}"

PosixPath('/home/miltondp/projects/labs/greenelab/phenoplier_manuscript/content/50.00.supplementary_material.md')

# Load MultiPLIER summary

In [5]:
# NOTE(review): read_pickle can execute arbitrary code while unpickling; only
# load this file from a trusted source.
multiplier_model_summary = pd.read_pickle(conf.MULTIPLIER["MODEL_SUMMARY_FILE"])

In [6]:
# Sanity check: (rows, columns) of the loaded summary table.
multiplier_model_summary.shape

(2157, 5)

In [7]:
# Preview the first rows to see which columns are available.
multiplier_model_summary.head()

Unnamed: 0,pathway,LV index,AUC,p-value,FDR
1,KEGG_LYSINE_DEGRADATION,1,0.388059,0.866078,0.956005
2,REACTOME_MRNA_SPLICING,1,0.733057,4.8e-05,0.000582
3,MIPS_NOP56P_ASSOCIATED_PRE_RRNA_COMPLEX,1,0.680555,0.001628,0.011366
4,KEGG_DNA_REPLICATION,1,0.549473,0.312155,0.539951
5,PID_MYC_ACTIVPATHWAY,1,0.639303,0.021702,0.083739


# LV pathways

In [8]:
# The "LV index" column is matched against LV_NAME without its first two
# characters (the "LV" prefix).
lv_index = LV_NAME[2:]
is_selected_lv = multiplier_model_summary["LV index"].isin((lv_index,))

# Only enrichments with FDR below 0.05 are kept; an alternative criterion
# (AUC >= 0.75) is left disabled.
is_significant = multiplier_model_summary["FDR"] < 0.05

lv_pathways = multiplier_model_summary[is_selected_lv & is_significant]

In [9]:
# (rows, columns) of the selection: one row per pathway that passed the filter.
lv_pathways.shape

(4, 5)

In [10]:
# Keep only the columns shown in the table, ordered by increasing FDR.
lv_pathways = lv_pathways.loc[:, ["pathway", "AUC", "FDR"]].sort_values(by="FDR")

In [11]:
# Render AUC as text with two decimal places.
lv_pathways = lv_pathways.assign(AUC=lv_pathways["AUC"].map("{:.2f}".format))

In [12]:
# Render FDR as text in scientific notation with two decimal places.
lv_pathways = lv_pathways.assign(FDR=lv_pathways["FDR"].map("{:.2e}".format))

In [13]:
# Capitalize the pathway column header for the final table.
lv_pathways = lv_pathways.rename(columns={"pathway": "Pathway"})

In [14]:
# Preview the table after formatting the values and renaming the column.
lv_pathways.head()

Unnamed: 0,Pathway,AUC,FDR
1898,KEGG_SYSTEMIC_LUPUS_ERYTHEMATOSUS,0.74,0.000186
1896,REACTOME_MEIOTIC_RECOMBINATION,0.78,0.00036
1897,REACTOME_RNA_POL_I_TRANSCRIPTION,0.75,0.000556
1895,REACTOME_AMYLOIDS,0.76,0.00217


## Split names

In [15]:
# Pathway names use underscores between words; show them with spaces instead.
lv_pathways = lv_pathways.assign(
    Pathway=lv_pathways["Pathway"].map(lambda name: name.replace("_", " "))
)

In [16]:
# Preview the pathway names after replacing underscores with spaces.
lv_pathways.head()

Unnamed: 0,Pathway,AUC,FDR
1898,KEGG SYSTEMIC LUPUS ERYTHEMATOSUS,0.74,0.000186
1896,REACTOME MEIOTIC RECOMBINATION,0.78,0.00036
1897,REACTOME RNA POL I TRANSCRIPTION,0.75,0.000556
1895,REACTOME AMYLOIDS,0.76,0.00217


## Fill empty

In [17]:
# When no pathway passed the filter, emit a single explanatory row instead of
# an empty table.
if lv_pathways.shape[0] == 0:
    # Assigning to a row label that does not exist yet adds that row; the
    # columns that are not assigned are left missing and blanked out below.
    lv_pathways.loc[0, "Pathway"] = "No pathways significantly enriched"
    lv_pathways = lv_pathways.fillna("")

## Save

In [18]:
# Template of the HTML comments that delimit, inside the output file, the
# table of a given LV; {position} is filled with either "start" or "end".
LV_FILE_MARK_TEMPLATE = "<!-- {lv}:multiplier_pathways:{position} -->"

In [19]:
# Caption line added after the table; {lv_name} is the LV name and {table_id}
# is the table identifier placed at the end of the caption.
TABLE_CAPTION = (
    "Table: Pathways aligned to {lv_name} from the MultiPLIER models. {table_id}"
)

In [20]:
# Identifier of the table; {lv_name_lower_case} is the LV name in lower case.
TABLE_CAPTION_ID = "#tbl:sup:multiplier_pathways:{lv_name_lower_case}"

In [21]:
# Build the pair of marks that delimit this LV's table in the output file.
lv_file_mark_start, lv_file_mark_end = (
    LV_FILE_MARK_TEMPLATE.format(lv=LV_NAME, position=position)
    for position in ("start", "end")
)

display(lv_file_mark_start)
display(lv_file_mark_end)

'<!-- LV847:multiplier_pathways:start -->'

'<!-- LV847:multiplier_pathways:end -->'

In [22]:
# Render the table as Markdown without the index column. disable_numparse is
# forwarded to tabulate so that the already formatted AUC/FDR strings are kept
# as they are instead of being parsed back into numbers.
new_content = lv_pathways.to_markdown(index=False, disable_numparse=True)

In [23]:
# Build the caption: the table identifier is wrapped in curly braces and
# placed at the end of the caption text.
caption_id = TABLE_CAPTION_ID.format(lv_name_lower_case=LV_NAME.lower())
table_caption = TABLE_CAPTION.format(lv_name=LV_NAME, table_id=f"{{{caption_id}}}")
display(table_caption)

'Table: Pathways aligned to LV847 from the MultiPLIER models. {#tbl:sup:multiplier_pathways:lv847}'

In [24]:
# Append the caption after a blank line.
# NOTE: this extends new_content in place, so re-running only this cell adds
# the caption again; re-run the cell that creates new_content first.
new_content += "\n\n" + table_caption

In [25]:
# Enclose the generated content between the LV's start and end marks, one
# element per line.
full_new_content = "\n".join(
    (lv_file_mark_start, new_content.strip(), lv_file_mark_end)
)

In [26]:
# Load the current text of the output file.
file_content = OUTPUT_FILE_PATH.read_text(encoding="utf8")

In [27]:
# Replace everything from this LV's start mark to its end mark (both included)
# with the newly generated content.
#  - The marks are escaped so they are always matched literally.
#  - The replacement is given as a function so that backslashes in the new
#    content (for instance "\beta") are inserted verbatim; a replacement
#    *string* would have its escapes processed ("\b" becomes a backspace).
#  - re.DOTALL lets ".*?" span the several lines between the two marks.
new_file_content, n_replacements = re.subn(
    re.escape(lv_file_mark_start) + ".*?" + re.escape(lv_file_mark_end),
    lambda _match: full_new_content,
    file_content,
    flags=re.DOTALL,
)

# Without this check, missing marks would go unnoticed and the file would be
# rewritten unchanged.
assert n_replacements > 0, (
    f"Marks '{lv_file_mark_start}' ... '{lv_file_mark_end}' "
    "were not found in the output file"
)

In [28]:
# Overwrite the output file with the updated content.
# NOTE: a former post-processing step, `.replace("\beta", r"\beta")`, is disabled.
with OUTPUT_FILE_PATH.open(mode="w", encoding="utf8") as output_file:
    output_file.write(new_file_content)