# Process _P. aeruginosa_ multiplier model

In [1]:
%load_ext autoreload
%autoreload 2

import os
import pandas as pd
import pickle

import rpy2.robjects as ro
from rpy2.robjects import pandas2ri
from rpy2.robjects.conversion import localconverter

In [2]:
# Look up R's `readRDS` function through rpy2 so it can be called from Python.
readRDS = ro.r["readRDS"]

In [3]:
# CHANGE LOCATION TO LOCAL WHEN COMMIT
# Load the model object from the RDS file. The path is relative, so it is
# resolved against the working directory the notebook is run from.
multiplier_full_model = readRDS("Pa_compendium_PLIER_model.RDS")

# Format multiplier Z data

The Z matrix contains the contribution (i.e., the weight) of each gene to each latent variable (LV).

In [4]:
# Pull the element named "Z" out of the loaded R model object.
multiplier_model_Z_matrix = multiplier_full_model.rx2("Z")

In [5]:
# Convert the R object into its Python counterpart. The pandas-aware
# conversion rules are only active inside this `with` block.
with localconverter(ro.default_converter + pandas2ri.converter):
    multiplier_model_Z_matrix_values = ro.conversion.rpy2py(multiplier_model_Z_matrix)

In [6]:
# Name the latent variables LV1..LVn. The count n is read from the column
# dimension of the converted Z matrix instead of being hard-coded, so this
# cell keeps working if the model is refit with a different number of LVs
# (a fixed count would make the DataFrame constructor raise on a mismatch).
column_header = [
    f"LV{i}" for i in range(1, multiplier_model_Z_matrix_values.shape[1] + 1)
]

# Label rows with the row names carried by the R matrix and columns with
# the generated LV names.
multiplier_model_Z_matrix_df = pd.DataFrame(
    data=multiplier_model_Z_matrix_values,
    index=multiplier_model_Z_matrix.rownames,
    columns=column_header,
)

In [7]:
# Sanity check: print the (rows, columns) dimensions, then preview the
# first few rows of the Z matrix.
print(multiplier_model_Z_matrix_df.shape)
multiplier_model_Z_matrix_df.head()

(5549, 72)


Unnamed: 0,LV1,LV2,LV3,LV4,LV5,LV6,LV7,LV8,LV9,LV10,...,LV63,LV64,LV65,LV66,LV67,LV68,LV69,LV70,LV71,LV72
PA0001,0.103221,0.071011,0.0,0.031373,0.111417,0.0,0.0,0.0,0.0,0.122252,...,0.066889,0.00687,0.012213,0.009801,0.0,0.0,0.0,0.0,0.044349,0.0
PA0002,0.03618,0.002204,0.0,0.084074,0.391684,0.0,0.0,0.016545,0.0,0.179054,...,0.017247,0.024816,0.0,0.0,0.0,0.0,0.0,0.038047,0.109131,0.0
PA0003,0.0,0.0,0.0,0.075421,0.101301,0.0,0.0,0.0,0.0,0.0,...,0.0,0.15221,0.0,0.049635,0.0,0.0,0.0,0.0,0.0,0.0
PA0004,0.134765,0.0,0.0,0.23281,0.25697,0.029755,0.044385,0.253926,0.0,0.132221,...,0.0,0.18025,0.0,0.1051,0.161725,0.0,0.037455,0.000385,0.064465,0.0
PA0005,0.0,0.0,0.041325,0.0,0.014607,0.0,0.0,0.0,0.0,0.230676,...,0.128484,0.0,0.0,0.267931,0.18294,0.0,0.0,0.064321,0.0,0.0


In [8]:
# Save
# Write the Z matrix to a tab-separated file in the working directory.
# The row index is written as the first column.
multiplier_model_Z_matrix_df.to_csv("multiplier_Pa_model_z.tsv", sep="\t")

# Format multiplier summary data

This summary table contains statistics about each LV: which pathways it is associated with and the significance of each association (AUC, p-value, FDR). This information is saved in the MultiPLIER model; see: https://github.com/greenelab/multi-plier/blob/7f4745847b45edf8fef3a49893843d9d40c258cf/23-explore_AAV_recount_LVs.Rmd

In [9]:
# Pull the element named "summary" out of the loaded R model object.
multiplier_model_matrix = multiplier_full_model.rx2("summary")

In [10]:
# Convert the R summary object into its Python counterpart. The pandas-aware
# conversion rules are only active inside this `with` block.
with localconverter(ro.default_converter + pandas2ri.converter):
    multiplier_model_matrix_values = ro.conversion.rpy2py(multiplier_model_matrix)

In [11]:
# Build the summary DataFrame from the converted values, labelled with the
# row and column names reported by the R object itself.
multiplier_model_matrix_df = pd.DataFrame(
    multiplier_model_matrix_values,
    columns=multiplier_model_matrix.colnames,
    index=multiplier_model_matrix.rownames,
)

In [12]:
# Preview the first few rows of the summary table.
multiplier_model_matrix_df.head()

Unnamed: 0,pathway,LV index,AUC,p-value,FDR
1,KEGG.Pathway.pae00190..Oxidative.phosphorylati...,1,0.731124,0.001403613,0.006166705
2,KEGG.Pathway.pae03060..Protein.export...Pseudo...,1,0.707085,0.07103638,0.1131065
3,KEGG.Pathway.pae00230..Purine.metabolism...Pse...,1,0.781124,4.325584e-06,5.45689e-05
4,KEGG.Pathway.pae03010..Ribosome...Pseudomonas....,1,0.944015,1.640398e-10,1.345126e-08
5,KEGG.Module.M00178..Ribosome..bacteria,1,0.999932,6.20496e-12,1.017613e-09


In [13]:
# Save
# Write the summary table to a tab-separated file in the working directory.
# The row index is written as the first column.
multiplier_model_matrix_df.to_csv("multiplier_Pa_model_summary.tsv", sep="\t")