# Tutorial Pleiotropy Correction

# Step 1: Import Libraries

In [1]:
# --- Core scientific stack ---
import numpy as np
import pandas as pd
from scipy.stats import norm
import matplotlib.pyplot as plt
# --- For single-cell data if your workflow uses AnnData / Scanpy ---
import scanpy as sc
# --- Your local pyVIPER package (the one you edited) ---
import pyviper
import importlib
import pyviper._viper as _v  # this gives you direct access to the edited _viper.py
# --- Reload to make sure Python uses the updated code ---
importlib.reload(_v)
importlib.reload(pyviper)


  from .autonotebook import tqdm as notebook_tqdm


<module 'pyviper' from '/mnt/beegfs/home/friva/pyviper/pyviper/__init__.py'>

# Step 2: Import Gene expression matrix


In [2]:
# Set the HPC file path
gene_expr_path = "/home/friva/TCGA_DEG/DEG-BRCA.tsv"
# Read the data
gene_expr_signature = pd.read_csv(gene_expr_path, sep="\t", index_col=0)
# Convert to AnnData object
gene_expr_signature = sc.AnnData(gene_expr_signature)
# Inspect
gene_expr_signature

AnnData object with n_obs × n_vars = 1106 × 19938

# Step 3: Import ARACNe Network

In [3]:

network_path = "/home/friva/TCGA_networks/net-prunned-BRCA_test.tsv"

network = pd.read_csv(network_path, delimiter="\t")

# Drop the first column if it's just an index
network = network.drop(network.columns[0], axis=1)

network_interactome = pyviper.Interactome('TCGA_BRCA', network)  # convert to class Interactome
network_interactome.size()  # show number of regulons in the Interactome
network_interactome.net_table.head()

Unnamed: 0,regulator,target,mor,likelihood
0,ABCG1,PLEKHF2,1.0,0.986344
1,ABCG1,WWP1,0.999999,0.981491
2,ABCG1,SERPINB5,-0.999987,0.977465
3,ABCG1,HRCT1,-0.99998,0.97037
4,ABCG1,PLAGL1,-0.999981,0.970235


In [4]:
# keep targets present in expression
network_interactome.filter_targets(gene_expr_signature.var_names)
# prune to exactly 50 targets per TF (drop TFs with <50)
network_pruned = network_interactome.copy()
network_pruned.prune(max_targets=50, eliminate=True)

Removed 0 targets.
Removed 15308 targets.
Removed 412 regulators.


  pruned_df = sorted_df.groupby('regulator', group_keys=False).apply(lambda x: x.iloc[:max_targets])


# Step 4: Run PyViper with "pleiotropy = False"

In [7]:
BRCA_NES = pyviper.viper(gex_data=gene_expr_signature, # gene expression signature
                             interactome=network_pruned, # gene regulatory network
                             enrichment = "area",
                             min_targets=0,
                             eset_filter=True,
                             pleiotropy = False,
                             njobs=10,
                             verbose=True)

Preparing the association scores
Computing regulons enrichment with aREA


# Step 5: Run PyViper with "pleiotropy = True"

In [8]:
BRCA_NES_Corrected = pyviper.viper(gex_data=gene_expr_signature, # gene expression signature
                             interactome=network_pruned, # gene regulatory network
                             enrichment = "area",
                             min_targets=0,
                             eset_filter=True,
                             pleiotropy = True,
                             njobs=10,
                             verbose=True)

Preparing the association scores
Computing regulons enrichment with aREA
[pleiotropy] hook reached


Initializing regulons: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10400/10400 [00:00<00:00, 31571.96it/s]
Filling regulons: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10400/10400 [00:00<00:00, 29072.93it/s]


[pleiotropy] ss shape: (19938, 1106)
[pleiotropy] NES0 shape (TFs×samples): (208, 1106)
[pleiotropy] workers: 10
[pleiotropy] updated 726/1106 samples (median TFs updated: 1)


In [9]:
BRCA_NES.X


array([[ 3.31299488,  0.13898613,  1.41328845, ...,  4.27563766,
         4.27255937,  1.81609743],
       [ 4.40232445,  0.17952647,  1.66693234, ...,  2.76371511,
        -1.09122135, -0.25404518],
       [-1.00436247,  2.15137492,  3.10341038, ..., -0.20926719,
        -0.04146679, -0.08405256],
       ...,
       [ 0.85092515, -0.61553694,  1.4120394 , ..., -0.49405199,
        -2.23862397, -2.20046406],
       [ 4.4411637 , -0.4607928 ,  0.82933544, ...,  2.90994808,
         8.02378342,  4.86682743],
       [-5.08276292, -0.07092113, -3.33750214, ..., -8.08333506,
        -8.76794786, -8.41765852]], shape=(1106, 208))

In [10]:
BRCA_NES_Corrected.X

array([[ 3.31299488,  0.13898613,  1.41328845, ...,  4.27563766,
         4.27255937,  1.81609743],
       [ 4.40232445,  0.17952647,  1.66693234, ...,  2.76371511,
        -1.09122135, -0.25404518],
       [-1.00436247,  2.15137492,  3.10341038, ..., -0.20926719,
        -0.04146679, -0.08405256],
       ...,
       [ 0.85092515, -0.61553694,  1.4120394 , ..., -0.49405199,
        -2.23862397, -2.20046406],
       [ 4.4411637 , -0.4607928 ,  0.82933544, ...,  2.90994808,
         8.02378342,  4.86682743],
       [-5.08276292, -0.07092113, -3.33750214, ..., -8.08333506,
        -8.76794786, -8.41765852]], shape=(1106, 208))

In [14]:
import pandas as pd

# Convert AnnData matrix (samples × TFs) to DataFrame
BRCA_NES_df = pd.DataFrame(
    BRCA_NES.X,
    index=BRCA_NES.obs_names,
    columns=BRCA_NES.var_names
)

# Transpose if you prefer TFs as rows and samples as columns
BRCA_NES_df = BRCA_NES_df.T

# Save to CSV
BRCA_NES_df.to_csv("NES_pyVIPER_precorrection_BRCA_github.csv")




In [15]:
BRCA_NES_Corrected_df = pd.DataFrame(
    BRCA_NES_Corrected.X,
    index=BRCA_NES_Corrected.obs_names,
    columns=BRCA_NES_Corrected.var_names
)

# Transpose if you prefer TFs as rows and samples as columns
BRCA_NES_Corrected_df = BRCA_NES_Corrected_df.T

# Save to CSV
BRCA_NES_Corrected_df.to_csv("NES_pyVIPER_postcorrection_BRCA_github.csv")


