In [4]:
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
import numpy as np
import my_netzoo
from scipy.stats import pearsonr

In [59]:
# LIONESS -> PANDA
def l2p(expression_df, motif, tf_names, gene_names, method='pearson'):
    """Build one labelled PANDA network per LIONESS correlation matrix.

    ``my_netzoo.my_lioness`` is called with ``expression_df`` and ``method``;
    every matrix it returns is passed to ``my_netzoo.my_panda`` along with
    the motif prior, and each result is wrapped in a labelled DataFrame.

    Parameters
    ----------
    expression_df : expression data, handed unchanged to ``my_lioness``.
    motif : motif prior, handed to ``my_panda`` as ``motif_df``.
    tf_names : labels used as the row index of every returned frame.
    gene_names : labels used as the columns of every returned frame.
    method : forwarded to ``my_lioness``; defaults to ``'pearson'``.

    Returns
    -------
    dict
        Same keys as the ``my_lioness`` result (presumably one per sample --
        TODO confirm); each value is a ``pd.DataFrame`` with ``tf_names``
        as index and ``gene_names`` as columns.
    """
    # Generate the correlation matrices
    per_key_corr = my_netzoo.my_lioness(expression_df, method)

    # Run PANDA on every matrix. expression_df is explicitly None here; the
    # precomputed matrix is supplied through correlation_matrix instead.
    raw_networks = {
        key: my_netzoo.my_panda(
            expression_df=None,
            motif_df=motif,
            correlation_matrix=per_key_corr[key],
        )
        for key in per_key_corr
    }

    # Attach TF (row) and gene (column) labels
    labelled_networks = {}
    for key, network in raw_networks.items():
        labelled_networks[key] = pd.DataFrame(network, index=tf_names, columns=gene_names)

    return labelled_networks

# PANDA -> LIONESS
def p2l(expression_df, motif, method="panda"):
    """Return ``my_netzoo.my_lioness(expression_df, method, motif)``.

    Parameters
    ----------
    expression_df : expression data, passed as the first argument.
    motif : motif prior, passed as the third argument.
    method : passed as the second argument; defaults to ``"panda"``.

    Returns
    -------
    Whatever ``my_lioness`` returns, unchanged.
    """
    networks = my_netzoo.my_lioness(expression_df, method, motif)
    return networks
    

In [49]:
# Load in miRNA expression data, sample phenotype data, and motif file.
# The input directory is named once so the notebook can be pointed at a
# different location by editing a single line (pandas expands the leading "~").
DATA_DIR = "~/Documents/Glass_Lab/data"

# Tab-separated expression table
final_expr_full = pd.read_csv(f"{DATA_DIR}/final_expr.txt", sep="\t")
# Phenotype table; its first column becomes the index
pheno_df = pd.read_csv(f"{DATA_DIR}/pheno_df.csv", index_col=0)
# Tab-separated motif table
motif = pd.read_csv(f"{DATA_DIR}/motif.txt", sep="\t")

In [51]:
# Establish tf/gene names
# Pivot the motif table into a wide TF x Gene frame of "Weight" values.
# Pairs missing from the table are filled with 0; repeated (TF, Gene) pairs
# are combined by pivot_table's default aggregation (mean).
motifs = motif.pivot_table(values="Weight", index="TF", columns="Gene", fill_value=0)

# Row labels are the TFs, column labels are the genes
tf_names, gene_names = list(motifs.index), list(motifs.columns)

In [None]:
# Generate single-sample L2P and P2L networks
# l2p is declared as l2p(expression_df, motif, tf_names, gene_names, ...), so the
# TF names must come before the gene names; keywords make the pairing explicit.
l2p_networks = l2p(final_expr_full, motif, tf_names=tf_names, gene_names=gene_names)
p2l_networks = p2l(final_expr_full, motif, "panda")

# Both results are dicts of per-sample frames, and a dict has no .to_feather().
# Stack each dict into one frame first: the dict key and the row label become
# ordinary "sample" / "TF" columns, because feather needs a default index.
# NOTE(review): assumes every p2l frame is TF x Gene with string column
# labels (feather requires string column names) -- confirm.
pd.concat(l2p_networks, names=["sample", "TF"]).reset_index().to_feather("../data/raw_l2p_networks.feather")
pd.concat(p2l_networks, names=["sample", "TF"]).reset_index().to_feather("../data/raw_p2l_networks.feather")

In [None]:
# Save Pearson correlation data
# Flatten the per-sample LIONESS correlation frames into one long table with
# columns gene1, gene2, correlation, sample.
l2p_corr = my_netzoo.my_lioness(final_expr_full, "pearson")
long_format = []

for sample, df in l2p_corr.items():
    # reset_index() names the former index column after df.index.name,
    # or 'index' when the index is unnamed.
    row_label = df.index.name or 'index'
    melted = (
        df.reset_index()
        .melt(id_vars=row_label, var_name='gene2', value_name='correlation')
        .rename(columns={row_label: 'gene1'})
    )
    melted['sample'] = sample
    long_format.append(melted)

# One table for all samples, with a fresh 0..n-1 index
all_long = pd.concat(long_format, ignore_index=True)
all_long.to_feather("../data/l2p_corr.feather")

In [None]:
# Save L2P networks
# Long format: one row per (TF, Gene, sample) holding the edge Score.
long_format = []
for sample, arr in l2p_networks.items():
    # Label rows with TFs and columns with genes before reshaping
    df = pd.DataFrame(arr, index=tf_names, columns=gene_names)
    melted = (
        df.reset_index()
        .melt(id_vars='index', var_name='Gene', value_name='Score')
        # 'TF' rather than 'tf', so this file has the same column names as
        # the P2L export and the motif table's "TF" column
        .rename(columns={'index': 'TF'})
    )
    melted['sample'] = sample
    long_format.append(melted)

# One table for all samples, with a fresh 0..n-1 index
all_long = pd.concat(long_format, ignore_index=True)
all_long.to_feather('l2p_networks.feather')

In [None]:
# Save P2L networks
# Long format: one row per (TF, Gene, sample) holding the edge Score.
long_format = []
for sample, arr in p2l_networks.items():
    # Label rows with TFs and columns with genes before reshaping
    df = pd.DataFrame(arr, index=tf_names, columns=gene_names)
    melted = (
        df.reset_index()
        .melt(id_vars='index', var_name='Gene', value_name='Score')
        .rename(columns={'index': 'TF'})
        .assign(sample=sample)
    )
    long_format.append(melted)

# One table for all samples, with a fresh 0..n-1 index
all_long = pd.concat(long_format, ignore_index=True)
all_long.to_feather('p2l_networks.feather')

In [65]:
# Flatten every P2L network into one wide row per sample: a 'sample' column
# followed by one column per TF-gene edge, named "<TF>_<Gene>".
wide_rows = []
for i, (sample, df) in enumerate(p2l_networks.items(), start=1):
    print(sample, i)  # progress: sample key and its 1-based position
    # df: index = TF, columns = Gene
    edge_labels = [f"{tf}_{gene}" for tf in df.index for gene in df.columns]
    # Row-major flattening yields values in the same TF-outer / gene-inner
    # order as edge_labels, replacing one scalar .loc lookup per edge.
    edge_values = df.to_numpy().ravel()
    row = {'sample': sample}
    row.update(zip(edge_labels, edge_values))
    wide_rows.append(row)

p2l_networks_wide = pd.DataFrame(wide_rows)