In [None]:
import pandas as pd
import numpy as np
import dask.dataframe as dd
import matplotlib.pyplot as plt
import seaborn as sns
import math
import logging
import csv
import seaborn as sns

import grn_inference.utils as utils

def _shared_bin_range(scores1, scores2):
    """Return the (min, max) spanning both score samples.

    Returns None when neither sample has a finite numeric value, or when a
    sample cannot be converted to floats; the caller then lets seaborn pick
    its default per-sample range.
    """
    lows, highs = [], []
    for scores in (scores1, scores2):
        try:
            values = np.asarray(scores, dtype=float)
        except (TypeError, ValueError):
            # Non-numeric data: do not guess a range.
            return None
        values = values[np.isfinite(values)]
        if values.size:
            lows.append(float(values.min()))
            highs.append(float(values.max()))
    if not lows:
        return None
    return min(lows), max(highs)


def plot_overlapping_sample_feature_score_histograms(
    features,
    inferred_network1,
    inferred_network2,
    label1_name,
    label2_name,
    ncols=4,
    bins=50,
    xlim=(0, 1),
):
    """Overlay the score histograms of two inferred networks, one subplot per feature.

    For every feature, the non-null values of that column are drawn from both
    networks onto the same axis as proportion-normalised histograms. Both
    histograms of a subplot use the same bin range so their bars line up and
    the proportions can be compared directly. The figure is shown with
    ``plt.show()``; nothing is returned.

    Parameters
    ----------
    features : iterable of str
        Column names to plot. An empty iterable produces no figure.
    inferred_network1, inferred_network2 : DataFrame-like
        Objects exposing ``.columns`` and column access by name
        (e.g. pandas DataFrames).
    label1_name, label2_name : str
        Legend labels for the first and second network.
    ncols : int, default 4
        Number of subplot columns in the grid.
    bins : default 50
        Passed to ``sns.histplot`` as ``bins`` for both networks.
    xlim : tuple or None, default (0, 1)
        x-axis limits applied to every subplot; ``None`` leaves the limits
        to matplotlib.

    Raises
    ------
    ValueError
        If ``ncols`` is smaller than 1.
    KeyError
        If a requested feature is missing from either network.
    """
    print("\tPlotting feature score histograms")

    features = list(features)
    if not features:
        # A zero-row subplot grid is rejected by plt.subplots and there would
        # be no axes[0, 0] to take legend handles from.
        print("\tNo features given; nothing to plot")
        return None
    if ncols < 1:
        raise ValueError(f"ncols must be a positive integer, got {ncols!r}")

    # Fail before any figure is created, naming every missing column at once.
    for name, network in ((label1_name, inferred_network1), (label2_name, inferred_network2)):
        missing = [feature for feature in features if feature not in network.columns]
        if missing:
            raise KeyError(f"{name} is missing feature column(s): {missing}")

    nrows = math.ceil(len(features) / ncols)
    fig, axes = plt.subplots(nrows, ncols, figsize=(5 * ncols, 4 * nrows), squeeze=False)

    # 1-D view of the grid so leftover axes can be sliced off below
    axes_flat = axes.ravel()

    for ax, feature in zip(axes_flat, features):
        scores1 = inferred_network1[feature].dropna()
        scores2 = inferred_network2[feature].dropna()

        # Same range + same bin count => identical bin edges for both samples.
        binrange = _shared_bin_range(scores1, scores2)

        # draw into this axis explicitly:
        sns.histplot(
            scores1,
            bins=bins, binrange=binrange, alpha=0.7,
            color='#1682b1', edgecolor="#032b5f",
            stat='proportion',
            label=label1_name,
            ax=ax
        )
        sns.histplot(
            scores2,
            bins=bins, binrange=binrange, alpha=0.7,
            color="#cb5f17", edgecolor="#b13301",
            stat='proportion',
            label=label2_name,
            ax=ax
        )

        # set titles/labels on the same ax
        ax.set_title(feature, fontsize=14)
        ax.set_xlabel(feature, fontsize=14)
        ax.set_ylabel("Proportion", fontsize=14)
        if xlim is not None:
            ax.set_xlim(*xlim)
        ax.tick_params(axis='both', labelsize=12)

    # turn off any leftover empty subplots
    for ax in axes_flat[len(features):]:
        ax.set_visible(False)

    # figure-level legend, taken from the first subplot (both labels are
    # identical on every subplot)
    handles, labels = axes[0, 0].get_legend_handles_labels()
    fig.legend(
        handles, labels,
        loc="lower center",
        ncol=2,
        fontsize=14,
        bbox_to_anchor=(0.5, -0.02)
    )
    # leave a strip at the bottom of the figure for the legend
    fig.tight_layout(rect=[0, 0.05, 1, 1])
    plt.show()

# Reference (ground-truth) network file.
ground_truth_file: str = (
    "/gpfs/Labs/Uzun/DATA/PROJECTS/2024.SC_MO_TRN_DB.MIRA/REPOSITORY/CURRENT/"
    "REFERENCE_NETWORKS/RN111_ChIPSeq_BEELINE_Mouse_ESC.tsv"
)

# Score table for the first network.
inferred_network1_file: str = (
    "/gpfs/Labs/Uzun/SCRIPTS/PROJECTS/2024.SINGLE_CELL_GRN_INFERENCE.MOELLER/output/"
    "combined_inferred_dfs/mESC_combined_inferred_score_df.parquet"
)
# Disabled alternative input for the first network:
# inferred_network1_file: str = "/gpfs/Labs/Uzun/SCRIPTS/PROJECTS/2024.SINGLE_CELL_GRN_INFERENCE.MOELLER/output/mESC/filtered_L2_E7.5_rep2/inferred_grns/inferred_score_df.parquet"

# Score table for the second network.
inferred_network2_file: str = (
    "/gpfs/Labs/Uzun/SCRIPTS/PROJECTS/2024.SINGLE_CELL_GRN_INFERENCE.MOELLER/output/"
    "DS011_mESC/DS011_mESC_sample1/inferred_grns/inferred_score_df.parquet"
)

In [None]:
# Read both score tables through the project helper.
# NOTE(review): presumably returns a DataFrame with one column per feature —
# confirm against grn_inference.utils.
inferred_network1_df = utils.load_and_pivot_melted_score_dataframe(
    inferred_network1_file
)
inferred_network2_df = utils.load_and_pivot_melted_score_dataframe(
    inferred_network2_file
)

# Show which columns are available for plotting.
print(inferred_network1_df.columns)

In [None]:
# Score columns compared in this figure; commented-out entries are left out.
feature_names = [
    'mean_TF_expression',
    'mean_peak_accessibility',
    'mean_TG_expression',
    'cicero_score',
    # 'correlation',
    # 'TSS_dist_score', 
    # 'homer_binding_score', 
    'sliding_window_score', 
    'string_combined_score', 
    'string_experimental_score', 
    'string_textmining_score'
    ]

# The plotting function defined in this notebook is
# plot_overlapping_sample_feature_score_histograms; the shorter name used
# previously is not defined anywhere and raised NameError on a fresh kernel.
plot_overlapping_sample_feature_score_histograms(
    feature_names,
    inferred_network1_df,
    inferred_network2_df,
    "Combined mESC",
    "DS011 mESC",
)

In [None]:
feature_names = ['TSS_dist_score', 'correlation', 'cicero_score']

# Uses the frames loaded earlier in this notebook (`*_df`); the `*_dd` names
# and the shorter function name are not defined anywhere and raised NameError.
# The first label describes the file assigned to inferred_network1_file;
# update it if that path is switched to another sample.
plot_overlapping_sample_feature_score_histograms(
    feature_names,
    inferred_network1_df,
    inferred_network2_df,
    "Combined mESC",
    "DS011 mESC",
)