In [None]:
%load_ext autoreload
%autoreload 2
import sys
import os
ProjDIR = "/home/jw3514/Work/ASD_Circuits_CellType/" # Change to your project directory
sys.path.insert(1, f'{ProjDIR}/src/')
from ASD_Circuits import *

# Move into the notebook folder. A failure is only printed, never raised,
# so the remaining cells still execute even if the directory is missing.
try:
    os.chdir(f"{ProjDIR}/notebooks_mouse_sc/")
    print(f"Current working directory: {os.getcwd()}")
except FileNotFoundError as e:
    print(f"Error: Could not change directory - {e}")
except Exception as e:
    print(f"Unexpected error: {e}")

# Four lookup objects returned by LoadGeneINFO (presumably provided by the
# ASD_Circuits star import above — TODO confirm).
HGNC, ENSID2Entrez, GeneSymbol2Entrez, Entrez2Symbol = LoadGeneINFO()

In [None]:
# Cluster annotation table indexed by "cluster_id_label"; later cells read its 'class_id_label' column.
ClusterAnn = pd.read_csv(ProjDIR + "dat/MouseCT_Cluster_Anno.csv", index_col="cluster_id_label")

In [None]:
# Build class -> [clusters] and subclass -> [clusters] lookups from the hierarchy table.
CellTypesDF = pd.read_csv(ProjDIR+"dat/CellTypeHierarchy.csv")
Class2Cluster = {}
Subclass2Cluster = {}
for i, row in CellTypesDF.iterrows():
    # Each row is unpacked positionally, so the table must have exactly these four columns in this order.
    _cluster, _class, _subclass, _supertype = row
    # setdefault creates the list on first sight of a key, then we append to it.
    Class2Cluster.setdefault(_class, []).append(_cluster)
    Subclass2Cluster.setdefault(_subclass, []).append(_cluster)

In [None]:
# Expression matrix used for every bias computation below; its columns are
# selected by cluster label further down (MouseSC_Z2.loc[:, <clusters>]).
# NOTE(review): the file name suggests Z2 scores restricted to ISH-matched genes — confirm.
MouseSC_Z2 = pd.read_csv("/home/jw3514/Work/CellType_Psy/AllenBrainCellAtlas/dat/SC_UMI_Mats/Cluster_Z2Mat_ISHMatch.z1clip3.csv.gz", index_col=0)

In [None]:
# ASD gene weights -> bias table -> class annotation -> CSV.
# ASD_GW is later handed to permute_gene_labels, which treats it as a {gene: weight} dict.
ASD_GW = Fil2Dict(ProjDIR+"dat/Genetics/GeneWeights_DN/Spark_Meta_EWS.GeneWeight.DN.gw")
ASD_SC_Bias = MouseCT_AvgZ_Weighted(MouseSC_Z2, ASD_GW)
ASD_SC_Bias = add_class(ASD_SC_Bias, ClusterAnn)
# NOTE(review): ProjDIR ends with "/", so "../results" points to a folder next to
# the project directory rather than inside it — confirm this is the intended location.
ASD_SC_Bias.to_csv(ProjDIR + "../results/CT_Z2/ASD_Spark61.csv")

In [None]:
# DDD gene weights -> bias table -> class annotation -> CSV.
# DDD_GW is later handed to permute_gene_labels, which treats it as a {gene: weight} dict.
DDD_GW = Fil2Dict(ProjDIR+"dat/Genetics/GeneWeights_DN/DDD.top293.ExcludeASD.DN.gw")
DDD_SC_Bias = MouseCT_AvgZ_Weighted(MouseSC_Z2, DDD_GW)
DDD_SC_Bias = add_class(DDD_SC_Bias, ClusterAnn)
# Write to a DDD-specific file: this cell previously reused "ASD_Spark61.csv",
# which silently overwrote the ASD results saved by the preceding cell.
DDD_SC_Bias.to_csv(ProjDIR + "../results/CT_Z2/DDD_top293_ExcludeASD.csv")

In [None]:
def plot_asd_bias_boxplot(DF, Class2Cluster, title='ASD Bias Across Different Classes'):
    """
    Draw one horizontal box per class showing the distribution of 'EFFECT',
    with the classes ordered by their median value.

    Parameters:
    -----------
    DF : pd.DataFrame
        Bias table; must provide 'class_id_label' and 'EFFECT' columns.
    Class2Cluster : dict
        Mapping keyed by class label. Only its keys are used, to decide
        which classes get a box.
    title : str, optional
        Figure title. Default is 'ASD Bias Across Different Classes'.
    """
    class_names = sorted(Class2Cluster.keys())

    sns.set(style="whitegrid", context="talk")

    # Collect the non-NaN EFFECT values of each class (v == v is False only for NaN)
    per_class_values = []
    for class_name in class_names:
        effects = DF.loc[DF["class_id_label"] == class_name, "EFFECT"].values
        per_class_values.append([v for v in effects if v == v])
    class_medians = [np.median(values) for values in per_class_values]

    # Order classes by ascending median
    order = np.argsort(class_medians)
    ordered_values = [per_class_values[k] for k in order]
    ordered_names = [class_names[k] for k in order]

    fig, ax = plt.subplots(dpi=480, figsize=(8, 8))

    # tick_labels (rather than labels) is the keyword used by recent matplotlib
    box_artists = ax.boxplot(ordered_values, tick_labels=ordered_names, vert=False, patch_artist=True)

    # One fill colour per box
    shades = sns.color_palette("muted", len(ordered_names))
    for box, shade in zip(box_artists['boxes'], shades):
        box.set_facecolor(shade)

    # Dashed grid, axis labels and title
    ax.grid(True, linestyle='--', alpha=0.7)
    ax.set_xlabel('Bias', fontsize=14)
    ax.set_ylabel('Classes', fontsize=14)
    ax.set_title(title, fontsize=16, weight='bold')

    plt.xticks(fontsize=12)
    plt.yticks(fontsize=12)

    plt.tight_layout()
    plt.show()

In [None]:
# ASD bias per class (the title previously said 'DDD' although ASD_SC_Bias is plotted).
plot_asd_bias_boxplot(ASD_SC_Bias, Class2Cluster, title='ASD Bias Across Different Classes')

In [None]:
# Same per-class boxplot, this time for the DDD bias table.
plot_asd_bias_boxplot(DDD_SC_Bias, Class2Cluster, title='DDD Bias Across Different Classes')

In [None]:
# Peek at the annotation table.
ClusterAnn.head()

In [None]:
# Peek at the first two rows of the annotated ASD bias table.
ASD_SC_Bias.head(2)

In [None]:
# Class labels present in the ASD bias table (order of first appearance).
ASD_SC_Bias['class_id_label'].unique()

In [None]:
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# ASD_SC_Bias and DDD_SC_Bias are expected to carry 'EFFECT' and 'class_id_label' columns.

# z-score 'EFFECT' within each dataset; fit_transform refits the scaler on every call,
# so one scaler instance can serve both tables.
scaler = StandardScaler()

ASD_SC_Bias_std = ASD_SC_Bias.copy()
ASD_SC_Bias_std['EFFECT_std'] = scaler.fit_transform(ASD_SC_Bias[['EFFECT']])

DDD_SC_Bias_std = DDD_SC_Bias.copy()
DDD_SC_Bias_std['EFFECT_std'] = scaler.fit_transform(DDD_SC_Bias[['EFFECT']])

# Side-by-side per-class boxplots of the standardized values, one panel per dataset
fig, axes = plt.subplots(1, 2, figsize=(16, 6), dpi=150)

_panels = [('ASD_SC_Bias', ASD_SC_Bias_std), ('DDD_SC_Bias', DDD_SC_Bias_std)]
for _panel_ax, (_panel_name, _panel_frame) in zip(axes, _panels):
    sns.boxplot(data=_panel_frame, x='class_id_label', y='EFFECT_std', ax=_panel_ax)
    _panel_ax.set_title(f'{_panel_name}: Standardized EFFECT per Class')
    _panel_ax.set_ylabel('Standardized EFFECT')
    _panel_ax.set_xlabel('Class')
    _panel_ax.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

## Try a permutation test to see if the difference is significant

In [None]:
# Cluster labels (annotation-table index) belonging to three classes of interest.
_class_of_cluster = ClusterAnn['class_id_label']
CNU_LGE_Cluster = _class_of_cluster[_class_of_cluster == '09 CNU-LGE GABA'].index.tolist()
IT_ET_Cluster = _class_of_cluster[_class_of_cluster == '01 IT-ET Glut'].index.tolist()
NP_Cluster = _class_of_cluster[_class_of_cluster == '02 NP-CT-L6b Glut'].index.tolist()

In [None]:
# Expression-matrix columns for the CNU-LGE GABA clusters.
# NOTE(review): .loc with a list of labels raises KeyError when any label is missing
# from MouseSC_Z2's columns — confirm every annotated cluster is present.
CT_Bias_CNU_LGE = MouseSC_Z2.loc[:, CNU_LGE_Cluster]

In [None]:
# Observed comparison: join the ASD and DDD bias tables, then regress ASD EFFECT on DDD EFFECT.
# NOTE(review): merge_bias_datasets_2 and fit_structure_bias_linear_model are defined in the
# NEXT cell of this notebook; unless ASD_Circuits also exports them, this cell fails with
# NameError on Restart & Run All — the definitions should be moved above this cell.
merged_data = merge_bias_datasets_2(ASD_SC_Bias, DDD_SC_Bias, suffixes=('_ASD', '_DDD'))
results_df = fit_structure_bias_linear_model(merged_data, metric='EFFECT', suffixes=('_ASD', '_DDD'))

In [None]:
def permute_gene_labels(dict1, dict2, seed=None):
    """
    Permute disease labels by randomly splitting pooled genes into two sets.
    Each gene keeps its original weight, but disease assignment is shuffled.

    The (gene, weight) pairs of both inputs are pooled and shuffled *as pairs*,
    then cut back into two groups with the original sizes. (Shuffling only the
    gene list, as this function used to do, re-pairs every gene with some other
    gene's weight, which contradicts the contract above.)

    Parameters:
    -----------
    dict1 : dict
        First gene set {gene_id: weight}
    dict2 : dict
        Second gene set {gene_id: weight}
    seed : int or None
        Random seed for reproducibility. When given, the module-level
        `random` generator is seeded with it; when None the current global
        state is used as-is.

    Returns:
    --------
    perm_dict1 : dict
        Permuted first set with same size as dict1
    perm_dict2 : dict
        Permuted second set with same size as dict2

    Notes:
    ------
    The inputs are not modified. If a gene occurs in both inputs it is pooled
    twice; should both copies land in the same output, that dict keeps only one
    entry for it and is therefore smaller than the corresponding input.
    """
    import random

    if seed is not None:
        random.seed(seed)

    # Pool (gene, weight) pairs so a gene and its weight always travel together
    pooled_pairs = list(dict1.items()) + list(dict2.items())
    random.shuffle(pooled_pairs)

    # Split back into two sets of the original sizes
    n1 = len(dict1)
    perm_dict1 = dict(pooled_pairs[:n1])
    perm_dict2 = dict(pooled_pairs[n1:])

    return perm_dict1, perm_dict2

def merge_bias_datasets_2(dataset1, dataset2, suffixes=('_1', '_2')):
    """
    Join two bias tables on their index and add difference columns.

    Parameters:
    -----------
    dataset1 : DataFrame
        First table; must contain 'Rank' and 'EFFECT' columns
    dataset2 : DataFrame
        Second table; must contain 'Rank' and 'EFFECT' columns
    suffixes : tuple of str
        Suffixes appended to 'Rank' / 'EFFECT' for the first and second table

    Returns:
    --------
    merged_data : DataFrame
        Rows shared by both indexes, holding the suffixed 'Rank' and 'EFFECT'
        columns plus DIFF_<metric> (first minus second) and ABS_DIFF_<metric>
        for each metric, sorted by ABS_DIFF_EFFECT in descending order
    """
    shared_cols = ['Rank', 'EFFECT']
    first_suffix, second_suffix = suffixes[0], suffixes[1]

    # Index-on-index join (pandas' default inner join), keeping only the two metrics
    merged_data = dataset1[shared_cols].merge(
        dataset2[shared_cols], left_index=True, right_index=True, suffixes=suffixes
    )

    # Signed and absolute differences, first for Rank and then for EFFECT
    for metric_name in shared_cols:
        delta = merged_data[f'{metric_name}{first_suffix}'] - merged_data[f'{metric_name}{second_suffix}']
        merged_data[f'DIFF_{metric_name}'] = delta
        merged_data[f'ABS_DIFF_{metric_name}'] = delta.abs()

    # Largest EFFECT discrepancy first
    return merged_data.sort_values('ABS_DIFF_EFFECT', ascending=False)

from sklearn.linear_model import LinearRegression
def fit_structure_bias_linear_model(merged_data, metric='EFFECT', suffixes=('_1', '_2')):
    """
    Regress the first dataset's metric on the second dataset's metric.

    Parameters:
    -----------
    merged_data : DataFrame
        Table holding the columns f'{metric}{suffixes[0]}' and f'{metric}{suffixes[1]}'
    metric : str
        Base column name to model
    suffixes : tuple of str
        suffixes[0] marks the response column, suffixes[1] the predictor column

    Returns:
    --------
    results_df : DataFrame
        Copy of merged_data with two extra columns: 'predicted' (fitted response)
        and 'residual' (observed response minus fitted response)
    """
    response_col = f'{metric}{suffixes[0]}'
    predictor_col = f'{metric}{suffixes[1]}'

    # The estimator expects a 2-D feature matrix, hence the reshape to a single column
    predictor = merged_data[predictor_col].values.reshape(-1, 1)
    response = merged_data[response_col].values

    fitted = LinearRegression().fit(predictor, response).predict(predictor)

    results_df = merged_data.copy()
    results_df['predicted'] = fitted
    results_df['residual'] = response - fitted
    return results_df

In [None]:
# Null distribution: 1000 label permutations, each re-running the bias + regression steps.
results_perm_list = []
for i in range(1000):
    # Arguments are passed as (DDD, ASD), so the first returned dict is cut to DDD's size
    # and the second to ASD's size; the loop index doubles as the seed.
    fake_key_DDD, fake_key_Spark = permute_gene_labels(DDD_GW, ASD_GW, seed=i)
    perm_ASD_Bias = MouseCT_AvgZ_Weighted(MouseSC_Z2, fake_key_Spark)
    perm_DDD_Bias = MouseCT_AvgZ_Weighted(MouseSC_Z2, fake_key_DDD)
    # Same merge + regression as for the observed data, with identical suffixes.
    merged_data_perm = merge_bias_datasets_2(perm_ASD_Bias, perm_DDD_Bias, suffixes=('_ASD', '_DDD'))
    results_df_perm = fit_structure_bias_linear_model(merged_data_perm, metric='EFFECT', suffixes=('_ASD', '_DDD'))
    results_perm_list.append(results_df_perm)

In [None]:
# Inspect the result table of the first permutation.
results_perm_list[0]

In [None]:
# Observed regression results restricted to the CNU-LGE GABA clusters.
# Clusters without a row in results_df are skipped (the same filter the later
# cluster-list cell applies), so .loc cannot raise KeyError on a missing label.
results_df_eval = results_df.loc[[c for c in CNU_LGE_Cluster if c in results_df.index], :]

In [None]:
# Ten CNU-LGE clusters with the largest positive residuals.
results_df_eval.sort_values(by='residual', ascending=False).head(10)

In [None]:
# Sorted list of class labels, handy for picking the label strings used below.
sorted(ASD_SC_Bias['class_id_label'].unique())

In [None]:
def _clusters_in_class(class_label):
    """Return cluster labels annotated with `class_label` that also have a row in results_df."""
    annotated = ClusterAnn[ClusterAnn['class_id_label'] == class_label].index.tolist()
    return [cluster for cluster in annotated if cluster in results_df.index]


# Per-class cluster lists, limited to clusters present in the regression results
CNU_LGE_Cluster = _clusters_in_class('09 CNU-LGE GABA')
IT_ET_Cluster = _clusters_in_class('01 IT-ET Glut')
NP_Cluster = _clusters_in_class('02 NP-CT-L6b Glut')
CGE_Cluster = _clusters_in_class('06 CTX-CGE GABA')
MGE_Cluster = _clusters_in_class('07 CTX-MGE GABA')
TH_Cluster = _clusters_in_class('18 TH Glut')



In [None]:
# Overlaid, density-normalised residual histograms for three classes.
plt.figure(figsize=(8,4))
for _cluster_ids, _hist_color, _hist_label in [
    (CNU_LGE_Cluster, 'orange', 'CNU_LGE_Cluster'),
    (IT_ET_Cluster, 'green', 'IT_ET_Cluster'),
    (NP_Cluster, 'purple', 'NP_Cluster'),
]:
    plt.hist(results_df.loc[_cluster_ids, "residual"], bins=20, color=_hist_color, edgecolor='black', label=_hist_label, density=True, alpha=0.7)
plt.xlabel("Residual")
plt.ylabel("Density")
# The title used to name only CNU_LGE_Cluster although three classes are drawn.
plt.title("Histogram of residuals for CNU_LGE, IT_ET and NP clusters")
plt.legend()
plt.tight_layout()
plt.show()

In [None]:
import seaborn as sns
import itertools
from scipy.stats import mannwhitneyu
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# NOTE(review): a second function with this exact name is defined in a later cell of this
# notebook and replaces this one once that cell runs. This version annotates ALL pairwise
# comparisons; consider giving the two variants distinct names.
def cluster_residual_boxplot(
    results_df, 
    cluster_dict, 
    metric="residual", 
    palette=None, 
    figsize=(9,5), 
    swarm=True,
    box_width=0.6,
    title=None, 
    fontsize=13,
    show=True
):
    """
    Plot a boxplot (optionally overlaid with jittered points) of `metric` for
    several groups of rows, annotated with two-sided Mann-Whitney U p-values
    for every pair of groups.

    Args:
        results_df (pd.DataFrame): Table containing the `metric` column; rows are selected with .loc.
        cluster_dict (dict): {group_label: list-of-row-labels}. Insertion order sets the plotting order.
        metric (str): Column in results_df to plot.
        palette (list or dict): Colors for the groups. A dict is looked up by group label;
            None falls back to a concatenation of seaborn palettes cut to the number of groups.
        figsize (tuple): Size of the figure.
        swarm (bool): Whether to overlay the individual points (drawn with sns.stripplot).
        box_width (float): Box width for the boxplot.
        title (str): Optional plot title.
        fontsize (int): Base fontsize.
        show (bool): Call plt.show() if True.

    Returns:
        pd.DataFrame: Long-format data used for plotting (columns "Cluster" and `metric`).
    """
    if palette is None:
        base_colors = sns.color_palette("tab10") + sns.color_palette("Set2") + sns.color_palette("Dark2")
        palette = base_colors[:len(cluster_dict)]
    elif isinstance(palette, dict):
        palette = [palette[k] for k in cluster_dict.keys()]

    cluster_labels = list(cluster_dict.keys())
    cluster_data = []
    n_points = []

    # Build the plot DataFrame: one row per non-NaN value, tagged with its group label
    for k in cluster_labels:
        vals = results_df.loc[cluster_dict[k], metric].dropna()
        cluster_data.append(vals)
        n_points.append(len(vals))
    plot_df = pd.DataFrame({
        "Cluster": np.repeat(cluster_labels, n_points),
        metric: np.concatenate([v.values for v in cluster_data])
    })

    # Plot
    plt.figure(figsize=figsize)
    ax = sns.boxplot(
        x="Cluster", y=metric, data=plot_df, 
        palette=palette, width=box_width, showmeans=True,
        meanprops={"marker": "o", "markerfacecolor": "black", "markeredgecolor": "black"}
    )
    if swarm:
        sns.stripplot(x="Cluster", y=metric, data=plot_df, color='k', alpha=0.4, jitter=0.18, size=5, ax=ax)

    plt.xlabel("Cluster", fontsize=fontsize)
    plt.ylabel(metric.capitalize(), fontsize=fontsize)
    plt.xticks(fontsize=fontsize-1)
    plt.yticks(fontsize=fontsize-1)
    plt.title(title or f"Boxplot of {metric} by Cluster", fontsize=fontsize+1)

    # Pairwise Mann-Whitney U-test & annotation
    pairs = list(itertools.combinations(range(len(cluster_labels)), 2))
    y_min, y_max = plot_df[metric].min(), plot_df[metric].max()
    y_range = y_max - y_min
    # Brackets start just above the data and are stacked one step per pair
    y_start = y_max + 0.03 * y_range
    y_step = 0.09 * y_range if y_range else 0.1

    for n, (i, j) in enumerate(pairs):
        vals1, vals2 = cluster_data[i], cluster_data[j]
        if len(vals1) == 0 or len(vals2) == 0:
            continue
        stat, p = mannwhitneyu(vals1, vals2, alternative='two-sided')
        # NOTE(review): the group's position in cluster_dict is used as its x coordinate;
        # this presumably only matches the drawn boxes when every group has at least one
        # value, since no explicit category order is passed to seaborn — confirm.
        x1, x2 = i, j
        y = y_start + y_step * n
        h = y_step * 0.6
        plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.7, c='k', alpha=0.9)
        plt.text(
            (x1 + x2) * 0.5, y + h,
            f"$p$ = {p:.2e}",
            ha='center', va='bottom', color='k', fontsize=fontsize-2, fontweight='bold'
        )
    plt.tight_layout()
    if show:
        plt.show()
    return plot_df

# USAGE EXAMPLE: six classes, all pairwise comparisons annotated.
cluster_dict = {
    'CNU_LGE_GABA': CNU_LGE_Cluster,
    'IT_ET_Glut': IT_ET_Cluster,
    'NP_CT_L6b_Glut': NP_Cluster,
    'CTX_CGE_GABA': CGE_Cluster,
    'CTX_MGE_GABA': MGE_Cluster,
    'TH_Glut': TH_Cluster
}
# Seven colours are listed for the six groups above.
plot_palette = ["orange", "green", "purple", "red", "blue", "yellow", "pink"]
# The returned plotting DataFrame is discarded on purpose.
_ = cluster_residual_boxplot(results_df, cluster_dict, metric="residual", palette=plot_palette)


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.stats import mannwhitneyu

# Figure resolution; read as a global inside cluster_residual_boxplot defined just below.
dpi = 240

def cluster_residual_boxplot(
    results_df,
    cluster_dict,
    metric="residual",
    palette=None,
    figsize=(9,5),
    swarm=True,
    box_width=0.6,
    title=None,
    fontsize=13,
    show=True,
    ref_label="CNU_LGE_GABA",
):
    """
    Plot a boxplot (optionally overlaid with jittered points) of `metric` for
    several groups of rows, annotated with two-sided Mann-Whitney U p-values
    comparing one reference group against each of the others.

    NOTE: an earlier cell of this notebook defines a function with the same
    name (all-pairs annotations); running this cell replaces it.

    Args:
        results_df (pd.DataFrame): Table containing the `metric` column; rows are selected with .loc.
        cluster_dict (dict): {group_label: list-of-row-labels}. Insertion order sets the plotting order.
        metric (str): Column in results_df to plot.
        palette (list or dict): Colors for the groups. A dict is looked up by group label;
            a list/tuple longer than the number of groups is cut to that number;
            None falls back to a concatenation of seaborn palettes.
        figsize (tuple): Size of the figure.
        swarm (bool): Whether to overlay the individual points (drawn with sns.stripplot).
        box_width (float): Box width for the boxplot.
        title (str): Optional plot title.
        fontsize (int): Base fontsize.
        show (bool): Call plt.show() if True.
        ref_label (str or None): Group compared against every other group. If it is
            not a key of cluster_dict (or is None), no test annotations are drawn.

    Returns:
        pd.DataFrame: Long-format data used for plotting (columns "Cluster" and `metric`).

    Raises:
        ValueError: if cluster_dict is empty.
    """
    cluster_labels = list(cluster_dict.keys())
    if not cluster_labels:
        # Without this, np.concatenate below fails with a far less helpful ValueError
        raise ValueError("cluster_dict must contain at least one cluster")

    if palette is None:
        base_colors = sns.color_palette("tab10") + sns.color_palette("Set2") + sns.color_palette("Dark2")
        palette = base_colors[:len(cluster_labels)]
    elif isinstance(palette, dict):
        palette = [palette[k] for k in cluster_labels]
    elif isinstance(palette, (list, tuple)):
        # Only the first len(cluster_labels) colours are needed; drop any surplus
        palette = list(palette)[:len(cluster_labels)]

    # Build the plot DataFrame: one row per non-NaN value, tagged with its group label
    cluster_data = [results_df.loc[cluster_dict[k], metric].dropna() for k in cluster_labels]
    n_points = [len(vals) for vals in cluster_data]
    plot_df = pd.DataFrame({
        "Cluster": np.repeat(cluster_labels, n_points),
        metric: np.concatenate([v.values for v in cluster_data])
    })

    # Plot. `dpi` is the notebook-level global set right above this definition.
    # `order` pins every group to the x position equal to its index in cluster_labels,
    # even when a group contributed no rows; the annotation bars below rely on that.
    plt.figure(figsize=figsize, dpi=dpi)
    ax = sns.boxplot(
        x="Cluster", y=metric, data=plot_df, order=cluster_labels,
        palette=palette, width=box_width, showmeans=True,
        meanprops={"marker": "o", "markerfacecolor": "black", "markeredgecolor": "black"}
    )
    if swarm:
        sns.stripplot(
            x="Cluster", y=metric, data=plot_df, order=cluster_labels,
            color='k', alpha=0.4, jitter=0.18, size=5, ax=ax
        )

    plt.xlabel("Cluster", fontsize=fontsize)
    plt.ylabel(metric.capitalize(), fontsize=fontsize)
    plt.xticks(fontsize=fontsize-1)
    plt.yticks(fontsize=fontsize-1)

    # Only test the reference group against the others
    if ref_label in cluster_labels:
        ref_idx = cluster_labels.index(ref_label)
        compare_indices = [i for i in range(len(cluster_labels)) if i != ref_idx]
        message = f"Boxplot of {metric} by Cluster\n(p: Mann-Whitney U only {ref_label} vs others)"
    else:
        ref_idx = None
        compare_indices = []
        message = f"Boxplot of {metric} by Cluster"

    plt.title(title or message, fontsize=fontsize+1)

    # Brackets start just above the data and are stacked one step per drawn comparison
    y_min, y_max = plot_df[metric].min(), plot_df[metric].max()
    y_range = y_max - y_min
    y_start = y_max + 0.03 * y_range
    y_step = 0.14 * y_range if y_range else 0.2

    n = 0  # number of brackets drawn so far (skipped comparisons leave no gap)
    for j in compare_indices:
        vals1, vals2 = cluster_data[ref_idx], cluster_data[j]
        if len(vals1) == 0 or len(vals2) == 0:
            continue
        stat, p = mannwhitneyu(vals1, vals2, alternative='two-sided')
        x1, x2 = ref_idx, j
        y = y_start + y_step * n
        h = y_step * 0.6
        # Bracket between the reference group and the other group
        plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=2, c='k', alpha=0.9)
        plt.text(
            (x1 + x2) * 0.5, y + h,
            f"$p$ = {p:.2e}",
            ha='center', va='bottom', color='k', fontsize=fontsize-2, fontweight='bold'
        )
        n += 1

    plt.tight_layout()
    if show:
        plt.show()
    return plot_df

# USAGE EXAMPLE: same six classes, now with the reference-vs-others variant defined above.
cluster_dict = {
    'CNU_LGE_GABA': CNU_LGE_Cluster,
    'IT_ET_Glut': IT_ET_Cluster,
    'NP_CT_L6b_Glut': NP_Cluster,
    'CTX_CGE_GABA': CGE_Cluster,
    'CTX_MGE_GABA': MGE_Cluster,
    'TH_Glut': TH_Cluster
}
# Seven colours are listed for the six groups above.
plot_palette = ["orange", "green", "purple", "red", "blue", "yellow", "pink"]
# The returned plotting DataFrame is discarded on purpose.
_ = cluster_residual_boxplot(results_df, cluster_dict, metric="residual", palette=plot_palette)


In [None]:
def permutation_null_plot(obs_df, null_dfs, structure, metric="residual"):
    """
    Show where an observed value falls within its permutation null and
    return the corresponding permutation p-value.

    Parameters:
    - obs_df: observed DataFrame (e.g., results_df_eval); must contain row `structure`
    - null_dfs: list of permutation DataFrames (e.g., results_perm_list), each containing row `structure`
    - structure: str, row label to evaluate
    - metric: column to evaluate (e.g., 'residual', 'EFFECT', etc.)

    Returns:
    - pval: share of permutations whose absolute value is at least as large as the
      observed absolute value (i.e. a two-sided test), with +1 added to both the
      count and the total so the p-value can never be exactly zero.
    """
    observed = obs_df.loc[structure, metric]
    null_values = [perm_df.loc[structure, metric] for perm_df in null_dfs]

    # Count permutations at least as extreme as the observation, in absolute value
    n_as_extreme = np.sum(np.abs(null_values) >= np.abs(observed))
    pval = (n_as_extreme + 1) / (len(null_values) + 1)

    heading = (
        f"Permutation null distribution for {structure} {metric}\nObserved value marked\n"
        f"Permutation p-value: {pval:.3g}"
    )

    fig, ax = plt.subplots(figsize=(7, 4))
    ax.hist(null_values, bins=10, color="skyblue", edgecolor="k", alpha=0.7)
    ax.axvline(observed, color="red", linestyle="--", label=f"Observed: {observed:.3f}")
    ax.set_xlabel(metric.capitalize())
    ax.set_ylabel("Count")
    ax.set_title(heading)
    ax.legend()
    plt.tight_layout()
    plt.show()
    return pval

In [None]:
# First five rows of the first permutation's result table.
results_perm_list[0].head(5)

In [None]:
# Example cluster checked against the permutation null for two metrics.
# Each call draws its own figure with the p-value in the title; only the last
# call's return value is echoed as the cell output.
CT = "0965 STR D2 Gaba_1"
permutation_null_plot(results_df_eval, results_perm_list, CT, metric="ABS_DIFF_EFFECT")
permutation_null_plot(results_df_eval, results_perm_list, CT, metric="residual")