In [None]:
%load_ext autoreload
%autoreload 2
import sys
import os

# Project root. Export ASD_PROJ_DIR to point at your own checkout; when the variable
# is unset the original hard-coded location is used, so existing setups are unaffected.
ProjDIR = os.environ.get("ASD_PROJ_DIR", "/home/jw3514/Work/ASD_Circuits_CellType/")
# Make the project's src/ directory importable for the star-import below.
sys.path.insert(1, f'{ProjDIR}/src/')
from ASD_Circuits import *

# Later cells use paths relative to notebook_rebuttal/ ("../dat/...", "NeuralSystem.csv").
# A failure to change directory is reported but deliberately not fatal.
try:
    os.chdir(f"{ProjDIR}/notebook_rebuttal/")
    print(f"Current working directory: {os.getcwd()}")
except FileNotFoundError as e:
    print(f"Error: Could not change directory - {e}")
except Exception as e:
    print(f"Unexpected error: {e}")


# Gene-identifier lookup tables returned by the project helper LoadGeneINFO().
HGNC, ENSID2Entrez, GeneSymbol2Entrez, Entrez2Symbol = LoadGeneINFO()

In [None]:
STR_BiasMat = pd.read_parquet("../dat/BiasMatrices/AllenMouseBrain_Z2bias.parquet")
Anno = STR2Region()

In [None]:
csv_fil = "NeuralSystem.csv"
NeuralSystem = pd.read_csv(csv_fil, index_col=0)
ExpLevel = pd.read_csv("../dat/allen-mouse-exp/ExpMatchFeatures.csv", index_col=0)

# Put both Entrez identifiers on the same integer dtype so the lookup below matches.
NeuralSystem = NeuralSystem.astype({'entrez_id': int})
ExpLevel.index = ExpLevel.index.astype(int)

# Attach each gene's EXP value, looked up by Entrez ID (genes absent from ExpLevel get NaN).
NeuralSystem = NeuralSystem.assign(EXP=NeuralSystem['entrez_id'].map(ExpLevel['EXP']))

In [None]:
[NeuralSystem["neurotransmitter_system"].unique()]

In [None]:
NeuralSystem[NeuralSystem["neurotransmitter_system"]=="acetylcholine"]

In [None]:
STR_BiasMat.loc[1128,:].sort_values(ascending=False).head(10)

In [None]:
# Import functions from clean ASD_Circuits.py module
from ASD_Circuits import (
    analyze_neurotransmitter_systems_source_target, 
    plot_source_target_heatmap, 
    plot_source_target_only,
    save_neurotransmitter_results
)

# Define category groupings for this analysis
SOURCE_CATEGORIES = ['synthesis', 'transporter', 'storage_release', 'processing']
TARGET_CATEGORIES = ['receptor', 'degradation']

# Run the source-target analysis using imported function
print("="*70)
nt_results = analyze_neurotransmitter_systems_source_target(
    NeuralSystem, 
    STR_BiasMat, 
    SOURCE_CATEGORIES=SOURCE_CATEGORIES, 
    TARGET_CATEGORIES=TARGET_CATEGORIES,
    weights = "EXP",
    generate_bias=True
)

In [None]:
nt_results.keys()

In [None]:
nt = "acetylcholine"
nt_results[nt]["source"].head(10)

In [None]:
# drop  "acetylcholine" from nt_results

In [None]:
nt_results[nt]["target"].head(10)

In [None]:
# Read the precomputed source / target / combined bias tables of every
# neurotransmitter system into NT_BiasDict[system][category].
NT_Systems = ["Dopamine", "Serotonin", "Oxytocin"]
NT_BiasDict = {}
for system in NT_Systems:
    NT_BiasDict[system] = {
        category: pd.read_csv(
            f"{ProjDIR}/results/STR_ISH/NT_{system}_{category}_bias_addP_sibling.csv",
            index_col=0,
        )
        for category in ('source', 'target', 'combined')
    }

In [None]:
NT_BiasDict["Dopamine"]["source"].head(10)

In [None]:
# Create visualizations and save results using imported functions
print("Creating source-target visualization plots...")

# Plot source vs target vs combined comparison and save to results directory
fig1 = plot_source_target_only(NT_BiasDict, top_n=10, save_plot=True, dpi=480, pvalue_label="P-value")

# Flip to True to print a textual summary of the top structure per system and category.
display_summary = False
if display_summary:
    # Display summary statistics
    print("\n" + "="*70)
    print("SUMMARY OF RESULTS - SOURCE vs TARGET ANALYSIS")
    print("="*70)
    print(f"SOURCE categories: {SOURCE_CATEGORIES}")
    print(f"TARGET categories: {TARGET_CATEGORIES}")

    for system, system_data in NT_BiasDict.items():
        print(f"\n{system.upper()} SYSTEM:")

        # One report line per available category; the first row of each table is
        # taken as its top structure (tables are used in their stored order).
        for category in ('source', 'target', 'combined'):
            if category not in system_data:
                continue
            category_df = system_data[category]
            if category_df.empty:
                # Nothing to report; .index[0] would raise on an empty table.
                continue
            top_structure = category_df.index[0]
            top_effect = category_df.iloc[0]['EFFECT']
            print(f"  {category.capitalize()}: Top structure = {str(top_structure).replace('_', ' ')} (Effect: {top_effect:.3f})")

    # NOTE(review): the category descriptions printed below do not list 'processing',
    # although SOURCE_CATEGORIES in this notebook contains it - confirm which is intended.
    print("\nSource-Target analysis complete!")
    print("All results saved to ./results/ directory")
    print("  - CSV files: Individual bias results for each system and category")
    print("  - PNG files: High-resolution plots for publication")
    print("\nEach system now has 3 DataFrames:")
    print("  - 'source': bias from synthesis + transporter + storage_release genes")
    print("  - 'target': bias from receptor + degradation genes") 
    print("  - 'combined': bias from source + target genes (excluding metabolism, processing)")

In [None]:
NT_BiasDict

In [None]:
def plot_combined_bias_only(
    results, 
    top_n=15, 
    save_plot=False, 
    results_dir="./results",
    pvalue_label="q-value",
    dpi=300
):
    """
    Draw one horizontal bar plot per system from its 'combined' bias table.

    The first (at most three) systems of ``results`` are drawn side by side in a
    single row. Bars show the ``EFFECT`` column of the first ``top_n`` rows, are
    coloured by the ``Region`` column (grey when the column is absent or the
    region has no assigned colour) and are annotated with significance stars
    taken from the ``pvalue_label`` column (* < 0.05, ** < 0.01, *** < 0.001).
    One shared region legend is attached to the last panel.

    Args:
        results: dict mapping system name -> dict of DataFrames. Only the
            'combined' entry is used; a system without it gets a blank panel.
        top_n: number of leading rows of each table to draw. Tables are used in
            their stored order; no sorting is done here.
        save_plot: when True, write the figure as SVG into ``results_dir``.
        results_dir: output directory, created when missing.
        pvalue_label: name of the column holding the values used for the stars.
        dpi: figure resolution.

    Returns:
        The matplotlib Figure.

    Raises:
        ValueError: if ``results`` is empty, or a 'combined' table lacks the
            ``pvalue_label`` column.

    Note:
        Calling this function switches the global matplotlib style and seaborn
        plotting context.
    """
    import matplotlib.pyplot as plt
    import matplotlib.patches as mpatches
    import os
    import numpy as np
    import seaborn as sns

    REGIONS_seq = [
        'Isocortex', 'Olfactory_areas', 'Cortical_subplate', 
        'Hippocampus', 'Amygdala', 'Striatum',
        "Thalamus", "Hypothalamus", "Midbrain", 
        "Medulla", "Pallidum", "Pons",
        "Cerebellum"
    ]
    REG_COR_Dic = dict(zip(REGIONS_seq, [
        "#268ad5", "#D5DBDB", "#7ac3fa",
        "#2c9d39", "#742eb5", "#ed8921",
        "#e82315", "#E6B0AA", "#f6b26b",
        "#20124d", "#2ECC71", "#D2B4DE",
        "#ffd966",
    ]))
    fallback_color = "#888888"   # regions without an assigned colour
    max_panels = 3
    x_pad = 1.18                 # axis limit as a multiple of the largest |EFFECT|
    star_gap = 0.08              # star distance from the bar tip, same unit

    def get_significance_star(q):
        if q < 0.001:
            return '***'
        elif q < 0.01:
            return '**'
        elif q < 0.05:
            return '*'
        return ''

    def region_labels(table):
        """Region name per row, or 'Unknown' when the table has no Region column."""
        if "Region" in table.columns:
            return [str(region) for region in table["Region"]]
        return ["Unknown"] * len(table)

    systems = list(results.keys())[:max_panels]
    if not systems:
        raise ValueError("results is empty: nothing to plot.")

    # Collect and validate the tables before any drawing, so a missing p-value
    # column does not leave a half-built figure behind.
    tables = {}
    for system in systems:
        if 'combined' not in results[system]:
            continue
        table = results[system]['combined'].head(top_n)
        if pvalue_label not in table.columns:
            raise ValueError(f"P-value column '{pvalue_label}' not found in the dataframe for system '{system}', 'combined'.")
        tables[system] = table

    # Set global prettier style
    plt.style.use('seaborn-v0_8-whitegrid')
    sns.set_context("notebook", font_scale=1.5)

    n_panels = len(systems)
    # constrained_layout handles spacing; Figure.subplots_adjust is incompatible with
    # it (matplotlib warns and skips the call), so no manual adjustment is made.
    fig, axes_grid = plt.subplots(
        1, n_panels, figsize=(8 * n_panels, 7), dpi=dpi,
        constrained_layout=True, squeeze=False
    )
    axes = list(axes_grid[0])

    # --- Legend content: every region present in any panel, in REGIONS_seq order,
    # followed by unlisted regions alphabetically.
    found_region_colors = {}
    for table in tables.values():
        for region in region_labels(table):
            found_region_colors[region] = REG_COR_Dic.get(region, fallback_color)
    legend_entries = (
        [r for r in REGIONS_seq if r in found_region_colors]
        + sorted(r for r in found_region_colors if r not in REGIONS_seq)
    )
    legend_handles = [
        mpatches.Patch(color=found_region_colors[r], label=r.replace('_', ' ')) for r in legend_entries
    ]

    # ---- Plotting each panel
    for ax, system in zip(axes, systems):
        table = tables.get(system)
        if table is None:
            ax.axis('off')
            continue

        eff_vals = np.asarray(table['EFFECT'], dtype=float)
        y_pos = np.arange(len(table))
        if len(table):
            ax.barh(
                y=y_pos,
                width=eff_vals,
                color=[REG_COR_Dic.get(r, fallback_color) for r in region_labels(table)],
                edgecolor='black',
                alpha=0.94,
                zorder=2
            )

        ax.set_yticks(y_pos)
        # Label size is governed by tick_params below (a fontsize passed here would
        # be overridden by it), so only the weight is set on the labels themselves.
        ax.set_yticklabels([str(s).replace('_', ' ') for s in table.index], fontweight='bold')
        ax.set_xlabel('Bias Effect', fontsize=18, fontweight='bold', labelpad=8)
        ax.set_title(f'{str(system).capitalize()}', fontsize=21, fontweight='bold', pad=16)
        ax.tick_params(axis='x', labelsize=16)
        ax.tick_params(axis='y', labelsize=15)
        ax.invert_yaxis()

        for spine in ['top','right']:
            ax.spines[spine].set_visible(False)
        ax.spines['left'].set_linewidth(1.6)
        ax.spines['bottom'].set_linewidth(1.6)

        ax.axvline(0, ls='--', color='grey', lw=1.1, zorder=1)

        # Largest finite |EFFECT|; falls back to 1 for empty, all-NaN or all-zero data.
        finite = eff_vals[np.isfinite(eff_vals)]
        span = float(np.abs(finite).max()) if finite.size else 0.0
        if span == 0.0:
            span = 1.0

        # Significance stars just beyond each bar tip.
        x_offset = star_gap * span
        for row, (eff_val, p_val) in enumerate(zip(eff_vals, table[pvalue_label])):
            star = get_significance_star(p_val)
            if not star or not np.isfinite(eff_val):
                continue
            positive = eff_val >= 0
            ax.text(
                eff_val + x_offset if positive else eff_val - x_offset, row,
                star,
                va='center',
                ha='left' if positive else 'right',
                color='firebrick',
                fontsize=23,
                fontweight='bold',
                zorder=5,
            )

        # Visual padding; extend to the negative side only when negative effects
        # exist, so all-positive data keeps the (0, max * 1.18) range.
        has_negative = bool((finite < 0).any())
        has_positive = bool((finite > 0).any())
        ax.set_xlim(
            -span * x_pad if has_negative else 0.0,
            span * x_pad if (has_positive or not has_negative) else 0.0,
        )

    # One shared region legend, attached to the last panel.
    if legend_handles:
        axes[-1].legend(
            handles=legend_handles,
            loc='upper left',
            bbox_to_anchor=(0.7, 0.6),
            borderaxespad=0.6,
            fontsize=16,
            ncol=1,
            title="Region",
            title_fontsize=16,
            frameon=True
        )

    if save_plot:
        os.makedirs(results_dir, exist_ok=True)
        plot_path = os.path.join(results_dir, f'neurotransmitter_COMBINED_barplots_top{top_n}.svg')
        fig.savefig(plot_path, dpi=dpi, bbox_inches='tight', transparent=False)
        print(f"Combined bar plots saved to: {plot_path}")
    plt.show()
    return fig

# Plot only the combined bias (top N) as a single row of per-system panels and save the figure
fig1 = plot_combined_bias_only(NT_BiasDict, top_n=15, save_plot=True, dpi=120, pvalue_label="P-value")

### Test Bias vs CCS 

In [None]:
# Connectivity scoring matrices, read from an absolute path inside a separate project
# checkout (ASD_Circuits, not ProjDIR).
ScoreMatDir="/home/jw3514/Work/ASD_Circuits/dat/allen-mouse-conn/ScoreingMat_jw_v3/"
WeightMat = pd.read_csv(ScoreMatDir + "WeightMat.Ipsi.csv", index_col=0)
IpsiInfoMat=pd.read_csv(ScoreMatDir + "InfoMat.Ipsi.csv", index_col=0)
IpsiInfoMatShort_v1=pd.read_csv(ScoreMatDir + "InfoMat.Ipsi.Short.3900.csv", index_col=0)
IpsiInfoMatLong_v1=pd.read_csv(ScoreMatDir + "InfoMat.Ipsi.Long.3900.csv", index_col=0)

# Structure-count grid 200, 199, ..., 6. Later cells compare it with `topNs == n`,
# which needs an ndarray (a plain list would compare as a single False).
topNs = np.arange(200, 5, -1)
# Precomputed score arrays; later cells plot them against topNs as the "Siblings"
# control and index their columns by position in topNs.
# NOTE(review): presumably shaped (n_permutations, len(topNs)) - confirm against the generating script.
DIR = "/home/jw3514/Work/ASD_Circuits/scripts/RankScores/"
Cont_Distance = np.load("{}/RankScore.Ipsi.Cont.npy".format(DIR))
Cont_DistanceShort = np.load("{}/RankScore.Ipsi.Short.3900.Cont.npy".format(DIR))
Cont_DistanceLong = np.load("{}/RankScore.Ipsi.Long.3900.Cont.npy".format(DIR))

In [None]:
def plot_circuit_connectivity_scores(topNs, SC_Agg_topN_score, SC_Agg_topN_scoreLong, SC_Agg_topN_scoreShort,
                                  Cont_Distance, Cont_DistanceLong, Cont_DistanceShort):
    """
    Plot observed circuit connectivity scores against the sibling control in
    three stacked panels (all / long / short, in that order).

    Each panel shows the observed curve (blue, labelled "P-factor") and the
    control curve (grey, labelled "Siblings") with error bars spanning the
    15.9th-84.1th percentiles of the control matrix, taken over axis 0.

    Args:
        topNs: x values shared by all curves.
        SC_Agg_topN_score, SC_Agg_topN_scoreLong, SC_Agg_topN_scoreShort:
            observed scores for the three panels.
        Cont_Distance, Cont_DistanceLong, Cont_DistanceShort: control score
            matrices for the three panels.

    Returns:
        The matplotlib Figure.
    """
    half_width = 34.1  # percentile half-width around the 50th percentile

    figure, axes = plt.subplots(3, 1, dpi=480, figsize=(7, 11))

    # (observed curve, control matrix, NaN-aware statistics?) for each panel.
    panels = (
        (SC_Agg_topN_score, Cont_Distance, False),
        (SC_Agg_topN_scoreLong, Cont_DistanceLong, True),
        (SC_Agg_topN_scoreShort, Cont_DistanceShort, False),
    )

    for axis, (observed, control, nan_aware) in zip(axes, panels):
        if nan_aware:
            # NOTE(review): this panel centres on the NaN-aware *mean* while its bounds
            # are percentiles; the upper error is wrapped in abs(). Kept as in the
            # original - confirm the mean (rather than the median) is intended.
            center = np.nanmean(control, axis=0)
            low = np.nanpercentile(control, 50 - half_width, axis=0)
            high = np.nanpercentile(control, 50 + half_width, axis=0)
            upper_err = abs(high - center)
        else:
            center = np.median(control, axis=0)
            low = np.percentile(control, 50 - half_width, axis=0)
            high = np.percentile(control, 50 + half_width, axis=0)
            upper_err = high - center

        axis.plot(topNs, observed, color="blue", marker="o", markersize=5, lw=1,
                  ls="dashed", label="P-factor")
        axis.errorbar(topNs, center, color="grey", marker="o", markersize=1.5, lw=1,
                      yerr=(center - low, upper_err), ls="dashed", label="Siblings")
        axis.set_xlabel("Structure Rank\n", fontsize=17)
        axis.set_ylabel("Circuit Connectivity Score", fontsize=15)
        axis.legend(fontsize=13)

    for axis in axes:
        axis.set_xlim(0, 121)
    plt.tight_layout()
    for axis in axes:
        axis.grid(True)
    return figure

In [None]:
def calculate_circuit_scores(pc_scores_df, IpsiInfoMat, sort_by="PC1", topNs=None):
    """
    Score the top-N structures of a ranking for a series of N values.

    Structures (the index of ``pc_scores_df``) are ranked by ``sort_by`` in
    descending order; for every N the leading N structures are passed to
    ``ScoreCircuit_SI_Joint`` together with ``IpsiInfoMat``.

    Args:
        pc_scores_df: DataFrame indexed by structure, containing ``sort_by``.
        IpsiInfoMat: matrix handed unchanged to ``ScoreCircuit_SI_Joint``.
        sort_by: column used to rank structures (highest first).
        topNs: iterable of N values to evaluate. Defaults to 200, 199, ..., 6,
            the grid this function used before the parameter existed.

    Returns:
        numpy array with one score per entry of ``topNs``, in the same order.
    """
    if topNs is None:
        topNs = range(200, 5, -1)

    ranked_structures = pc_scores_df.sort_values(sort_by, ascending=False).index.values
    # If N exceeds the number of structures, the slice simply returns all of them.
    return np.array([
        ScoreCircuit_SI_Joint(ranked_structures[:topN], IpsiInfoMat)
        for topN in topNs
    ])

In [None]:
print("Keys in nt_results_detailed:")
for key in nt_results.keys():
    print(f"  {key}: {list(nt_results[key].keys())}")

In [None]:
#NT_BiasDict = nt_results

In [None]:
# For each neurotransmitter system, build a table whose EFFECT is the per-structure
# maximum of the source and target EFFECT values.
NT_BiasDict_max = {}
for system in NT_BiasDict.keys():
    source = NT_BiasDict[system].get("source")
    target = NT_BiasDict[system].get("target")
    if source is None or target is None:
        # Both tables are needed; systems lacking either one are left out.
        continue

    # Outer join keeps structures present in only one table; max() skips the NaN side.
    aligned = source["EFFECT"].to_frame("source").join(
        target["EFFECT"].to_frame("target"), how="outer"
    )
    df_max = aligned.max(axis=1).to_frame("EFFECT")

    # Region annotation: prefer the source table and fill the remaining gaps from the
    # target table, so structures that appear only in `target` still get a region.
    region = None
    for bias_table in (source, target):
        if "Region" in bias_table.columns:
            region = (
                bias_table["Region"] if region is None
                else region.combine_first(bias_table["Region"])
            )
    if region is not None:
        df_max["Region"] = region

    df_max = df_max.sort_values("EFFECT", ascending=False)
    NT_BiasDict_max[system] = df_max

In [None]:
# CCS curve for every (system, category) bias table, keyed "<system>_<category>".
neuro_system_scores = {}
for system in NT_BiasDict.keys():
    for category, df in NT_BiasDict[system].items():
        neuro_system_scores[f"{system}_{category}"] = calculate_circuit_scores(df, IpsiInfoMat, sort_by="EFFECT")

# CCS curve for the max(source, target) table of each system, keyed by system name.
neuro_system_scores2 = {}
for system in NT_BiasDict_max.keys():
    df = NT_BiasDict_max[system]
    neuro_system_scores2[f"{system}"] = calculate_circuit_scores(df, IpsiInfoMat, sort_by="EFFECT")

In [None]:
# CCS-vs-rank curves of the 'combined' bias of each neurotransmitter system,
# drawn against the sibling control on a transparent background.
plt.style.use('seaborn-v0_8-whitegrid')
fig, ax1 = plt.subplots(1,1, dpi=480, figsize=(12,6), facecolor='none')

fig.patch.set_alpha(0)
ax1.patch.set_alpha(0)

# Percentile half-width: error bars span the 15.9th-84.1th percentiles.
BarLen = 34.1
#BarLen = 47.5

# Define colors for each neurotransmitter system
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', 
          '#e377c2', '#7f7f7f', '#bcbd22', '#17becf', '#aec7e8', '#ffbb78']

# Same grid as calculate_circuit_scores. Kept as an ndarray (not a list) because
# `topNs` is a notebook global and plot_CCS_pvalues_at_N evaluates `topNs == n`,
# which only works element-wise on arrays.
topNs = np.arange(200, 5, -1)

for i, (system_category, scores) in enumerate(neuro_system_scores.items()):
    if "combined" in system_category:
        label = system_category.replace("_combined", "")
        # The colour index counts every entry, including the skipped source/target ones.
        color = colors[i % len(colors)]
        ax1.plot(topNs, scores, color=color, marker="o", markersize=5, lw=1,
                            ls="dashed", label=label, alpha = 0.5)

# Sibling control: median with percentile error bars.
cont = np.median(Cont_Distance, axis=0)
lower = np.percentile(Cont_Distance, 50-BarLen, axis=0)
upper = np.percentile(Cont_Distance, 50+BarLen, axis=0)
ax1.errorbar(topNs, cont, color="grey", marker="o", markersize=1.5, lw=1,
            yerr=(cont - lower, upper - cont ), ls="dashed", label="Siblings")
ax1.set_xlabel("Structure Rank\n", fontsize=17)
ax1.set_ylabel("Circuit Connectivity Score", fontsize=15)
ax1.grid(True, linestyle='--', alpha=0.7)
ax1.set_xlim(0, 121)

# Place legend outside of plot
ax1.legend(fontsize=13, bbox_to_anchor=(1.01, 1), loc='upper left')
plt.tight_layout()  # Adjust layout to prevent legend cutoff

In [None]:
# CCS-vs-rank curves of the max(source, target) bias of each neurotransmitter
# system, drawn against the sibling control on a transparent background.
plt.style.use('seaborn-v0_8-whitegrid')
fig, ax1 = plt.subplots(1,1, dpi=480, figsize=(12,6), facecolor='none')

fig.patch.set_alpha(0)
ax1.patch.set_alpha(0)

# Percentile half-width: error bars span the 15.9th-84.1th percentiles.
BarLen = 34.1
#BarLen = 47.5

# Define colors for each neurotransmitter system
# (`colors` is also read as a global by plot_CCS_pvalues_at_N)
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', 
          '#e377c2', '#7f7f7f', '#bcbd22', '#17becf', '#aec7e8', '#ffbb78']

topNs = np.arange(200, 5, -1)  # Define topNs based on the range used in calculate_circuit_scores
                        
for i, (system_category, scores) in enumerate(neuro_system_scores2.items()):

    color = colors[i % len(colors)]
    ax1.plot(topNs, scores, color=color, marker="o", markersize=5, lw=1,
                        ls="dashed", label=system_category, alpha = 0.5)

# Sibling control: median with percentile error bars.
cont = np.median(Cont_Distance, axis=0)
lower = np.percentile(Cont_Distance, 50-BarLen, axis=0)
upper = np.percentile(Cont_Distance, 50+BarLen, axis=0)
ax1.errorbar(topNs, cont, color="grey", marker="o", markersize=1.5, lw=1,
            yerr=(cont - lower, upper - cont ), ls="dashed", label="Siblings")
ax1.set_xlabel("Structure Rank\n", fontsize=17)
ax1.set_ylabel("Circuit Connectivity Score", fontsize=15)
ax1.grid(True, linestyle='--', alpha=0.7)
ax1.set_xlim(0, 121)

# Place legend outside of plot
ax1.legend(fontsize=13, bbox_to_anchor=(1.01, 1), loc='upper left')
plt.tight_layout()  # Adjust layout to prevent legend cutoff

In [None]:
def plot_CCS_pvalues_at_N(n, neuro_system_scores, Cont_Distance=None, topNs=None, colors=None):
    """
    Plot -log10(p-values) of circuit connectivity scores at given N (topN).

    For every entry of ``neuro_system_scores`` the score at rank ``n`` is compared
    with the matching column of ``Cont_Distance`` through ``GetPermutationP``.
    The p-values are drawn as a bar chart and also printed.

    Args:
        n: int
            Rank at which to evaluate CCS p-values (e.g., 40, 50, etc).
        neuro_system_scores: dict
            Maps a label to its array of scores, aligned with ``topNs``.
        Cont_Distance: array-like, optional
            Null scores with one column per entry of ``topNs``. Defaults to the
            notebook-level variable of the same name.
        topNs: array-like, optional
            Rank grid; lists are accepted. Defaults to the notebook-level
            variable of the same name.
        colors: sequence, optional
            Bar colours, cycled when there are more bars than colours. Defaults
            to the notebook-level ``colors`` if defined, else a built-in palette.
    Returns:
        fig, ax: matplotlib Figure and Axes objects
    Raises:
        ValueError: if ``n`` does not occur in ``topNs``.
    """
    # Fall back to the notebook globals that earlier versions of this function
    # read implicitly, so existing two-argument calls keep working.
    notebook_ns = globals()
    if Cont_Distance is None:
        Cont_Distance = notebook_ns["Cont_Distance"]
    if topNs is None:
        topNs = notebook_ns["topNs"]
    if colors is None:
        colors = notebook_ns.get("colors")
    if colors is None or len(colors) == 0:
        colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b',
                  '#e377c2', '#7f7f7f', '#bcbd22', '#17becf', '#aec7e8', '#ffbb78']
    palette = list(colors)

    # np.asarray makes `== n` element-wise even when topNs arrives as a list
    # (a list compared with an int is just False, which used to end in an IndexError).
    matches = np.flatnonzero(np.asarray(topNs) == n)
    if matches.size == 0:
        raise ValueError(f"N={n} not found in topNs")
    n_idx = matches[0]

    # The null column at rank n is shared by every system, so extract it once.
    Null = np.asarray(Cont_Distance)[:, n_idx]

    # For each neurotransmitter system, get p-value versus null
    pvalues = {}
    for system_category, scores in neuro_system_scores.items():
        obs = np.asarray(scores)[n_idx]
        z, p, xx = GetPermutationP(Null, obs)
        pvalues[system_category] = p

    # Create figure and axis
    fig, ax = plt.subplots(figsize=(7,6), dpi=120)

    # Create bar plot
    labels = list(neuro_system_scores.keys())
    # NOTE(review): a p-value of exactly 0 would give an infinite bar height -
    # confirm GetPermutationP cannot return 0.
    values = [-np.log10(pvalues[label]) for label in labels]
    colors_bar = [palette[i % len(palette)] for i in range(len(labels))]

    bars = ax.bar(range(len(labels)), values, color=colors_bar)

    # Set properties that might have array inputs
    for bar in bars:
        bar.set_linewidth(1.0)

    ax.set_xticks(range(len(labels)))
    ax.set_xticklabels(labels, rotation=45, ha='right')
    ax.set_ylabel('-log10(p-value)')
    ax.set_title(f'CCS Score P-values at N_Str={n} (Neurotransmitter systems)')
    ax.grid(True, linestyle='--', alpha=0.7)

    # Add significance threshold line at p=0.05
    ax.axhline(y=-np.log10(0.05), color='red', linestyle='--', alpha=0.5)

    # Print numeric values
    print(f"CCS score P-values at N_Str={n} (Neurotransmitter systems):")
    for label in labels:
        print(f"{label}: {pvalues[label]:.2e}")

    plt.tight_layout()
    return fig, ax


In [None]:
#plot_CCS_pvalues_at_N(46, neuro_system_scores)
plot_CCS_pvalues_at_N(46, neuro_system_scores2)
plot_CCS_pvalues_at_N(40, neuro_system_scores2)
plot_CCS_pvalues_at_N(20, neuro_system_scores2)

In [None]:
plot_CCS_pvalues_at_N(40, neuro_system_scores2)
plot_CCS_pvalues_at_N(20, neuro_system_scores2)

In [None]:
try:
    from adjustText import adjust_text
except ImportError:
    print("adjustText not found. Installing...")
    import subprocess
    import sys
    # Run pip through the interpreter that backs this kernel; a bare "pip" on PATH
    # may belong to a different environment, leaving the import below still failing.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "adjusttext"])
    from adjustText import adjust_text

def plot_CCS_histogram_with_NT_bars(n, neuro_system_scores, Cont_Distance, topNs):
    """
    Plot histogram of siblings CCS distribution with vertical bars for Dopamine, Serotonin, and Oxytocin.

    Only the three systems named above are drawn; other keys of
    ``neuro_system_scores`` are ignored and missing ones are skipped. Each
    vertical line is labelled with the p-value returned by ``GetPermutationP``
    for that system against the sibling distribution at rank ``n``.

    Args:
        n: int
            Rank at which to evaluate CCS (e.g., 20, 40)
        neuro_system_scores: dict
            Dictionary with neurotransmitter system names as keys and CCS arrays as values
        Cont_Distance: array-like
            CCS scores for siblings (null distribution), one column per entry of topNs
        topNs: array-like
            topN values corresponding to Cont_Distance columns (lists are accepted)

    Returns:
        fig, ax: matplotlib Figure and Axes objects

    Raises:
        ValueError: if ``n`` does not occur in ``topNs``.
    """
    # Coerce to arrays: a list compared with an int is a single False, which would
    # make every N look "not found" when topNs is passed as a plain list.
    topNs = np.asarray(topNs)
    Cont_Distance = np.asarray(Cont_Distance)

    # Get siblings CCS distribution at N
    n_idx = np.where(topNs == n)[0]
    if len(n_idx) == 0:
        raise ValueError(f"N={n} not found in topNs")
    n_idx = n_idx[0]
    siblings_CCS = Cont_Distance[:, n_idx].flatten()
    
    # Get observed CCS values for the three neurotransmitter systems
    nt_systems = ['Dopamine', 'Serotonin', 'Oxytocin']
    nt_colors = {'Dopamine': '#d62728', 'Serotonin': '#2ca02c', 'Oxytocin': '#1f77b4'}
    nt_values = {}
    nt_pvalues = {}
    
    for system in nt_systems:
        if system in neuro_system_scores:
            obs = neuro_system_scores[system][n_idx]
            nt_values[system] = obs
            
            # Calculate p-value using GetPermutationP (same as plot_CCS_pvalues_at_N)
            z, p_value, xx = GetPermutationP(siblings_CCS, obs)
            nt_pvalues[system] = p_value
    
    # Create figure
    fig, ax = plt.subplots(figsize=(6, 5), dpi=120)
    
    # Plot histogram of siblings CCS
    n_bins = 50
    counts, bins, patches = ax.hist(siblings_CCS, bins=n_bins, alpha=0.7, color='gray', 
                                     edgecolor='black', linewidth=0.5, label='Siblings CCS')
    
    # Get peak frequency (max count) and position labels at half that height
    peak_frequency = np.max(counts)
    label_y_position = peak_frequency * 0.5
    
    # Sort systems by CCS value so lines and labels are added left to right
    systems_with_values = [(system, nt_values[system], nt_pvalues[system]) 
                           for system in nt_systems if system in nt_values]
    systems_with_values.sort(key=lambda x: x[1])  # Sort by CCS value
    
    # Store text objects for adjust_text
    text_objects = []
    
    # Add vertical bars for each neurotransmitter system
    for system, obs_value, p_val in systems_with_values:
        color = nt_colors[system]
        
        # Draw vertical line
        ax.axvline(obs_value, color=color, linestyle='--', linewidth=2.5, 
                  label=system)
        
        # Add text annotation with p-value (horizontal text, positioned to the right of the line)
        # The offset is in data (CCS) units.
        x_offset = 0.01  # Small offset to the right
        text_obj = ax.text(obs_value + x_offset, label_y_position, f'{system}\np={p_val:.2e}', 
                          rotation=0, verticalalignment='center', horizontalalignment='left',
                          fontsize=9, color=color, fontweight='bold',
                          bbox=dict(boxstyle='round,pad=0.2', facecolor='white', alpha=0.7, edgecolor=color))
        text_objects.append(text_obj)
    
    # Use adjust_text to automatically avoid overlaps (including histogram bars)
    # NOTE(review): adjust_text keyword names differ between adjustText releases -
    # verify these against the installed version.
    adjust_text(text_objects, ax=ax, 
                arrowprops=dict(arrowstyle='->', color='gray', lw=0.5),
                expand_points=(1.2, 1.2),
                expand_text=(1.2, 1.2),
                force_points=(0.5, 0.5),
                force_text=(0.5, 0.5),
                avoid_text=True,
                avoid_points=True,
                avoid_objects=patches)  # Avoid histogram bars
    
    ax.set_xlabel('Circuit Connectivity Score (CCS)', fontsize=12)
    ax.set_ylabel('Frequency', fontsize=12)
    #ax.set_title(f'Distribution of Siblings CCS at N={n}\nwith Neurotransmitter System Values', fontsize=13)
    ax.set_title(f'N={n}\n', fontsize=13)
    ax.legend(loc='upper right', fontsize=10)
    ax.grid(True, linestyle='--', alpha=0.3)
    
    plt.tight_layout()
    return fig, ax

# Plot for N=20 and N=40
fig1, ax1 = plot_CCS_histogram_with_NT_bars(20, neuro_system_scores2, Cont_Distance, topNs)
plt.show()

fig2, ax2 = plot_CCS_histogram_with_NT_bars(40, neuro_system_scores2, Cont_Distance, topNs)
plt.show()

# Other disorders 

In [None]:
PKList = ["SNCA", "PRKN", "PARK7", "PINK1", "LRRK2"]
PK_GW = dict(zip([GeneSymbol2Entrez[x] for x in PKList], [1]*len(PKList)))
Dict2Fil(PK_GW, "../dat/Genetics/GeneWeights/Parkinson.gw")
PK_STR_Bias  = MouseSTR_AvgZ_Weighted(STR_BiasMat, PK_GW)
PK_STR_Bias["Region"] = [Anno.get(ct_idx, "Unknown") for ct_idx in PK_STR_Bias.index.values]

In [None]:
PK_STR_Bias.head(50)

In [None]:
# NegativeCircuits: structure-level bias tables of other traits and disorders.
# NOTE(review): `Handle` is not used in any visible cell; this looks like an
# accidental editor auto-import - confirm and remove.
from asyncio import Handle


# Rebinds the notebook-level DIR (earlier it pointed at the RankScores directory).
DIR = "~/Work/ASD_Circuits_CellType/results/STR_ISH/"
T2D = pd.read_csv(DIR + "T2D_bias_addP_sibling.csv", index_col=0)
Parkinson = pd.read_csv(DIR + "Parkinson_bias_addP_sibling.csv", index_col=0)
HBALC = pd.read_csv(DIR + "hba1c_bias_addP_sibling.csv", index_col=0)
IBD = pd.read_csv(DIR + "IBD_bias_addP_sibling.csv", index_col=0)
HDL_C = pd.read_csv(DIR + "HDL_C_bias_addP_sibling.csv", index_col=0)
Alzheimer = pd.read_csv(DIR + "Alzheimer_bias_addP_sibling.csv", index_col=0)
#ASD = pd.read_csv(DIR + "ASD_bias_addP_sibling.csv", index_col=0)
# The ASD table comes from a different file and is indexed by its "STR" column.
ASD = pd.read_csv("../dat/Unionize_bias/Spark_Meta_EWS.Z2.bias.FDR.csv", index_col="STR")

In [None]:
# Calculate circuit scores for each disorder dataset
disorder_scores = {}
# Datasets to score; commented-out entries are loaded above but excluded here.
disorder_datasets = {
    "T2D": T2D,
    # "Parkinson": Parkinson,
    # "HBALC": HBALC,
    "IBD": IBD,
    "HDL_C": HDL_C,
    # "Alzheimer": Alzheimer,
    "ASD": ASD
}

# Fixed color dictionary - ASD is blue, other disorders each get a specific color
# NOTE(review): the rank-vs-CCS plotting cell below picks colours by position from
# its own list rather than from this dictionary - confirm which is intended.
disorder_colors = {
    "ASD": "#1f77b4",    # blue
    "T2D": "#ff7f0e",    # orange
    "IBD": "#2ca02c",    # green
    "HDL_C": "#d62728",  # red
    # Add more if enabling more disorders (e.g., Parkinson, HBALC, Alzheimer)
    "Parkinson": "#9467bd", # purple
    "HBALC": "#8c564b",     # brown
    "Alzheimer": "#e377c2"  # pink
}

# One CCS curve per dataset, ranking structures by their EFFECT column.
for disorder_name, disorder_df in disorder_datasets.items():
    disorder_scores[disorder_name] = calculate_circuit_scores(disorder_df, IpsiInfoMat, sort_by="EFFECT")


In [None]:
# Plot rank vs CCS for all disorders
plt.style.use('seaborn-v0_8-whitegrid')
fig, ax1 = plt.subplots(1,1, dpi=480, figsize=(12,6), facecolor='none')

fig.patch.set_alpha(0)
ax1.patch.set_alpha(0)

# Percentile half-width: error bars span the 15.9th-84.1th percentiles.
BarLen = 34.1

# Define a color palette, but force ASD to blue (#1f77b4), others to use different colors
asd_color = '#1f77b4'
other_colors = ['#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', 
                '#e377c2', '#7f7f7f', '#bcbd22', '#17becf', '#aec7e8', '#ffbb78']

# Same grid as calculate_circuit_scores. Kept as an ndarray (not a list) because
# `topNs` is a notebook global and plot_CCS_pvalues_at_N evaluates `topNs == n`,
# which only works element-wise on arrays.
topNs = np.arange(200, 5, -1)
color_idx = 0
for disorder_name, scores in disorder_scores.items():
    if disorder_name.upper() == "ASD":
        color = asd_color
        zorder = 10  # Bring ASD to the front if overlapped
        alpha = 1.0
        linewidth = 2.5
    else:
        # Non-ASD curves take colours in dictionary order.
        color = other_colors[color_idx % len(other_colors)]
        color_idx += 1
        zorder = 2
        alpha = 0.7
        linewidth = 1
    ax1.plot(topNs, scores, color=color, marker="o", markersize=5, lw=linewidth,
             ls="dashed", label=disorder_name, alpha=alpha, zorder=zorder)

# Add sibling controls
cont = np.median(Cont_Distance, axis=0)
lower = np.percentile(Cont_Distance, 50-BarLen, axis=0)
upper = np.percentile(Cont_Distance, 50+BarLen, axis=0)
ax1.errorbar(topNs, cont, color="grey", marker="o", markersize=1.5, lw=1,
            yerr=(cont - lower, upper - cont ), ls="dashed", label="Siblings")
ax1.set_xlabel("Structure Rank\n", fontsize=17)
ax1.set_ylabel("Circuit Connectivity Score", fontsize=15)
ax1.grid(True, linestyle='--', alpha=0.7)
ax1.set_xlim(0, 121)

# Place legend outside of plot
ax1.legend(fontsize=13, bbox_to_anchor=(1.01, 1), loc='upper left')
plt.tight_layout()  # Adjust layout to prevent legend cutoff


In [None]:
# Plot CCS p-values for negative controls (disorders)
# Ensure topNs is a numpy array for the function (it's already defined in cell 17, but ensure it's numpy array)
topNs = np.arange(200, 5, -1)

# Define colors if not already defined (needed for plot_CCS_pvalues_at_N)
if 'colors' not in globals():
    colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', 
              '#e377c2', '#7f7f7f', '#bcbd22', '#17becf', '#aec7e8', '#ffbb78']

# Plot p-values at different ranks
plot_CCS_pvalues_at_N(46, disorder_scores)
plot_CCS_pvalues_at_N(20, disorder_scores)


In [None]:

import matplotlib as mpl
import matplotlib.pyplot as plt

# Publication look-and-feel (same settings as Figures_Tables.ipynb):
# whitegrid base style, then larger fonts and heavier black axes on top of it.
plt.style.use('seaborn-v0_8-whitegrid')

# Font sizes
mpl.rcParams["axes.titlesize"] = 20
mpl.rcParams["axes.labelsize"] = 18
mpl.rcParams["xtick.labelsize"] = 14
mpl.rcParams["ytick.labelsize"] = 14
mpl.rcParams["legend.fontsize"] = 14

# Axes frame
mpl.rcParams["axes.edgecolor"] = "k"
mpl.rcParams["axes.linewidth"] = 1.4

def plot_CCS_histogram_with_NT_bars(n, neuro_system_scores, Cont_Distance, topNs):
    """
    Plot the sibling (null) CCS distribution at rank ``n`` with one vertical line per system.

    Every system in ``neuro_system_scores`` is drawn as a vertical line at its observed
    CCS and annotated with the p-value that ``GetPermutationP`` reports for that value
    against the sibling distribution. Line colors are looked up case-insensitively,
    first among the neurotransmitter colors and then among the disorder colors;
    unknown names fall back to red.

    Args:
        n: int
            Rank at which to evaluate CCS (e.g., 20, 40). Must be present in topNs.
        neuro_system_scores: dict
            Dictionary with system names as keys and CCS arrays as values.
            Each array is indexed with the position of ``n`` in topNs.
        Cont_Distance: array-like
            CCS scores for siblings (null distribution), shape (n_permutations, n_topNs)
        topNs: array-like
            topN values corresponding to Cont_Distance columns. Plain lists are
            accepted as well as NumPy arrays.
    Returns:
        fig, ax: matplotlib Figure and Axes objects
    Raises:
        ValueError: if ``n`` does not appear in topNs.
    """
    # Color dictionaries (keys of nt_fixed_colors are already lower case)
    nt_fixed_colors = {
        'dopamine':       '#e45756',   # reddish
        'serotonin':      '#4e79a7',   # blue
        'oxytocin':       '#76b7b2',   # teal
        'acetylcholine':  '#f28e2b',   # orange
    }
    disorder_colors = {
        "ASD": "#1f77b4",    # blue
        "T2D": "#ff7f0e",    # orange
        "IBD": "#2ca02c",    # green
        "HDL_C": "#d62728",  # red
        "Parkinson": "#9467bd", # purple
        "HBALC": "#8c564b",     # brown
        "Alzheimer": "#e377c2"  # pink
    }
    fallback_color = "#d62728"
    # Lower-cased view of the disorder colors so lookups ignore case.
    disorder_colors_ci = {name.lower(): hex_color for name, hex_color in disorder_colors.items()}

    def get_system_color(system):
        """Return the line color for `system` (NT colors first, then disorders, then fallback)."""
        key = system.lower()
        if key in nt_fixed_colors:
            return nt_fixed_colors[key]
        return disorder_colors_ci.get(key, fallback_color)

    # Coerce to arrays first: with a plain list, `topNs == n` is a single bool and
    # the lookup below would wrongly report that n is missing.
    topNs = np.asarray(topNs)
    Cont_Distance = np.asarray(Cont_Distance)

    # Find the column of Cont_Distance that corresponds to the requested rank
    matches = np.where(topNs == n)[0]
    if len(matches) == 0:
        raise ValueError(f"N={n} not found in topNs")
    n_idx = matches[0]
    siblings_CCS = Cont_Distance[:, n_idx].flatten()

    # Observed value and permutation p-value per system, sorted by observed value
    # so that lines and their annotations appear in left-to-right order.
    systems_with_values = []
    for system, scores in neuro_system_scores.items():
        obs = scores[n_idx]
        _, p_value, _ = GetPermutationP(siblings_CCS, obs)
        systems_with_values.append((system, obs, p_value))
    systems_with_values.sort(key=lambda item: item[1])

    # Create styled figure for publication
    fig, ax = plt.subplots(dpi=300, figsize=(7,5))

    # Histogram of the sibling null distribution (styled as in Figures_Tables.ipynb).
    # The bin counts get their own name so the rank parameter `n` is not overwritten.
    counts, _, _ = ax.hist(
        siblings_CCS, bins=25, histtype="barstacked", align="mid",
        facecolor="#8888FF", alpha=0.75, label="Siblings", edgecolor="black", linewidth=0.7, zorder=2
    )

    # One vertical line per system; ASD is solid, thicker and drawn on top.
    # The ASD test ignores case, consistent with the color lookup above.
    for system, obs_val, _ in systems_with_values:
        is_asd = system.upper() == "ASD"
        ax.axvline(
            obs_val, ymin=0, ymax=1,
            linewidth=3.5 if is_asd else 2.2,
            color=get_system_color(system),
            linestyle='-' if is_asd else '--',
            label=system,
            alpha=1.0 if is_asd else 0.85,
            zorder=4 if is_asd else 3,
        )

    # Annotate each system (no arrow for clarity, styled box).
    # Labels are spread from 95% down to 40% of the tallest bin so neighbours do not overlap.
    label_y_positions = np.linspace(counts.max()*0.95, counts.max()*0.40, len(systems_with_values))
    for (system, obs_val, p_val), y in zip(systems_with_values, label_y_positions):
        col = get_system_color(system)
        ax.annotate(
            f"{system}\np={p_val:.1e}", xy=(obs_val+0.008, y),
            fontsize=12, fontweight="bold",
            color=col, ha="left", va="top",
            bbox=dict(boxstyle='round,pad=0.25', facecolor="white", alpha=0.8, edgecolor=col, linewidth=1.5)
        )

    # Axis labels styled as in Figures_Tables.ipynb
    ax.set_xlabel("Circuit Connectivity Score (CCS)", fontsize=18, weight="bold")
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.tick_params(axis='both', which='major', length=6, width=1.2)
    ax.grid(axis='y', linestyle="--", alpha=0.3)

    # Legend: show one entry per label. A dict keeps first-seen order, and a
    # repeated label keeps the handle that was added last.
    handles, labels = ax.get_legend_handles_labels()
    unique_entries = dict(zip(labels, handles))
    ax.legend(
        unique_entries.values(), unique_entries.keys(),
        loc="center left", bbox_to_anchor=(0.8, 0.7),
        borderaxespad=0.8, frameon=False, title="", ncol=1
    )

    plt.tight_layout()
    return fig, ax

# Sibling CCS histogram at rank 46 with one line per negative-control disorder.
fig2, ax2 = plot_CCS_histogram_with_NT_bars(
    n=46,
    neuro_system_scores=disorder_scores,
    Cont_Distance=Cont_Distance,
    topNs=topNs,
)
plt.show()