In [None]:
%load_ext autoreload
%autoreload 2
# Notebook setup: make the project sources importable, pull in the geometry and
# plotting dependencies used by the cells below, and switch to the notebook directory.
import sys
# NOTE(review): absolute, user-specific paths; the notebook only runs on a machine
# with this directory layout.
ProjDIR = "/home/jw3514/Work/ASD_Circuits_CellType/"    
sys.path.insert(0, '/home/jw3514/Work/ASD_Circuits_CellType/src/')
# NOTE(review): pd, plt and sns (and later pearsonr) are used throughout this notebook
# but are never imported explicitly here; presumably this star import provides them - confirm.
from ASD_Circuits import *
#from CellType_PSY import *
#import scanpy as sc
#HGNC, ENSID2Entrez, GeneSymbol2Entrez, Entrez2Symbol = LoadGeneINFO()

import requests
import SimpleITK as sitk
import pathlib

from sklearn.cluster import DBSCAN
from scipy.spatial import ConvexHull
import alphashape
# Alpha-shape helpers used by get_region_shape / add_region_bound to outline regions.
from alpha_shapes.boundary import Boundary, get_boundaries

from descartes import PolygonPatch
from alpha_shapes import Alpha_Shaper, plot_alpha_shape
import os 
import yaml

# Relative paths used later (e.g. "dat/...", "../config/config.yaml") resolve
# against this working directory.
os.chdir("/home/jw3514/Work/ASD_Circuits_CellType/notebooks_mouse_sc/")
print(f"Current working directory: {os.getcwd()}")

In [None]:
def get_region_shape(section, reg, level="structure", alpha_scale=0.6, label_x_max=6):
    """Compute an alpha-shape outline and a label anchor for one region of a section.

    Parameters
    ----------
    section : DataFrame
        Cells of one brain section; must provide "x_reconstructed",
        "y_reconstructed" and the parcellation column selected by ``level``.
    reg : str
        Region label to select.
    level : {"structure", "division"}
        "structure" matches ``reg`` against "parcellation_structure",
        "division" against "parcellation_division".
    alpha_scale : float
        Factor applied to the optimised alpha before the final shape is built
        (0.6 reproduces the previous hard-coded behaviour).
    label_x_max : float
        Only cells with x below this value are averaged for the label anchor
        (presumably to keep the label on one hemisphere - TODO confirm).

    Returns
    -------
    (alpha_shape, center)
        The shape returned by ``Alpha_Shaper.get_shape`` and the mean (x, y)
        of the cells used for the label.

    Raises
    ------
    ValueError
        If ``level`` is not recognised or no cell in ``section`` carries ``reg``.
    """
    level_columns = {
        "structure": "parcellation_structure",
        "division": "parcellation_division",
    }
    # Fail early with a clear message instead of an UnboundLocalError further down.
    if level not in level_columns:
        raise ValueError(
            f"Unknown level {level!r}; expected one of {sorted(level_columns)}"
        )

    sub_section = section[section[level_columns[level]] == reg]
    points = sub_section[["x_reconstructed", "y_reconstructed"]].values
    if len(points) == 0:
        raise ValueError(f"No cells annotated as {reg!r} at level {level!r} in this section")
    print(points.shape)

    shaper = Alpha_Shaper(points)
    alpha_opt, _ = shaper.optimize()
    alpha_shape = shaper.get_shape(alpha=alpha_opt * alpha_scale)

    # Label anchor: mean of the cells left of label_x_max; fall back to all cells
    # so the centre is never NaN when the region lies entirely beyond the cut-off.
    label_points = points[points[:, 0] < label_x_max]
    if len(label_points) == 0:
        label_points = points
    center = label_points.mean(axis=0)
    return alpha_shape, center
def add_region_bound(plt, alpha_shape, center, reg):
    """Draw the dashed outline of a region and label it once.

    Parameters
    ----------
    plt : module or object exposing ``plot`` and ``text`` (pyplot-style).
    alpha_shape : shape accepted by ``get_boundaries``.
    center : sequence with at least (x, y); the label is placed 0.5 units
        to the left of it.
    reg : str
        Text of the label.
    """
    outline_style = dict(color='black', linestyle='dashed', linewidth=1.5)
    for bound in get_boundaries(alpha_shape):
        # NOTE(review): _exterior/_holes are underscore-prefixed attributes of the
        # boundary objects; kept as in the original code.
        for ring in (bound._exterior, *bound._holes):
            plt.plot(ring[:, 0], ring[:, 1], **outline_style)
    # Label once per region; previously this sat inside the loop and was redrawn
    # on top of itself for every boundary component.
    plt.text(x=center[0] - 0.5, y=center[1], s=reg)

In [None]:
# Load config file
# Load the project YAML configuration (path is relative to the working directory).
# NOTE(review): `config` is not referenced by any later cell visible in this notebook.
with open("../config/config.yaml", "r") as f:
    config = yaml.safe_load(f)

# Cluster annotation table, indexed by its "cluster_id_label" column.
# NOTE(review): `ClusterAnn` is not referenced by any later cell visible in this notebook.
ClusterAnn = pd.read_csv(ProjDIR + "dat/MouseCT_Cluster_Anno.csv", index_col="cluster_id_label")

In [None]:
#MERFISH = pd.read_csv("/home/jw3514/Work/CellType_Psy/AllenBrainCellAtlas/dat/MERFISH/MERFISH.ISH_Annot.csv", index_col=0)
#MERFISH = MERFISH[MERFISH["x_reconstructed"]!="Supplemental somatosensory area"]
#MERFISH.to_csv("/home/jw3514/Work/CellType_Psy/AllenBrainCellAtlas/dat/MERFISH/MERFISH.ISH_Annot.clean.csv")

In [None]:
import os

# Cache of the MERFISH cell table with the ASD bias columns already attached.
MERFISH_cells_bias_file = "dat/MERFISH/MERFISH.cells.ASD.Bias.Anno.parquet"

if os.path.exists(MERFISH_cells_bias_file):
    MERFISH = pd.read_parquet(MERFISH_cells_bias_file)
else:
    # The clean parquet was converted once from dat/MERFISH/MERFISH.ISH_Annot.clean.csv.
    MERFISH = pd.read_parquet("dat/MERFISH/MERFISH.ISH_Annot.clean.parquet")

    # Per-cluster bias table; its index holds the cluster labels found in MERFISH["cluster"].
    ASD_Cluster_Bias_SibAdj = pd.read_csv("dat/Bias/ASD.ClusterV3.top60.UMI.Z2.z1clip3.addP.csv", index_col=0)
    if not ASD_Cluster_Bias_SibAdj.index.is_unique:
        raise ValueError("Cluster bias table has duplicated cluster labels; cannot annotate cells unambiguously")

    ## Annotate ASD Bias to MERFISH
    # Vectorised lookup instead of one .loc write per cell. Cells whose cluster is
    # absent from the bias table get 0, as before; a missing EFFECT/EFFECT2 column
    # now raises instead of silently producing all zeros.
    cluster_labels = MERFISH["cluster"].astype(object)
    in_bias_table = cluster_labels.isin(ASD_Cluster_Bias_SibAdj.index)
    for bias_col, effect_col in (("ASD.Bias", "EFFECT"), ("ASD.Bias.adj", "EFFECT2")):
        MERFISH[bias_col] = cluster_labels.map(ASD_Cluster_Bias_SibAdj[effect_col]).where(in_bias_table, 0)

    MERFISH["x_reconstructed"] = pd.to_numeric(MERFISH["x_reconstructed"])
    MERFISH["y_reconstructed"] = pd.to_numeric(MERFISH["y_reconstructed"])
    MERFISH.to_parquet(MERFISH_cells_bias_file)

In [None]:
MERFISH.head(2)

In [None]:
def MakePlot(section, STRs, title, fontsize=20, cmap='coolwarm', value_col="ASD.Bias",
             vmin=-0.5, vmax=0.5):
    """Scatter one brain section coloured by a per-cell value, with labelled region outlines.

    Parameters
    ----------
    section : DataFrame
        Cells of one section with "x_reconstructed", "y_reconstructed",
        "parcellation_structure" and the column named by ``value_col``.
    STRs : iterable of str
        Structure labels to outline and label (passed to ``get_region_shape``).
    title : str
        Figure title.
    fontsize : number
        Base font size; axis labels and title use 1.2x this value.
    cmap : str
        Matplotlib colormap name (default keeps the previous 'coolwarm').
    value_col : str
        Column used for the point colours (default keeps the previous "ASD.Bias").
    vmin, vmax : float
        Colour-scale limits (defaults keep the previous -0.5 / 0.5).
    """
    sns.set(style="whitegrid", context="talk")
    plt.figure(dpi=480, figsize=(10, 8))
    scatter = plt.scatter(
        section["x_reconstructed"],
        section["y_reconstructed"],
        c=section[value_col],
        cmap=cmap,
        s=0.5,
        alpha=0.7,
        edgecolor='none',
        vmin=vmin,
        vmax=vmax
    )
    cbar = plt.colorbar(scatter)
    cbar.set_label('ASD Bias', fontsize=fontsize, weight='bold')
    # Outline and label every requested structure on top of the scatter.
    for reg in STRs:
        alpha_shape, center = get_region_shape(section, reg)
        add_region_bound(plt, alpha_shape, center, reg)
    plt.xlabel("X Reconstructed", fontsize=fontsize*1.2, weight='bold')
    plt.ylabel("Y Reconstructed", fontsize=fontsize*1.2, weight='bold')
    plt.title(title, fontsize=fontsize*1.2, weight='bold')
    plt.grid(True, linestyle='--', alpha=0.5)
    plt.xticks(fontsize=fontsize)
    plt.yticks(fontsize=fontsize)
    # Flip the y axis so the section is displayed with increasing y going down.
    plt.gca().invert_yaxis()
    plt.tight_layout()
    plt.show()

##### Validate MD

In [None]:
# Sections containing MD
MERFISH[MERFISH["parcellation_structure"]=="MD"]["brain_section_label"].value_counts()

In [None]:
# Sections containing RE
MERFISH[MERFISH["parcellation_structure"]=="RE"]["brain_section_label"].value_counts()

In [None]:
# select C57BL6J-638850.38
brain_section = 'C57BL6J-638850.38'
pred = (MERFISH['brain_section_label'] == brain_section )
section_38 = MERFISH[pred]
print(len(section_38))

In [None]:
section_38["parcellation_structure"].value_counts().head(20)
STR_Disp_Names = section_38["parcellation_structure"].value_counts().head(20).index.values

In [None]:
section_38["parcellation_structure"].value_counts().head(30)

In [None]:
STR_Disp_Names

In [None]:
tmp = section_38[section_38["parcellation_structure"].isin(STR_Disp_Names)]
tmp = tmp.drop_duplicates(subset="parcellation_structure")
#tmp[["parcellation_structure", "ISH_STR", "ASD.Bias"]]

In [None]:
STRs = ['LP', 'MD', 'CP', 'CA1', 'SSs', "VISa", "DG", "MEA", "PIR", "RSPv", "RE"]

In [None]:
MakePlot(section_38, STRs, "Section 38")

##### [end] Validate MD

##### Validate ACB

In [None]:
MERFISH[MERFISH["parcellation_structure"]=="ACB"]["brain_section_label"].value_counts()

In [None]:
brain_section = 'C57BL6J-638850.51'
pred = (MERFISH['brain_section_label'] == brain_section )
section_51 = MERFISH[pred]
print(len(section_51))

In [None]:
section_51["parcellation_structure"].value_counts().head(20)
STR_Disp_Names = section_51["parcellation_structure"].value_counts().head(16).index.values

In [None]:
STR_Disp_Names

In [None]:
tmp = section_51[section_51["parcellation_structure"].isin(STR_Disp_Names)]
tmp = tmp.drop_duplicates(subset="parcellation_structure")
tmp[["parcellation_structure", "ISH_STR", "ASD.Bias", "ASD.Bias.adj"]]

In [None]:
STRs = ['CP', 'SSp-m', 'ACB', 'MOp', 'OT', 'PIR', 'MOs', 'SSp-ul',
       'LSr', 'ACAv', 'ACAd', 'SSs', 'GU', 'AId']
MakePlot(section_51, STRs, "Section 51")

In [None]:
# NOTE(review): this redefines the get_region_shape function from an earlier cell
# of this notebook; the later definition is the one used by subsequent calls.
def get_region_shape(section, reg, level="structure", alpha_scale=0.6, label_x_max=6):
    """Compute an alpha-shape outline and a label anchor for one region of a section.

    Parameters
    ----------
    section : DataFrame
        Cells of one brain section; must provide "x_reconstructed",
        "y_reconstructed" and the parcellation column selected by ``level``.
    reg : str
        Region label to select.
    level : {"structure", "division"}
        "structure" matches ``reg`` against "parcellation_structure",
        "division" against "parcellation_division".
    alpha_scale : float
        Factor applied to the optimised alpha before the final shape is built
        (0.6 reproduces the previous hard-coded behaviour).
    label_x_max : float
        Only cells with x below this value are averaged for the label anchor
        (presumably to keep the label on one hemisphere - TODO confirm).

    Returns
    -------
    (alpha_shape, center)
        The shape returned by ``Alpha_Shaper.get_shape`` and the mean (x, y)
        of the cells used for the label.

    Raises
    ------
    ValueError
        If ``level`` is not recognised or no cell in ``section`` carries ``reg``.
    """
    level_columns = {
        "structure": "parcellation_structure",
        "division": "parcellation_division",
    }
    # Fail early with a clear message instead of an UnboundLocalError further down.
    if level not in level_columns:
        raise ValueError(
            f"Unknown level {level!r}; expected one of {sorted(level_columns)}"
        )

    sub_section = section[section[level_columns[level]] == reg]
    points = sub_section[["x_reconstructed", "y_reconstructed"]].values
    if len(points) == 0:
        raise ValueError(f"No cells annotated as {reg!r} at level {level!r} in this section")
    print(points.shape)

    shaper = Alpha_Shaper(points)
    alpha_opt, _ = shaper.optimize()
    alpha_shape = shaper.get_shape(alpha=alpha_opt * alpha_scale)

    # Label anchor: mean of the cells left of label_x_max; fall back to all cells
    # so the centre is never NaN when the region lies entirely beyond the cut-off.
    label_points = points[points[:, 0] < label_x_max]
    if len(label_points) == 0:
        label_points = points
    center = label_points.mean(axis=0)
    return alpha_shape, center
def add_region_bound2(plt, alpha_shape, center, reg):
    """Draw the dashed outline of a region without a text label.

    ``center`` and ``reg`` are accepted for signature parity with
    ``add_region_bound`` but are not used: this variant draws outlines only.
    """
    for boundary in get_boundaries(alpha_shape):
        # Exterior ring first, then every hole, all with the same dashed stroke.
        rings = [boundary._exterior]
        rings.extend(boundary._holes)
        for ring in rings:
            xs = ring[:, 0]
            ys = ring[:, 1]
            plt.plot(xs, ys, color='black', linestyle='dashed', linewidth=1.5)

def MakePlot2(section, STRs, title, fontsize=20):
    """Scatter one section coloured by "ASD.Bias" ('seismic' palette) with unlabelled outlines.

    Same layout as ``MakePlot``, but region outlines are drawn through
    ``add_region_bound2`` (no text labels) and the colormap is 'seismic'.
    """
    heading_size = fontsize * 1.2
    point_style = dict(
        c=section["ASD.Bias"],
        cmap='seismic',  # coolwarm
        s=0.5,
        alpha=0.7,
        edgecolor='none',
        vmin=-0.5,
        vmax=0.5,
    )

    sns.set(style="whitegrid", context="talk")
    plt.figure(dpi=480, figsize=(10, 8))
    points = plt.scatter(section["x_reconstructed"], section["y_reconstructed"], **point_style)
    colorbar = plt.colorbar(points)
    colorbar.set_label('ASD Bias', fontsize=fontsize, weight='bold')

    # Outline each requested structure on top of the scatter.
    for structure in STRs:
        outline, anchor = get_region_shape(section, structure)
        add_region_bound2(plt, outline, anchor, structure)

    plt.xlabel("X Reconstructed", fontsize=heading_size, weight='bold')
    plt.ylabel("Y Reconstructed", fontsize=heading_size, weight='bold')
    plt.title(title, fontsize=heading_size, weight='bold')
    plt.grid(True, linestyle='--', alpha=0.5)
    plt.xticks(fontsize=fontsize)
    plt.yticks(fontsize=fontsize)
    plt.gca().invert_yaxis()
    plt.tight_layout()
    plt.show()

In [None]:
MakePlot2(section_51, STRs, "Section 51", 20)

In [None]:
STRs = ['CLA']
MakePlot(section_51, STRs, "Section 51")

##### [end] Validate ACB

##### Validate AMY

In [None]:
MERFISH[MERFISH["parcellation_structure"]=="BLA"]["brain_section_label"].value_counts()

In [None]:
brain_section = 'C57BL6J-638850.36'
pred = (MERFISH['brain_section_label'] == brain_section )
section36 = MERFISH[pred]
print(len(section36))

In [None]:
section36["parcellation_structure"].value_counts().head(30)

In [None]:
STR_Disp_Names = section36["parcellation_structure"].value_counts().head(30).index.values

In [None]:
STR_Disp_Names

In [None]:
tmp = section36[section36["parcellation_structure"].isin(STR_Disp_Names)]
tmp = tmp.drop_duplicates(subset="parcellation_structure")
tmp[["parcellation_structure", "ISH_STR", "ASD.Bias", "ASD.Bias.adj"]]

In [None]:
STRs = ['DG', 'CA1', 'RSPv', 'MEA', 'AUDp', 'AUDv', 'AUDd', 'VISam', 'SSs', 'TEa', 
       'VISrl', 'CA3', 'PIR', 'PF', 'LA', 'RSPd', 'BLA', 'BMA', ]

In [None]:
MakePlot(section36, STRs, "Section 36")

##### [end] Validate AMY

##### Validate Cortex

In [None]:
MERFISH[MERFISH["parcellation_structure"]=="ILA"]["brain_section_label"].value_counts()

In [None]:
brain_section56 = 'C57BL6J-638850.56'
pred = (MERFISH['brain_section_label'] == brain_section56 )
section_56 = MERFISH[pred]
print(len(section_56))

In [None]:
section_56["parcellation_structure"].value_counts().head(30)

In [None]:
STR_Disp_Names = section_56["parcellation_structure"].value_counts().head(15).index.values

In [None]:
STR_Disp_Names

In [None]:
tmp = section_56[section_56["parcellation_structure"].isin(STR_Disp_Names)]
tmp = tmp.drop_duplicates(subset="parcellation_structure")
tmp[["parcellation_structure", "ISH_STR", "ASD.Bias", "ASD.Bias.adj"]]

In [None]:
STRs = ['MOp', 'MOs', 'PIR', 'PL', 'ILA', 'AId', 'ORBl',
        'AON', 'ACAd', 'TT', ]

In [None]:
MakePlot(section_56, STRs, "Section 56")

##### [end] Validate Cortex

In [None]:
MERFISH[MERFISH["parcellation_structure"]=="VTA"]["brain_section_label"].value_counts()

In [None]:
brain_section = 'C57BL6J-638850.15'
pred = (MERFISH['brain_section_label'] == brain_section )
section = MERFISH[pred]
print(len(section))

In [None]:
section["parcellation_structure"].value_counts().head(30)

In [None]:
STR_Disp_Names = section["parcellation_structure"].value_counts().head(20).index.values

In [None]:
tmp = section[section["parcellation_structure"].isin(STR_Disp_Names)]
tmp = tmp.drop_duplicates(subset="parcellation_structure")
tmp[["parcellation_structure", "ISH_STR", "ASD.Bias", "ASD.Bias.adj"]]

In [None]:
tmp2 = section[section["ISH_STR"]=="Central lobule"]

In [None]:
tmp2.head(2)

In [None]:
tmp2[tmp2["ASD.Bias"]>0.1]["subclass"].value_counts()

In [None]:
STRs = tmp[tmp["ISH_STR"]!="Not in Connectome"]["parcellation_structure"].values

In [None]:
MakePlot(section, STRs, "Section 15")

### 3D plot

In [None]:
%matplotlib inline
from mpl_toolkits.mplot3d import Axes3D

# ABC_ALL is otherwise only defined in a later cell ("Check Region/STR bias
# distribution"); build it here so this cell also works on Restart & Run All.
ABC_ALL = MERFISH[MERFISH["parcellation_structure"]=="ACB"]
# NOTE(review): the "Bias.V2" column is not created by any cell visible in this
# notebook; presumably it comes with the cached MERFISH table - confirm.

fig = plt.figure()
# Axes3D(fig) no longer attaches the axes to the figure on current Matplotlib
# releases, which leaves an empty plot; request a 3D subplot instead.
ax = fig.add_subplot(projection="3d")

ax.scatter(ABC_ALL["x_reconstructed"], 
           ABC_ALL["y_reconstructed"], 
           ABC_ALL["z_reconstructed"], 
           c=ABC_ALL["Bias.V2"], cmap='bwr', s=0.1)
# Optional overlay of the CP cells (disabled):
# ax.scatter(CP_ALL["x_reconstructed"],
#            CP_ALL["y_reconstructed"],
#            CP_ALL["z_reconstructed"],
#            c=CP_ALL["Bias.V2"], cmap='bwr', s=0.1)

# Set labels
ax.set_xlabel('X Label')
ax.set_ylabel('Y Label')
ax.set_zlabel('Z Label')
plt.show()

In [None]:
# Constant marker size for the plotly scatter. ABC_ALL is a filtered slice of
# MERFISH, so add the column on a new frame rather than assigning into the slice
# (avoids SettingWithCopyWarning).
ABC_ALL = ABC_ALL.assign(size=0.1)

In [None]:
ABC_ALL.head(5)

In [None]:
import plotly.express as px
fig = px.scatter_3d(ABC_ALL, x='x_reconstructed', y='y_reconstructed', z='z_reconstructed',
                    color = 'Bias.V2', color_continuous_scale="RdBu", opacity=1, size="size")
fig.update_layout(margin=dict(l=0, r=0, b=0, t=0))
fig.show()

In [None]:
import plotly.graph_objects as go
import numpy as np  # kept: later cells call np.nanmean

fig = go.Figure(data=[go.Scatter3d(
    x=ABC_ALL["x_reconstructed"],
    y=ABC_ALL["y_reconstructed"],
    z=ABC_ALL["z_reconstructed"],
    mode='markers',
    marker=dict(
        size=2,
        # Colours must stay aligned with the coordinates: reversing the value array
        # (as the previous [::-1] did) assigns each point another point's bias.
        # To flip the palette instead, add reversescale=True.
        color=ABC_ALL["Bias.V2"],
        colorscale='RdBu',   # choose a colorscale
        opacity=0.8
    )
)])

# tight layout
fig.update_layout(margin=dict(l=0, r=0, b=0, t=0))
fig.show()

### Check Region/STR bias distribution

In [None]:
# Subsets of MERFISH cells by parcellation structure: "ACB" and "CP".
# NOTE(review): the variable is spelled ABC_ALL but holds the "ACB" rows; it is
# also referenced by the 3D-plot cells earlier in the notebook.
ABC_ALL = MERFISH[MERFISH["parcellation_structure"]=="ACB"]
print("ACB", ABC_ALL.shape)
CP_ALL = MERFISH[MERFISH["parcellation_structure"]=="CP"]
print("CP", CP_ALL.shape)

In [None]:
plt.figure(dpi=120)
sns.kdeplot(ABC_ALL["Bias.V2"], label="ACB.v2")
sns.kdeplot(ABC_ALL["Bias.V3"], label="ACB.V3")

sns.kdeplot(CP_ALL["Bias.V2"], label="CP.v2")
sns.kdeplot(CP_ALL["Bias.V3"], label="CP.V3")
plt.legend()

##### Regional bias by MERFISH

In [None]:
Regions = MERFISH["parcellation_division"].value_counts()

In [None]:
Regions.index.values

In [None]:
Select_Regions = ['Isocortex', 'STR', 'CB', 'HPF', 'MB', 'OLF', 'MY', 'P', 'TH',
       'HY', 'PAL', 'CTXsp']

In [None]:
MERFISH_SelectedReg = MERFISH[MERFISH["parcellation_division"].isin(Select_Regions)]

In [None]:
# Distribution of Bias.V2 per division over all cells of the selected divisions.
# The previous version plotted `tmp`, a leftover one-row-per-structure frame from
# the single-section checks above, rather than the frame built in the cell just before.
plt.figure(dpi=120, figsize=(12,6))
sns.violinplot(data=MERFISH_SelectedReg, x="parcellation_division", y="Bias.V2")

In [None]:
# Distribution of Bias.V3 per division over all cells of the selected divisions.
# The previous version plotted `tmp`, a leftover one-row-per-structure frame from
# the single-section checks above, rather than the frame built for this analysis.
plt.figure(dpi=120, figsize=(12,6))
sns.violinplot(data=MERFISH_SelectedReg, x="parcellation_division", y="Bias.V3")

In [None]:
# One MERFISH sub-frame per selected division, in the same order as Select_Regions,
# so Region_DFs[i] corresponds to Select_Regions[i] in the KDE cells below.
Region_DFs = []
for REG in Select_Regions:
    RegDF = MERFISH[MERFISH["parcellation_division"]==REG]
    Region_DFs.append(RegDF)

In [None]:
plt.figure(dpi=120)
for i, REG in enumerate(Select_Regions):
    sns.kdeplot(Region_DFs[i]["Bias.V2"], label=REG)
plt.legend()

In [None]:
plt.figure(dpi=120)
for i, REG in enumerate(Select_Regions):
    sns.kdeplot(Region_DFs[i]["Bias.V2"], label=REG)
plt.legend()

In [None]:
plt.figure(dpi=120)
for i, REG in enumerate(Select_Regions):
    sns.kdeplot(Region_DFs[i]["Bias.V3"], label=REG)
plt.legend()

##### Structure bias by MERFISH

In [None]:
MERFISH_SelectedReg["parcellation_structure"].value_counts()

In [None]:
Structures = MERFISH_SelectedReg["parcellation_structure"].value_counts().index.values

In [None]:
# Mean Bias.V2 / Bias.V3 per structure (NaNs ignored), computed in one grouped pass
# instead of filtering the full cell table once per structure. reindex() puts the
# result in the order of `Structures`, which the next cell relies on.
structure_means = (
    MERFISH_SelectedReg
    .groupby("parcellation_structure", observed=False)[["Bias.V2", "Bias.V3"]]
    .mean()
    .reindex(Structures)
)
dat_v2 = structure_means["Bias.V2"].tolist()
dat_v3 = structure_means["Bias.V3"].tolist()

In [None]:
STR_Bias_DF = pd.DataFrame(data={"STR": Structures, "ASD Bias V2": dat_v2, "ASD Bias V3": dat_v3})

In [None]:
# Rank structures by mean V2 bias: after sorting in descending order, rank 1 is the
# structure with the highest value.
STR_Bias_DF = STR_Bias_DF.sort_values("ASD Bias V2", ascending=False)
# reset_index() without drop=True keeps the previous index as an "index" column;
# the V3 ranking cell that follows removes it again.
STR_Bias_DF = STR_Bias_DF.reset_index()
STR_Bias_DF["BiasRank.V2"] = STR_Bias_DF.index.values + 1

In [None]:
# Rank structures by mean V3 bias (1 = highest). The old index is discarded rather
# than materialised as a "level_0" column.
STR_Bias_DF = STR_Bias_DF.sort_values("ASD Bias V3", ascending=False)
STR_Bias_DF = STR_Bias_DF.reset_index(drop=True)
STR_Bias_DF["BiasRank.V3"] = STR_Bias_DF.index.values + 1
# Remove index columns left behind by earlier reset_index() calls. errors="ignore"
# keeps the cell re-runnable once they are already gone.
STR_Bias_DF = STR_Bias_DF.drop(columns=["level_0", "index"], errors="ignore")

In [None]:
STR_Bias_DF.head(10)

In [None]:
#### Add some annotations about Structures

In [None]:
ontology = pd.read_excel("../../data/Allen_Mouse_Brain_Cell_Atlas/SuppTables/41586_2023_6812_MOESM2_ESM.xlsx",
                        skiprows=1, index_col="abbreviation")

In [None]:
ontology

In [None]:
ontology[ontology["structure ID"]==477]

In [None]:
# Annotate every structure with its full name and two ancestor names taken from the
# ontology table (indexed by abbreviation). Structures missing from the ontology
# get empty strings in all three columns.
for i, row in STR_Bias_DF.iterrows():
    STR = row["STR"]
    if STR in ontology.index.values:
        STR_Bias_DF.loc[i, "Full Name"] = ontology.loc[STR, "full structure name"]
        # "structure_id_path" is split on "/" into structure IDs.
        # NOTE(review): presumably the path runs from the root to this structure - confirm.
        tree = ontology.loc[STR, "structure_id_path"]
        # Region1: name of the structure whose ID is field 5 of the split path.
        # NOTE(review): raises IndexError for paths with fewer fields, or when the ID
        # is not present in the "structure ID" column.
        REG_ID = int(tree.split("/")[5])
        REG_Name = ontology[ontology["structure ID"]==REG_ID]
        REG_Name = REG_Name["full structure name"].values[0]
        STR_Bias_DF.loc[i, "Region1"] = REG_Name 
        
        # Region2: name of the structure whose ID is the third-from-last field of the split path.
        REG_ID = int(tree.split("/")[-3])
        REG_Name = ontology[ontology["structure ID"]==REG_ID]
        REG_Name = REG_Name["full structure name"].values[0]
        STR_Bias_DF.loc[i, "Region2"] = REG_Name 
    else:
        STR_Bias_DF.loc[i, "Full Name"] = ""
        STR_Bias_DF.loc[i, "Region1"] = ""
        STR_Bias_DF.loc[i, "Region2"] = ""

In [None]:
STR_Bias_DF.head(50)

In [None]:
STR_Bias_DF.to_csv("dat/Test>ABC_REGION.bias.csv", index=False)

In [None]:
pearsonr(STR_Bias_DF["ASD Bias V2"], STR_Bias_DF["ASD Bias V3"])

In [None]:
plt.scatter(STR_Bias_DF["ASD Bias V2"], STR_Bias_DF["ASD Bias V3"])

In [None]:
CB = MERFISH_SelectedReg[MERFISH_SelectedReg["parcellation_division"]=="CB"]

In [None]:
CB = CB.sort_values("Bias.V3", ascending=False)

In [None]:
CB.head(10)

In [None]:
CB.to_csv("dat/MERFISH_CB.csv")

In [None]:
# Rebuild the per-division frames from scratch. Appending to the existing list
# duplicated every entry and made the list grow on each re-run of this cell.
Region_DFs = []
for REG in Select_Regions:
    RegDF = MERFISH[MERFISH["parcellation_division"]==REG]
    Region_DFs.append(RegDF)