In [None]:
import scanpy as sc
import scanpy.external as sce
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from collections import Counter
import pandas as pd

In [None]:
#Colors
#Species colors
mouse = "#F2C911"
rat = "#65B0AC"

#Divergence-type colors
intrinsic = "#F55F00"
extrinsic = "#7D9AF4"
reinforcing = "#9B00F5"
opposing = "#F50901"
interaction = "#1E771A"

#Global seaborn look: larger fonts on a plain white background
sns.set(font_scale=1.5)
sns.set_style("white")

#Long cell type name -> abbreviated label
#"Chondrocyte" and "Chondrocytes" are separate keys that share one abbreviation
d_ct_abrev = {
    "Chondrocyte": "chondrocyte",
    "Forebrain glutamatergic progenitors": "brain.glut.prog",
    "Forebrain GABAergic progenitors": "brain.GABA.prog",
    "Intermediate progenitors": "inter.prog",
    "Forebrain glutamatergic neurons": "brain.glut.neu",
    "Forebrain GABAergic neurons": "brain.GABA.neu",
    "Spinal GABAergic neurons": "spine.GABA.neu",
    "Spinal glutamatergic neurons": "spine.glut.neu",
    "Chondrocytes": "chondrocyte",
    "Mesenchyme 0": "mesen.0",
    "Mesenchyme 2": "mesen.2",
    "Mesenchyme cycling": "mesen.cyc",
    "Forebrain GABAergic neurons 0": "Toss",
}

#Define function to convert file name to cell type
def file_to_celltype(x):
    """Derive a cell type label from a file path.

    Only the part after the last "/" is used. Known suffixes and prefixes are
    stripped, underscores become spaces, and a few names are normalized
    (e.g. "Glutamatergic" -> "Forebrain glutamatergic").

    Parameters
    ----------
    x : str
        File path or file name.

    Returns
    -------
    str
        The cleaned-up cell type label.
    """
    label = x.split("/")[-1]

    #Applied strictly in this order: later rules act on the output of earlier ones
    #(e.g. "Spinal Forebrain" only exists after the "GABAergic"/"Glutamatergic" rules ran)
    substitutions = [
        ("_GO_Molecular_Function_2023", ""),
        ("_GO_Biological_Process_2023", ""),
        ("Mesechyme", "Mesenchyme"),
        ("Brain_Or_2010_Div_New4_NewNorm_", ""),
        ("Mesenchymal_Or_2010_Div_New4_NewNorm_", ""),
        ("Chondrocyte_Or_2010_Div_New4_NewNorm_", ""),
        (".csv", ""),
        ("_", " "),
        (" all", ""),
        ("Glutamatergic", "Forebrain glutamatergic"),
        ("GABAergic", "Forebrain GABAergic"),
        ("Spinal Forebrain", "Spinal"),
        ("Chondrocytes", "Chondrocyte"),
    ]
    for old, new in substitutions:
        label = label.replace(old, new)
    return label

#Ask matplotlib to render all text in Arial
#NOTE(review): bp() calls sns.set(...) before each plot, which may reset font settings - confirm this still applies to those figures
plt.rcParams.update({"font.family": "Arial"})

In [None]:
#Make a function to plot the various neuron:progenitor ratios
#Takes as input only one row
#extra_title is usually the cell type
def bp(vp, col, extra_title):
    """Plot the neuron:progenitor ratio of one lineage as grouped bars.

    Bars are grouped on the x-axis into "Host", "Rat-like env." and
    "Mouse-like env." and colored by cell species. The "Host" group repeats
    the MR_r and RM_m values so they can be compared side by side.

    Parameters
    ----------
    vp : pandas.DataFrame
        Table with the columns MR_m, MR_r, RM_m and RM_r. Only the first row
        of each column is read.
    col : str
        Name of the value column in the plotting table; also the y-axis label.
    extra_title : str
        Text appended to the plot title (usually the cell type).

    Returns
    -------
    pandas.DataFrame
        The input ``vp``, unchanged. The reshaped plotting table is printed
        but not returned.
    """
    #Read each condition once from the first row
    mr_m = float(vp["MR_m"].iloc[0])
    mr_r = float(vp["MR_r"].iloc[0])
    rm_m = float(vp["RM_m"].iloc[0])
    rm_r = float(vp["RM_r"].iloc[0])

    #Reformat and rename for seaborn: one row per bar
    tp = pd.DataFrame([
        ["Rat-like env.", "Mouse", mr_m],
        ["Rat-like env.", "Rat", mr_r],
        ["Mouse-like env.", "Mouse", rm_m],
        ["Mouse-like env.", "Rat", rm_r],
        ["Host", "Mouse", rm_m],
        ["Host", "Rat", mr_r],
    ])
    tp.columns = ["Chimera (Donor-Host)", "Cell species", col]
    print(tp)

    sns.set(font_scale=1.5)
    sns.set_style("white")

    #Make barplot
    sns.barplot(data = tp, y = col, x = "Chimera (Donor-Host)", hue = "Cell species", palette = {"Mouse":mouse, "Rat":rat}, alpha = 1, order = ["Host", "Rat-like env.", "Mouse-like env."])
    plt.ylabel(col)
    plt.xlabel("Extrinsic environment")
    plt.title("Neuron to progenitor ratio: " + extra_title)

    #Dashed divider between the "Host" group and the two environment groups
    plt.axvline(0.5, color = "black", linewidth = 2.5, alpha = 1, linestyle="dashed")

    #Hide the legend
    plt.legend([],[], frameon=False)
    plt.show()

    #Return the input table unchanged (not the reshaped table used for plotting)
    return vp



In [None]:
#Decompose into intrinsic, extrinsic, and interaction components as discussed in the paper
def decompose(vp, name):
    """Split the divergence of one ratio into extrinsic, intrinsic and interaction parts.

    Parameters
    ----------
    vp : pandas.DataFrame
        Table with the columns MR_m, RM_m, RM_r and MR_r. Only the first row
        of each column is read and converted to float.
    name : str
        Label used for the single column of the returned table.

    Returns
    -------
    pandas.DataFrame
        One column called ``name`` with the rows "Extrinsic", "Intrinsic",
        "Interaction" (signed values) and "Proportion extrinsic",
        "Proportion intrinsic", "Proportion interaction" (absolute shares
        of the three components).
    """
    def first_value(condition):
        return float(vp[condition].iloc[0])

    mr_m = first_value("MR_m")
    rm_m = first_value("RM_m")
    rm_r = first_value("RM_r")
    mr_r = first_value("MR_r")

    #Compute the log fold-changes
    lfc_dm_hm = np.log2(mr_m/rm_m)
    lfc_dr_hr = np.log2(rm_r/mr_r)
    lfc_dm_hr = np.log2(mr_m/mr_r)
    lfc_hm_dr = np.log2(rm_m/rm_r)

    #Interaction divergence
    inter_sum = lfc_dm_hm + lfc_dr_hr

    #Extrinsic and intrinsic divergence: each is the sum of two estimates,
    #with the sign flipped to make it match the rest of the figures
    extr_sum = -((-lfc_dm_hm) + lfc_dr_hr)
    intr_sum = -(lfc_dm_hr + lfc_hm_dr)

    #Share of each component in the total absolute divergence
    total = abs(inter_sum) + abs(extr_sum) + abs(intr_sum)

    #Final estimates; see paper for where the factor of 2 comes from
    values = [
        extr_sum/2,
        intr_sum/2,
        inter_sum/2,
        abs(extr_sum)/total,
        abs(intr_sum)/total,
        abs(inter_sum)/total,
    ]
    labels = ["Extrinsic", "Intrinsic", "Interaction", "Proportion extrinsic", "Proportion intrinsic", "Proportion interaction"]

    result = pd.DataFrame(values)
    result.index = labels
    result.columns = [name]
    return result


In [None]:
#Plot for glutamatergic
v = pd.read_csv("Final/Cell_counts_neuronal.tsv", sep = "\t")

#Restrict to only progenitors and neurons
#Clusters "0" and "5" form the numerator (neurons) and "4" the denominator (progenitors) of the ratio below
glut_clusters = ["0", "4", "5"]
v = v[["Sample", "Condition"] + glut_clusters]

#Convert to proportions: divide every count by its row total over the selected clusters
glut_props = v[glut_clusters].div(v[glut_clusters].sum(axis = 1), axis = 0)
glut_props.columns = ["Prop " + cluster for cluster in glut_clusters]
v = v.join(glut_props)

#Compute neuron:progenitor ratio
v["Prop 0 + 5/Prop 4"] = (v["Prop 0"] + v["Prop 5"])/v["Prop 4"]

#Keep the four chimera samples and reshape to one row with one column per condition
#NOTE(review): decompose and bp read the columns MR_m, MR_r, RM_m and RM_r, so these samples are presumably labeled that way in "Condition" - confirm
vp_glut = v.loc[v["Sample"].isin(["RM1_r", "RM1_m", "MR1_r", "MR1_m"]), ["Condition", "Prop 0 + 5/Prop 4"]]
vp_glut.columns = ["Condition", "brain.glut.neu/brain.glut.prog"]
vp_glut = vp_glut.set_index("Condition").T

#Decompose into different components
df_glut = decompose(vp_glut, "forebrain glut.")


In [None]:
#Draw the bar plot for the forebrain glutamatergic lineage
bp(vp = vp_glut, col = "brain.glut.neu/brain.glut.prog", extra_title = "forebrain glut.")

In [None]:
#Plot for spinal cord combined, same steps as the glutamatergic cell
v = pd.read_csv("Final/Cell_counts_neuronal.tsv", sep = "\t")

#Clusters "6" and "7" form the numerator (neurons) and "12" the denominator (progenitors) of the ratio below
spine_clusters = ["6", "7", "12"]
v = v[["Sample", "Condition"] + spine_clusters]

#Convert to proportions: divide every count by its row total over the selected clusters
spine_props = v[spine_clusters].div(v[spine_clusters].sum(axis = 1), axis = 0)
spine_props.columns = ["Prop " + cluster for cluster in spine_clusters]
v = v.join(spine_props)

#Combine inhibitory and excitatory neurons since we could not distinguish spinal glutamatergic and GABAergic progenitors
v["Prop 6 + 7/Prop 12"] = (v["Prop 6"] + v["Prop 7"])/v["Prop 12"]

#Keep the four chimera samples and reshape to one row with one column per condition
vp_spine = v.loc[v["Sample"].isin(["RM1_r", "RM1_m", "MR1_r", "MR1_m"]), ["Condition", "Prop 6 + 7/Prop 12"]]
vp_spine.columns = ["Condition", "spine.neu/spine.prog"]
vp_spine = vp_spine.set_index("Condition").T

#Decompose into different components, then plot
df_spine = decompose(vp_spine, "spinal")
bp(vp_spine, "spine.neu/spine.prog", "spinal")

In [None]:
#Join glutamatergic and spinal
#Rows are matched on the index of the two decomposition tables
df_plot = df_glut.join(df_spine, how = "left")
df_plot

In [None]:
#Plot for GABAergic
v = pd.read_csv("Final/Cell_counts_neuronal.tsv", sep = "\t")

#Cluster "2" is the denominator (progenitors) and "1", "3", "9" form the numerator (neurons) of the ratio below
gaba_clusters = ["2", "1", "3", "9"]
v = v[["Sample", "Condition"] + gaba_clusters]

#Convert to proportions: divide every count by its row total over the selected clusters
gaba_props = v[gaba_clusters].div(v[gaba_clusters].sum(axis = 1), axis = 0)
gaba_props.columns = ["Prop " + cluster for cluster in gaba_clusters]
v = v.join(gaba_props)

#Add up the values for the different GABAergic neuron types
v["Prop 1 + 3 + 9/Prop 2"] = (v["Prop 1"] + v["Prop 3"]  + v["Prop 9"])/v["Prop 2"]

#Keep the four chimera samples and reshape to one row with one column per condition
vp_gaba = v.loc[v["Sample"].isin(["RM1_r", "RM1_m", "MR1_r", "MR1_m"]), ["Condition", "Prop 1 + 3 + 9/Prop 2"]]
vp_gaba.columns = ["Condition", "brain.GABA.neu/brain.GABA.prog"]
vp_gaba = vp_gaba.set_index("Condition").T

#Decompose into different components
df_gaba = decompose(vp_gaba, "forebrain GABA.")

#Plot
bp(vp_gaba, "brain.GABA.neu/brain.GABA.prog", "forebrain GABA.")

In [None]:
#Add GABAergic information
#Rebuilt from the per-lineage tables instead of appending to df_plot, so re-running this cell
#does not fail on a "forebrain GABA." column that is already present
df_plot = df_glut.join(df_spine).join(df_gaba)


In [None]:
#Transpose so that each lineage is a row and each divergence measure is a column
#Built directly from the per-lineage tables: transposing df_plot onto itself would flip
#the orientation back every time this cell is re-run
df_plot = df_glut.join(df_spine).join(df_gaba).T

In [None]:
#Reformat for seaborn: one [lineage, value, type] record per lineage and divergence type
divergence_types = ["Proportion intrinsic", "Proportion extrinsic", "Proportion interaction"]
out = [
    [lineage, proportions[kind], kind]
    for lineage, proportions in df_plot.iterrows()
    for kind in divergence_types
]

#Make dataframe with sensible column names, largest proportions first
df = pd.DataFrame(out)
df.columns = ["Trajectory", "Proportion divergence", "Divergence type"]
df = df.sort_values("Proportion divergence", ascending = False)

#Create barplot of divergence types across lineages
type_colors = {
    "Proportion intrinsic": intrinsic,
    "Proportion extrinsic": extrinsic,
    "Proportion interaction": interaction,
}
sns.barplot(data = df, x = "Trajectory", y = "Proportion divergence", hue = "Divergence type", palette = type_colors, hue_order = divergence_types)
plt.legend(bbox_to_anchor=(1, 1))
plt.title("Decomposition of divergence in neuron-progenitor ratio")