In [None]:
import numpy as np
import math
import matplotlib.pyplot as plt
import csv
import pandas as pd
from scipy import stats
import re
from scipy.stats.stats import pearsonr
#import rpy2.robjects as robjects
import random
from statsmodels.stats.multitest import fdrcorrection
import copy
from collections import Counter
import seaborn as sns
from matplotlib.patches import PathPatch
from matplotlib.path import Path
import os

In [None]:
# Hex colours shared by the figures. `mouse` and `rat` are the barplot palette
# used by bp(); the remaining names are not referenced in the cells shown here.
mouse, rat = "#F2C911", "#65B0AC"
intrinsic, extrinsic = "#F55F00", "#7D9AF4"
reinforcing, opposing = "#9B00F5", "#F50901"
interaction = "#1E771A"

# Notebook-wide seaborn look: larger fonts on a plain white background.
sns.set(font_scale=1.5)
sns.set_style("white")

# Long cell-type label -> short abbreviation.
# NOTE(review): the "Toss" entry presumably flags a cluster to discard - confirm.
d_ct_abrev = {
    "Forebrain glutamatergic progenitors": "brain.glut.prog",
    "Forebrain GABAergic progenitors": "brain.GABA.prog",
    "Intermediate progenitors": "inter.prog",
    "Forebrain glutamatergic neurons": "brain.glut.neu",
    "Forebrain GABAergic neurons": "brain.GABA.neu",
    "Spinal GABAergic neurons": "spine.GABA.neu",
    "Spinal glutamatergic neurons": "spine.glut.neu",
    "Chondrocytes": "chondrocyte",
    "Mesenchyme 0": "mesen.0",
    "Mesenchyme 2": "mesen.2",
    "Mesenchyme cycling": "mesen.cyc",
    "Forebrain GABAergic neurons 0": "Toss",
}

In [None]:
#Make a function to plot the various expression levels
def bp(gene, file, cell_type="brain.GABA.prog"):
    """Plot one gene's pseudobulked normalized counts and return its table row.

    The CSV at ``file`` is read, indexed by its ``"Unnamed: 0"`` column, and the
    row labelled ``gene`` is extracted. Six of its ``"<sample> Norm CPM"`` values
    are drawn as a seaborn barplot grouped into "Host", "Rat-like env." and
    "Mouse-like env.", coloured by cell species.

    Parameters
    ----------
    gene : str
        Row label to look up; also italicised in the plot title.
    file : str
        Path to the CSV. When the path contains ``"Brain"`` the RM1 samples are
        used for the mouse-like environment and the mouse host bar; otherwise
        the RM2 samples are used.
    cell_type : str, optional
        Text appended to the title after "in". The default reproduces the
        title this function previously hard-coded.

    Returns
    -------
    pandas.Series
        The complete row for ``gene`` (every column of the file, not only the
        plotted ones).

    Raises
    ------
    KeyError
        If ``gene`` is not in the index or a plotted column is missing.

    Notes
    -----
    Calls ``sns.set`` / ``sns.set_style``, which change the global seaborn
    theme for later plots, and ends with ``plt.show()``.
    """
    vv = pd.read_csv(file).set_index("Unnamed: 0")
    vp = vv.loc[gene]

    def _row(environment, species, sample):
        # One barplot record: x-group, hue, and the sample's normalized CPM.
        return [environment, species, vp[sample + " Norm CPM"]]

    # Only the rows that are actually plotted are built; row order is kept as
    # it was because seaborn derives the hue order from first appearance.
    if "Brain" in file:
        rows = [
            _row("Rat-like env.", "Mouse", "MR1_mi"),
            _row("Rat-like env.", "Rat", "MR1_ri"),
            _row("Mouse-like env.", "Mouse", "RM1_mi"),
            _row("Mouse-like env.", "Rat", "RM1_ri"),
            _row("Host", "Mouse", "RM1_mi"),
            _row("Host", "Rat", "MR1_ri"),
        ]
    else:
        rows = [
            _row("Host", "Mouse", "RM2_mi"),
            _row("Host", "Rat", "MR1_ri"),
            _row("Rat-like env.", "Mouse", "MR1_mi"),
            _row("Rat-like env.", "Rat", "MR1_ri"),
            _row("Mouse-like env.", "Rat", "RM2_ri"),
            _row("Mouse-like env.", "Mouse", "RM2_mi"),
        ]
    tp = pd.DataFrame(rows, columns=["Chimera (Donor-Host)", "Cell species", "Norm CPM"])

    sns.set(font_scale=1.5)
    sns.set_style("white")
    sns.barplot(
        data=tp,
        y="Norm CPM",
        x="Chimera (Donor-Host)",
        hue="Cell species",
        palette={"Mouse": mouse, "Rat": rat},
        alpha=1,
        order=["Host", "Rat-like env.", "Mouse-like env."],
    )
    plt.ylabel("Pseudobulked normalized counts")
    plt.xlabel("Extrinsic environment")
    # Raw string: "\i" is not a valid Python escape, and mathtext needs the
    # literal backslash to italicise the gene name.
    plt.title(r"Expression of $\it{" + gene + "}$ in " + cell_type)
    # Dashed divider between the "Host" group and the two environment groups.
    plt.axvline(0.5, color="black", linewidth=2.5, alpha=1, linestyle="dashed")
    # Hide the legend (empty handles and labels).
    plt.legend([], [], frameon=False)
    plt.show()
    return vp

# Plot a single gene from the GABAergic-neuron table and keep its row in `vp`.
gene = "Jun"
file = "Final/Div/Brain_Or_2010_Div_New4_NewNorm_GABAergic_neurons_all.csv"
vp = bp(gene, file)

# Disabled multi-gene variant, kept for reference:
#genes = "Sf3b4;Cwc25;Ncbp1;Srsf1;Snrpb2;Smn1;Prpf8;Prmt5;Srsf7;Prpf40a"
#for gene in genes.split(";"):

In [None]:
def make_to_plot(dm_hr_if, hm_dr_if, hm_hr_if, dm_dr_if, dm_hr_rna, hm_dr_rna, hm_hr_rna, dm_dr_rna):
    """Arrange four IF and four RNA values as a long-format table.

    Every argument becomes one row. The returned DataFrame has the columns
    "Log2 fold-change", "Comparison" and "Measurement"; the four "IF" rows come
    first, then the four "RNA" rows, each in the order DM/HR, HM/DR, HM/HR,
    DM/DR.
    """
    comparisons = ("DM/HR", "HM/DR", "HM/HR", "DM/DR")
    by_measurement = (
        ("IF", (dm_hr_if, hm_dr_if, hm_hr_if, dm_dr_if)),
        ("RNA", (dm_hr_rna, hm_dr_rna, hm_hr_rna, dm_dr_rna)),
    )
    records = [
        [value, comparison, measurement]
        for measurement, values in by_measurement
        for value, comparison in zip(values, comparisons)
    ]
    return pd.DataFrame(records, columns=["Log2 fold-change", "Comparison", "Measurement"])

In [None]:
def get_props(df):
    """Derive extrinsic / intrinsic / interaction estimates from the IF rows.

    Only rows whose "Measurement" is "IF" are used; other rows are ignored.
    From the "DM/HR", "HM/DR" and "HM/HR" log2 fold-changes ("DM/DR" is not
    read) three totals are formed. Each total is halved for the "Estimate"
    column, and each absolute total is divided by the sum of the three absolute
    totals for the "Proportion" column.

    Returns a DataFrame indexed by "Extrinsic", "Intrinsic", "Interaction" with
    the columns "Estimate" and "Proportion". Raises KeyError when one of the
    three required comparisons has no IF row.
    """
    if_rows = df.loc[df["Measurement"].isin(["IF"]), ["Log2 fold-change", "Comparison"]]
    # Comparison label -> fold-change; a repeated label keeps its last value.
    lfc = dict(zip(if_rows["Comparison"].tolist(), if_rows["Log2 fold-change"].tolist()))

    dm_vs_hm = lfc["DM/HR"] - lfc["HM/HR"]
    dr_vs_hr = lfc["HM/HR"] - lfc["HM/DR"]

    total_interaction = dm_vs_hm + dr_vs_hr
    total_extrinsic = -((-dm_vs_hm) + dr_vs_hr)
    total_intrinsic = -(lfc["DM/HR"] + lfc["HM/DR"])

    # Shared denominator of the three proportions.
    magnitude = abs(total_interaction) + abs(total_extrinsic) + abs(total_intrinsic)

    totals = [total_extrinsic, total_intrinsic, total_interaction]
    return pd.DataFrame(
        {
            "Estimate": [total / 2 for total in totals],
            "Proportion": [abs(total) / magnitude for total in totals],
        },
        index=["Extrinsic", "Intrinsic", "Interaction"],
    )

In [None]:
# Jun: fetch the RNA rows for both GABAergic cell types (bp also draws a
# barplot for each call).
gene = "Jun"
file = "Final/Div/Brain_Or_2010_Div_New4_NewNorm_GABAergic_neurons_all.csv"
vp_neu = bp(gene, file)
file = "Final/Div/Brain_Or_2010_Div_New4_NewNorm_GABAergic_progenitors.csv"
vp_prog = bp(gene, file)

# RNA log2 fold-changes from the progenitor row, in make_to_plot's argument order.
rna_lfc = [vp_prog[key] for key in ("DM/HR", "HM/DR", "HM/HR", "DM/DR")]

# The four leading numbers are hard-coded IF log2 fold-changes per batch
# (DM/HR, HM/DR, HM/HR, DM/DR); their source is not shown in this notebook.
#Batch3 c-Jun
df_3_jun = make_to_plot(0.2024745940023056, -0.2258996892003502, -2.731347173458083, 2.7079220782600384, *rna_lfc)
#Batch1 c-Jun
df_1_jun = make_to_plot(0.035415333181662234, -0.6223054527740928, -0.7804823329354207, 0.19359221334299015, *rna_lfc)

df_1_jun_if = get_props(df_1_jun)
df_3_jun_if = get_props(df_3_jun)
# Proportion carrying the sign of its estimate.
for props in (df_1_jun_if, df_3_jun_if):
    props["Signed proportion"] = np.sign(props["Estimate"]) * props["Proportion"]

# Long-format rows: [Type, Signed proportion, Chimera, Measurement].
out = [
    [index, row["Signed proportion"], "Chimera 1", "Protein"]
    for index, row in df_1_jun_if.iterrows()
]
for index, row in df_3_jun_if.iterrows():
    out.append([index, row["Signed proportion"], "Chimera 3", "Protein"])
    for rna_row, label in ((vp_neu, "brain.GABA.neu"), (vp_prog, "brain.GABA.prog")):
        signed = np.sign(rna_row[index]) * rna_row["Proportion " + index.lower()]
        # The RNA interaction entries are stored unsigned.
        if "nteraction" in index:
            signed = abs(signed)
        out.append([index, signed, label, "RNA"])

df_jun_plot = pd.DataFrame(out, columns=["Type", "Signed proportion", "Chimera", "Measurement"])
df_jun_plot

In [None]:
# Jun: mean signed proportion per Type for RNA vs protein (bars) with the
# individual values overlaid as points.
fig, ax1 = plt.subplots()
ax1.set_ylabel('Signed proportion')
ax2 = ax1.twinx()
ax2.set_ylabel('Proportion interaction', rotation=270, labelpad = 20)
palette = {"RNA":'#0000ff', "Protein":'#ffa500'}
# NOTE(review): the theme is set after the figure exists, so it may only
# partly apply to this figure - confirm whether it should precede plt.subplots().
sns.set(font_scale = 1.3)
sns.set_style("white")
# NOTE(review): no ax= is passed, so seaborn draws on the current axes -
# presumably ax2 after twinx(); confirm this is the intended axes.
t_ax = sns.barplot(data = df_jun_plot, x = "Type", y = "Signed proportion", hue = "Measurement", dodge = True, errorbar=None, linewidth=2.5, edgecolor=".5", facecolor='#F2C91140', gap = 0.1, palette = palette)
# Hard-coded individual points; x offsets place them over the dodged bars
# (-0.2 on the first hue's bars, +0.2 on the second's).
sns.scatterplot(x = [0.20, 1.20, 2.20], y = [0.673144, 0.287709, abs(0.039146)], color = "blue", marker = "s")
sns.scatterplot(x = [0.20, 1.20, 2.20], y = [0.931008, 0.046624, abs(-0.022368)], color = "blue", marker = "^")
sns.scatterplot(x = [-0.2, 0.8, 1.80], y = [0.439032, 0.264522, abs(0.296446)], color = "orange", marker = "o")
sns.scatterplot(x = [-0.2, 0.8, 1.80], y = [0.923308, 0.003976, abs(0.072716)], color = "orange", marker = "o")
# Dashed divider before the third x category.
plt.axvline(1.5, color = "black", linewidth = 2.5, alpha = 1, linestyle="dashed")
# Recolour the bars: the first three patches get the orange outline and faint
# orange fill, every later patch the blue equivalents.
for c, patch in enumerate(t_ax.patches):
    if c <= 2:
        edge, face = "#ffa500", "#FFA5001A"
    else:
        edge, face = "#0000ff", "#0000FF1A"
    patch.set_edgecolor(edge)
    patch.set_facecolor(face)
plt.xticks(rotation=0)
plt.legend([],[], frameon=False)
# Raw string: "\i" is not a valid Python escape; mathtext needs the backslash.
plt.title(r"Comparison of RNA and protein expression for $\it{Jun}$");

In [None]:
# Hspa5: fetch the RNA rows for both GABAergic cell types (bp also draws a
# barplot for each call).
file = "Final/Div/Brain_Or_2010_Div_New4_NewNorm_GABAergic_neurons_all.csv"
gene = "Hspa5"
vp_neu = bp(gene, file)
file = "Final/Div/Brain_Or_2010_Div_New4_NewNorm_GABAergic_progenitors.csv"
vp_prog = bp(gene, file)

# RNA log2 fold-changes must come from this gene's progenitor row (vp_prog),
# as in the Jun cell. The notebook-level `vp` is only assigned by an earlier
# Jun call, so reading it here would mix Jun RNA values into the Hspa5 frames.
rna_lfc = [vp_prog[key] for key in ("DM/HR", "HM/DR", "HM/HR", "DM/DR")]

# The four leading numbers are hard-coded IF log2 fold-changes per batch
# (DM/HR, HM/DR, HM/HR, DM/DR); their source is not shown in this notebook.
#Batch1 Hspa5
df_1_hspa5 = make_to_plot(0.3102673393795623, -0.15697471849134104, -0.7212578153536491, 0.8745504362418703, *rna_lfc)
#Batch3 Hspa5
df_3_hspa5 = make_to_plot(0.16244444771488276, -0.22986796400355303, -0.8111566920514832, 0.743733175762813, *rna_lfc)

# Quick side-by-side look at IF vs RNA fold-changes for batch 1.
sns.barplot(data=df_1_hspa5, x="Comparison", y="Log2 fold-change", hue = "Measurement")

df_1_hspa5_if = get_props(df_1_hspa5)
df_3_hspa5_if = get_props(df_3_hspa5)
# Proportion carrying the sign of its estimate.
df_1_hspa5_if['Signed proportion'] = np.sign(df_1_hspa5_if["Estimate"])*df_1_hspa5_if["Proportion"]
df_3_hspa5_if['Signed proportion'] = np.sign(df_3_hspa5_if["Estimate"])*df_3_hspa5_if["Proportion"]

# Long-format rows: [Type, Signed proportion, Chimera, Measurement].
out = []
for index, row in df_1_hspa5_if.iterrows():
    out.append([index, row["Signed proportion"], "Chimera 1", "Protein"])
for index, row in df_3_hspa5_if.iterrows():
    protein = row["Signed proportion"]
    rna_neu = np.sign(vp_neu[index])*vp_neu["Proportion " + index.lower()]
    rna_prog = np.sign(vp_prog[index])*vp_prog["Proportion " + index.lower()]
    # Interaction entries of this loop are stored unsigned.
    if "nteraction" in index:
        protein, rna_neu, rna_prog = abs(protein), abs(rna_neu), abs(rna_prog)
    out.append([index, protein, "Chimera 3", "Protein"])
    out.append([index, rna_neu, "brain.GABA.neu", "RNA"])
    out.append([index, rna_prog, "brain.GABA.prog", "RNA"])
df_hspa5_plot = pd.DataFrame(out)
df_hspa5_plot.columns = ["Type", "Signed proportion", "Chimera", "Measurement"]
df_hspa5_plot

In [None]:
# Hspa5: mean signed proportion per Type for RNA vs protein (bars) with the
# individual values overlaid as points.
fig, ax1 = plt.subplots()
ax1.set_ylabel('Signed proportion')
ax2 = ax1.twinx()
ax2.set_ylabel('Proportion interaction', rotation=270, labelpad = 20)
ax1.set_yticks([-0.3, 0, 0.3, 0.6, 0.9], [-0.3, 0, 0.3, 0.6, 0.9])
ax2.set_yticks([0, 0.2, 0.4, 0.6, 0.8, 1], [0, 0.2, 0.4, 0.6, 0.8, 1])
# NOTE(review): plt.ylim acts on the current axes - presumably ax2 after
# twinx(); confirm both axes are meant to share this range.
plt.ylim(-0.3, 1)
palette = {"RNA":'#0000ff', "Protein":'#ffa500'}
# NOTE(review): the theme is set after the figure exists, so it may only
# partly apply to this figure - confirm whether it should precede plt.subplots().
sns.set(font_scale = 1.3)
sns.set_style("white")
t_ax = sns.barplot(data = df_hspa5_plot, x = "Type", y = "Signed proportion", hue = "Measurement", dodge = True, errorbar=None, linewidth=2.5, edgecolor=".5", facecolor=(0.5, 0, 0, 0), gap = 0.1, palette = palette)
# Hard-coded individual points; x offsets place them over the dodged bars
# (-0.2 on the first hue's bars, +0.2 on the second's).
sns.scatterplot(x = [0.20, 1.20, 2.20], y = [0.598672, -0.219973, abs(-0.181355)], color = "#0000ff", marker = "s")
sns.scatterplot(x = [0.20, 1.20, 2.20], y = [0.528187, 0.178108, 0.293705], color = "#0000ff", marker = "^")
sns.scatterplot(x = [-0.2, 0.8, 1.80], y = [0.720019, -0.069165, 0.210817], color = "#ffa500", marker = "o")
sns.scatterplot(x = [-0.2, 0.8, 1.80], y = [0.771801, 0.033467, 0.194732], color = "#ffa500", marker = "o")
# Dashed divider before the third x category.
plt.axvline(1.5, color = "black", linewidth = 2.5, alpha = 1, linestyle="dashed")
# Recolour the bars: the first three patches get the orange outline and faint
# orange fill, every later patch the blue equivalents.
for c, patch in enumerate(t_ax.patches):
    if c <= 2:
        edge, face = "#ffa500", "#FFA5001A"
    else:
        edge, face = "#0000ff", "#0000FF1A"
    patch.set_edgecolor(edge)
    patch.set_facecolor(face)
plt.xticks(rotation=0)
plt.legend([],[], frameon=False)
# Raw string: "\i" is not a valid Python escape; mathtext needs the backslash.
plt.title(r"Comparison of RNA and protein expression for $\it{Hspa5}$");

In [None]:
# Display the row most recently returned by bp() for the progenitors file
# (the Hspa5 row when the cells are run top to bottom).
vp_prog

In [None]:
# Display the long-format Jun table (Type, Signed proportion, Chimera,
# Measurement) built in the Jun cell.
df_jun_plot

In [None]:
# Jun again on a single axis: hollow bars of the mean signed proportion per
# Type and Measurement, with hard-coded individual points on top.
palette = {"RNA":"blue", "Protein":"orange"}
t_ax = sns.barplot(
    data = df_jun_plot,
    x = "Type",
    y = "Signed proportion",
    hue = "Measurement",
    dodge = True,
    errorbar=None,
    linewidth=2.5,
    edgecolor=".5",
    facecolor=(0, 0, 0, 0),
    gap = 0.1,
    palette = palette,
)

# x positions over the dodged bars: +0.2 for the blue points, -0.2 for the orange.
blue_x = [0.20, 1.20, 2.20]
orange_x = [-0.2, 0.8, 1.80]
overlays = [
    (blue_x, [0.673144, 0.287709, 0.039146], "blue", "s"),
    (blue_x, [0.931008, 0.046624, -0.022368], "blue", "^"),
    (orange_x, [0.439032, 0.264522, 0.296446], "orange", "o"),
    (orange_x, [0.923308, 0.003976, 0.072716], "orange", "o"),
]
for xs, ys, point_color, point_marker in overlays:
    sns.scatterplot(x = xs, y = ys, color = point_color, marker = point_marker)

# Outline the first three bar patches in orange and all later ones in blue.
c = 0
for patch in t_ax.patches:
    patch.set_edgecolor("orange" if c <= 2 else "blue")
    c += 1
plt.xticks(rotation=0)
#plt.legend([],[], frameon=False)