In [None]:
import numpy as np
import math
import matplotlib.pyplot as plt
import csv
import pandas as pd
from scipy import stats
import re
from scipy.stats import pearsonr,spearmanr,fisher_exact,binom_test
#import rpy2.robjects as robjects
import random
from statsmodels.stats.multitest import fdrcorrection
import os
import seaborn as sns
#import gseapy as gs
from scipy.stats import norm
import gseapy as gs
from collections import Counter
import scanpy as sc

#Map full cell type names to the abbreviated labels used in output tables
d_ct_abrev = {
    "Chondrocyte": "chondrocyte",
    "Forebrain glutamatergic progenitors": "brain.glut.prog",
    "Forebrain GABAergic progenitors": "brain.GABA.prog",
    "Intermediate progenitors": "inter.prog",
    "Forebrain glutamatergic neurons": "brain.glut.neu",
    "Forebrain GABAergic neurons": "brain.GABA.neu",
    "Spinal GABAergic neurons": "spine.GABA.neu",
    "Spinal glutamatergic neurons": "spine.glut.neu",
    "Chondrocytes": "chondrocyte",
    "Mesenchyme 0": "mesen.0",
    "Mesenchyme 2": "mesen.2",
    "Mesenchyme cycling": "mesen.cyc",
    "Forebrain GABAergic neurons 0": "Toss",
}

#Function to convert file to cell type
def file_to_celltype(x):
    """Turn a result file name (or path) into the full cell type name.

    Parameters
    ----------
    x : str
        File name or "/"-separated path; only the last path component is used.

    Returns
    -------
    str
        The name with the dataset prefixes, GO library suffixes, ".csv",
        " all" and " LinReg" removed, underscores turned into spaces, and the
        brain cell types prefixed with "Forebrain" (unless already "Spinal").
    """
    x = x.split("/")[-1]
    #Strip enrichment-library suffixes and dataset prefixes; also repair the "Mesechyme" spelling
    x = x.replace("_GO_Molecular_Function_2023", "").replace("_GO_Biological_Process_2023", "").replace("Mesechyme", "Mesenchyme").replace("Brain_Or_2010_Div_New4_NewNorm_", "")
    x = x.replace("Mesenchymal_Or_2010_Div_New4_NewNorm_", "").replace("Chondrocyte_Or_2010_Div_New4_NewNorm_", "").replace(".csv", "").replace("_", " ").replace(" all", "")
    #"Spinal Forebrain" undoes the Forebrain prefix that was just added to spinal cell types
    x = x.replace("Glutamatergic", "Forebrain glutamatergic").replace("GABAergic", "Forebrain GABAergic").replace("Spinal Forebrain", "Spinal").replace("Chondrocytes", "Chondrocyte")
    #Regression input files carry a "_LinReg" suffix; strip it so those names resolve to the same cell type
    return x.replace(" LinReg", "")

#Use Arial as the matplotlib font family for all figures
plt.rcParams["font.family"] = "Arial"

In [None]:
#Query Biomart for mouse<->human homolog tables (requires network access)
from gseapy import Biomart
bm = Biomart()
#Convert mouse gene names to human gene names with this
m2h = bm.query(dataset='mmusculus_gene_ensembl',
               attributes=['ensembl_gene_id','external_gene_name',
                           'hsapiens_homolog_ensembl_gene',
                           'hsapiens_homolog_associated_gene_name'])

#Reverse table (human gene -> mouse homolog); NOTE(review): it is not written to disk in this cell
h2m = bm.query(dataset='hsapiens_gene_ensembl',
               attributes=['ensembl_gene_id','external_gene_name',
                           'mmusculus_homolog_ensembl_gene',
                           'mmusculus_homolog_associated_gene_name'])
#Only the mouse-to-human table is saved; the next cell reads it back from this file
m2h.to_csv("Mouse_To_Human_Gene_Conversions.csv", index = False)

In [None]:
#Load the saved conversion table, dropping rows with any missing value
m2h = pd.read_csv("Mouse_To_Human_Gene_Conversions.csv").dropna()
#d maps mouse gene name -> human homolog name; for a repeated mouse name the last row wins
d = dict(zip(m2h["external_gene_name"], m2h["hsapiens_homolog_associated_gene_name"]))

In [None]:
#Compute number of CREs per gene
v = pd.read_csv("Final/Human_Promoters_Ortho_Sorted_hg38_cCREs.sort.bed", sep = "\t", header = None)
#Drop rows whose column 5 is exactly "CTCF-only,CTCF-bound"
v = v[~v[5].isin(["CTCF-only,CTCF-bound"])]

#Only count within 100 kb of the TSS
v = v[v[10] < 100000]
#Column 9 is used as the gene key; count how many remaining rows each gene has
cres_per_gene = Counter(v[9])

#Build the table straight from the counter instead of appending row by row
cpg = pd.DataFrame(list(cres_per_gene.items()), columns = ["Gene", "Number of CREs"])
#sort_values returns a new frame, so the result must be assigned back to take effect
cpg = cpg.sort_values("Number of CREs")
cpg = cpg.set_index("Gene")

In [None]:
#Get the widest possible interval for genes with multiple promoters
v = pd.read_csv("Final/Human_Promoters_Ortho_Sorted_hg38.sort.bed", sep = "\t", header = None)
#One grouped pass over the gene column (3): take the chromosome of the gene's last row,
#the smallest start and the largest end. This replaces re-filtering the whole frame per gene.
merged = v.groupby(3, sort = False).agg({0: "last", 1: "min", 2: "max"})
#Restore the bed column order chrom, start, end, gene with plain 0..3 column labels
rns1 = merged.reset_index()[[0, 1, 2, 3]]
rns1.columns = range(4)

In [None]:
#Write out if desired
#Tab-separated, without header or index column
rns1.to_csv("Final/Human_Promoters_Ortho_Sorted_hg38_single.bed", sep = "\t", header = None, index = None)

In [None]:
#Sorted with sort -k1,1 -k2,2n
v = pd.read_csv("Final/Human_Promoters_Ortho_Sorted_hg38_single.sort.bed", sep = "\t", header = None)
o = []
for chrom in ["chr" + str(x) for x in range(1, 23)] + ["chrX", "chrY"]:

    #Restrict to chrom
    v_chrom = v[v[0] == chrom]
    #Pull the columns out once as plain lists; this avoids a frame scan per lookup and int() on a Series
    genes = v_chrom[3].tolist()
    starts = v_chrom[1].tolist()
    ends = v_chrom[2].tolist()

    #The first and last entries on a chromosome have a neighbour on only one side and are skipped
    for i in range(1, len(genes) - 1):
        #Gap to the previous interval's end plus gap to the next interval's start
        upstream_gap = starts[i] - ends[i-1]
        downstream_gap = starts[i+1] - ends[i]
        o.append([genes[i], upstream_gap + downstream_gap])
rns = pd.DataFrame(o)


In [None]:
#Compute Tau, a measure of tissue-specificity for our data
def tau(x):
    """Return sum(1 - x_i/max(x)) / (n - 1) for the n values in x.

    Parameters
    ----------
    x : array-like of numbers (list, numpy array or pandas Series)

    Returns
    -------
    float
        0 when all values are equal, 1 when a single value is non-zero.
        NaN when the statistic is undefined: fewer than two values, a maximum
        of zero, or a maximum that is NaN.
    """
    vals = np.asarray(x, dtype = float)
    n = len(vals)
    if n < 2:
        return np.nan
    max_x = np.max(vals)
    #Dividing by a zero or NaN maximum can only give NaN; return it directly instead of via a warning
    if max_x == 0 or np.isnan(max_x):
        return np.nan
    return np.sum((1-vals/max_x)/(n-1))

#One single-column frame of mean normalized CPM per kept file, in directory listing order
frames = []

#Go through pseudobulked unfiltered data
for file in os.listdir("Final/Unfiltered_Pseudobulked/"):
    v = pd.read_csv("Final/Unfiltered_Pseudobulked/" + file).set_index("Unnamed: 0")

    #Brain files average the MR1 and RM1 samples, chondrocyte/mesenchyme files the MR1 and RM2 samples
    is_brain = "Brain" in file and "neurons_0" not in file
    is_connective = ("Chondro" in file or "Mesenchyme" in file) and "Mesechyme_2" not in file
    if is_brain:
        second_sample = "RM1"
    elif is_connective:
        second_sample = "RM2"
    else:
        #File is not used
        continue

    #Compute mean normalized CPM
    v["Mean Norm CPM"] = (v["MR1_mi Norm CPM"] + v["MR1_ri Norm CPM"] + v[second_sample + "_mi Norm CPM"] + v[second_sample + "_ri Norm CPM"])/4
    v = v[["Mean Norm CPM"]]
    v.columns = ["Mean norm CPM " + file]
    frames.append(v)

#Left-join every later frame onto the first one; df stays 0 if no file was kept
df = 0
if frames:
    df = frames[0]
    for extra in frames[1:]:
        df = df.join(extra)

#Restrict to only genes that are appreciably expressed in at least one cell type
#Only the per-cell-type expression columns may enter the maximum and Tau. If the helper
#columns were included, tau() would count "Max Exp" as an extra cell type and divide by n instead of n-1.
expr_cols = [col for col in df.columns if col not in ("Max Exp", "Tau")]
df["Max Exp"] = df[expr_cols].max(axis = 1)
#Copy so that adding the Tau column below does not write into a slice of the unfiltered frame
df = df[df["Max Exp"] > 0.2].copy()

#Compute tau
df["Tau"] = df[expr_cols].apply(tau, axis = 1)
TAU = df[["Tau"]]

#Re-key Tau by the converted gene name from d; genes without an entry in d are skipped.
#The explicit membership test replaces a bare except that would also have hidden unrelated errors.
out = [[d[index]] + list(row) for index, row in TAU.iterrows() if index in d]
TAU = pd.DataFrame(out).set_index(0)

In [None]:
#Name the two columns of the neighbour-distance table and index it by gene
gene_col = "Gene"
distance_col = "Distance to nearest genes"
rns.columns = [gene_col, distance_col]
rns = rns.set_index(gene_col)

In [None]:
#Working with the Kaessmann files
#Convert rat gene names to mouse and restrict to one-to-one orthos
orthos = pd.read_csv("mouse_rat_orthologs.txt", sep = "\t").dropna().set_index("Gene stable ID")
orthos = orthos[orthos["Mouse homology type"] == "ortholog_one2one"]

v_rat = pd.read_csv("Kaessmann_Bulk/Rat_rpkm.txt", sep = " ").set_index("Names")
#Rows without a one-to-one ortholog get NaN from the join and are dropped
v_rat = v_rat.join(orthos).dropna()

#Relabel each remaining row with its mouse gene name
new_ind = [orthos.loc[gene_id]["Mouse gene name"] for gene_id in list(v_rat.index)]
v_rat.index = new_ind

In [None]:
#Restrict to one-to-one orthos
orthos = pd.read_csv("mouse_rat_orthologs.txt", sep = "\t").dropna().set_index("Mouse gene stable ID")
orthos = orthos[orthos["Mouse homology type"] == "ortholog_one2one"]

v_mouse = pd.read_csv("Kaessmann_Bulk/Mouse_rpkm.txt", sep = " ").set_index("Names")
#Rows without a one-to-one ortholog get NaN from the join and are dropped
v_mouse = v_mouse.join(orthos).dropna()

#Relabel each remaining row with its mouse gene name
new_ind = [orthos.loc[gene_id]["Mouse gene name"] for gene_id in list(v_mouse.index)]
v_mouse.index = new_ind

In [None]:
#Get only genes measured in both
#intersect1d returns the sorted unique shared labels, so both tables end up in the same row order
keep = np.intersect1d(v_mouse.index, v_rat.index)
v_mouse = v_mouse.loc[keep]
v_rat = v_rat.loc[keep]

In [None]:
#Pull out the relevant embryonic time points for mouse
mcol = np.array(v_mouse.columns).astype(str)
#np.char.find is the public name of the routine previously reached through np.core.defchararray
keep11 = np.flatnonzero(np.char.find(mcol, "Brain.e11.5") != -1)
keep13 = np.flatnonzero(np.char.find(mcol, "Brain.e13.5") != -1)
keep15 = np.flatnonzero(np.char.find(mcol, "Brain.e15.5") != -1)

#The time labels and column positions are identical for every gene, so build them once
labs = [11.5]*len(keep11) + [13.5]*len(keep13) + [15.5]*len(keep15)
stage_cols = np.concatenate([keep11, keep13, keep15])

out = []
for index, row in v_mouse.iterrows():
    #stage_cols holds integer positions, so select explicitly by position with .iloc
    data_all = list(row.iloc[stage_cols])
    #Compute correlation of expression with time
    p = pearsonr(data_all, labs)
    out.append([index, p[0], p[1]])
df_mouse = pd.DataFrame(out)

#The mouse and rat tables must list the same genes in the same order
assert(list(v_mouse.index) == list(v_rat.index))

In [None]:
#Pull out the relevant embryonic time points for rat
rcol = np.array(v_rat.columns).astype(str)
#The rat e13/e15/e17 columns are stored under the mouse-style names keep11/keep13/keep15
#np.char.find is the public name of the routine previously reached through np.core.defchararray
keep11 = np.flatnonzero(np.char.find(rcol, "Brain.e13") != -1)
keep13 = np.flatnonzero(np.char.find(rcol, "Brain.e15") != -1)
keep15 = np.flatnonzero(np.char.find(rcol, "Brain.e17") != -1)

#The three rat time points are labelled 11.5/13.5/15.5; Pearson r does not change under such a constant shift
labs = [11.5]*len(keep11) + [13.5]*len(keep13) + [15.5]*len(keep15)
stage_cols = np.concatenate([keep11, keep13, keep15])

out = []
for index, row in v_rat.iterrows():
    #stage_cols holds integer positions, so select explicitly by position with .iloc
    data_all = list(row.iloc[stage_cols])
    #Compute correlation of expression with time
    p = pearsonr(data_all, labs)
    out.append([index, p[0], p[1]])
df_rat = pd.DataFrame(out)

In [None]:
#Join rat and mouse
#Label both correlation tables first, then index them by gene
df_rat.columns = ["Gene", "Pearson r rat", "Pearson p-value rat"]
df_mouse.columns = ["Gene", "Pearson r mouse", "Pearson p-value mouse"]
df_rat = df_rat.set_index("Gene")
df_mouse = df_mouse.set_index("Gene")
#Left join: the rat rows define the gene set
df = df_rat.join(df_mouse)

In [None]:
#Multiply pearson rho values
#Positive product: same direction in both species; negative product: opposite directions
rat_r = df["Pearson r rat"]
mouse_r = df["Pearson r mouse"]
df["Mult"] = rat_r*mouse_r
df = df.sort_values("Mult")
df = df.dropna()
df

In [None]:
#If negative, then it is likely these genes have opposing expression trajectories in mouse and rat
is_opposite = df["Mult"] < -0.25
df_oppo = df[is_opposite]

#Subset to those that are up in rat and down in mouse and vice versa
rat_r = df_oppo["Pearson r rat"]
mouse_r = df_oppo["Pearson r mouse"]
upr_downm = df_oppo[(rat_r > 0) & (mouse_r < 0)]
upm_downr = df_oppo[(rat_r < 0) & (mouse_r > 0)]

#Single gene set holding every opposite-direction gene
d_DifDir = {"DifMouseRat":list(df_oppo.index)}

In [None]:
#If there is a major role for changes in expression trajectory in driving our results, we would expect:
#Enrichment of high interaction with opposite genes
#We only see a weak enrichment in Spinal glutamatergic neurons, suggesting this isn't a major player

for file in os.listdir("Final/Div"):
    if "New4_NewNorm" in file and "Brain" in file:
        v = pd.read_csv("Final/Div/" + file, sep = ",")
        v = v.sort_values("Proportion interaction", ascending = False)
        ranking = v[["Unnamed: 0", "Proportion interaction"]]
        #Write below the relative "Final/" directory that the inputs are read from, rather than a machine-specific absolute path
        outdir = "Final/GSEAPY_New4_Interaction_DifTimePattern/" + file.replace(".csv", "")
        try:
            gs.prerank(rnk=ranking, gene_sets=d_DifDir, threads=4, permutation_num=1000, outdir=outdir, format='png', seed=6, min_size = 10, max_size = 30000)
        except Exception as err:
            #Still best effort (continue with the next file), but report the failure instead of hiding it
            print("GSEA prerank failed for " + file + ": " + str(err))

In [None]:
#If there is a major role for changes in expression trajectory in driving our results, we would expect:
#Enrichment of UpRat_DownMouse at the bottom of the list
#Enrichment of UpMouse_DownRat at the top of the list
#We only see one of these things for three cell types
#Combined with the above I don't think we have strong evidence that opposite expression trajectories are a likely cause of interactions
#NOTE(review): the gene set passed below is d_DifDir, not separate UpRat_DownMouse / UpMouse_DownRat sets - confirm this is intended


for file in os.listdir("Final/Div"):
    if "New4_NewNorm" in file and "Brain" in file:
        v = pd.read_csv("Final/Div/" + file, sep = ",")
        #Give the interaction proportion the sign of the interaction term
        v["Signed interaction"] = np.sign(v["Interaction"])*v["Proportion interaction"]
        v = v.sort_values("Signed interaction", ascending = False)
        ranking = v[["Unnamed: 0", "Signed interaction"]]
        #Write below the relative "Final/" directory that the inputs are read from, rather than a machine-specific absolute path
        outdir = "Final/GSEAPY_New4_Interaction_DifTimePatternSigned/" + file.replace(".csv", "")
        try:
            gs.prerank(rnk=ranking, gene_sets=d_DifDir, threads=4, permutation_num=1000, outdir=outdir, format='png', seed=6, min_size = 10, max_size = 30000)
        except Exception as err:
            #Still best effort (continue with the next file), but report the failure instead of hiding it
            print("GSEA prerank failed for " + file + ": " + str(err))

In [None]:
#Get genes going in the same directions
is_same = df["Mult"] > 0.7
df_same = df[is_same]

#Split into down in both and up in both
#The product is positive, so the sign of the rat correlation is the shared direction
rat_r = df_same["Pearson r rat"]
up_both = df_same[rat_r > 0]
down_both = df_same[rat_r < 0]
d_SameDir = {"SameDir":list(df_same.index)}


In [None]:
#We would expect that same direction would be enriched for opposing/reinforcing
#So we should see enrichment at the top of the list in this case
#We see weak enrichment for Intermediate progenitors and spinal GABAergic neurons, and weaker enrichments in other cell types

for file in os.listdir("Final/Div"):
    if "New4_NewNorm" in file and "Brain" in file:
        v = pd.read_csv("Final/Div/" + file, sep = ",")
        #Rank by the absolute product of the intrinsic and extrinsic proportions
        v["Multiply"] = np.abs(v["Proportion intrinsic"]*v["Proportion extrinsic"])
        v = v.sort_values("Multiply", ascending = False)
        ranking = v[["Unnamed: 0", "Multiply"]]
        #Write below the relative "Final/" directory that the inputs are read from, rather than a machine-specific absolute path
        outdir = "Final/GSEAPY_New4_ReinfOppo_SameDir_Abs/" + file.replace(".csv", "")
        try:
            gs.prerank(rnk=ranking, gene_sets=d_SameDir, threads=4, permutation_num=1000, outdir=outdir, format='png', seed=6, min_size = 10, max_size = 30000)
        except Exception as err:
            #Still best effort (continue with the next file), but report the failure instead of hiding it
            print("GSEA prerank failed for " + file + ": " + str(err))

In [None]:
#Renormalize all columns to sum to 1000000
#Annotation columns brought in by the ortholog join; everything else is treated as a sample
annotation_cols = ["Gene stable ID", "Mouse gene name", "Mouse orthology confidence [0 low, 1 high]", "Mouse homology type", "Gene name"]
v_mouseT = v_mouse.T
sample_rows = np.setdiff1d(v_mouseT.index, annotation_cols)
v_mouseT = v_mouseT.loc[sample_rows].copy()

#Each row of the transposed table is one sample; scale it so its values sum to one million
out = [sample/np.sum(sample)*1000000 for sample_name, sample in v_mouseT.iterrows()]

#Back to genes as rows and samples as columns
v_mouse_new = pd.DataFrame(out).T
v_mouse_new.columns = v_mouseT.index
v_mouse_new.index = v_mouseT.columns


In [None]:
#Do the same renormalization for rat
#Annotation columns brought in by the ortholog join; everything else is treated as a sample
annotation_cols = ["Gene stable ID", "rat gene name", "rat orthology confidence [0 low, 1 high]", "rat homology type", "Gene name", 'Mouse gene name', 'Mouse gene stable ID', 'Mouse homology type', 'Mouse orthology confidence [0 low, 1 high]']
v_ratT = v_rat.T
sample_rows = np.setdiff1d(v_ratT.index, annotation_cols)
v_ratT = v_ratT.loc[sample_rows].copy()

#Each row of the transposed table is one sample; scale it so its values sum to one million
out = [sample/np.sum(sample)*1000000 for sample_name, sample in v_ratT.iterrows()]

#Back to genes as rows and samples as columns
v_rat_new = pd.DataFrame(out).T
v_rat_new.columns = v_ratT.index
v_rat_new.index = v_ratT.columns
v_rat_new

In [None]:
#Need to use the Kaessmann data to determine whether higher expression in mouse

#Again get the right columns
#np.char.find is the public name of the routine previously reached through np.core.defchararray
mcol = np.array(v_mouse_new.columns).astype(str)
keep11m = np.flatnonzero(np.char.find(mcol, "Brain.e11.5") != -1)
keep13m = np.flatnonzero(np.char.find(mcol, "Brain.e13.5") != -1)
keep15m = np.flatnonzero(np.char.find(mcol, "Brain.e15.5") != -1)

#The rat suffixes 11r/13r/15r follow the mouse naming: they hold the rat e13/e15/e17 columns respectively
rcol = np.array(v_rat_new.columns).astype(str)
keep11r = np.flatnonzero(np.char.find(rcol, "Brain.e13") != -1)
keep13r = np.flatnonzero(np.char.find(rcol, "Brain.e15") != -1)
keep15r = np.flatnonzero(np.char.find(rcol, "Brain.e17") != -1)

high_rat = []
high_mouse = []
same_expr = []
for index in list(v_mouse_new.index):

    #Get the expression levels at each stage for each sample
    #The keep* arrays hold integer positions, so select explicitly by position with .iloc
    row_m = v_mouse_new.loc[index]
    data_11m = row_m.iloc[keep11m]
    data_13m = row_m.iloc[keep13m]
    data_15m = row_m.iloc[keep15m]

    row_r = v_rat_new.loc[index]
    data_11r = row_r.iloc[keep11r]
    data_13r = row_r.iloc[keep13r]
    data_15r = row_r.iloc[keep15r]

    #Log fold-change at (nearly) the same embryonic day:
    #mouse e13.5 vs rat e13 columns, and mouse e15.5 vs rat e15 columns
    lfc13 = np.log2((1+np.mean(data_13m))/(1+np.mean(data_11r)))
    lfc15 = np.log2((1+np.mean(data_15m))/(1+np.mean(data_13r)))

    #Log fold-change of mouse e13.5 vs rat e15 columns (currently not used in the classification below)
    #NOTE(review): these are the columns paired positionally above, so the name "NotStageMatch" looks inverted - confirm
    lfcNotStageMatch = np.log2((1+np.mean(data_13m))/(1+np.mean(data_13r)))

    #Split into genes with higher expression in mouse and higher expression in rat
    if lfc13 > 0.5 and lfc15 > 0.5:
        high_mouse.append(index)
    elif lfc13 < -0.5 and lfc15 < -0.5:
        high_rat.append(index)
    #If we add the restriction of abs(lfcNotStageMatch) > 0.5 there are very few genes!
    #Indicates that our extrinsic genes are probably fine
    #Get genes with no difference in both of the lfc13/lfc15 comparisons
    elif abs(lfc13) < 0.25 and abs(lfc15) < 0.25:
        same_expr.append(index)


In [None]:
#Genes whose time trend has the same direction in both species
is_same = df["Mult"] > 0.7
df_same = df[is_same]
rat_r = df_same["Pearson r rat"]
up_both = df_same[rat_r > 0]
down_both = df_same[rat_r < 0]

#Get genes that should show opposing intrinsic/extrinsic based on their expression trajectories
should_be_oppo1 = np.intersect1d(high_mouse, up_both.index)
should_be_oppo2 = np.intersect1d(high_rat, down_both.index)

#Get genes that should show reinforcing intrinsic/extrinsic based on their expression trajectories
should_be_reinf1 = np.intersect1d(high_rat, up_both.index)
should_be_reinf2 = np.intersect1d(high_mouse, down_both.index)

#Two gene sets, each the concatenation of its two sub-cases
oppo_genes = list(should_be_oppo1) + list(should_be_oppo2)
reinf_genes = list(should_be_reinf1) + list(should_be_reinf2)
d_OR = {"ShouldBeOppo":oppo_genes, "ShouldBeReinf":reinf_genes}


In [None]:
#Signed test where opposite should be enriched for Mouse expr > Rat expr and increasing, Mouse expr < Rat expr and decreasing
#Reinforcing should be enriched for Mouse expr > Rat expr and decreasing, Mouse expr < Rat expr and increasing
#We don't see many enrichments, again implying that this is not a super major confounder for our results


for file in os.listdir("Final/Div"):
    if "New4_NewNorm" in file and "Brain" in file:
        v = pd.read_csv("Final/Div/" + file, sep = ",")
        v.index = v["Unnamed: 0"]
        #Product of the two proportions, positive when intrinsic and extrinsic have the same sign and negative otherwise
        v["Multiply"] = np.sign(v["Intrinsic"])*v["Proportion intrinsic"]*v["Proportion extrinsic"]*np.sign(v["Extrinsic"])

        v = v.sort_values("Multiply", ascending = False)
        ranking = v[["Unnamed: 0", "Multiply"]]
        #Write below the relative "Final/" directory that the inputs are read from, rather than a machine-specific absolute path
        outdir = "Final/GSEAPY_New4_ReinfOppo_SameDirSigned/" + file.replace(".csv", "")
        try:
            gs.prerank(rnk=ranking, gene_sets=d_OR, threads=4, permutation_num=1000, outdir=outdir, format='png', seed=6, min_size = 10, max_size = 30000)
        except Exception as err:
            #Still best effort (continue with the next file), but report the failure instead of hiding it
            print("GSEA prerank failed for " + file + ": " + str(err))

In [None]:
#Rebuild the same-direction gene set (d_SameDir is redefined here as a single "SameDir" set)
is_same = df["Mult"] > 0.7
df_same = df[is_same]
rat_r = df_same["Pearson r rat"]
up_both = df_same[rat_r > 0]
down_both = df_same[rat_r < 0]
d_SameDir = {"SameDir":list(df_same.index)}

In [None]:
#We would expect that extrinsic would be enriched for same direction
#So we should see enrichment at the top of the list in this case
#We see weak enrichments in multiple cell types, indicating that this is not a major issue

for file in os.listdir("Final/Div"):
    if "New4_NewNorm" in file and "Brain" in file:
        v = pd.read_csv("Final/Div/" + file, sep = ",")
        v = v.sort_values("Proportion extrinsic", ascending = False)
        ranking = v[["Unnamed: 0", "Proportion extrinsic"]]
        #Write below the relative "Final/" directory that the inputs are read from, rather than a machine-specific absolute path
        outdir = "Final/GSEAPY_New4_Extrinsic_SameDir/" + file.replace(".csv", "")
        try:
            gs.prerank(rnk=ranking, gene_sets=d_SameDir, threads=4, permutation_num=1000, outdir=outdir, format='png', seed=6, min_size = 10, max_size = 30000)
        except Exception as err:
            #Still best effort (continue with the next file), but report the failure instead of hiding it
            print("GSEA prerank failed for " + file + ": " + str(err))

In [None]:
#Same-direction genes, restricted to those in same_expr
is_same = df["Mult"] > 0.7
df_same = df[is_same]
rat_r = df_same["Pearson r rat"]
up_both = df_same[rat_r > 0]
down_both = df_same[rat_r < 0]

#d_SameDir is redefined here with separate "UpBoth" and "DownBoth" sets
up_same_expr = np.intersect1d(list(up_both.index), same_expr)
down_same_expr = np.intersect1d(list(down_both.index), same_expr)
d_SameDir = {"UpBoth":up_same_expr, "DownBoth":down_same_expr}


In [None]:
#We would expect an even stronger enrichment for those genes where mouse and rat have similar expression at E13.5 and E15.5
#There are close to zero genes that have similar expression at E13.5 and E15.5 but a larger difference between stage matched in bulk data
#This indicates off the bat that the extrinsic genes are probably truly extrinsic
#Even with more relaxed criteria there are very few genes that have the same expression at E13.5 and E15.5 in mouse/rat (at least in bulk)
#And are increasing/decreasing in expression over time
#This suggests that the kind of heterochronic shift needed to explain extrinsic divergence is extremely rare at the bulk level
#But that when it occurs it can indeed masquerade as extrinsic divergence, at least in spinal neurons where we see enrichment


for file in os.listdir("Final/Div"):
    if "New4_NewNorm" in file and "Brain" in file:
        v = pd.read_csv("Final/Div/" + file, sep = ",")
        #Give the extrinsic proportion the sign of the extrinsic term
        v["Signed proportion extrinsic"] = np.sign(v["Extrinsic"])*v["Proportion extrinsic"]
        v = v.sort_values("Signed proportion extrinsic", ascending = False)
        ranking = v[["Unnamed: 0", "Signed proportion extrinsic"]]
        #Write below the relative "Final/" directory that the inputs are read from, rather than a machine-specific absolute path
        outdir = "Final/GSEAPY_New4_Extrinsic_SameDirSameExpr/" + file.replace(".csv", "")
        try:
            gs.prerank(rnk=ranking, gene_sets=d_SameDir, threads=4, permutation_num=1000, outdir=outdir, format='png', seed=6, min_size = 10, max_size = 30000)
        except Exception as err:
            #Still best effort (continue with the next file), but report the failure instead of hiding it
            print("GSEA prerank failed for " + file + ": " + str(err))

In [None]:
#Want to compute Tau and variance of the log fold-changes across tissues for the Kaessmann data
mcol = np.array(v_mouse_new.columns).astype(str)
#np.char.find is the public name of the routine previously reached through np.core.defchararray
keep13 = mcol[np.flatnonzero(np.char.find(mcol, "e13.5") != -1)]

#Get the right stage for mouse
v_mouse_new13 = v_mouse_new[keep13]
mcols2 = np.array(v_mouse_new13.columns).astype(str)

#Get the names of all tissues (the text before the first "."); sorted so the column order is reproducible
tissues = sorted(set([x.split(".")[0] for x in list(keep13)]))
out = []
for tissue in tissues:
    #get the correct tissue and compute mean expression
    #Match the tissue prefix exactly so a tissue name contained in another column name cannot pull that column in
    keepT = [col for col in mcols2 if col.split(".")[0] == tissue]
    v_mouse_new13T = v_mouse_new13[keepT]
    out.append([tissue] + list(np.mean(v_mouse_new13T, axis = 1)))

#Create data frame
df_m13 = pd.DataFrame(out)
df_m13.columns = ["Tissue"] + list(v_mouse_new13.index)
#Transpose so genes are rows and tissues are columns
df_m13 = df_m13.set_index("Tissue").T

In [None]:
#Want to compute Tau and variance of the log fold-changes across tissues for the Kaessmann data, this time for rat
#Repeated from code above; the "13" in the variable names is kept from the mouse cell although the rat columns selected contain "e15"
rcol = np.array(v_rat_new.columns).astype(str)
#np.char.find is the public name of the routine previously reached through np.core.defchararray
keep13 = rcol[np.flatnonzero(np.char.find(rcol, "e15") != -1)]
v_rat_new13 = v_rat_new[keep13]
rcols2 = np.array(v_rat_new13.columns).astype(str)

#Tissue names are the text before the first "."; sorted so the column order is reproducible
tissues = sorted(set([x.split(".")[0] for x in list(keep13)]))
out = []
for tissue in tissues:
    #Match the tissue prefix exactly so a tissue name contained in another column name cannot pull that column in
    keepT = [col for col in rcols2 if col.split(".")[0] == tissue]
    v_rat_new13T = v_rat_new13[keepT]
    out.append([tissue] + list(np.mean(v_rat_new13T, axis = 1)))
df_r13 = pd.DataFrame(out)
df_r13.columns = ["Tissue"] + list(v_rat_new13.index)
#Transpose so genes are rows and tissues are columns
df_r13 = df_r13.set_index("Tissue").T

In [None]:
#Require at least moderate expression in one tissue
#NOTE(review): the helper column "Max Expr" stays in both frames after this cell
df_r13["Max Expr"] = np.max(df_r13, axis = 1)
keep_rat = list(df_r13[df_r13["Max Expr"] > 5].index)

df_m13["Max Expr"] = np.max(df_m13, axis = 1)
keep_mouse = list(df_m13[df_m13["Max Expr"] > 5].index)

#Keep genes that pass the threshold in either species: set(keep_rat + keep_mouse) is a union
#NOTE(review): the earlier wording here was "genes in both"; if an intersection was intended the code needs to change - confirm
keep = list(set(keep_rat + keep_mouse))
#Sort both frames by gene so their rows line up
df_r13 = df_r13.loc[keep].sort_index()
df_m13 = df_m13.loc[keep].sort_index()


In [None]:
#Copied from above
def tau(x):
    """Return sum(1 - x_i/max(x)) / (n - 1) for the n values in x.

    Parameters
    ----------
    x : array-like of numbers (list, numpy array or pandas Series)

    Returns
    -------
    float
        0 when all values are equal, 1 when a single value is non-zero.
        NaN when the statistic is undefined: fewer than two values, a maximum
        of zero, or a maximum that is NaN.
    """
    vals = np.asarray(x, dtype = float)
    n = len(vals)
    if n < 2:
        return np.nan
    max_x = np.max(vals)
    #Dividing by a zero or NaN maximum can only give NaN; return it directly instead of via a warning
    if max_x == 0 or np.isnan(max_x):
        return np.nan
    return np.sum((1-vals/max_x)/(n-1))

In [None]:
#Remove cerebellum as it is highly similar to forebrain
df_r13 = df_r13.drop(["Cerebellum"], axis = 1)
df_m13 = df_m13.drop(["Cerebellum"], axis = 1)
#Also remove the helper "Max Expr" column if it is present. It is not a tissue, and leaving it in
#would add a fake tissue to the log fold-changes below and to the Tau and variance computed from these frames.
df_r13 = df_r13.drop(["Max Expr"], axis = 1, errors = "ignore")
df_m13 = df_m13.drop(["Max Expr"], axis = 1, errors = "ignore")
#Per-tissue log2 fold-change mouse/rat with a pseudocount of 1
df_lfc = np.log2((df_m13 + 1)/(df_r13 + 1))

In [None]:
#Compute tau
#Row-wise over every column currently in each frame
tau_rat = df_r13.apply(tau, axis = 1)
tau_mouse = df_m13.apply(tau, axis = 1)
df_r13["Tau"] = tau_rat
df_m13["Tau"] = tau_mouse

#Collect both species side by side
df_kaes_tau = pd.DataFrame()
df_kaes_tau["Tau rat"] = df_r13["Tau"]
df_kaes_tau["Tau mouse"] = df_m13["Tau"]

#Take average in mouse and rat
mean_tau = np.mean(df_kaes_tau, axis = 1)
df_kaes_tau["Mean tau"] = mean_tau

In [None]:
#Compute variance across tissues
#Row-wise over every column currently in df_lfc
row_variance = np.var(df_lfc, axis = 1)
df_lfc["Variance"] = row_variance
#Keep only the variance as a one-column frame
df_lfc_var = df_lfc[["Variance"]].copy()

In [None]:
#Re-key the Tau table by the converted gene name from d; genes without an entry in d are skipped.
#The explicit membership test replaces a bare except that would also have hidden unrelated errors.
out = [[d[index]] + list(row) for index, row in df_kaes_tau.iterrows() if index in d]
df_kaes_tau = pd.DataFrame(out, columns = ["Gene", "Tau rat", "Tau mouse", "Mean tau"])
#Only the mean Tau is carried forward
df_kaes_tau = df_kaes_tau.set_index("Gene")[["Mean tau"]]

In [None]:
#Re-key the variance table by the converted gene name from d; genes without an entry in d are skipped.
#The explicit membership test replaces a bare except that would also have hidden unrelated errors.
out = [[d[index]] + list(row) for index, row in df_lfc_var.iterrows() if index in d]
df_lfc_var = pd.DataFrame(out, columns = ["Gene", "LFC variance"])
df_lfc_var = df_lfc_var.set_index("Gene")[["LFC variance"]]
df_lfc_var

In [None]:
#Make files to run multiple linear regression
#exist_ok makes re-running the cell safe without swallowing unrelated errors
os.makedirs("Final/RegInput", exist_ok = True)

#Read in constraint (the same for every cell type, so read it once outside the loop)
cons = pd.read_csv("Final/Pritchard_Constraint_Metric_GeneName.txt", sep = "\t").drop_duplicates("Gene name").set_index("Gene name")
cons = cons[["post_mean"]]

#Columns taken from each divergence file, in output order
base_cols = ["Proportion intrinsic", "Proportion extrinsic", "Proportion interaction", "Intrinsic", "Extrinsic", "Interaction", "Total expression", "HM/HR"]
#Names for the columns added by the joins below, in join order
joined_cols = ["Constraint on expression", "Number of CREs", "cis-neighborhood size", "Tau us", "Tau kaes", "LFC variance kaes"]

for file in os.listdir("Final/Div/"):
    #Read in the info for the cell type
    v = pd.read_csv("Final/Div/" + file, sep = ",").set_index("Unnamed: 0")

    #Get the right sample for brain and connective tissue
    if "Brain" in file:
        v["Total expression"] = (v["MR1_ri Norm CPM"] + v["RM1_mi Norm CPM"])/2
    else:
        v["Total expression"] = (v["MR1_ri Norm CPM"] + v["RM2_mi Norm CPM"])/2

    #Restrict to only columns of interest
    v = v[base_cols]
    #Re-key by the converted gene name from d; genes without an entry in d are skipped
    out = [[d[index]] + list(row) for index, row in v.iterrows() if index in d]

    #Compute absolute log-fold change after making data frame
    df = pd.DataFrame(out).set_index(0)
    df.columns = base_cols
    df["HM/HR"] = np.abs(df["HM/HR"])

    #Join the various dataframes, require an entry for a gene in all of them
    df = df.join(cons).dropna().join(cpg).dropna().join(rns).dropna().join(TAU).dropna().join(df_kaes_tau).dropna().join(df_lfc_var).dropna()
    df.columns = base_cols + joined_cols
    #Compute CRE density
    #The neighborhood size is clamped to [10 kb, 100 kb], the same clamp used where these files are read back,
    #so a zero or negative size cannot produce an infinite or negative density
    df["CRE density"] = df["Number of CREs"]/np.maximum(10000, np.minimum(100000, df["cis-neighborhood size"]))
    #Keep the first row for each gene
    df = df[~df.index.duplicated(keep = "first")]
    #Output
    df.to_csv("Final/RegInput/" + file.replace(".csv", "_LinReg.csv"), index = True, header = True)

In [None]:
#Compute the spearman correlations with our predictors
#Also create current Figure 3D
#Can change to extrinsic or interaction
directory = "Final/RegInput/"

#Predictors in the order their rho/p-value pairs appear in the output columns
predictors = ["Total expression", "HM/HR", "Constraint on expression", "Number of CREs", "cis-neighborhood size", "Tau us", "CRE density", "Tau kaes", "LFC variance kaes"]

out = []
for file in os.listdir(directory):
    v = pd.read_csv(directory + file, sep = ",")
    v["CRE density"] = v["Number of CREs"]/np.maximum(10000, np.minimum(100000, v["cis-neighborhood size"]))

    #The response is the same for every predictor
    response = v["Proportion intrinsic"]-v["Proportion extrinsic"]

    #Strip the "_LinReg" suffix before the lookup so the cell type resolves with either definition of file_to_celltype
    result = [d_ct_abrev[file_to_celltype(file.replace("_LinReg", ""))]]
    for predictor in predictors:
        res = spearmanr(response, v[predictor])
        result.extend([res[0], res[1]])
    out.append(result)
df = pd.DataFrame(out)

#Create dataframe of the spearman correlations
df.columns = ["Cell type", "Total expression rho", "Total expression p-value", "LFC rho", "LFC p-value", "Constraint rho", "Constraint p-value", "Number CREs rho", "Number CREs p-value", "Nearest gene dist rho", "Nearest gene dist p-value", "Tau us rho", "Tau us p-value", "CRE density rho", "CRE density p-value", "Tau kaes rho", "Tau kaes p-value", "LFC var kaes rho", "LFC var kaes p-value"]
df.to_csv("Final/Spearman_Prop_Intrinsic-Extrinsic_Predictors.csv", sep = ",", index = False)

In [None]:
#Make illustrative figures
#Colors (hex codes), one per species and per divergence category
mouse = "#F2C911"
rat = "#65B0AC"
intrinsic = "#F55F00"
extrinsic = "#7D9AF4"
reinforcing = "#9B00F5"
opposing = "#F50901"
interaction = "#1E771A"

#Seaborn defaults with a larger font scale, then the "white" style
sns.set(font_scale=1.5)
sns.set_style("white")
#Map full cell type names to the abbreviated labels used in output tables
d_ct_abrev = {
    "Forebrain glutamatergic progenitors": "brain.glut.prog",
    "Forebrain GABAergic progenitors": "brain.GABA.prog",
    "Intermediate progenitors": "inter.prog",
    "Forebrain glutamatergic neurons": "brain.glut.neu",
    "Forebrain GABAergic neurons": "brain.GABA.neu",
    "Spinal GABAergic neurons": "spine.GABA.neu",
    "Spinal glutamatergic neurons": "spine.glut.neu",
    "Chondrocyte": "chondrocyte",
    "Chondrocytes": "chondrocyte",
    "Mesenchyme 0": "mesen.0",
    "Mesenchyme 2": "mesen.2",
    "Mesenchyme cycling": "mesen.cyc",
    "Forebrain GABAergic neurons 0": "Toss",
}

#Function to convert file to cell type
def file_to_celltype(x):
    """Turn a result file name (or path) into the full cell type name.

    Only the last "/"-separated component of x is used. The substitutions
    below are applied one after another, in the listed order.
    """
    substitutions = (
        #Enrichment-library suffixes, spelling repair and dataset prefixes
        ("_GO_Molecular_Function_2023", ""),
        ("_GO_Biological_Process_2023", ""),
        ("Mesechyme", "Mesenchyme"),
        ("Brain_Or_2010_Div_New4_NewNorm_", ""),
        ("Mesenchymal_Or_2010_Div_New4_NewNorm_", ""),
        ("Chondrocyte_Or_2010_Div_New4_NewNorm_", ""),
        #Extension, separators and the " all" suffix
        (".csv", ""),
        ("_", " "),
        (" all", ""),
        #Prefix brain cell types with "Forebrain", then undo that for spinal cell types
        ("Glutamatergic", "Forebrain glutamatergic"),
        ("GABAergic", "Forebrain GABAergic"),
        ("Spinal Forebrain", "Spinal"),
        ("Chondrocytes", "Chondrocyte"),
        #Suffix of the regression input files
        (" LinReg", ""),
    )
    name = x.split("/")[-1]
    for old, new in substitutions:
        name = name.replace(old, new)
    return name

#Set the matplotlib font family to Arial (placed after the seaborn style calls in this cell)
plt.rcParams["font.family"] = "Arial"

In [None]:
#Make versions of expression plots with made up data
#mouse_vals is the mouse expression and rat_vals is the rat expression
def ill_poss(mouse_vals, rat_vals, title, xlabs=["E11.5", "E13.5", "E15.5"]):
    """Draw two illustrative figures for a made-up expression trajectory.

    Figure 1 is a scatter of mouse_vals and rat_vals against xlabs. Figure 2
    is a bar plot built from the second and third values of each series.

    Parameters
    ----------
    mouse_vals, rat_vals : sequences of numbers, one value per entry of xlabs
        (at least three values, since indices 1 and 2 are read).
    title : str
        Title placed on both figures.
    xlabs : sequence of str
        x-axis labels of the scatter plot. The default list is only read, never modified.

    Returns
    -------
    None. Both figures are shown with plt.show().
    """
    #Create scatterplots
    #Label each series directly; a dict passed to plt.legend would only contribute its keys as labels
    plt.scatter(x = xlabs, y = mouse_vals, color = mouse, s = 100, alpha = 1, label = "Mouse")
    plt.scatter(x = xlabs, y = rat_vals, color = rat, s = 100, alpha = 1, label = "Rat")
    plt.title(title)
    plt.legend()
    plt.show()
    tp = pd.DataFrame()

    #Add barplots for mean values
    #"Rat-like env." uses the third value of both series, "Mouse-like env." the second value of both,
    #and "Host" pairs the second mouse value with the third rat value
    tp["Chimera (Donor-Host)"] = ["Rat-like env.", "Rat-like env.", "Mouse-like env.", "Mouse-like env.", "Host", "Host"]
    tp["Cell species"] = ["Mouse", "Rat", "Mouse", "Rat", "Mouse", "Rat"]
    tp["Norm CPM"] = [mouse_vals[2], rat_vals[2], mouse_vals[1], rat_vals[1], mouse_vals[1], rat_vals[2]]
    sns.set(font_scale=1.5)
    sns.set_style("white")
    sns.barplot(data = tp, y = "Norm CPM", x = "Chimera (Donor-Host)", hue = "Cell species", palette = {"Mouse":mouse, "Rat":rat}, alpha = 1, order = ["Host", "Rat-like env.", "Mouse-like env."])
    plt.ylabel("Pseudobulked normalized counts")
    plt.xlabel("Extrinsic environment")
    plt.title(title)
    #plt.legend(bbox_to_anchor=(1.375, 1.05))
    #Empty handle and label lists hide the legend on the bar plot
    plt.legend([],[], frameon=False)
    plt.show()

In [None]:
#An option for opposing intrinsic/extrinsic
ill_poss(
    mouse_vals = [10, 20, 30],
    rat_vals = [5, 12.5, 20],
    title = "Intrinsically higher mouse and increasing expression",
)


In [None]:
#Another opposing option
ill_poss(
    mouse_vals = [15, 10, 5],
    rat_vals = [30, 20, 10],
    title = "Intrinsically higher rat and decreasing expression",
)


In [None]:
#A reinforcing option
ill_poss(
    mouse_vals = [5, 12.5, 20],
    rat_vals = [10, 20, 30],
    title = "Intrinsically higher rat and increasing expression",
)


In [None]:
#Another reinforcing option
ill_poss(
    mouse_vals = [30, 20, 10],
    rat_vals = [15, 10, 5],
    title = "Intrinsically higher mouse and decreasing expression",
)


In [None]:
#A way to get "extrinsic divergence" with only intrinsic divergence
ill_poss(
    mouse_vals = [30, 20, 10],
    rat_vals = [29.5, 20.5, 9.5],
    title = "Decreasing expression and heterochronic shift",
)


In [None]:
#Another way to get "extrinsic divergence" with only intrinsic divergence
ill_poss(
    mouse_vals = [10, 20, 30],
    rat_vals = [10.5, 19.5, 30.5],
    title = "Increasing expression and heterochronic shift",
)

In [None]:
#Comment and uncomment the options below so that exactly one option is active throughout this cell
#E.g. if "For interaction" is what is uncommented, it should be what is uncommented for everything

#For opposing/reinforcing
#folder = "GSEAPY_New4_ReinfOppo_SameDirSigned"
#sort_by = "Multiply"

#For pure extrinsic
#(commented out: these two assignments were active but immediately overwritten by the interaction option below)
#folder = "GSEAPY_New4_Extrinsic_SameDirSameExpr"
#sort_by = "Signed proportion extrinsic"

#For interaction
folder = "GSEAPY_New4_Interaction_DifTimePatternSigned"
sort_by = "Signed interaction"

#Accumulators filled by the loop over cell types that follows
fdr = []
enrichment = []
ct_list = []
term_list = []
es = []
for file in os.listdir("Final/Div"):
    #Only do for brain since that is what we have Kaessmann data for
    if "Brain" in file:
        v = pd.read_csv("Final/Div/" + file, sep = ",")
        ct = file.replace("Brain_Or_2010_Div_New4_NewNorm_", "").replace("Mesenchymal_Or_2010_Div_New4_NewNorm_", "").replace("Chondrocyte_Or_2010_Div_New4_NewNorm_", "").replace(".csv", "")
        ct = d_ct_abrev[file_to_celltype(file)].replace(".", "_")
        
        #Get results of enrichment
        vv = pd.read_csv("Final/" + folder + "/" + file.replace(".csv", "") + "/gseapy.gene_set.prerank.report.csv", sep = ",")
        
        #For reinforcing/opposing
        #v[sort_by] = np.sign(v["Intrinsic"])*v["Proportion intrinsic"]*v["Proportion extrinsic"]*np.sign(v["Extrinsic"])
        
        #For pure extrinsic
        #v["Signed proportion extrinsic"] = np.sign(v["Extrinsic"])*v["Proportion extrinsic"]
        
        #For signed interaction
        v["Signed interaction"] = np.sign(v["Interaction"])*v["Proportion interaction"]
        
        v = v[["Unnamed: 0", sort_by]]
        v.columns = ["Gene", sort_by]
        
        #Add terms
        for index, row in vv.iterrows():
            term = row["Term"]
            term_list.append(row["Term"])
            #For opposing/reinforcing
            #gene_set = d_OR[term]
            
            #For pure extrinsic
            #gene_set = d_SameDir[term]
            
            #For interaction
            gene_set = d_DifDir[term]
            genes = row["Lead_genes"].split(";")
            fdr.append(max(row["FDR q-val"], 0.001))
            es.append(row["ES"])
            if row["ES"] < 0:
                asc = True
            else:
                asc = False
            v = v.sort_values(sort_by, ascending = asc)
            
            #Find the gene rank that was used as a cutoff in GSEAPY
            cut = 0
            for index, row in v.iterrows():
                cut += 1
                if genes[-1] == row["Gene"]:
                    break
        
            #Using that rank, get the number of genes that drive the enrichment and the number of genes going against it at an equivalent cutoff
            genes_agree = len(np.intersect1d(list(v["Gene"])[:cut], gene_set))
            genes_disagree = len(np.intersect1d(list(v["Gene"])[::-1][:cut], gene_set))
            ct_list.append(d_ct_abrev[file_to_celltype(file)])
            
            #Make sign of the fold-change properly match the sign of the original GSEAPY enrichment
            if asc:
                enrichment.append(-np.log2((genes_agree + 1)/(genes_disagree + 1)))
            else:
                enrichment.append(np.log2((genes_agree + 1)/(genes_disagree + 1)))

In [None]:
#Assemble the per-term results gathered above into one table:
#gene set, cell type, log2 fold-enrichment, FDR, and enrichment score
plot_columns = ["Gene set", "Cell type", "Log$_{2}$ fold-enrichment", "FDR", "ES"]
df_plot_new = pd.DataFrame([term_list, ct_list, enrichment, fdr, es], index = plot_columns).T

#The transposed frame holds generic objects, so cast to float before doing arithmetic
fdr_as_float = df_plot_new["FDR"].astype(float)
df_plot_new["-Log$_{10}$ FDR"] = -np.log10(fdr_as_float)
df_plot_new["Log$_{2}$ fold-enrichment"] = df_plot_new["Log$_{2}$ fold-enrichment"].astype(float)
df_plot_new

In [None]:
# Volcano-style scatter of log2 fold-enrichment against -log10 FDR, colored by gene set
sns.set(font_scale = 1.5)
sns.set_style("white")
fig, ax = plt.subplots()

# Split the rows by gene set so each group gets its own color and legend entry
is_up_rat = df_plot_new['Gene set'] == 'UpRat_DownMouse'
up_mouse_data = df_plot_new[~is_up_rat]
up_rat_data = df_plot_new[is_up_rat]

# Draw both groups on the same axes; the higher zorder puts the blue points on top
sns.scatterplot(x=up_mouse_data['Log$_{2}$ fold-enrichment'], y=up_mouse_data['-Log$_{10}$ FDR'], color='orange', label='Increasing in mouse and\ndecreasing in rat', zorder=1, ax=ax)
sns.scatterplot(x=up_rat_data['Log$_{2}$ fold-enrichment'], y=up_rat_data['-Log$_{10}$ FDR'], color='blue', label='Increasing in rat and\ndecreasing in mouse', zorder=2, ax=ax)

# Customize plot settings
#ax.set_title("Enrichments split by reinforcing and opposing")
ax.set_title("Enrichments split by directionality of\nchange in expression over time")
ax.set_xlabel('Log$_{2}$ fold-enrichment')
ax.set_ylabel('-Log$_{10}$ FDR')

# Show the legend, anchored outside the axes to the right
ax.legend(bbox_to_anchor= (2, 1))

# Display the plot
plt.show()

In [None]:
### The below three cells are repeats of the third cell above this one, so are not commented ###

In [None]:
#Plotting the results of Enrichment analysis with absolute measures
#For Reinforcing/Opposing

#Read the GSEAPY prerank reports and remember, per cell type, the leading-edge genes and the FDR
report_dir = "Final/GSEAPY_New4_ReinfOppo_SameDir_Abs"
enrich = []
fdr = []
cell_types = []
d_en = {}
d_fdr = {}
for file in os.listdir(report_dir):
    if "neurons_0" not in file:
        v = pd.read_csv(report_dir + "/" + file + "/gseapy.gene_set.prerank.report.csv", sep = ",")
        cell_type = d_ct_abrev[file_to_celltype(file)].replace(" ", ".")
        for index, row in v.iterrows():
            enrich.append(row["ES"])
            fdr.append(row["FDR q-val"])
            cell_types.append(cell_type)
            #If a report holds several rows, the last row is the one kept for the cell type
            d_en[cell_type] = row["Lead_genes"]
            d_fdr[cell_type] = row["FDR q-val"]

#Gene set being tested: genes with Mult > 0.7 (the "same direction over time" set in the title)
df_same = df[df["Mult"] > 0.7]
gene_set = df_same.index

ct = []
enrichment = []
#Genes are always ranked from the largest to the smallest absolute measure
asc = False
for file in os.listdir("Final/Div"):
    if "Brain" in file:
        cell_type = d_ct_abrev[file_to_celltype(file)]
        v = pd.read_csv("Final/Div/" + file, sep = ",")
        v["Multiply"] = np.abs(v["Proportion intrinsic"]*v["Proportion extrinsic"])
        v = v.sort_values("Multiply", ascending = asc)
        ranked_genes = list(v["Unnamed: 0"])

        #The cutoff rank is the position of the last leading-edge gene (whole list if it is absent)
        last_lead_gene = d_en[cell_type].split(";")[-1]
        if last_lead_gene in ranked_genes:
            cut = ranked_genes.index(last_lead_gene) + 1
        else:
            cut = len(ranked_genes)

        #Count gene-set members in the top slice and in an equally sized slice from the bottom
        genes_agree = len(np.intersect1d(ranked_genes[:cut], gene_set))
        genes_disagree = len(np.intersect1d(ranked_genes[::-1][:cut], gene_set))
        ct.append(cell_type)
        #Pseudocount of 1 keeps the ratio finite when either count is zero
        enrichment.append(np.log2((genes_agree + 1)/(genes_disagree + 1)))

#Build the plotting table from lists that share the same order (one entry per brain cell type),
#so that each bar is labelled with the cell type it was actually computed for
sns.set(font_scale = 1.5)
sns.set_style("white")
to_plot = pd.DataFrame({
    "Log$_2$ fold-enrichment": enrichment,
    "FDR": [d_fdr[c] for c in ct],
    "Cell type": ct,
})
sns.barplot(data = to_plot, x = "Cell type", y = "Log$_2$ fold-enrichment", color = "grey")
plt.title("Enrichment for reinforcing/opposing\nand same direction over time genes")
#plt.legend(bbox_to_anchor=(1.5, 1.05))
plt.legend([],[], frameon=False)
plt.xticks(rotation=90)
plt.show()

In [None]:
#Plotting the results of Enrichment analysis with absolute measures
#For Interaction/differences in temporal pattern of gene expression

#Read the GSEAPY prerank reports and remember, per cell type, the leading-edge genes and the FDR
report_dir = "Final/GSEAPY_New4_Interaction_DifTimePattern"
enrich = []
fdr = []
cell_types = []
d_en = {}
d_fdr = {}
for file in os.listdir(report_dir):
    if "neurons_0" not in file:
        v = pd.read_csv(report_dir + "/" + file + "/gseapy.gene_set.prerank.report.csv", sep = ",")
        cell_type = d_ct_abrev[file_to_celltype(file)].replace(" ", ".")
        for index, row in v.iterrows():
            enrich.append(row["ES"])
            fdr.append(row["FDR q-val"])
            cell_types.append(cell_type)
            #If a report holds several rows, the last row is the one kept for the cell type
            d_en[cell_type] = row["Lead_genes"]
            d_fdr[cell_type] = row["FDR q-val"]

#Gene set being tested: genes with Mult < -0.25 (the "opposite direction over time" set in the title)
df_oppo = df[df["Mult"] < -0.25]
gene_set = df_oppo.index

ct = []
enrichment = []
#Genes are always ranked from the largest to the smallest proportion
asc = False
for file in os.listdir("Final/Div"):
    if "Brain" in file:
        cell_type = d_ct_abrev[file_to_celltype(file)]
        v = pd.read_csv("Final/Div/" + file, sep = ",")
        v = v.sort_values("Proportion interaction", ascending = asc)
        ranked_genes = list(v["Unnamed: 0"])

        #The cutoff rank is the position of the last leading-edge gene (whole list if it is absent)
        last_lead_gene = d_en[cell_type].split(";")[-1]
        if last_lead_gene in ranked_genes:
            cut = ranked_genes.index(last_lead_gene) + 1
        else:
            cut = len(ranked_genes)

        #Count gene-set members in the top slice and in an equally sized slice from the bottom
        genes_agree = len(np.intersect1d(ranked_genes[:cut], gene_set))
        genes_disagree = len(np.intersect1d(ranked_genes[::-1][:cut], gene_set))
        print(genes_agree, genes_disagree, cell_type)
        ct.append(cell_type)
        #Pseudocount of 1 keeps the ratio finite when either count is zero
        enrichment.append(np.log2((genes_agree + 1)/(genes_disagree + 1)))

#Build the plotting table from lists that share the same order (one entry per brain cell type),
#so that each bar is labelled with the cell type it was actually computed for
sns.set(font_scale = 1.5)
sns.set_style("white")
to_plot = pd.DataFrame({
    "Log$_2$ fold-enrichment": enrichment,
    "FDR": [d_fdr[c] for c in ct],
    "Cell type": ct,
})
sns.barplot(data = to_plot, x = "Cell type", y = "Log$_2$ fold-enrichment", color = "grey")
plt.title("Enrichment for interaction\nand opposite direction over time genes")
#plt.legend(bbox_to_anchor=(1.5, 1.05))
plt.legend([],[], frameon=False)
plt.xticks(rotation=90)
plt.show()

In [None]:
#Plotting the results of Enrichment analysis with absolute measures
#For pure extrinsic everything

#Read the GSEAPY prerank reports and remember, per cell type, the leading-edge genes and the FDR
report_dir = "Final/GSEAPY_New4_Extrinsic_SameDir"
enrich = []
fdr = []
cell_types = []
d_en = {}
d_fdr = {}
for file in os.listdir(report_dir):
    if "neurons_0" not in file:
        v = pd.read_csv(report_dir + "/" + file + "/gseapy.gene_set.prerank.report.csv", sep = ",")
        cell_type = d_ct_abrev[file_to_celltype(file)].replace(" ", ".")
        for index, row in v.iterrows():
            enrich.append(row["ES"])
            fdr.append(row["FDR q-val"])
            cell_types.append(cell_type)
            #If a report holds several rows, the last row is the one kept for the cell type
            d_en[cell_type] = row["Lead_genes"]
            d_fdr[cell_type] = row["FDR q-val"]

#Gene set being tested: genes with Mult > 0.7 (the "same direction over time" set in the title)
#This previously used df_oppo.index, a leftover from the interaction cell
df_same = df[df["Mult"] > 0.7]
gene_set = df_same.index

ct = []
enrichment = []
#Genes are always ranked from the largest to the smallest proportion
asc = False
for file in os.listdir("Final/Div"):
    if "Brain" in file:
        cell_type = d_ct_abrev[file_to_celltype(file)]
        v = pd.read_csv("Final/Div/" + file, sep = ",")
        v = v.sort_values("Proportion extrinsic", ascending = asc)
        ranked_genes = list(v["Unnamed: 0"])

        #The cutoff rank is the position of the last leading-edge gene (whole list if it is absent)
        last_lead_gene = d_en[cell_type].split(";")[-1]
        if last_lead_gene in ranked_genes:
            cut = ranked_genes.index(last_lead_gene) + 1
        else:
            cut = len(ranked_genes)

        #Count gene-set members in the top slice and in an equally sized slice from the bottom
        genes_agree = len(np.intersect1d(ranked_genes[:cut], gene_set))
        genes_disagree = len(np.intersect1d(ranked_genes[::-1][:cut], gene_set))
        print(genes_agree, genes_disagree, cell_type)
        ct.append(cell_type)
        #Pseudocount of 1 keeps the ratio finite when either count is zero
        enrichment.append(np.log2((genes_agree + 1)/(genes_disagree + 1)))

#Build the plotting table from lists that share the same order (one entry per brain cell type),
#so that each bar is labelled with the cell type it was actually computed for
sns.set(font_scale = 1.5)
sns.set_style("white")
to_plot = pd.DataFrame({
    "Log$_2$ fold-enrichment": enrichment,
    "FDR": [d_fdr[c] for c in ct],
    "Cell type": ct,
})
sns.barplot(data = to_plot, x = "Cell type", y = "Log$_2$ fold-enrichment", color = "grey")
plt.title("Enrichment for extrinsic\nand same direction over time genes")
#plt.legend(bbox_to_anchor=(1.5, 1.05))
plt.legend([],[], frameon=False)
plt.xticks(rotation=90)
plt.show()