In [2]:
import seaborn as sns
import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt
import os
from matplotlib.pyplot import figure
from collections import Counter
from scipy.stats import spearmanr,pearsonr,fisher_exact,binom_test
import scanpy as sc
import anndata
import leidenalg

sc.settings.verbosity = 3             # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.logging.print_versions()
sc.settings.set_figure_params(dpi=80)

  from pandas.core import (


-----
anndata     0.9.2
scanpy      1.9.5
-----
PIL                 9.4.0
anyio               NA
asciitree           NA
asttokens           NA
attr                22.1.0
babel               2.11.0
backcall            0.2.0
beta_ufunc          NA
binom_ufunc         NA
bottleneck          1.3.5
brotli              NA
certifi             2023.07.22
cffi                1.15.1
chardet             4.0.0
charset_normalizer  2.0.4
cloudpickle         2.2.1
colorama            0.4.6
comm                0.1.2
cycler              0.10.0
cython_runtime      NA
cytoolz             0.12.0
dask                2023.6.0
dateutil            2.8.2
debugpy             1.6.7
decorator           5.1.1
defusedxml          0.7.1
entrypoints         0.4
executing           0.8.3
fasteners           0.19
fastjsonschema      NA
h5py                3.7.0
hypergeom_ufunc     NA
idna                3.4
igraph              0.11.4
invgauss_ufunc      NA
ipykernel           6.19.2
ipython_genutils    0.2.0
ipywidgets

In [5]:
#Do filtering and create pseudobulk CPM

#Load the clustered anndata objects written by Demultiplex_cluster_final.ipynb
v_brain = sc.read("Final/Brain_Subclustered_Prelim.h5ad")
v_mes = sc.read("Final/Mesenchymal_Subclustered_Prelim.h5ad")
v_chondro = sc.read("Final/Chondrocyte_Subclustered_Prelim.h5ad")
v_inhib = sc.read("Final/GABAergic_Forebrain.h5ad")
v = sc.read("Final/All_Cells_Start.h5ad")

#Gene blacklist: first column of each tab-separated, header-less file, concatenated in this order
f = []
for blacklist_path in ["RiboMito_Blacklist.txt", "Mouse_Sex_Chr_Genes.txt"]:
    f.extend(pd.read_csv(blacklist_path, sep = "\t", header = None)[0])

#Function definitions
#Computes counts per 10,000 (note: NOT per million, despite the "cpm" in the name)
def cpm_vec(x):
    """Rescale a vector of counts so that it sums to 10,000.

    Parameters
    ----------
    x : iterable of numbers
        Counts for one sample. One-shot iterators and generators are accepted.

    Returns
    -------
    list
        ``value * 10000 / total`` for every element of ``x``, in input order.

    Notes
    -----
    There is no guard for a zero total: plain Python numbers raise
    ZeroDivisionError, numpy scalars produce nan/inf.
    """
    #Materialize exactly once. Building list(x) twice meant a generator was
    #exhausted by the sum and the function silently returned an empty list.
    counts = list(x)
    total = sum(counts)
    return [count * 10000 / total for count in counts]

#Function to filter genes
#Rat was a bad choice of variable name since it means ratio here...
def remove_genes(df, ft, brain, cut = 0.2, rat = True):
    """Keep the rows (genes) of ``df`` that pass an expression filter.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per gene with columns named ``"<sample> Raw"`` (MR1_mi, MR1_ri,
        RM1_mi, RM1_ri, RM2_mi, RM2_ri are the ones read here). For ``rat=True``
        the values are expected to be already-binarized per-gene cell counts.
    ft : str
        Only used when ``rat`` is False. ``"and"``: all six MR1/RM1/RM2 columns
        must be >= ``cut``. ``"or"``: any of the four MR1/RM1 columns.
        ``"or2"``: at least one MR1 column AND at least one RM1 column.
    brain : bool
        Only used when ``rat`` is True. True compares MR1 and RM1 columns,
        False compares MR1 and RM2 columns.
    cut : float
        Threshold; a proportion when ``rat`` is True, an absolute value otherwise.
    rat : bool
        "Ratio" mode. When True, every column is divided by its own maximum and
        a gene passes if any of the four resulting ratios is >= ``cut``.

    Returns
    -------
    pandas.DataFrame
        The passing rows of ``df`` (a copy), same columns and row order. The
        result is empty, but keeps its columns, when nothing passes.

    Raises
    ------
    ValueError
        If ``rat`` is False and ``ft`` is not one of "and", "or", "or2".
    """
    mr1_cols = ["MR1_mi Raw", "MR1_ri Raw"]
    rm1_cols = ["RM1_mi Raw", "RM1_ri Raw"]
    rm2_cols = ["RM2_mi Raw", "RM2_ri Raw"]

    if rat:
        #Depending on whether it is brain or connective tissue we want different samples
        counts = df[mr1_cols + (rm1_cols if brain else rm2_cols)]
        #Denominator is the largest value in each column (the maximum over genes)
        ratios = counts / counts.max(axis = 0)
        #Check if at least one sample has at least cut proportion of cells expressing
        passing = (ratios >= cut).any(axis = 1)
    elif ft == "and":
        #Require all samples to have greater counts
        passing = (df[mr1_cols + rm1_cols + rm2_cols] >= cut).all(axis = 1)
    elif ft == "or":
        #Require just one sample to have greater counts
        #NOTE(review): `brain` is ignored here, so RM2 is never consulted even for
        #non-brain tissue - confirm this is intended
        passing = (df[mr1_cols + rm1_cols] >= cut).any(axis = 1)
    elif ft == "or2":
        passing = (df[mr1_cols] >= cut).any(axis = 1) & (df[rm1_cols] >= cut).any(axis = 1)
    else:
        #Previously this fell through and failed with an opaque length-mismatch error
        raise ValueError("ft must be one of 'and', 'or', 'or2' when rat is False, got " + repr(ft))

    #Boolean-mask selection keeps columns and dtypes intact even when no gene passes
    #(building a frame from an empty list of rows used to raise on the column assignment)
    return df.loc[passing].copy()

#Process a cluster (cell type) for input to filtering
def process(cluster, name, keep, ct, filt = f, binar = False, out_remove = True, pseudo = 1, cluster_ids = None):
    """Pseudobulk one sample: per-gene totals over a chosen set of cells.

    Parameters
    ----------
    cluster : anndata-like object
        Must support boolean-mask indexing and expose ``.obs`` (with a "BCS"
        column), ``.var`` and ``.X``.
    name : str
        Sample label; the single output column is named ``name + " Raw"``.
    keep : collection
        Values of ``obs["BCS"]`` identifying the cells to retain.
    ct : list
        Bookkeeping list; ``[n_cells, cluster_ids, name]`` is appended in place.
    filt : collection
        Gene names to drop from the result (a blacklist). Defaults to the
        module-level ``f`` as it was when this function was defined.
    binar : bool
        If truthy, count the cells with a non-zero value for each gene instead
        of summing the values.
    out_remove, pseudo
        Unused; kept so existing calls keep working.
    cluster_ids
        Value recorded in ``ct``. When omitted, falls back to the module-level
        ``i`` (the historical behavior), or None if that name does not exist.

    Returns
    -------
    (pandas.DataFrame, list)
        The one-column gene table and the same ``ct`` list that was passed in.
    """
    #The original body read a global `i` that only exists once the pseudobulking
    #loop has started; make that dependency explicit while keeping old calls working
    if cluster_ids is None:
        cluster_ids = globals().get("i")

    #Subset the anndata object
    c = cluster[cluster.obs["BCS"].isin(keep)]

    #Get a dataframe of the counts (genes x cells) and set columns/indices
    counts = c.X.todense() if hasattr(c.X, "todense") else c.X
    z = pd.DataFrame(counts).T
    z.index = c.var.index
    z.columns = c.obs.index

    #Add information to the ct list that was passed
    ct.append([z.shape[1], cluster_ids, name])

    #Binarize expression if desired
    if binar:
        z = pd.DataFrame(z.astype(bool).astype(int).sum(axis=1))
    else:
        z = pd.DataFrame(z.sum(axis=1))

    #Drop the genes listed in filt
    z = z.loc[~z.index.isin(filt)]
    z.columns = [name + " Raw"]
    return z, ct

#Downsamples counts according to multinomial distribution
def downsample_counts(x, target_total, seed):
    """Draw ``target_total`` counts from a multinomial weighted by ``x``.

    Parameters
    ----------
    x : array-like of numbers
        Observed counts; each entry's share of the total is its probability.
    target_total : int
        Number of counts to draw.
    seed : int
        Seed applied to numpy's global random generator before sampling, so
        the same inputs and seed always give the same draw.

    Returns
    -------
    numpy.ndarray
        One vector of sampled counts, same length as ``x``.
    """
    #Turn the counts into multinomial probabilities
    as_float = np.float64(x)
    pvals = as_float/np.sum(x)

    #Reseed the global generator, then take a single draw and unwrap it
    np.random.seed(seed)
    draws = np.random.multinomial(n=target_total, pvals = pvals, size = 1)
    return draws[0]

#Counter incremented once per entry of `samples` by the pseudobulking loop in the next cell;
#nothing in the visible code reads it back
c = 0
#New plan:
#Filter out genes that are not expressed in > 20% of cells in one of the cell types
#Each list below holds leiden cluster labels as integers; they are converted to strings
#when matched against obs["leiden"] in the pseudobulking loop
chondro = [0]
prog = [1]

#Define the cell type groups for mesenchymal clusters
mes_1 = [1]
mes_2 = [2]
mes_0 = [0]
mes_4 = [4]
mes_5 = [5]
mes_prog = [3]

#Define cell type groups for brain
excit = [0, 5]
exc_prog = [4]
inh_prog = [2]
inter_prog = [10]
spinal_excit = [6]
spinal_inhib = [7]
excit_other = [17]

#Most inhibitory clusters do not have enough cells
#We required at least 10 cells for all of MR_m, MR_r, RM_m, and RM_r
#Do the same for GABAergic neurons from the forebrain
inhib_all = [0, 1, 3, 4]
inhib_0 = [0]
inhib_1 = [1]
inhib_3 = [3]
inhib_4 = [4]
inhib_25 = [2, 5]

#Only keep a subset of these according to these dictionaries
#Keys become the cell-type names that are printed (and used in output file names);
#groups defined above but absent from these dictionaries (prog, mes_1, mes_4, mes_5,
#excit_other, inhib_0, inhib_1, inhib_3, inhib_4, inhib_25) are not used in the visible code
keeping_mes = {"Mesenchyme_2":mes_2, "Mesenchyme_0":mes_0, "Mesenchyme_cycling":mes_prog}
keeping_chondro = {"Chondrocytes":chondro}
keeping_brain = {"Glutamatergic_neurons":excit, "Glutamatergic_progenitors":exc_prog, "GABAergic_progenitors":inh_prog, "Intermediate_progenitors":inter_prog, "Spinal_glutamatergic_neurons":spinal_excit, "Spinal_GABAergic_neurons":spinal_inhib}
keeping_inhib = {"GABAergic_neurons_all":inhib_all}

#Define lists to iterate through
#The two lists are index-aligned: samples[k] is processed with the groups in to_keep[k]
samples = [v_brain, v_mes, v_chondro, v_inhib]
to_keep = [keeping_brain, keeping_mes, keeping_chondro, keeping_inhib]

#Subset the anndata of raw counts by sample, then by species within each sample
def _split_by_sample(adata, sample):
    """Return (cells of `sample`, its "Mouse" cells, its "Rat" cells) as subsets of `adata`."""
    in_sample = adata[adata.obs["Sample"].isin([sample])]
    mouse_cells = in_sample[in_sample.obs["Species"].isin(["Mouse"])]
    rat_cells = in_sample[in_sample.obs["Species"].isin(["Rat"])]
    return in_sample, mouse_cells, rat_cells

v_MR1, v_MR1_m, v_MR1_r = _split_by_sample(v, "MR1")
v_RM1, v_RM1_m, v_RM1_r = _split_by_sample(v, "RM1")
v_RM2, v_RM2_m, v_RM2_r = _split_by_sample(v, "RM2")
v_WT, v_WT_m, v_WT_r = _split_by_sample(v, "WT")

In [6]:
#Reran this cell commenting out the line with filtering in it and changing folder

#Anndata subset for every sample/species combination, keyed by the column prefix used in the tables
sample_views = {
    "MR1_mi": v_MR1_m, "MR1_ri": v_MR1_r,
    "RM1_mi": v_RM1_m, "RM1_ri": v_RM1_r,
    "RM2_mi": v_RM2_m, "RM2_ri": v_RM2_r,
    "WT_mi": v_WT_m, "WT_ri": v_WT_r,
}
vals = ["MR1_mi", "MR1_ri", "RM1_mi", "RM1_ri", "RM2_mi", "RM2_ri", "WT_mi", "WT_ri"]
#Samples whose total counts determine the downsampling depth (brain vs. everything else)
kvals_brain = ["MR1_mi", "MR1_ri", "RM1_mi", "RM1_ri"]
kvals_chond = ["MR1_mi", "MR1_ri", "RM2_mi", "RM2_ri"]
n_downsamplings = 100

#(organ label, brain_bool) for each position of `samples` (hardcoded based on list definitions above)
sample_settings = [("Brain", True), ("Mesenchymal", False), ("Chondrocyte", False), ("Brain", True)]

#A named loop variable is used instead of `_`, which IPython reserves for the last cell output
for sample_idx in range(len(samples)):
    organ, brain_bool = sample_settings[sample_idx]
    v_b = samples[sample_idx]
    keeping = to_keep[sample_idx]
    ct = []
    c += 1
    #Compute across all clusters include raw counts for downstream filtering.
    #List of ribosomal genes here: http://ribosome.med.miyazaki-u.ac.jp/rpg.cgi?mode=orglist&org=Mus%20musculus
    #Removed these along with the mt- genes

    for key in keeping.keys():
        print(key)
        #Keep this module-level name: process() records the global `i` in ct
        i = keeping[key]
        #Get the cell barcodes that correspond to the right cell type
        keeping_these_cells = list(v_b.obs[v_b.obs["leiden"].isin([str(x) for x in i])]["BCS"])
        print(len(keeping_these_cells))

        #For each sample, process the anndata to create a dataframe of binarized counts
        binarized = []
        for val in vals:
            frame, ct = process(sample_views[val], val, keeping_these_cells, ct, f, binar = True)
            binarized.append(frame)

        #Join to create the full data frame
        vf = binarized[0].join(binarized[1:])

        #Requires 20% of cells for at least one sample and at least 10 cells for one sample
        vf = remove_genes(remove_genes(vf, "or", brain_bool, 0.2), "or", brain_bool, 10, rat = False)
        keep_genes = list(vf.index)

        #Reprocess things, this time without binarizing (process() still appends to ct in place)
        summed = [process(sample_views[val], val, keeping_these_cells, ct, f, binar = False)[0] for val in vals]

        #Join all together
        vf = summed[0].join(summed[1:])
        #Filter to only the passing genes identified above
        #.copy() so the column assignments below write to an independent frame, not a slice
        vf = vf[vf.index.isin(keep_genes)].copy()

        #Need to specify this as we often see very low counts in WT or something else for some cell types
        #Specifies the minimum number of counts across samples/conditions that we will use for downsampling
        depth_samples = kvals_brain if brain_bool else kvals_chond
        min_counts = np.min([np.sum(vf[val + " Raw"]) for val in depth_samples])

        #Across 100 downsamplings, compute the counts per 10,000
        d = {}
        failed = set()
        for iteration in range(n_downsamplings):
            for val in vals:
                cur_arr = np.array(vf[val + " Raw"].copy())
                try:
                    #The iteration number doubles as the random seed; +1 is a pseudocount
                    new_arr = downsample_counts(cur_arr, min_counts, iteration) + 1
                    CPM = np.array(cpm_vec(new_arr))
                except Exception:
                    #Best effort, as before: a sample that cannot be downsampled gets all-zero values.
                    #Catching Exception rather than using a bare except lets Ctrl-C interrupt the cell.
                    failed.add(val)
                    CPM = np.repeat(0, len(cur_arr))

                d[val] = d[val] + CPM if iteration else CPM
        if failed:
            print("Downsampling failed (normalized values set to 0) for:", ", ".join(s for s in vals if s in failed))

        #Average the counts per 10,000
        for val in d.keys():
            vf[val + " Norm CPM"] = d[val]/n_downsamplings

        #Compute all possible log fold-changes
        #Brain uses RM1 as the RM sample; for chondrocytes (and mesenchyme) we will use the one with more reads, RM2
        rm = "RM1" if brain_bool else "RM2"
        cpm = {val: vf[val + " Norm CPM"] for val in vals}
        vf["DR/HR"] = np.log2(cpm[rm + "_ri"]/cpm["MR1_ri"])
        vf["DM/HM"] = np.log2(cpm["MR1_mi"]/cpm[rm + "_mi"])
        vf["DM/DR"] = np.log2(cpm["MR1_mi"]/cpm[rm + "_ri"])
        vf["HM/HR"] = np.log2(cpm[rm + "_mi"]/cpm["MR1_ri"])
        vf["DM/HR"] = np.log2(cpm["MR1_mi"]/cpm["MR1_ri"])
        vf["HM/DR"] = np.log2(cpm[rm + "_mi"]/cpm[rm + "_ri"])
        print(key, "Worked")
        #vf.to_csv("Final/Filtered_Pseudobulked/" + organ + "_Or_2010_New_Leiden_NewNorm_NoFilt_" + key + ".csv")

Glutamatergic_neurons
943
Glutamatergic_neurons Worked
Glutamatergic_progenitors
372
Glutamatergic_progenitors Worked
GABAergic_progenitors
392
GABAergic_progenitors Worked
Intermediate_progenitors
135
Intermediate_progenitors Worked
Spinal_glutamatergic_neurons
302
Spinal_glutamatergic_neurons Worked
Spinal_GABAergic_neurons
283
Spinal_GABAergic_neurons Worked
Mesenchyme_2
387
Mesenchyme_2 Worked
Mesenchyme_0
587
Mesenchyme_0 Worked
Mesenchyme_cycling
371
Mesenchyme_cycling Worked
Chondrocytes
211
Chondrocytes Worked
GABAergic_neurons_all
737
GABAergic_neurons_all Worked


In [7]:
#Sanity check: sum of the 11 per-cell-type cell counts printed by the pseudobulking cell above
943 + 737 + 211 + 371 + 587 + 387 + 283 + 302 + 135 + 392 + 372

4720

In [12]:
#Decomposition into extrinsic, intrinsic, and interaction with our now pseudobulked data
pseudobulk_dir = "Final/Filtered_Pseudobulked"
#Make sure the output folders exist before anything is written to them
os.makedirs("Final/Div", exist_ok = True)
os.makedirs("Final/NoDiv", exist_ok = True)

#sorted(): os.listdir returns entries in arbitrary order
for file in sorted(os.listdir(pseudobulk_dir)):
    if "NewNorm" in file:
        print(file)

        #Read in the file. A dedicated name is used so the anndata object `v` is not overwritten.
        pseudobulk = pd.read_csv(pseudobulk_dir + "/" + file)

        #Compute interaction divergence
        N = pseudobulk["DM/HM"] + pseudobulk["DR/HR"]

        #Compute extrinsic divergence estimates as discussed in the manuscript
        E1 = -pseudobulk["DM/HM"]
        E2 = pseudobulk["DR/HR"]

        #Compute intrinsic divergence estimates as discussed in the manuscript
        I1 = pseudobulk["DM/HR"]
        I2 = pseudobulk["HM/DR"]

        #Negated sums of the two estimates (halved below to give the mean);
        #the negative signs are needed to make things match the manuscript in terms of directionality
        E = -(E1 + E2)
        I = -(I1 + I2)

        #Column-wise arithmetic gives NaN proportions when N, E and I are all zero,
        #where per-row Python float division would raise ZeroDivisionError
        total = N.abs() + E.abs() + I.abs()

        #Set the columns of the dataframe
        #Means of the two estimates; interaction divergence is divided by 2 as discussed in the manuscript
        pseudobulk["Extrinsic"] = E/2
        pseudobulk["Intrinsic"] = I/2
        pseudobulk["Interaction"] = N/2
        pseudobulk["Proportion extrinsic"] = E.abs()/total
        pseudobulk["Proportion intrinsic"] = I.abs()/total
        pseudobulk["Proportion interaction"] = N.abs()/total

        #Classify genes has having some divergence if they have absolute log fold-change greater than 0.5 for at least one of the four comparisons
        #If not, then they do not have divergence and are generally ignored
        #Rows are split by the mask itself, so rows sharing a gene name are classified independently
        divergent = (pseudobulk[["HM/HR", "DM/DR", "DM/HR", "HM/DR"]].abs() >= 0.5).any(axis = 1)
        print(int(divergent.sum()))
        v_div = pseudobulk[divergent].copy()
        v_nodiv = pseudobulk[~divergent].copy()
        v_div.to_csv("Final/Div/" + file.replace("New_Leiden", "Div_New4"), index = False)
        v_nodiv.to_csv("Final/NoDiv/" + file.replace("New_Leiden", "NoDiv_New4"), index = False)

Brain_Or_2010_New_Leiden_NewNorm_GABAergic_neurons_all.csv
3795
Brain_Or_2010_New_Leiden_NewNorm_GABAergic_progenitors.csv
5295
Brain_Or_2010_New_Leiden_NewNorm_Glutamatergic_neurons.csv
4098
Brain_Or_2010_New_Leiden_NewNorm_Glutamatergic_progenitors.csv
5016
Brain_Or_2010_New_Leiden_NewNorm_Intermediate_progenitors.csv
4851
Brain_Or_2010_New_Leiden_NewNorm_Spinal_GABAergic_neurons.csv
4368
Brain_Or_2010_New_Leiden_NewNorm_Spinal_glutamatergic_neurons.csv
4610
Chondrocyte_Or_2010_New_Leiden_NewNorm_Chondrocytes.csv
5282
Mesenchymal_Or_2010_New_Leiden_NewNorm_Mesenchyme_0.csv
5222
Mesenchymal_Or_2010_New_Leiden_NewNorm_Mesenchyme_2.csv
5520
Mesenchymal_Or_2010_New_Leiden_NewNorm_Mesenchyme_cycling.csv
5015


In [13]:
#Function to get counts of cells for each sample
def get_cell_counts(v, out_file):
    """Write a table of cells per leiden cluster for every sample/species pair.

    Parameters
    ----------
    v : anndata-like object
        Only ``v.obs`` is read; it must have "Sample", "Species" and "leiden"
        columns, and the leiden labels must be convertible with ``int()``.
    out_file : str
        File name; the tab-separated table is written to ``"Final/" + out_file``.

    Output
    ------
    One row for each of MR1, RM1, RM2, WT crossed with Mouse (_m) and Rat (_r),
    with columns "Sample", "Condition" and one column per cluster (sorted
    numerically). Clusters seen in any of the eight subsets get a column; a
    subset without cells in a cluster reports 0.
    """
    #(Sample value, condition prefix); RM1 and RM2 share the RM condition
    sample_conditions = [("MR1", "MR"), ("RM1", "RM"), ("RM2", "RM"), ("WT", "WT")]
    species_suffixes = [("Mouse", "m"), ("Rat", "r")]

    #Subset to each sample/condition and create counters of the cell types
    obs = v.obs
    per_subset = []
    for sample, condition in sample_conditions:
        for species, suffix in species_suffixes:
            selected = obs["Sample"].isin([sample]) & obs["Species"].isin([species])
            per_subset.append((sample + "_" + suffix,
                               condition + "_" + suffix,
                               Counter(obs.loc[selected, "leiden"])))

    #Use every cluster present in any subset. Taking the keys from MR1 mouse alone
    #silently dropped clusters that happen to have no MR1 mouse cells.
    keys = sorted(set().union(*(counts.keys() for _, _, counts in per_subset)), key = int)

    #Iterate through all the counters, adding the number of cells (a Counter gives 0 for absent keys)
    out = [[sample_label, condition_label] + [counts[key] for key in keys]
           for sample_label, condition_label, counts in per_subset]

    #Write out
    df = pd.DataFrame(out, columns = ["Sample", "Condition"] + list(keys))
    df.to_csv("Final/" + out_file, sep = "\t", index = False)

In [15]:
#Write one cell-count table per clustered object
cell_count_outputs = [
    (v_inhib, "Cell_counts_GABAergic_neurons.tsv"),
    (v_chondro, "Cell_counts_chondrocytes.tsv"),
    (v_brain, "Cell_counts_neuronal.tsv"),
    (v_mes, "Cell_counts_mesenchymal.tsv"),
]
for clustered, counts_file in cell_count_outputs:
    get_cell_counts(clustered, counts_file)