# Import Stuff

In [1]:
import glob
import numpy as np
import pandas as pd
from Modules import plot_ax_violin
from scipy.stats import ttest_ind
from statsmodels.stats import multitest

# Define Functions

In [2]:
def read_ephys_data():
    """Load the lab Pvalb electrophysiology table together with its labels.

    Only cells that also appear in the label table are kept, and only the ten
    electrophysiological properties listed below are returned.

    Returns:
        tuple: ``(df_ephys, df_labels)`` -- the trimmed electrophysiology
        frame and the label frame restricted to the same cells, in the same
        order.
    """
    # The loader returns three frames; the middle one is not used here.
    ephys_raw, _, labels_raw = plot_ax_violin.get_lab_pvalb_dataset()

    source_columns = [
        'Resting membrane potential (mV)',
        'Input resistance (MOhm)',
        'Capacitance (pF)',
        'AP firing threshold (pA)',
        'AP symmetricity',
        'AP peak amplitude (mV)',
        'AP halfwidth (ms)',
        'Frequency Slope (Hz/pA)',
        'Attenuation',
        'Sag potential',
    ]
    # Exported headers equal the source headers, except that the sag
    # potential header gains its unit.
    display_names = [
        'Sag potential (mV)' if name == 'Sag potential' else name
        for name in source_columns
    ]

    has_label = ephys_raw.index.isin(labels_raw.index)
    df_ephys = ephys_raw.loc[has_label, source_columns].copy()
    df_ephys.columns = display_names
    df_labels = labels_raw.loc[df_ephys.index]

    return df_ephys, df_labels

def get_mature_ephys():
    """Return the electrophysiology of cells with Age > 20, indexed by cell type.

    Returns:
        pandas.DataFrame: one row per retained cell; the index holds each
        cell's 'CellType' label instead of its name.
    """
    ephys, labels = read_ephys_data()

    mature_labels = labels.loc[labels.Age > 20]
    mature_ephys = ephys.loc[mature_labels.index, :].copy()
    # Re-label the rows with the cell type so they can be grouped by type.
    mature_ephys.index = mature_labels['CellType']

    return mature_ephys

def get_young_old_ephys():
    """Return vBC electrophysiology indexed by age category ('<P21' / '>P21').

    Cells whose Age is exactly 21 (or missing) belong to neither category and
    are dropped, as are all cells whose 'CellType' is not 'vBC'.

    Returns:
        pandas.DataFrame: one row per retained cell; the index holds the age
        category label of each cell.
    """
    ephys, labels = read_ephys_data()

    age = labels.Age
    selected = ((age < 21) | (age > 21)) & (labels.CellType == 'vBC')
    labels = labels.loc[selected].copy()

    labels['Label'] = ''
    labels.loc[labels.Age > 21, 'Label'] = '>P21'
    labels.loc[labels.Age < 21, 'Label'] = '<P21'

    ephys = ephys.loc[labels.index].copy()
    ephys.index = labels['Label']

    return ephys

def get_columns(celltypes, ending):
    """Build one column header per cell type by appending ``ending`` to it.

    Args:
        celltypes: iterable of group names.
        ending: suffix appended to every name, e.g. ' (Mean)'.

    Returns:
        list: the headers, in the order of ``celltypes``.
    """
    headers = []
    for celltype in celltypes:
        headers.append('{0!s}{1!s}'.format(celltype, ending))
    return headers

def summarize_ephys_data(df_ephys):
    """Summarise every electrophysiological property per group of cells.

    Args:
        df_ephys: frame with one row per cell and one column per property.
            Its index holds the group label of each cell: the five
            morphological types when 'vAAC' is present, otherwise the two age
            categories '<P21' and '>P21'.

    Returns:
        pandas.DataFrame: one row per property holding, for every group, the
        count, mean, standard deviation, minimum and maximum. When exactly two
        groups are compared, the frame starts with 't-score' and 'P-value'
        from an unequal-variance t-test and the Benjamini-Hochberg adjusted
        'FDR'.

    Raises:
        KeyError: if one of the expected groups has no cell in ``df_ephys``.
    """
    if 'vAAC' in df_ephys.index:
        celltypes = ['vAAC', 'vBIC', 'hBIC', 'vBC', 'hBC']
    else:
        celltypes = ['<P21', '>P21']
    pairwise = len(celltypes) == 2

    endings = [' (Count)', ' (Mean)', ' (Standard Deviation)', ' (Minimum)', ' (Maximum)']
    columns = [column for ending in endings for column in get_columns(celltypes, ending)]
    if pairwise:
        columns = ['t-score', 'P-value', 'FDR'] + columns
    # np.nan rather than the np.NaN alias, which NumPy 2.0 removed.
    df_stat = pd.DataFrame(np.nan, index=df_ephys.columns, columns=columns)

    # Group the cells (rows) by their label and transpose each aggregate to
    # get the property-by-group layout of df_stat. This avoids
    # groupby(axis=1), which pandas has deprecated.
    grouped = df_ephys.groupby(level=0)
    aggregates = (
        (' (Count)', grouped.count()),
        (' (Mean)', grouped.mean()),
        (' (Standard Deviation)', grouped.std()),
        (' (Minimum)', grouped.min()),
        (' (Maximum)', grouped.max()),
    )
    for ending, table in aggregates:
        df_stat.loc[:, get_columns(celltypes, ending)] = table.T[celltypes].values
    # The frame was created as float (NaN filled); counts are whole numbers.
    for column in get_columns(celltypes, ' (Count)'):
        df_stat[column] = df_stat[column].astype(int)

    if pairwise:
        df_1 = df_ephys.loc[celltypes[0]]
        df_2 = df_ephys.loc[celltypes[1]]
        t_vals, pvals = ttest_ind(df_1.values, df_2.values, axis=0, equal_var=False, nan_policy='omit')
        df_stat['t-score'] = t_vals
        df_stat['P-value'] = pvals
        df_stat['FDR'] = multitest.multipletests(pvals, method='fdr_bh')[1]

    return df_stat

def get_differential_genes(target, fdr=.15):
    """Collect the significant genes of every edgeR comparison of a target.

    Args:
        target: comparison family; everything from '-PV-types' onwards is
            stripped to obtain the sub-directory of 'Differential/edgeR'.
        fdr: exclusive upper bound on the 'FDR' column.

    Returns:
        pandas.DataFrame: the rows with |logFC| > 1 and FDR < ``fdr`` of all
        '*.txt' tables in the directory, stacked in file-name order, with
        'Category 1' and 'Category 2' holding the two names parsed from the
        '<A> vs <B>.txt' file name.

    Raises:
        ValueError: from ``pd.concat`` when no table contributes any row.
    """
    folder = 'Differential/edgeR/%s' % target.split('-PV-types')[0]
    significant_tables = []

    for path in sorted(glob.glob('%s/*.txt' % folder)):
        table = pd.read_csv(path, sep='\t', header=0, index_col=0)
        passes = (np.abs(table.logFC) > 1) & (table.FDR < fdr)
        hits = table.loc[passes].copy()
        if hits.shape[0] == 0:
            continue

        # File names look like '<Category 1> vs <Category 2>.txt'.
        categories = path.split('/')[-1][:-4].split(' vs ')
        hits['Category 1'] = categories[0]
        hits['Category 2'] = categories[1]
        significant_tables.append(hits)

    return pd.concat(significant_tables, axis=0)

def get_differential_stats(target, count=8):
    """Return fold changes and FDRs of the most significant genes of a target.

    Every '*.txt' table in 'Differential/edgeR/<target>' is one comparison.
    A gene's significance is its smallest FDR over the comparisons in which
    its |logFC| exceeds 1; the ``count`` genes with the smallest such value
    are returned.

    Args:
        target: sub-directory of 'Differential/edgeR' to read.
        count: number of genes to keep.

    Returns:
        tuple: ``(df_fc, df_fdr)`` -- logFC and FDR frames with one row per
        selected gene and one column per comparison (named after the file
        without its '.txt' suffix). Genes absent from a table are NaN there.
    """
    directory = 'Differential/edgeR/%s' % target
    fnames = sorted(glob.glob('%s/*.txt' % directory))
    names = [fname.split('/')[-1][:-4] for fname in fnames]

    tables = [pd.read_csv(fname, sep='\t', header=0, index_col=0) for fname in fnames]
    df_fc = pd.concat([table[['logFC']] for table in tables], axis=1, join='outer', sort=False)
    df_fdr = pd.concat([table[['FDR']] for table in tables], axis=1, join='outer', sort=False)

    # Both frames share the same shape and ordering, but their column labels
    # differ ('logFC' vs 'FDR'). The mask is therefore applied positionally as
    # a plain array: a DataFrame mask would be label-aligned, match nothing,
    # and leave every FDR in place.
    weak_effect = (df_fc.abs() <= 1).values
    df_sig = df_fdr.mask(weak_effect)
    gene_signifs = df_sig.min(axis=1)
    genes = gene_signifs.nsmallest(n=count).index.tolist()

    df_fc = df_fc.loc[genes, :]
    df_fdr = df_fdr.loc[genes, :]

    df_fc.columns = names
    df_fdr.columns = names

    return df_fc, df_fdr

def get_differential_data(targets, fdr=.15):
    """Assemble the differential-expression table written to the workbook.

    Args:
        targets: comparison families passed one by one to
            ``get_differential_genes``.
        fdr: FDR cutoff forwarded to ``get_differential_genes``.

    Returns:
        pandas.DataFrame: the stacked significant genes with readable column
        headers plus, per gene, the two rate columns looked up in
        'Mapping/DifferentialMapping/Lin_Harris_rates.tsv'.
    """
    per_target = [get_differential_genes(target, fdr=fdr) for target in targets]
    df = pd.concat(per_target, axis=0)

    # Source column -> header used in the workbook (order is preserved).
    headers = {
        'logFC': 'log2 Fold Change',
        'PValue': 'p-value',
        'FDR': 'p-adjusted (FDR)',
        'Category 1': 'Category 1',
        'Category 2': 'Category 2',
    }
    df = df.loc[:, list(headers)].copy()
    df.columns = list(headers.values())

    df_rate = pd.read_csv('Mapping/DifferentialMapping/Lin_Harris_rates.tsv',
                          sep='\t', header=0, index_col=0)
    # Drop genes whose rate is zero in every column.
    nonzero = df_rate.max(axis=1) > 0
    df_rate = df_rate.loc[nonzero, :]

    rate_columns = (
        ('Rate in this study (%)', 'This Study'),
        ('Rate in CA1-INs study (%)', 'CA1-INs'),
    )
    for header, source in rate_columns:
        df[header] = df_rate.loc[df.index, source]

    return df

def get_sholl_data():
    """Load the Sholl table, sorted by age and arranged for the workbook.

    Returns:
        pandas.DataFrame: 'References/Sholl_Soma.tsv' sorted by 'Age', with
        ' um' spelled ' μm' in the headers, the second and third columns
        swapped, and an empty 'Sholl Analysis on dendritic length;' separator
        column inserted before the remaining columns.
    """
    table = pd.read_csv('References/Sholl_Soma.tsv', sep='\t', header=0, index_col=0)
    table = table.sort_values('Age')
    table.columns = [name.replace(' um', ' μm') for name in table.columns]
    table['Sholl Analysis on dendritic length;'] = ''

    # First three columns (second and third swapped), then the separator that
    # was just appended (-1), then every remaining original column.
    order = [0, 2, 1, -1]
    order.extend(range(3, table.shape[1] - 1))

    return table.iloc[:, order]

def get_summary():
    """Build the table of contents written to the workbook's 'Summary' sheet.

    Returns:
        pandas.DataFrame: indexed by sheet name ('Sheet Names'), with the
        columns 'Figure', 'Figure Title', 'Plot' and 'Summary'.
    """
    index = ['Figure 1C',
             'Figure 1D_1',
             'Figure 1D_2',
             'Figure 1E',
             'Figure 2A',
             'Figure 2C',
             'Figure 2E',
             'Figure 3A',
             'Figure 3B',
             'Figure 4A-E',
             'Figure 5B_1',
             'Figure 5B_2',
             'Figure 5B_3',
             'Figure 5C_1',
             'Figure 5C_2',
             'Figure 5C_3',
             'Figure 5D',
             'Figure 6B-D',
             'Figure 6E',
             'Figure 6F',
             'Figure 7A',
             'Figure 7C',
            ]

    figure_titles = {'Figure 1':'Morpho-transcriptomic profiling of PV-INs',
                     'Figure 2':'Transcriptomic properties of PV-INs',
                     'Figure 3':'Electrophysiological properties of PV-INs',
                     'Figure 4':'Transcriptomic correlates of morphological PV types',
                     'Figure 5':'Analysis of CAM expression in morphological PV types',
                     'Figure 6':'Morphological and electrophysiological analysis of vBC'
                                ' type PV-INs during circuit maturation',
                     'Figure 7':'Age-dependent transcriptomic changes and'
                                ' onset of hemoglobin expression in PV-INs'
                    }
    # Adjacent string literals inside the braces are concatenated implicitly.
    summaries = {'1C':  'Statistical information on differentially expressed genes between'
                        ' PV-INs and SST-OLMs. Columns show log2 of fold change, p-value for'
                        ' significance, p-adjusted (FDR) for multiple testing adjusted'
                        ' significance, and whether a gene was enriched in PV-INs or SST-OLMs.'
                        ' Statistics were calculated with edgeR.',
                 '1D_1':'Cell names, morphological types, and UMAP embedding coordinates for'
                        ' PV-INs and SST-OLM cells.',
                 '1D_2':'Cell names, morphological types, and UMAP embedding coordinates for'
                        ' PV-INs.',
                 '1E':  'Average classification accuracy, and standard deviation for Random Forest'
                        ' classifications of PV-INs vs OLMs, BCs vs non-BCs, and BICs vs non-BICs.',
                 '2A':  'Cell names, morphological type, proMMT generated cell types, and'
                        ' nbt-SNE generated coordinates for PV-INs.',
                 '2C':  'List of 25 genes that are differentially expressed at a cutoff of p-adjusted<0.05'
                        ' between at least 1 pair of proMMT types. If a gene is DE across multiple comparisons,'
                        ' it is listed multiple times. Statistics are calculated with edgeR. For each gene, we'
                        ' list the log2 fold change, p-value, p-adjusted, the two categories across which the'
                        ' comparison happened, and the % of cells that expressed the gene in our PV-INs, and the'
                        ' PV-INs from the CA1-INs study.',
                 '2E':  'Quantification of percentage of PV-INs that mapped to a given continent'
                        ' in the CA1-INs data, using different sets of reference genes.',
                 '3A':  'Electrophysiological properties of different morphological types.'
                        ' For each morphological type, we look at number of cells (count), average'
                        ' expression (mean), standard deviation, minimum, and maximum.',
                 '3B':  'UMAP based plotting of data of PV-INs using electrophysiological properties.'
                        ' For each cell we display, we have their dendro-morphological type,'
                        ' axo-morphological type, proMMT generated type, morphological type, and UMAP'
                        ' embedding coordinates.',
                 '4A-E':'List of 52 genes that are differentially expressed at a cutoff of p-adjusted<.15'
                        ' between either morphological types, dendro-morphological types, or '
                        ' axo-morphological types. If a gene is DE across multiple comparisons, it is listed '
                        ' multiple times. Statistics are calculated with edgeR. For each gene, we list the'
                        ' log2 fold change, p-value, p-adjusted, the two categories'
                        ' across which the comparison happened, and the % of cells that expressed'
                        ' the gene in our PV-INs, and the PV-INs from the CA1-INs study.',
                 '5B_1':'Log2 Fold changes of most significant CAMs in pairwise PV-IN subtype'
                        ' comparisons. In cases where a gene was not expressed in at least 5 cells'
                        ' with a level of TPM>15 between the two groups in the comparison, comparisons'
                        ' were not made, and the values were left blank.',
                 # The stray duplicated " Comparisons." fragment was removed here.
                 '5B_2':'FDRs of most significant CAMs in pairwise PV-IN subtype comparisons.'
                        ' In cases where a gene was not expressed in at least 5 cells'
                        ' with a level of TPM>15 between the two groups in the comparison, comparisons'
                        ' were not made, and the values were left blank.',
                 '5B_3':'Log2 Fold changes and FDRs of top 8 most differentially expressed genes'
                        ' between PV-INs and SST-OLM cells.',
                 '5C_1':'Similarity between morphological PV types and SST-OLMs.'
                        ' Similarities between cells were calculated as Pearson correlation of log'
                        ' of CAM TPM expressions. To calculate similarities between two cell types,'
                        ' the average of all pairwise similarities of cells in the two cell types were'
                        ' taken. To not bias the results, when calculating the similarity of a cell type'
                        ' with itself, the distance of cells to themselves were not used in the calculation.',
                 '5C_2':'Similarity between axo-morphological PV types and SST-OLMs.'
                        ' Similarities between cells were calculated as Pearson correlation of log'
                        ' of CAM TPM expressions. To calculate similarities between two cell types,'
                        ' the average of all pairwise similarities of cells in the two cell types were'
                        ' taken. To not bias the results, when calculating the similarity of a cell type'
                        ' with itself, the distance of cells to themselves were not used in the calculation.',
                 '5C_3':'Similarity between dendro-morphological PV-IN types and SST-OLMs.'
                        ' Similarities between cells were calculated as Pearson correlation of log'
                        ' of CAM TPM expressions. To calculate similarities between two cell types,'
                        ' the average of all pairwise similarities of cells in the two cell types were'
                        ' taken. To not bias the results, when calculating the similarity of a cell type'
                        ' with itself, the distance of cells to themselves were not used in the calculation.',
                 '5D':  'Log 2 of expression levels of specific CAMs in PV-INs. CAMs are taken from the'
                        ' Földy et al. (2016), and were shown as markers for regular-spiking,'
                        ' fast-spiking, or CA1 Pyramidal cells. Our PV-INs have expression consistent'
                        ' with what they showed for fast-spiking cells. Rows are genes, and columns are'
                        ' cells with their morphological cell types below the cell names.',
                 '6B-D':'Morphological data for a subset of vBCs that were used in our'
                        ' morphological analysis. For each cell we list its name, age, the length'
                        ' of its dendrite, the length of its axon, and the number of intersections at'
                        ' 25μm intervals.',
                 '6E':  'Electrophysiological comparison of <P21 and >P21 vBCs. For each age'
                        '  category we have number of cells (Count), average expression'
                        ' (Mean), standard deviation, minimum, and maximum for all 10 electrophysiological'
                        ' properties.',
                 '6F':  'UMAP based embedding of vertical Basket Cells using electrophysiological properties.'
                        ' Columns are cell name, cell age (in days), x-coordinate, and y-coordinate of UMAP'
                        ' embedding.',
                 '7A':  'Genes that were found to be up-regulated or down-regulated with age in vBCs,'
                        ' using the Gini impurity measure. Statistics were calculated using'
                        ' a Monte Carlo method. Switching Age lists the cut-off point where the sliding'
                        ' window gives the best Gini impurity difference. p-value and p-adjusted are'
                        ' measures of significance, with and without accounting for multiple testing.'
                        ' A gene is considered up-regulated if it is "turned on" with age, and down-regulated'
                        ' if it is "turned off" after the switching age. Cutoff for significance was'
                        ' p-adjusted < 0.10. As such, about 5 or 6 of the 53 genes listed here are'
                        ' expected to not be actually switching genes.',
                 # "were using" corrected to "were used".
                 '7C':  'Measure of accuracy in predicting the age of a cell using a random forest'
                        ' classifier. For each of our PV-INs, we show cell name, morphological type,'
                        ' cell age, and the rate (%) with which it is predicted to be older than P25.'
                        ' Only vBCs were used in training the classifier.'
                }

    # Sheet names are 'Figure <digit><plot id>': the first eight characters
    # name the figure and everything from the digit on identifies the plot.
    # This slicing assumes single-digit figure numbers.
    plot_ids = [sheet[7:] for sheet in index]

    df = pd.DataFrame(index=pd.Index(index, name='Sheet Names'))
    df['Figure'] = [sheet[:8] for sheet in index]
    df['Figure Title'] = df['Figure'].map(figure_titles)
    # Sub-sheets such as '5B_1' and '5B_2' belong to the same plot '5B'.
    df['Plot'] = [plot_id.split('_')[0] for plot_id in plot_ids]
    df['Summary'] = [summaries.get(plot_id, np.nan) for plot_id in plot_ids]

    return df

def set_width(worksheet, index, name, column, column_format):
    """Size one worksheet column to fit its header and its longest value.

    Args:
        worksheet: object exposing ``set_column(first, last, width, format)``.
        index: zero-based position of the column in the sheet.
        name: header label -- a tuple for multi-level headers, a string, or
            anything else (e.g. ``None`` for an unnamed index).
        column: pandas Series or Index holding the column's values.
        column_format: cell format forwarded to ``set_column``.
    """
    lengths = column.astype(str).map(len)
    # max() of an empty Series/Index is NaN, which would otherwise propagate
    # into the width handed to the worksheet.
    longest_value = int(lengths.max()) if len(lengths) else 0
    max_len = max(longest_value, len(str(column.name)))

    if isinstance(name, tuple):
        # str() so that non-string header levels can be measured as well.
        widest_level = max((len(str(level)) for level in name), default=0)
        max_len = max(max_len, widest_level) + 1
    elif isinstance(name, str):
        max_len = max(max_len, len(name) + 1) + 1
    else:
        max_len += 1

    worksheet.set_column(index, index, max_len, column_format)

    return

def adjust_dataframe_numbers(df):
    """Round, in place, the float64 columns whose largest magnitude exceeds 1.

    Columns of any other dtype, and float64 columns whose absolute values
    never exceed 1, are left untouched.

    Args:
        df: frame modified in place; nothing is returned.
    """
    for label, values in df.items():
        if values.dtype != np.float64:
            continue
        if not np.abs(values).max() > 1:
            continue
        df[label] = np.around(values, 2)

    return

def write_excel(writer, df, sheet_name, writer_args, wrap_text=False):
    """Write ``df`` to its own sheet and size every column of that sheet.

    Args:
        writer: open ``pd.ExcelWriter``; the sheet is looked up in
            ``writer.sheets`` after writing.
        df: frame to write. It is rounded in place by
            ``adjust_dataframe_numbers`` before being written.
        sheet_name: name of the sheet to create.
        writer_args: dict whose 'format' entry is the cell format applied to
            every column.
        wrap_text: when true, the last data column gets a fixed width of 100
            instead of being fitted to its content.
    """
    adjust_dataframe_numbers(df)
    df.to_excel(writer, sheet_name=sheet_name)
    worksheet = writer.sheets[sheet_name]
    cell_format = writer_args['format']

    # Sheet column 0 holds the index, so data columns start at 1.
    # NOTE(review): this assumes a single-level row index -- confirm for
    # frames with a MultiIndex.
    set_width(worksheet, 0, df.index.name, df.index, cell_format)
    last_position = df.shape[1]
    for idx, (col, data) in enumerate(df.items(), start=1):
        # Identify the wrapped column by position, so that another column
        # sharing its label is still fitted.
        if wrap_text and idx == last_position:
            continue
        set_width(worksheet, idx, col, data, cell_format)

    if wrap_text:
        worksheet.set_column(last_position, last_position, 100, cell_format)

    return

# Define Excel Generating Functions

In [3]:
def write_summary(writer, writer_args):
    """Write the table of contents to the 'Summary' sheet.

    The last column ('Summary') holds long text and is written with a fixed
    width instead of being fitted to its content.
    """
    write_excel(writer, get_summary(), 'Summary', writer_args, wrap_text=True)
    
def write_1C(writer, writer_args):
    """Write sheet 'Figure 1C': genes differing between Sst-OLM and Pvalb cells.

    Genes with |logFC| > 1 and FDR < .05 are kept and sorted by FDR. A
    positive logFC is labelled 'PV' and any other value 'SST' in the
    'Enriched In' column.
    """
    table = pd.read_csv('Differential/edgeR/General/Sst-OLM vs Pvalb.txt',
                        sep='\t', header=0, index_col=0)

    significant = (np.abs(table.logFC) > 1) & (table.FDR < .05)
    table = table.loc[significant, ['logFC', 'PValue', 'FDR']].copy()
    table['Regulated'] = (table.logFC > 0).map({True:'PV', False:'SST'})
    table.columns = ['log2 Fold Change', 'p-value', 'p-adjusted (FDR)', 'Enriched In']
    table = table.sort_values('p-adjusted (FDR)')

    write_excel(writer, table, 'Figure 1C', writer_args)

def write_1D(writer, writer_args):
    """Write the two embedding tables to sheets 'Figure 1D_1' and 'Figure 1D_2'."""
    sources = (
        ('Mapping/Embeddings/Lab_Pvalb+Lab_OLM.tsv', 'Figure 1D_1'),
        ('Mapping/Embeddings/Lab_Pvalb_Only.tsv', 'Figure 1D_2'),
    )
    for path, sheet_name in sources:
        embedding = pd.read_csv(path, sep='\t', header=0, index_col=0)
        write_excel(writer, embedding, sheet_name, writer_args)

def write_1E(writer, writer_args):
    """Write 'Mapping/Separation Accuracy.tsv' unchanged to sheet 'Figure 1E'."""
    accuracy = pd.read_csv('Mapping/Separation Accuracy.tsv', sep='\t', header=0, index_col=0)
    write_excel(writer, accuracy, 'Figure 1E', writer_args)

def write_2A(writer, writer_args):
    """Write sheet 'Figure 2A': type labels and nbt-SNE coordinates per cell.

    Only cells with Age > 20 in the transcriptional label table are written.
    The label value 'Other' is read as missing.
    """
    read_options = {'sep':'\t', 'header':0, 'index_col':0}
    embedding = pd.read_csv('Mapping/Embeddings/Lab_Pvalb-tpm.tsv', **read_options)
    labels = pd.read_csv('Datasets/Lab_Pvalb-transcriptional_labels.tsv',
                         na_values='Other', **read_options)

    labels = labels.loc[labels.Age > 20]
    embedding = embedding.loc[labels.index, :]

    type_columns = labels.loc[:, ['Morph-PV-types', 'proMMT-PV-types']]
    coordinates = embedding.loc[:, ['nbt-SNE_X', 'nbt-SNE_Y']]
    df = pd.concat((type_columns, coordinates), axis=1)
    df.columns = ['Morphology', 'proMMT classification', 'nbt-SNE X-coordinate', 'nbt-SNE Y-coordinate']

    write_excel(writer, df, 'Figure 2A', writer_args)

def write_2C(writer, writer_args):
    """Write sheet 'Figure 2C': proMMT differential genes at FDR < 0.05, sorted by FDR."""
    table = get_differential_data(['proMMT'], fdr=0.05)
    table = table.sort_values('p-adjusted (FDR)')
    write_excel(writer, table, 'Figure 2C', writer_args)

def write_2E(writer, writer_args):
    """Write sheet 'Figure 2E': percentage of cells mapped to each cell type.

    The 'Cell Mappings (knn = 25)' sheet holds one column per reference gene
    set. For every column, the cells with Age > 20 are counted per mapped
    value: 2 is reported as 'Pvalb.Tac1', 3 as 'Pvalb.C1ql1', and every other
    value as 'Other'. Counts are then converted to column percentages.
    """
    fname = 'Mapping/Excel/Map_to_Cell_Types.xlsx'
    df = pd.read_excel(fname, sheet_name='Cell Mappings (knn = 25)', index_col=[0,1]).fillna(0).astype(int)

    fname = 'Datasets/Lab_Pvalb-labels.tsv'
    df_labels = pd.read_csv(fname, sep='\t', header=0, index_col=0)
    df_labels = df_labels.loc[df_labels.Age>20]
    df = df.loc[df.index.get_level_values('Cell').isin(df_labels.index)]

    # DataFrame.iteritems was removed in pandas 2.0, so items() is used.
    # The counts are keyed by column name because value_counts() names its
    # result 'count' in pandas 2.x, which would otherwise lose the gene-set
    # labels selected further down.
    counts_per_column = {col: data.value_counts() for col, data in df.items()}
    df = pd.concat(counts_per_column, axis=1, join='outer').fillna(0).astype(int)
    df.index = df.index.map({2:'Pvalb.Tac1', 3:'Pvalb.C1ql1'}).fillna('Other')
    df_counts = df.groupby(df.index).sum()

    df_counts = df_counts / df_counts.sum(axis=0) * 100
    columns = ['Chi_Squared',
               'Correlation',
               'F_Classif',
               'Harris_Genes',
               'Mutual_Info',
               'PV_Pvalb_Kobak_Lin'
              ]
    column_names = ['Chi-squared',
                    'Correlation',
                    'F-classification',
                    'Harris et al.',
                    'Mutual information',
                    'Kobak on Pvalb'
                   ]
    df_counts = df_counts.loc[:,columns].copy()
    df_counts.columns=column_names

    write_excel(writer, df_counts, 'Figure 2E', writer_args)

    return

def write_3A(writer, writer_args):
    """Write sheet 'Figure 3A': per-cell-type summary of the mature cells' electrophysiology."""
    summary = summarize_ephys_data(get_mature_ephys())
    write_excel(writer, summary, 'Figure 3A', writer_args)

def write_3B(writer, writer_args):
    """Write sheet 'Figure 3B': type labels and UMAP coordinates per cell.

    The rows are the cells of the electrophysiology embedding; the four type
    columns come from the transcriptional label table.
    """
    df_embed = pd.read_csv('Mapping/Embeddings/Lab_Pvalb-ephys.tsv', sep='\t', header=0, index_col=0)

    # The original also assigned 'References/marker_ref.txt' to fname here,
    # but overwrote it before any use; that dead assignment was dropped.
    fname = 'Datasets/Lab_Pvalb-transcriptional_labels.tsv'
    kwargs = {'sep':'\t', 'header':0, 'index_col':0}
    df_labels = pd.read_csv(fname, **kwargs)
    columns = ['MorphDirectional-PV-types', 'MorphMarker-PV-types', 'proMMT-PV-types', 'Morph-PV-types']
    column_names = ['Dendro-morphological types',
                    'Axo-morphological types',
                    'proMMT types',
                    '5 morphological types',
                   ]
    df = df_labels.loc[df_embed.index, columns].copy()
    df.columns = column_names
    df['UMAP_X'] = df_embed['UMAP_X']
    df['UMAP_Y'] = df_embed['UMAP_Y']

    write_excel(writer, df, 'Figure 3B', writer_args)

    return

def write_4AE(writer, writer_args):
    """Write sheet 'Figure 4A-E': differential genes of the three morphological groupings.

    Genes are collected at FDR < .15 for each grouping and sorted by FDR.
    """
    groupings = ['Morph-PV-types', 'MorphMarker-PV-types', 'MorphDirectional-PV-types']
    table = get_differential_data(groupings, fdr=.15)
    table = table.sort_values('p-adjusted (FDR)')
    write_excel(writer, table, 'Figure 4A-E', writer_args)

def write_5B(writer, writer_args):
    """Write the three 'Figure 5B' sheets.

    'Figure 5B_1' and 'Figure 5B_2' hold the fold changes and FDRs of the 8
    most significant genes of the 'Morph_CAM' comparisons. 'Figure 5B_3'
    holds the fold change and FDR of the first 'Lab_CAM' comparison for its 8
    most significant genes.
    """
    df_fc, df_fdr = get_differential_stats('Morph_CAM', count=8)
    write_excel(writer, df_fc, 'Figure 5B_1', writer_args)
    write_excel(writer, df_fdr, 'Figure 5B_2', writer_args)

    df_fc, df_fdr = get_differential_stats('Lab_CAM', count=8)
    # np.nan rather than the np.NaN alias, which NumPy 2.0 removed.
    df = pd.DataFrame(np.nan, index=df_fc.index, columns=['log2FC', 'FDR'])
    # Only the first comparison (column 0) of each frame is reported.
    df['FDR'] = df_fdr.iloc[:,0]
    df['log2FC'] = df_fc.iloc[:,0]
    write_excel(writer, df, 'Figure 5B_3', writer_args)

    return

def write_5C(writer, writer_args):
    """Write the three 'CAM Data' similarity tables to sheets 'Figure 5C_1' to 'Figure 5C_3'."""
    sheets = (
        ('Morphology', 'Figure 5C_1'),
        ('Axomorphic', 'Figure 5C_2'),
        ('Dendromorphic', 'Figure 5C_3'),
    )
    for file_name, sheet_name in sheets:
        similarity = pd.read_csv('CAM Data/%s.tsv' % file_name, sep='\t', header=0, index_col=0)
        write_excel(writer, similarity, sheet_name, writer_args)

def write_5D(writer, writer_args):
    """Write sheet 'Figure 5D': the CAM heatmap with columns grouped by cell type.

    The table has a two-row header. Its columns are reordered by their
    'CellType' level following ``order``; the ' ' entries select the columns
    whose cell type is a single space.
    """
    heatmap = pd.read_csv('CAM Data/PNAS heatmap.tsv', sep='\t', header=[0,1], index_col=0)

    order = ['vAAC', ' ', 'vBIC', ' ', 'hBIC', ' ', 'vBC', ' ', 'hBC']
    celltypes = heatmap.columns.get_level_values('CellType')
    cells = []
    for celltype in order:
        cells.extend(heatmap.columns[celltypes==celltype])
    heatmap = heatmap.loc[:,cells].copy()

    write_excel(writer, heatmap, 'Figure 5D', writer_args)

def write_6BD(writer, writer_args):
    """Write the Sholl table returned by ``get_sholl_data`` to sheet 'Figure 6B-D'."""
    write_excel(writer, get_sholl_data(), 'Figure 6B-D', writer_args)

def write_6E(writer, writer_args):
    """Write sheet 'Figure 6E': summary and comparison of '<P21' vs '>P21' vBC electrophysiology."""
    summary = summarize_ephys_data(get_young_old_ephys())
    write_excel(writer, summary, 'Figure 6E', writer_args)

def write_6F(writer, writer_args):
    """Write 'Mapping/Embeddings/Lab_Pvalb_vBC-electro.tsv' unchanged to sheet 'Figure 6F'."""
    embedding = pd.read_csv('Mapping/Embeddings/Lab_Pvalb_vBC-electro.tsv',
                            sep='\t', header=0, index_col=0)
    write_excel(writer, embedding, 'Figure 6F', writer_args)

def write_7A(writer, writer_args):
    """Write sheet 'Figure 7A': gene scores with P_Adj <= .1.

    Rows are sorted by 'Up_Down' and then 'Cutoff_Age', both descending.
    """
    scores = pd.read_csv('Gini Monte Carlo/Gene Scores.tsv', sep='\t', header=0, index_col=0)

    scores = scores.loc[scores.P_Adj <= .1].copy()
    scores = scores.sort_values(['Up_Down', 'Cutoff_Age'], ascending=False)
    scores = scores.loc[:, ['Cutoff_Age', 'P_Value', 'P_Adj', 'Up_Down']].copy()
    scores.columns = ['Switching Age', 'p-value', 'p-adjusted (FDR)', 'Up or down regulated']

    write_excel(writer, scores, 'Figure 7A', writer_args)

def write_7C(writer, writer_args):
    """Write sheet 'Figure 7C': per-cell age-prediction rate.

    For every cell of the transcriptional label table, the prediction columns
    (all but the first column of the predictions file) are averaged and
    expressed as a percentage. Rows are sorted by age.
    """
    kwargs = {'sep':'\t', 'header':0, 'index_col':0}
    fname = 'Datasets/Lab_Pvalb-transcriptional_labels.tsv'
    df_labels = pd.read_csv(fname, **kwargs)

    fname = 'Datasets/Lab_Pvalb-Age-Predictions.tsv'
    df_predict = pd.read_csv(fname, **kwargs)
    # NOTE(review): the first column is skipped; presumably it is not a
    # prediction -- confirm against the file.
    df_predict = df_predict.loc[df_labels.index,:].iloc[:,1:].copy()

    columns = ['CellType', 'Age', '>P25 Prediction Rate (%)']
    # np.nan rather than the np.NaN alias, which NumPy 2.0 removed.
    df_summary = pd.DataFrame(np.nan, index=df_predict.index, columns=columns)
    df_summary['CellType'] = df_labels['Morph-PV-types']
    df_summary['Age'] = df_labels['Age']
    df_summary['>P25 Prediction Rate (%)'] = df_predict.mean(axis=1) * 100
    df_summary.sort_values('Age', inplace=True)

    write_excel(writer, df_summary, 'Figure 7C', writer_args)
    return

def write_figure(writer, writer_args, fignum):
    """Write every sheet belonging to one figure.

    Args:
        writer: open ``pd.ExcelWriter`` passed on to the sheet writers.
        writer_args: dict of writer options passed on to the sheet writers.
        fignum: figure number, 1 to 7.

    Raises:
        KeyError: if ``fignum`` is not one of the known figure numbers.
    """
    ref_funcs = {
        1: (write_1C, write_1D, write_1E),
        2: (write_2A, write_2C, write_2E),
        3: (write_3A, write_3B),
        4: (write_4AE,),
        5: (write_5B, write_5C, write_5D),
        6: (write_6BD, write_6E, write_6F),
        7: (write_7A, write_7C),
    }

    for sheet_writer in ref_funcs[fignum]:
        sheet_writer(writer, writer_args)

# Create Excel

In [4]:
# Build the whole workbook: the summary sheet first, then figures 1 to 7.
with pd.ExcelWriter('Manuscripts/Que_et_al.xlsx', engine='xlsxwriter') as writer:
    workbook = writer.book
    # One shared cell format (wrapped, top-aligned) is applied to every column.
    base_format = workbook.add_format({'text_wrap': True, 'valign': 'top'})
    kwargs = {'format': base_format}

    write_summary(writer, kwargs)
    for figure in range(1, 8):
        write_figure(writer, kwargs, figure)

  import sys
