In [1]:
import numpy as np
import pandas as pd

In [2]:
def read_morph_types():
    """
    Load the morphologically defined types of the cells

    Reads the tab-separated file Datasets/Lab_Pvalb-labels.tsv, using its first
    row as the header and its first column as the index. The remaining columns
    are relabelled by position (not by their names in the file) as
    Morphology, BodyPosition, Age, Gender and Hemisphere.

    Returns the relabelled DataFrame. pandas raises ValueError if the file
    does not have exactly five data columns.
    """

    morph_labels = pd.read_csv('Datasets/Lab_Pvalb-labels.tsv',
                               sep='\t',
                               header=0,
                               index_col=0
                              )

    # positional relabelling: the order here has to match the column order of the file
    morph_labels.columns = ['Morphology', 'BodyPosition', 'Age', 'Gender', 'Hemisphere']

    return morph_labels

def read_harris_mapping_types():
    """
    Alternative mapping labels taken from the nearest-neighbour workbook

    Reads sheet 'Mapped Types' of Mapping/Excel/Nearest_Neighbors.xlsx with every
    cell parsed as a string and the first column as the index, and returns the
    PV_Harris_Genes column with 'Continent ' prepended. Missing entries are
    returned as '' because the prefix is added before the blanks are filled.

    This was previously a first definition of read_mapping_types that Python
    silently replaced with the definition below, which made it unreachable.
    It has its own name now so that it can still be called; nothing calls it yet.
    """
    fname = 'Mapping/Excel/Nearest_Neighbors.xlsx'
    kwargs = {'sheet_name':'Mapped Types', 'index_col':0, 'dtype':str}
    df = pd.read_excel(fname, **kwargs)

    # prefix first, fill afterwards: a missing value stays missing through the
    # string concatenation and only then becomes ''
    return ('Continent ' + df['PV_Harris_Genes']).fillna('')

def read_mapping_types():
    """
    Get the transcriptomic mapping label of every cell

    Reads sheet 'Cell Mappings (knn = 25)' of Mapping/Excel/Map_to_Cell_Types.xlsx
    with every cell parsed as a string and the first column as the index, and
    returns the PV_Pvalb_Kobak_Lin column with 'Continent ' prepended.
    """
    fname = 'Mapping/Excel/Map_to_Cell_Types.xlsx'
    kwargs = {'sheet_name':'Cell Mappings (knn = 25)', 'index_col':0, 'dtype':str}
    df = pd.read_excel(fname, **kwargs)

    # NOTE(review): blanks are filled *before* the prefix is added, so a cell
    # without a mapping is returned as the bare string 'Continent ' (and not ''
    # as in read_harris_mapping_types) - confirm that this is what is wanted
    return 'Continent ' + df['PV_Pvalb_Kobak_Lin'].fillna('')

def read_prommt_types():
    """
    Get the proMMT type of every cell

    Reads the tab-separated file Matlab/Lab_Pvalb_144_144_nbtsne.tsv, skipping its
    first line and parsing the next two lines as a two-level column header.
    The first header level becomes the index of the result (presumably the
    cell identifiers - verify against the file), the second header level, with
    any leading or trailing '.' removed, becomes the values.

    Returns a Series named 'CellType'.
    """
    table = pd.read_csv('Matlab/Lab_Pvalb_144_144_nbtsne.tsv',
                        sep='\t',
                        header=[0,1],
                        index_col=None,
                        skiprows=1
                       )

    # only the two header levels are needed, the data rows are not used
    header = table.columns
    first_level = header.get_level_values(0)
    cell_types = header.get_level_values(1).str.strip('.')

    return pd.Series(cell_types, index=first_level, name='CellType')

def read_ephys_types():
    """
    Get the electrophysiologically defined type of every cell

    Reads the tab-separated file Mapping/LabelTypes/Lab_Pvalb-ephys.tsv, using its
    first row as the header and its first column as the index, and returns
    the EphysType column as a Series.
    """
    ephys_labels = pd.read_csv('Mapping/LabelTypes/Lab_Pvalb-ephys.tsv',
                               sep='\t',
                               header=0,
                               index_col=0
                              )

    return ephys_labels['EphysType']

def read_mouse_labels():
    """
    Get the gender and the recorded side for every entry of the mouse sheet

    Reads sheet 'Sheet1' of References/Cell_Info-2.xlsx with the first column as
    the index, keeps only the Gender and Side columns and replaces their codes
    with full words:
        Gender: 'unspecified' -> 'Too Young', 'M' -> 'Male', 'F' -> 'Female'
        Side:   'L' -> 'Left', 'R' -> 'Right'
    Any value that is not one of these codes becomes NaN.

    Returns a DataFrame with the columns Gender and Side.
    """
    gender_names = {'unspecified':'Too Young', 'M':'Male', 'F':'Female'}
    side_names = {'L':'Left', 'R':'Right'}

    sheet = pd.read_excel('References/Cell_Info-2.xlsx', sheet_name='Sheet1', index_col=0)
    codes = sheet[['Gender', 'Side']]

    # assign() returns a new frame with both columns replaced in place
    return codes.assign(Gender=codes['Gender'].map(gender_names),
                        Side=codes['Side'].map(side_names)
                       )

def add_mouse_parameters(df_cells):
    """
    Write the mouse labels into the given cell table

    The Gender column of read_mouse_labels() is stored as df_cells['Gender'] and
    its Side column as df_cells['Hemisphere'], matched on the index. Existing
    columns with these names are overwritten.

    df_cells is changed in place, nothing is returned.
    """
    mouse_labels = read_mouse_labels()

    # (column written to df_cells, column taken from the mouse labels)
    for target, source in (('Gender', 'Gender'), ('Hemisphere', 'Side')):
        df_cells[target] = mouse_labels[source]

def get_cell_types():
    """
    Collect the type of every cell according to each of the labelling methods

    Starts from the table of read_morph_types(), derives two coarser labels from
    its Morphology column and adds the labels of read_mapping_types(),
    read_prommt_types() and read_ephys_types(), each matched on the index.
    Morphology values outside of the lookup table give NaN in both derived columns.

    Returns a new DataFrame with the columns, in this order:
    Morph-PV-types, Soma-PV-types, MorphMarker-PV-types, MorphDirectional-PV-types,
    Mapping-PV-types, proMMT-PV-types, Ephys-PV-types, Gender-PV-types,
    Hemisphere-PV-types, Age
    """

    # morphology label -> (marker class, orientation)
    morph_lookup = {'vAAC':('AAC', 'Vertical'),
                    'vBC':('BC', 'Vertical'),
                    'hBC':('BC', 'Horizontal'),
                    'vBIC':('BIC', 'Vertical'),
                    'hBIC':('BIC', 'Horizontal')
                   }
    to_marker = {morph:marker for morph, (marker, _) in morph_lookup.items()}
    to_direction = {morph:direction for morph, (_, direction) in morph_lookup.items()}

    cells = read_morph_types()
    cells['MorphMarker'] = cells['Morphology'].map(to_marker)
    cells['MorphDirectional'] = cells['Morphology'].map(to_direction)
    cells['Mapping'] = read_mapping_types()
    cells['proMMT'] = read_prommt_types()
    cells['Ephys'] = read_ephys_types()
    # Gender and Hemisphere are the columns that read_morph_types() provides;
    # the add_mouse_parameters(cells) call that would overwrite them is disabled

    # internal column -> name in the returned table, in output order
    output_names = {'Morphology':'Morph-PV-types',
                    'BodyPosition':'Soma-PV-types',
                    'MorphMarker':'MorphMarker-PV-types',
                    'MorphDirectional':'MorphDirectional-PV-types',
                    'Mapping':'Mapping-PV-types',
                    'proMMT':'proMMT-PV-types',
                    'Ephys':'Ephys-PV-types',
                    'Gender':'Gender-PV-types',
                    'Hemisphere':'Hemisphere-PV-types',
                    'Age':'Age'
                   }

    return cells[list(output_names)].rename(columns=output_names)

In [3]:
# Build the combined label table (one row per cell, one column per labelling method)
df = get_cell_types()
# Export it as a tab-separated file; the index is written as the first column
df.to_csv('Datasets/Lab_Pvalb-transcriptional_labels.tsv', sep='\t')