Spyros has gone through the plate annotations and re-annotated the cells using the controlled vocabulary of the [Cell Ontology](http://cellontology.org)

In [1]:
import pandas as pd
import os
import numpy as np

from maca import clean_annotation, clean_labels

metadata_folder = os.path.join('..', 'metadata' )

summary_folder = os.path.join(metadata_folder, 'summary')

annotation_folder = os.path.join(metadata_folder, 'manual_annotations')


%load_ext autoreload
%autoreload 2

In [2]:
ontology_renamer = {"Class": 'cell_ontology_class', 'Term IRI': "cell_ontology_term_iri"}
ontology_renamer

{'Class': 'cell_ontology_class', 'Term IRI': 'cell_ontology_term_iri'}

In [3]:


csv = os.path.join(annotation_folder, 'maca_3month_onotology_annotations_plates.csv')
plates_ontology = pd.read_csv(csv)
plates_ontology = plates_ontology.fillna('.')
plates_ontology['annotation_cleaned'] = plates_ontology['annotation_cleaned'].map(lambda x: x.split('_cells')[0]
        if not x in ('t_cells', 'b_cells') else x)

plates_ontology['annotation_cleaned'] = plates_ontology['annotation_cleaned'].replace(
    'macrophages/dendritic','myeloid')
plates_ontology['annotation_cleaned'] = plates_ontology['annotation_cleaned'].replace(
    'stromal_mesenchymal_stem_cell','stromal_stem')


old_to_new_name = {'Mammary_Gland': "Mammary",
            "Brain_FACS_microglia": "Brain_Microglia",
            "Brain_FACS_neurons": "Brain_Neurons"}
plates_ontology['tissue'] = plates_ontology['tissue'].replace(old_to_new_name)
# cols_to_drop = [x for x in plates_with_ontology if x.startswith('Unnamed')]
# plates_with_ontology = plates_with_ontology.drop(cols_to_drop, axis=1)
print(plates_ontology.shape)
plates_ontology.head(20)

(211, 8)


Unnamed: 0,tissue,annotation_original,subannotation_original,annotation_cleaned,subannotation_cleaned,n_cells,Class,Term IRI
0,Aorta,adipocytes,.,adipocytes,.,54,epicardial adipocyte,http://purl.obolibrary.org/obo/CL_1000309
1,Aorta,heterogenous group of cells,adipocytes,adipocytes,.,39,epicardial adipocyte,http://purl.obolibrary.org/obo/CL_1000309
2,Aorta,endothelial cells,.,endothelial,.,131,endothelial cell,http://purl.obolibrary.org/obo/CL_0000115
3,Aorta,fibroblasts,.,fibroblasts,.,35,fibroblast,http://purl.obolibrary.org/obo/CL_0000057
4,Aorta,hematopoetic cells,.,hematopoietic,.,17,hematopoietic cell,http://purl.obolibrary.org/obo/CL_0000988
5,Aorta,heterogenous group of cells,smooth muscle cells,smooth_muscle,.,37,smooth muscle cell,http://purl.obolibrary.org/obo/CL_0000192
6,Aorta,heterogenous group of cells,erythroblasts and adipocytes,unknown,.,51,.,.
7,Bladder,Basal,.,basal,.,99,basal cell of urothelium,http://purl.obolibrary.org/obo/CL_1000486
8,Bladder,LuminalA1,.,luminal,a,191,bladder cell,http://purl.obolibrary.org/obo/CL_1001319
9,Bladder,LuminalA2,.,luminal,a,245,bladder cell,http://purl.obolibrary.org/obo/CL_1001319


In [4]:
plates_ontology.head().values

array([['Aorta', 'adipocytes', '.', 'adipocytes', '.', 54,
        'epicardial adipocyte', 'http://purl.obolibrary.org/obo/CL_1000309'],
       ['Aorta', 'heterogenous group of cells', 'adipocytes', 'adipocytes',
        '.', 39, 'epicardial adipocyte',
        'http://purl.obolibrary.org/obo/CL_1000309'],
       ['Aorta', 'endothelial cells', '.', 'endothelial', '.', 131,
        'endothelial cell', 'http://purl.obolibrary.org/obo/CL_0000115'],
       ['Aorta', 'fibroblasts', '.', 'fibroblasts', '.', 35, 'fibroblast',
        'http://purl.obolibrary.org/obo/CL_0000057'],
       ['Aorta', 'hematopoetic cells', '.', 'hematopoietic', '.', 17,
        'hematopoietic cell', 'http://purl.obolibrary.org/obo/CL_0000988']], dtype=object)

In [5]:
plates_ontology.groupby(['tissue', 'annotation_cleaned','subannotation_cleaned', 'Class']).size().to_frame()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,0
tissue,annotation_cleaned,subannotation_cleaned,Class,Unnamed: 4_level_1
Aorta,adipocytes,.,epicardial adipocyte,2
Aorta,endothelial,.,endothelial cell,1
Aorta,fibroblasts,.,fibroblast,1
Aorta,hematopoietic,.,hematopoietic cell,1
Aorta,smooth_muscle,.,smooth muscle cell,1
Aorta,unknown,.,.,1
Bladder,basal,.,basal cell of urothelium,1
Bladder,luminal,a,bladder cell,2
Bladder,luminal,b,bladder cell,1
Bladder,mesenchymal,a,mesenchymal cell,2


In [6]:
ontology_cols = ['tissue', 'annotation_cleaned', 'subannotation_cleaned', 'Class', 'Term IRI']

# plate_ontology_subset = plates_ontology[ontology_cols].replace('.', np.nan)
print(plates_ontology.shape)
plates_ontology = plates_ontology.drop_duplicates(subset=ontology_cols)
print(plates_ontology.shape)
plates_ontology.head()

(211, 8)
(155, 8)


Unnamed: 0,tissue,annotation_original,subannotation_original,annotation_cleaned,subannotation_cleaned,n_cells,Class,Term IRI
0,Aorta,adipocytes,.,adipocytes,.,54,epicardial adipocyte,http://purl.obolibrary.org/obo/CL_1000309
2,Aorta,endothelial cells,.,endothelial,.,131,endothelial cell,http://purl.obolibrary.org/obo/CL_0000115
3,Aorta,fibroblasts,.,fibroblasts,.,35,fibroblast,http://purl.obolibrary.org/obo/CL_0000057
4,Aorta,hematopoetic cells,.,hematopoietic,.,17,hematopoietic cell,http://purl.obolibrary.org/obo/CL_0000988
5,Aorta,heterogenous group of cells,smooth muscle cells,smooth_muscle,.,37,smooth muscle cell,http://purl.obolibrary.org/obo/CL_0000192


In [7]:
plate_ontology_subset = plates_ontology[ontology_cols]
plate_ontology_subset = plate_ontology_subset.set_index(['tissue', 'annotation_cleaned', 'subannotation_cleaned'])
plate_ontology_subset = plate_ontology_subset.sort_index()
plate_ontology_subset.index.names = ['tissue', 'annotation', 'subannotation']
print(plate_ontology_subset.shape)
plate_ontology_subset.head()

(155, 2)


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Class,Term IRI
tissue,annotation,subannotation,Unnamed: 3_level_1,Unnamed: 4_level_1
Aorta,adipocytes,.,epicardial adipocyte,http://purl.obolibrary.org/obo/CL_1000309
Aorta,endothelial,.,endothelial cell,http://purl.obolibrary.org/obo/CL_0000115
Aorta,fibroblasts,.,fibroblast,http://purl.obolibrary.org/obo/CL_0000057
Aorta,hematopoietic,.,hematopoietic cell,http://purl.obolibrary.org/obo/CL_0000988
Aorta,smooth_muscle,.,smooth muscle cell,http://purl.obolibrary.org/obo/CL_0000192


### Read annotation

In [8]:
csv = os.path.join(summary_folder, 'maca_3month_annotations_plates.csv')
plate_annotation = pd.read_csv(csv, index_col=0)
plate_annotation = plate_annotation.fillna('.')
print(plate_annotation.shape)
plate_annotation.head()

(42192, 5)


Unnamed: 0,annotation,plate.barcode,subannotation,tissue,annotation_subannotation
A21.MAA000594.3_8_M.1.1,fibroblasts,MAA000594,.,Aorta,fibroblasts
F8.MAA000594.3_8_M.1.1,unknown,MAA000594,.,Aorta,unknown
H11.MAA000594.3_8_M.1.1,unknown,MAA000594,.,Aorta,unknown
A22.MAA000594.3_8_M.1.1,unknown,MAA000594,.,Aorta,unknown
H12.MAA000594.3_8_M.1.1,adipocytes,MAA000594,.,Aorta,adipocytes


In [9]:
left_on = ['tissue', 'annotation', 'subannotation']


In [10]:
plate_annotation.query('tissue == "Thymus"').groupby(left_on).size().to_frame()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,0
tissue,annotation,subannotation,Unnamed: 3_level_1
Thymus,stromal_stem,.,33
Thymus,t_cells,double_negative1,32
Thymus,t_cells,double_negative4,92
Thymus,t_cells,double_positive,44
Thymus,t_cells,double_positive_favoring_cd8+,637
Thymus,t_cells,double_positive_rapidly_dividing_favoring_cd8+,103
Thymus,t_cells,immature_single_positive_cd4+,280
Thymus,t_cells,immature_single_positive_cd8+,62


In [11]:
plate_ontology_subset.loc['Thymus', :]

Unnamed: 0_level_0,Unnamed: 1_level_0,Class,Term IRI
annotation,subannotation,Unnamed: 2_level_1,Unnamed: 3_level_1
stromal_stem,.,mesenchymal stem cell,http://purl.obolibrary.org/obo/CL_0000134
t_cells,double_negative1,T cell,http://purl.obolibrary.org/obo/CL_0000084
t_cells,double_negative4,T cell,http://purl.obolibrary.org/obo/CL_0000084
t_cells,double_positive,T cell,http://purl.obolibrary.org/obo/CL_0000084
t_cells,double_positive_favoring_cd8+,T cell,http://purl.obolibrary.org/obo/CL_0000084
t_cells,double_positive_rapidly_dividing_favoring_cd8+,T cell,http://purl.obolibrary.org/obo/CL_0000084
t_cells,immature_single_positive_cd4+,T cell,http://purl.obolibrary.org/obo/CL_0000084
t_cells,immature_single_positive_cd8+,T cell,http://purl.obolibrary.org/obo/CL_0000084


In [12]:
# right_on = ['tissue', 'annotation_cleaned', 'subannotation_cleaned']
plate_annotation_with_ontology = plate_annotation.join(
    plate_ontology_subset, on=left_on)
# plate_annotation_with_ontology = plate_annotation_with_ontology.drop_duplicates()
print(plate_annotation_with_ontology.shape)
plate_annotation_with_ontology.fillna('NA').groupby(left_on + ['Class']).size().to_frame()

(42192, 7)


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,0
tissue,annotation,subannotation,Class,Unnamed: 4_level_1
Aorta,adipocytes,.,epicardial adipocyte,93
Aorta,endothelial,.,endothelial cell,131
Aorta,fibroblasts,.,fibroblast,35
Aorta,hematopoietic,.,hematopoietic cell,17
Aorta,smooth_muscle,.,smooth muscle cell,37
Aorta,unknown,.,.,51
Bladder,basal,.,basal cell of urothelium,99
Bladder,luminal,a,bladder cell,436
Bladder,luminal,b,bladder cell,96
Bladder,mesenchymal,a,mesenchymal cell,335


In [13]:
plate_annotation_with_ontology.loc[plate_annotation_with_ontology['Class'].isnull()].drop_duplicates()

Unnamed: 0,annotation,plate.barcode,subannotation,tissue,annotation_subannotation,Class,Term IRI


In [14]:
plate_annotation_with_ontology.head()

Unnamed: 0,annotation,plate.barcode,subannotation,tissue,annotation_subannotation,Class,Term IRI
A21.MAA000594.3_8_M.1.1,fibroblasts,MAA000594,.,Aorta,fibroblasts,fibroblast,http://purl.obolibrary.org/obo/CL_0000057
F8.MAA000594.3_8_M.1.1,unknown,MAA000594,.,Aorta,unknown,.,.
H11.MAA000594.3_8_M.1.1,unknown,MAA000594,.,Aorta,unknown,.,.
A22.MAA000594.3_8_M.1.1,unknown,MAA000594,.,Aorta,unknown,.,.
H12.MAA000594.3_8_M.1.1,adipocytes,MAA000594,.,Aorta,adipocytes,epicardial adipocyte,http://purl.obolibrary.org/obo/CL_1000309


### Unify NAs

In [15]:
plate_annotation_with_ontology = plate_annotation_with_ontology.replace('.', np.nan).replace('NA', np.nan)
print(plate_annotation_with_ontology.isnull().sum().sum())
plate_annotation_with_ontology[['Class', 'Term IRI']] = plate_annotation_with_ontology[['Class', 'Term IRI']].fillna('unknown')
print(plate_annotation_with_ontology.isnull().sum().sum())
plate_annotation_with_ontology.head()

27047
26593


Unnamed: 0,annotation,plate.barcode,subannotation,tissue,annotation_subannotation,Class,Term IRI
A21.MAA000594.3_8_M.1.1,fibroblasts,MAA000594,,Aorta,fibroblasts,fibroblast,http://purl.obolibrary.org/obo/CL_0000057
F8.MAA000594.3_8_M.1.1,unknown,MAA000594,,Aorta,unknown,unknown,unknown
H11.MAA000594.3_8_M.1.1,unknown,MAA000594,,Aorta,unknown,unknown,unknown
A22.MAA000594.3_8_M.1.1,unknown,MAA000594,,Aorta,unknown,unknown,unknown
H12.MAA000594.3_8_M.1.1,adipocytes,MAA000594,,Aorta,adipocytes,epicardial adipocyte,http://purl.obolibrary.org/obo/CL_1000309


### Add cell ontology ID

In [16]:
plate_annotation_with_ontology['cell_ontology_id'] = 'CL:' + plate_annotation_with_ontology['Term IRI'].str.split('_').str[-1]
plate_annotation_with_ontology.head()

Unnamed: 0,annotation,plate.barcode,subannotation,tissue,annotation_subannotation,Class,Term IRI,cell_ontology_id
A21.MAA000594.3_8_M.1.1,fibroblasts,MAA000594,,Aorta,fibroblasts,fibroblast,http://purl.obolibrary.org/obo/CL_0000057,CL:0000057
F8.MAA000594.3_8_M.1.1,unknown,MAA000594,,Aorta,unknown,unknown,unknown,CL:unknown
H11.MAA000594.3_8_M.1.1,unknown,MAA000594,,Aorta,unknown,unknown,unknown,CL:unknown
A22.MAA000594.3_8_M.1.1,unknown,MAA000594,,Aorta,unknown,unknown,unknown,CL:unknown
H12.MAA000594.3_8_M.1.1,adipocytes,MAA000594,,Aorta,adipocytes,epicardial adipocyte,http://purl.obolibrary.org/obo/CL_1000309,CL:1000309


### Make all columns lowercase with underscores

In [17]:
plate_annotation_with_ontology = plate_annotation_with_ontology.rename(columns=ontology_renamer)
plate_annotation_with_ontology.head()

Unnamed: 0,annotation,plate.barcode,subannotation,tissue,annotation_subannotation,cell_ontology_class,cell_ontology_term_iri,cell_ontology_id
A21.MAA000594.3_8_M.1.1,fibroblasts,MAA000594,,Aorta,fibroblasts,fibroblast,http://purl.obolibrary.org/obo/CL_0000057,CL:0000057
F8.MAA000594.3_8_M.1.1,unknown,MAA000594,,Aorta,unknown,unknown,unknown,CL:unknown
H11.MAA000594.3_8_M.1.1,unknown,MAA000594,,Aorta,unknown,unknown,unknown,CL:unknown
A22.MAA000594.3_8_M.1.1,unknown,MAA000594,,Aorta,unknown,unknown,unknown,CL:unknown
H12.MAA000594.3_8_M.1.1,adipocytes,MAA000594,,Aorta,adipocytes,epicardial adipocyte,http://purl.obolibrary.org/obo/CL_1000309,CL:1000309


### Write CSV

In [18]:
csv = os.path.join(metadata_folder, 'maca_3month_annotations_plates_ontology.csv')
plate_annotation_with_ontology.to_csv(csv)

## Replace 10x tooo

### Read 10x ontology

In [19]:
csv = os.path.join(annotation_folder, 'maca_3month_onotology_annotations_10x.csv')
tenx_ontology = pd.read_csv(csv)
tenx_ontology = tenx_ontology.fillna('.')
tenx_ontology['annotation_cleaned'] = tenx_ontology['annotation_cleaned'].map(lambda x: x.split('_cells')[0]
        if not x in ('t_cells', 'b_cells') else x)

tenx_ontology['annotation_cleaned'] = tenx_ontology['annotation_cleaned'].replace(
    'macrophages/dendritic','myeloid')
tenx_ontology['annotation_cleaned'] = tenx_ontology['annotation_cleaned'].replace(
    'stromal_mesenchymal_stem_cell','stromal_stem')


old_to_new_name = {'Mammary_Gland': "Mammary",
            "Brain_FACS_microglia": "Brain_Microglia",
            "Brain_FACS_neurons": "Brain_Neurons"}
tenx_ontology['tissue'] = tenx_ontology['tissue'].replace(old_to_new_name)
# cols_to_drop = [x for x in tenx_with_ontology if x.startswith('Unnamed')]
# tenx_with_ontology = tenx_with_ontology.drop(cols_to_drop, axis=1)
print(tenx_ontology.shape)
tenx_ontology.head(20)

(125, 8)


Unnamed: 0,tissue,annotation_original,subannotation_original,annotation_cleaned,subannotation_cleaned,n_cells,Class,Term IRI
0,Bladder,Basal,.,basal,.,266,basal cell of urothelium,http://purl.obolibrary.org/obo/CL_1000486
1,Bladder,Endothelial cells,.,endothelial,.,68,endothelial cell,http://purl.obolibrary.org/obo/CL_0000115
2,Bladder,Immune cells,.,immune,.,57,leukocyte,http://purl.obolibrary.org/obo/CL_0000738
3,Bladder,LuminalA1,.,luminal,a,313,bladder cell,http://purl.obolibrary.org/obo/CL_1001319
4,Bladder,LuminalA2,.,luminal,a,219,bladder cell,http://purl.obolibrary.org/obo/CL_1001319
5,Bladder,LuminalB,.,luminal,b,391,bladder cell,http://purl.obolibrary.org/obo/CL_1001319
6,Bladder,MesenchymalA1,.,mesenchymal,a,169,mesenchymal cell,http://purl.obolibrary.org/obo/CL_0008019
7,Bladder,MesenchymalA2,.,mesenchymal,a,501,mesenchymal cell,http://purl.obolibrary.org/obo/CL_0008019
8,Bladder,MesenchymalB1,.,mesenchymal,b,187,mesenchymal cell,http://purl.obolibrary.org/obo/CL_0008019
9,Bladder,MesenchymalB2,.,mesenchymal,b,329,mesenchymal cell,http://purl.obolibrary.org/obo/CL_0008019


In [20]:
# plate_ontology_subset = tenx_ontology[ontology_cols].replace('.', np.nan)
print(tenx_ontology.shape)
tenx_ontology = tenx_ontology.drop_duplicates(subset=ontology_cols)
print(tenx_ontology.shape)
tenx_ontology.head()

(125, 8)
(110, 8)


Unnamed: 0,tissue,annotation_original,subannotation_original,annotation_cleaned,subannotation_cleaned,n_cells,Class,Term IRI
0,Bladder,Basal,.,basal,.,266,basal cell of urothelium,http://purl.obolibrary.org/obo/CL_1000486
1,Bladder,Endothelial cells,.,endothelial,.,68,endothelial cell,http://purl.obolibrary.org/obo/CL_0000115
2,Bladder,Immune cells,.,immune,.,57,leukocyte,http://purl.obolibrary.org/obo/CL_0000738
3,Bladder,LuminalA1,.,luminal,a,313,bladder cell,http://purl.obolibrary.org/obo/CL_1001319
5,Bladder,LuminalB,.,luminal,b,391,bladder cell,http://purl.obolibrary.org/obo/CL_1001319


In [21]:
tenx_ontology_subset = tenx_ontology[ontology_cols]
tenx_ontology_subset = tenx_ontology_subset.set_index(['tissue', 'annotation_cleaned', 'subannotation_cleaned'])
tenx_ontology_subset = tenx_ontology_subset.sort_index()
tenx_ontology_subset.index.names = ['tissue', 'annotation', 'subannotation']
print(tenx_ontology_subset.shape)
tenx_ontology_subset.head()

(110, 2)


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Class,Term IRI
tissue,annotation,subannotation,Unnamed: 3_level_1,Unnamed: 4_level_1
Bladder,basal,.,basal cell of urothelium,http://purl.obolibrary.org/obo/CL_1000486
Bladder,endothelial,.,endothelial cell,http://purl.obolibrary.org/obo/CL_0000115
Bladder,immune,.,leukocyte,http://purl.obolibrary.org/obo/CL_0000738
Bladder,luminal,a,bladder cell,http://purl.obolibrary.org/obo/CL_1001319
Bladder,luminal,b,bladder cell,http://purl.obolibrary.org/obo/CL_1001319


### Read 10x annotation

In [22]:
csv = os.path.join(summary_folder, 'maca_3month_annotations_10x.csv')
tenx_annotation = pd.read_csv(csv, index_col=0)
tenx_annotation = tenx_annotation.fillna('.')
print(tenx_annotation.shape)
tenx_annotation.head()

(54837, 4)


Unnamed: 0,annotation,subannotation,tissue,annotation_subannotation
10X_P4_3_AAAGTAGAGATGCCAG,mesenchymal,b,Bladder,mesenchymal: b
10X_P4_3_AACCGCGTCCAACCAA,mesenchymal,a,Bladder,mesenchymal: a
10X_P4_3_AACTCCCGTCGGGTCT,mesenchymal,a,Bladder,mesenchymal: a
10X_P4_3_AACTCTTAGTTGCAGG,luminal,a,Bladder,luminal: a
10X_P4_3_AACTCTTTCATAACCG,mesenchymal,a,Bladder,mesenchymal: a


### Join the ontology and annotations

In [23]:
# right_on = ['tissue', 'annotation_cleaned', 'subannotation_cleaned']
tenx_annotation_with_ontology = tenx_annotation.join(
    tenx_ontology_subset, on=left_on)
# tenx_annotation_with_ontology = tenx_annotation_with_ontology.drop_duplicates()
print(tenx_annotation_with_ontology.shape)
tenx_annotation_with_ontology.fillna('NA').groupby(left_on + ['Class']).size().to_frame()

(54837, 6)


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,0
tissue,annotation,subannotation,Class,Unnamed: 4_level_1
Bladder,basal,.,basal cell of urothelium,266
Bladder,endothelial,.,endothelial cell,68
Bladder,immune,.,leukocyte,57
Bladder,luminal,a,bladder cell,532
Bladder,luminal,b,bladder cell,391
Bladder,mesenchymal,a,mesenchymal cell,670
Bladder,mesenchymal,b,mesenchymal cell,516
Heart,cardiomyocytes,.,cardiac muscle cell,83
Heart,endothelial,coronary_vascular,endothelial cell,178
Heart,endothelial,endocardial,endocardial cell,65


In [24]:
tenx_annotation_with_ontology.loc[tenx_annotation_with_ontology['Class'].isnull()].drop_duplicates()

Unnamed: 0,annotation,subannotation,tissue,annotation_subannotation,Class,Term IRI


In [25]:
tenx_annotation_with_ontology.loc[tenx_annotation_with_ontology['annotation'] == 'unknown'].drop_duplicates()

Unnamed: 0,annotation,subannotation,tissue,annotation_subannotation,Class,Term IRI
10X_P7_8_ACTTGTTCATATGGTC,unknown,.,Lung,unknown,.,.
10X_P7_14_AAACCTGGTGCCTGGT,unknown,.,Muscle,unknown,.,.
10X_P4_0_AAGGCAGGTGGTTTCA,unknown,.,Tongue,unknown,.,.


### Unify NAs

In [26]:
tenx_annotation_with_ontology.isnull().sum().sum()

0

In [27]:
tenx_annotation_with_ontology = tenx_annotation_with_ontology.replace('.', np.nan)
print(tenx_annotation_with_ontology.isnull().sum().sum())
tenx_annotation_with_ontology[['Class', 'Term IRI']] = tenx_annotation_with_ontology[['Class', 'Term IRI']].fillna('unknown')
tenx_annotation_with_ontology.isnull().sum().sum()

29397


28601

### Rename ontology columns

In [28]:
tenx_annotation_with_ontology = tenx_annotation_with_ontology.rename(columns=ontology_renamer)
tenx_annotation_with_ontology.head()

Unnamed: 0,annotation,subannotation,tissue,annotation_subannotation,cell_ontology_class,cell_ontology_term_iri
10X_P4_3_AAAGTAGAGATGCCAG,mesenchymal,b,Bladder,mesenchymal: b,mesenchymal cell,http://purl.obolibrary.org/obo/CL_0008019
10X_P4_3_AACCGCGTCCAACCAA,mesenchymal,a,Bladder,mesenchymal: a,mesenchymal cell,http://purl.obolibrary.org/obo/CL_0008019
10X_P4_3_AACTCCCGTCGGGTCT,mesenchymal,a,Bladder,mesenchymal: a,mesenchymal cell,http://purl.obolibrary.org/obo/CL_0008019
10X_P4_3_AACTCTTAGTTGCAGG,luminal,a,Bladder,luminal: a,bladder cell,http://purl.obolibrary.org/obo/CL_1001319
10X_P4_3_AACTCTTTCATAACCG,mesenchymal,a,Bladder,mesenchymal: a,mesenchymal cell,http://purl.obolibrary.org/obo/CL_0008019


### Write CSVs

In [29]:
csv = os.path.join(metadata_folder, 'maca_3month_annotations_tenx.csv')
tenx_annotation_with_ontology.to_csv(csv)