In [1]:
import pandas as pd
import os

metadata_folder = os.path.join('..', 'metadata' )

%load_ext autoreload
%autoreload 2

pd.options.display.max_columns = 100

In [2]:
# Maps each old tissue / folder name to the name that replaces it.
old_to_new_name = dict(
    Mammary_Gland="Mammary",
    Brain_FACS_microglia="Brain_microglia",
    Brain_FACS_neurons="Brain_neurons",
)

## Rename plate metadata

In [3]:
# Load the plate metadata CSV from metadata_folder; the first CSV column
# becomes the row index.
csv = os.path.join(metadata_folder, 'MACA_Metadata.csv')
plates = pd.read_csv(csv, index_col=0)
# Report (rows, columns), then preview the first rows.
print(plates.shape)
plates.head()

(4190, 24)


Unnamed: 0,Lysis Plate Batch,dNTP.batch,oligodT.order.no,plate.type,preparation.site,date.prepared,date.sorted,tissue,subtissue,mouse.id,FACS.selection,nozzle.size,FACS.instument,Experiment ID,Columns sorted,Double check,Plate,Location,Comments,mouse.age,mouse.number,mouse.sex,mouse.parabiont,mouse.parabiont_pair
,,,,,,,,,,,,,,,,,,,,,,,,
MAA100002,,,,Biorad 96well,Stanford,,170522,?,?,?,?,?,?,,,,,,,,,,False,
MAA100021,,,,Biorad 96well,Stanford,,?,?,?,?,?,?,?,,,,,,,,,,False,
D041901,2.0,490668.0,4/25/17,Biorad HSP3805,Biohub,6/9/17,170614,Bladder,,1_7_M,Multiple,,,,,,1.0,MACA2_2,,1.0,7.0,M,False,
D041914,2.0,490668.0,4/25/17,Biorad HSP3805,Biohub,6/9/17,170615,Bladder,,3_8_M,Multiple,100,Aria,exp8,,,,MACA4_7,,3.0,8.0,M,False,
D042205,2.0,490668.0,4/25/17,Biorad HSP3805,Biohub,6/9/17,170614,Bladder,,1_6_M,Multiple,,,,,,1.0,MACA2_2,,1.0,6.0,M,False,


In [4]:
# List the column labels. Note that two labels in the displayed index,
# 'Experiment ID ' and 'Location ', carry a trailing space.
plates.columns

Index(['Lysis Plate Batch', 'dNTP.batch', 'oligodT.order.no', 'plate.type',
       'preparation.site', 'date.prepared', 'date.sorted', 'tissue',
       'subtissue', 'mouse.id', 'FACS.selection', 'nozzle.size',
       'FACS.instument', 'Experiment ID ', 'Columns sorted', 'Double check',
       'Plate', 'Location ', 'Comments', 'mouse.age', 'mouse.number',
       'mouse.sex', 'mouse.parabiont', 'mouse.parabiont_pair'],
      dtype='object')

In [5]:
# Number of plates per tissue. Only the grouping column needs its missing
# values replaced by "." so that those rows are still counted.
plates.assign(tissue=plates['tissue'].fillna(".")).groupby('tissue').size()

tissue
?                  2
Bladder          106
Brain            727
Colon            242
Fat              471
Heart            332
Kidney           149
Liver            181
Lung             228
Mammary_Gland     47
Marrow           422
Muscle           462
Pancreas         125
Skin             170
Spleen           149
Thymus           173
Tongue           108
Trachea           96
dtype: int64

In [6]:
# Keep only the rows whose tissue is exactly "Brain" ...
subset = plates.query('tissue == "Brain"')
# ... and count them per FACS.selection value; missing values show up as '.'.
subset.fillna('.').groupby('FACS.selection').size()

FACS.selection
.              4
Microglia    242
Multiple       2
Neurons      479
dtype: int64

In [11]:
# FACS selections that split the Brain tissue into separate labels.
valid_facs_selections = ('Microglia', 'Neurons')


def rename_tissue(row):
    """Return the updated tissue label for one row of plate metadata.

    Brain rows whose 'FACS.selection' is one of ``valid_facs_selections`` get
    that selection appended ('Brain_<selection>'); other Brain rows keep the
    plain 'Brain' label. Every other tissue is looked up in
    ``old_to_new_name`` and returned unchanged when it has no entry there.

    NOTE(review): the suffix keeps the capitalisation of 'FACS.selection'
    (e.g. 'Brain_Microglia'), whereas ``old_to_new_name`` maps to lower-case
    'Brain_microglia' / 'Brain_neurons' -- confirm which spelling is intended.

    Parameters
    ----------
    row : mapping with 'tissue' and 'FACS.selection' entries
        Typically a DataFrame row, as passed by ``DataFrame.apply(axis=1)``.

    Returns
    -------
    The new tissue label for the row.
    """
    tissue = row['tissue']

    # Non-brain tissues: plain dictionary rename with pass-through default.
    if tissue != 'Brain':
        return old_to_new_name.get(tissue, tissue)

    selection = row['FACS.selection']
    if selection in valid_facs_selections:
        return tissue + '_' + selection
    return tissue

In [12]:
# Compute the new label row by row and store it in a 'tissue_v2' column
# (the original 'tissue' column is left untouched).
plates['tissue_v2'] = plates.apply(rename_tissue, axis=1)
# Cross-tabulate old vs. new labels to check the mapping.
plates.groupby(['tissue', 'tissue_v2']).size()

tissue         tissue_v2      
?              ?                    2
Bladder        Bladder            106
Brain          Brain                6
               Brain_Microglia    242
               Brain_Neurons      479
Colon          Colon              242
Fat            Fat                471
Heart          Heart              332
Kidney         Kidney             149
Liver          Liver              181
Lung           Lung               228
Mammary_Gland  Mammary             47
Marrow         Marrow             422
Muscle         Muscle             462
Pancreas       Pancreas           125
Skin           Skin               170
Spleen         Spleen             149
Thymus         Thymus             173
Tongue         Tongue             108
Trachea        Trachea             96
dtype: int64

### Save the CSV!

In [16]:
# `csv` still holds the path the plate metadata was read from, so this
# overwrites that file with the frame that now includes 'tissue_v2'.
print(csv)
plates.to_csv(csv)

../metadata/MACA_Metadata.csv


## Rename 10x metadata

In [17]:
# Re-bind `csv` to the 10x metadata file and load it; the first CSV column
# becomes the row index.
csv = os.path.join(metadata_folder, 'MACA_10x.csv')
tenx = pd.read_csv(csv, index_col=0)
# Report (rows, columns), then preview the first rows.
print(tenx.shape)
tenx.head()

(94, 13)


Unnamed: 0_level_0,tenx.index,mouse.id,tissue,tissue.notes,mouse.age,mouse.sex,mouse.parabiont,mouse.parabiont_pair,pool,url.csv,url.mtx,url.genes,url.barcodes
channel,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
10X_P1_1,A3,30-M-2,Kidney,KIDNEY,30,M,False,,1,https://s3.amazonaws.com/czbiohub-maca/10x_dat...,https://s3.amazonaws.com/czbiohub-maca/10x_dat...,https://s3.amazonaws.com/czbiohub-maca/10x_dat...,https://s3.amazonaws.com/czbiohub-maca/10x_dat...
10X_P1_2,B3,30-M-2,Spleen,SPLEEN,30,M,False,,1,https://s3.amazonaws.com/czbiohub-maca/10x_dat...,https://s3.amazonaws.com/czbiohub-maca/10x_dat...,https://s3.amazonaws.com/czbiohub-maca/10x_dat...,https://s3.amazonaws.com/czbiohub-maca/10x_dat...
10X_P1_3,C3,30-M-2,Heart,HEART (ALL MINUS AORTA),30,M,False,,1,https://s3.amazonaws.com/czbiohub-maca/10x_dat...,https://s3.amazonaws.com/czbiohub-maca/10x_dat...,https://s3.amazonaws.com/czbiohub-maca/10x_dat...,https://s3.amazonaws.com/czbiohub-maca/10x_dat...
10X_P1_4,D3,30-M-2,Heart,HEART (LV+RV ONLY),30,M,False,,1,https://s3.amazonaws.com/czbiohub-maca/10x_dat...,https://s3.amazonaws.com/czbiohub-maca/10x_dat...,https://s3.amazonaws.com/czbiohub-maca/10x_dat...,https://s3.amazonaws.com/czbiohub-maca/10x_dat...
10X_P1_5,E3,30-M-2,Marrow,BM (NON-STC),30,M,False,,1,https://s3.amazonaws.com/czbiohub-maca/10x_dat...,https://s3.amazonaws.com/czbiohub-maca/10x_dat...,https://s3.amazonaws.com/czbiohub-maca/10x_dat...,https://s3.amazonaws.com/czbiohub-maca/10x_dat...


In [18]:
# Number of 10x rows per tissue label, before renaming.
tenx.groupby('tissue').size()

tissue
Bladder           4
Brain             9
Colon             3
Fat               2
Heart             6
Kidney           11
Liver            11
Lung             12
Mammary_Gland     2
Marrow            8
Muscle            6
Pancreas          3
Spleen            9
Thymus            1
Tongue            5
Trachea           2
dtype: int64

In [20]:
# Rename tissues that have an entry in old_to_new_name; every other label is
# passed through unchanged.
tenx['tissue_v2'] = tenx['tissue'].map(lambda name: old_to_new_name.get(name, name))
# Cross-tabulate old vs. new labels to check the mapping.
tenx.groupby(['tissue', 'tissue_v2']).size()

tissue         tissue_v2
Bladder        Bladder       4
Brain          Brain         9
Colon          Colon         3
Fat            Fat           2
Heart          Heart         6
Kidney         Kidney       11
Liver          Liver        11
Lung           Lung         12
Mammary_Gland  Mammary       2
Marrow         Marrow        8
Muscle         Muscle        6
Pancreas       Pancreas      3
Spleen         Spleen        9
Thymus         Thymus        1
Tongue         Tongue        5
Trachea        Trachea       2
dtype: int64

### Save the CSV!

In [21]:
# `csv` now holds the path the 10x metadata was read from, so this overwrites
# that file with the frame that now includes 'tissue_v2'.
print(csv)
tenx.to_csv(csv)

../metadata/MACA_10x.csv


## Rename manual annotations

In [27]:
# List the entries of the manual_annotations directory via the shell.
! ls $metadata_folder/manual_annotations

[1m[36mAorta[m[m                [1m[36mHeart[m[m                [1m[36mPancreas[m[m
[1m[36mBladder[m[m              [1m[36mKidney[m[m               [1m[36mSkin[m[m
[1m[36mBrain_FACS_microglia[m[m [1m[36mLiver[m[m                [1m[36mSpleen[m[m
[1m[36mBrain_FACS_neurons[m[m   [1m[36mLung[m[m                 [1m[36mThymus[m[m
[1m[36mColon[m[m                [1m[36mMammary_Gland[m[m        [1m[36mTongue[m[m
[1m[36mDiaphragm[m[m            [1m[36mMarrow[m[m               [1m[36mTrachea[m[m
[1m[36mFat[m[m                  [1m[36mMuscle[m[m


In [14]:
# Rename the manual-annotation folders that have an entry in old_to_new_name.
#
# Pure-Python os calls are used instead of shelling out to `ls` / `mv`:
#   * names are never re-parsed by a shell, so spaces or shell metacharacters
#     in a folder name cannot break (or hijack) the command;
#   * the listing does not depend on how `ls` formats or colours its output;
#   * it works on platforms without those commands.
base_folder = os.path.join(metadata_folder, 'manual_annotations')

# Hidden entries are skipped to mirror what a plain `ls` lists; sorting keeps
# the printed order stable.
tissue_folders = sorted(
    name for name in os.listdir(base_folder) if not name.startswith('.')
)

for folder in tissue_folders:
    print(folder)
    if folder in old_to_new_name:
        renamed_folder = old_to_new_name[folder]
        source = os.path.join(base_folder, folder)
        destination = os.path.join(base_folder, renamed_folder)
        # `mv` would silently move the folder *inside* an already existing
        # destination directory; refuse instead so annotations are never
        # nested or clobbered by accident.
        if os.path.exists(destination):
            raise FileExistsError(
                'Cannot rename {!r}: {!r} already exists'.format(source, destination)
            )
        os.rename(source, destination)

Aorta
Bladder
Brain_FACS_microglia
Brain_FACS_neurons
Colon
Diaphragm
Fat
Heart
Kidney
Liver
Lung
Mammary_Gland
Marrow
Muscle
Pancreas
Skin
Spleen
Thymus
Tongue
Trachea


In [15]:
# List the manual_annotations directory again to check the folder names after
# the rename step.
! ls $metadata_folder/manual_annotations

[1m[36mAorta[m[m           [1m[36mColon[m[m           [1m[36mKidney[m[m          [1m[36mMarrow[m[m          [1m[36mSpleen[m[m
[1m[36mBladder[m[m         [1m[36mDiaphragm[m[m       [1m[36mLiver[m[m           [1m[36mMuscle[m[m          [1m[36mThymus[m[m
[1m[36mBrain_microglia[m[m [1m[36mFat[m[m             [1m[36mLung[m[m            [1m[36mPancreas[m[m        [1m[36mTongue[m[m
[1m[36mBrain_neurons[m[m   [1m[36mHeart[m[m           [1m[36mMammary[m[m         [1m[36mSkin[m[m            [1m[36mTrachea[m[m
