In [1]:
import numpy as np
import anndata
import scanpy as sc
import pandas as pd

import os

# Folder that holds the downloaded raw files and receives the saved adata objects.
# It is the `dataset` sub-folder of the current working directory, written with
# forward slashes regardless of platform.
path = '{}/dataset/'.format(os.path.abspath('').replace('\\', '/'))

# A single cell atlas of human and mouse white adipose tissue: Preparation Instructions

To use data from the single-cell atlas of human and mouse white adipose tissue, follow these steps to download and prepare the necessary files for your analysis.  
The files are large; we recommend at least 32 GB of RAM to run this script.

## Step 1: Navigate to the Download Page

- Go to [https://singlecell.broadinstitute.org/single_cell/study/SCP1376/a-single-cell-atlas-of-human-and-mouse-white-adipose-tissue#study-download](https://singlecell.broadinstitute.org/single_cell/study/SCP1376/a-single-cell-atlas-of-human-and-mouse-white-adipose-tissue#study-download) in your web browser.

## Step 2: Download Required Datasets

For each species of interest, download the specified files.

### A single cell atlas of human and mouse white adipose tissue: Human

- Hs.metadata.tsv
- Hs10X.counts.barcodes.tsv.gz
- Hs10X.counts.features.tsv.gz
- Hs10X.counts.mtx.gz

### A single cell atlas of human and mouse white adipose tissue: Mouse

- Mm.metadata.tsv
- Mm10X.counts.barcodes.tsv.gz
- Mm10X.counts.features.tsv.gz
- Mm10X.counts.mtx.gz

## Step 3: Organize the Files

After downloading, place all files into the `scSpecies/dataset` folder.

In [2]:
def _load_annotations(metadata_file):
    """Read a tab-separated cell metadata file and bring it into a common layout.

    Steps, in order:
      1. read the file with pandas,
      2. rename `cell_type__custom` -> `cell_type_coarse`,
         `cell_subtype__custom` -> `cell_type_fine` and `biosample_id` -> `batch`,
      3. use the `cell_id` column as (unnamed) index,
      4. drop every column whose name contains `ontology_id`.

    Parameters
    ----------
    metadata_file : str
        Path to the `.tsv` metadata file.

    Returns
    -------
    pandas.DataFrame
        One row per entry of the file, indexed by `cell_id`.

    Raises
    ------
    KeyError
        If the file has no `cell_id` column (raised by `set_index`).
    """
    annot = (
        pd.read_csv(metadata_file, delimiter="\t")
        .rename(columns={
            'cell_type__custom': 'cell_type_coarse',
            'cell_subtype__custom': 'cell_type_fine',
            'biosample_id': 'batch',
        })
        .set_index('cell_id')
        .rename_axis(None)
    )
    kept_columns = [column for column in annot.columns if 'ontology_id' not in column]
    return annot[kept_columns]


#human dataset
annot_human = _load_annotations(path+"Hs.metadata.tsv")

#mouse dataset
annot_mouse = _load_annotations(path+"Mm.metadata.tsv")

# Lookup table used to rename coarse and fine cell labels so that every dataset
# ends up with the same label vocabulary.
# key   -> label to be replaced
# value -> harmonised label
translation_dict = {
    # --- labels without an h/m prefix ---
    "ASPC": "Preadipocytes",
    "LEC": "Lymphatic ECs",
    "SMC": "Smooth muscle cells",
    "adipocyte": "Adipocytes",
    "b_cell": "B cells",
    "dendritic_cell": "DCs",
    "endometrium": "Endometrium",
    "endothelial": "Endothelials",
    "female_epithelial": "Female epithelials",
    "male_epithelial": "Male epithelials",
    "macrophage": "Macrophages",
    "mast_cell": "Mast cells",
    "mesothelium": "Mesothelial cells",
    "monocyte": "Monocytes",
    "neutrophil": "Neutrophils",
    "nk_cell": "NK cells",
    "pericyte": "Pericytes",
    "t_cell": "T cells",
    # --- Preadipocytes ---
    "hASPC1": "Preadipocytes 1",
    "hASPC2": "Preadipocytes 2",
    "hASPC3": "Preadipocytes 3",
    "hASPC4": "Preadipocytes 4",
    "hASPC5": "Preadipocytes 5",
    "hASPC6": "Preadipocytes 6",
    "mASPC1": "Preadipocytes 1",
    "mASPC2": "Preadipocytes 2",
    "mASPC3": "Preadipocytes 3",
    "mASPC4": "Preadipocytes 4",
    "mASPC5": "Preadipocytes 5",
    "mASPC6": "Preadipocytes 6",
    # --- Lymphatic ECs ---
    "hLEC1": "Lymphatic ECs 1",
    "hLEC2": "Lymphatic ECs 2",
    "mLEC1": "Lymphatic ECs 1",
    "mLEC2": "Lymphatic ECs 2",
    # --- Smooth muscle cells (all sub-labels map to one name) ---
    "hSMC1": "Smooth muscle cells",
    "hSMC2": "Smooth muscle cells",
    "mSMC": "Smooth muscle cells",
    # --- Adipocytes ---
    "hAd1": "Adipocytes 1",
    "hAd2": "Adipocytes 2",
    "hAd3": "Adipocytes 3",
    "hAd4": "Adipocytes 4",
    "hAd5": "Adipocytes 5",
    "hAd6": "Adipocytes 6",
    "hAd7": "Adipocytes 7",
    "mAd1": "Adipocytes 1",
    "mAd2": "Adipocytes 2",
    "mAd3": "Adipocytes 3",
    "mAd4": "Adipocytes 4",
    "mAd5": "Adipocytes 5",
    "mAd6": "Adipocytes 6",
    # --- B cells / plasma ---
    "hBcell": "B cells",
    "mBcell": "B cells",
    "hPlasmablast": "Plasma",
    # --- Dendritic cells ---
    "hcDC1": "cDCs 1",
    "hcDC2": "cDCs 2",
    "hpDC": "pDCs",
    "hASDC": "ASDC",
    "mcDC1": "cDCs 1",
    "mcDC2": "cDCs 2",
    "mDC3": "DCs 3",
    # --- Endothelials ---
    "hEndM": "Endothelials M",
    "hEndoA1": "Endothelials A1",
    "hEndoA2": "Endothelials A2",
    "hEndoS1": "Endothelials S1",
    "hEndoS2": "Endothelials S2",
    "hEndoS3": "Endothelials S3",
    "hEndoV": "Endothelials V",
    "mEndoA1": "Endothelials A1",
    "mEndoA2": "Endothelials A2",
    "mEndoS1": "Endothelials S1",
    "mEndoS2": "Endothelials S2",
    "mEndoV": "Endothelials V",
    # --- Macrophages ---
    "hMac1": "Macrophages 1",
    "hMac2": "Macrophages 2",
    "hMac3": "Macrophages 3",
    "mMac1": "Macrophages 1",
    "mMac2": "Macrophages 2",
    "mMac3": "Macrophages 3",
    "mMac4": "Macrophages 4",
    # --- Mast cells ---
    "hMast": "Mast cells",
    "mMast": "Mast cells",
    # --- Mesothelial ---
    "hMes1": "Mesothelial 1",
    "hMes2": "Mesothelial 2",
    "hMes3": "Mesothelial 3",
    "mMes1": "Mesothelial 1",
    "mMes2": "Mesothelial 2",
    "mMes3": "Mesothelial 3",
    # --- Monocytes ---
    "hMono1": "Monocytes 1",
    "hMono2": "Monocytes 2",
    "mMono1": "Monocytes 1",
    "mMono2": "Monocytes 2",
    # --- Neutrophils ---
    "hNeu": "Neutrophils",
    "mNeu": "Neutrophils",
    # --- NK cells ---
    "hNK": "NK cells",
    "mNK": "NK cells",
    # --- Pericytes ---
    "hPeri": "Pericytes",
    "mPeri": "Pericytes",
    # --- T cells ---
    "hTcell1": "T cells 1",
    "hTcell2": "T cells 2",
    "hTreg": "Regulatory T",
    "mTcell1": "T cells 1",
    "mTcell2": "T cells 2",
    "mTcell3": "T cells 3",
    # --- Epithelial / fibroblast sub-labels ---
    "luminal_epithelial_AV": "Female epithelials AV",
    "luminal_epithelial_HS": "Female epithelials HS",
    "male_epithelial_1": "Male epithelials 1",
    "male_epithelial_2": "Male epithelials 2",
    "male_epithelial_3": "Male epithelials 3",
    "myoepithelial": "Myo epithelials",
    "mammary_fibroblast": "Mammary fibroblasts",
}

# Replace the labels in both annotation columns of both species with the
# harmonised names from `translation_dict` (values not listed there stay unchanged).
for label_column in ('cell_type_coarse', 'cell_type_fine'):
    for annot in (annot_mouse, annot_human):
        annot[label_column] = annot[label_column].replace(translation_dict)

In [5]:
def return_matrix(data_path):
    """Build an annotated count matrix from three files sharing one prefix.

    The files read are `<data_path>.counts.mtx.gz`,
    `<data_path>.counts.features.tsv.gz` and `<data_path>.counts.barcodes.tsv.gz`.

    Parameters
    ----------
    data_path : str
        Common prefix of the three files, e.g. `path + 'Mm10X'`.

    Returns
    -------
    The object returned by `sc.read_mtx`, transposed, with
    `var_names` taken from the second column of the features file and
    `obs_names` taken from the first column of the barcodes file.
    """
    # The matrix is transposed right after loading; the names assigned below
    # therefore label the rows with barcodes and the columns with features.
    adata = sc.read_mtx(data_path + ".counts.mtx.gz").T

    # Neither table has a header row; columns are addressed by position.
    feature_table = pd.read_csv(data_path + ".counts.features.tsv.gz", header=None, delimiter="\t")
    adata.var_names = list(feature_table[1])

    barcode_table = pd.read_csv(data_path + ".counts.barcodes.tsv.gz", header=None)
    adata.obs_names = list(barcode_table[0])

    return adata




In [6]:
def _attach_annotations(adata, annot):
    """Restrict a count matrix and its annotation table to their shared cells.

    Parameters
    ----------
    adata : AnnData
        Count matrix as returned by `return_matrix`; `obs_names` are matched
        against the annotation index.
    annot : pandas.DataFrame
        Annotation table indexed by the same identifiers.

    Returns
    -------
    (adata, annot)
        Both restricted to the identifiers present in each of them and put in
        the same (sorted) order; `annot` is installed as `adata.obs`.
    """
    # For every shared identifier, intersect1d reports its position in both
    # inputs; the shared identifiers come back sorted, which fixes the row order.
    _, rows_in_adata, rows_in_annot = np.intersect1d(
        adata.obs_names, annot.index, return_indices=True
    )

    # Indexing yields a view onto the full matrix. Copy it explicitly so that
    # assigning `.obs` below modifies a real object instead of relying on the
    # implicit view-to-copy conversion.
    adata = adata[rows_in_adata].copy()
    annot = annot.iloc[rows_in_annot]

    adata.obs = annot
    return adata, annot


# mouse: load counts, align with the annotations, save
adata_mouse, annot_mouse = _attach_annotations(return_matrix(path + 'Mm10X'), annot_mouse)
adata_mouse.write_h5ad(path+'adipose_mouse.h5ad')

# human: same procedure
adata_human, annot_human = _attach_annotations(return_matrix(path + 'Hs10X'), annot_human)
adata_human.write_h5ad(path+'adipose_human.h5ad')