### adapted from https://github.com/dpcook/fun_analysis/blob/master/tabula_muris/mouse_atlas_scanpy.ipynb

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import csv
import scanpy.api as sc
from igraph import *
from MulticoreTSNE import MulticoreTSNE as TSNE #faster TSNE alternative
from anndata import read_h5ad
import dill # for saving everything


  from ._conv import register_converters as _register_converters


# Load data

## 18 months

In [None]:

def _load_maca_sample(sample_dir, tissue, age, sex, subtissue=None, min_genes=250):
    """Load one MACA sample directory into an annotated, cell-filtered AnnData.

    Parameters
    ----------
    sample_dir : str
        Directory path ending in '/', containing 'matrix.mtx', 'genes.tsv'
        and 'barcodes.tsv'.
    tissue, age, sex : str
        Values written to the 'Tissue', 'Age' and 'Sex' columns of ``.obs``.
    subtissue : str or None
        When not None, written to a 'subtissue' column placed right after
        'Tissue'. When None, no such column is created.
    min_genes : int
        Forwarded to ``sc.pp.filter_cells`` as ``min_genes``.

    Returns
    -------
    The AnnData object, after in-place filtering by ``sc.pp.filter_cells``.
    """
    # The matrix is transposed after reading so that barcodes index obs and
    # genes index var (presumably the file is stored genes x cells -- confirm).
    adata = sc.read(sample_dir + 'matrix.mtx', cache=True).transpose()
    # Second column of genes.tsv is used as the gene name.
    adata.var_names = np.genfromtxt(sample_dir + 'genes.tsv', dtype=str)[:, 1]
    adata.obs_names = np.genfromtxt(sample_dir + 'barcodes.tsv', dtype=str)
    adata.obs['Tissue'] = tissue
    if subtissue is not None:
        adata.obs['subtissue'] = subtissue
    adata.obs['Age'] = age
    adata.obs['Sex'] = sex
    # Placeholder column; every sample gets the literal string 'NA'.
    adata.obs['Cell'] = 'NA'
    sc.pp.filter_cells(adata, min_genes=min_genes)
    return adata


# females
# NOTE(review): this is the only sample that gets a 'subtissue' column (set to
# the empty string). Kept as in the original -- confirm whether the column
# should be dropped here or added to every sample.
adata_aorta_18m_f_50 = _load_maca_sample(
    '/data/maca/data/MACA_18m_F_AORTA_50/', 'Aorta', '18m', 'female', subtissue='')
adata_marrow_18m_f_50 = _load_maca_sample(
    '/data/maca/data/MACA_18m_F_BM_50/', 'Marrow', '18m', 'female')
adata_marrow_18m_f_51 = _load_maca_sample(
    '/data/maca/data/MACA_18m_F_BM_51/', 'Marrow', '18m', 'female')
adata_breast_18m_f_50 = _load_maca_sample(
    '/data/maca/data/MACA_18m_F_BREAST_50/', 'Breast', '18m', 'female')
adata_breast_18m_f_51 = _load_maca_sample(
    '/data/maca/data/MACA_18m_F_BREAST_51/', 'Breast', '18m', 'female')
adata_gat_18m_f_50 = _load_maca_sample(
    '/data/maca/data/MACA_18m_F_GAT_50/', 'Gat', '18m', 'female')
adata_kidney_18m_f_50 = _load_maca_sample(
    '/data/maca/data/MACA_18m_F_KIDNEY_50/', 'Kidney', '18m', 'female')
adata_kidney_18m_f_51 = _load_maca_sample(
    '/data/maca/data/MACA_18m_F_KIDNEY_51/', 'Kidney', '18m', 'female')
adata_liver_hepatocytes_18m_f_51 = _load_maca_sample(
    '/data/maca/data/MACA_18m_F_LIVER_HEPATOCYTES_51/', 'Liver_hepatocytes', '18m', 'female')
adata_liver_npc_18m_f_51 = _load_maca_sample(
    '/data/maca/data/MACA_18m_F_LIVER_NPC_51/', 'Liver_NPC', '18m', 'female')
adata_lung_18m_f_50 = _load_maca_sample(
    '/data/maca/data/MACA_18m_F_LUNG_50/', 'Lung', '18m', 'female')
adata_lung_18m_f_51 = _load_maca_sample(
    '/data/maca/data/MACA_18m_F_LUNG_51/', 'Lung', '18m', 'female')
adata_mat_18m_f_50 = _load_maca_sample(
    '/data/maca/data/MACA_18m_F_MAT_50/', 'Mat', '18m', 'female')
adata_muscle_18m_f_50 = _load_maca_sample(
    '/data/maca/data/MACA_18m_F_MUSCLE_50_pre_sort/', 'Muscle', '18m', 'female')
adata_muscle_18m_f_51 = _load_maca_sample(
    '/data/maca/data/MACA_18m_F_MUSCLE_51_pre_sort/', 'Muscle', '18m', 'female')
adata_pancrease_endocrine_18m_f_50 = _load_maca_sample(
    '/data/maca/data/MACA_18m_F_PANCREASE_ENDOCRINE_50/', 'Pancrease_endocrine', '18m', 'female')
# Variable name keeps its original "_f_18m_" word order because later code
# refers to it under this exact name.
adata_pancrease_exocrine_f_18m_50 = _load_maca_sample(
    '/data/maca/data/MACA_18m_F_PANCREASE_EXOCRINE_50/', 'Pancrease_exocrine', '18m', 'female')
adata_scat_18m_f_50 = _load_maca_sample(
    '/data/maca/data/MACA_18m_F_SCAT_50/', 'Scat', '18m', 'female')
adata_spleen_18m_f_50 = _load_maca_sample(
    '/data/maca/data/MACA_18m_F_SPLEEN_50/', 'Spleen', '18m', 'female')
adata_spleen_18m_f_51 = _load_maca_sample(
    '/data/maca/data/MACA_18m_F_SPLEEN_51/', 'Spleen', '18m', 'female')
adata_thymus_18m_f_50 = _load_maca_sample(
    '/data/maca/data/MACA_18m_F_THYMUS_50/', 'Thymus', '18m', 'female')

# males
adata_aorta_18m_m_52 = _load_maca_sample(
    '/data/maca/data/MACA_18m_M_AORTA_52/', 'Aorta', '18m', 'male')
adata_bat_18m_m_52 = _load_maca_sample(
    '/data/maca/data/MACA_18m_M_BAT_52/', 'Bat', '18m', 'male')
adata_bladder_18m_m_52 = _load_maca_sample(
    '/data/maca/data/MACA_18m_M_BLADDER_52_unstain/', 'Bladder', '18m', 'male')
adata_marrow_18m_m_52 = _load_maca_sample(
    '/data/maca/data/MACA_18m_M_BM_52/', 'Marrow', '18m', 'male')
adata_marrow_18m_m_53 = _load_maca_sample(
    '/data/maca/data/MACA_18m_M_BM_53/', 'Marrow', '18m', 'male')
adata_cortex_18m_m_52 = _load_maca_sample(
    '/data/maca/data/MACA_18m_M_CORTEX_52/', 'Cortex', '18m', 'male')
adata_gat_18m_m_52 = _load_maca_sample(
    '/data/maca/data/MACA_18m_M_GAT_52/', 'Gat', '18m', 'male')
adata_heart_18m_m_53 = _load_maca_sample(
    '/data/maca/data/MACA_18m_M_HEART_4CHAMBERS_53/', 'Heart', '18m', 'male')
adata_kidney_18m_m_52 = _load_maca_sample(
    '/data/maca/data/MACA_18m_M_KIDNEY_52/', 'Kidney', '18m', 'male')
adata_lung_18m_m_52 = _load_maca_sample(
    '/data/maca/data/MACA_18m_M_LUNG_52/', 'Lung', '18m', 'male')
adata_lung_18m_m_53 = _load_maca_sample(
    '/data/maca/data/MACA_18m_M_LUNG_53/', 'Lung', '18m', 'male')
adata_mat_18m_m_52 = _load_maca_sample(
    '/data/maca/data/MACA_18m_M_MAT_52/', 'Mat', '18m', 'male')
adata_muscle_18m_m_52 = _load_maca_sample(
    '/data/maca/data/MACA_18m_M_MUSCLE_52/', 'Muscle', '18m', 'male')
adata_muscle_18m_m_53 = _load_maca_sample(
    '/data/maca/data/MACA_18m_M_MUSCLE_53/', 'Muscle', '18m', 'male')
adata_pancrease_endocrine_18m_m_52 = _load_maca_sample(
    '/data/maca/data/MACA_18m_M_PANCREASE_ENDOCRINE_52/', 'Pancrease_endocrine', '18m', 'male')
adata_pancrease_exocrine_18m_m_52 = _load_maca_sample(
    '/data/maca/data/MACA_18m_M_PANCREASE_EXOCRINE_52/', 'Pancrease_exocrine', '18m', 'male')
adata_scat_18m_m_52 = _load_maca_sample(
    '/data/maca/data/MACA_18m_M_SCAT_52/', 'Scat', '18m', 'male')
adata_scat_18m_m_53 = _load_maca_sample(
    '/data/maca/data/MACA_18m_M_SCAT_53/', 'Scat', '18m', 'male')
adata_skin_18m_m_52 = _load_maca_sample(
    '/data/maca/data/MACA_18m_M_SKIN_52/', 'Skin', '18m', 'male')
adata_thymus_18m_m_52 = _load_maca_sample(
    '/data/maca/data/MACA_18m_M_THYMUS_52/', 'Thymus', '18m', 'male')
adata_tongue_18m_m_52 = _load_maca_sample(
    '/data/maca/data/MACA_18m_M_TONGUE_52_unstain/', 'Tongue', '18m', 'male')



## Concatenate the 18m data & save

In [None]:
# Combine all 18-month samples into one AnnData. The female aorta sample is
# the object that concatenate() is called on; the remaining samples are
# passed as a single list, in the same order as before (females, then males).
remaining_females_18m = [
    adata_marrow_18m_f_50,
    adata_marrow_18m_f_51,
    adata_breast_18m_f_50,
    adata_breast_18m_f_51,
    adata_gat_18m_f_50,
    adata_kidney_18m_f_50,
    adata_kidney_18m_f_51,
    adata_liver_hepatocytes_18m_f_51,
    adata_liver_npc_18m_f_51,
    adata_lung_18m_f_50,
    adata_lung_18m_f_51,
    adata_mat_18m_f_50,
    adata_muscle_18m_f_50,
    adata_muscle_18m_f_51,
    adata_pancrease_endocrine_18m_f_50,
    adata_pancrease_exocrine_f_18m_50,
    adata_scat_18m_f_50,
    adata_spleen_18m_f_50,
    adata_spleen_18m_f_51,
    adata_thymus_18m_f_50,
]
males_18m = [
    adata_aorta_18m_m_52,
    adata_bat_18m_m_52,
    adata_bladder_18m_m_52,
    adata_marrow_18m_m_52,
    adata_marrow_18m_m_53,
    adata_cortex_18m_m_52,
    adata_gat_18m_m_52,
    adata_heart_18m_m_53,
    adata_kidney_18m_m_52,
    adata_lung_18m_m_52,
    adata_lung_18m_m_53,
    adata_mat_18m_m_52,
    adata_muscle_18m_m_52,
    adata_muscle_18m_m_53,
    adata_pancrease_endocrine_18m_m_52,
    adata_pancrease_exocrine_18m_m_52,
    adata_scat_18m_m_52,
    adata_scat_18m_m_53,
    adata_skin_18m_m_52,
    adata_thymus_18m_m_52,
    adata_tongue_18m_m_52,
]
adata18 = adata_aorta_18m_f_50.concatenate(remaining_females_18m + males_18m)

In [None]:
# Add a placeholder 'CellType' column (the literal string 'NA' for every
# cell), then show the combined object as the cell output.
adata18.obs['CellType'] = 'NA'
adata18

In [None]:
# Persist the combined 18-month object as an .h5ad file.
maca18m_h5ad = './write/maca18m.h5ad'
adata18.write(maca18m_h5ad)

## 21 months

In [None]:
# females

path = '/data/maca/data/MACA_21m_F_BM_54/'
adata_marrow_21m_f_54 = sc.read(path + 'matrix.mtx', cache=True).transpose()
adata_marrow_21m_f_54.var_names = np.genfromtxt(path + 'genes.tsv', dtype=str)[:, 1]
adata_marrow_21m_f_54.obs_names = np.genfromtxt(path + 'barcodes.tsv', dtype=str)
adata_marrow_21m_f_54.obs['Tissue'] = 'Marrow'
adata_marrow_21m_f_54.obs['Age'] = '21m'
adata_marrow_21m_f_54.obs['Sex'] = 'female'
adata_marrow_21m_f_54.obs['Cell'] = 'NA'
sc.pp.filter_cells(adata_marrow_21m_f_54, min_genes=250)

path = '/data/maca/data/MACA_21m_F_BM_55/'
adata_marrow_21m_f_55 = sc.read(path + 'matrix.mtx', cache=True).transpose()
adata_marrow_21m_f_55.var_names = np.genfromtxt(path + 'genes.tsv', dtype=str)[:, 1]
adata_marrow_21m_f_55.obs_names = np.genfromtxt(path + 'barcodes.tsv', dtype=str)
adata_marrow_21m_f_55.obs['Tissue'] = 'Marrow'
adata_marrow_21m_f_55.obs['Age'] = '21m'
adata_marrow_21m_f_55.obs['Sex'] = 'female'
adata_marrow_21m_f_55.obs['Cell'] = 'NA'
sc.pp.filter_cells(adata_marrow_21m_f_55, min_genes=250)

path = '/data/maca/data/MACA_21m_F_BREAST_54/'
adata_breast_21m_f_54 = sc.read(path + 'matrix.mtx', cache=True).transpose()
adata_breast_21m_f_54.var_names = np.genfromtxt(path + 'genes.tsv', dtype=str)[:, 1]
adata_breast_21m_f_54.obs_names = np.genfromtxt(path + 'barcodes.tsv', dtype=str)
adata_breast_21m_f_54.obs['Tissue'] = 'Breast'
adata_breast_21m_f_54.obs['Age'] = '21m'
adata_breast_21m_f_54.obs['Sex'] = 'female'
adata_breast_21m_f_54.obs['Cell'] = 'NA'
sc.pp.filter_cells(adata_breast_21m_f_54, min_genes=250)

path = '/data/maca/data/MACA_21m_F_BREAST_55/'
adata_breast_21m_f_55 = sc.read(path + 'matrix.mtx', cache=True).transpose()
adata_breast_21m_f_55.var_names = np.genfromtxt(path + 'genes.tsv', dtype=str)[:, 1]
adata_breast_21m_f_55.obs_names = np.genfromtxt(path + 'barcodes.tsv', dtype=str)
adata_breast_21m_f_55.obs['Tissue'] = 'Breast'
adata_breast_21m_f_55.obs['Age'] = '21m'
adata_breast_21m_f_55.obs['Sex'] = 'female'
adata_breast_21m_f_55.obs['Cell'] = 'NA'
sc.pp.filter_cells(adata_breast_21m_f_55, min_genes=250)

path = '/data/maca/data/MACA_21m_F_HEART_4CHAMBERS_54/'
adata_heart_21m_f_54 = sc.read(path + 'matrix.mtx', cache=True).transpose()
adata_heart_21m_f_54.var_names = np.genfromtxt(path + 'genes.tsv', dtype=str)[:, 1]
adata_heart_21m_f_54.obs_names = np.genfromtxt(path + 'barcodes.tsv', dtype=str)
adata_heart_21m_f_54.obs['Tissue'] = 'Marrow'
adata_heart_21m_f_54.obs['Age'] = '21m'
adata_heart_21m_f_54.obs['Sex'] = 'female'
adata_heart_21m_f_54.obs['Cell'] = 'NA'
sc.pp.filter_cells(adata_heart_21m_f_54, min_genes=250)

path = '/data/maca/data/MACA_21m_F_HEART_4CHAMBERS_55/'
adata_heart_21m_f_55 = sc.read(path + 'matrix.mtx', cache=True).transpose()
adata_heart_21m_f_55.var_names = np.genfromtxt(path + 'genes.tsv', dtype=str)[:, 1]
adata_heart_21m_f_55.obs_names = np.genfromtxt(path + 'barcodes.tsv', dtype=str)
adata_heart_21m_f_55.obs['Tissue'] = 'Marrow'
adata_heart_21m_f_55.obs['Age'] = '21m'
adata_heart_21m_f_55.obs['Sex'] = 'female'
adata_heart_21m_f_55.obs['Cell'] = 'NA'
sc.pp.filter_cells(adata_heart_21m_f_55, min_genes=250)

path = '/data/maca/data/MACA_21m_F_HEPATOCYTES_54/'
adata_hepatocytes_21m_f_54 = sc.read(path + 'matrix.mtx', cache=True).transpose()
adata_hepatocytes_21m_f_54.var_names = np.genfromtxt(path + 'genes.tsv', dtype=str)[:, 1]
adata_hepatocytes_21m_f_54.obs_names = np.genfromtxt(path + 'barcodes.tsv', dtype=str)
adata_hepatocytes_21m_f_54.obs['Tissue'] = 'Liver_hepatocytes'
adata_hepatocytes_21m_f_54.obs['Age'] = '21m'
adata_hepatocytes_21m_f_54.obs['Sex'] = 'female'
adata_hepatocytes_21m_f_54.obs['Cell'] = 'NA'
sc.pp.filter_cells(adata_hepatocytes_21m_f_54, min_genes=250)

path = '/data/maca/data/MACA_21m_F_KIDNEY_54/'
adata_kidney_21m_f_54 = sc.read(path + 'matrix.mtx', cache=True).transpose()
adata_kidney_21m_f_54.var_names = np.genfromtxt(path + 'genes.tsv', dtype=str)[:, 1]
adata_kidney_21m_f_54.obs_names = np.genfromtxt(path + 'barcodes.tsv', dtype=str)
adata_kidney_21m_f_54.obs['Tissue'] = 'Kidney'
adata_kidney_21m_f_54.obs['Age'] = '21m'
adata_kidney_21m_f_54.obs['Sex'] = 'female'
adata_kidney_21m_f_54.obs['Cell'] = 'NA'
sc.pp.filter_cells(adata_kidney_21m_f_54, min_genes=250)

path = '/data/maca/data/MACA_21m_F_KIDNEY_55/'
adata_kidney_21m_f_55 = sc.read(path + 'matrix.mtx', cache=True).transpose()
adata_kidney_21m_f_55.var_names = np.genfromtxt(path + 'genes.tsv', dtype=str)[:, 1]
adata_kidney_21m_f_55.obs_names = np.genfromtxt(path + 'barcodes.tsv', dtype=str)
adata_kidney_21m_f_55.obs['Tissue'] = 'Kidney'
adata_kidney_21m_f_55.obs['Age'] = '21m'
adata_kidney_21m_f_55.obs['Sex'] = 'female'
adata_kidney_21m_f_55.obs['Cell'] = 'NA'
sc.pp.filter_cells(adata_kidney_21m_f_55, min_genes=250)

def _load_21m_female(path, tissue):
    """Load one 21-month female 10x channel and return it as an AnnData.

    Parameters
    ----------
    path : str
        Channel directory, ending in '/', that holds ``matrix.mtx``,
        ``genes.tsv`` and ``barcodes.tsv``.
    tissue : str
        Label written to ``obs['Tissue']``.

    Returns
    -------
    The transposed matrix with var/obs names set, the ``Tissue``, ``Age``,
    ``Sex`` and ``Cell`` columns filled in, and filtered in place with
    ``sc.pp.filter_cells(..., min_genes=250)``.
    """
    adata = sc.read(path + 'matrix.mtx', cache=True).transpose()
    # Column 1 of genes.tsv supplies the variable names
    # (presumably gene symbols -- confirm against the file).
    adata.var_names = np.genfromtxt(path + 'genes.tsv', dtype=str)[:, 1]
    adata.obs_names = np.genfromtxt(path + 'barcodes.tsv', dtype=str)
    adata.obs['Tissue'] = tissue
    # Written once here so no run in this group can carry a wrong label.
    adata.obs['Age'] = '21m'
    adata.obs['Sex'] = 'female'
    adata.obs['Cell'] = 'NA'
    sc.pp.filter_cells(adata, min_genes=250)
    return adata


path = '/data/maca/data/MACA_21m_F_LUNG_54/'
adata_lung_21m_f_54 = _load_21m_female(path, 'Lung')

path = '/data/maca/data/MACA_21m_F_LUNG_55/'
adata_lung_21m_f_55 = _load_21m_female(path, 'Lung')

path = '/data/maca/data/MACA_21m_F_MUSCLE_54/'
adata_muscle_21m_f_54 = _load_21m_female(path, 'Muscle')

path = '/data/maca/data/MACA_21m_F_MUSCLE_55/'
adata_muscle_21m_f_55 = _load_21m_female(path, 'Muscle')

path = '/data/maca/data/MACA_21m_F_NPC_54/'
adata_npc_21m_f_54 = _load_21m_female(path, 'Npc')

# NOTE(review): this directory is spelled PANCREAS while the exocrine one
# below is spelled PANCREASE -- confirm both against the data directory.
# This run is a 21m channel and is labelled '21m' like the rest of the group.
path = '/data/maca/data/MACA_21m_F_PANCREAS_ENDOCRINE/'
adata_pancrease_endocrine_21m_f = _load_21m_female(path, 'Pancrease_endocrine')

path = '/data/maca/data/MACA_21m_F_PANCREASE_EXOCRINE/'
adata_pancrease_exocrine_f_21m = _load_21m_female(path, 'Pancrease_exocrine')

path = '/data/maca/data/MACA_21m_F_SCAT_54/'
adata_scat_21m_f_54 = _load_21m_female(path, 'Scat')

path = '/data/maca/data/MACA_21m_F_SCAT_55/'
adata_scat_21m_f_55 = _load_21m_female(path, 'Scat')

path = '/data/maca/data/MACA_21m_F_SKIN_54/'
adata_skin_21m_f_54 = _load_21m_female(path, 'Skin')

path = '/data/maca/data/MACA_21m_F_SKIN_55/'
adata_skin_21m_f_55 = _load_21m_female(path, 'Skin')

path = '/data/maca/data/MACA_21m_F_SPLEEN_54/'
adata_spleen_21m_f_54 = _load_21m_female(path, 'Spleen')

path = '/data/maca/data/MACA_21m_F_SPLEEN_55/'
adata_spleen_21m_f_55 = _load_21m_female(path, 'Spleen')

path = '/data/maca/data/MACA_21m_F_THYMUS_54/'
adata_thymus_21m_f_54 = _load_21m_female(path, 'Thymus')

path = '/data/maca/data/MACA_21m_F_THYMUS_55/'
adata_thymus_21m_f_55 = _load_21m_female(path, 'Thymus')




## Concatenate the 21m data & save

In [None]:
# Runs appended, in this order, after the marrow-54 run that serves as the
# base of the concatenation.
remaining_21m_runs = [
    adata_marrow_21m_f_55,
    adata_breast_21m_f_54,
    adata_breast_21m_f_55,
    adata_heart_21m_f_54,
    adata_heart_21m_f_55,
    adata_hepatocytes_21m_f_54,
    adata_kidney_21m_f_54,
    adata_kidney_21m_f_55,
    adata_lung_21m_f_54,
    adata_lung_21m_f_55,
    adata_muscle_21m_f_54,
    adata_muscle_21m_f_55,
    adata_npc_21m_f_54,
    adata_pancrease_endocrine_21m_f,
    adata_pancrease_exocrine_f_21m,
    adata_scat_21m_f_54,
    adata_scat_21m_f_55,
    adata_skin_21m_f_54,
    adata_skin_21m_f_55,
    adata_spleen_21m_f_54,
    adata_spleen_21m_f_55,
    adata_thymus_21m_f_54,
    adata_thymus_21m_f_55,
]
adata21 = adata_marrow_21m_f_54.concatenate(remaining_21m_runs)

# Placeholder cell-type label for every cell of the merged object.
adata21.obs["CellType"] = 'NA'
adata21

In [None]:
# Save the merged 21m object as an h5ad file.
h5ad_21m = './write/maca21m.h5ad'
adata21.write(h5ad_21m)

## 24 months

In [None]:
# males

def _load_24m_male(path, tissue):
    """Load one 24-month male 10x channel and return it as an AnnData.

    Parameters
    ----------
    path : str
        Channel directory, ending in '/', that holds ``matrix.mtx``,
        ``genes.tsv`` and ``barcodes.tsv``.
    tissue : str
        Label written to ``obs['Tissue']``.

    Returns
    -------
    The transposed matrix with var/obs names set, the ``Tissue``, ``Age``,
    ``Sex`` and ``Cell`` columns filled in, and filtered in place with
    ``sc.pp.filter_cells(..., min_genes=250)``.
    """
    adata = sc.read(path + 'matrix.mtx', cache=True).transpose()
    # Column 1 of genes.tsv supplies the variable names
    # (presumably gene symbols -- confirm against the file).
    adata.var_names = np.genfromtxt(path + 'genes.tsv', dtype=str)[:, 1]
    adata.obs_names = np.genfromtxt(path + 'barcodes.tsv', dtype=str)
    adata.obs['Tissue'] = tissue
    adata.obs['Age'] = '24m'
    adata.obs['Sex'] = 'male'
    adata.obs['Cell'] = 'NA'
    sc.pp.filter_cells(adata, min_genes=250)
    return adata


path = '/data/maca/data/MACA_24m_M_BAT_58/'
adata_bat_24m_m_58 = _load_24m_male(path, 'Bat')

path = '/data/maca/data/MACA_24m_M_BLADDER_58/'
adata_bladder_24m_m_58 = _load_24m_male(path, 'Bladder')

path = '/data/maca/data/MACA_24m_M_BLADDER_59/'
adata_bladder_24m_m_59 = _load_24m_male(path, 'Bladder')

path = '/data/maca/data/MACA_24m_M_BLADDER_60/'
adata_bladder_24m_m_60 = _load_24m_male(path, 'Bladder')

path = '/data/maca/data/MACA_24m_M_BM_58/'
adata_marrow_24m_m_58 = _load_24m_male(path, 'Marrow')

path = '/data/maca/data/MACA_24m_M_BM_59/'
adata_marrow_24m_m_59 = _load_24m_male(path, 'Marrow')

path = '/data/maca/data/MACA_24m_M_BM_60/'
adata_marrow_24m_m_60 = _load_24m_male(path, 'Marrow')

path = '/data/maca/data/MACA_24m_M_BM_61/'
adata_marrow_24m_m_61 = _load_24m_male(path, 'Marrow')

path = '/data/maca/data/MACA_24m_M_GAT_58/'
adata_gat_24m_m_58 = _load_24m_male(path, 'Gat')

path = '/data/maca/data/MACA_24m_M_GAT_59/'
adata_gat_24m_m_59 = _load_24m_male(path, 'Gat')

path = '/data/maca/data/MACA_24m_M_HEART_58/'
adata_heart_24m_m_58 = _load_24m_male(path, 'Heart')

path = '/data/maca/data/MACA_24m_M_HEART_59/'
adata_heart_24m_m_59 = _load_24m_male(path, 'Heart')

path = '/data/maca/data/MACA_24m_M_HEART_60/'
adata_heart_24m_m_60 = _load_24m_male(path, 'Heart')

path = '/data/maca/data/MACA_24m_M_HEART_61/'
adata_heart_24m_m_61 = _load_24m_male(path, 'Heart')

path = '/data/maca/data/MACA_24m_M_HEPATOCYTES_58/'
adata_hepatocytes_24m_m_58 = _load_24m_male(path, 'Hepatocytes')

path = '/data/maca/data/MACA_24m_M_HEPATOCYTES_59/'
adata_hepatocytes_24m_m_59 = _load_24m_male(path, 'Hepatocytes')

path = '/data/maca/data/MACA_24m_M_KIDNEY_58/'
adata_kidney_24m_m_58 = _load_24m_male(path, 'Kidney')

path = '/data/maca/data/MACA_24m_M_KIDNEY_59/'
adata_kidney_24m_m_59 = _load_24m_male(path, 'Kidney')

path = '/data/maca/data/MACA_24m_M_KIDNEY_60/'
adata_kidney_24m_m_60 = _load_24m_male(path, 'Kidney')

path = '/data/maca/data/MACA_24m_M_KIDNEY_61/'
adata_kidney_24m_m_61 = _load_24m_male(path, 'Kidney')

path = '/data/maca/data/MACA_24m_M_LUNG_60/'
adata_lung_24m_m_60 = _load_24m_male(path, 'Lung')

path = '/data/maca/data/MACA_24m_M_LUNG_61/'
adata_lung_24m_m_61 = _load_24m_male(path, 'Lung')

path = '/data/maca/data/MACA_24m_M_MAT_58/'
adata_mat_24m_m_58 = _load_24m_male(path, 'Mat')

path = '/data/maca/data/MACA_24m_M_MAT_59/'
adata_mat_24m_m_59 = _load_24m_male(path, 'Mat')

path = '/data/maca/data/MACA_24m_M_MUSCLE_58/'
adata_muscle_24m_m_58 = _load_24m_male(path, 'Muscle')

path = '/data/maca/data/MACA_24m_M_MUSCLE_59/'
adata_muscle_24m_m_59 = _load_24m_male(path, 'Muscle')

path = '/data/maca/data/MACA_24m_M_MUSCLE_60/'
adata_muscle_24m_m_60 = _load_24m_male(path, 'Muscle')

path = '/data/maca/data/MACA_24m_M_MUSCLE_61/'
adata_muscle_24m_m_61 = _load_24m_male(path, 'Muscle')

path = '/data/maca/data/MACA_24m_M_PANCREASE_EXO_60/'
adata_pancrease_exocrine_24m_m_60 = _load_24m_male(path, 'Pancrease_exocrine')

path = '/data/maca/data/MACA_24m_M_SCAT_58/'
adata_scat_24m_m_58 = _load_24m_male(path, 'Scat')

path = '/data/maca/data/MACA_24m_M_SPLEEN_58/'
adata_spleen_24m_m_58 = _load_24m_male(path, 'Spleen')

path = '/data/maca/data/MACA_24m_M_SPLEEN_59/'
adata_spleen_24m_m_59 = _load_24m_male(path, 'Spleen')

path = '/data/maca/data/MACA_24m_M_SPLEEN_60/'
adata_spleen_24m_m_60 = _load_24m_male(path, 'Spleen')

path = '/data/maca/data/MACA_24m_M_SPLEEN_61/'
adata_spleen_24m_m_61 = _load_24m_male(path, 'Spleen')

path = '/data/maca/data/MACA_24m_M_THYMUS_58/'
adata_thymus_24m_m_58 = _load_24m_male(path, 'Thymus')

path = '/data/maca/data/MACA_24m_M_THYMUS_59/'
adata_thymus_24m_m_59 = _load_24m_male(path, 'Thymus')

path = '/data/maca/data/MACA_24m_M_THYMUS_60/'
adata_thymus_24m_m_60 = _load_24m_male(path, 'Thymus')

path = '/data/maca/data/MACA_24m_M_TONGUE_58/'
adata_tongue_24m_m_58 = _load_24m_male(path, 'Tongue')

path = '/data/maca/data/MACA_24m_M_TONGUE_59/'
adata_tongue_24m_m_59 = _load_24m_male(path, 'Tongue')

path = '/data/maca/data/MACA_24m_M_TONGUE_60/'
adata_tongue_24m_m_60 = _load_24m_male(path, 'Tongue')




## Concatenate the 24m data & save

In [None]:
# Not working: these runs are empty, so they are left out of the concatenation.
#   adata_bat_24m_m_58,
#   adata_gat_24m_m_58,
#   adata_gat_24m_m_59,
#   adata_heart_24m_m_60,
#   adata_heart_24m_m_61,
#   adata_kidney_24m_m_60,
#   adata_kidney_24m_m_61,
#   adata_lung_24m_m_60,
#   adata_lung_24m_m_61,
#   adata_mat_24m_m_58,
#   adata_mat_24m_m_59,
#   adata_scat_24m_m_58,
#   adata_spleen_24m_m_60,
#   adata_spleen_24m_m_61,
#   adata_thymus_24m_m_58,
#   adata_thymus_24m_m_59,

In [None]:
# Runs appended, in this order, after the bladder-58 run that serves as the
# base of the concatenation.
remaining_24m_runs = [
    adata_bladder_24m_m_59,
    adata_bladder_24m_m_60,
    adata_marrow_24m_m_58,
    adata_marrow_24m_m_59,
    adata_marrow_24m_m_60,
    adata_marrow_24m_m_61,
    adata_heart_24m_m_58,
    adata_heart_24m_m_59,
    adata_hepatocytes_24m_m_58,
    adata_hepatocytes_24m_m_59,
    adata_kidney_24m_m_58,
    adata_kidney_24m_m_59,
    adata_muscle_24m_m_58,
    adata_muscle_24m_m_59,
    adata_muscle_24m_m_60,
    adata_muscle_24m_m_61,
    adata_pancrease_exocrine_24m_m_60,
    adata_spleen_24m_m_58,
    adata_spleen_24m_m_59,
    adata_thymus_24m_m_60,
    adata_tongue_24m_m_58,
    adata_tongue_24m_m_59,
    adata_tongue_24m_m_60,
]
adata24 = adata_bladder_24m_m_58.concatenate(remaining_24m_runs)

# Placeholder cell-type label for every cell of the merged object.
adata24.obs["CellType"] = 'NA'
adata24

In [None]:
# Save the merged 24m object as an h5ad file.
h5ad_24m = './write/maca24m.h5ad'
adata24.write(h5ad_24m)

# 18, 21 and 24 months

In [9]:
# Per-channel QC table for the 18/21/24-month runs: keep only the columns
# needed to label each channel, then give them short names.
path = '/data/maca/data/'
qc_columns = ['channel','tissue','tissue.notes','mouse.age','mouse.sex']
short_names = {'mouse.age':'age','mouse.sex':'sex','tissue.notes':'subtissue'}

maca10x182124metadata = pd.read_csv(
    '/data/maca/data/MACA_10x_18-21-24_qc.csv',
    usecols=qc_columns,
).rename(columns=short_names)

# Re-index rows as 0..n-1 so later cells can address channels by position.
maca10x182124metadata.index = range(len(maca10x182124metadata))

In [25]:
# Preview the first five rows of the channel metadata table.
maca10x182124metadata.head(5)
#len(maca10x182124metadata)

Unnamed: 0,channel,age,sex,tissue,subtissue
0,MACA_18m_F_AORTA_50,18,F,AORTA,
1,MACA_18m_F_BM_50,18,F,BM,
2,MACA_18m_F_BM_51,18,F,BM,
3,MACA_18m_F_BREAST_50,18,F,BREAST,
4,MACA_18m_F_BREAST_51,18,F,BREAST,


In [None]:
adata182124aux

In [24]:
# Load each 18/21/24-month channel listed in the metadata table into its own
# AnnData object, annotate it, and collect the objects for concatenation.
adata182124aux = []
for sample in maca10x182124metadata.itertuples(index=False):
    channel_dir = '/data/maca/data/' + sample.channel + '/'

    # After the transpose, observations are barcodes and variables are genes
    # (names come from barcodes.tsv and the second column of genes.tsv).
    channel_adata = sc.read(channel_dir + 'matrix.mtx', cache=True).transpose()
    channel_adata.var_names = np.genfromtxt(channel_dir + 'genes.tsv', dtype=str)[:, 1]
    channel_adata.obs_names = np.genfromtxt(channel_dir + 'barcodes.tsv', dtype=str)

    # Per-channel annotations, broadcast to every cell of the channel.
    channel_adata.obs['Tissue'] = sample.tissue
    channel_adata.obs['subTissue'] = sample.subtissue
    channel_adata.obs['Age'] = str(sample.age) + 'm'
    # Anything other than 'M' is labelled female.
    channel_adata.obs['Sex'] = 'male' if sample.sex == 'M' else 'female'

    # Drop cells with fewer than 250 detected genes (adds obs['n_genes']).
    sc.pp.filter_cells(channel_adata, min_genes=250)

    channel_adata.obs['Channel'] = sample.channel
    # Placeholder annotation column.
    channel_adata.obs['cell_ontology_id'] = 'NA'

    adata182124aux.append(channel_adata)

# The last channel loaded is the anchor of the concatenation; the remaining
# channels are appended to it in list order.
first = adata182124aux.pop()
adata182124 = first.concatenate(adata182124aux)

adata182124.obs.head()

Unnamed: 0,Tissue,subTissue,Age,Sex,n_genes,Channel,Cell,batch
AAACCTGAGGCACATG-1-0,TONGUE,,24m,male,510,MACA_24m_M_TONGUE_60,,0
AAACCTGCAGGGTACA-1-0,TONGUE,,24m,male,2107,MACA_24m_M_TONGUE_60,,0
AAACCTGCAGTAAGCG-1-0,TONGUE,,24m,male,3481,MACA_24m_M_TONGUE_60,,0
AAACCTGTCATTATCC-1-0,TONGUE,,24m,male,2599,MACA_24m_M_TONGUE_60,,0
AAACGGGAGATGAGAG-1-0,TONGUE,,24m,male,410,MACA_24m_M_TONGUE_60,,0


In [32]:
# Build a per-cell identifier "<Channel>_<barcode>".
# obs_names carry '-'-separated suffixes (e.g. "AAACCTGAGGCACATG-1-0"), so
# only the text before the first '-' is kept. The vectorized .str accessor
# replaces a per-row `apply(lambda x: pd.Series(...))`, which built one Series
# object per cell and is very slow on hundreds of thousands of cells.
channel_barcode = adata182124.obs['Channel'] + '_' + adata182124.obs_names
adata182124.obs['Cell'] = channel_barcode.str.split('-', n=1).str[0]
adata182124.obs.head()

Unnamed: 0,Tissue,subTissue,Age,Sex,n_genes,Channel,Cell,batch
AAACCTGAGGCACATG-1-0,TONGUE,,24m,male,510,MACA_24m_M_TONGUE_60,MACA_24m_M_TONGUE_60_AAACCTGAGGCACATG,0
AAACCTGCAGGGTACA-1-0,TONGUE,,24m,male,2107,MACA_24m_M_TONGUE_60,MACA_24m_M_TONGUE_60_AAACCTGCAGGGTACA,0
AAACCTGCAGTAAGCG-1-0,TONGUE,,24m,male,3481,MACA_24m_M_TONGUE_60,MACA_24m_M_TONGUE_60_AAACCTGCAGTAAGCG,0
AAACCTGTCATTATCC-1-0,TONGUE,,24m,male,2599,MACA_24m_M_TONGUE_60,MACA_24m_M_TONGUE_60_AAACCTGTCATTATCC,0
AAACGGGAGATGAGAG-1-0,TONGUE,,24m,male,410,MACA_24m_M_TONGUE_60,MACA_24m_M_TONGUE_60_AAACGGGAGATGAGAG,0


## 3 months

In [15]:
path = '/data/maca/data/10x/'

# Per-channel table for the 10x runs under /data/maca/data/10x/, restricted
# to the columns used below, with the dotted column names shortened.
maca10x3metadata = pd.read_csv(
    '/data/maca/data/10x/MACA_10x.csv',
    usecols=['channel', 'tissue', 'mouse.age', 'mouse.sex', 'pool'],
).rename(columns={'mouse.age': 'age', 'mouse.sex': 'sex'})
# (A row filter on `age == 3` used to be applied here; it is disabled.)

# Plain 0..n-1 index so rows can be looked up by integer label.
maca10x3metadata.index = range(len(maca10x3metadata))

# Second metadata table from TM_droplet_metadata.csv (much longer than the
# per-channel table above).
maca10x3metadata2 = pd.read_csv(
    '/data/maca/data/10x/TM_droplet_metadata.csv',
    low_memory=False,
)


In [16]:
len(maca10x3metadata2)

70118

In [17]:
len(maca10x3metadata)

94

In [18]:
maca10x3metadata


Unnamed: 0,channel,tissue,age,sex,pool
0,10X_P1_1,Kidney,30,M,1
1,10X_P1_2,Spleen,30,M,1
2,10X_P1_3,Heart,30,M,1
3,10X_P1_4,Heart,30,M,1
4,10X_P1_5,Marrow,30,M,1
5,10X_P1_6,Lung,30,M,1
6,10X_P1_7,Pancreas,30,M,1
7,10X_P1_8,Colon,30,M,1
8,10X_P1_9,Kidney,30,M,1
9,10X_P1_10,Spleen,30,M,1


In [19]:
# Read every channel listed in `maca10x3metadata`, annotate its cells with
# the channel-level metadata, and gather the AnnData objects in `adata3aux`.
adata3aux = []
for meta_row in maca10x3metadata.itertuples(index=False):
    sample_dir = '/data/maca/data/10x/' + meta_row.channel + '/'
    gene_names = np.genfromtxt(sample_dir + 'genes.tsv', dtype=str)[:, 1]
    barcodes = np.genfromtxt(sample_dir + 'barcodes.tsv', dtype=str)

    # Transposed so that rows are barcodes and columns are genes.
    sample = sc.read(sample_dir + 'matrix.mtx', cache=True).transpose()
    sample.var_names = gene_names
    sample.obs_names = barcodes

    sample.obs['Tissue'] = meta_row.tissue
    sample.obs['Age'] = str(meta_row.age) + 'm'
    # Anything other than 'M' is labelled female.
    sample.obs['Sex'] = 'male' if meta_row.sex == 'M' else 'female'

    # Keep only cells with at least 250 detected genes (adds obs['n_genes']).
    sc.pp.filter_cells(sample, min_genes=250)

    sample.obs['Channel'] = meta_row.channel
    # Placeholder; no cell-type annotation is attached at this point.
    sample.obs['CellType'] = 'NA'

    adata3aux.append(sample)


In [20]:
# Concatenate all per-channel objects, using the last-loaded channel as the
# anchor (so it becomes batch 0) followed by the others in list order.
# The list is sliced instead of popped: `adata3aux` is filled in a different
# cell, so a mutating pop() would silently drop one more channel every time
# this cell is re-run.
first = adata3aux[-1]
adata3 = first.concatenate(adata3aux[:-1])

# Per-cell identifier "<Channel>_<barcode>": keep only the text before the
# first '-' of the concatenated obs_names. Vectorized .str ops replace a
# per-row `apply(lambda x: pd.Series(...))`, which is very slow at this size.
channel_barcode = adata3.obs['Channel'] + '_' + adata3.obs_names
adata3.obs['Cell'] = channel_barcode.str.split('-', n=1).str[0]

In [None]:
# use join to bring in the rest of the metadata #anndata.__version__ # check version

In [31]:
adata3.obs.head()

Unnamed: 0,Tissue,Age,Sex,n_genes,Channel,CellType,batch,Cell
AAACCTGAGAGCCCAA-1-0,Trachea,3m,male,669,10X_P8_15,,0,10X_P8_15_AAACCTGAGAGCCCAA
AAACCTGAGCTAGTCT-1-0,Trachea,3m,male,1071,10X_P8_15,,0,10X_P8_15_AAACCTGAGCTAGTCT
AAACCTGAGGATGTAT-1-0,Trachea,3m,male,1311,10X_P8_15,,0,10X_P8_15_AAACCTGAGGATGTAT
AAACCTGAGGGATACC-1-0,Trachea,3m,male,1804,10X_P8_15,,0,10X_P8_15_AAACCTGAGGGATACC
AAACCTGAGTTTAGGA-1-0,Trachea,3m,male,1085,10X_P8_15,,0,10X_P8_15_AAACCTGAGTTTAGGA


In [21]:
adata3

AnnData object with n_obs × n_vars = 205714 × 23433 
    obs: 'Tissue', 'Age', 'Sex', 'n_genes', 'Channel', 'CellType', 'batch', 'Cell'

## concatenate all time points

In [22]:
# Merge the 18/21/24-month object with the 3-month object into one AnnData.
# NOTE(review): in the outputs shown in this notebook the two sources spell
# tissue labels differently (e.g. 'TONGUE' vs. 'Trachea'), so exact-match
# filters on obs['Tissue'] may only hit one source — confirm / harmonise.
#adata = adata18.concatenate([adata21,adata24,adata3])
adata = adata182124.concatenate(adata3)
# NOTE: the write below is disabled, although a later cell reads
# './write/maca.h5ad' back in.
#adata.write('./write/maca.h5ad')

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=True'.


  obs = pd.concat(out_obss)


In [None]:
adata = read_h5ad('./write/maca.h5ad')
type(adata.X)


In [None]:
adata.shape

In [23]:
adata

AnnData object with n_obs × n_vars = 426361 × 23433 
    obs: 'Age', 'Cell', 'CellType', 'Channel', 'Sex', 'Tissue', 'batch', 'n_genes', 'subTissue'

In [None]:
adata.obs.head()

# Preprocessing

In [None]:
# Basic QC on the merged object: first filter genes with a minimum of 5
# cells, then filter cells with a minimum of 250 genes (the same per-cell
# threshold the channel loaders used before concatenation).
sc.pp.filter_genes(adata, min_cells=5)
sc.pp.filter_cells(adata, min_genes=250)

In [None]:
426360/23433

In [None]:
adata

In [None]:
# add the total counts per cell as observations-annotation to adata
# Row sums of a scipy sparse matrix come back as an (n, 1) np.matrix, while a
# dense ndarray gives a flat (n,) array; np.asarray(...).ravel() yields a 1-D
# array in both cases (the former `.A1` only exists on np.matrix).
adata.obs['n_counts'] = np.asarray(adata.X.sum(axis=1)).ravel()

In [None]:
adata

In [None]:
axs = sc.pl.violin(adata, ['n_genes', 'n_counts'],
                   jitter=0.4, multi_panel=True)

In [None]:
ax = sc.pl.scatter(adata, x='n_counts', y='n_genes')

In [None]:
# Library-size normalisation with counts_per_cell_after=1e4.
sc.pp.normalize_per_cell(adata, counts_per_cell_after=1e4) #simple lib size normalization?
# Keep the normalised, all-gene state in .raw.
# NOTE(review): this snapshot is taken before the log1p and scaling cells
# further down, so .raw holds linear-scale values — confirm that this is what
# the downstream steps reading .raw are meant to use.
adata.raw = adata

In [None]:
# Compute the gene dispersion filter on the expression matrix using the given
# mean/dispersion thresholds; `filter_result.gene_subset` is used in a later
# cell to subset the genes.
filter_result = sc.pp.filter_genes_dispersion(
    adata.X, min_mean=0.0125, max_mean=10, min_disp=0.5)
# Diagnostic plot of the filter result.
sc.pl.filter_genes_dispersion(filter_result)

In [None]:
# Restrict the object to the genes selected by the dispersion filter.
# Slicing an AnnData returns a view; the in-place log1p / scale steps that
# follow would modify that view, so take an explicit copy up front to make
# `adata` an independent object.
adata = adata[:, filter_result.gene_subset].copy()
adata.shape

In [None]:
sc.pp.log1p(adata)

In [None]:
sc.pp.scale(adata, max_value=10, zero_center=False)

# Exploration
## PCA

In [None]:
sc.tl.pca(adata)

In [None]:
ax = sc.pl.pca_scatter(adata, color=['Tissue'], right_margin=0.5)

In [None]:
ax = sc.pl.pca_scatter(adata, color=['Age'], right_margin=0.5)

In [None]:
ax = sc.pl.pca_scatter(adata, color=['Sex'], right_margin=0.5)

In [None]:
ax = sc.pl.pca_scatter(adata, color='n_counts', right_margin=0.5)

In [None]:
sc.pl.pca_variance_ratio(adata, log=True)

## Louvain clustering

In [None]:
# Build the neighbourhood graph (n_neighbors=15), then run Louvain
# clustering at resolution 0.3; later plots colour cells by 'louvain'.
sc.pp.neighbors(adata, n_neighbors=15)#, method='gauss')
sc.tl.louvain(adata, resolution = 0.3)

In [None]:
adata

## UMAP

In [None]:
sc.tl.umap(adata)

In [None]:
sc.settings.set_figure_params(dpi=200)
sc.pl.umap(adata, color=['Tissue'], right_margin=0.5)

In [None]:
sc.settings.set_figure_params(dpi=200)
sc.pl.umap(adata, color=['louvain'], right_margin=0.5)

## tSNE

In [None]:
sc.tl.tsne(adata, perplexity=50)

In [None]:
#dill.dump_session('scanpy-maca_notebook.db')

In [None]:
sc.settings.set_figure_params(dpi=200)
sc.pl.tsne(adata, color=['Tissue'], right_margin=0.5)

In [None]:
sc.pl.tsne(adata, color=['Sex'], right_margin=0.5)

In [None]:
sc.pl.tsne(adata, color=['louvain'], right_margin=0.5)

In [None]:
adata.shape
#(adata.obs['Tissue'] == "Bladder").shape

In [None]:
# t-SNE of tongue cells coloured by age.
# Tissue labels are not spelled consistently across the merged sources (the
# 18/21/24-month channels show 'TONGUE', while the 3-month channels use
# capitalised names such as 'Trachea'), so an exact match on "Tongue" would
# leave out the older time points. Match case-insensitively instead.
is_tongue = adata.obs['Tissue'].astype(str).str.lower() == 'tongue'
sc.pl.tsne(adata[is_tongue, :], color=['Age'], right_margin=0.5)

In [None]:
(adata.obs['Tissue'] == "Tongue").head()

In [None]:
sc.pl.tsne(adata[adata.obs['louvain'] == "6",:], color=['Tissue'], right_margin=0.5)

In [None]:
sc.pl.violin(adata, ['Cdkn2a'],groupby='Age', rotation=90)#, save='.pdf')

In [None]:
#### sc.pl.tsne(adata, color=['louvain'], right_margin=0.5)

## tSNE per tissue

In [None]:
# Bladder-only subset for the per-tissue analysis.
# Tissue labels differ in letter case between the merged sources (e.g.
# 'TONGUE' vs. 'Trachea' in the outputs above), so match case-insensitively;
# presumably the 18/21/24-month bladder channels are labelled 'BLADDER' —
# verify against the metadata table.
is_bladder = adata.obs['Tissue'].astype(str).str.lower() == 'bladder'
# Explicit copy: the neighbours / clustering / t-SNE steps in later cells
# write into `tiss`, which should be an independent object, not a view.
tiss = adata[is_bladder, :].copy()
tiss

In [None]:
adata_3m = adata[adata.obs['Age'] == "3m",:]
sc.pl.violin(adata_3m, ['Cdkn2a'],groupby='Tissue', rotation=90)#, save='.pdf')

In [None]:
adata_18m = adata[adata.obs['Age'] == "18m",:]
sc.pl.violin(adata_18m, ['Cdkn2a'],groupby='Tissue', rotation=90)#, save='.pdf')

In [None]:
sc.pp.neighbors(tiss, n_neighbors=15)#, method='gauss')

In [None]:
sc.tl.louvain(tiss, resolution = 0.2)

In [None]:
sc.tl.tsne(tiss, perplexity=50)

In [None]:
tiss

In [None]:
sc.pl.tsne(tiss, color=['louvain'], right_margin=0.5)

## finding marker genes

In [None]:
sc.tl.rank_genes_groups(tiss, 'louvain')
sc.pl.rank_genes_groups(tiss, n_genes=20)#, save='.pdf')
#adata.write(results_file)

In [None]:
sc.tl.rank_genes_groups(tiss, 'louvain', method='logreg')
sc.pl.rank_genes_groups(tiss, n_genes=20)

In [None]:
# Top 5 ranked marker genes per cluster. The ranking was run on `tiss` in the
# cells above, so the results are read from `tiss.uns`, not `adata.uns`.
pd.DataFrame(tiss.uns['rank_genes_groups']['names']).head(5)

## Force-directed graph

In [None]:
sc.tl.draw_graph(adata) # be patient here...

In [None]:
sc.pl.draw_graph(adata, color=['Tissue'])

In [None]:
sc.pl.draw_graph(adata, color=['Age'])

In [None]:
sc.pl.draw_graph(adata, color=['Sex'])

In [None]:
sc.pl.draw_graph(adata, color=['louvain'])

# Pseudotime analysis

In [None]:
#sc.tl.dpt(adata, n_branchings=1)

In [None]:
#sc.pl.diffmap(adata, color=['dpt_pseudotime', 'dpt_groups', 'age'])

In [None]:
#sc.pl.diffmap(adata, color=['dpt_pseudotime'])

In [None]:
#sc.pl.diffmap(adata, color=['dpt_groups'])

In [None]:
#sc.pl.diffmap(adata, color=['Age'])

# Save processed data

In [None]:
# Save the processed `adata` object to ./write/maca.processed.h5ad.
adata.write('./write/maca.processed.h5ad')