### adapted from https://github.com/dpcook/fun_analysis/blob/master/tabula_muris/mouse_atlas_scanpy.ipynb
### and https://nbviewer.jupyter.org/github/theislab/scanpy_usage/blob/master/170505_seurat/seurat.ipynb


In [None]:
import gc
# Force a full garbage-collection pass before the heavy data loading starts
# (presumably to release memory held over from an earlier run of the kernel).
gc.collect()


In [None]:
import numpy as np
import pandas as pd
import csv
import scanpy.api as sc
from igraph import *
from MulticoreTSNE import MulticoreTSNE as TSNE #faster TSNE alternative
from anndata import read_h5ad
# Print the versions of scanpy and its dependencies, for reproducibility.
sc.logging.print_versions()
# Path at which the processed AnnData object is meant to be stored.
results_file = './write/maca.processed.h5ad'


# Load data

In [None]:
# Load a previously saved AnnData object.
# NOTE(review): `adata` is reassigned further down by
# `adata = adata24.concatenate([adata3])`, so when the notebook is run top to
# bottom this load looks redundant -- confirm before removing.
adata = read_h5ad('./write/maca.h5ad')

### 24 months

In [None]:
# males

# Root directory that holds one sub-directory per 24-month sample.
_MACA_24M_DATA_DIR = '/data/maca/data/'


def _load_24m_male_sample(sample_dir, tissue, min_genes=250):
    """Load one 24-month male sample and annotate it.

    Reads ``matrix.mtx``, ``genes.tsv`` and ``barcodes.tsv`` from
    ``_MACA_24M_DATA_DIR + sample_dir``, labels the observations with the
    tissue, age ('24m'), sex ('male') and a placeholder cell id ('NA'), and
    drops cells with fewer than ``min_genes`` detected genes.

    Parameters
    ----------
    sample_dir : str
        Name of the sample directory, e.g. ``'MACA_24m_M_BAT_58'``.
    tissue : str
        Value stored in ``obs['Tissue']`` for every cell of the sample.
    min_genes : int, optional
        Threshold handed to ``sc.pp.filter_cells``.

    Returns
    -------
    AnnData
        The filtered, annotated sample.
    """
    path = _MACA_24M_DATA_DIR + sample_dir + '/'
    # Transpose so that rows line up with barcodes.tsv (observations) and
    # columns with genes.tsv (variables), as assigned just below.
    sample = sc.read(path + 'matrix.mtx', cache=True).transpose()
    # Second column of genes.tsv is used as the gene name
    # (presumably the gene symbol -- TODO confirm against the files).
    sample.var_names = np.genfromtxt(path + 'genes.tsv', dtype=str)[:, 1]
    sample.obs_names = np.genfromtxt(path + 'barcodes.tsv', dtype=str)
    sample.obs['Tissue'] = tissue
    sample.obs['Age'] = '24m'
    sample.obs['Sex'] = 'male'
    sample.obs['Cell'] = 'NA'
    sc.pp.filter_cells(sample, min_genes=min_genes)
    return sample


# One variable per sample; the names are referenced by the concatenation cell.
adata_bat_24m_m_58 = _load_24m_male_sample('MACA_24m_M_BAT_58', 'Bat')

adata_bladder_24m_m_58 = _load_24m_male_sample('MACA_24m_M_BLADDER_58', 'Bladder')
adata_bladder_24m_m_59 = _load_24m_male_sample('MACA_24m_M_BLADDER_59', 'Bladder')
adata_bladder_24m_m_60 = _load_24m_male_sample('MACA_24m_M_BLADDER_60', 'Bladder')

adata_marrow_24m_m_58 = _load_24m_male_sample('MACA_24m_M_BM_58', 'Marrow')
adata_marrow_24m_m_59 = _load_24m_male_sample('MACA_24m_M_BM_59', 'Marrow')
adata_marrow_24m_m_60 = _load_24m_male_sample('MACA_24m_M_BM_60', 'Marrow')
adata_marrow_24m_m_61 = _load_24m_male_sample('MACA_24m_M_BM_61', 'Marrow')

adata_gat_24m_m_58 = _load_24m_male_sample('MACA_24m_M_GAT_58', 'Gat')
adata_gat_24m_m_59 = _load_24m_male_sample('MACA_24m_M_GAT_59', 'Gat')

adata_heart_24m_m_58 = _load_24m_male_sample('MACA_24m_M_HEART_58', 'Heart')
adata_heart_24m_m_59 = _load_24m_male_sample('MACA_24m_M_HEART_59', 'Heart')
adata_heart_24m_m_60 = _load_24m_male_sample('MACA_24m_M_HEART_60', 'Heart')
adata_heart_24m_m_61 = _load_24m_male_sample('MACA_24m_M_HEART_61', 'Heart')

adata_hepatocytes_24m_m_58 = _load_24m_male_sample('MACA_24m_M_HEPATOCYTES_58', 'Hepatocytes')
adata_hepatocytes_24m_m_59 = _load_24m_male_sample('MACA_24m_M_HEPATOCYTES_59', 'Hepatocytes')

adata_kidney_24m_m_58 = _load_24m_male_sample('MACA_24m_M_KIDNEY_58', 'Kidney')
adata_kidney_24m_m_59 = _load_24m_male_sample('MACA_24m_M_KIDNEY_59', 'Kidney')
adata_kidney_24m_m_60 = _load_24m_male_sample('MACA_24m_M_KIDNEY_60', 'Kidney')
adata_kidney_24m_m_61 = _load_24m_male_sample('MACA_24m_M_KIDNEY_61', 'Kidney')

adata_lung_24m_m_60 = _load_24m_male_sample('MACA_24m_M_LUNG_60', 'Lung')
adata_lung_24m_m_61 = _load_24m_male_sample('MACA_24m_M_LUNG_61', 'Lung')

adata_mat_24m_m_58 = _load_24m_male_sample('MACA_24m_M_MAT_58', 'Mat')
adata_mat_24m_m_59 = _load_24m_male_sample('MACA_24m_M_MAT_59', 'Mat')

adata_muscle_24m_m_58 = _load_24m_male_sample('MACA_24m_M_MUSCLE_58', 'Muscle')
adata_muscle_24m_m_59 = _load_24m_male_sample('MACA_24m_M_MUSCLE_59', 'Muscle')
adata_muscle_24m_m_60 = _load_24m_male_sample('MACA_24m_M_MUSCLE_60', 'Muscle')
adata_muscle_24m_m_61 = _load_24m_male_sample('MACA_24m_M_MUSCLE_61', 'Muscle')

adata_pancrease_exocrine_24m_m_60 = _load_24m_male_sample('MACA_24m_M_PANCREASE_EXO_60', 'Pancrease_exocrine')

adata_scat_24m_m_58 = _load_24m_male_sample('MACA_24m_M_SCAT_58', 'Scat')

adata_spleen_24m_m_58 = _load_24m_male_sample('MACA_24m_M_SPLEEN_58', 'Spleen')
adata_spleen_24m_m_59 = _load_24m_male_sample('MACA_24m_M_SPLEEN_59', 'Spleen')
adata_spleen_24m_m_60 = _load_24m_male_sample('MACA_24m_M_SPLEEN_60', 'Spleen')
adata_spleen_24m_m_61 = _load_24m_male_sample('MACA_24m_M_SPLEEN_61', 'Spleen')

adata_thymus_24m_m_58 = _load_24m_male_sample('MACA_24m_M_THYMUS_58', 'Thymus')
adata_thymus_24m_m_59 = _load_24m_male_sample('MACA_24m_M_THYMUS_59', 'Thymus')
adata_thymus_24m_m_60 = _load_24m_male_sample('MACA_24m_M_THYMUS_60', 'Thymus')

adata_tongue_24m_m_58 = _load_24m_male_sample('MACA_24m_M_TONGUE_58', 'Tongue')
adata_tongue_24m_m_59 = _load_24m_male_sample('MACA_24m_M_TONGUE_59', 'Tongue')
adata_tongue_24m_m_60 = _load_24m_male_sample('MACA_24m_M_TONGUE_60', 'Tongue')




In [None]:
# 24-month samples that make up the combined object, in batch order.
# NOTE(review): several loaded samples (bat, gat, heart 60/61, kidney 60/61,
# lung, mat, scat, spleen 60/61, thymus 58/59) are not part of this list --
# confirm that leaving them out is intentional.
_samples_24m = [
    adata_bladder_24m_m_58,
    adata_bladder_24m_m_59,
    adata_bladder_24m_m_60,
    adata_marrow_24m_m_58,
    adata_marrow_24m_m_59,
    adata_marrow_24m_m_60,
    adata_marrow_24m_m_61,
    adata_heart_24m_m_58,
    adata_heart_24m_m_59,
    adata_hepatocytes_24m_m_58,
    adata_hepatocytes_24m_m_59,
    adata_kidney_24m_m_58,
    adata_kidney_24m_m_59,
    adata_muscle_24m_m_58,
    adata_muscle_24m_m_59,
    adata_muscle_24m_m_60,
    adata_muscle_24m_m_61,
    adata_pancrease_exocrine_24m_m_60,
    adata_spleen_24m_m_58,
    adata_spleen_24m_m_59,
    adata_thymus_24m_m_60,
    adata_tongue_24m_m_58,
    adata_tongue_24m_m_59,
    adata_tongue_24m_m_60,
]

# The first sample is the object that concatenate() is called on; the
# remaining ones are handed over as a single list.
_base_24m, *_others_24m = _samples_24m
adata24 = _base_24m.concatenate(_others_24m)

# No cell-type annotation is available for these samples at this point.
adata24.obs["CellType"] = 'NA'
adata24

### 3m

In [None]:
# Load every 10x channel listed in MACA_10x.csv, annotate it from the
# metadata table and concatenate all channels into `adata3`.
path = '/data/maca/data/10x/'
maca10x3metadata = pd.read_csv('/data/maca/data/10x/MACA_10x.csv',usecols = ['channel','tissue','mouse.age','mouse.sex','pool'])
maca10x3metadata2 = pd.read_csv('/data/maca/data/10x/TM_droplet_metadata.csv', low_memory=False)

maca10x3metadata = maca10x3metadata.rename(columns = {'mouse.age':'age','mouse.sex':'sex'})
# Optional age filter, currently disabled, so channels of every age listed in
# the table are loaded:
#maca10x3metadata = maca10x3metadata[maca10x3metadata.age == 3]
maca10x3metadata.index = range(len(maca10x3metadata))

adata3aux = []
# Iterate over the metadata rows directly instead of indexing each column by
# position on every access.
for row in maca10x3metadata.itertuples(index=False):
    path = '/data/maca/data/10x/' + row.channel + '/'
    # Transpose so that rows line up with barcodes.tsv and columns with
    # genes.tsv, as assigned just below.
    foo = sc.read(path + 'matrix.mtx', cache=True).transpose()
    foo.var_names = np.genfromtxt(path + 'genes.tsv', dtype=str)[:, 1]
    foo.obs_names = np.genfromtxt(path + 'barcodes.tsv', dtype=str)
    foo.obs['Tissue'] = row.tissue
    foo.obs['Age'] = str(row.age) + 'm'
    # Anything that is not exactly 'M' is labelled female.
    foo.obs['Sex'] = 'male' if row.sex == 'M' else 'female'
    sc.pp.filter_cells(foo, min_genes=250)

    foo.obs['Channel'] = row.channel
    # Placeholder; annotations are joined in later from the metadata table.
    foo.obs['CellType'] = 'NA'

    adata3aux.append(foo)

# NOTE: the *last* loaded channel is used as the base object, so it becomes
# the first batch of the concatenated result.
first = adata3aux.pop()
adata3 = first.concatenate(adata3aux)
# Cell id = '<channel>_<barcode>', keeping only the text before the first '-'
# of that combined string.
adata3.obs['Cell'] = adata3.obs['Channel'] + '_' + adata3.obs_names
# Vectorised string split; avoids building one pandas Series per cell.
adata3.obs['Cell'] = adata3.obs['Cell'].str.split('-').str[0]

In [None]:
# Combine the 24-month samples with the samples loaded from the 10x metadata
# table into a single AnnData object.
adata = adata24.concatenate([adata3])


# Preprocessing

In [None]:
# Drop genes seen in fewer than 5 cells, then cells with fewer than 250 genes.
sc.pp.filter_genes(adata, min_cells=5)
sc.pp.filter_cells(adata, min_genes=250)

In [None]:
adata

In [None]:
# add the total counts per cell as observations-annotation to adata
# `.A1` flattens the row sums to a 1-D array; this assumes adata.X is a
# sparse/np.matrix-like object -- TODO confirm.
adata.obs['n_counts'] = np.sum(adata.X, axis=1).A1

In [None]:
adata

In [None]:
# Distribution of the two QC metrics ('n_genes' is presumably the column
# written by sc.pp.filter_cells -- TODO confirm).
axs = sc.pl.violin(adata, ['n_genes', 'n_counts'],
                   jitter=0.4, multi_panel=True)

In [None]:
# Detected genes versus total counts per cell.
ax = sc.pl.scatter(adata, x='n_counts', y='n_genes')

In [None]:
# Keep a log1p-transformed copy of the current matrix in `adata.raw`.
# NOTE(review): this snapshot is taken before the per-cell normalization on
# the next line, so `.raw` holds log1p of un-normalized values -- confirm
# that this is intended.
adata.raw = sc.pp.log1p(adata, copy=True) # freezes the state of the AnnData object returned by sc.pp.log1p
sc.pp.normalize_per_cell(adata, counts_per_cell_after=1e4) # library-size normalization to 1e4 counts per cell

In [None]:
# Flag highly variable genes by mean/dispersion thresholds and plot the result.
filter_result = sc.pp.filter_genes_dispersion(
    adata.X, min_mean=0.0125, max_mean=10, min_disp=0.5)
sc.pl.filter_genes_dispersion(filter_result)

In [None]:
# Restrict the data to the genes selected above.
adata = adata[:, filter_result.gene_subset]

In [None]:
# Log-transform the normalized values in place.
sc.pp.log1p(adata)

In [None]:
# Scale each gene; max_value=10 caps the scaled values.
sc.pp.scale(adata, max_value=10)

In [None]:
# Saving at this stage is currently disabled.
#adata.write(results_file)

# Exploration


## Choose a tissue

In [None]:
# Tissue explored in the remainder of the notebook.
tissofinterest = "Muscle"
_is_selected_tissue = adata.obs['Tissue'] == tissofinterest
tiss = adata[_is_selected_tissue, :]

# Observation tables of the selected tissue, split by age group.
_tiss_obs = tiss.obs
tiss2 = _tiss_obs.loc[_tiss_obs['Age'] == '3m']
tiss3 = _tiss_obs.loc[_tiss_obs['Age'] == '24m']


In [None]:
# Show the summary of the tissue subset.
tiss

In [None]:
# Keep only the cells of the selected tissue whose age is 3m or 24m.
# pd.concat is used because DataFrame.append was deprecated in pandas 1.4 and
# removed in pandas 2.0.
tissaux = pd.concat([tiss2, tiss3])
tiss = adata[adata.obs.index.isin(tissaux.index),:]

In [None]:
# Summary of the tissue/age subset selected above.
tiss

## PCA

In [None]:
# Compute the principal components of the subset.
sc.tl.pca(tiss)

In [None]:
tiss

In [None]:
# PCA scatter plots coloured by tissue, age, sex and total counts.
ax = sc.pl.pca_scatter(tiss, color=['Tissue'], right_margin=0.5)

In [None]:
ax = sc.pl.pca_scatter(tiss, color=['Age'], right_margin=0.5)

In [None]:
ax = sc.pl.pca_scatter(tiss, color=['Sex'], right_margin=0.5)

In [None]:
ax = sc.pl.pca_scatter(tiss, color='n_counts', right_margin=0.5)

In [None]:
# Variance explained per principal component, on a log scale.
sc.pl.pca_variance_ratio(tiss, log=True)

## Louvain clustering

In [None]:
# Build the neighbourhood graph with 15 neighbours per cell, then cluster it
# with the Louvain algorithm.
sc.pp.neighbors(tiss, n_neighbors=15)#, method='gauss')
# The cluster labels are read back later from tiss.obs['louvain'].
sc.tl.louvain(tiss, resolution = 0.2)

In [None]:
tiss

## UMAP

In [None]:
# Compute the UMAP embedding of the subset.
sc.tl.umap(tiss)

In [None]:
# UMAP coloured by tissue (figure resolution set to 200 dpi).
sc.settings.set_figure_params(dpi=200)
sc.pl.umap(tiss, color=['Tissue'], right_margin=0.5)

In [None]:
# UMAP coloured by Louvain cluster.
sc.settings.set_figure_params(dpi=200)
sc.pl.umap(tiss, color=['louvain'], right_margin=0.5)

In [None]:
# UMAP coloured by age group.
sc.settings.set_figure_params(dpi=200)
sc.pl.umap(tiss, color=['Age'], right_margin=0.5)

## tSNE

In [None]:
# Compute the tSNE embedding of the subset with perplexity 50.
sc.tl.tsne(tiss, perplexity=50)

In [None]:
# tSNE coloured by tissue, sex, age and Louvain cluster in turn.
sc.settings.set_figure_params(dpi=200)
sc.pl.tsne(tiss, color=['Tissue'], right_margin=0.5)

In [None]:
sc.pl.tsne(tiss, color=['Sex'], right_margin=0.5)

In [None]:
sc.pl.tsne(tiss, color=['Age'], right_margin=0.5)

In [None]:
sc.pl.tsne(tiss, color=['louvain'],right_margin=0.5)

## Read the annotations from Tabula Muris

In [None]:
# Load the droplet metadata table and rename its 'cell' column to 'Cell' so
# that it shares a key name with the 'Cell' column of the observations.
maca10x3metadata2 = pd.read_csv('/data/maca/data/10x/TM_droplet_metadata.csv', low_memory=False)
maca10x3metadata2 = maca10x3metadata2.rename(columns = {'cell':'Cell'})
len(maca10x3metadata2)

In [None]:
# Restrict the tissue subset to one age group.
ageofinterest = "3m"
tissage = tiss[tiss.obs['Age'] == ageofinterest,:]
tissage

In [None]:
# Preview both tables before joining them.
tissage.obs.head()

In [None]:
maca10x3metadata2.head()

In [None]:
# Join the annotations onto the cells of `tissage`.
# A left join on de-duplicated annotation keys yields exactly one output row
# per cell, in the order of `tissage.obs`; cells without an annotation get NaN.
# The default inner join dropped unannotated cells, so any later row-by-row
# use of the result was shifted relative to `tissage.obs`.
# The variable keeps its historical name because later cells refer to it.
_annotations = maca10x3metadata2.drop_duplicates(subset='Cell')
merged_inner = pd.merge(left=tissage.obs, right=_annotations, how='left', on='Cell')
#merged_inner = merged_inner[['Age','Cell','Channel','Sex','Tissue','batch','n_genes','n_counts','cell_ontology_class','free_annotation']]
merged_inner.head()

In [None]:
# NOTE(review): `merged_inner` comes out of pd.merge with a fresh integer
# index, while `tissage.obs` is presumably still indexed by cell name here.
# pandas aligns on index labels when a Series is assigned, so these two
# columns most likely come out as all-NaN. The same two columns are assigned
# again after the index reset below.
tissage.obs["cell_ontology_class"] = merged_inner["cell_ontology_class"]
tissage.obs["free_annotation"] = merged_inner["free_annotation"]
tissage.obs

In [None]:
# Replace the observation index by a 0..n-1 integer index; the previous index
# is kept as an ordinary column.
tissage.obs = tissage.obs.reset_index()

In [None]:
# Look the annotations up by the 'Cell' key rather than by row position.
# Positional assignment is only right when `merged_inner` has exactly one row
# per cell of `tissage.obs` in the same order; a merge that drops or
# duplicates rows would attach labels to the wrong cells.
# Cells with no match get NaN.
_annotations_by_cell = merged_inner.drop_duplicates(subset='Cell').set_index('Cell')
tissage.obs["cell_ontology_class"] = tissage.obs['Cell'].map(_annotations_by_cell["cell_ontology_class"])
tissage.obs["free_annotation"] = tissage.obs['Cell'].map(_annotations_by_cell["free_annotation"])
tissage.obs

In [None]:
# Preview the observation table of the age subset.
tissage.obs.head()

In [None]:
# Disabled: recomputing PCA / neighbours / clusters / tSNE on the age subset.
# With this turned off, the plots below presumably reuse what `tissage`
# inherited from `tiss` -- TODO confirm.
#sc.tl.pca(tissage)
#sc.pp.neighbors(tissage, n_neighbors=15)#, method='gauss')
#sc.tl.louvain(tissage, resolution = 0.3)
#sc.tl.tsne(tissage, perplexity=50)

In [None]:
# tSNE of the age subset coloured by cluster and by the two annotation columns.
sc.pl.tsne(tissage, color=['louvain'],right_margin=0.5)

In [None]:
sc.pl.tsne(tissage, color=['cell_ontology_class'],right_margin=0.5)

In [None]:
sc.pl.tsne(tissage, color=['free_annotation'],right_margin=0.5)

In [None]:
# Composition of Louvain cluster '2' in terms of cell_ontology_class.
# The label is compared for exact equality: str.match('2') is a regular
# expression anchored only at the start, so it would also select clusters
# '20', '21', ...
_in_cluster = tissage.obs['louvain'].astype(str) == '2'
df = tissage.obs.loc[_in_cluster, 'cell_ontology_class'].reset_index()

# Number of cells per annotation, their total, and the resulting fractions.
# After reset_index() the former index is the column that gets counted.
_counts = df.groupby('cell_ontology_class').count()
display(_counts)
display(_counts.sum())
display(_counts / _counts.sum())

# Annotations that account for more than 95% of the cluster.
# NOTE: the fraction column is called 'index' because that is the name
# reset_index() gives an unnamed index.
dfaux = (_counts / _counts.sum()).reset_index()
display(dfaux[dfaux['index']>0.95][['cell_ontology_class']])

In [None]:
# Assign one cell_ontology_class to every Louvain cluster: a cluster inherits
# an annotation when more than 95% of its annotated cells carry it; otherwise
# it is marked 'tbc' (to be confirmed).
tiss_cell_ontology_class = {}
_louvain_labels = tissage.obs['louvain'].astype(str)
# NOTE(review): this assumes the clusters present in `tissage` are labelled
# '0' .. 'k-1' without gaps -- confirm for subsets in which a cluster is empty.
for i in range(0,tissage.obs['louvain'].nunique()):
    # Exact comparison: str.match(str(i)) is a prefix regex, so '1' would
    # also pick up clusters '10', '11', ...
    _in_cluster = _louvain_labels == str(i)
    # Share of each annotation among the annotated cells of the cluster.
    _fractions = tissage.obs.loc[_in_cluster, 'cell_ontology_class'].value_counts(normalize=True)
    _dominant = _fractions[_fractions > 0.95]
    if _dominant.empty:
        tiss_cell_ontology_class[i] = 'tbc'
    else:
        # Store the plain label, not a one-element array wrapped around it.
        tiss_cell_ontology_class[i] = _dominant.index[0]

tiss_cell_ontology_class



In [None]:
# Write the cluster -> cell type mapping to dict.txt, one tab-separated
# "cluster<TAB>cell type" line per cluster, ordered by cluster id.
# `mydict` is not defined anywhere in this notebook; the mapping built in the
# previous cell is `tiss_cell_ontology_class`.
# The context manager makes sure the file is flushed and closed.
with open('dict.txt', 'w') as outfile:
    for key, value in sorted(tiss_cell_ontology_class.items()):
        outfile.write(str(key) + '\t' + str(value) + '\n')

In [None]:
# Give every cell the annotation chosen for its Louvain cluster.
# The mapping is keyed by integer cluster ids, so the labels are converted to
# int for the lookup only. `tiss.obs['louvain']` itself is left untouched:
# the marker-gene cells further down group by 'louvain' and select
# groups=['1'], which needs the original string/categorical labels.
tiss.obs['cell_ontology_class'] = tiss.obs['louvain'].astype(int).map(tiss_cell_ontology_class)
tiss.obs

In [None]:
# Embeddings of the full tissue subset coloured by cluster and by the
# cell type transferred from the per-cluster mapping.
sc.pl.tsne(tiss, color=['louvain'],right_margin=0.5)

In [None]:
sc.pl.tsne(tiss, color=['cell_ontology_class'],right_margin=0.5)

In [None]:
sc.pl.umap(tiss, color=['cell_ontology_class'],right_margin=0.5)

## finding marker genes

In [None]:
# Rank genes per Louvain cluster with the default method and save the plot of
# the top 20 genes per cluster.
sc.tl.rank_genes_groups(tiss, 'louvain')
sc.pl.rank_genes_groups(tiss, n_genes=20, save='louvain_clusters_top_genes.pdf')
#adata.write(results_file)

In [None]:
# Violin plots of the top 8 genes of cluster '1'.
sc.pl.rank_genes_groups_violin(tiss, n_genes=8, groups=['1'])

In [None]:
# Repeat the ranking with logistic regression; this overwrites the previous
# result stored under tiss.uns['rank_genes_groups'].
sc.tl.rank_genes_groups(tiss, 'louvain', method='logreg')
sc.pl.rank_genes_groups(tiss, n_genes=20)

### Show the 10 top ranked genes per cluster in a dataframe.

In [None]:
pd.DataFrame(tiss.uns['rank_genes_groups']['names']).head(10)

### Get a table with scores and groups.



In [None]:
# Build a table with one '<group>_n' (names) and one '<group>_s' (scores)
# column per group of the latest ranking; key[:1] is the first letter of
# 'names' / 'scores'.
result = tiss.uns['rank_genes_groups']
groups = result['names'].dtype.names
pd.DataFrame({group + '_' + key[:1]: result[key][group]
    for group in groups for key in ['names', 'scores']}).head(5)

In [None]:
# Expression of three selected genes, split by age group.
sc.pl.violin(tiss, ['Cdkn2a','Myoc','Gpx3'],groupby='Age', rotation=90)#, save='.pdf')


In [None]:
# Rank genes per transferred cell type and save the plot of the top 20 genes.
sc.tl.rank_genes_groups(tiss, 'cell_ontology_class')
sc.pl.rank_genes_groups(tiss, n_genes=20, save='cell_types_top_genes.pdf')

### subset for cell ontology

In [None]:
# Preview the cells that share the cell type of the second cell in the table.
# .iloc makes the positional lookup explicit; plain [1] on a label-indexed
# Series relies on a positional fallback that pandas has deprecated.
tiss.obs[tiss.obs['cell_ontology_class'] == tiss.obs['cell_ontology_class'].iloc[1]].head()

In [None]:
# Select all cells that share the cell type of the fourth cell in the table.
# .iloc makes the positional lookup explicit; plain [3] on a label-indexed
# Series relies on a positional fallback that pandas has deprecated.
_selected_cell_type = tiss.obs['cell_ontology_class'].iloc[3]
subtiss = tiss[tiss.obs['cell_ontology_class'] == _selected_cell_type,:]
# Genes ranked for 24m cells against 3m cells within that cell type.
sc.tl.rank_genes_groups(subtiss, 'Age', groups=['24m'], reference='3m')#, method='logreg')
sc.pl.rank_genes_groups(subtiss, n_genes=20, groups=['24m'], save='subtiss_louvain_clusters_top_genes.pdf')

## Force-directed graph

In [None]:
# Force-directed layout of the full dataset.
# NOTE(review): up to this point sc.pp.neighbors has only been run on `tiss`,
# not on `adata`; the layout presumably needs a neighbour graph on the object
# it is given -- confirm whether `tiss` was meant here.
sc.tl.draw_graph(adata) # be patient here...

In [None]:
sc.pl.draw_graph(adata, color=['Tissue'])

In [None]:
sc.pl.draw_graph(adata, color=['Age'])

In [None]:
sc.pl.draw_graph(adata, color=['Sex'])

In [None]:
# NOTE(review): Louvain clustering was only computed for `tiss` above, so a
# 'louvain' column may be missing from `adata.obs` -- confirm.
sc.pl.draw_graph(adata, color=['louvain'])

# Pseudotime analysis

In [None]:
# Neighbour graph of the full dataset, with default settings.
sc.pp.neighbors(adata)


In [None]:
# Diffusion map computed on the full dataset.
sc.tl.diffmap(adata)

In [None]:
# Diffusion pseudotime with one branching.
# NOTE(review): no root cell is set anywhere in this notebook; pseudotime
# presumably requires one (e.g. adata.uns['iroot']) -- confirm.
sc.tl.dpt(adata, n_branchings=1)

In [None]:
# Plots of the diffusion map, currently disabled.
#sc.pl.diffmap(adata, color=['dpt_pseudotime', 'dpt_groups', 'age'])

In [None]:
#sc.pl.diffmap(adata, color=['dpt_pseudotime'])

In [None]:
#sc.pl.diffmap(adata, color=['dpt_groups'])

In [None]:
#sc.pl.diffmap(adata, color=['Age'])

# Save processed data

In [None]:
# Store the processed object at the path defined once at the top of the
# notebook (`results_file`, './write/maca.processed.h5ad') instead of
# repeating the literal here.
adata.write(results_file)