In [5]:
import pandas as pd

import scanpy as sc
import sourmash

### Clean batbam chanel metadata file to join onto adata

In [6]:
bam_channel_metadata = pd.read_csv('/home/olga/data_lg/data_sm_copy/batlas/Bat_Bams/metadata.csv')

In [7]:
# check column names have no spaces
bam_channel_metadata.columns.values

array(['Folder', 'Sex', 'Tissue', 'Animal ID'], dtype=object)

In [8]:
print(bam_channel_metadata.shape)
bam_channel_metadata.head()

(34, 4)


Unnamed: 0,Folder,Sex,Tissue,Animal ID
0,bat2-BL,M,bladder,2
1,bat2-BM,M,bone marrow,2
2,bat2-BR,M,brain,2
3,bat2-FTB,M,brown fat,2
4,bat2-KD,M,kidney,2


In [9]:
# add on missing metadata

from io import StringIO

s = """Folder,Animal ID,Tissue,Sex
bat2-FTW,2,white fat,M
bat2-CO,2,colon,M
bat3-FTB,3,brown fat,M
bat3-FTW,3,white fat,M
"""

df = pd.read_csv(StringIO(s))
# print(df.shape)
# df.head()

bam_channel_metadata_full = pd.concat([
    bam_channel_metadata, 
    df,
])

print(bam_channel_metadata_full.shape)
bam_channel_metadata_full.head()

(38, 4)


Unnamed: 0,Folder,Sex,Tissue,Animal ID
0,bat2-BL,M,bladder,2
1,bat2-BM,M,bone marrow,2
2,bat2-BR,M,brain,2
3,bat2-FTB,M,brown fat,2
4,bat2-KD,M,kidney,2


In [10]:
bat_metadata_index = bam_channel_metadata_full.set_index(["Animal ID", "Tissue", "Sex"])
bat_metadata_index.shape
bat_metadata_index.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Folder
Animal ID,Tissue,Sex,Unnamed: 3_level_1
2,bladder,M,bat2-BL
2,bone marrow,M,bat2-BM
2,brain,M,bat2-BR
2,brown fat,M,bat2-FTB
2,kidney,M,bat2-KD


### Read in and clean adata

In [11]:
ls -lha /home/olga/data_lg/data_sm_copy/batlas/

total 22G
drwxr-xr-x  4 olga czb  4.0K May 12 15:05 [0m[01;34m.[0m/
drwxrwxrwx 34 1004 root 8.0K May  5 08:36 [34;42m..[0m/
drwxr-xr-x  2 olga czb  4.0K Oct 23  2020 [01;34m.nextflow[0m/
-rw-r--r--  1 olga czb  1.1K Oct 23  2020 .nextflow.log
drwxrwxrwx  2 olga czb  4.0K May 11 16:40 [34;42mBat_Bams[0m/
-rw-r--r--  1 olga czb  638M Sep 23  2020 baca-droplet-pre-processed.h5ad
-rw-r--r--  1 olga czb  618M Sep 23  2020 baca-droplet-raw.h5ad
-rw-r--r--  1 olga czb  6.5G May 12 15:06 bat_82k_cells.h5ad
-rw-r--r--  1 olga czb   13M Sep 28  2020 bat_annotate.csv
-rw-r--r--  1 olga czb  7.2G Sep 23  2020 bat_normalized_count.csv
-rw-r--r--  1 olga czb  266M Oct 12  2020 [01;31mbat_normalized_count.csv.tar.gz[0m
-rw-r--r--  1 olga czb  6.5G Sep 23  2020 bat_raw_counts.csv
-rw-r--r--  1 olga czb  237M Oct 16  2020 [01;31mbat_raw_counts.csv.tar.gz[0m
-rw-r--r--  1 olga czb     0 Jul  1  2020 rsync.log
-rw-r--r--  1 olga czb   12M Sep 23  2020 scanpy-baca.ipynb
-rw-r--r--  1 olga czb

In [12]:
ls -lha ~/data_lg/data_sm_copy/immune-evolution/data-objects/bat

total 8.3G
drwxrwxr-x 2 phoenix czb 4.0K Feb  3 09:48 [0m[01;34m.[0m/
drwxrwxr-x 6 phoenix czb 4.0K Mar 31 09:03 [01;34m..[0m/
-rw-r--r-- 1 olga    czb 6.5G Oct 16  2020 bat_82k_cells.h5ad
-rw-r--r-- 1 olga    czb 692M Oct 16  2020 bat_82k_cells__lung_only.h5ad
-rw-r--r-- 1 olga    czb 762K Feb  3 09:48 bat_lung__one2one_orthologs_var.csv
-rw-r--r-- 1 phoenix czb 496M Oct  8  2020 bat_subset_w_shared_one2one_orthologs_human_lemur_mouse_bat.h5ad
-rw-r--r-- 1 phoenix czb  48M Oct  8  2020 bat_subset_w_shared_one2one_orthologs_human_lemur_mouse_bat__lung_only.h5ad
-rw-r--r-- 1 phoenix czb 623M Oct  8  2020 bat_with_cell_type_annotations.h5ad


In [13]:
# cp ~/data_lg/data_sm_copy/immune-evolution/data-objects/bat/bat_82k_cells.h5ad /home/olga/data_lg/data_sm_copy/batlas/

In [14]:
h5ad = '/home/olga/data_lg/data_sm_copy/batlas/bat_82k_cells.h5ad'
adata = sc.read(h5ad)
adata

AnnData object with n_obs × n_vars = 82924 × 20854
    obs: 'animalID', 'batch', 'sex', 'tissue', 'method', 'n_genes', 'n_counts', 'louvain', 'clustertype', 'cluster', 'celltype', 'celltype_tiss', 'clustercellnumber', 'newcelltype', 'clustertonumber', 'tissue_lower', 'sex_abbrev', 'channel', 'cell_barcode', 'cell_id'

In [15]:
adata.obs.head()

Unnamed: 0_level_0,animalID,batch,sex,tissue,method,n_genes,n_counts,louvain,clustertype,cluster,celltype,celltype_tiss,clustercellnumber,newcelltype,clustertonumber,tissue_lower,sex_abbrev,channel,cell_barcode,cell_id
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
AAACCTGAGAGGGCTT-1-0,3,0,male,Whole Blood,10x,2370,6673.0,0,T cell,Whole Blood_T cell,Immune cell,Whole Blood_Immune cell,354,T cell,181,whole blood,M,bat3-WB,AAACCTGAGAGGGCTT,bat3-WB__AAACCTGAGAGGGCTT
AAACGGGAGACGCAAC-1-0,3,0,male,Whole Blood,10x,1196,2132.0,12,Non-classical monocyte,Whole Blood_Non-classical monocyte,Immune cell,Whole Blood_Immune cell,488,Other immune cell,180,whole blood,M,bat3-WB,AAACGGGAGACGCAAC,bat3-WB__AAACGGGAGACGCAAC
AAAGATGTCAGCTTAG-1-0,3,0,male,Whole Blood,10x,2647,7631.0,8,Classical monocyte,Whole Blood_Classical monocyte,Immune cell,Whole Blood_Immune cell,229,Other immune cell,178,whole blood,M,bat3-WB,AAAGATGTCAGCTTAG,bat3-WB__AAAGATGTCAGCTTAG
AAAGCAAAGCCCAACC-1-0,3,0,male,Whole Blood,10x,556,1048.0,5,Classical monocyte,Whole Blood_Classical monocyte,Immune cell,Whole Blood_Immune cell,229,Other immune cell,178,whole blood,M,bat3-WB,AAAGCAAAGCCCAACC,bat3-WB__AAAGCAAAGCCCAACC
AAAGCAATCAACACCA-1-0,3,0,male,Whole Blood,10x,661,1256.0,0,T cell,Whole Blood_T cell,Immune cell,Whole Blood_Immune cell,354,T cell,181,whole blood,M,bat3-WB,AAAGCAATCAACACCA,bat3-WB__AAAGCAATCAACACCA


In [16]:
adata.var.head()

GORAB
PRRX1
MROH9
FMO3
FMO2


In [26]:
# "ADAR2" in adata.var.index

False

In [17]:
# clean up adata obs tissue names to match bam_channel_metadata
adata.obs["tissue_lower"] = adata.obs["tissue"].str.lower()
adata.obs["sex_abbrev"] = adata.obs["sex"].str[0].str.upper()
adata.obs["animalID"] = adata.obs["animalID"].astype(int)
adata.obs.head()

Unnamed: 0_level_0,animalID,batch,sex,tissue,method,n_genes,n_counts,louvain,clustertype,cluster,celltype,celltype_tiss,clustercellnumber,newcelltype,clustertonumber,tissue_lower,sex_abbrev,channel,cell_barcode,cell_id
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
AAACCTGAGAGGGCTT-1-0,3,0,male,Whole Blood,10x,2370,6673.0,0,T cell,Whole Blood_T cell,Immune cell,Whole Blood_Immune cell,354,T cell,181,whole blood,M,bat3-WB,AAACCTGAGAGGGCTT,bat3-WB__AAACCTGAGAGGGCTT
AAACGGGAGACGCAAC-1-0,3,0,male,Whole Blood,10x,1196,2132.0,12,Non-classical monocyte,Whole Blood_Non-classical monocyte,Immune cell,Whole Blood_Immune cell,488,Other immune cell,180,whole blood,M,bat3-WB,AAACGGGAGACGCAAC,bat3-WB__AAACGGGAGACGCAAC
AAAGATGTCAGCTTAG-1-0,3,0,male,Whole Blood,10x,2647,7631.0,8,Classical monocyte,Whole Blood_Classical monocyte,Immune cell,Whole Blood_Immune cell,229,Other immune cell,178,whole blood,M,bat3-WB,AAAGATGTCAGCTTAG,bat3-WB__AAAGATGTCAGCTTAG
AAAGCAAAGCCCAACC-1-0,3,0,male,Whole Blood,10x,556,1048.0,5,Classical monocyte,Whole Blood_Classical monocyte,Immune cell,Whole Blood_Immune cell,229,Other immune cell,178,whole blood,M,bat3-WB,AAAGCAAAGCCCAACC,bat3-WB__AAAGCAAAGCCCAACC
AAAGCAATCAACACCA-1-0,3,0,male,Whole Blood,10x,661,1256.0,0,T cell,Whole Blood_T cell,Immune cell,Whole Blood_Immune cell,354,T cell,181,whole blood,M,bat3-WB,AAAGCAATCAACACCA,bat3-WB__AAAGCAATCAACACCA


In [18]:
adata_joined_bat_bam = adata.obs.join(
    bat_metadata_index, 
    on = ["animalID", "tissue_lower", "sex_abbrev"],
)

print(adata_joined_bat_bam.shape)
adata_joined_bat_bam.head()

(82924, 21)


Unnamed: 0_level_0,animalID,batch,sex,tissue,method,n_genes,n_counts,louvain,clustertype,cluster,...,celltype_tiss,clustercellnumber,newcelltype,clustertonumber,tissue_lower,sex_abbrev,channel,cell_barcode,cell_id,Folder
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AAACCTGAGAGGGCTT-1-0,3,0,male,Whole Blood,10x,2370,6673.0,0,T cell,Whole Blood_T cell,...,Whole Blood_Immune cell,354,T cell,181,whole blood,M,bat3-WB,AAACCTGAGAGGGCTT,bat3-WB__AAACCTGAGAGGGCTT,bat3-WB
AAACGGGAGACGCAAC-1-0,3,0,male,Whole Blood,10x,1196,2132.0,12,Non-classical monocyte,Whole Blood_Non-classical monocyte,...,Whole Blood_Immune cell,488,Other immune cell,180,whole blood,M,bat3-WB,AAACGGGAGACGCAAC,bat3-WB__AAACGGGAGACGCAAC,bat3-WB
AAAGATGTCAGCTTAG-1-0,3,0,male,Whole Blood,10x,2647,7631.0,8,Classical monocyte,Whole Blood_Classical monocyte,...,Whole Blood_Immune cell,229,Other immune cell,178,whole blood,M,bat3-WB,AAAGATGTCAGCTTAG,bat3-WB__AAAGATGTCAGCTTAG,bat3-WB
AAAGCAAAGCCCAACC-1-0,3,0,male,Whole Blood,10x,556,1048.0,5,Classical monocyte,Whole Blood_Classical monocyte,...,Whole Blood_Immune cell,229,Other immune cell,178,whole blood,M,bat3-WB,AAAGCAAAGCCCAACC,bat3-WB__AAAGCAAAGCCCAACC,bat3-WB
AAAGCAATCAACACCA-1-0,3,0,male,Whole Blood,10x,661,1256.0,0,T cell,Whole Blood_T cell,...,Whole Blood_Immune cell,354,T cell,181,whole blood,M,bat3-WB,AAAGCAATCAACACCA,bat3-WB__AAAGCAATCAACACCA,bat3-WB


In [19]:
# check where folders are null
null_columns = adata_joined_bat_bam[adata_joined_bat_bam.Folder.isnull()]
null_columns.groupby(by=["animalID", "tissue"], observed=True).size()


animalID  tissue              
2         Brown Adipose Tissue    3134
3         Brown Adipose Tissue    5366
          White Adipose Tissue    2127
dtype: int64

### make new adata columns

In [20]:
# make channel barcode ex: bat3-WB__AAAAAAAATTTTT in adata.obs
adata_joined_bat_bam["bam_channel_barcode"] = adata_joined_bat_bam.Folder + "__" + adata_joined_bat_bam.index
print(adata_joined_bat_bam.shape)
adata_joined_bat_bam.head()


(82924, 22)


Unnamed: 0_level_0,animalID,batch,sex,tissue,method,n_genes,n_counts,louvain,clustertype,cluster,...,clustercellnumber,newcelltype,clustertonumber,tissue_lower,sex_abbrev,channel,cell_barcode,cell_id,Folder,bam_channel_barcode
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AAACCTGAGAGGGCTT-1-0,3,0,male,Whole Blood,10x,2370,6673.0,0,T cell,Whole Blood_T cell,...,354,T cell,181,whole blood,M,bat3-WB,AAACCTGAGAGGGCTT,bat3-WB__AAACCTGAGAGGGCTT,bat3-WB,bat3-WB__AAACCTGAGAGGGCTT-1-0
AAACGGGAGACGCAAC-1-0,3,0,male,Whole Blood,10x,1196,2132.0,12,Non-classical monocyte,Whole Blood_Non-classical monocyte,...,488,Other immune cell,180,whole blood,M,bat3-WB,AAACGGGAGACGCAAC,bat3-WB__AAACGGGAGACGCAAC,bat3-WB,bat3-WB__AAACGGGAGACGCAAC-1-0
AAAGATGTCAGCTTAG-1-0,3,0,male,Whole Blood,10x,2647,7631.0,8,Classical monocyte,Whole Blood_Classical monocyte,...,229,Other immune cell,178,whole blood,M,bat3-WB,AAAGATGTCAGCTTAG,bat3-WB__AAAGATGTCAGCTTAG,bat3-WB,bat3-WB__AAAGATGTCAGCTTAG-1-0
AAAGCAAAGCCCAACC-1-0,3,0,male,Whole Blood,10x,556,1048.0,5,Classical monocyte,Whole Blood_Classical monocyte,...,229,Other immune cell,178,whole blood,M,bat3-WB,AAAGCAAAGCCCAACC,bat3-WB__AAAGCAAAGCCCAACC,bat3-WB,bat3-WB__AAAGCAAAGCCCAACC-1-0
AAAGCAATCAACACCA-1-0,3,0,male,Whole Blood,10x,661,1256.0,0,T cell,Whole Blood_T cell,...,354,T cell,181,whole blood,M,bat3-WB,AAAGCAATCAACACCA,bat3-WB__AAAGCAATCAACACCA,bat3-WB,bat3-WB__AAAGCAATCAACACCA-1-0


In [21]:
bat_annotate = pd.read_csv("/home/olga/data_lg/data_sm_copy/batlas/bat_annotate.csv", index_col=0)
print(bat_annotate.shape)
bat_annotate.head()

(82924, 15)


Unnamed: 0_level_0,animalID,batch,sex,tissue,method,n_genes,n_counts,louvain,clustertype,cluster,celltype,celltype_tiss,clustercellnumber,newcelltype,clustertonumber
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
AAACCTGAGAGGGCTT-1-0,3,0,male,Whole Blood,10x,2370,6673.0,0,T cell,Whole Blood_T cell,Immune cell,Whole Blood_Immune cell,354,T cell,181
AAACGGGAGACGCAAC-1-0,3,0,male,Whole Blood,10x,1196,2132.0,12,Non-classical monocyte,Whole Blood_Non-classical monocyte,Immune cell,Whole Blood_Immune cell,488,Other immune cell,180
AAAGATGTCAGCTTAG-1-0,3,0,male,Whole Blood,10x,2647,7631.0,8,Classical monocyte,Whole Blood_Classical monocyte,Immune cell,Whole Blood_Immune cell,229,Other immune cell,178
AAAGCAAAGCCCAACC-1-0,3,0,male,Whole Blood,10x,556,1048.0,5,Classical monocyte,Whole Blood_Classical monocyte,Immune cell,Whole Blood_Immune cell,229,Other immune cell,178
AAAGCAATCAACACCA-1-0,3,0,male,Whole Blood,10x,661,1256.0,0,T cell,Whole Blood_T cell,Immune cell,Whole Blood_Immune cell,354,T cell,181


In [22]:
adata_joined_bat_bam

Unnamed: 0_level_0,animalID,batch,sex,tissue,method,n_genes,n_counts,louvain,clustertype,cluster,...,clustercellnumber,newcelltype,clustertonumber,tissue_lower,sex_abbrev,channel,cell_barcode,cell_id,Folder,bam_channel_barcode
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AAACCTGAGAGGGCTT-1-0,3,0,male,Whole Blood,10x,2370,6673.0,0,T cell,Whole Blood_T cell,...,354,T cell,181,whole blood,M,bat3-WB,AAACCTGAGAGGGCTT,bat3-WB__AAACCTGAGAGGGCTT,bat3-WB,bat3-WB__AAACCTGAGAGGGCTT-1-0
AAACGGGAGACGCAAC-1-0,3,0,male,Whole Blood,10x,1196,2132.0,12,Non-classical monocyte,Whole Blood_Non-classical monocyte,...,488,Other immune cell,180,whole blood,M,bat3-WB,AAACGGGAGACGCAAC,bat3-WB__AAACGGGAGACGCAAC,bat3-WB,bat3-WB__AAACGGGAGACGCAAC-1-0
AAAGATGTCAGCTTAG-1-0,3,0,male,Whole Blood,10x,2647,7631.0,8,Classical monocyte,Whole Blood_Classical monocyte,...,229,Other immune cell,178,whole blood,M,bat3-WB,AAAGATGTCAGCTTAG,bat3-WB__AAAGATGTCAGCTTAG,bat3-WB,bat3-WB__AAAGATGTCAGCTTAG-1-0
AAAGCAAAGCCCAACC-1-0,3,0,male,Whole Blood,10x,556,1048.0,5,Classical monocyte,Whole Blood_Classical monocyte,...,229,Other immune cell,178,whole blood,M,bat3-WB,AAAGCAAAGCCCAACC,bat3-WB__AAAGCAAAGCCCAACC,bat3-WB,bat3-WB__AAAGCAAAGCCCAACC-1-0
AAAGCAATCAACACCA-1-0,3,0,male,Whole Blood,10x,661,1256.0,0,T cell,Whole Blood_T cell,...,354,T cell,181,whole blood,M,bat3-WB,AAAGCAATCAACACCA,bat3-WB__AAAGCAATCAACACCA,bat3-WB,bat3-WB__AAAGCAATCAACACCA-1-0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGCGCAGGAGTTTA-1-33,3,33,male,Testis,10x,929,1765.0,22,Myofibroblast,Testis_Myofibroblast,...,176,Mesenchymal cell,133,testis,M,bat3-TS,TTTGCGCAGGAGTTTA,bat3-TS__TTTGCGCAGGAGTTTA,bat3-TS,bat3-TS__TTTGCGCAGGAGTTTA-1-33
TTTGCGCTCCCAAGTA-1-33,3,33,male,Testis,10x,1387,2459.0,3,Leydig cell,Testis_Leydig cell,...,748,Mesenchymal cell,131,testis,M,bat3-TS,TTTGCGCTCCCAAGTA,bat3-TS__TTTGCGCTCCCAAGTA,bat3-TS,bat3-TS__TTTGCGCTCCCAAGTA-1-33
TTTGGTTGTTAAGATG-1-33,3,33,male,Testis,10x,1505,2660.0,3,Leydig cell,Testis_Leydig cell,...,748,Mesenchymal cell,131,testis,M,bat3-TS,TTTGGTTGTTAAGATG,bat3-TS__TTTGGTTGTTAAGATG,bat3-TS,bat3-TS__TTTGGTTGTTAAGATG-1-33
TTTGTCAGTGCAGTAG-1-33,3,33,male,Testis,10x,2340,6230.0,3,Leydig cell,Testis_Leydig cell,...,748,Mesenchymal cell,131,testis,M,bat3-TS,TTTGTCAGTGCAGTAG,bat3-TS__TTTGTCAGTGCAGTAG,bat3-TS,bat3-TS__TTTGTCAGTGCAGTAG-1-33


In [23]:
adata_joined_bat_bam.obs.join(
    bat_annotate,
)

ValueError: columns overlap but no suffix specified: Index(['animalID', 'batch', 'sex', 'tissue', 'method', 'n_genes', 'n_counts',
       'louvain', 'clustertype', 'cluster', 'celltype', 'celltype_tiss',
       'clustercellnumber', 'newcelltype', 'clustertonumber'],
      dtype='object')

In [None]:
adata_joined_bat_bam

In [113]:
# read in mouse human, lemur adata object and see what is similar between bat.obs.cluster_type and cell_ontology_class
adata_mhl = sc.read(
    "/home/olga/data_lg/data_sm_copy/tabula-microcebus/data-objects/cross-species/concatenated__human-lung--lemur-lung--mouse-lung__10x__one2one_orthologs__unified_compartments__bbknn.h5ad"
)



This is where adjacency matrices should go now.

This is where adjacency matrices should go now.


In [114]:
print(adata_mhl)


AnnData object with n_obs × n_vars = 87909 × 13350
    obs: 'age', 'cell_barcode', 'cell_ontology_class', 'cell_ontology_id', 'channel', 'free_annotation', 'individual', 'mouse.id', 'patient', 'possibly_contaminated_barcode', 'sample', 'sequencing_run', 'sex', 'species', 'species_batch', 'species_latin', 'tissue', 'narrow_group', 'broad_group', 'compartment_group', 'compartment_narrow', 'compartment_broad', 'compartment_broad_narrow', 'compartment_species', 'compartment_narrow_species', 'n_counts', 'log_counts', 'sqrt_counts', 'n_genes'
    var: 'mouse_lemur__gene_name', 'mouse__gene_name', 'gene_ids-lemur', 'n_cells-mouse', 'n_cells', 'highly_variable', 'means', 'dispersions', 'dispersions_norm'
    uns: 'age_colors', 'broad_group_colors', 'cell_ontology_class_colors', 'compartment_group_colors', 'individual_colors', 'narrow_group_colors', 'neighbors', 'pca', 'sex_colors', 'species_batch_colors', 'species_colors', 'umap'
    obsm: 'X_pca', 'X_umap'
    varm: 'PCs'
    obsp: 'distances

In [116]:
set(adata_mhl.obs.cell_ontology_class)

{'B cell',
 'CD4-positive, alpha-beta T cell',
 'CD8-positive, alpha-beta T cell',
 'NK cell',
 'T cell',
 'adventitial cell',
 'alveolar macrophage',
 'bronchial smooth muscle cell',
 'capillary endothelial cell',
 'ciliated columnar cell of tracheobronchial tree',
 'classical monocyte',
 'conventional dendritic cell',
 'endothelial cell of artery',
 'endothelial cell of lymphatic vessel',
 'fibroblast',
 'fibroblast of lung',
 'immature NK T cell',
 'intermediate monocyte',
 'lung ciliated cell',
 'lung endothelial cell',
 'lymphocyte',
 'macrophage',
 'mature NK T cell',
 'monocyte',
 'myeloid dendritic cell',
 'myofibroblast cell',
 'naive thymus-derived CD4-positive, alpha-beta T cell',
 'naive thymus-derived CD8-positive, alpha-beta T cell',
 'natural killer cell',
 'non-classical monocyte',
 'pericyte cell',
 'plasma cell',
 'plasmacytoid dendritic cell',
 'pulmonary interstitial fibroblast',
 'smooth muscle cell of the pulmonary artery',
 'stromal cell',
 'type II pneumocyte',
