In [1]:
import pandas as pd
import scanpy as sc
import numpy as np
from scipy import sparse
import loompy as lp
from loomxpy._loomx import LoomX
from loomxpy._io._read import read_loom

# Read (LoomX)

In [2]:
# Relative path of the loom file loaded by the read step below
file_path = "../data/10x_rna.loom"

In [3]:
# Load the loom file into `lx`. With force_conversion={"annotations": True}
# the reader reports converting each annotation to a categorical type.
lx = read_loom(file_path=file_path, force_conversion={"annotations": True})

Adding data matrix...
INFO: adding new rna mode
Adding annotations...


The key 'leiden_res0.9' won't be accessible using the dot notation (containing special characters other than '_')
Converting leiden_res0.9 annotation to categorical type...
The key 'leiden_res0.3' won't be accessible using the dot notation (containing special characters other than '_')
Converting leiden_res0.3 annotation to categorical type...
The key 'leiden_res1.2' won't be accessible using the dot notation (containing special characters other than '_')
Converting leiden_res1.2 annotation to categorical type...
The key 'leiden_res0.6' won't be accessible using the dot notation (containing special characters other than '_')
Converting leiden_res0.6 annotation to categorical type...
Converting cell_type annotation to categorical type...
Converting sample_id annotation to categorical type...


Adding metrics...
Adding embeddings...


The key 'HVG UMAP' won't be accessible using the dot notation (containing special characters other than '_')
The key 'HVG t-SNE' won't be accessible using the dot notation (containing special characters other than '_')
The key 'HVG PC1/PC2' won't be accessible using the dot notation (containing special characters other than '_')
The key 'SCENIC AUC UMAP' won't be accessible using the dot notation (containing special characters other than '_')
The key 'SCENIC AUC t-SNE' won't be accessible using the dot notation (containing special characters other than '_')


Adding clusterings...


The key 'Leiden resolution 0.9' won't be accessible using the dot notation (containing special characters other than '_')
The key 'Leiden resolution 0.3' won't be accessible using the dot notation (containing special characters other than '_')
The key 'Leiden resolution 1.2' won't be accessible using the dot notation (containing special characters other than '_')
The key 'Leiden resolution 0.6' won't be accessible using the dot notation (containing special characters other than '_')


Adding global attributes...


# Explore

In [4]:
# Print the key of every global attribute attached to the RNA mode
global_attrs = lx.modes.rna.g
for attr_key, _attr in global_attrs.keys():
    print(attr_key)

CreationDate
Genome
LOOM_SPEC_VERSION
SCopeTreeL1
SCopeTreeL2
SCopeTreeL3


In [5]:
# Read a single global attribute using dot notation; this works here because
# the key contains no special characters other than '_'.
lx.modes.rna.g.LOOM_SPEC_VERSION

'3.0.0'

In [6]:
# Print the key of every attribute on the `f` axis of the RNA mode
# (presumably the feature/gene axis — TODO confirm).
feature_attrs = lx.modes.rna.f
for attr_key, _attr in feature_attrs.keys():
    print(attr_key)

In [7]:
# List the key of every attribute available for the observations (i.e. cells):
# annotations, metrics, embeddings and clusterings all show up here.
observation_attrs = lx.modes.rna.o
for attr_key, _attr in observation_attrs.keys():
    print(attr_key)

leiden_res0.9
leiden_res0.3
leiden_res1.2
leiden_res0.6
cell_type
sample_id
nGene
nUMI
n_counts
n_genes
percent_mito
HVG UMAP
HVG t-SNE
HVG PC1/PC2
SCENIC AUC UMAP
SCENIC AUC t-SNE
Leiden resolution 0.9
Leiden resolution 0.3
Leiden resolution 1.2
Leiden resolution 0.6


## Annotations

In [8]:
# Print the key of every annotation defined on the observations
annotations = lx.modes.rna.o.annotations
for annotation_key, _annotation in annotations:
    print(annotation_key)

leiden_res0.9
leiden_res0.3
leiden_res1.2
leiden_res0.6
cell_type
sample_id


In [9]:
# Fetch the 'cell_type' annotation: one categorical value per cell, indexed by cell ID
lx.modes.rna.o.annotations["cell_type"]
# Can also be accessed directly from the observation attributes:
# lx.modes.rna.o["cell_type"]

AAACAGCCATTATGCG-1-10x_multiome_brain        MOL
AAACCAACATAGACCC-1-10x_multiome_brain        MOL
AAACCGAAGATGCCTG-1-10x_multiome_brain    INH_VIP
AAACCGAAGTTAGCTA-1-10x_multiome_brain       NFOL
AAACCGCGTCTTACTA-1-10x_multiome_brain        MOL
                                          ...   
TTTGTGAAGGGTGAGT-1-10x_multiome_brain    INH_VIP
TTTGTGAAGTCAGGCC-1-10x_multiome_brain    AST_CER
TTTGTGGCATGCTTAG-1-10x_multiome_brain        MOL
TTTGTTGGTGATCAGC-1-10x_multiome_brain       NFOL
TTTGTTGGTGATTTGG-1-10x_multiome_brain    INH_SST
Name: cell_type, Length: 2607, dtype: category
Categories (14, object): ['AST', 'ASTP', 'AST_CER', 'ENDO', ..., 'MOL', 'NFOL', 'OPC', 'PURK']

## Metrics

In [10]:
# Print the key of every metric defined on the observations
metrics = lx.modes.rna.o.metrics
for metric_key, _metric in metrics:
    print(metric_key)

nGene
nUMI
n_counts
n_genes
percent_mito


In [11]:
# Fetch the 'n_counts' metric: one numeric value per cell, indexed by cell ID
lx.modes.rna.o.metrics["n_counts"]
# Can also be accessed directly from the observation attributes:
# lx.modes.rna.o["n_counts"]

AAACAGCCATTATGCG-1-10x_multiome_brain    10006.0
AAACCAACATAGACCC-1-10x_multiome_brain     6016.0
AAACCGAAGATGCCTG-1-10x_multiome_brain    15267.0
AAACCGAAGTTAGCTA-1-10x_multiome_brain     6996.0
AAACCGCGTCTTACTA-1-10x_multiome_brain     2059.0
                                          ...   
TTTGTGAAGGGTGAGT-1-10x_multiome_brain     1102.0
TTTGTGAAGTCAGGCC-1-10x_multiome_brain     6614.0
TTTGTGGCATGCTTAG-1-10x_multiome_brain    12456.0
TTTGTTGGTGATCAGC-1-10x_multiome_brain     7460.0
TTTGTTGGTGATTTGG-1-10x_multiome_brain     2221.0
Name: n_counts, Length: 2607, dtype: float32

## Embeddings

In [12]:
# Print the key of every embedding defined on the observations
embeddings = lx.modes.rna.o.embeddings
for embedding_key, _embedding in embeddings:
    print(embedding_key)

HVG UMAP
HVG t-SNE
HVG PC1/PC2
SCENIC AUC UMAP
SCENIC AUC t-SNE


In [13]:
# Fetch the 'SCENIC AUC UMAP' embedding: a table with `_X` and `_Y` coordinates per cell.
# The key contains spaces, so it must be accessed with brackets, not dot notation.
lx.modes.rna.o.embeddings["SCENIC AUC UMAP"]
# Can also be accessed directly from the observation attributes:
# lx.modes.rna.o["SCENIC AUC UMAP"]

Unnamed: 0,_X,_Y
AAACAGCCATTATGCG-1-10x_multiome_brain,11.866692,6.071897
AAACCAACATAGACCC-1-10x_multiome_brain,11.540525,3.874464
AAACCGAAGATGCCTG-1-10x_multiome_brain,-5.994763,7.198971
AAACCGAAGTTAGCTA-1-10x_multiome_brain,12.658012,5.963271
AAACCGCGTCTTACTA-1-10x_multiome_brain,10.451440,4.170816
...,...,...
TTTGTGAAGGGTGAGT-1-10x_multiome_brain,-4.442218,5.991313
TTTGTGAAGTCAGGCC-1-10x_multiome_brain,-0.537465,-3.767715
TTTGTGGCATGCTTAG-1-10x_multiome_brain,12.732769,2.572658
TTTGTTGGTGATCAGC-1-10x_multiome_brain,11.358409,3.532379


## Clusterings

In [14]:
# Print the key of every clustering defined on the observations
clusterings = lx.modes.rna.o.clusterings
for clustering_key, _clustering in clusterings:
    print(clustering_key)

Leiden resolution 0.9
Leiden resolution 0.3
Leiden resolution 1.2
Leiden resolution 0.6


In [15]:
# Summary of the given clustering: key, mode, type, name, description
# and number of clusters
lx.modes.rna.o.clusterings.get_attribute("Leiden resolution 0.3")



key: Leiden resolution 0.3
mode: ModeType.RNA
type: AttributeType.CLUSTERING
name: Leiden resolution 0.3
description: None
        
number of clusters: 12
        

In [16]:
# Get the markers of all clusters of the given clustering as a single table,
# with `cluster`, `avg_logFC` and `pval` columns
lx.modes.rna.o.clusterings.get_attribute("Leiden resolution 0.3").markers

Unnamed: 0_level_0,cluster,avg_logFC,pval
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
AACS,0,-1.214017,1.755964e-06
AAGAB,0,-0.269014,1.739026e-02
AAK1,0,-1.092985,1.650032e-30
AARSD1,0,-0.864859,1.543590e-02
AASDH,0,-0.709720,1.375027e-04
...,...,...,...
ZNF708,11,0.000000,0.000000e+00
ZNF98,11,0.000000,0.000000e+00
ZNRD1,11,0.000000,0.000000e+00
ZNRF3,11,0.000000,0.000000e+00


In [17]:
# List every cluster of the given clustering together with its description.
# Iterating over a clustering yields (cluster id, cluster) pairs.
for cluster_id, cluster in lx.modes.rna.o.clusterings.get_attribute("Leiden resolution 0.3"):
    print(f"{cluster_id}: {cluster.description}")

0: MOL (0)
1: NFOL (1)
2: AST_CER (2)
3: OPC (3)
4: GC (4)
5: INH_SST (5)
6: INH_VIP (6)
7: MGL (7)
8: GP (8)
9: AST+ENDO (9)
10: PURK (10)
11: INH_PVALB (11)


In [18]:
# Get all markers of a single cluster (here cluster 0) from a clustering
lx.modes.rna.o.clusterings.get_attribute("Leiden resolution 0.3").clusters[0].markers

Unnamed: 0_level_0,cluster,avg_logFC,pval
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
AACS,0,-1.214017,1.755964e-06
AAGAB,0,-0.269014,1.739026e-02
AAK1,0,-1.092985,1.650032e-30
AARSD1,0,-0.864859,1.543590e-02
AASDH,0,-0.709720,1.375027e-04
...,...,...,...
ZRANB3,0,0.000000,0.000000e+00
ZSCAN23,0,-1.135173,1.859158e-02
ZSCAN5A,0,-0.592134,5.246708e-05
ZSWIM5,0,-0.276347,5.657378e-04
