In [1]:
# Shell escape: list the conda environments known to this installation.
!conda env list


# conda environments:
#
base                   /opt/conda
scanpy_v1.10.4_r       /opt/conda/envs/scanpy_v1.10.4_r
velocyto               /opt/conda/envs/velocyto



In [2]:
import sys

# Print the path of the Python interpreter that is running this kernel.
interpreter_path = sys.executable
print(interpreter_path)

/opt/conda/envs/scanpy_v1.10.4_r/bin/python


In [7]:
import os

# Report the kernel's working directory and whether the loom file can be
# found through a path relative to it.
loom_filename = "alzheimers-cns-human-brain-10XV2.loom"
working_dir = os.getcwd()
print("Working directory:", working_dir)
loom_found = os.path.exists(loom_filename)
print("File exists:", loom_found)

Working directory: /home/rstudio
File exists: True


In [1]:
import loompy

# Open the loom file read-only (with validation switched off) and report the
# matrix dimensions plus the first ten row and column attribute names.
with loompy.connect("alzheimers-cns-human-brain-10XV2.loom", mode="r", validate=False) as loom:
    row_attr_names = list(loom.ra.keys())
    col_attr_names = list(loom.ca.keys())
    print("Matrix shape:", loom.shape)
    print("Row attributes:", row_attr_names[:10])
    print("Column attributes:", col_attr_names[:10])

Matrix shape: (58347, 12310)
Row attributes: ['Gene', 'antisense_reads', 'duplicate_reads', 'ensembl_ids', 'fragments_per_molecule', 'fragments_with_single_read_evidence', 'gene_names', 'genomic_read_quality_mean', 'genomic_read_quality_variance', 'genomic_reads_fraction_bases_quality_above_30_mean']
Column attributes: ['CellID', 'antisense_reads', 'cell_barcode_fraction_bases_above_30_mean', 'cell_barcode_fraction_bases_above_30_variance', 'cell_names', 'duplicate_reads', 'emptydrops_FDR', 'emptydrops_IsCell', 'emptydrops_Limited', 'emptydrops_LogProb']


In [2]:
# How do we get at the expression data itself?

In [3]:
import loompy
import numpy as np
import pandas as pd

path = "alzheimers-cns-human-brain-10XV2.loom"

# Build a small labelled preview (5 genes × 5 cells) of the main matrix.
with loompy.connect(path, mode="r", validate=False) as loom:
    n_genes, n_cells = loom.shape
    print(f"Matrix shape: {n_genes} genes × {n_cells} cells")

    # Row labels: use "ensembl_ids" when present, otherwise the "Gene" attribute.
    if "ensembl_ids" in loom.ra:
        genes = loom.ra["ensembl_ids"]
    else:
        genes = loom.ra["Gene"]

    # Column labels: use "CellID" when present, otherwise positional indices
    # rendered as strings.
    if "CellID" in loom.ca:
        cells = loom.ca["CellID"]
    else:
        cells = np.arange(n_cells).astype(str)

    # Slice only the top-left 5 × 5 corner of the matrix for the preview.
    mat_preview = loom[:5, :5]
    df_preview = pd.DataFrame(mat_preview, index=genes[:5], columns=cells[:5])

print(df_preview)

Matrix shape: 58347 genes × 12310 cells
                   GGAACTTAGGTGCTAG  CGGACTGCAATCCGAT  CCTTCGACATGAAGTA  \
ENSG00000223972.5                 0                 0                 0   
ENSG00000227232.5                 0                 0                 0   
ENSG00000278267.1                 0                 0                 0   
ENSG00000243485.5                 0                 0                 0   
ENSG00000284332.1                 0                 0                 0   

                   ACGGGCTGTGCCTTGG  TTGCCGTGTATGCTTG  
ENSG00000223972.5                 0                 0  
ENSG00000227232.5                 0                 0  
ENSG00000278267.1                 0                 0  
ENSG00000243485.5                 0                 0  
ENSG00000284332.1                 0                 0  


In [5]:
# Measure the fraction of non-zero entries by walking the matrix in blocks of
# rows, so only one block (all columns of up to 2000 rows) is loaded at a time.
rows_per_block = 2000

with loompy.connect(path, "r", validate=False) as loom:
    n_rows = loom.shape[0]
    nonzero_count = 0
    entry_count = 0
    for start in range(0, n_rows, rows_per_block):
        rows = loom[start:start + rows_per_block, :]
        nonzero_count += np.count_nonzero(rows)
        entry_count += rows.size
    print("Non-zero fraction:", nonzero_count / entry_count)

Non-zero fraction: 0.014816521737641312


In [7]:
import loompy
import numpy as np

# Pull all rows for the first 100 columns (cells), then inspect the values
# once the file has been closed.
with loompy.connect("alzheimers-cns-human-brain-10XV2.loom", "r", validate=False) as loom:
    vals = loom[:, :100]

# A small corner (50 rows × 3 columns) is enough for a look at distinct values.
corner = vals[:50, :3]
print("Value dtype:", vals.dtype)
print("Unique small sample:", np.unique(corner))
# Note: this maximum covers only the 100-column slice, not the whole matrix.
print("Max value:", vals.max())

Value dtype: uint32
Unique small sample: [0]
Max value: 395


In [3]:
# TODO: normalize. BUT the kernel dies when loading this file with scanpy. Why?