In [1]:
import loompy
import os
import glob

In [2]:
# Inspect the T2 loom file: report its shape, preview a few cell IDs, and list
# the column attributes it carries.
t2_loom_path = "data/103.self_workflow/T2/velocyto/T2.loom"

# Open read-only: this cell only inspects the file, while loompy.connect
# defaults to read/write mode ('r+'), which is unnecessary here and would
# allow accidental modification of the original velocyto output.
with loompy.connect(t2_loom_path, mode="r") as ds:
    # Print basic info (the second axis of ds.shape is reported as cells)
    print(f"Dataset shape: {ds.shape}")
    print(f"Number of cells: {ds.shape[1]}")

    # Get the cell IDs
    cell_ids = ds.ca.CellID

    # Print the first few cell IDs; slicing copes with files that hold
    # fewer than five cells without an explicit min() bound.
    print("\nFirst 5 cell IDs:")
    for cell_id in cell_ids[:5]:
        print(cell_id)

    # List the names of all column attributes stored in the file
    print("\nColumn attributes:")
    for attr in ds.ca:
        print(f"- {attr}")

Dataset shape: (33836, 5306)
Number of cells: 5306

First 5 cell IDs:
anno_decon_sorted_QY5T0:CELL10159_N2x
anno_decon_sorted_QY5T0:CELL10364_N2x
anno_decon_sorted_QY5T0:CELL10034_N2x
anno_decon_sorted_QY5T0:CELL10025_N2x
anno_decon_sorted_QY5T0:CELL10441_N2x

Column attributes:
- CellID


In [3]:
import shutil
import numpy as np

# Define the base directory where loom files are located
base_dir = "data/103.self_workflow"
samples = ["N1", "N2", "N4", "N7", "T1", "T2", "T4", "T7"]


def make_sample_cell_id(cell_id, sample):
    """Build a sample-prefixed cell ID from a velocyto-style cell ID.

    The text up to and including the first ':' is dropped, a single trailing
    'x' is removed if (and only if) it is present, and the result is prefixed
    with ``sample`` and an underscore.

    Example: ("anno_decon_sorted_QY5T0:CELL10159_N2x", "T2") -> "T2_CELL10159_N2"

    Args:
        cell_id: Original cell ID string as stored in the loom file.
        sample: Sample name used as the new prefix.

    Returns:
        The rewritten cell ID string.
    """
    # split(':', 1)[-1] keeps the whole ID when there is no ':' instead of
    # raising IndexError, and keeps everything after the first ':' if the
    # barcode part itself happens to contain one.
    barcode = cell_id.split(":", 1)[-1]
    # Only strip the trailing 'x' when it is really there; an unconditional
    # [:-1] would silently eat a meaningful character on other ID formats.
    if barcode.endswith("x"):
        barcode = barcode[:-1]
    return f"{sample}_{barcode}"


# Process each sample
for sample in samples:
    # Construct the path to the loom file
    loom_path = f"{base_dir}/{sample}/velocyto/{sample}.loom"

    # Create the copy path
    copy_path = f"{base_dir}/{sample}/velocyto/{sample}_modified.loom"

    # Work on a fresh copy so the original velocyto output is never modified;
    # copy2 overwrites any earlier copy, which keeps this cell re-runnable.
    shutil.copy2(loom_path, copy_path)

    # Open the copy read/write and rewrite the cell names
    with loompy.connect(copy_path, 'r+') as ds:
        # Get the cell IDs
        cell_ids = ds.ca.CellID

        # Replace the part before ':' with the sample name and drop the
        # trailing 'x'
        new_cell_ids = np.array([
            make_sample_cell_id(cell_id, sample) for cell_id in cell_ids
        ])

        # Update the cell IDs in the loom file
        ds.ca.CellID = new_cell_ids

    print(f"Processed {sample}: Created modified loom file at {copy_path}")
    # Guard the example so a file without cells does not raise IndexError
    if len(new_cell_ids) > 0:
        print(f"Example cell ID transformation: {cell_ids[0]} → {new_cell_ids[0]}")

Processed N1: Created modified loom file at data/103.self_workflow/N1/velocyto/N1_modified.loom
Example cell ID transformation: anno_decon_sorted_72QA9:CELL10073_N1x → N1_CELL10073_N1
Processed N2: Created modified loom file at data/103.self_workflow/N2/velocyto/N2_modified.loom
Example cell ID transformation: anno_decon_sorted_7UXCK:CELL12108_N1x → N2_CELL12108_N1
Processed N4: Created modified loom file at data/103.self_workflow/N4/velocyto/N4_modified.loom
Example cell ID transformation: anno_decon_sorted_DITAM:CELL114_N2x → N4_CELL114_N2
Processed N7: Created modified loom file at data/103.self_workflow/N7/velocyto/N7_modified.loom
Example cell ID transformation: anno_decon_sorted_84NH2:CELL10166_N2x → N7_CELL10166_N2
Processed T1: Created modified loom file at data/103.self_workflow/T1/velocyto/T1_modified.loom
Example cell ID transformation: anno_decon_sorted_LBLVB:CELL10053_N1x → T1_CELL10053_N1
Processed T2: Created modified loom file at data/103.self_workflow/T2/velocyto/T2_mo

In [2]:
# Combine the per-sample "_modified" loom files into one loom file and print a
# per-sample cell count as a sanity check.
base_dir = "data/103.self_workflow"
samples = ["N1", "N2", "N4", "N7", "T1", "T2", "T4", "T7"]
# Define output path for the combined loom file
combined_loom_path = f"{base_dir}/velocyto_combined.loom"

# List all modified loom files
loom_files = [f"{base_dir}/{sample}/velocyto/{sample}_modified.loom" for sample in samples]

# Verify that all files exist, and remember which ones do not so the warning
# below can name them.
existing_files = [f for f in loom_files if os.path.exists(f)]
missing_files = [f for f in loom_files if f not in existing_files]
print(f"Found {len(existing_files)} of {len(loom_files)} expected loom files")

if not missing_files:
    # Without a key, loompy.combine merges rows purely by position, so files
    # whose genes are ordered differently would be merged silently misaligned.
    # Use the "Accession" row attribute as the alignment key when every input
    # provides it; otherwise fall back to the positional merge.
    # NOTE(review): velocyto output is expected to carry "Accession" — confirm.
    row_key = "Accession"
    for path in existing_files:
        with loompy.connect(path, mode="r") as ds:
            has_key = row_key in ds.ra.keys()
        if not has_key:
            row_key = None
            break

    # Combine all the loom files
    loompy.combine(existing_files, combined_loom_path, key=row_key)
    print(f"Successfully created combined loom file at: {combined_loom_path}")

    # Display basic information about the combined file (read-only: nothing
    # is modified here)
    with loompy.connect(combined_loom_path, mode="r") as ds:
        print(f"\nCombined dataset shape: {ds.shape}")
        print(f"Number of cells: {ds.shape[1]}")
        print(f"Number of genes: {ds.shape[0]}")

        # Count cells per sample; the sample name is the text before the
        # first '_' of each cell ID.
        sample_counts = {}
        for cell_id in ds.ca.CellID:
            sample_name = cell_id.split('_')[0]
            sample_counts[sample_name] = sample_counts.get(sample_name, 0) + 1

        print("\nCells per sample:")
        for sample, count in sample_counts.items():
            print(f"- {sample}: {count} cells")
else:
    print("Warning: Some loom files were not found. Check paths and try again.")
    # Name the missing inputs so the problem can be fixed without guessing
    for path in missing_files:
        print(f"- missing: {path}")

Found 8 of 8 expected loom files




Successfully created combined loom file at: data/103.self_workflow/velocyto_combined.loom

Combined dataset shape: (33836, 80603)
Number of cells: 80603
Number of genes: 33836

Cells per sample:
- N1: 26102 cells
- N2: 9354 cells
- N4: 9656 cells
- N7: 9254 cells
- T1: 3306 cells
- T2: 5306 cells
- T4: 13207 cells
- T7: 4418 cells
