In [7]:
import os
import loompy
import pandas as pd
import numpy as np

import warnings
warnings.simplefilter("ignore", category=UserWarning)

In [2]:
# Directories
# loom_dir: folder holding the combined .loom files that the cells below open and modify.
# results_dir: folder from which cluster_info.csv is read later in the notebook.
loom_dir = "/projectnb/farmer/akhila/Rabhi_Lab/scSPLiT-seq/final_results/copy_final_combined_looms"
results_dir = "/projectnb/farmer/akhila/Rabhi_Lab/scSPLiT-seq/final_results" 

In [3]:
# List of Loom file names
# Each name is joined with loom_dir by the cells below; every listed file
# receives the same set of column attributes.
loom_files = [
  "all_combined.loom",
  "GFP_combined.loom",
  "mCherry_combined.loom",
  "HFD_combined.loom",
  "cold_combined.loom"
]

In [23]:
# Define metadata for each loom file
# Keys must equal the CellID prefix (the text before ':', e.g. "1D-GFP_G2SWV"),
# because the annotation loop looks cells up by that prefix; any prefix not
# listed here is labelled "Unknown".
# Each record supplies the values written to the Sample, Color, Day, Diet and
# Condition column attributes.
metadata = {
    "1D-GFP_G2SWV": {"sample": "1D-GFP", "color": "GFP", "day": "1D", "diet": "Normal Diet", "condition": "Cold Exposure"},
    "1D-mCherry_C3OSE": {"sample": "1D-mCherry", "color": "mCherry", "day": "1D", "diet": "Normal Diet", "condition": "Cold Exposure"},
    "1W-HFD_EW10E": {"sample": "1W-HFD.GFP", "color": "GFP", "day": "1W", "diet": "High Fat Diet", "condition": "High Fat Diet"},
    "1W-HFD_YSCZK": {"sample": "1W-HFD.mCherry", "color": "mCherry", "day": "1W", "diet": "High Fat Diet", "condition": "High Fat Diet"},
    "2D-GFP_UHS4J": {"sample": "2D-GFP", "color": "GFP", "day": "2D", "diet": "Normal Diet", "condition": "Cold Exposure"},
    "2D-mCherry_VV09P": {"sample": "2D-mCherry", "color": "mCherry", "day": "2D", "diet": "Normal Diet", "condition": "Cold Exposure"},
    "2W-HFD_5QJWR": {"sample": "2W-HFD.GFP", "color": "GFP", "day": "2W", "diet": "High Fat Diet", "condition": "High Fat Diet"},
    "2W-HFD_ATBIY": {"sample": "2W-HFD.mCherry", "color": "mCherry", "day": "2W", "diet": "High Fat Diet", "condition": "High Fat Diet"},
    "3D-GFP_V2HG9": {"sample": "3D-GFP", "color": "GFP", "day": "3D", "diet": "Normal Diet", "condition": "Cold Exposure"},
    "3D-mCherry_I9YDX": {"sample": "3D-mCherry", "color": "mCherry", "day": "3D", "diet": "Normal Diet", "condition": "Cold Exposure"},
    "7D-GFP_Q137W": {"sample": "7D-GFP", "color": "GFP", "day": "7D", "diet": "Normal Diet", "condition": "Cold Exposure"},
    "7D-mCherry_XM91Y": {"sample": "7D-mCherry", "color": "mCherry", "day": "7D", "diet": "Normal Diet", "condition": "Cold Exposure"},
    "RT-GFP_55WEM": {"sample": "RT-GFP", "color": "GFP", "day": "RT", "diet": "Normal Diet", "condition": "Room Temperature"},
    "RT-mCherry_1DV46": {"sample": "RT-mCherry", "color": "mCherry", "day": "RT", "diet": "Normal Diet", "condition": "Room Temperature"}
}

In [22]:
# Sanity check: list the lookup keys of `metadata`.
# NOTE(review): this cell's execution count (22) precedes the metadata cell (23),
# and the saved output shows keys without the "_XXXXX" suffix, so the output
# appears to predate the current dict — re-run after defining `metadata`.
print("Metadata keys:", metadata.keys())


Metadata keys: dict_keys(['1D-GFP', '1D-mCherry', '1W-HFD.GFP', '1W-HFD.mCherry', '2D-GFP', '2D-mCherry', '2W-HFD.GFP', '2W-HFD.mCherry', '3D-GFP', '3D-mCherry', '7D-GFP', '7D-mCherry', 'RT-GFP', 'RT-mCherry'])


In [28]:
# Add metadata to loom file
def extract_sample_name(cell_id):
    """Return the portion of a CellID that precedes its first ':'.

    A CellID without any ':' is returned unchanged.
    """
    sample_prefix, _sep, _rest = cell_id.partition(':')
    return sample_prefix

# Record used for cells whose CellID prefix is not a key of `metadata`.
unknown_record = {
    "sample": "Unknown",
    "color": "Unknown",
    "day": "Unknown",
    "diet": "Unknown",
    "condition": "Unknown",
}

# Annotate every Loom file with per-cell sample metadata
for loom_file in loom_files:
    loom_path = os.path.join(loom_dir, loom_file)
    with loompy.connect(loom_path) as ds:
        cell_ids = ds.ca['CellID']

        # One metadata record per cell, in the same order as the CellIDs
        records = [
            metadata.get(extract_sample_name(cell_id), unknown_record)
            for cell_id in cell_ids
        ]

        # Split the records into one list per column attribute
        samples = [record["sample"] for record in records]
        colors = [record["color"] for record in records]
        days = [record["day"] for record in records]
        diets = [record["diet"] for record in records]
        conditions = [record["condition"] for record in records]

        # Write the new column attributes to the Loom file
        ds.ca["Sample"] = samples
        ds.ca["Color"] = colors
        ds.ca["Day"] = days
        ds.ca["Diet"] = diets
        ds.ca["Condition"] = conditions

    print(f"Metadata added to {loom_file}")

Metadata added to all_combined.loom
Metadata added to GFP_combined.loom
Metadata added to mCherry_combined.loom
Metadata added to HFD_combined.loom
Metadata added to cold_combined.loom


In [None]:
def check_metadata_in_loom(loom_file):
    """Print the column-attribute names of a loom file and a preview of each.

    For every column attribute, the first 10 values are printed.

    Args:
        loom_file: Path of the loom file to open.
    """
    with loompy.connect(loom_file) as ds:
        column_attrs = ds.ca
        attr_names = list(column_attrs.keys())
        print(f"Metadata keys in {loom_file}: {attr_names}")
        for attr_name in attr_names:
            preview = column_attrs[attr_name][:10]  # first 10 values only
            print(f"{attr_name}: {preview}")


# Print the column attributes of every loom file in loom_files
for loom_file in loom_files:
    file_path = "/".join((loom_dir, loom_file))
    check_metadata_in_loom(file_path)

In [30]:
def add_barcodes_to_loom(loom_path):
    """Derive a ``Barcode`` column attribute from ``CellID`` and store it in the loom file.

    The barcode is the text after the last ':' of the CellID, cut at the first
    'x' (e.g. ``"1D-GFP_G2SWV:AAACTGx"`` -> ``"AAACTG"``).

    Any exception raised while processing is caught and reported with a printed
    message instead of being propagated.

    Args:
        loom_path: Path of the loom file to update in place.
    """
    try:
        with loompy.connect(loom_path, 'r+') as ds:
            # Strip the sample prefix, then drop the trailing 'x' marker
            barcode_array = np.array([
                full_id.rsplit(':', 1)[-1].partition('x')[0]
                for full_id in ds.ca['CellID']
            ])
            ds.ca['Barcode'] = barcode_array
            print(f"Barcode information added to {loom_path}")
    except Exception as err:
        print(f"Error processing {loom_path}: {err}")

# Add the Barcode column attribute to every Loom file in loom_files
for loom_file in loom_files:
    loom_path = os.path.join(loom_dir, loom_file)
    add_barcodes_to_loom(loom_path)

# Printed once the loop has finished
print("All Loom files updated with Barcode information.")

Barcode information added to /projectnb/farmer/akhila/Rabhi_Lab/scSPLiT-seq/final_results/copy_final_combined_looms/all_combined.loom
Barcode information added to /projectnb/farmer/akhila/Rabhi_Lab/scSPLiT-seq/final_results/copy_final_combined_looms/GFP_combined.loom
Barcode information added to /projectnb/farmer/akhila/Rabhi_Lab/scSPLiT-seq/final_results/copy_final_combined_looms/mCherry_combined.loom
Barcode information added to /projectnb/farmer/akhila/Rabhi_Lab/scSPLiT-seq/final_results/copy_final_combined_looms/HFD_combined.loom
Barcode information added to /projectnb/farmer/akhila/Rabhi_Lab/scSPLiT-seq/final_results/copy_final_combined_looms/cold_combined.loom
All Loom files updated with Barcode information.


In [31]:
def check_metadata_in_loom(loom_file):
    """Show which column attributes a loom file has, with a 10-value sample of each.

    Args:
        loom_file: Path of the loom file to inspect.
    """
    with loompy.connect(loom_file) as ds:
        keys = ds.ca.keys()
        print(f"Metadata keys in {loom_file}: {list(keys)}")
        # Pair each attribute name with its first 10 values, then print the pairs
        previews = ((key, ds.ca[key][:10]) for key in ds.ca.keys())
        for key, head in previews:
            print(f"{key}: {head}")


# Re-check every loom file now that the Barcode attribute has been added
for loom_file in loom_files:
    file_path = loom_dir + "/" + loom_file
    check_metadata_in_loom(file_path)

Metadata keys in /projectnb/farmer/akhila/Rabhi_Lab/scSPLiT-seq/final_results/copy_final_combined_looms/all_combined.loom: ['Barcode', 'CellID', 'Color', 'Condition', 'Day', 'Diet', 'Sample']
Barcode: ['AAACTGAACATCGGCTATCAGATTACGTT' 'AAATCGTTCGGCAGACGCCGATTCTTGAA'
 'AAATCGTTCGGCAGACGCCCTAACTAGGT' 'AAATTCCTCATCGGCTATCCAGCTAGCGG'
 'AAAGCTGATATCGGCTATCTTATTGAGGC' 'AAAGCTGATGATACGGAACATTCTTAGCT'
 'AAAGCTGATCTAAGAGAAGGCGGCTGCGG' 'AAACTGAACAAGAAGCTAGTTGCGAGCAT'
 'AAACTGAACGATACGGAACGTAGCAGCTA' 'AAATCGTTCATCGGCTATCATTATGCAAG']
CellID: ['1D-GFP_G2SWV:AAACTGAACATCGGCTATCAGATTACGTTx'
 '1D-GFP_G2SWV:AAATCGTTCGGCAGACGCCGATTCTTGAAx'
 '1D-GFP_G2SWV:AAATCGTTCGGCAGACGCCCTAACTAGGTx'
 '1D-GFP_G2SWV:AAATTCCTCATCGGCTATCCAGCTAGCGGx'
 '1D-GFP_G2SWV:AAAGCTGATATCGGCTATCTTATTGAGGCx'
 '1D-GFP_G2SWV:AAAGCTGATGATACGGAACATTCTTAGCTx'
 '1D-GFP_G2SWV:AAAGCTGATCTAAGAGAAGGCGGCTGCGGx'
 '1D-GFP_G2SWV:AAACTGAACAAGAAGCTAGTTGCGAGCATx'
 '1D-GFP_G2SWV:AAACTGAACGATACGGAACGTAGCAGCTAx'
 '1D-GFP_G2SWV:AAATCGTTCATCGGCTATCATTATGCA

In [33]:
# Load the cluster info data
# The table shown in the next cell has columns "Unnamed: 0", "CellID" and
# "Cluster"; its CellID values are bare barcodes (no "sample:" prefix, no
# trailing "x"), i.e. the same form as the Barcode attribute in the loom files.
cluster_info = pd.read_csv(f"{results_dir}/cluster_info.csv")

# Dictionary mapping cluster numbers to cell types
# Used to derive the CellType column of cluster_info, which is then written
# into the loom files as the CellType column attribute.
# NOTE(review): "Marix progenitor cells" (cluster 9) looks like a typo for
# "Matrix progenitor cells" — confirm before renaming, because the label is
# stored in the loom files as-is.
annotation_map = {
  0: "APC1a",
  1: "APC2b",
  2: "PVM",
  3: "APC3",
  4: "DC",
  5: "APC4",
  6: "Activated macrophages",
  7: "cDC1",
  8: "Angiogenic Endothelial Cells",
  9: "Marix progenitor cells",
  10: "Smooth muscle cell progenitors",
  11: "T Cells",
  12: "Pericytes",
  13: "B Cells",
  14: "Proliferating immune cells",
  15: "Epithelial progenitor cells",
  16: "Proliferating APCs"
}

In [34]:
# Display the loaded table to inspect its columns before annotating it
cluster_info

Unnamed: 0,CellID,Cluster
0,AAACCATAGAATCGATAATATTATGCAAG,10
1,AAACCATAGAATCGATAATATTGGCAGAT,10
2,AAACCATAGACTCAATAGGCGCTTATCGT,10
3,AAACCATAGACTCAATAGGCTCAATTAGT,6
4,AAACCATAGAGGTTAGCATCGCTTATCGT,13
...,...,...
84083,TTTGCTTAGCATTCAATCACGCGGCCATA,5
84084,TTTGCTTAGTCGCGGTTGGCCTCAGCTCC,1
84085,TTTGCTTAGTCGCGGTTGGTGGACGACTA,1
84086,TTTGCTTAGTCTGATACCAAATACGATAA,13


In [35]:
# Add the cell type annotations to the cluster info data frame
# This modifies cluster_info in place by adding a CellType column looked up
# from annotation_map; re-running the cell simply overwrites that column.
# Cluster numbers absent from annotation_map yield NaN (pandas Series.map
# semantics for dict arguments).
cluster_info['CellType'] = cluster_info['Cluster'].map(annotation_map)

def add_cluster_info_to_loom(loom_dir, loom_files, cluster_info):
    """Write ``Cluster`` and ``CellType`` column attributes into each loom file.

    Cells are matched by comparing the loom file's ``Barcode`` column attribute
    with the ``CellID`` column of ``cluster_info``. Cells without a match keep
    the missing value produced by the left join, and the number of such cells
    is printed for each file.

    Args:
        loom_dir: Directory containing the loom files.
        loom_files: Iterable of loom file names inside ``loom_dir``.
        cluster_info: DataFrame with ``CellID``, ``Cluster`` and ``CellType``
            columns. It is not modified.

    Raises:
        KeyError: If ``cluster_info`` lacks one of the required columns. This
            is raised before any file is opened.
        ValueError: If the join yields a different number of rows than the
            loom file has cells (duplicate barcodes in ``cluster_info``).
            Nothing is written to that file.
    """
    # Select only the columns that are needed and rename the key, so that the
    # merge cannot produce CellID_x / CellID_y suffixes and a missing column
    # is detected before any loom file is touched.
    lookup = cluster_info[['CellID', 'Cluster', 'CellType']].rename(
        columns={'CellID': 'Barcode'}
    )

    for loom_file in loom_files:
        loom_file_path = os.path.join(loom_dir, loom_file)

        with loompy.connect(loom_file_path, mode='r+') as ds:
            loom_df = pd.DataFrame({'Barcode': ds.ca['Barcode']})

            # A left join keeps the loom file's cell order; the indicator
            # column records which cells found no partner in cluster_info.
            merged_df = pd.merge(
                loom_df, lookup, on='Barcode', how='left', indicator=True
            )

            # Duplicate barcodes in cluster_info multiply rows, which would
            # misalign the attribute arrays with the cells.
            if len(merged_df) != len(loom_df):
                raise ValueError(
                    f"Merging cluster info for {loom_file} produced "
                    f"{len(merged_df)} rows for {len(loom_df)} cells; "
                    "cluster_info contains duplicate barcodes for cells in this file"
                )

            unmatched = int((merged_df['_merge'] == 'left_only').sum())

            # Update the Cluster and CellType columns in the loom file
            ds.ca['Cluster'] = merged_df['Cluster'].values
            ds.ca['CellType'] = merged_df['CellType'].values

        print(f"Updated {loom_file}")
        if unmatched:
            print(
                f"  Warning: {unmatched} of {len(loom_df)} cells in {loom_file} "
                "had no match in cluster_info"
            )


# Run the function to add cluster info to loom
# Requires the Barcode column attribute to exist in every loom file and
# cluster_info to already carry the CellType column.
add_cluster_info_to_loom(loom_dir, loom_files, cluster_info)

Updated all_combined.loom
Updated GFP_combined.loom
Updated mCherry_combined.loom
Updated HFD_combined.loom
Updated cold_combined.loom


In [36]:
def check_metadata_in_loom(loom_file):
    """List a loom file's column attributes and print the first 10 values of each.

    Args:
        loom_file: Path of the loom file to read.
    """
    with loompy.connect(loom_file) as ds:
        names = list(ds.ca.keys())
        print(f"Metadata keys in {loom_file}: {names}")
        position = 0
        while position < len(names):
            name = names[position]
            print(f"{name}: {ds.ca[name][:10]}")  # preview limited to 10 values
            position += 1


# Re-check every loom file after Cluster and CellType have been written
for loom_file in loom_files:
    file_path = "{}/{}".format(loom_dir, loom_file)
    check_metadata_in_loom(file_path)


Metadata keys in /projectnb/farmer/akhila/Rabhi_Lab/scSPLiT-seq/final_results/copy_final_combined_looms/all_combined.loom: ['Barcode', 'CellID', 'CellType', 'Cluster', 'Color', 'Condition', 'Day', 'Diet', 'Sample']
Barcode: ['AAACTGAACATCGGCTATCAGATTACGTT' 'AAATCGTTCGGCAGACGCCGATTCTTGAA'
 'AAATCGTTCGGCAGACGCCCTAACTAGGT' 'AAATTCCTCATCGGCTATCCAGCTAGCGG'
 'AAAGCTGATATCGGCTATCTTATTGAGGC' 'AAAGCTGATGATACGGAACATTCTTAGCT'
 'AAAGCTGATCTAAGAGAAGGCGGCTGCGG' 'AAACTGAACAAGAAGCTAGTTGCGAGCAT'
 'AAACTGAACGATACGGAACGTAGCAGCTA' 'AAATCGTTCATCGGCTATCATTATGCAAG']
CellID: ['1D-GFP_G2SWV:AAACTGAACATCGGCTATCAGATTACGTTx'
 '1D-GFP_G2SWV:AAATCGTTCGGCAGACGCCGATTCTTGAAx'
 '1D-GFP_G2SWV:AAATCGTTCGGCAGACGCCCTAACTAGGTx'
 '1D-GFP_G2SWV:AAATTCCTCATCGGCTATCCAGCTAGCGGx'
 '1D-GFP_G2SWV:AAAGCTGATATCGGCTATCTTATTGAGGCx'
 '1D-GFP_G2SWV:AAAGCTGATGATACGGAACATTCTTAGCTx'
 '1D-GFP_G2SWV:AAAGCTGATCTAAGAGAAGGCGGCTGCGGx'
 '1D-GFP_G2SWV:AAACTGAACAAGAAGCTAGTTGCGAGCATx'
 '1D-GFP_G2SWV:AAACTGAACGATACGGAACGTAGCAGCTAx'
 '1D-GFP_G2SWV:AAAT

In [38]:
# Inspect the structure to understand whether CellID is a row or column attribute
def inspect_loom_file(file_path):
    """Print the names of the row attributes and column attributes of a loom file.

    Args:
        file_path: Path of the loom file to open.
    """
    with loompy.connect(file_path) as ds:
        print(f"Metadata keys in {file_path}:")
        row_names = list(ds.row_attrs.keys())
        print(f"Row attributes: {row_names}")
        col_names = list(ds.col_attrs.keys())
        print(f"Column attributes: {col_names}")

# List the row and column attribute names of every loom file in loom_files
for loom_file in loom_files:
    file_path = "/".join([loom_dir, loom_file])
    inspect_loom_file(file_path)


Metadata keys in /projectnb/farmer/akhila/Rabhi_Lab/scSPLiT-seq/final_results/copy_final_combined_looms/all_combined.loom:
Row attributes: ['Accession', 'Chromosome', 'End', 'Gene', 'Start', 'Strand']
Column attributes: ['Barcode', 'CellID', 'CellType', 'Cluster', 'Color', 'Condition', 'Day', 'Diet', 'Sample']
Metadata keys in /projectnb/farmer/akhila/Rabhi_Lab/scSPLiT-seq/final_results/copy_final_combined_looms/GFP_combined.loom:
Row attributes: ['Accession', 'Chromosome', 'End', 'Gene', 'Start', 'Strand']
Column attributes: ['Barcode', 'CellID', 'CellType', 'Cluster', 'Color', 'Condition', 'Day', 'Diet', 'Sample']
Metadata keys in /projectnb/farmer/akhila/Rabhi_Lab/scSPLiT-seq/final_results/copy_final_combined_looms/mCherry_combined.loom:
Row attributes: ['Accession', 'Chromosome', 'End', 'Gene', 'Start', 'Strand']
Column attributes: ['Barcode', 'CellID', 'CellType', 'Cluster', 'Color', 'Condition', 'Day', 'Diet', 'Sample']
Metadata keys in /projectnb/farmer/akhila/Rabhi_Lab/scSPLiT-