# Write loom files from cisTopic objects (ctos)

In [30]:
import loompy as lp
import glob
import os
import pickle
import matplotlib.pyplot as plt

%load_ext lab_black

The lab_black extension is already loaded. To reload it, use:
  %reload_ext lab_black


In [31]:
# Index every pickle matching "*topics.pkl" under cistopic_objects/ by its
# sample ID, i.e. the part of the file name that precedes the first "__".
cto_path_dict = dict(
    (pkl_path.split("/")[-1].split("__")[0], pkl_path)
    for pkl_path in sorted(glob.glob("cistopic_objects/*topics.pkl"))
)
cto_path_dict

{'master_sub_1.FIXEDCELLS': 'cistopic_objects/master_sub_1.FIXEDCELLS__cto.scrublet0-4.fmx.singlets.model_20topics.pkl'}

In [32]:
# Output directory for the cell-by-region loom files.
loom_out = "cell_region_looms"
# exist_ok=True makes this cell idempotent and avoids the check-then-create
# race of a separate os.path.exists() test. It also fails loudly if the path
# exists but is not a directory, instead of silently continuing.
os.makedirs(loom_out, exist_ok=True)

In [33]:
# Collect only the samples whose loom file has not been written yet, so the
# export loop can skip samples that are already done.
cto_path_sub_dict = {}
for sample, cto_path in cto_path_dict.items():
    # The loom keeps the pickle's file name, with the extension swapped.
    loom_name = cto_path.split("/")[-1].replace(".pkl", ".loom")
    loom_path = os.path.join(loom_out, loom_name)
    print(f"Checking if {loom_path} exist...")
    if not os.path.exists(loom_path):
        print(f"\t{loom_path} does not exist, adding to subdict to generate")
        cto_path_sub_dict[sample] = cto_path
    else:
        print(f"\t{loom_path} exists! Skipping...")

Checking if cell_region_looms/master_sub_1.FIXEDCELLS__cto.scrublet0-4.fmx.singlets.model_20topics.loom exist...
	cell_region_looms/master_sub_1.FIXEDCELLS__cto.scrublet0-4.fmx.singlets.model_20topics.loom exists! Skipping...


In [34]:
# Export the fragment matrix of every pending cisTopic object to a loom file.
for sample, cto_path in cto_path_sub_dict.items():
    # NOTE(review): pickle.load can execute arbitrary code; only load pickles
    # that were produced by this pipeline.
    with open(cto_path, "rb") as f:
        cto = pickle.load(f)

    print(f"Loaded filtered cistopic object {sample}")
    loom_path = os.path.join(loom_out, cto_path.split("/")[-1].replace(".pkl", ".loom"))

    # Write to a temporary path and rename only on success. An interrupted
    # export would otherwise leave a partial file at loom_path, which the
    # "loom already exists" check would then skip on every later run.
    tmp_loom_path = f"{loom_path}.tmp"
    try:
        lp.create(
            filename=tmp_loom_path,
            layers=cto.fragment_matrix,
            # Regions are stored under "Gene" and cells under "CellID".
            # NOTE(review): presumably the attribute names the downstream
            # loom reader expects - confirm before renaming.
            row_attrs={"Gene": cto.region_names},
            col_attrs={
                # Collapse "___" to "__" and drop everything after the first ".".
                "CellID": [x.replace("___", "__").split(".")[0] for x in cto.cell_names]
            },
        )
        os.replace(tmp_loom_path, loom_path)
    finally:
        # After a successful rename the temp file is gone; after a failure,
        # remove the partial file so it cannot be mistaken for a result.
        if os.path.exists(tmp_loom_path):
            os.remove(tmp_loom_path)
    print(f"Finished {loom_path} loom writing")

# Write Seurat label-transfer commands

In [35]:
# Fragments file for each sample, keyed by sample ID. The keys must match the
# sample IDs derived from the loom file names, because the command-writing
# cell looks fragments up by that ID.
frags_path_dict = {
    "master_sub_1.FIXEDCELLS": "../1_data_repository/fixedcells_merged/merged_all_1.fragments.ID.sorted.tsv.gz"
}

In [36]:
scrub_name_suffix = "0-4"
# Index every loom under cell_region_looms/ by its sample ID, i.e. the part of
# the file name that precedes the first "__".
loom_path_dict = dict(
    (loom_file.split("/")[-1].split("__")[0], loom_file)
    for loom_file in sorted(glob.glob("cell_region_looms/*.loom"))
)
loom_path_dict

{'master_sub_1.FIXEDCELLS': 'cell_region_looms/master_sub_1.FIXEDCELLS__cto.scrublet0-4.fmx.singlets.model_20topics.loom'}

In [37]:
# Print the R label-transfer script so the version used is recorded in the notebook output.
!cat ../0_resources/scripts/seurat_label_transfer_consensus.R

#!/usr/bin/env Rscript
library(Seurat)
library(SeuratDisk)
library(Signac)
# library(EnsDb.Hsapiens.v86)
library(ggplot2)
library(stringr)

args = commandArgs(trailingOnly=TRUE)
sample_id = args[1]
f_loom = args[2]
f_frag = args[3]
f_reference = args[4]
f_annotation = args[5]
f_out = args[6]

print(paste0("Processing sample ", args[1]))

# load pbmc object
# pbmc.rna <- readRDS("../0_resources/seurat_references/pbmc_integrated.RDS")
#pbmc.rna <- readRDS('/lustre1/project/stg_00090/scatac_benchmark/0_resources/seurat_references/pbmc_ssc_mat__integrated.rds')

################################################################################
# ATAC
################################################################################

### get data from loom:
atacloomcon <- Connect(filename = f_loom, mode = "r")
atacloomcon
atac_tmp <- as.Seurat(atacloomcon, assay='ATAC')
atacloomcon$close_all()

# subset by removing contig chromosomes
rawregions = rownames(GetAssayData(atac_tmp, slot = "counts",

In [38]:
# Inputs for the label-transfer job list: one Rscript command per sample is
# written to `parallel_filename`.
parallel_filename = "seurat_label_transfer.parallel"
script_path = "../0_resources/scripts/seurat_label_transfer_consensus.R"
img_path = "../0_resources/vsn_cache/cflerin-seurat-4.0.3-plus.sif"
reference_path = "../0_resources/seurat_references/pbmc_ref.rds"
annotation_path = "../0_resources/seurat_references/granges_annotation.rds"
# Alternative bind mounts, kept for reference:
# bind_mounts = "/dodrio,/readonly/dodrio,/tmp"
bind_mounts = "/lustre1,/staging,${VSC_SCRATCH}/tmp:/tmp"

with open(parallel_filename, "w") as f:
    for sample, loomfile in loom_path_dict.items():
        outfile = f"cell_type_classification/{sample}__cell_type_seurat.txt"
        if os.path.exists(outfile):
            # Result already present: record a commented-out marker line
            # instead of a command.
            entry = f"#{outfile} already exists!"
        else:
            fragfile = frags_path_dict[sample]
            command = f"Rscript {script_path} {sample} {loomfile} {fragfile} {reference_path} {annotation_path} {outfile}"
            entry = command
        # Echo each line to the notebook as it is written to the file.
        f.write(f"{entry}\n")
        print(entry)

Rscript ../0_resources/scripts/seurat_label_transfer_consensus.R master_sub_1.FIXEDCELLS cell_region_looms/master_sub_1.FIXEDCELLS__cto.scrublet0-4.fmx.singlets.model_20topics.loom ../1_data_repository/fixedcells_merged/merged_all_1.fragments.ID.sorted.tsv.gz ../0_resources/seurat_references/pbmc_ref.rds ../0_resources/seurat_references/granges_annotation.rds cell_type_classification/master_sub_1.FIXEDCELLS__cell_type_seurat.txt
