# Generate the main reference (R):

In [None]:
# RNA-ATAC integration
################################################################################
# https://satijalab.org/seurat/articles/atacseq_integration_vignette.html

library(Seurat)
library(SeuratDisk)
library(Signac)
library(EnsDb.Hsapiens.v86)
library(ggplot2)

################################################################################
################################################################################
# reference data:
################################################################################

# Input files: count matrix and per-cell metadata of the PBMC reference.
f_ref <- '/lustre1/project/stg_00002/lcb/cflerin/analysis/pbmc_atac/analysis2/scRNA/cell_type_classification/seurat/reference/pbmc_ssc_mat.rds'
f_meta <- '/lustre1/project/stg_00002/lcb/cflerin/analysis/pbmc_atac/analysis2/scRNA/cell_type_classification/seurat/reference/pbmc_ssc_metadata.rds'

pbmc.data <- readRDS(f_ref)
pbmc.metadata <- readRDS(f_meta)

# Build the Seurat object, then keep only cells with more than 200
# detected features.
pbmc <- CreateSeuratObject(counts = pbmc.data, meta.data = pbmc.metadata)
pbmc <- subset(pbmc, subset = nFeature_RNA > 200)

# One Seurat object per value of the "Method" metadata column.
pbmc.list <- SplitObject(pbmc, split.by = "Method")
#pbmc.list$bench = qdata

# Run normalisation, variable-feature selection, scaling, PCA and UMAP
# on every subset independently.
for (method_name in names(pbmc.list)) {
    subset_obj <- pbmc.list[[method_name]]
    subset_obj <- NormalizeData(subset_obj)
    subset_obj <- FindVariableFeatures(
        subset_obj,
        selection.method = "vst",
        nfeatures = 2000,
        verbose = FALSE
    )
    subset_obj <- ScaleData(subset_obj)
    subset_obj <- RunPCA(subset_obj)
    subset_obj <- RunUMAP(subset_obj, dims = 1:30)
    pbmc.list[[method_name]] <- subset_obj
}

# ------------------------------------------------
# Integrate the selected subsets into one reference
# ------------------------------------------------

reference.methods <- c(
    "10x Chromium (v2) A",
    "10x Chromium (v2) B",
    "10x Chromium (v3)",
    "10x Chromium (v2)"
)
reference.list <- pbmc.list[reference.methods]
#reference.list$query = qdata
#pbmc.ref = pbmc.list$`10x Chromium (v3)`

pbmc.anchors <- FindIntegrationAnchors(object.list = reference.list, dims = 1:30)
pbmc.integrated <- IntegrateData(anchorset = pbmc.anchors, dims = 1:30)
DefaultAssay(pbmc.integrated) <- "integrated"

# Standard post-integration workflow on the "integrated" assay.
pbmc.integrated <- ScaleData(pbmc.integrated, verbose = FALSE)
pbmc.integrated <- RunPCA(pbmc.integrated, npcs = 30, verbose = FALSE)
pbmc.integrated <- RunUMAP(pbmc.integrated, reduction = "pca", dims = 1:30, verbose = FALSE)

# ------------------------------------------------

# The object written to disk is the processed "10x Chromium (v3)" subset,
# not the integrated object built above.
pbmc.rna <- pbmc.list$`10x Chromium (v3)`

saveRDS(pbmc.rna, file = 'pbmc_RNA_reference_10xv3.rds')

# pbmc.rna <- NormalizeData(pbmc.rna)
# pbmc.rna <- FindVariableFeatures(pbmc.rna)
# pbmc.rna <- ScaleData(pbmc.rna)
# pbmc.rna <- RunPCA(pbmc.rna)
# pbmc.rna <- RunUMAP(pbmc.rna, dims = 1:30)

Remove megakaryocytes manually

In [None]:
# RNA-ATAC integration
################################################################################
# https://satijalab.org/seurat/articles/atacseq_integration_vignette.html

library(Seurat)
library(SeuratDisk)
library(Signac)
library(EnsDb.Hsapiens.v86)
library(ggplot2)


################################################################################
# reference data:
################################################################################

# Rebuild the PBMC RNA reference from the count matrix and metadata stored
# under ../0_resources/seurat_references, process every "Method" subset,
# integrate four 10x Chromium subsets, and write pbmc_ref.rds.
f_ref = '../0_resources/seurat_references/pbmc_ssc_mat.rds'
f_meta = '../0_resources/seurat_references/pbmc_ssc_metadata.rds'


pbmc.data <- readRDS(f_ref)
pbmc.metadata <- readRDS(f_meta)

# Create the Seurat object and keep only cells with more than 200 detected features.
pbmc <- CreateSeuratObject(counts = pbmc.data, meta.data = pbmc.metadata)
pbmc <- subset(pbmc, subset = nFeature_RNA > 200)

# One Seurat object per value of the "Method" metadata column.
pbmc.list <- SplitObject(pbmc, split.by = "Method")
#pbmc.list$bench = qdata

# Per-subset workflow: normalise, select 2000 variable features (vst),
# scale, PCA, UMAP on the first 30 dimensions.
for (i in names(pbmc.list)) {
    pbmc.list[[i]] = NormalizeData(pbmc.list[[i]])
    pbmc.list[[i]] = FindVariableFeatures(pbmc.list[[i]], selection.method = "vst", nfeatures = 2000, verbose = FALSE)
    pbmc.list[[i]] = ScaleData(pbmc.list[[i]])
    pbmc.list[[i]] = RunPCA(pbmc.list[[i]])
    pbmc.list[[i]] = RunUMAP(pbmc.list[[i]], dims = 1:30)
}

##################################################
### integrate reference:

# Only these four subsets take part in the integration.
reference.list <- pbmc.list[
    c("10x Chromium (v2) A", "10x Chromium (v2) B", "10x Chromium (v3)", "10x Chromium (v2)")
]
#reference.list$query = qdata
#pbmc.ref = pbmc.list$`10x Chromium (v3)`
pbmc.anchors <- FindIntegrationAnchors(object.list = reference.list, dims = 1:30)

pbmc.integrated <- IntegrateData(anchorset = pbmc.anchors, dims = 1:30)
DefaultAssay(pbmc.integrated) <- "integrated"

# Scale, PCA and UMAP on the "integrated" assay.
pbmc.integrated <- ScaleData(pbmc.integrated, verbose = FALSE)
pbmc.integrated <- RunPCA(pbmc.integrated, npcs = 30, verbose = FALSE)
pbmc.integrated <- RunUMAP(pbmc.integrated, reduction = "pca", dims = 1:30, verbose = FALSE)

##################################################

# pbmc.rna = pbmc.list$`10x Chromium (v3)`

# NOTE(review): pbmc.rna is loaded from a pre-computed file here but is not
# used again in this cell. Presumably the manual megakaryocyte removal named
# in the cell heading was applied when that file was produced -- no such
# filtering step is visible in this cell; confirm.
pbmc.rna = readRDS('../0_resources/seurat_references/pbmc_ssc_mat__integrated.rds')

# pbmc.rna <- NormalizeData(pbmc.rna)
# pbmc.rna <- FindVariableFeatures(pbmc.rna)
# pbmc.rna <- ScaleData(pbmc.rna)
# pbmc.rna <- RunPCA(pbmc.rna)
# pbmc.rna <- RunUMAP(pbmc.rna, dims = 1:30)

# NOTE(review): the object saved is `pbmc`, i.e. the feature-filtered object
# from before SplitObject(); neither pbmc.integrated nor pbmc.rna is written.
# Confirm that this is the intended content of pbmc_ref.rds.
saveRDS(pbmc, file="../0_resources/seurat_references/pbmc_ref.rds")


# ATAC analysis: add gene annotation information.
# Pull the annotation ranges out of the EnsDb.Hsapiens.v86 database and
# store them, with "chr"-prefixed sequence names, for the label-transfer
# script.
annotations <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86)

# use chr prefix:
# Prefix every sequence name with "chr" and rename the resulting "chrMT"
# to "chrM". Base R sub() is used because stringr is not attached in this
# cell, so the previous str_replace() call failed with
# "could not find function". fixed = TRUE keeps the literal-match
# behaviour of the original pattern.
ucsc.levels <- sub(
    "chrMT",
    "chrM",
    paste0("chr", seqlevels(annotations)),
    fixed = TRUE
)
seqlevels(annotations) <- ucsc.levels

genome(annotations) <- "hg38"
saveRDS(annotations, file="../0_resources/seurat_references/granges_annotation.rds")

# Then, invoke the following R script

In [2]:
!cat ../0_resources/scripts/seurat_label_transfer.R

#!/usr/bin/env Rscript
library(Seurat)
library(SeuratDisk)
library(Signac)
library(EnsDb.Hsapiens.v86)
library(ggplot2)
library(stringr)

args = commandArgs(trailingOnly=TRUE)
sample_id = args[1]
f_loom = args[2]
f_frag = args[3]
f_reference = args[4]
f_annotation = args[5]
f_out = args[6]

print(paste0("Processing sample ", args[1]))

# load pbmc object
# pbmc.integrated <- readRDS("../0_resources/seurat_references/pbmc_integrated.RDS")
pbmc.rna <- readRDS('../0_resources/seurat_references/pbmc_ssc_mat__integrated.rds')

################################################################################
# ATAC
################################################################################

### get data from loom:
atacloomcon <- Connect(filename = f_loom, mode = "r")
atacloomcon
atac_tmp <- as.Seurat(atacloomcon, assay='ATAC')
atacloomcon$close_all()

# correctly parse regions (default delims are '-','-')
regions = StringToGRanges(
    rownames(GetAssayData(atac_tmp, slot = "counts", ass

# Python

In [22]:
!cat /dodrio/scratch/projects/starting_2022_023/benchmark/scatac_benchmark/0_resources/scripts/seurat_label_transfer.R

#!/usr/bin/env Rscript
library(Seurat)
library(SeuratDisk)
library(Signac)
library(EnsDb.Hsapiens.v86)
library(ggplot2)
library(stringr)

args = commandArgs(trailingOnly=TRUE)
sample_id = args[1]
f_loom = args[2]
f_frag = args[3]
f_reference = args[4]
f_annotation = args[5]
f_out = args[6]

print(paste0("Processing sample ", args[1]))

# load pbmc object
# pbmc.integrated <- readRDS("../0_resources/seurat_references/pbmc_integrated.RDS")
pbmc.rna <- readRDS('../0_resources/seurat_references/pbmc_ssc_mat__integrated.rds')

################################################################################
# ATAC
################################################################################

### get data from loom:
atacloomcon <- Connect(filename = f_loom, mode = "r")
atacloomcon
atac_tmp <- as.Seurat(atacloomcon, assay='ATAC')
atacloomcon$close_all()

# correctly parse regions (default delims are '-','-')
regions = StringToGRanges(
    rownames(GetAssayData(atac_tmp, slot = "counts", ass

# Python

In [1]:
import glob
import os

In [2]:
!pwd

/dodrio/scratch/projects/starting_2022_023/benchmark/scatac_benchmark/full_2_cistopic


In [8]:
# Map each sample ID (the part of the file name before ".fragments.tsv.gz")
# to the path of its fragments file, in sorted path order.
scrub_name_suffix = "0-4"
fragment_files = sorted(glob.glob('../1_data_repository/full_fragments/*.fragments.tsv.gz'))
frags_path_dict = {}
for fragment_file in fragment_files:
    file_name = fragment_file.split('/')[-1]
    sample_id = file_name.split('.fragments.tsv.gz')[0]
    frags_path_dict[sample_id] = fragment_file
frags_path_dict

{'BIO_ddseq_1.FULL': '../1_data_repository/full_fragments/BIO_ddseq_1.FULL.fragments.tsv.gz',
 'BIO_ddseq_2.FULL': '../1_data_repository/full_fragments/BIO_ddseq_2.FULL.fragments.tsv.gz',
 'BIO_ddseq_3.FULL': '../1_data_repository/full_fragments/BIO_ddseq_3.FULL.fragments.tsv.gz',
 'BIO_ddseq_4.FULL': '../1_data_repository/full_fragments/BIO_ddseq_4.FULL.fragments.tsv.gz',
 'BRO_mtscatac_1.FULL': '../1_data_repository/full_fragments/BRO_mtscatac_1.FULL.fragments.tsv.gz',
 'BRO_mtscatac_2.FULL': '../1_data_repository/full_fragments/BRO_mtscatac_2.FULL.fragments.tsv.gz',
 'CNA_10xmultiome_1.FULL': '../1_data_repository/full_fragments/CNA_10xmultiome_1.FULL.fragments.tsv.gz',
 'CNA_10xmultiome_2.FULL': '../1_data_repository/full_fragments/CNA_10xmultiome_2.FULL.fragments.tsv.gz',
 'CNA_10xv11_1.FULL': '../1_data_repository/full_fragments/CNA_10xv11_1.FULL.fragments.tsv.gz',
 'CNA_10xv11_2.FULL': '../1_data_repository/full_fragments/CNA_10xv11_2.FULL.fragments.tsv.gz',
 'CNA_10xv11_3.FULL'

In [11]:
# Map each sample ID (the part of the file name before ".fragments.tsv.gz")
# to the path of its fragments file, in sorted path order.
scrub_name_suffix = "0-4"
fragment_files = sorted(glob.glob('../1_data_repository/full_fragments/*.fragments.tsv.gz'))
frags_path_dict = {}
for fragment_file in fragment_files:
    file_name = fragment_file.split('/')[-1]
    sample_id = file_name.split('.fragments.tsv.gz')[0]
    frags_path_dict[sample_id] = fragment_file
frags_path_dict

{'BIO_ddseq_1.FULL': '../1_data_repository/full_fragments/BIO_ddseq_1.FULL.fragments.tsv.gz',
 'BIO_ddseq_2.FULL': '../1_data_repository/full_fragments/BIO_ddseq_2.FULL.fragments.tsv.gz',
 'BIO_ddseq_3.FULL': '../1_data_repository/full_fragments/BIO_ddseq_3.FULL.fragments.tsv.gz',
 'BIO_ddseq_4.FULL': '../1_data_repository/full_fragments/BIO_ddseq_4.FULL.fragments.tsv.gz',
 'BRO_mtscatac_1.FULL': '../1_data_repository/full_fragments/BRO_mtscatac_1.FULL.fragments.tsv.gz',
 'BRO_mtscatac_2.FULL': '../1_data_repository/full_fragments/BRO_mtscatac_2.FULL.fragments.tsv.gz',
 'CNA_10xmultiome_1.FULL': '../1_data_repository/full_fragments/CNA_10xmultiome_1.FULL.fragments.tsv.gz',
 'CNA_10xmultiome_2.FULL': '../1_data_repository/full_fragments/CNA_10xmultiome_2.FULL.fragments.tsv.gz',
 'CNA_10xv11_1.FULL': '../1_data_repository/full_fragments/CNA_10xv11_1.FULL.fragments.tsv.gz',
 'CNA_10xv11_2.FULL': '../1_data_repository/full_fragments/CNA_10xv11_2.FULL.fragments.tsv.gz',
 'CNA_10xv11_3.FULL'

In [12]:
# Map each sample ID (the part of the file name before the first "__")
# to the path of its singlets loom file, in sorted path order.
scrub_name_suffix = "0-4"
loom_files = sorted(glob.glob('cell_region_looms/*singlets.loom'))
loom_path_dict = {}
for loom_file in loom_files:
    file_name = loom_file.split('/')[-1]
    sample_id = file_name.split('__')[0]
    loom_path_dict[sample_id] = loom_file
loom_path_dict

{'BIO_ddseq_1.FULL': 'cell_region_looms/BIO_ddseq_1.FULL__cto.scrublet0-4.fmx.singlets.loom',
 'BIO_ddseq_2.FULL': 'cell_region_looms/BIO_ddseq_2.FULL__cto.scrublet0-4.fmx.singlets.loom',
 'BIO_ddseq_3.FULL': 'cell_region_looms/BIO_ddseq_3.FULL__cto.scrublet0-4.fmx.singlets.loom',
 'BIO_ddseq_4.FULL': 'cell_region_looms/BIO_ddseq_4.FULL__cto.scrublet0-4.fmx.singlets.loom',
 'BRO_mtscatac_1.FULL': 'cell_region_looms/BRO_mtscatac_1.FULL__cto.scrublet0-4.fmx.singlets.loom',
 'BRO_mtscatac_2.FULL': 'cell_region_looms/BRO_mtscatac_2.FULL__cto.scrublet0-4.fmx.singlets.loom',
 'CNA_10xmultiome_1.FULL': 'cell_region_looms/CNA_10xmultiome_1.FULL__cto.scrublet0-4.fmx.singlets.loom',
 'CNA_10xmultiome_2.FULL': 'cell_region_looms/CNA_10xmultiome_2.FULL__cto.scrublet0-4.fmx.singlets.loom',
 'CNA_10xv11_1.FULL': 'cell_region_looms/CNA_10xv11_1.FULL__cto.scrublet0-4.fmx.singlets.loom',
 'CNA_10xv11_2.FULL': 'cell_region_looms/CNA_10xv11_2.FULL__cto.scrublet0-4.fmx.singlets.loom',
 'CNA_10xv11_3.FULL'

In [13]:
!cat ../0_resources/scripts/seurat_label_transfer_consensus.R

#!/usr/bin/env Rscript
library(Seurat)
library(SeuratDisk)
library(Signac)
# library(EnsDb.Hsapiens.v86)
library(ggplot2)
library(stringr)

args = commandArgs(trailingOnly=TRUE)
sample_id = args[1]
f_loom = args[2]
f_frag = args[3]
f_reference = args[4]
f_annotation = args[5]
f_out = args[6]

print(paste0("Processing sample ", args[1]))

# load pbmc object
pbmc.rna <- readRDS("../0_resources/seurat_references/pbmc_integrated.RDS")
#pbmc.rna <- readRDS('/lustre1/project/stg_00090/scatac_benchmark/0_resources/seurat_references/pbmc_ssc_mat__integrated.rds')

################################################################################
# ATAC
################################################################################

### get data from loom:
atacloomcon <- Connect(filename = f_loom, mode = "r")
atacloomcon
atac_tmp <- as.Seurat(atacloomcon, assay='ATAC')
atacloomcon$close_all()

# subset by removing contig chromosomes
rawregions = rownames(GetAssayData(atac_tmp, slot = "counts", a

In [14]:
# Write the job list for the Seurat label-transfer step: one
# `singularity exec ... Rscript` command per sample whose output file does
# not exist yet, and a line starting with "#" for every sample that is
# already done.
parallel_filename = "seurat_label_transfer.parallel"
script_path = "../0_resources/scripts/seurat_label_transfer_consensus.R"
img_path = "../0_resources/vsn_cache/cflerin-seurat-4.0.3-plus.sif"
reference_path = "../0_resources/seurat_references/pbmc_ref.rds"
annotation_path = "../0_resources/seurat_references/granges_annotation.rds"
bind_mounts = "/dodrio,/readonly/dodrio,/tmp"
#bind_mounts = "/lustre1,/staging,${VSC_SCRATCH}/tmp:/tmp"

# Build and validate every line BEFORE opening the job file: open(..., 'w')
# truncates it, so a failed lookup inside the write loop would leave a
# half-written job list behind.
job_lines = []
for sample, loomfile in loom_path_dict.items():
    outfile = f"cell_type_classification/{sample}__cell_type_seurat.txt"
    if os.path.exists(outfile):
        job_lines.append(f"#{outfile} already exists!")
        continue
    if sample not in frags_path_dict:
        # Same exception type as the plain dict lookup, with an actionable message.
        raise KeyError(
            f"no fragments file for sample {sample!r} "
            f"(loom file {loomfile!r}); check frags_path_dict"
        )
    fragfile = frags_path_dict[sample]
    command = f"img_path={img_path} && singularity exec --cleanenv -H $PWD -B {bind_mounts} $img_path Rscript {script_path} {sample} {loomfile} {fragfile} {reference_path} {annotation_path} {outfile}"
    job_lines.append(command)

# Every line is both written to the job file and echoed to the notebook.
with open(parallel_filename, 'w') as f:
    for line in job_lines:
        f.write(f"{line}\n")
        print(line)

#cell_type_classification/BIO_ddseq_1.FULL__cell_type_seurat.txt already exists!
#cell_type_classification/BIO_ddseq_2.FULL__cell_type_seurat.txt already exists!
#cell_type_classification/BIO_ddseq_3.FULL__cell_type_seurat.txt already exists!
#cell_type_classification/BIO_ddseq_4.FULL__cell_type_seurat.txt already exists!
#cell_type_classification/BRO_mtscatac_1.FULL__cell_type_seurat.txt already exists!
#cell_type_classification/BRO_mtscatac_2.FULL__cell_type_seurat.txt already exists!
#cell_type_classification/CNA_10xmultiome_1.FULL__cell_type_seurat.txt already exists!
#cell_type_classification/CNA_10xmultiome_2.FULL__cell_type_seurat.txt already exists!
#cell_type_classification/CNA_10xv11_1.FULL__cell_type_seurat.txt already exists!
#cell_type_classification/CNA_10xv11_2.FULL__cell_type_seurat.txt already exists!
#cell_type_classification/CNA_10xv11_3.FULL__cell_type_seurat.txt already exists!
#cell_type_classification/CNA_10xv11_4.FULL__cell_type_seurat.txt already exists!
#cell_

In [15]:
# Write the job list for the Seurat label-transfer step: one plain
# `Rscript` command per sample whose output file does not exist yet, and a
# line starting with "#" for every sample that is already done.
parallel_filename = "seurat_label_transfer.parallel"
script_path = "../0_resources/scripts/seurat_label_transfer_consensus.R"
img_path = "../0_resources/vsn_cache/cflerin-seurat-4.0.3-plus.sif"
reference_path = "../0_resources/seurat_references/pbmc_ref.rds"
annotation_path = "../0_resources/seurat_references/granges_annotation.rds"
bind_mounts = "/dodrio,/readonly/dodrio,/tmp"
#bind_mounts = "/lustre1,/staging,${VSC_SCRATCH}/tmp:/tmp"

# Build and validate every line BEFORE opening the job file: open(..., 'w')
# truncates it, so a failed lookup inside the write loop would leave a
# half-written job list behind.
job_lines = []
for sample, loomfile in loom_path_dict.items():
    outfile = f"cell_type_classification/{sample}__cell_type_seurat.txt"
    if os.path.exists(outfile):
        job_lines.append(f"#{outfile} already exists!")
        continue
    if sample not in frags_path_dict:
        # Same exception type as the plain dict lookup, with an actionable message.
        raise KeyError(
            f"no fragments file for sample {sample!r} "
            f"(loom file {loomfile!r}); check frags_path_dict"
        )
    fragfile = frags_path_dict[sample]
    command = f"Rscript {script_path} {sample} {loomfile} {fragfile} {reference_path} {annotation_path} {outfile}"
    job_lines.append(command)

# Every line is both written to the job file and echoed to the notebook.
with open(parallel_filename, 'w') as f:
    for line in job_lines:
        f.write(f"{line}\n")
        print(line)

#cell_type_classification/BIO_ddseq_1.FULL__cell_type_seurat.txt already exists!
#cell_type_classification/BIO_ddseq_2.FULL__cell_type_seurat.txt already exists!
#cell_type_classification/BIO_ddseq_3.FULL__cell_type_seurat.txt already exists!
#cell_type_classification/BIO_ddseq_4.FULL__cell_type_seurat.txt already exists!
#cell_type_classification/BRO_mtscatac_1.FULL__cell_type_seurat.txt already exists!
#cell_type_classification/BRO_mtscatac_2.FULL__cell_type_seurat.txt already exists!
#cell_type_classification/CNA_10xmultiome_1.FULL__cell_type_seurat.txt already exists!
#cell_type_classification/CNA_10xmultiome_2.FULL__cell_type_seurat.txt already exists!
#cell_type_classification/CNA_10xv11_1.FULL__cell_type_seurat.txt already exists!
#cell_type_classification/CNA_10xv11_2.FULL__cell_type_seurat.txt already exists!
#cell_type_classification/CNA_10xv11_3.FULL__cell_type_seurat.txt already exists!
#cell_type_classification/CNA_10xv11_4.FULL__cell_type_seurat.txt already exists!
#cell_

In [16]:
# Write the job list for the Seurat label-transfer step: one plain
# `Rscript` command per sample whose output file does not exist yet, and a
# line starting with "#" for every sample that is already done.
parallel_filename = "seurat_label_transfer.parallel"
script_path = "../0_resources/scripts/seurat_label_transfer_consensus.R"
img_path = "../0_resources/vsn_cache/cflerin-seurat-4.0.3-plus.sif"
reference_path = "../0_resources/seurat_references/pbmc_ref.rds"
annotation_path = "../0_resources/seurat_references/granges_annotation.rds"
bind_mounts = "/dodrio,/readonly/dodrio,/tmp"
#bind_mounts = "/lustre1,/staging,${VSC_SCRATCH}/tmp:/tmp"

# Build and validate every line BEFORE opening the job file: open(..., 'w')
# truncates it, so a failed lookup inside the write loop would leave a
# half-written job list behind.
job_lines = []
for sample, loomfile in loom_path_dict.items():
    outfile = f"cell_type_classification/{sample}__cell_type_seurat.txt"
    if os.path.exists(outfile):
        job_lines.append(f"#{outfile} already exists!")
        continue
    if sample not in frags_path_dict:
        # Same exception type as the plain dict lookup, with an actionable message.
        raise KeyError(
            f"no fragments file for sample {sample!r} "
            f"(loom file {loomfile!r}); check frags_path_dict"
        )
    fragfile = frags_path_dict[sample]
    command = f"Rscript {script_path} {sample} {loomfile} {fragfile} {reference_path} {annotation_path} {outfile}"
    job_lines.append(command)

# Every line is both written to the job file and echoed to the notebook.
with open(parallel_filename, 'w') as f:
    for line in job_lines:
        f.write(f"{line}\n")
        print(line)

#cell_type_classification/BIO_ddseq_1.FULL__cell_type_seurat.txt already exists!
#cell_type_classification/BIO_ddseq_2.FULL__cell_type_seurat.txt already exists!
#cell_type_classification/BIO_ddseq_3.FULL__cell_type_seurat.txt already exists!
#cell_type_classification/BIO_ddseq_4.FULL__cell_type_seurat.txt already exists!
#cell_type_classification/BRO_mtscatac_1.FULL__cell_type_seurat.txt already exists!
#cell_type_classification/BRO_mtscatac_2.FULL__cell_type_seurat.txt already exists!
#cell_type_classification/CNA_10xmultiome_1.FULL__cell_type_seurat.txt already exists!
#cell_type_classification/CNA_10xmultiome_2.FULL__cell_type_seurat.txt already exists!
#cell_type_classification/CNA_10xv11_1.FULL__cell_type_seurat.txt already exists!
#cell_type_classification/CNA_10xv11_2.FULL__cell_type_seurat.txt already exists!
#cell_type_classification/CNA_10xv11_3.FULL__cell_type_seurat.txt already exists!
#cell_type_classification/CNA_10xv11_4.FULL__cell_type_seurat.txt already exists!
#cell_

# Run the Rscript

```
# Create the output directory; -p makes this a no-op when it already exists
# (plain mkdir errors out on re-runs).
mkdir -p cell_type_classification
# Exported so that the jobs started below inherit the OpenMP thread limit.
export OMP_THREAD_LIMIT=80
# Run the generated job list, two jobs at a time.
cat seurat_label_transfer.parallel | parallel -j 2 --progress
```

# Troubleshooting

when running
```
> chromatinassay = CreateChromatinAssay(
+     counts=GetAssayData(atac_tmp, slot = "counts", assay='ATAC'),
+     genome='hg38',
+     fragments = f_frag,
+     ranges=regions
+     )
```
I get the error:
```
Computing hash
Checking for 2208 cell barcodes
Error in CreateFragmentObject(path = fragments, cells = cells, validate.fragments = validate.fragments,  :
  Not all cells requested could be found in the fragment file.
In addition: Warning message:
In CreateChromatinAssay(counts = GetAssayData(atac_tmp, slot = "counts",  :
  Overlapping ranges supplied. Ranges should be non-overlapping.
```
and `chromatinassay` is empty. 


In [1]:
zcat fragments_postbap/VIB_Hydrop_1.sinto.mm.fragments.tsv.gz | head

chr1	10151	10183	GCGTTGACTTCGTCGACGTA-12	1
chr1	16228	16272	ATTGTAGAGGTAGGAGTCAA-12	6
chr1	17490	17527	TTGTACTGCAAGGTGCCAGA-12	3
chr1	30857	31030	CATGAACGTTGTGACGTGGA-12	1
chr1	68280	68311	CATTGGTGCAGGCGAGATTC-12	1
chr1	74871	74900	ATCATCCGTTAATAGGCAGG-11	11
chr1	79643	79855	GGAGCGGATTCAAGGTCGAT-11	2
chr1	88228	88302	AAGACCAAGCGCTGATGGTA-12	4
chr1	89893	89944	TCCGATTCAGCACGTTAAGA-12	3
chr1	91177	91237	CTCCTCACTACTTGAAGAAG-12	4

gzip: stdout: Broken pipe


this error doesn't make sense, because the barcodes are present in the fragments file.

the output of the above code can be read in the cell_type_redictions.ipynb notebook

It turns out the loom-writing code had a bug: it did not add the `-1?` suffix to the barcode. I fixed this in the loom writing.

Now that this is fixed, I get a different error:

```
> chromatinassay = CreateChromatinAssay(
+     counts=GetAssayData(atac_tmp, slot = "counts", assay='ATAC'),
+     genome='hg38',
+     fragments = f_frag,
+     ranges=regions
+     )
Computing hash
Checking for 2208 cell barcodes
Error in .order_seqlevels(chrom_sizes[, "chrom"]) :
  !anyNA(m32) is not TRUE
In addition: Warning message:
In CreateChromatinAssay(counts = GetAssayData(atac_tmp, slot = "counts",  :
  Overlapping ranges supplied. Ranges should be non-overlapping.
```


it turns out that this is a known bug (https://github.com/timoast/signac/issues/687) and that commenting out the line `genome='hg38'`