In [None]:
import scanpy as sc
import numpy as np
import os
import anndata2ri
import pathlib
import scvelo as scv
from scipy import io
import anndata#
import pandas as pd
from tqdm import tqdm
import argparse
import sys

# Notebook runtime setup: R <-> Python object conversion, the rpy2 cell magics,
# scanpy logging verbosity and the warning filter. These statements change global
# session state, so they are kept in this order.

# Activate the anndata2ri conversion between SingleCellExperiment and AnnData.
anndata2ri.activate()

# Load the rpy2 IPython extension so that R code can be run from notebook cells
# through its magics.
%load_ext rpy2.ipython

# Set scanpy's logging verbosity to 3.
# NOTE(review): presumably the "hints" level; confirm against the installed scanpy.
sc.settings.verbosity = 3
# Debug aid, disabled by default; uncomment to report package versions:
# sc.logging.print_versions()

import warnings
# Suppress every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation warnings raised by the libraries used
# in this notebook; consider narrowing the filter if results look suspicious.
warnings.filterwarnings("ignore")

# ---------------------------------------------------------------------------
# Run configuration: which dataset to process and where its inputs/outputs live.
# ---------------------------------------------------------------------------

# Dataset identifiers known to this project; `orig_dataset` below picks one.
all_datasets = [
    "220907_FH",
    "GSM5764259",
    "230228_FH",
    "GSM5764288",
    "GSM5764245",
    "integrate_GSE192742_LIVER",
    "gutcellatlas_myeloid",
]

# NOTE(review): absolute, machine-specific paths; adjust when running elsewhere.
outdir = "/media/hieunguyen/HNSD01/outdir"
orig_dataset = "GSM5764245"
config_version = "v0.1"
output_version = "20240806"
PROJECT = "FHager_datasets"

# Directory holding the per-sample loom files.
loom_dir = "/media/hieunguyen/HNSD01/storage/FHager_datasets/velocyto_output"

# Dataset name as used in the output folder layout: "<dataset>_<config version>".
dataset_name = "{}_{}".format(orig_dataset, config_version)

# All outputs of this run live under <outdir>/<PROJECT>/<output_version>/.
path_to_main_output = os.path.join(outdir, PROJECT, output_version, dataset_name)
path_to_main_input = os.path.join(
    path_to_main_output,
    "s8a_output",
    "{}.output.s8a.rds".format(dataset_name),
)
path_to_seurat2anndata = os.path.join(outdir, PROJECT, output_version, "seurat2anndata", dataset_name)
path_to_05_output = os.path.join(path_to_main_output, "05_output")

# Create the output folder (and any missing parents) directly from Python rather
# than through a shell command: this handles paths containing spaces or shell
# metacharacters, works on any OS, and raises if the folder cannot be created
# instead of failing silently.
os.makedirs(path_to_05_output, exist_ok=True)

# ---------------------------------------------------------------------------
# Merge every per-sample loom file in `loom_dir` into one AnnData object and
# write the result back to `loom_dir` as a single loom file.
# ---------------------------------------------------------------------------

# Sort the matches: glob order depends on the filesystem, and the order in which
# samples are concatenated decides the row order of the merged object.
all_loom_files = sorted(pathlib.Path(loom_dir).glob("GSM*.loom"))
if not all_loom_files:
    raise FileNotFoundError("No GSM*.loom files found in {}".format(loom_dir))

velo_data_dict = dict()
for path_to_loom_file in tqdm(all_loom_files):
    tmp_velo_data = scv.read_loom(path_to_loom_file)

    # Cell IDs are expected to look like "<sample>:<barcode>x".  The sample name
    # is read from the first cell and assumed to be the same for the whole file.
    samplename = tmp_velo_data.obs.index[0].split(":")[0]

    # Rebuild each cell ID as "<sample>_<sample>_<barcode>-1".  Note that
    # replace("x", "") removes every "x" in the barcode part, not only a
    # trailing one.
    # NOTE(review): presumably this mirrors the cell naming of the matching
    # Seurat/AnnData objects - confirm against the downstream merge.
    new_obs = ["{}_{}_{}-1".format(samplename,
                                   samplename,
                                   item.split(":")[1].replace("x", "")) for item in tmp_velo_data.obs.index]
    tmp_velo_data.obs.index = new_obs
    tmp_velo_data.obs["barcode"] = new_obs
    tmp_velo_data.var_names_make_unique()
    velo_data_dict[samplename] = tmp_velo_data

# Concatenate all samples in ONE call.  Chaining pairwise concatenations copies
# the growing object on every round, appends one "-<batch>" suffix to the cell
# names per round, and overwrites the "batch" column each time so that it ends up
# distinguishing only the last sample from all the others.
velo_data_list = list(velo_data_dict.values())
velo_data = velo_data_list[0]
if len(velo_data_list) > 1:
    velo_data = velo_data.concatenate(*velo_data_list[1:])

# Drop the "-<batch>" suffix added by concatenate, keeping "<...barcode>-1".
# This keeps only the first two hyphen-separated fields, so it assumes sample
# names contain no "-" themselves.
new_obs = ["-".join(item.split("-")[0:2]) for item in velo_data.obs.index]
velo_data.obs.index = new_obs

# The output name does not match the "GSM*.loom" pattern above, so a re-run of
# this cell does not pick the merged file up as an input.
velo_data.write_loom(os.path.join(loom_dir, "integrate_GSE192742_LIVER.loom"))

The rpy2.ipython extension is already loaded. To reload it, use:
  %reload_ext rpy2.ipython
