In [None]:
pip install scib

In [None]:
import scib
import scanpy as sc

In [None]:
# Load the AnnData object that the cells below pass to scib as the "original" dataset.
# NOTE(review): the cell that loads `reconstructed_data` reads this exact same
# file path, so both objects come from one file — confirm this is intended.
test_adata_path = "/work/SCIB/scib_Biolord/remoived_batch/biolord_HTAPP__fix_final_removed_batch_effect.h5ad"
test_adata = sc.read(test_adata_path)

In [None]:
# Count the cells in every (cell_type, replicate) combination ...
cells_per_pair = test_adata.obs.groupby(["cell_type", "replicate"]).size()
# ... and pivot into a table: one row per cell type, one column per replicate,
# with 0 where a combination has no entry.
celltype_batch_counts = cells_per_pair.unstack(fill_value=0)
print(celltype_batch_counts)

In [None]:
# Load the AnnData object that the cells below pass to scib as the reconstruction.
# NOTE(review): this is the same file path that `test_adata` is loaded from —
# confirm that comparing the file against itself is intended.
reconstructed_path = "/work/SCIB/scib_Biolord/remoived_batch/biolord_HTAPP__fix_final_removed_batch_effect.h5ad"
reconstructed_data = sc.read(reconstructed_path)

In [None]:
def get_isolated_labels_from_adata(adata, label_key, batch_key, iso_threshold=None, verbose=True):
    """Return the labels that occur in at most ``iso_threshold`` batches.

    Parameters
    ----------
    adata
        Object whose ``obs`` attribute is a DataFrame containing the
        ``label_key`` and ``batch_key`` columns.
    label_key
        Name of the ``obs`` column holding the labels.
    batch_key
        Name of the ``obs`` column holding the batch assignment.
    iso_threshold
        Maximum number of batches a label may occur in to count as isolated.
        ``None`` (default) uses the smallest number of batches that any label
        present in the data occurs in.
    verbose
        If true, print the threshold in use and a note when no label qualifies.

    Returns
    -------
    list
        The isolated labels. Empty, after a warning, when the threshold equals
        the total number of batches.
    """
    import warnings

    # One row per (label, batch) combination that actually occurs in the data.
    label_batch_pairs = adata.obs[[label_key, batch_key]].drop_duplicates()

    # observed=True: for a categorical label column, skip categories that have
    # no cells. With the pandas<3 default (observed=False) such categories are
    # reported with 0 batches, which drags the automatic threshold down to 0
    # and makes the empty categories the only "isolated" labels.
    batch_per_lab = label_batch_pairs.groupby(label_key, observed=True).agg({batch_key: "count"})

    # threshold for determining when label is considered isolated
    if iso_threshold is None:
        iso_threshold = batch_per_lab.min().tolist()[0]

    # Every label is allowed to span all batches: nothing can be "isolated".
    if iso_threshold == adata.obs[batch_key].nunique():
        warnings.warn(
            "iso_threshold is equal to number of batches in data, no isolated labels will be found",
            stacklevel=2,
        )
        return []

    if verbose:
        print(f"isolated labels: no more than {iso_threshold} batches per label")

    labels = batch_per_lab[batch_per_lab[batch_key] <= iso_threshold].index.tolist()

    if not labels and verbose:
        print("No isolated labels with less than iso_threshold batches.")

    return labels



In [None]:
# Find the cell types of `test_adata` that occur in the fewest replicates.
# iso_threshold=None lets the helper derive the threshold from the data
# (the smallest number of batches any label occurs in).
isolated_label_options = {
    "label_key": "cell_type",
    "batch_key": "replicate",
    "iso_threshold": None,
    "verbose": True,
}
isolated = get_isolated_labels_from_adata(test_adata, **isolated_label_options)
print(isolated)


In [None]:
# Add a `dpt_pseudotime` column to test_adata.obs, set to 0 for every cell.
# NOTE(review): presumably a placeholder so the scib metric calls below find a
# pseudotime column on the "original" dataset — confirm. A constant column
# carries no ordering, so any metric computed from it is not meaningful.
test_adata.obs["dpt_pseudotime"] = 0

In [None]:
# Run scib.metrics.metrics_fast with `test_adata` as the original dataset and
# `reconstructed_data` as the dataset holding the reconstruction; `embed`
# names the embedding to use in the reconstructed dataset.
# NOTE(review): this cell uses batch_key="donor_id" and
# embed="X_reconstructed_HTAPP", while the metrics_all cells use "replicate"
# and "X_reconstructed" — confirm which keys are the intended ones.
fast_metric_options = {
    "batch_key": "donor_id",
    "label_key": "cell_type",
    "embed": "X_reconstructed_HTAPP",
}
results = scib.metrics.metrics_fast(test_adata, reconstructed_data, **fast_metric_options)

print(results)




In [None]:
# Run scib.metrics.metrics_all with `test_adata` as the original dataset and
# `reconstructed_data` as the dataset holding the reconstruction; `embed`
# names the embedding to use in the reconstructed dataset.
kwargs = dict(organism="human")
datasets = (test_adata, reconstructed_data)  # (original, reconstruction)
results = scib.metrics.metrics_all(
    *datasets,
    batch_key="replicate",
    label_key="cell_type",
    embed="X_reconstructed",
    **kwargs,
)

print(results)

In [None]:
# "Removed batch effect" run of scib.metrics.metrics_all: `test_adata` is the
# original dataset, `reconstructed_data` the dataset holding the
# reconstruction, and `embed` names the embedding to use in the latter.
# NOTE(review): the call and its arguments repeat the preceding metrics_all
# cell verbatim — confirm whether different inputs were intended here.
kwargs = dict(organism="human")
datasets = (test_adata, reconstructed_data)  # (original, reconstruction)
results = scib.metrics.metrics_all(
    *datasets,
    batch_key="replicate",
    label_key="cell_type",
    embed="X_reconstructed",
    **kwargs,
)

print(results)