In [None]:
import numpy as np
import plotly.express as px
def plot_embedding(embedding, labels=None, title="2D Embedding Visualization",
                   width=1000, height=800):
    """
    Show a 2D embedding as an interactive Plotly scatter plot, colored by label.

    Args:
        embedding (array-like): shape (n_samples, 2). Column 0 is drawn on the
            x axis and column 1 on the y axis.
        labels (array-like, optional): one label per sample, used for coloring.
            Lists, ndarrays and pandas Series are all accepted. If None, every
            point gets the same label.
        title (str): figure title.
        width (int): figure width in pixels.
        height (int): figure height in pixels.

    Raises:
        ValueError: if `embedding` is not 2D with at least two columns, or if
            `labels` does not contain exactly one entry per sample.

    Returns:
        None. The figure is rendered with ``fig.show()``.
    """
    # Accept lists / DataFrames as well as ndarrays.
    embedding = np.asarray(embedding)
    if embedding.ndim != 2 or embedding.shape[1] < 2:
        raise ValueError(
            f"embedding must have shape (n_samples, 2), got {embedding.shape}"
        )
    n_samples = embedding.shape[0]

    if labels is None:
        labels = np.zeros(n_samples)  # default: all same
    # np.asarray makes .astype available for plain Python sequences too.
    labels = np.asarray(labels).ravel()
    if labels.size != n_samples:
        raise ValueError(
            f"labels has {labels.size} entries but embedding has {n_samples} samples"
        )

    fig = px.scatter(
        x=embedding[:, 0],
        y=embedding[:, 1],
        color=labels.astype(str),  # convert to string for discrete legend
        title=title,
        labels={"x": "Dim 1", "y": "Dim 2", "color": "Label"}
    )

    # Only the size is adjusted here; setting a title in update_layout would
    # overwrite the one given to px.scatter above.
    fig.update_layout(
        width=width,
        height=height
    )

    fig.show()



In [None]:
import scanpy as sc
from scipy.sparse import issparse

# Load the NK-cell AnnData object from disk.
nk_path = '/Users/chandrasekharmukherjee/PycharmProjects/Personal/cplearn_biotest/NK_cells.h5ad'
adata = sc.read_h5ad(nk_path)

print("done")

# Embedding stored under obsm['X_HARMONY'] and the per-cell 'ident' annotation.
X = adata.obsm['X_HARMONY']
label = adata.obs['ident'].to_numpy()

# UMAP coordinates already stored in the file.
X_umap = adata.obsm['X_UMAP']

# Expression matrix as a dense array (converted only when adata.X is sparse).
if issparse(adata.X):
    X_cg = adata.X.toarray()
else:
    X_cg = adata.X
gene_names = adata.var_names.values

print(X_cg.shape, gene_names.shape, gene_names[0:5])

In [1]:
import scanpy as sc
from scipy.sparse import issparse

# Directory containing the .h5ad files and the list of available file names.
f_path = '/Users/chandrasekharmukherjee/Data/scRNA_IBD/'
d_path = [
    'bcells_final.h5ad',
    'cd4tcells_final.h5ad',
    'cd8tcells_final.h5ad',
    'epicolonic_final.h5ad',
    'fibperi_final.h5ad',
    'ilc_final.h5ad',
    'ilealepi_final.h5ad',
    'myeloid_final.h5ad',
    'plasmacells_final.h5ad',
]

# Index into d_path selecting which file is loaded.
subset_number = 1

adata = sc.read_h5ad(f_path + d_path[subset_number])

# Embedding stored under obsm['X_harmony']; n is the number of rows (cells).
X = adata.obsm['X_harmony']
n = X.shape[0]
print(n)

# Per-cell annotations: 'final_analysis' labels and 'Patient' identifiers.
label = adata.obs['final_analysis'].to_numpy()
pt_label = adata.obs['Patient'].to_numpy()

# Expression matrix as a dense array (converted only when adata.X is sparse).
if issparse(adata.X):
    X_cg = adata.X.toarray()
else:
    X_cg = adata.X
gene_names = adata.var_names.values

print(X_cg.shape, gene_names.shape)


145704
(145704, 33075) (33075,)


In [None]:
import data_call
import numpy as np

# Alternative data source: fetch the 'Muraro' dataset through the data_call
# helper with mode='PCA'. This overwrites X and label.
d_name = 'Muraro'
X, label = data_call.get_dataset(d_name, mode='PCA')

In [2]:
from cplearn_v4.corespect import CorespectModel
from cplearn_v4.corespect.config import CoreSpectConfig

### Load the dataset X as you would in the pre-clustering step. X should be an np.ndarray.

In [3]:
# Initial parameters.
cfg = CoreSpectConfig(
    q=20, # Neighborhood parameter for the initial k-NN graph construction
    r=10, # Neighborhood parameter for the ascending random walk of FlowRank
    core_frac=0.2, # Fraction of points to be selected in the core. Can be auto-selected with auto_select_core_frac:True
    densify=False, # Densification of the different induced subgraphs. Choose 'rw' or 'k-nn' to try different methods.
    granularity=2, # Resolution of Leiden influencing the layer extraction procedure.
    resolution=2.25 # Resolution for clustering the core.
).configure()

# Options of the (currently commented-out) .run() call below:
# fine_grained (bool): Allows finding small cores from the initially obtained stable_core.
# propagate (bool): Cluster non-core points with a normalized-Laplacian-based core-label propagation.

# Build the CoreSpect model. Only the constructor is called here; the .run(...) call is commented out.
model = CorespectModel(X, **cfg.unpack())#.run(fine_grained=True,propagate=True)

In [4]:
import numpy as np
# Set model attributes by hand (model.run() is not called in this notebook):
# labels_ receives the loaded annotation array and count_mat the expression matrix.
model.labels_=label
model.count_mat=X_cg
# Split the row indices 0..n-1 into 4 contiguous, near-equal chunks and use them as layers.
# NOTE(review): presumably a stand-in for layers computed by CoreSpect — confirm.
indices = np.arange(X.shape[0])
splits = np.array_split(indices, 4)
model.layers_=splits

In [None]:
import umap

# UMAP is stochastic: fix the seed so the 2D layout is identical after a
# kernel restart. NOTE(review): recent umap-learn versions disable parallelism
# when random_state is set, so this cell may run slower — confirm acceptable.
reducer = umap.UMAP(random_state=42)
X_umap = reducer.fit_transform(X)

In [None]:
from cplearn_v4.coremap import Coremap

# Build the Coremap from the model, passing the precomputed UMAP coordinates
# as the global layout.
cmap = Coremap(
    model,
    global_umap=X_umap,
    fast_view=True,
    anchor_finding_mode='default',
)

In [None]:
from cplearn_v4.coremap.vizualizer import visualize_coremap

# Render the coremap colored by the labels stored on the model.
fig = visualize_coremap(cmap, model.labels_, use_webgl=True)
fig.show()

In [None]:
from cplearn_v4.coremap.vizualizer import visualize_coremap

# Render the coremap colored by the `label` array loaded with the dataset.
fig = visualize_coremap(cmap, label, use_webgl=True)
fig.show()

In [7]:
from cplearn_v4.biostat import DEAnalyzer, DEOptions

# Attach the expression matrix X_cg to the model before building the analyzer.
model.count_mat = X_cg

# Differential-expression analyzer over the model, with gene names and the
# filtering / pseudocount options given explicitly.
analyzer = DEAnalyzer(
    model,
    genes=gene_names,
    options=DEOptions(
        pseudocount=1.0,
        min_cells_per_group=5,
        min_detect_pct=0.05,
    ),
)



In [None]:
from collections import Counter
# Print how many samples carry each label in model.labels_.
print(Counter(model.labels_))

In [None]:
# Print the sorted distinct values of model.labels_.
print(np.unique(model.labels_))

In [6]:
# Default-mode DE test with 'CD4 KLF2hi T' as group A; per the logged output,
# group B is made up of the remaining labels.
df_default = analyzer.run_default(set1={'CD4 KLF2hi T'})

Time for preparing indices: 0.069 seconds
Starting DE test between two groups: {'mode': 'default', 'group_a': "['CD4 KLF2hi T']", 'group_b': "['CD4 FOShi T', 'CD4 FOSpos T', 'CD4 HSPhi CD70pos Treg', 'CD4 HSPhi Treg', 'CD4 IKZF2hi TNFRSF18hi Treg', 'CD4 IKZF2hi TNFRSF18lo Treg', 'CD4 IKZF2lo LAG3pos Treg', 'CD4 KLF2int T', 'CD4 TNFSF13Bhi T', 'CD4 TWIST1 Treg', 'CD4 naive T', 'Th1', 'Th1 17 22', 'Th1 17 GZMAhi', 'Th1 17 GZMApos', 'Th17', 'Th22', 'Tph Tfh', 'Tph Tfh CXCL13pos']"}
Time for data slicing: 0.000 seconds
Time for group extraction: 5.533 seconds
Time for means computation: 4.125 seconds
Time for pct computation: 7.546 seconds
Pre-MWU filtering completed in: 17.20646595954895 seconds
Time for pre-MWU filtering: 8.014 seconds
Time for MWU loop: 12.586 seconds
Time for rank-based U computation: 9.137 seconds
Time for assembling DataFrame: 0.003 seconds
Time for sorting results: 0.004 seconds


In [None]:
# Inspect the default-mode DE results (n_rows=30; presumably the number of rows shown — confirm).
analyzer.observe_de(df_default,n_rows=30)

In [None]:
# Layer-wise DE for the set {6}, requesting both per-layer and cumulative results.
# NOTE(review): presumably {6} is a cluster label — confirm it exists in model.labels_.
df_layerwise = analyzer.run_layerwise({6}, compute_per_layer=True,
        compute_cumulative=True)

In [None]:
# Inspect the layer-wise DE results with cumulative=True.
analyzer.observe_layerwise(df_layerwise,cumulative=True)

In [None]:
# Core vs periphery for cluster 6
# NOTE(review): layer_order lists 6 layers, while model.layers_ is assigned
# 4 index splits earlier in this notebook — confirm the layer indexing.
df_cvp = analyzer.run_core_vs_periphery(cluster=6, layer_order=[1, 2, 3,4,5,6])
# Optional gene filter kept for reference: genes=["GNLY"]
analyzer.observe_core_vs_periphery(df_cvp)#,genes=["GNLY"])