In [None]:
# Mount Google Drive at /content/drive; the data paths used by later cells
# all live under this mount point.
from google.colab import drive
drive.mount('/content/drive')


In [None]:
# List the contents of the project data directory on the mounted Drive.
!ls /content/drive/MyDrive/ms_singlecell/data

In [None]:
!pip install scanpy

In [None]:
import scanpy as sc

# Directory on the mounted Drive that holds the project's AnnData files.
base = "/content/drive/MyDrive/ms_singlecell/data/"

# `base` already ends with "/", so strip it before joining; otherwise the
# path contains a doubled separator ("data//adata_abt_n").
# NOTE(review): the file name has no ".h5ad" extension — confirm it matches
# the name on Drive.
adata_abt_n = sc.read_h5ad(f"{base.rstrip('/')}/adata_abt_n")


In [None]:
import scanpy as sc

# Directory on the mounted Drive that holds the project's AnnData files.
base = "/content/drive/MyDrive/ms_singlecell/data/"

# `base` already ends with "/", so strip it before joining; otherwise the
# path contains a doubled separator ("data//adata_dmso_n").
# NOTE(review): the file name has no ".h5ad" extension — confirm it matches
# the name on Drive.
adata_dmso_n = sc.read_h5ad(f"{base.rstrip('/')}/adata_dmso_n")

In [None]:
# Save both loaded objects to .h5ad files. The paths are relative, so they
# resolve against the kernel's current working directory.
# NOTE(review): on Colab that directory is presumably on the ephemeral VM disk
# rather than on Drive — confirm these snapshots are meant to be temporary.
adata_dmso_n.write("adata_dmso_frozen.h5ad")
adata_abt_n.write("adata_abt_frozen.h5ad")

In [None]:
# Shape of each condition's AnnData object.
print(adata_dmso_n.shape)
print(adata_abt_n.shape)

# True only when both objects carry the same gene names in the same order.
# Index.equals is used instead of an elementwise `==` because `==` raises
# ValueError when the two indexes have different lengths, whereas equals()
# simply returns False in that case.
print(adata_dmso_n.var_names.equals(adata_abt_n.var_names))

In [None]:
# Independent copy of the DMSO object; none of the visible cells read
# `adata_tmp` afterwards.
# NOTE(review): presumably a safety copy taken before the in-place
# preprocessing further down — confirm, and give it a descriptive name if it
# is actually needed.
adata_tmp = adata_dmso_n.copy()

In [None]:
# Labelled shape report for both conditions.
print("DMSO shape:", adata_dmso_n.shape)
print("ABT  shape:", adata_abt_n.shape)

# Index.equals returns a plain bool and tolerates indexes of different length;
# an elementwise `==` would raise ValueError if the gene counts differed.
print("Genes aligned:",
      adata_dmso_n.var_names.equals(adata_abt_n.var_names))

In [None]:
import scanpy as sc

# Apply scanpy's scaling to each condition separately with the same clipping
# threshold (max_value=10). The return value is not captured, so the call is
# relied on to update each object in place.
for _adata in (adata_dmso_n, adata_abt_n):
    sc.pp.scale(_adata, max_value=10)

In [None]:
# Number of principal components computed (and later sliced) per condition.
n_pcs = 20

# PCA is fitted independently on each condition, DMSO first and then ABT.
for _adata in (adata_dmso_n, adata_abt_n):
    sc.tl.pca(_adata, n_comps=n_pcs, svd_solver="arpack")

In [None]:
# Pull the per-component variance ratios stored under .uns["pca"] for each
# condition, then show the leading ten.
dmso_variance_ratio = adata_dmso_n.uns["pca"]["variance_ratio"]
abt_variance_ratio = adata_abt_n.uns["pca"]["variance_ratio"]

print("DMSO variance explained (first 10 PCs):")
print(dmso_variance_ratio[:10])

print("\nABT variance explained (first 10 PCs):")
print(abt_variance_ratio[:10])

In [None]:
import numpy as np

# Take the first `n_pcs` columns of each condition's stored PCA coordinates.
X_dmso_pca, X_abt_pca = (
    adata.obsm["X_pca"][:, :n_pcs]
    for adata in (adata_dmso_n, adata_abt_n)
)

print("PCA shapes:", X_dmso_pca.shape, X_abt_pca.shape)

In [None]:
# Sanity checks on both embeddings: the expected number of components first,
# then no NaN/inf entries.
assert all(emb.shape[1] == n_pcs for emb in (X_dmso_pca, X_abt_pca))
assert all(np.isfinite(emb).all() for emb in (X_dmso_pca, X_abt_pca))

In [None]:
from scipy.spatial.distance import pdist

# Euclidean distance between every pair of cells, computed within each
# condition separately (never across conditions).
# NOTE(review): pdist materialises one value per pair of rows, so memory grows
# quadratically with the number of cells — confirm this fits for these data.
dist_dmso, dist_abt = (
    pdist(embedding, "euclidean")
    for embedding in (X_dmso_pca, X_abt_pca)
)

print("DMSO distances:", dist_dmso.shape)
print("ABT  distances:", dist_abt.shape)

In [None]:
# Dispersion of a condition = mean of its within-condition pairwise distances.
dispersion_dmso, dispersion_abt = np.mean(dist_dmso), np.mean(dist_abt)

print("DMSO dispersion:", dispersion_dmso)
print("ABT  dispersion:", dispersion_abt)

In [None]:
from sklearn.neighbors import NearestNeighbors

def compute_compactness(X, k=10):
    """Return the mean distance from each row of ``X`` to its ``k`` nearest rows.

    ``k + 1`` neighbours are requested because the query points are the
    fitted points themselves: the first column of the result is the
    zero self-distance and is dropped before averaging.

    Parameters
    ----------
    X : array-like
        Points to fit and query, one row per point.
    k : int, default 10
        Number of neighbours (excluding the point itself) to average over.

    Returns
    -------
    Scalar mean over all points and their ``k`` neighbour distances.
    """
    knn_index = NearestNeighbors(n_neighbors=k + 1)
    knn_index.fit(X)
    neighbour_distances = knn_index.kneighbors(X)[0]
    # Column 0 is the self-distance (0); keep only the remaining k columns.
    without_self = neighbour_distances[:, 1:]
    return without_self.mean()

# Mean 10-nearest-neighbour distance within each condition's PCA embedding.
compact_dmso, compact_abt = (
    compute_compactness(embedding, k=10)
    for embedding in (X_dmso_pca, X_abt_pca)
)

print("DMSO compactness:", compact_dmso)
print("ABT  compactness:", compact_abt)

In [None]:
# Mean and standard deviation of the within-condition pairwise distances.
dmso_mean, dmso_std = np.mean(dist_dmso), np.std(dist_dmso)
abt_mean, abt_std = np.mean(dist_abt), np.std(dist_abt)

print("DMSO mean/std:", dmso_mean, dmso_std)
print("ABT  mean/std:", abt_mean, abt_std)

In [None]:
!pip install scvi-tools

In [None]:
import scvi
import torch

In [None]:
# Display the installed scvi-tools version as the cell output.
scvi.__version__

In [None]:
# Load the filtered per-condition .h5ad files (DMSO first, then ABT) that the
# scVI cells below operate on.
adata_dmso_raw, adata_abt_raw = (
    sc.read_h5ad(h5ad_path)
    for h5ad_path in (
        "/content/drive/MyDrive/ms_singlecell/data/PMS_DMSO_filtered.h5ad",
        "/content/drive/MyDrive/ms_singlecell/data/PMS_ABT_filtered.h5ad",
    )
)

print(adata_dmso_raw.shape)
print(adata_abt_raw.shape)

In [None]:
import numpy as np

# Gene names present in both conditions (np.intersect1d returns the sorted,
# unique values shared by its two inputs).
common_genes = np.intersect1d(adata_dmso_raw.var_names, adata_abt_raw.var_names)

print("Number of common genes:", common_genes.size)

In [None]:
# Restrict both objects to the shared genes, in `common_genes` order, and
# detach each result from its parent with .copy().
adata_dmso_raw, adata_abt_raw = (
    adata[:, common_genes].copy()
    for adata in (adata_dmso_raw, adata_abt_raw)
)

print("DMSO shape:", adata_dmso_raw.shape)
print("ABT  shape:", adata_abt_raw.shape)

In [None]:
# Save the gene-aligned objects to .h5ad files. The paths are relative, so
# they resolve against the kernel's current working directory.
# NOTE(review): on Colab that directory is presumably on the ephemeral VM disk
# rather than on Drive — confirm these snapshots are meant to be temporary.
adata_dmso_raw.write("adata_dmso_raw_frozen.h5ad")
adata_abt_raw.write("adata_abt_raw_frozen.h5ad")

In [None]:
# Register each condition's AnnData with scVI using default settings (no
# layer or batch key is passed).
# NOTE(review): scVI presumably expects raw counts in .X — confirm that the
# "filtered" files have not been normalised.
for _adata in (adata_dmso_raw, adata_abt_raw):
    scvi.model.SCVI.setup_anndata(_adata)

In [None]:
# Fix the global scvi-tools seed before the models are built so that weight
# initialisation and the subsequent training are repeatable across kernel
# restarts; without it the latent spaces, and therefore the dispersion
# numbers derived from them, differ from run to run.
scvi.settings.seed = 0

# One independent scVI model per condition, each with 20 latent dimensions.
# NOTE(review): 20 presumably mirrors the 20 PCs used for the PCA embedding —
# confirm.
model_dmso = scvi.model.SCVI(
    adata_dmso_raw,
    n_latent=20
)

model_abt = scvi.model.SCVI(
    adata_abt_raw,
    n_latent=20
)

In [None]:
# Train both models with the same epoch budget, DMSO first and then ABT.
for _model in (model_dmso, model_abt):
    _model.train(max_epochs=100)

In [None]:
# Latent embedding of every cell from its own condition's trained model.
X_dmso_scvi, X_abt_scvi = (
    model.get_latent_representation()
    for model in (model_dmso, model_abt)
)

print("DMSO scVI shape:", X_dmso_scvi.shape)
print("ABT  scVI shape:", X_abt_scvi.shape)

In [None]:
# Report whether PyTorch can see a CUDA device.
# NOTE(review): this check sits after the training cell, so it only tells the
# reader after the fact whether a GPU was available — consider running it
# before training.
import torch
print(torch.cuda.is_available())

In [None]:
from scipy.spatial.distance import pdist

# Euclidean distance between every pair of cells in the scVI latent space,
# computed within each condition separately.
# NOTE(review): pdist materialises one value per pair of rows, so memory grows
# quadratically with the number of cells — confirm this fits for these data.
dist_dmso_scvi, dist_abt_scvi = (
    pdist(latent, "euclidean")
    for latent in (X_dmso_scvi, X_abt_scvi)
)

print("DMSO scVI distances:", dist_dmso_scvi.shape)
print("ABT  scVI distances:", dist_abt_scvi.shape)

In [None]:
# Dispersion in scVI space = mean of the within-condition pairwise distances.
disp_dmso_scvi, disp_abt_scvi = np.mean(dist_dmso_scvi), np.mean(dist_abt_scvi)

print("scVI DMSO dispersion:", disp_dmso_scvi)
print("scVI ABT  dispersion:", disp_abt_scvi)