In [None]:
pip install -U scvi-tools

In [None]:
pip install scanpy

In [None]:
import os
import tempfile
import scanpy as sc
import scvi
import seaborn as sns
import torch

import numpy as np
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

In [None]:
# Read the AnnData object used throughout this notebook from an .h5ad file.
# The file name suggests un-normalised raw counts — TODO confirm.
# NOTE(review): absolute, machine-specific path; the notebook only runs where
# this exact location exists.
adata = sc.read(
    filename="/work/SCVI_models/batch_removal_runs/healthy_hamstring_processed_adata_raw_nonormalization (1).h5ad"
)

In [None]:
# Show the summary representation of the loaded object as the cell output.
adata

In [None]:
# Quick sanity check of the value range of the expression matrix in adata.X.
print(
    "Min:", adata.X.min(),
    "Max:", adata.X.max(),
    "Mean:", adata.X.mean(),
)

In [None]:
# Fix the scvi-tools global seed so that the stochastic steps below
# (model initialisation and training) are reproducible on Restart & Run All.
scvi.settings.seed = 0

# Record the library version alongside the outputs.
print("Last run with scvi-tools version:", scvi.__version__) # 1.2.2 when running cpu but 1.2.1 when running GPU

In [None]:
sc.set_figure_params(figsize=(6, 6), frameon=False)
sns.set_theme()
torch.set_float32_matmul_precision("high")
save_dir = tempfile.TemporaryDirectory()

%config InlineBackend.print_figure_kwargs={"facecolor": "w"}
%config InlineBackend.figure_format="retina"

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the observations as a test set. The split is deterministic
# (random_state=42) and is recorded in adata.obs["split"] so it can be
# recovered later.
split_key = "split"
idx_train, idx_test = train_test_split(adata.obs_names, test_size=0.1, random_state=42)

# Everything is "train" unless it was drawn into the test indices.
adata.obs[split_key] = "train"
adata.obs.loc[idx_test, split_key] = "test"

# Materialise independent copies so later steps do not operate on views of adata.
adata_train = adata[adata.obs[split_key] == "train"].copy()
adata_test = adata[adata.obs[split_key] == "test"].copy()

# Label-based assignment silently marks *every* row sharing a name, so
# duplicated obs_names would leak cells between the splits. Fail loudly instead.
assert adata_test.n_obs == len(idx_test) and adata_train.n_obs == len(idx_train), (
    "train/test sizes do not match the sampled indices - are adata.obs_names unique?"
)


In [None]:
# Peek at the first 10x10 values of the expression matrix.
# The original cell referenced `adata_raw`, which is never defined in this
# notebook, and then accessed `.X` on a NumPy array; both raise. `adata` is the
# object loaded from disk above — NOTE(review): confirm this is the matrix that
# was meant to be inspected.
# Slicing happens *before* densifying so the full matrix is never converted.
counts_preview = adata.X[:10, :10]
counts_preview.toarray() if hasattr(counts_preview, "toarray") else np.asarray(counts_preview)


In [None]:
# Register the training data with scVI: no layer is selected (layer=None) and
# the "donor_id" column is passed as the batch key.
scvi.model.SCVI.setup_anndata(
    adata_train,
    layer=None,
    batch_key="donor_id",
)

In [None]:
# Build the SCVI model on the training split with 2 layers, a 30-dimensional
# latent space and the "nb" gene likelihood.
model = scvi.model.SCVI(
    adata_train,
    n_layers=2,
    n_latent=30,
    gene_likelihood="nb",
)

In [None]:
# Show the model's summary representation as the cell output.
model

In [None]:
# Train the model with the library's default training settings (no arguments
# are overridden here).
# NOTE(review): a later cell rebinds `model` to a model loaded from disk, so
# the result of this training run is only kept via the model.save(...) cell.
model.train()

In [None]:
# Change into the SCVI_models directory so that relative output paths used
# later in the notebook resolve there.
# The change is guarded so the cell can be re-run: an unconditional relative
# chdir would try to descend into SCVI_models/SCVI_models the second time.
if os.path.basename(os.getcwd()) != "SCVI_models":
    os.chdir("SCVI_models")  # Provide the new path here

In [None]:
# Persist the trained model to a fixed directory on disk.
# (An earlier variant of this cell saved into the temporary `save_dir` with
# overwrite=True instead.)
# NOTE(review): `overwrite` is not passed here, so re-running this cell when
# the target directory already exists is expected to fail — confirm whether
# that protection is intentional.
model.save(dir_path="/work/SCVI_models/may_models/SCVI_rerun_HH__final_batch")

In [None]:
# Load a previously saved SCVI model and attach adata_train to it. This
# rebinds `model`, replacing whatever was trained earlier in this session.
# NOTE(review): the directory loaded here (".../new_models/SCVI_HH_newpredict_model_final_batch")
# is not the directory the model.save(...) cell writes to
# (".../may_models/SCVI_rerun_HH__final_batch") — confirm which model the
# evaluation below is supposed to use.
model = scvi.model.SCVI.load(
    dir_path="/work/SCVI_models/new_models/SCVI_HH_newpredict_model_final_batch",
    adata=adata_train,
)

In [None]:
# Row sums of the test matrix: one total per observation (used later to scale
# the normalised predictions).
library = adata_test.X.sum(axis=1)

In [None]:
# Model-predicted normalised expression for the held-out observations,
# requested as a NumPy array rather than a labelled table.
y_pred = model.get_normalized_expression(
    adata_test,
    return_numpy=True,
)

In [None]:
# (Re)compute the per-observation totals of the test matrix.
# NOTE(review): this repeats the computation from an earlier cell.
library = adata_test.X.sum(axis=1)

In [None]:
# Normalise `library` to a plain 1-D ndarray with one entry per observation.
# Summing a SciPy sparse matrix yields an np.matrix, and np.matrix.flatten()
# stays 2-D (1, n), whereas a dense input gives (n,). Going through
# np.asarray(...).ravel() gives shape (n,) in both cases and when this cell is
# re-run.
library = np.asarray(library).ravel()

In [None]:
# Ground-truth expression of the test split as a dense array: sparse matrices
# are densified via .toarray(), anything else is copied into an ndarray.
if hasattr(adata_test.X, 'toarray'):
    y_true = adata_test.X.toarray()
else:
    y_true = np.array(adata_test.X)

In [None]:
# Scale each observation's predicted normalised expression by that
# observation's total (row i of y_pred is multiplied by library[i]).
# Reshaping to a column vector makes the broadcast explicit and keeps it
# element-wise even if `library` is still an np.matrix or has shape (1, n);
# the original double transpose relied on an earlier cell having converted it.
rec = y_pred * np.asarray(library).reshape(-1, 1)

In [None]:
#rec = rec.flatten()

In [None]:
# Display the predicted normalised expression array as the cell output.
y_pred

In [None]:
# Display the library-size-scaled reconstruction as the cell output.
rec

In [None]:
# 1. Get ground truth expression (dense array)
y_true = adata_test.X.toarray() if hasattr(adata_test.X, 'toarray') else np.array(adata_test.X)

# 2. Check shapes before computing any metric. The second line reports the
#    shape of `rec` (the scaled reconstruction), so it is labelled as such.
print("y_true shape:", y_true.shape)
print("rec shape:", rec.shape)

# Fail here, with a clear message, rather than inside a metric call later on.
assert y_true.shape == rec.shape, (
    f"shape mismatch: y_true {y_true.shape} vs rec {rec.shape}"
)

In [None]:
import anndata

# Unwrap `rec` when it is an AnnData object so that only its data matrix is
# stored; any other value is left unchanged.
rec = rec.X if isinstance(rec, anndata.AnnData) else rec

# Attach the reconstruction to the test object under obsm["X_reconstructed"].
adata_test.obsm["X_reconstructed"] = rec

# Write the test object, including the reconstruction, to an .h5ad file.
# The path is relative, so the file lands in the current working directory.
# NOTE(review): the file name mentions "latent", but no latent representation
# is added to adata_test in the visible cells — confirm.
adata_test.write("adata_with_latent_and_reconstructed_HH_SCVI_after_fix_batch.h5ad")
