In [None]:
import os
import tempfile

import scanpy as sc
import scvi
import seaborn as sns
import torch
import numpy as np
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

In [None]:
# Read the HTAPP .h5ad file from disk into memory.
raw_h5ad_path = "/work/SCVI_models/HTAPP_rawdata/HTAPP_997_processed_raw_FINAL.h5ad"
adata = sc.read(raw_h5ad_path)

In [None]:
# Display the summary repr of the object that was just loaded.
adata

In [None]:
# Display the "sex" column of the per-observation metadata table.
adata.obs["sex"]

In [None]:
from sklearn.model_selection import train_test_split

# Randomly hold out 10% of the observations as a test set. The fixed
# random_state makes the partition reproducible across runs.
split_key = "split"
idx_train, idx_test = train_test_split(adata.obs_names, test_size=0.1, random_state=42)

# Record the assignment on the full object: everything defaults to "train",
# then the held-out observations are relabelled "test".
adata.obs[split_key] = "train"
adata.obs.loc[idx_test, split_key] = "test"

# Materialise independent copies (not views) of each partition so they can be
# modified later without touching `adata`.
adata_train = adata[adata.obs[split_key] == "train"].copy()
adata_test = adata[adata.obs[split_key] == "test"].copy()

# Sanity check: every observation landed in exactly one partition.
assert adata_train.n_obs + adata_test.n_obs == adata.n_obs


In [None]:
# Register the training data with scVI. layer=None is passed explicitly and
# "replicate" is given as the batch key.
scvi.model.SCVI.setup_anndata(
    adata_train,
    batch_key="replicate",
    layer=None,
)

In [None]:
# Instantiate the scVI model on the training partition: 2 layers, a
# 30-dimensional latent space and the "nb" gene likelihood.
model = scvi.model.SCVI(
    adata_train,
    n_latent=30,
    n_layers=2,
    gene_likelihood="nb",
)

In [None]:
# Display the model's repr to review its configuration before training.
model

In [None]:
# Fit the model on adata_train; no training arguments are overridden here, so
# the library defaults apply.
# NOTE(review): no random seed is set in the visible cells, so repeated runs
# may not reproduce the same weights. Confirm whether a seed is set elsewhere.
model.train()

In [None]:
# Persist the trained model to disk.
# NOTE(review): this path is relative ("work/..."), whereas the input data is
# read from the absolute "/work/...". Confirm the intended working directory
# or whether a leading "/" is missing.
model.save("work/SCVI_models/HTAPP_SCVImodel_final_raw_may")

In [None]:
# Rebind `model` to a model loaded from disk and attached to adata_train.
# NOTE(review): the directory loaded here ("..._SCVImodel_final") is not the
# one written by the save cell ("..._SCVImodel_final_raw_may"), so everything
# downstream uses that stored model rather than the one trained above.
# Confirm this is intended.
model = scvi.model.SCVI.load("work/SCVI_models/HTAPP_SCVImodel_final", adata=adata_train)

In [None]:
# Per-observation total of the test matrix (row sums of adata_test.X); used
# further down to rescale the model's normalized expression.
# NOTE(review): this is computed before adata_test is filtered by cell type in
# a later cell, so its length may no longer match the filtered object.
library=adata_test.X.sum(axis = 1)

In [None]:
# Cell-type labels to keep in the held-out set; rows of adata_test with any
# other "cell_type" value are dropped below.
expected_categories = [
    'malignant cell',
    'endothelial cell of hepatic sinusoid',
    'blood vessel endothelial cell',
    'fibroblast',
    'blood vessel smooth muscle cell',
    'hepatic stellate cell',
    'hepatocyte',
    'neuron',
    'macrophage',
    'T cell',
]

# Boolean mask over the test observations, True where the label is expected.
keep_mask = adata_test.obs["cell_type"].isin(expected_categories)

# Subset by the mask and take a copy so adata_test is a standalone object.
adata_test = adata_test[keep_mask, :].copy()


In [None]:
# Ask the model for its normalized expression on the held-out observations,
# returned as a NumPy array rather than a DataFrame.
y_pred = model.get_normalized_expression(
    adata_test,
    return_numpy=True,
)

In [None]:
# Per-observation total counts as a flat 1-D array.
# Recomputed here from the *current* adata_test rather than reusing the value
# computed before the cell-type filter: y_pred is produced from the filtered
# object, so the library vector must have exactly one entry per remaining row
# or the rescaling step fails / misaligns.
# np.asarray(...).ravel() yields shape (n_obs,) whether .X is dense or sparse
# (a sparse row-sum comes back as a 2-D np.matrix).
library = np.asarray(adata_test.X.sum(axis=1)).ravel()

In [None]:
# Observed values of the held-out matrix as a dense NumPy array: use
# .toarray() when the matrix offers it, otherwise copy via np.array.
held_out_counts = adata_test.X
if hasattr(held_out_counts, 'toarray'):
    y_true = held_out_counts.toarray()
else:
    y_true = np.array(held_out_counts)

In [None]:
# Rescale each observation's normalized expression by its library total.
# Working on the transpose lets the per-observation vector broadcast along the
# last axis; transposing back restores y_pred's orientation.
rec = np.multiply(y_pred.T, library).T

In [None]:
# Inspect the normalized expression returned by the model.
y_pred

In [None]:
# Inspect the library-rescaled reconstruction.
rec

In [None]:
# Shape of the reconstruction; it should equal y_pred.shape, since rec is
# y_pred rescaled element-wise.
print(rec.shape)

In [None]:
import anndata

# Defensive unwrap: should `rec` ever be an AnnData, keep only its data matrix
# (.X); otherwise leave it untouched.
rec = rec.X if isinstance(rec, anndata.AnnData) else rec

# Attach the reconstruction to the held-out object under obsm, one row per
# observation.
adata_test.obsm["X_reconstructed"] = rec

# Write the held-out object, including the reconstruction, to an .h5ad file in
# the current working directory.
output_h5ad = "adata_post_with_latent_and_reconstructed_HTAPP_SCVI_final_after_fix_batch.h5ad"
adata_test.write(output_h5ad)

In [None]:
# Latent representation for the full `adata` object (not only the held-out
# subset), computed in mini-batches of 256 observations.
latent = model.get_latent_representation(
    adata,
    batch_size=256,
)
