# Loading and preparing data 

In [None]:
import scvi
import scanpy as sc

import pandas as pd

import os

# Use a square (4, 4) default figure size for scanpy plots.
sc.set_figure_params(figsize=(4, 4))

In [None]:
# Switch the working directory so the relative "data/..." paths used in the
# following cells resolve against this directory.
# NOTE(review): absolute, machine-specific path - adjust when running elsewhere.
os.chdir("/research/peer/fdeckert/FD20200109SPLENO")

In [None]:
adata = sc.read_h5ad("data/h5ad_object/h5ad_qc.h5ad")

# Preserve the matrix currently held in .X as its own layer before .X is
# overwritten below (presumably the scaled expression values - confirm
# against the step that wrote the h5ad file).
adata.layers['scale.data'] = adata.X.copy()

# Get count data from raw, restricted to the genes present in adata.
# Selecting by name returns the columns in adata.var_names order; a boolean
# mask over raw.var_names would keep raw's own ordering and could silently
# misalign genes if the two orders differ.
adata.layers['counts'] = adata.raw[:, adata.var_names].X.copy()
adata.X = adata.layers['counts']

# Normalize each cell to a total of 1e4 and log1p-transform the result into
# the 'data' layer. inplace=False leaves adata.X (the counts) untouched and
# returns a dict whose 'X' entry is the normalized matrix; log1p is then
# applied to that matrix, with copy=True so a new matrix is returned.
normalized = sc.pp.normalize_total(adata, target_sum=1e4, inplace=False)['X']
adata.layers['data'] = sc.pp.log1p(normalized, copy=True)

# Register model

In [None]:
# Columns handed to both registrations as continuous covariates.
covariate_columns = ("nCount_RNA", "pMt_RNA", "msCC_diff_RNA")

# Registration without a batch key. The object returned with copy=True
# replaces `adata` from here on.
adata = scvi.data.setup_anndata(
    adata,
    copy=True,
    layer="counts",
    continuous_covariate_keys=list(covariate_columns),
)

# Second registration, built from the already-registered `adata`, that
# additionally uses 'sample_name' as the batch key.
adata_batch = scvi.data.setup_anndata(
    adata,
    copy=True,
    layer="counts",
    batch_key="sample_name",
    continuous_covariate_keys=list(covariate_columns),
)

# Creating and training a model 

In [None]:
# One SCVI model per registered AnnData object: `model` is built on the
# registration without a batch key, `model_batch` on the registration that
# uses batch_key='sample_name'. Both use the constructor defaults.
model = scvi.model.SCVI(adata)
model_batch = scvi.model.SCVI(adata_batch)

In [None]:
# Fit the model without a batch key first, then the batch-aware one, both
# with the default training settings.
for current_model in (model, model_batch):
    current_model.train()

In [None]:
# Write each trained model to its own directory (paths are relative to the
# current working directory).
for trained_model, target_dir in (
    (model, "data/scvi/model/"),
    (model_batch, "data/scvi/model_batch/"),
):
    trained_model.save(target_dir)

In [None]:
# model = scvi.model.SCVI.load("model_all/", adata, use_gpu=False)

# Obtaining model outputs

In [None]:
# latent = model.get_latent_representation()
# adata.obsm["X_scVI"] = latent

In [None]:
# use scVI latent space for UMAP generation
# sc.pp.neighbors(adata, use_rep="X_scVI")
# sc.tl.umap(adata, min_dist=0.3)

In [None]:
# sc.pl.umap(
#     adata,
#     color=["tissue"],
#     frameon=False
# )

In [None]:
# sc.pl.umap(
#     adata,
#     color=["donor", "cell_source"],
#     ncols=2,
#     frameon=False,
# )