# Loading and preparing data 

In [17]:
import scvi
import scanpy as sc

import pandas as pd
import numpy as np
import scipy

import os
import sys

# Apply scanpy's figure settings with figsize=(5, 5) for any plots drawn later
# in the session.
sc.set_figure_params(figsize=(5, 5))

In [18]:
# Switch the working directory: the relative paths used further down ('bin/',
# 'data/object/seurat/') are resolved against this location.
# NOTE(review): absolute, machine-specific path — the notebook will not run on
# another machine without editing it.
os.chdir('/research/peer/fdeckert/FD20200109SPLENO')

# Import custom modules

In [19]:
# Make the modules in 'bin/' importable. The path is relative, so this relies
# on the working directory having been set by the preceding os.chdir cell.
sys.path.append('bin/')
import SeuratFacility  # custom module; provides dir2adata / adata2dir used below

# Import AnnData object

In [20]:
# Build the AnnData object from the export under 'data/object/seurat/',
# requesting the 'counts' slot of the 'RNA' assay.
# NOTE(review): dir2adata is a project helper; the on-disk layout it expects
# is not visible from this notebook.
adata = SeuratFacility.dir2adata(
    'data/object/seurat/',
    assay='RNA',
    slot='counts',
)

# Normalize counts and compute highly variable genes

In [None]:
# Stash a copy of the current X in the 'counts' layer before X is normalised;
# the HVG selection and the scvi registration below both read this layer.
adata.layers['counts'] = adata.X.copy() # preserve counts
# Normalise each cell to target_sum=1e4, then log1p-transform. Both calls are
# applied to adata directly (their return values are not used).
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)
# Keep the log-normalised state with all genes in `.raw`, so it remains
# available after the gene subsetting performed in the next cell.
adata.raw = adata # freeze the state in `.raw`

In [None]:
# Highly-variable-gene selection: rank genes with the 'seurat_v3' flavor using
# the 'counts' layer, keep the top 3000, and subset adata to those genes
# (subset=True).
hvg_options = {
    'n_top_genes': 3000,
    'subset': True,
    'layer': 'counts',
    'flavor': 'seurat_v3',
}
sc.pp.highly_variable_genes(adata, **hvg_options)

# Register AnnData with scvi

In [None]:
# Register the AnnData object with scvi: the model input is taken from the
# 'counts' layer and 'treatment' is declared as a categorical covariate
# (presumably a column of adata.obs — verify). With copy=True the call returns
# a new, registered object, which replaces `adata` here.
# NOTE(review): scvi.data.setup_anndata appears to be the older scvi-tools
# entry point; newer releases expose setup via the model class — confirm the
# pinned scvi-tools version before upgrading.
registration_args = {
    'layer': 'counts',
    'categorical_covariate_keys': ['treatment'],
}
adata = scvi.data.setup_anndata(adata, copy=True, **registration_args)

# Creating and training a model 

In [None]:
# SCVI hyper-parameters: 30 latent dimensions, 2 layers with 128 hidden
# units each, and dispersion='gene'.
scvi_hparams = {
    'n_latent': 30,
    'n_hidden': 128,
    'n_layers': 2,
    'dispersion': 'gene',
}
model = scvi.model.SCVI(adata, **scvi_hparams)

In [None]:
# Fit the model using scvi's default training settings (no arguments are
# overridden here).
model.train()

In [None]:
# Persist the trained model under 'data/object/seurat/model/'; overwrite=True
# is passed so that an existing saved model at that path is replaced.
model.save('data/object/seurat/model/', overwrite=True)

# Update anndata object with scvi results 

In [None]:
# Store scvi's normalized expression estimates in the 'data' layer.
# NOTE(review): 10e4 evaluates to 100000, whereas the log-normalisation
# earlier in this notebook used target_sum=1e4 — confirm that the 10x larger
# scale is intended and not a typo for 1e4.
adata.layers['data']=model.get_normalized_expression(library_size=10e4)

In [None]:
# Attach the model's latent representation to obsm under 'latent_hvg_catc'
# (the model was created with n_latent=30).
adata.obsm['latent_hvg_catc']=model.get_latent_representation()

# Store results 

In [None]:
# Write the updated AnnData object to an .h5ad file.
# NOTE(review): the file name is spelled 'andata' (single 'n'); left unchanged
# because other scripts may read this exact path.
adata.write('data/object/seurat/andata_hvg_catc.h5ad')

In [None]:
# Export the 'data' slot to 'data/object/seurat/' as assay 'SCVI_hvg_catc'.
# NOTE(review): build_dir=False / overwrite=True are handed to the project
# helper; presumably they reuse the existing directory and replace earlier
# output — verify against SeuratFacility.adata2dir.
SeuratFacility.adata2dir(
    adata,
    'data/object/seurat/',
    assay='SCVI_hvg_catc',
    slot='data',
    build_dir=False,
    overwrite=True,
)