# Loading and preparing data 

In [None]:
import scvi
import scanpy as sc

import pandas as pd
import numpy as np
import scipy

import os

# Default size handed to scanpy's figure settings for every plot in this notebook.
figure_size = (4, 4)
sc.set_figure_params(figsize=figure_size)

In [None]:
# All relative 'data/...' paths used below are resolved against this directory.
project_root = '/research/peer/fdeckert/FD20200109SPLENO'
os.chdir(project_root)

In [None]:
# Create an AnnData object from the count matrix and attach the gene names as var_names.
# NOTE(review): var_names are assigned without transposing, so the .mtx file is
# presumably stored as cells x genes -- confirm against the export script.
adata = sc.read_mtx('data/object/components/slots/seurat_counts.mtx')
adata.var_names = pd.read_table('data/object/components/slots/seurat_counts_genes.csv', index_col=0).index
# Add obs from meta_data (the first CSV column becomes the obs index)
adata.obs = pd.read_csv('data/object/components/meta_data/seurat_meta.csv', index_col=0)
# Subset by variable features.
# Slicing an AnnData returns a view; .copy() materialises it so the layer
# assignment below writes to a real object instead of triggering an implicit
# copy-on-modification.
variable_features = pd.read_csv('data/object/components/variable_features/seurat_variable_features.csv', index_col=0).index
adata = adata[:, variable_features].copy()
# Set layers.
# Store an independent copy of the counts so that a later in-place change to
# adata.X cannot silently alter the 'counts' layer that scVI is set up on.
adata.layers['counts'] = adata.X.copy()

# Register model

In [None]:
# Both registrations read the 'counts' layer and share the same continuous covariates;
# the second one additionally registers 'treatment' as a categorical covariate.
count_layer = 'counts'
continuous_covariates = ['pMt_RNA', 'msCC_diff_RNA']

# Plain registration. copy=True is requested, and the returned object replaces `adata`.
adata = scvi.data.setup_anndata(
    adata,
    layer=count_layer,
    continuous_covariate_keys=list(continuous_covariates),
    copy=True,
)

# Registration with the treatment covariate, built from the `adata` returned above.
adata_batch = scvi.data.setup_anndata(
    adata,
    layer=count_layer,
    categorical_covariate_keys=['treatment'],
    continuous_covariate_keys=list(continuous_covariates),
    copy=True,
)

# Creating and training a model 

In [None]:
# One SCVI model per registered AnnData object, constructed in the same order as before.
model, model_batch = (
    scvi.model.SCVI(registered) for registered in (adata, adata_batch)
)

In [None]:
# Train both models with the library's default settings, plain model first.
for current_model in (model, model_batch):
    current_model.train()

In [None]:
# model.save('data/scvi/model/', overwrite = True)
# model_batch.save('data/scvi/model_batch/', overwrite = True)

# Update anndata object with scvi results 

In [None]:
# scVI-normalized expression, stored as one layer per model.
# NOTE: library_size=10e4 evaluates to 100000.0 (i.e. 1e5, not 1e4); kept as in the original.
adata.layers['scvi.data'] = model.get_normalized_expression(library_size=10e4)
# The batch layer must come from model_batch (the model built on adata_batch).
# It was previously filled from `model`, which made it a duplicate of 'scvi.data'.
adata_batch.layers['scvibatch.data'] = model_batch.get_normalized_expression(library_size=10e4)

In [None]:
# Attach each model's latent representation to the obsm slot of its own AnnData object.
latent_plain = model.get_latent_representation()
adata.obsm['X_scVI'] = latent_plain

latent_batch = model_batch.get_latent_representation()
adata_batch.obsm['X_scVIbatch'] = latent_batch

# Store results 

In [None]:
# Write the normalized-expression layers as CSC sparse matrices (.npz).
scvi_matrix = scipy.sparse.csc_matrix(adata.layers['scvi.data'])
scipy.sparse.save_npz('data/object/components/slots/scvi_data.npz', scvi_matrix)
scvibatch_matrix = scipy.sparse.csc_matrix(adata_batch.layers['scvibatch.data'])
scipy.sparse.save_npz('data/object/components/slots/scvibatch_data.npz', scvibatch_matrix)

# Write the cell identifiers (obs_names) next to each matrix.
cell_ids = adata.obs_names.to_series()
cell_ids.to_csv('data/object/components/slots/scvi_data_cellid.txt')
cell_ids_batch = adata_batch.obs_names.to_series()
cell_ids_batch.to_csv('data/object/components/slots/scvibatch_data_cellid.txt')

# Write the gene identifiers (var_names) next to each matrix.
gene_ids = adata.var_names.to_series()
gene_ids.to_csv('data/object/components/slots/scvi_data_genes.txt')
gene_ids_batch = adata_batch.var_names.to_series()
gene_ids_batch.to_csv('data/object/components/slots/scvibatch_data_genes.txt')

In [None]:
# Write the latent embeddings as plain-text matrices.
for latent_path, embedding in (
    ('data/object/components/reductions/scvi_latent.txt', adata.obsm['X_scVI']),
    ('data/object/components/reductions/scvibatch_latent.txt', adata_batch.obsm['X_scVIbatch']),
):
    np.savetxt(latent_path, embedding)

# Write the cell identifiers (obs_names) that accompany each embedding.
for cellid_path, cell_index in (
    ('data/object/components/reductions/scvi_latent_cellid.txt', adata.obs_names),
    ('data/object/components/reductions/scvibatch_latent_cellid.txt', adata_batch.obs_names),
):
    cell_index.to_series().to_csv(cellid_path)

# Write the gene identifiers (var_names) alongside the embeddings.
# NOTE(review): the saved matrices come from obsm, so this gene list presumably
# does not label their columns -- confirm what the downstream reader expects.
for genes_path, gene_index in (
    ('data/object/components/reductions/scvi_latent_genes.txt', adata.var_names),
    ('data/object/components/reductions/scvibatch_latent_genes.txt', adata_batch.var_names),
):
    gene_index.to_series().to_csv(genes_path)