Environment: This script should be run with the `python_scvi_environment` environment using the devcontainer `docker_python_scvi`

In [None]:
import scanpy as sc
import scvi
import os
import numpy as np
import pandas as pd
from scvi.model.utils import mde
import matplotlib.pyplot as plt
import pickle
import pickle

Get the paths to all of the segmentation folders where the processed AnnData objects (adatas) are stored.

In [None]:
# Root folders for inputs and results, given as relative paths.
data_folder = r"../../data"
output_folder = r"../../outputs"
os.makedirs(output_folder, exist_ok=True)

# One folder per segmentation name, kept in the order they will be loaded.
segmentation_names = ("mock", "4hr_avr", "6hr_avr", "9hr_avr", "avrrpt24")
list_of_baysor_segmentations = [
    os.path.join(data_folder, "segmentations", seg_name)
    for seg_name in segmentation_names
]

In [None]:
# Default figure size for the scanpy plots produced below.
sc.set_figure_params(figsize=(4, 4))
# Set the scvi-tools global seed.
scvi.settings.seed = 1

Read in the preprocessed spatial adatas

In [None]:
# Load the preprocessed AnnData of every segmentation folder and record the
# folder name in obs["batch"] so each cell keeps track of where it came from.
adata_list = []
for segmentation_dir in list_of_baysor_segmentations:
    h5ad_path = os.path.join(
        segmentation_dir, "adatas", "preprocessed_and_filtered_01.h5ad"
    )
    experiment = sc.read(h5ad_path)
    experiment.obs["batch"] = os.path.basename(segmentation_dir)
    adata_list.append(experiment.copy())

# Combine all loaded objects into a single AnnData.
spatial_data = sc.concat(adata_list)

Prepare and run scvi to integrate the adata objects from different timepoints into a joint embedding

In [None]:
# Remove blank barcode features. Take an explicit copy so the in-place edits
# below act on a real AnnData object instead of implicitly copying a view.
is_blank = spatial_data.var.index.str.contains("Blank")
spatial_data = spatial_data[:, ~is_blank].copy()
spatial_data.obs_names_make_unique()
spatial_data.var_names_make_unique()

# Read in the tsv that contains pairings between gene ID and gene name.
gene_symbols = pd.read_csv(
    os.path.join(data_folder, "useful_files", "geneID_to_geneName_MERSCOPE_panel1.txt"),
    sep="\t",
    index_col=0,
)

# Join the var table (indexed by gene ID) with the name table a single time.
# merge() defaults to an inner join, so features that have no entry in the
# mapping file do not appear in new_vars and are dropped by the subset below.
new_vars = spatial_data.var.merge(gene_symbols, left_index=True, right_on="gene_id")

# Keep only the mapped features, in the same order as the rows of new_vars,
# so the replacement var table lines up with the columns of the object.
gene_id = new_vars.gene_id.tolist()
spatial_data = spatial_data[:, gene_id].copy()

# Perform the conversion: index var by gene name; gene_id remains as a column.
new_vars.index = new_vars.gene_name.tolist()
new_vars = new_vars.drop(["gene_name"], axis=1)
spatial_data.var = new_vars

# Train the scvi model on the "counts" layer, using the segmentation of origin
# (obs["batch"]) as the batch covariate.
scvi.model.SCVI.setup_anndata(spatial_data, layer="counts", batch_key="batch")
vae = scvi.model.SCVI(spatial_data, n_layers=2, n_latent=20, gene_likelihood="nb")
vae.train()

Get the scvi representations of each cell

In [None]:
# Store the latent representation returned by the trained model on the object.
latent = vae.get_latent_representation()
spatial_data.obsm["X_scVI"] = latent

# Build the neighbour graph on the scVI representation and run UMAP, both with
# a fixed random state.
sc.pp.neighbors(spatial_data, random_state=0, use_rep="X_scVI")
sc.tl.umap(spatial_data, random_state=0)

Install `igraph` and `leidenalg`, then run Leiden clustering and plot the clusters on the UMAP.

In [None]:
# Notebook magics: install the packages used for Leiden clustering.
%pip install igraph
%pip install leidenalg

# Cluster the cells with a fixed random state, then colour the UMAP by the
# resulting "leiden" labels.
sc.tl.leiden(spatial_data, random_state=0)
sc.pl.umap(spatial_data, color="leiden")

Plot each dataset spatially colored by its joint leiden cluster

In [None]:
# Make sure the "adatas" output folder exists before anything is written to it.
adatas_output_dir = os.path.join(output_folder, "adatas")
os.makedirs(adatas_output_dir, exist_ok=True)

In [None]:
unique_batches = np.unique(spatial_data.obs["batch"])

# Expose the x/y columns of obs as a two-column embedding named "spatial".
xy_rows = [spatial_data.obs["x"], spatial_data.obs["y"]]
spatial_data.obsm["X_spatial"] = np.array(xy_rows).T

# One plot per batch, with cells coloured by their "leiden" label.
for batch_name in unique_batches:
    in_batch = spatial_data.obs["batch"] == batch_name
    spatial_plot = spatial_data[in_batch]
    sc.pl.embedding(spatial_plot, basis="spatial", color="leiden")

Write out the joint embedded spatial dataset. In addition to the full h5ad file, each part of the object (obs, var, uns, obsm, layers, X) is written out separately and loaded back in the following script, because of environment compatibility issues between different scanpy versions.

In [None]:
# Label every cell with its modality before the object is written.
spatial_data.obs["modality"] = "spatial"

# Write the complete joint object as a single h5ad file.
joint_h5ad_path = os.path.join(
    output_folder, "adatas", "spatial_concatenated_00.h5ad"
)
spatial_data.write(joint_h5ad_path)

In [None]:
# All parts of the object are written into the same "adatas" output folder.
adatas_dir = os.path.join(output_folder, "adatas")

# Tabular annotations are written as CSV.
spatial_data.obs.to_csv(os.path.join(adatas_dir, "spatial_concatenated_00_obs.csv"))
spatial_data.var.to_csv(os.path.join(adatas_dir, "spatial_concatenated_00_var.csv"))

# uns, obsm and layers are each pickled to their own file, in this order.
# NOTE(review): these are pickled as whatever container objects AnnData hands
# back for the attributes - confirm the loading environment can unpickle them.
pickled_parts = (
    ("spatial_concatenated_00_uns.pkl", "uns"),
    ("spatial_concatenated_00_obsm.pkl", "obsm"),
    ("spatial_concatenated_00_layers.pkl", "layers"),
)
for pickle_name, attribute in pickled_parts:
    file_path = os.path.join(adatas_dir, pickle_name)
    with open(file_path, "wb") as handle:
        pickle.dump(getattr(spatial_data, attribute), handle)

# The main data matrix is saved with NumPy.
x_path = os.path.join(adatas_dir, "spatial_concatenated_00_X.npy")
np.save(x_path, spatial_data.X)

Save the scvi VAE

In [None]:
# Pickle the trained model object into the top-level output folder.
# NOTE(review): scvi-tools models also provide their own save method; confirm
# that a plain pickle is what the downstream script expects to load.
file_path = os.path.join(output_folder, "vae_object.pkl")
with open(file_path, "wb") as vae_file:
    pickle.dump(vae, vae_file)