In [1]:
# Notebook parameters; all three default to an empty string here.
#   batch, label   -- looked up in adata_final.var_names / adata_final.obs.columns
#                     for the embedding plots and forwarded to compute_metrics.
#   original_data  -- argument handed to scib.pp.read_seurat to load the
#                     un-integrated dataset.
# NOTE(review): presumably overridden by whatever executes this notebook
# (parameter injection) -- confirm against the pipeline.
batch = ""
label = ""
original_data = ""

Jupyter notebook for plotting the results and computing the integration metrics of the conos integration algorithm

Author: Erno Hänninen

Title: quantify_conos.ipynb

Created: 2022-11-29

In [2]:
import sys
import os
sys.path.insert(0, "../../../../Scripts") #Adding a path to be able to import the jupyter_functions
from jupyter_functions import *
import scanpy as sc
import scib
import pandas as pd
import h5py
import scipy.io
from scipy.sparse import csr_matrix


In [None]:
# Load the un-integrated dataset. It is used further down as the source of the
# categorical obs columns and as the reference object passed to compute_metrics.
# Fail early with a clear message when the notebook is run without its
# `original_data` parameter set, instead of an opaque error from the reader.
if not original_data:
    raise ValueError(
        "`original_data` is empty: set it to the Seurat object to load before running this notebook."
    )
original_adata = scib.pp.read_seurat(original_data)

In [None]:

# The output of saveConosForScanPy was not directly readable by scanpy, so the
# individual pieces stored in conos_adata.h5 are read one by one and the AnnData
# object is assembled from those matrices and data frames by hand.


def _csr_from_h5_group(group, swap_shape=False):
    """Build a CSR matrix from an HDF5 group holding `data`, `indices`, `indptr` and `shape`.

    Parameters
    ----------
    group
        Open h5py group that contains the four datasets named above.
    swap_shape
        If True, the two stored dimensions are swapped before they are passed
        to ``csr_matrix``.

    Returns
    -------
    scipy.sparse.csr_matrix
        Matrix built from in-memory copies of the datasets, so it stays valid
        after the HDF5 file has been closed.
    """
    stored_shape = group["shape"][:]
    n_rows, n_cols = int(stored_shape[0]), int(stored_shape[1])
    if swap_shape:
        n_rows, n_cols = n_cols, n_rows
    return csr_matrix(
        (group["data"][:], group["indices"][:], group["indptr"][:]),
        shape=(n_rows, n_cols),
    )


# Only the reads happen while the file is open; every `[:]` copies the dataset
# into memory, so the AnnData assembly below does not need the file handle.
with h5py.File("../Integrated_adata/conos_adata.h5", "r") as f:
    # Metadata, indexed by cell id
    metadata = pd.DataFrame(f["metadata"]["metadata.df"][:]).set_index("CellId")

    # Genes, embedding and pseudo-PCA tables as pandas data frames
    gene_df = pd.DataFrame(f["genes"]["genes.df"][:])
    embedding_df = pd.DataFrame(f["embedding"]["embedding.df"][:])
    pseudopca_df = pd.DataFrame(f["pseudopca"]["pseudopca.df"][:])

    # Graph connectivity and graph distance matrices
    graph_conn_mtx = _csr_from_h5_group(f["graph_connectivities"])
    graph_dist_mtx = _csr_from_h5_group(f["graph_distances"])

    # Raw count matrix: as before, the stored arrays are interpreted as CSR and
    # the result is transposed. Previously the stored shape was read but never
    # used, so scipy inferred the size from the largest index and the matrix
    # came out too small whenever the trailing entries were empty.
    # NOTE(review): swapping assumes the stored shape describes the matrix
    # *after* the transpose (the orientation used for obs/var below) -- confirm
    # against the file writer.
    count_csr_matrix = _csr_from_h5_group(f["raw_count_matrix"], swap_shape=True).transpose()

# Build the AnnData object directly in memory instead of writing a temporary
# "raw_count_matrix.mtx" into the working directory and reading it back.
# float32 is the default dtype of sc.read_mtx, which the round trip relied on.
adata = sc.AnnData(count_csr_matrix.tocsr().astype("float32"))

# Initialize var and obs
adata.var_names = gene_df["gene"].values
adata.obs_names = metadata.index.values
adata.obs = metadata.copy()

# PCA and UMAP coordinates.
# NOTE(review): the plain `X_pca` / `X_umap` attributes duplicate the obsm
# entries; kept in case code outside this notebook reads them -- confirm.
adata.X_pca = pseudopca_df.values
adata.obsm["X_pca"] = pseudopca_df.values
adata.X_umap = embedding_df.values
adata.obsm["X_umap"] = embedding_df.values

# Neighbor graph (both matrices are already in CSR format)
adata.uns["neighbors"] = dict(
    connectivities=graph_conn_mtx, distances=graph_dist_mtx
)

# In the conversion the gene and cell names are stored as byte objects; decode
# them to UTF-8 strings.
adata.var.index = adata.var.index.str.decode("utf-8")
adata.obs.index = adata.obs.index.str.decode("utf-8")

# In the conversion the categorical columns get numeric values, therefore the
# categorical columns of the original adata are copied over to the integrated
# adata (pandas aligns them on the cell index).
for column in original_adata.obs.columns:
    if original_adata.obs[column].dtype == "category":
        adata.obs[column] = original_adata.obs[column]




In [None]:
# Call plot_results (brought in by the star import from jupyter_functions) on
# the assembled conos AnnData. It returns the AnnData object to plot together
# with the name of the embedding basis used by the plotting cell below.
# NOTE(review): the meaning of the "conos" and "full" arguments is defined in
# jupyter_functions.py and is not visible here -- presumably the method name
# and the output type; confirm there.
adata_final, basis = plot_results(adata,"conos","full")
# Show a summary of the returned object, then store it next to the input file.
print(adata_final)
adata_final.write("../Integrated_adata/conos_adata_final.h5ad")


In [5]:
# Plot the results: one embedding plot per annotation, batch first and then
# label. An annotation is skipped when it is neither a gene name nor an obs
# column of adata_final (which is the case for the empty-string defaults).
for color_key in (batch, label):
    is_gene = color_key in adata_final.var_names
    if is_gene or color_key in adata_final.obs.columns:
        sc.pl.embedding(adata_final, basis=basis, color=color_key)

In [None]:
# Call compute_metrics (brought in by the star import from jupyter_functions)
# to compute the integration metrics, comparing the un-integrated
# original_adata with the integrated adata_final for the given batch and label
# keys.
# NOTE(review): "X_pca" and "full" are interpreted inside jupyter_functions.py
# -- presumably the embedding key and the output type; confirm there.
df = compute_metrics("conos", original_adata, adata_final, batch, label,"X_pca", "full")
print(df)   