Environment: Run this notebook in the `python_plant_pathogen_atlas` environment, inside the `docker_python` devcontainer.

The purpose of this notebook is to take the pseudotime values that were calculated on all replicates and add them to the scRNA-seq object that contains only the first replicates.

In [1]:
import warnings
warnings.simplefilter(action='ignore', category=Warning)
import os, sys
import numpy as np
import scanpy as sc
import matplotlib.pyplot as plt
import diopy
import pandas as pd

In [2]:
# Root of the project data directory, relative to the location of this notebook.
data_path = "../../data"

Load in the scRNA-seq object containing only the first replicates

In [3]:
# Path to the single-replicate scRNA-seq object, stored as an .h5 file read by diopy.
scrna = os.path.join(data_path, 'temp_objects', 'AvrRpt2_alone.h5')
adata = diopy.input.read_h5(file=scrna)

# Keep only the cell-metadata columns whose name contains neither 'pANN' nor
# 'DF.classifications' (presumably doublet-detection outputs -- TODO confirm).
unwanted_tags = ('pANN', 'DF.classifications')
kept_columns = [
    column
    for column in adata.obs.columns
    if not any(tag in column for tag in unwanted_tags)
]
adata.obs = adata.obs[kept_columns]

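Load in the scRNA-seq object with both replicates (the object that carries the pseudotime values), and keep only the cells that are not from the second replicate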

In [6]:
# Object containing both replicates; its `.obs` carries the `dpt_pseudotime` column
# that a later cell transfers onto `adata`.
rep2_pseudotime_adata = sc.read(os.path.join(data_path, 'temp_objects', 'seq_pseudotime_01.h5ad'))

# Keep every cell whose name does NOT contain 'rep2'.
# NOTE: despite its name, `replcate2_adata_subset` therefore holds the cells that are
# not from replicate 2; the name is kept because a later cell refers to it.
# `.copy()` turns the AnnData view returned by the slice into an independent object,
# so that the later assignment to `.obs.index` modifies real data instead of going
# through a view (which triggers an implicit copy at best and is not dependable
# across anndata versions).
# `regex=False`: 'rep2' is a literal substring, not a pattern.
is_rep2_cell = rep2_pseudotime_adata.obs.index.str.contains('rep2', regex=False)
replcate2_adata_subset = rep2_pseudotime_adata[~is_rep2_cell].copy()

new_index = pd.Series(replcate2_adata_subset.obs.index.values)


# changing the index names to match the nomenclature of the scRNA-seq data containing only the first replicate
nomenclature_dictionary = {'00_col_Mock_rep1': '00_Mock', 
                           'col_AvrRpt2_04h_rep1': 'AvrRpt2_04h', 
                           'col_AvrRpt2_06h_rep1': 'AvrRpt2_06h',
                           'col_AvrRpt2_09h_rep1': 'AvrRpt2_09h',
                           'col_AvrRpt2_24h_rep1': 'AvrRpt2_24h'}

for key, value in nomenclature_dictionary.items():
    print(key, value)
    # The keys are literal sample labels; `regex=False` makes the replacement
    # independent of the pandas version's default for `regex`.
    new_index = new_index.str.replace(key, value, regex=False)


Add the pseudotime values to the scRNA-seq object containing only the first replicates 

In [10]:
# Relabel the cells of the pseudotime object with the names used in `adata`.
# Assigning an index through the `.obs` of an AnnData *view* is not a dependable
# in-place operation, so materialise the subset first if it is still a view.
if replcate2_adata_subset.is_view:
    replcate2_adata_subset = replcate2_adata_subset.copy()
replcate2_adata_subset.obs.index = new_index.values

pseudotime_df = replcate2_adata_subset.obs['dpt_pseudotime']

# Every cell name must map to exactly one pseudotime value; duplicated labels would
# make the alignment below ambiguous.
if not pseudotime_df.index.is_unique:
    raise ValueError('Renamed cell names are not unique; cannot align pseudotime values to `adata`.')

# Fail loudly if the renaming did not produce any name present in `adata`:
# otherwise the column would silently be filled with NaN only.
n_matched = int(adata.obs.index.isin(pseudotime_df.index).sum())
if n_matched == 0:
    raise ValueError('No cell name of `adata` was found in the pseudotime object; check the nomenclature mapping.')
print(f'{n_matched} of {adata.n_obs} cells received a pseudotime value')

# Write the column directly instead of merging: cells without a match get NaN (as with
# a left join), and re-running this cell overwrites the column rather than creating
# `dpt_pseudotime_x` / `dpt_pseudotime_y` duplicates.
adata.obs['dpt_pseudotime'] = pseudotime_df.reindex(adata.obs.index).to_numpy()

sc.pl.umap(adata, color=['dpt_pseudotime'], vmax=1, legend_loc= 'on data')

Write out the pseudotime values

In [14]:
# One-column table (indexed by cell name) holding the transferred pseudotime values.
pseudotime_csv = adata.obs['dpt_pseudotime'].to_frame()

In [15]:
# Save the pseudotime table next to the other intermediate objects.
pseudotime_csv_path = os.path.join(data_path, 'temp_objects', 'pseudotime_csv.csv')
pseudotime_csv.to_csv(pseudotime_csv_path)