## Set path

In [1]:
import os
# Input and output roots, both resolved against the current working directory.
dataset_dir = os.path.join(os.getcwd(), 'datasets/')
outputs_dir = os.path.join(os.getcwd(), 'outputs/')

# Output folder for this Mouse1 -> Mouse2 run.
save_dir = os.path.join(outputs_dir, "different samples/CITE-SLN111-Gayoso-Mouse1toMouse2/scMOG")

# exist_ok=True keeps the cell idempotent on re-run and avoids the
# check-then-create race of `if not os.path.exists(...)`.
os.makedirs(outputs_dir, exist_ok=True)
os.makedirs(save_dir, exist_ok=True)

## Load necessary libraries

In [2]:
import anndata
import pandas as pd

## Load h5ad data

In [3]:
# `anndata.read` is a deprecated alias; `read_h5ad` is the supported reader
# for .h5ad files.
data_train = anndata.read_h5ad(os.path.join(dataset_dir, "different samples/CITE-SLN111-Gayoso/Mouse1.h5ad"))
data_test = anndata.read_h5ad(os.path.join(dataset_dir, "different samples/CITE-SLN111-Gayoso/Mouse2.h5ad"))

# Display both objects as the cell output.
data_train, data_test

(AnnData object with n_obs × n_vars = 9264 × 13553
     obs: 'n_protein_counts', 'n_proteins', 'seurat_hash_id', 'batch_indices', 'hash_id', 'n_genes', 'percent_mito', 'leiden_subclusters', 'cell_types'
     var: 'gene_ids', 'feature_types', 'highly_variable', 'highly_variable_mean_variance', 'encode', 'hvg_encode'
     uns: 'protein_name', 'version'
     obsm: 'protein_expression',
 AnnData object with n_obs × n_vars = 7564 × 13553
     obs: 'n_protein_counts', 'n_proteins', 'seurat_hash_id', 'batch_indices', 'hash_id', 'n_genes', 'percent_mito', 'leiden_subclusters', 'cell_types'
     var: 'gene_ids', 'feature_types', 'highly_variable', 'highly_variable_mean_variance', 'encode', 'hvg_encode'
     uns: 'protein_name', 'version'
     obsm: 'protein_expression')

## Export gene expression and protein expression data to separate CSV files

In [4]:
def _expression_frame(matrix, cell_ids, feature_names):
    """Build a features-by-cells DataFrame from a cells-by-features sparse matrix.

    Parameters
    ----------
    matrix : sparse matrix exposing ``.toarray()``, one row per cell.
    cell_ids : labels for the matrix rows (become the DataFrame columns).
    feature_names : labels for the matrix columns (become the DataFrame index).

    Returns
    -------
    pandas.DataFrame
        The densified matrix, transposed so rows are features and columns are cells.
    """
    # toarray() returns a plain ndarray; todense() would return the
    # discouraged np.matrix type.
    return pd.DataFrame(matrix.toarray(), index=cell_ids, columns=feature_names).T


# Gene expression: rows are genes (var index), columns are cell barcodes (obs index).
rna_expression_train = _expression_frame(data_train.X, data_train.obs.index, data_train.var.index)
rna_expression_test = _expression_frame(data_test.X, data_test.obs.index, data_test.var.index)

# Protein expression: stored in obsm, with the column names kept in uns["protein_name"].
protein_expression_train = _expression_frame(
    data_train.obsm["protein_expression"], data_train.obs.index, data_train.uns["protein_name"]
)
protein_expression_test = _expression_frame(
    data_test.obsm["protein_expression"], data_test.obs.index, data_test.uns["protein_name"]
)

# Write every table into save_dir (the same folder the literal paths pointed at)
# instead of repeating the directory string for each file.
rna_expression_train.to_csv(os.path.join(save_dir, "train_gene_expression.csv"))
rna_expression_test.to_csv(os.path.join(save_dir, "test_gene_expression.csv"))
protein_expression_train.to_csv(os.path.join(save_dir, "train_protein_expression.csv"))
protein_expression_test.to_csv(os.path.join(save_dir, "test_protein_expression.csv"))

# Display all four tables as the cell output.
rna_expression_train, rna_expression_test, protein_expression_train, protein_expression_test

(index          AAACCCAAGGGTAATT-1  AAACCCAAGGTAAACT-1  AAACCCACACTAGGTT-1  \
 index                                                                       
 0610007P14Rik                 2.0                 1.0                 0.0   
 0610009B22Rik                 0.0                 0.0                 0.0   
 0610009L18Rik                 0.0                 0.0                 0.0   
 0610009O20Rik                 0.0                 0.0                 0.0   
 0610010F05Rik                 0.0                 0.0                 0.0   
 ...                           ...                 ...                 ...   
 mt-Nd3                       19.0                 8.0                 3.0   
 mt-Nd4                       43.0                24.0                12.0   
 mt-Nd4l                       4.0                 1.0                 1.0   
 mt-Nd5                        5.0                 5.0                 2.0   
 mt-Nd6                        0.0                 0.0          