# Prepare data for scPheno

Currently, scPheno only accepts gene expression matrices in CSV or TXT format. We provide an R example showing how to prepare the input files for scPheno. We also provide an R example showing how to add the reconstructed expression to a Seurat object.

In [None]:
%%bash

# Run scPheno.py on GPU 0 (CUDA_VISIBLE_DEVICES=0, --cuda) using the ifnb
# expression matrix and three annotation files:
#   --sup-label-file       -> ifnb_celltype.txt
#   --sup-condition-file   -> ifnb_cellsubtype.txt
#   --sup-condition2-file  -> ifnb_condition.txt
# All three annotations are declared categorical. The resulting model is
# written to ifnb.pth (--save-model), which the Python cells below load.
# NOTE(review): -lr / -n / -bs are presumably learning rate, number of epochs
# and batch size; -zi, -likeli, -dirichlet and -cv are model options whose
# exact meaning should be confirmed with `python scPheno.py --help`.
CUDA_VISIBLE_DEVICES=0 python scPheno.py --sup-data-file "/home/zengbio/Project/scPheno_Ex/ifnb_tutorial/ifnb.txt" \
                        --sup-label-file "/home/zengbio/Project/scPheno_Ex/ifnb_tutorial/ifnb_celltype.txt" \
                        --sup-condition-file "/home/zengbio/Project/scPheno_Ex/ifnb_tutorial/ifnb_cellsubtype.txt" \
                        --sup-condition2-file "/home/zengbio/Project/scPheno_Ex/ifnb_tutorial/ifnb_condition.txt" \
                        -lr 0.0001 \
                        -n 200 \
                        -bs 100 \
                        --cuda \
                        -zi \
                        -likeli negbinomial \
                        -dirichlet \
                        -cv 0 \
                        --label-type categorical \
                        --condition-type categorical \
                        --condition2-type categorical \
                        --save-model ifnb.pth

In [5]:
import numpy as np
import pandas as pd
import datatable as dt

from scPheno import scPheno
from utils.scdata_cached import SingleCellCached

import torch
from torch.utils.data import DataLoader




In [6]:
# Input locations for the generation step.
# ModelPath is the file written by `--save-model` in the bash cell above.
ModelPath = 'ifnb.pth'
# Expression matrix; the same file that was passed as --sup-data-file.
DataPath='/home/zengbio/Project/scPheno_Ex/ifnb_tutorial/ifnb.txt'
# No label/condition files are supplied here: only the expression matrix is
# handed to SingleCellCached when the data are loaded below.
LabelPath=None
ConditionPath=None
ConditionPath2=None


In [7]:
# load model
# torch.load unpickles the whole model object saved at ModelPath, so only load
# files you trust.
# NOTE(review): recent PyTorch releases default torch.load to
# weights_only=True, which rejects fully pickled model objects; if loading
# fails there, weights_only=False may be required -- confirm for your version.
model = torch.load(ModelPath)

# Number of cells passed through the model per DataLoader batch.
batch_size = 10000

# Forwarded to SingleCellCached when the data are loaded.
use_float64 = False
# When True, each batch is moved to the GPU with .cuda() before generation.
use_cuda = True

In [8]:
# load data
# The dataset is built with use_cuda=False; batches are moved to the GPU later,
# inside the generation loop, when the notebook-level use_cuda flag is set.
# NOTE(review): the meaning of the positional 'condition' argument is defined
# by SingleCellCached and is not visible here -- confirm against utils/scdata_cached.
data_cached = SingleCellCached(DataPath, LabelPath, ConditionPath, ConditionPath2, 'condition', use_cuda=False, use_float64 = use_float64)
# shuffle=False keeps rows in file order, which matters because the generated
# matrix is later labelled with cell names by position.
data_loader = DataLoader(data_cached, batch_size = batch_size, shuffle = False)

In [13]:
# Generate expression for every cell, one DataLoader batch at a time.
# generate_expression is called with mute_label=False and the condition,
# condition2 and noise terms muted.
# NOTE(review): this presumably yields expression driven by the label factor
# only -- confirm against scPheno.generate_expression.
exprs = []
# Inference only: disable autograd so no computation graph is kept per batch.
with torch.no_grad():
    # The loader also yields label/condition tensors; only xs is used here.
    for xs, ys, ks, ks2 in data_loader:
        if use_cuda:
            xs = xs.cuda()

        expr = model.generate_expression(xs, mute_label=False, mute_condition=True, mute_condition2=True, mute_noise=True)

        # .cpu() is a no-op for tensors already on the CPU, so a single
        # conversion path serves both the GPU and the CPU case.
        exprs.append(expr.detach().cpu().numpy())

# Stack the per-batch arrays back into one (cells x genes) matrix, in loader order.
exprs = np.concatenate(exprs, axis=0)

# Rescale globally so that the largest value in the matrix becomes 10.
exprs = 10 * exprs / exprs.max()


In [14]:
# Read the cell names and gene names. header=None means the first line of each
# file is treated as data, and index_col=None keeps a default integer index,
# so the names end up in column 0 of each frame.
cells = pd.read_csv('/home/zengbio/Project/scPheno_Ex/ifnb_tutorial/ifnb_cells.txt', header=None, index_col=None)
genes = pd.read_csv('/home/zengbio/Project/scPheno_Ex/ifnb_tutorial/ifnb_genes.txt', header=None, index_col=None)
# Displayed as a sanity check on the number of cells read.
cells.shape

(13999, 1)

In [15]:
# Label the generated matrix: rows are named by the cell names, columns by the
# gene names (column 0 of the frames read above).
df = pd.DataFrame(exprs, columns=genes[0].values, index=cells[0].values)
# reset_index() turns the cell names into an ordinary first column so they are
# kept when the table is converted to a datatable Frame and written to disk.
# NOTE(review): Frame.to_csv presumably writes comma-separated text by default
# even though the target ends in .txt -- confirm the downstream reader expects that.
dt.Frame(df.reset_index()).to_csv('/home/zengbio/Project/scPheno_Ex/ifnb_tutorial/ifnb_celltype_denoised_expression.txt')