In [23]:
%%bash

# Train scPheno on the Thp1 data and save the fitted model to thp1.pth
# (a later cell loads that file with torch.load).
# Inputs passed below: thp1.mtx as data, thp1_crispr.txt as labels (onehot),
# thp1_adt.txt as condition (real), thp1_replicate.txt as condition2 (onehot).
# CUDA_VISIBLE_DEVICES=0 exposes only GPU 0 to the process.
# NOTE(review): the meaning of the short flags (-n, -bs, -64, -zi, -cv, ...) is
# defined by scPheno.py's argument parser, which is not shown here — confirm
# there before changing any of them.
CUDA_VISIBLE_DEVICES=0 python scPheno.py --sup-data-file "/home/zengbio/Project/scPheno_Ex/Thp1/thp1.mtx" \
                        --sup-label-file "/home/zengbio/Project/scPheno_Ex/Thp1/thp1_crispr.txt" \
                        --sup-condition-file "/home/zengbio/Project/scPheno_Ex/Thp1/thp1_adt.txt" \
                        --sup-condition2-file "/home/zengbio/Project/scPheno_Ex/Thp1/thp1_replicate.txt" \
                        -lr 0.0001 \
                        -n 100 \
                        -bs 10 \
                        --cuda \
                        -64 \
                        --jit \
                        -zi \
                        -likeli poisson \
                        -dirichlet \
                        -cv 0 \
                        --label-type onehot \
                        --condition-type real \
                        --condition2-type onehot \
                        --save-model thp1.pth

1 epoch: avg losses 65273.4093 0.0000 elapsed 63.4680 seconds
2 epoch: avg losses 50306.8177 0.0000 elapsed 62.5909 seconds
3 epoch: avg losses 48786.3310 0.0000 elapsed 50.8040 seconds
4 epoch: avg losses 47357.1271 0.0000 elapsed 50.7985 seconds
5 epoch: avg losses 46773.3366 0.0000 elapsed 50.9866 seconds
6 epoch: avg losses 46265.6830 0.0000 elapsed 50.8979 seconds
7 epoch: avg losses 45903.2535 0.0000 elapsed 52.9379 seconds
8 epoch: avg losses 45501.4156 0.0000 elapsed 50.4241 seconds
9 epoch: avg losses 45082.3565 0.0000 elapsed 47.2572 seconds
10 epoch: avg losses 44648.2940 0.0000 elapsed 47.1913 seconds
11 epoch: avg losses 44147.1599 0.0000 elapsed 47.2296 seconds
12 epoch: avg losses 43783.6338 0.0000 elapsed 47.1727 seconds
13 epoch: avg losses 43411.2653 0.0000 elapsed 46.9479 seconds
14 epoch: avg losses 43071.3333 0.0000 elapsed 48.3768 seconds
15 epoch: avg losses 42844.1866 0.0000 elapsed 48.6792 seconds
16 epoch: avg losses 42545.7684 0.0000 elapsed 48.3506 seconds
1

In [24]:
%matplotlib inline
import matplotlib.pyplot as plt
import time
from sklearnex import patch_sklearn
patch_sklearn()

import numpy as np
import pandas as pd
from scipy.io import mmread
from scipy.stats import entropy
from scipy.special import softmax, log_softmax
from collections import Counter

from scPheno import scPheno
from utils.scdata_cached import setup_data_loader, SingleCellCached, transform_label2class, label2class_encoder, transform_class2label

import torch
import torch.nn.functional as ft
from torch.utils.data import DataLoader

import pyro.distributions as dist

from sklearn.preprocessing import StandardScaler, Normalizer, OneHotEncoder
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.decomposition import NMF
from sklearn.inspection import permutation_importance
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, ConfusionMatrixDisplay

import umap

Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)


In [25]:
# Checkpoint written by the training cell (--save-model thp1.pth)
ModelPath = 'thp1.pth'
# Expression matrix (.mtx) that is fed back through the trained model
DataPath = '/home/zengbio/Project/scPheno_Ex/Thp1/thp1.mtx'
# No label / condition files are supplied when loading the data for inference
LabelPath = ConditionPath = ConditionPath2 = None


In [26]:
# inference settings used by the data-loading and prediction cells
use_cuda = True
use_float64 = True
batch_size = 1000

# load model
# NOTE(review): torch.load unpickles the stored object, which can execute
# arbitrary code — only load checkpoints from a trusted source.
model = torch.load(ModelPath)

In [27]:
# load data
# use_cuda=False is passed here; batches are moved to the GPU inside the
# prediction loop instead.
data_cached = SingleCellCached(
    DataPath,
    LabelPath,
    ConditionPath,
    ConditionPath2,
    'condition',
    use_cuda=False,
    use_float64=use_float64,
)
# shuffle=False: batches are yielded in dataset order
data_loader = DataLoader(dataset=data_cached, batch_size=batch_size, shuffle=False)

In [31]:
# Run every batch through the trained model and collect, per cell:
#   embeds - output of model.latent_embedding
#   exprs  - output of model.generate_expression with both condition terms muted
embeds = []
exprs = []
# Label scoring is not computed in this loop, so `labels` stays an empty list.
labels = []

# Inference only: no_grad avoids building an autograd graph for each batch,
# which would otherwise be allocated and immediately thrown away.
with torch.no_grad():
    for xs, ys, ks, ks2 in data_loader:
        # the loader yields CPU tensors; move the batch to the GPU when requested
        if use_cuda:
            xs = xs.cuda()

        zs = model.latent_embedding(xs)
        expr = model.generate_expression(xs, mute_label=False, mute_condition=True, mute_condition2=True, mute_noise=False, use_gate=True)

        # .detach().cpu() works for CPU and CUDA tensors alike, so a single
        # conversion path replaces the duplicated if/else branches
        zs = zs.detach().cpu().numpy()
        expr = expr.detach().cpu().numpy()

        embeds.append(zs)
        exprs.append(expr)

# stack the per-batch arrays along the cell axis
embeds = np.concatenate(embeds, axis=0)
exprs = np.concatenate(exprs, axis=0)


In [32]:
# Cell and gene identifier lists, both read without a header row
_read_opts = dict(header=None, index_col=None)
cells = pd.read_csv('/home/zengbio/Project/scPheno_Ex/Thp1/thp1_cells.txt', **_read_opts)
genes = pd.read_csv('/home/zengbio/Project/scPheno_Ex/Thp1/thp1_genes.txt', **_read_opts)
# displayed as the cell output: (number of cells, number of columns)
cells.shape

(20729, 1)

In [33]:
# Denoised expression table: rows indexed by the cell list, columns by the gene list
df = pd.DataFrame(exprs, columns=genes[0].values, index=cells[0].values)
df.to_csv('/home/zengbio/Project/scPheno_Ex/Thp1/thp1_stim_denoised_expression.txt')

# Label scores are exported only if the prediction cell actually produced them.
# Previously this step always ran on the empty `labels` list and wrote the result
# to a file belonging to a different dataset (mouse_develop), overwriting it with
# a table that contained no predictions.
if len(labels) > 0:
    # accept either the list of per-batch arrays or an already concatenated array
    label_scores = np.concatenate(labels, axis=0) if isinstance(labels, list) else np.asarray(labels)
    label_df = pd.DataFrame(label_scores, columns=model.label_names, index=cells[0].values)
    label_df.to_csv('/home/zengbio/Project/scPheno_Ex/Thp1/thp1_label_scores.txt', index=None)