In [1]:
import numpy as np
import pandas as pd
import scanpy as sc
import anndata as ad
import torch
import torch.nn as nn
import torch.optim as optim
import ot
import sys
import os
import tarfile
import scipy.io
import h5py
import gzip
from scipy.sparse import issparse
from scipy.sparse import csr_matrix

In [14]:
# Load the normalised simulation dataset and summarise it: the AnnData
# overview first, then the number of cells per batch and per group.
adata = sc.read_h5ad("/workspace/ImputationOT/imputationot/data/sim1_norm.h5ad")
print(adata)
for obs_column in ('Batch', 'Group'):
    print(adata.obs[obs_column].value_counts())

AnnData object with n_obs × n_vars = 12097 × 9979
    obs: 'Cell', 'Batch', 'Group', 'ExpLibSize', 'sum', 'detected', 'percent_top_50', 'percent_top_100', 'percent_top_200', 'percent_top_500', 'total', 'Discard', 'size_factors'
    var: 'Gene', 'BaseGeneMean', 'OutlierFactor', 'GeneMean', 'BatchFacBatch1', 'BatchFacBatch2', 'BatchFacBatch3', 'BatchFacBatch4', 'BatchFacBatch5', 'BatchFacBatch6', 'DEFacGroup1', 'DEFacGroup2', 'DEFacGroup3', 'DEFacGroup4', 'DEFacGroup5', 'DEFacGroup6', 'DEFacGroup7', 'mean', 'detected'
    layers: 'counts'


In [22]:
# Basic statistics of the expression matrix: shape, fraction of non-zero
# entries, and value range.
X = adata.X
n_rows, n_cols = X.shape

# `.nnz` exists only on sparse matrices and counts *stored* entries, which
# may include explicit zeros. Counting true non-zeros works for both sparse
# and dense `X` and gives the actual density.
n_nonzero = X.count_nonzero() if issparse(X) else np.count_nonzero(X)

print("Matrix Shape:", X.shape)
print("Density:", n_nonzero / (n_rows * n_cols))
print("Minimum Value:", X.min())
print("Maximum Value:", X.max())

Matrix Shape: (12097, 9979)
Density: 0.5690072737107684
Minimum Value: 0.0
Maximum Value: 6.739929826089321


In [3]:
# Assemble a single AnnData holding both the RNA and the ADT count matrices
# of the "3P" sample, annotate cells with the shared metadata table, and save
# the result as pbmc2.h5ad.
DATA_DIR = '/workspace/ImputationOT/imputationot/data'


def load_modality(file_prefix, feature_type, cell_metadata):
    """Load one barcodes/features/matrix file triplet as an AnnData object.

    Parameters
    ----------
    file_prefix : str
        File-name prefix inside ``DATA_DIR`` shared by the three files,
        e.g. ``'GSM5008737_RNA_3P'``.
    feature_type : str
        Label written to ``var['feature_type']`` for every feature.
    cell_metadata : pandas.DataFrame
        Per-cell annotations indexed by barcode. It is left-joined onto
        ``obs``, so barcodes missing from the table get NaN annotations.

    Returns
    -------
    anndata.AnnData
        The transposed matrix from the ``.mtx.gz`` file, with ``obs_names``
        set to the barcodes and ``var_names`` set to the (uniquified) first
        column of the features file.
    """
    def _first_column(suffix):
        # Barcode and feature files are headerless TSVs; only column 0 is used.
        path = os.path.join(DATA_DIR, f'{file_prefix}-{suffix}.tsv.gz')
        return pd.read_csv(path, header=None, sep='\t')[0].values

    barcodes = _first_column('barcodes')
    symbols = _first_column('features')
    matrix = scipy.io.mmread(os.path.join(DATA_DIR, f'{file_prefix}-matrix.mtx.gz'))

    # Index obs by barcode so that cells are later paired across modalities by
    # identity rather than by row position. `join(on=...)` keeps this index.
    obs = pd.DataFrame({'barcodes': barcodes}, index=pd.Index(barcodes).astype(str))
    obs = obs.join(cell_metadata, on='barcodes', how='left')

    var = pd.DataFrame(
        {'symbols': symbols, 'feature_type': feature_type},
        index=pd.Index(symbols).astype(str),
    )

    # The matrix is transposed so that rows line up with the barcodes.
    # mmread yields a COO matrix, which does not support row/column slicing,
    # so it is converted to CSR.
    adata = ad.AnnData(X=csr_matrix(matrix.T), obs=obs, var=var)
    adata.var_names_make_unique()
    return adata


# The metadata table is shared by both modalities, so read it once.
data_3P = pd.read_csv(os.path.join(DATA_DIR, 'meta_data_3P.csv.gz')).set_index('Unnamed: 0')

adata_rna = load_modality('GSM5008737_RNA_3P', 'RNA', data_3P)
print(adata_rna)

adata_adt = load_modality('GSM5008738_ADT_3P', 'ADT', data_3P)
print(adata_adt)

# axis=1 stacks features side by side; cells are matched on obs_names
# (barcodes). merge='same' keeps obs columns that are identical in both.
adata_combined = ad.concat([adata_rna, adata_adt], axis=1, merge='same')
assert adata_combined.n_obs == adata_rna.n_obs == adata_adt.n_obs, (
    "RNA and ADT barcodes do not match; cells were dropped during concatenation"
)
# An ADT name may coincide with an RNA feature name; keep var_names unique
# (var['feature_type'] still tells the two modalities apart).
adata_combined.var_names_make_unique()
adata_combined.write(os.path.join(DATA_DIR, 'pbmc2.h5ad'))
print(adata_combined)

AnnData object with n_obs × n_vars = 161764 × 33538
    obs: 'barcodes', 'nCount_ADT', 'nFeature_ADT', 'nCount_RNA', 'nFeature_RNA', 'orig.ident', 'lane', 'donor', 'time', 'celltype.l1', 'celltype.l2', 'celltype.l3', 'Phase', 'Batch'
    var: 'symbols', 'feature_type'
AnnData object with n_obs × n_vars = 161764 × 228
    obs: 'barcodes', 'nCount_ADT', 'nFeature_ADT', 'nCount_RNA', 'nFeature_RNA', 'orig.ident', 'lane', 'donor', 'time', 'celltype.l1', 'celltype.l2', 'celltype.l3', 'Phase', 'Batch'
    var: 'symbols', 'feature_type'


  utils.warn_names_duplicates("var")


AnnData object with n_obs × n_vars = 161764 × 33766
    obs: 'barcodes', 'nCount_ADT', 'nFeature_ADT', 'nCount_RNA', 'nFeature_RNA', 'orig.ident', 'lane', 'donor', 'time', 'celltype.l1', 'celltype.l2', 'celltype.l3', 'Phase', 'Batch'
    var: 'symbols', 'feature_type'
