In [20]:
import scanpy as sc
import pandas as pd
import scipy.sparse as sp

In [16]:
# Load the JAX dataset (an .h5ad file read through scanpy) from the local Data/ directory.
adata = sc.read_h5ad("Data/endo_2022_stromal.h5ad")
# Bare last expression: display the names of the metadata columns held in adata.obs.
adata.obs.columns

Index(['sequencing_saturation', 'stage', 'n_genes_by_counts', 'total_counts',
       'pct_counts_mitochondrial', 'pct_counts_hemoglobin', 'celltype',
       'celltype_main', 'subtypes', 'sample_type_rename', 'PID', 'G2M_score',
       'phase'],
      dtype='object')

In [18]:
# Display which layers are stored alongside the main matrix.
adata.layers

Layers with keys: normed, raw

In [58]:
# Display the repr of the "normed" layer to check its shape, dtype and storage format.
adata.layers["normed"]

<42713x24401 sparse matrix of type '<class 'numpy.float32'>'
	with 126772747 stored elements in Compressed Sparse Row format>

In [42]:
# Extract normed layer
# This is a reference to the layer stored on `adata`, not a copy.
X_norm = adata.layers["normed"]

In [30]:
# Shorthand handles to the two annotation tables: `var` holds gene metadata and
# `obs` holds cell metadata. In the exported genes-by-cells frame built below,
# genes become the rows and cells become the columns.
var, obs = adata.var, adata.obs

In [48]:
# Build a genes-by-cells DataFrame (gene names as the row index, cell barcodes
# as the column labels) from the transposed normalized matrix.
X_norm_t = X_norm.T
axis_labels = {"index": var.index, "columns": obs.index}
expr_norm = (
    # Sparse input stays sparse: each column becomes a pandas SparseArray.
    pd.DataFrame.sparse.from_spmatrix(X_norm_t, **axis_labels)
    if sp.issparse(X_norm_t)
    # Dense input goes through the ordinary DataFrame constructor.
    else pd.DataFrame(X_norm_t, **axis_labels)
)

In [50]:
# Sanity check: expect (number of genes, number of cells) after the transpose.
expr_norm.shape

(24401, 42713)

In [52]:
# Plain-text preview of the expression frame; pandas truncates the rows and columns shown.
print(expr_norm)

index     AAACCCAAGACCAAGC-1-0  AAACCCATCACAACCA-1-0  AAACGAAGTCTTCCGT-1-0  \
index                                                                        
A1BG                  1.961215              0.578215              0.729038   
A1BG-AS1                     0                     0                     0   
A2M                   1.961215               2.31286              0.729038   
A2M-AS1                      0                     0                     0   
A2ML1                        0                     0                     0   
...                        ...                   ...                   ...   
ZXDC                         0                     0                     0   
ZYG11A                       0                     0                     0   
ZYG11B                       0                     0              1.458076   
ZYX                          0               2.31286              1.458076   
ZZEF1                        0               1.15643            

In [62]:
# Save normalized expression + metadata
#
# Calling `to_csv` directly on the sparse-backed `expr_norm` frame is extremely
# slow for a matrix of this size (the previous run of this cell had to be
# interrupted). Instead, the expression matrix is streamed to disk in blocks of
# genes: each block is densified, wrapped in a small DataFrame carrying the
# matching gene/cell labels, and appended to the same CSV file. The file layout
# (genes as rows, cells as columns, header written once) is unchanged and the
# values are numerically identical; only the text formatting of individual
# numbers may differ slightly from a direct sparse `to_csv`.
#
# NOTE(review): the input file is read from "Data/" while these outputs go to
# "DATA/". On a case-sensitive filesystem those are two different directories;
# confirm which one is intended. The paths are left exactly as they were.
EXPR_CSV = "DATA/stromal_expression_normed.csv"
META_CSV = "DATA/stromal_metadata.csv"
GENES_PER_CHUNK = 512  # rows densified at a time; bounds peak memory per block

# The metadata table is small, so write it first: it is then on disk even if
# the large expression export is interrupted.
adata.obs.to_csv(META_CSV)

is_sparse_frame = all(isinstance(dt, pd.SparseDtype) for dt in expr_norm.dtypes)
if is_sparse_frame and expr_norm.size > 0:
    # Recover one SciPy matrix from the frame; CSR makes row (gene) slicing cheap.
    expr_csr = expr_norm.sparse.to_coo().tocsr()
    # newline="" lets pandas control line endings when given an open handle.
    with open(EXPR_CSV, "w", newline="", encoding="utf-8") as handle:
        for start in range(0, expr_csr.shape[0], GENES_PER_CHUNK):
            stop = start + GENES_PER_CHUNK
            dense_chunk = pd.DataFrame(
                expr_csr[start:stop].toarray(),
                index=expr_norm.index[start:stop],
                columns=expr_norm.columns,
            )
            # Only the first block carries the header row (cell barcodes).
            dense_chunk.to_csv(handle, header=(start == 0))
else:
    # Dense (or empty) frames do not hit the slow sparse path; write directly.
    expr_norm.to_csv(EXPR_CSV)

KeyboardInterrupt: 