In [15]:
import anndata as ad
import scanpy as sc
import numpy as np

In [3]:
# Read the GSE184290 dataset from its .h5ad file; every later cell works on `adata`.
h5ad_path = 'GSE184290_data.h5ad'
adata = sc.read_h5ad(h5ad_path)

In [6]:
# Show which container backs the expression matrix (dense ndarray vs. sparse matrix).
# NOTE(review): the value range printed further down (max 8956.0) looks more like
# raw counts than processed values -- confirm what preprocessing was applied.
matrix_type = type(adata.X)
print(matrix_type)

<class 'numpy.ndarray'>


In [7]:
# Peek at the first rows of the per-gene annotation table.
var_preview = adata.var.head()
print(var_preview)

Empty DataFrame
Columns: []
Index: [Mrpl15, Lypla1, Gm37988, Tcea1, Atp6v1h]


In [8]:
# Count missing entries in each column of the per-gene annotation table.
missing_per_column = adata.var.isna().sum()
print(missing_per_column)

Series([], dtype: float64)


In [9]:
# List the keys stored in adata.obsm (the code below looks for 'X_pca' / 'X_umap' there).
obsm_names = adata.obsm_keys()
print(obsm_names)

[]


In [10]:
# Report the shape of each embedding that is actually stored; absent ones are skipped silently.
stored_embeddings = adata.obsm_keys()

if 'X_pca' in stored_embeddings:
    pca_coords = adata.obsm['X_pca']
    print(f"Shape of PCA embedding: {pca_coords.shape}")

if 'X_umap' in stored_embeddings:
    umap_coords = adata.obsm['X_umap']
    print(f"Shape of UMAP embedding: {umap_coords.shape}")

In [11]:
# List the keys of the unstructured annotation mapping adata.uns.
uns_keys = adata.uns.keys()
print(uns_keys)

odict_keys([])


In [12]:
# Example probe: show any 'log1p' entry in adata.uns
# (presumably left behind by a log-transformation step -- verify against the pipeline).
has_log1p_entry = 'log1p' in adata.uns
if has_log1p_entry:
    log1p_info = adata.uns['log1p']
    print(f"Log1p data in uns: {log1p_info}")

In [13]:
# Summary statistics over every entry of the expression matrix.
expr = adata.X
expr_min = expr.min()
expr_max = expr.max()
expr_mean = expr.mean()
expr_std = expr.std()

print(f"Min value in adata.X: {expr_min}")
print(f"Max value in adata.X: {expr_max}")
print(f"Mean value in adata.X: {expr_mean}")
print(f"Standard deviation in adata.X: {expr_std}")

Min value in adata.X: 0.0
Max value in adata.X: 8956.0
Mean value in adata.X: 1.0460280179977417
Standard deviation in adata.X: 12.608473777770996


In [16]:
# Sanity check: warn when the expression matrix holds NaN or infinite entries.
# Nothing is printed when the matrix is clean.
contains_nan = np.isnan(adata.X).any()
contains_inf = np.isinf(adata.X).any()

if contains_nan:
    print("WARNING: adata.X contains NaN values!")
if contains_inf:
    print("WARNING: adata.X contains infinite values!")

In [17]:
import matplotlib.pyplot as plt


def _present_color_keys(adata, candidates):
    """Return the entries of `candidates` that `adata` can be colored by.

    A key is kept when it is a column of `adata.obs` or an entry of
    `adata.var_names`. Every other key is reported and dropped, so that a
    placeholder name that was never replaced cannot make the plotting call fail.
    """
    present = [
        key for key in candidates
        if key in adata.obs.columns or key in adata.var_names
    ]
    missing = [key for key in candidates if key not in present]
    if missing:
        print(f"Skipping color keys not found in adata.obs / adata.var_names: {missing}")
    return present


# Placeholder annotation names -- replace with real columns of adata.obs
# (for example a clustering column and a sample/batch column).
umap_color_candidates = ['my_clusters', 'sample_id']
# The PCA can be colored by a gene or by a QC metric; this is the metric tried here.
pca_color_candidates = ['n_genes_by_counts']

if 'X_umap' in adata.obsm_keys():
    umap_colors = _present_color_keys(adata, umap_color_candidates)
    if umap_colors:
        # One title per panel, so every panel is labelled with what it shows.
        umap_titles = [f"UMAP colored by {key}" for key in umap_colors]
        sc.pl.umap(adata, color=umap_colors, title=umap_titles)
    else:
        # None of the requested annotations exist: still show the embedding, uncolored.
        sc.pl.umap(adata, title='UMAP')
    plt.show()
else:
    print("No 'X_umap' entry in adata.obsm; skipping the UMAP plot.")

if 'X_pca' in adata.obsm_keys():
    pca_colors = _present_color_keys(adata, pca_color_candidates)
    if pca_colors:
        sc.pl.pca(adata, color=pca_colors, title='PCA colored by Number of Genes')
    else:
        # The QC metric is not available: still show the embedding, uncolored.
        sc.pl.pca(adata, title='PCA')
    plt.show()
else:
    print("No 'X_pca' entry in adata.obsm; skipping the PCA plot.")