<a href="https://colab.research.google.com/github/TummalaSharmila/MachineLearning_in_BI/blob/main/DrWangLab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import muon as mu
import scanpy as sc
import celloracle as co
import pandas as pd
import numpy as np

# Load the MuData file.
# NOTE: "your_file.h5mu" is a placeholder path -- point it at the real dataset.
mdata = mu.read("your_file.h5mu")

# Extract RNA modality (this is an AnnData object)
adata = mdata.mod['rna']

# Option 1: use 'Day' as proxy for cell type
adata.obs["cell_type"] = adata.obs["Day"].astype(str)

# Option 2: assign dummy label
# adata.obs["cell_type"] = "unspecified"

# The preprocessing below rewrites adata.X in place (normalize -> log1p ->
# scale), after which the original values cannot be recovered. Keep them in
# layers["counts"] (unless the file already ships that layer) so later steps
# that need un-scaled expression can restore them; layers are subset together
# with X when highly variable genes are selected.
# NOTE(review): assumes adata.X holds raw counts as loaded -- confirm.
if "counts" not in adata.layers:
    adata.layers["counts"] = adata.X.copy()

# Preprocessing: library-size normalisation, log transform, keep the 2000 most
# variable genes, then z-score each gene.
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)
sc.pp.highly_variable_genes(adata, n_top_genes=2000, subset=True)
sc.pp.scale(adata)

# Embedding: PCA -> neighbour graph -> UMAP (stored in adata.obsm["X_umap"]).
sc.tl.pca(adata)
sc.pp.neighbors(adata)
sc.tl.umap(adata)

# ---------------------------------------------------------------------------
# CellOracle: build cluster-wise GRNs, simulate a TF perturbation and save the
# resulting vector field on the UMAP.
# ---------------------------------------------------------------------------

# Placeholder / tunable settings.
# NOTE(review): the values below are the CellOracle tutorial defaults and are
# data dependent -- tune them for this dataset.
PERTURB_GENE = "your_gene"   # placeholder: TF to knock out in silico
PERTURB_VALUE = 0.0          # 0.0 == knock-out
N_GRID = 40                  # grid resolution for the vector field
MIN_MASS = 0.01              # drop grid points with too few cells nearby
FLOW_SCALE = 25              # arrow scaling passed to the quiver plot

# Create the CellOracle object.
# CellOracle's loaders reject matrices containing negative values, so the
# scaled adata.X cannot be used. Work on a copy whose X is restored from
# layers["counts"]; this leaves `adata` itself untouched.
# Expects adata.layers["counts"] to hold the un-normalised counts.
oracle_input = adata.copy()
oracle_input.X = oracle_input.layers["counts"].copy()
sc.pp.normalize_total(oracle_input, target_sum=1e4)
# NOTE(review): CellOracle appears to require a categorical cluster column --
# confirm against the installed version.
oracle_input.obs["cell_type"] = oracle_input.obs["cell_type"].astype("category")

oracle = co.Oracle()
# import_anndata_as_raw_count applies the log transform itself.
oracle.import_anndata_as_raw_count(
    adata=oracle_input,
    cluster_column_name="cell_type",
    embedding_name="X_umap",
)

# Gene annotations and GRN building.
# Base GRN (TF -> candidate target list). Mouse shown here; for human use
# co.data.load_human_promoter_base_GRN().
# NOTE(review): gene symbols in the base GRN must match adata.var_names.
base_GRN = co.data.load_mouse_scATAC_atlas_base_GRN()
oracle.import_TF_data(TF_info_matrix=base_GRN)

# Dimensional reduction + KNN imputation (required before GRN fitting).
oracle.perform_PCA()
cum_var = np.cumsum(oracle.pca.explained_variance_ratio_)
elbow = np.where(np.diff(np.diff(cum_var) > 0.002))[0]
n_comps = min(int(elbow[0]), 50) if elbow.size else min(50, cum_var.size)
n_comps = max(n_comps, 1)
k = max(int(0.025 * oracle.adata.shape[0]), 1)
oracle.knn_imputation(
    n_pca_dims=n_comps,
    k=k,
    balanced=True,
    b_sight=k * 8,
    b_maxl=k * 4,
    n_jobs=4,
)

# Fit one GRN per cluster, keep the confident edges, then fit the
# simulation model on them.
links = oracle.get_links(
    cluster_name_for_GRN_unit="cell_type",
    alpha=10,
    verbose_level=10,
)
links.filter_links(p=0.001, weight="coef_abs")
oracle.get_cluster_specific_TFdict_from_Links(links_object=links)
oracle.fit_GRN_for_simulation(alpha=10, use_cluster_specific_TFdict=True)

# In-silico perturbation -> cell-state transition vectors on the embedding.
# A CellOracle vector field only exists as the result of a simulation, so a
# perturbation condition has to be supplied.
oracle.simulate_shift(
    perturb_condition={PERTURB_GENE: PERTURB_VALUE},
    n_propagation=3,
)
oracle.estimate_transition_prob(n_neighbors=200, knn_random=True, sampled_fraction=1)
oracle.calculate_embedding_shift(sigma_corr=0.05)

# Summarise the per-cell vectors on a regular grid.
oracle.calculate_p_mass(smooth=0.8, n_grid=N_GRID, n_neighbors=200)
oracle.calculate_mass_filter(min_mass=MIN_MASS, plot=False)

# Visualize: UMAP coloured by cell type (with legend) + vector field overlay.
# scanpy returns the Axes when show=False, which gives access to the figure
# without importing matplotlib here.
ax = sc.pl.umap(oracle.adata, color="cell_type", show=False)
oracle.plot_simulation_flow_on_grid(scale=FLOW_SCALE, ax=ax, show_background=False)
fig = ax.figure

# Save to file
fig.savefig("vector_field.png", dpi=300, bbox_inches='tight')



In [None]:
import numpy as np
import scipy.sparse as sp

# Start again from the stored counts layer.
adata.X = adata.layers["counts"].copy()

# Step 1: zero out every non-finite entry (NaN, +Inf, -Inf).
# For a sparse matrix only the explicitly stored values need checking; for a
# dense matrix the whole array is checked. Both are modified in place.
stored_values = adata.X.data if sp.issparse(adata.X) else adata.X
stored_values[~np.isfinite(stored_values)] = 0

# Step 2: drop cells and genes whose total count is zero.
for drop_empty in (sc.pp.filter_cells, sc.pp.filter_genes):
    drop_empty(adata, min_counts=1)

# Step 3: standard processing -- normalise, log-transform, keep the top 2000
# variable genes, then scale and compute PCA / neighbours / UMAP.
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)
sc.pp.highly_variable_genes(adata, n_top_genes=2000, subset=True)
for step in (sc.pp.scale, sc.tl.pca, sc.pp.neighbors, sc.tl.umap):
    step(adata)


In [None]:
# Create CellOracle object
oracle = co.Oracle()

# adata.X has been z-scored by this point and therefore contains negative
# values, which CellOracle's loaders reject. Build the input from a copy whose
# X is restored from layers["counts"], so `adata` itself is left unchanged.
oracle_input = adata.copy()
oracle_input.X = oracle_input.layers["counts"].copy()
sc.pp.normalize_total(oracle_input, target_sum=1e4)
# NOTE(review): CellOracle appears to require a categorical cluster column --
# confirm against the installed version.
oracle_input.obs["cell_type"] = oracle_input.obs["cell_type"].astype("category")

# Documented loader: takes the cluster column and the obsm key of the
# embedding explicitly, and applies the log transform itself.
oracle.import_anndata_as_raw_count(
    adata=oracle_input,
    cluster_column_name="cell_type",
    embedding_name="X_umap",
)
