<a href="https://colab.research.google.com/github/TummalaSharmila/MachineLearning_in_BI/blob/main/DrWangLab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import muon as mu
import scanpy as sc
import celloracle as co
import pandas as pd
import numpy as np

# Load the MuData container and take its RNA modality (an AnnData object).
mdata = mu.read("your_file.h5mu")
adata = mdata.mod['rna']

# Option 1: use 'Day' as a proxy for cell type. Stored as a categorical so
# that per-group tools can enumerate the labels.
adata.obs["cell_type"] = adata.obs["Day"].astype(str).astype("category")

# Option 2: assign a single dummy label instead.
# adata.obs["cell_type"] = "unspecified"

# --- Preprocessing ---------------------------------------------------------
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)
sc.pp.highly_variable_genes(adata, n_top_genes=2000, subset=True)

# Keep the unscaled log-normalised values: sc.pp.scale() below centres the
# data (introducing negative values), while CellOracle expects unscaled,
# non-negative expression.
adata.layers["lognorm"] = adata.X.copy()

sc.pp.scale(adata)
sc.tl.pca(adata)
sc.pp.neighbors(adata)
sc.tl.umap(adata)

# --- CellOracle object -----------------------------------------------------
# Hand CellOracle a copy whose X holds the unscaled values; the UMAP computed
# above is reused as the simulation embedding (scanpy stores it as "X_umap").
adata_oracle = adata.copy()
adata_oracle.X = adata_oracle.layers["lognorm"].copy()

oracle = co.Oracle()
oracle.import_anndata_as_normalized_count(
    adata=adata_oracle,
    cluster_column_name="cell_type",
    embedding_name="X_umap",
)

# --- Base GRN and GRN inference --------------------------------------------
# Mouse base GRN shipped with CellOracle (use a human base GRN for hg38 data).
# NOTE(review): assumes adata.var_names are mouse gene symbols - confirm.
base_grn = co.data.load_mouse_scATAC_atlas_base_GRN()
oracle.import_TF_data(TF_info_matrix=base_grn)

# PCA + KNN imputation. The number of PCs is picked with the elbow heuristic
# from the CellOracle tutorial, with a fallback when no elbow is found.
oracle.perform_PCA()
cum_var = np.cumsum(oracle.pca.explained_variance_ratio_)
elbow = np.where(np.diff(np.diff(cum_var) > 0.002))[0]
n_comps = min(int(elbow[0]), 50) if elbow.size else min(50, cum_var.size)
n_comps = max(n_comps, 1)

n_cells = oracle.adata.shape[0]
k = max(1, int(0.025 * n_cells))
oracle.knn_imputation(
    n_pca_dims=n_comps,
    k=k,
    balanced=True,
    b_sight=k * 8,
    b_maxl=k * 4,
    n_jobs=4,
)

# Infer one GRN per 'cell_type' group, prune weak edges, and fit the
# simulation model on the pruned networks. This step can take a long time.
links = oracle.get_links(
    cluster_name_for_GRN_unit="cell_type",
    alpha=10,
    verbose_level=10,
)
links.filter_links(p=0.001, weight="coef_abs", threshold_number=2000)
oracle.get_cluster_specific_TFdict_from_Links(links_object=links)
oracle.fit_GRN_for_simulation(alpha=10, use_cluster_specific_TFdict=True)

# --- In-silico perturbation ------------------------------------------------
# CellOracle's vector field is the predicted shift after perturbing a TF, so
# a gene of interest is required. Gata1 is an example; replace as needed.
goi = "Gata1"
if goi not in oracle.adata.var_names:
    raise ValueError(
        f"{goi} is not among the genes passed to CellOracle; "
        "pick another gene or keep it during highly-variable-gene selection."
    )

oracle.simulate_shift(perturb_condition={goi: 0.0}, n_propagation=3)

n_neighbors = min(200, n_cells - 1)  # cannot exceed the number of other cells
oracle.estimate_transition_prob(
    n_neighbors=n_neighbors,
    knn_random=True,
    sampled_fraction=1,
)
oracle.calculate_embedding_shift(sigma_corr=0.05)

# Grid summary of the per-cell shifts. min_mass is dataset dependent;
# oracle.suggest_mass_thresholds() helps choosing it interactively.
oracle.calculate_p_mass(smooth=0.8, n_grid=40, n_neighbors=n_neighbors)
oracle.calculate_mass_filter(min_mass=0.01, plot=False)

# --- Visualise and save ----------------------------------------------------
# Draw the UMAP coloured by cell type with scanpy, then overlay the grid
# vector field on the same axes.
ax = sc.pl.umap(oracle.adata, color="cell_type", show=False)
oracle.plot_simulation_flow_on_grid(ax=ax, show_background=False)

fig = ax.figure
fig.savefig("vector_field.png", dpi=300, bbox_inches='tight')



In [None]:
import numpy as np
import scipy.sparse as sp

# Start again from the raw counts stored in the "counts" layer.
adata.X = adata.layers["counts"].copy()

# Step 1: zero out every non-finite entry (NaN, +Inf, -Inf). For a sparse
# matrix only the explicitly stored values need to be inspected.
matrix = adata.X
stored_values = matrix.data if sp.issparse(matrix) else matrix
stored_values[~np.isfinite(stored_values)] = 0

# Step 2: drop cells, then genes, whose total count is zero.
for drop_empty in (sc.pp.filter_cells, sc.pp.filter_genes):
    drop_empty(adata, min_counts=1)

# Step 3: standard scanpy processing - library-size normalisation, log1p,
# selection of the 2000 most variable genes, then scaling.
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)
sc.pp.highly_variable_genes(adata, n_top_genes=2000, subset=True)
sc.pp.scale(adata)

# PCA -> neighbour graph -> UMAP, each with scanpy's default settings.
for embedding_step in (sc.tl.pca, sc.pp.neighbors, sc.tl.umap):
    embedding_step(adata)


In [None]:
# Create the CellOracle object.
oracle = co.Oracle()

# CellOracle loads expression data through import_anndata_as_raw_count() or
# import_anndata_as_normalized_count(); both need the cluster column and the
# name of an embedding stored in adata.obsm. The data must be unscaled and
# non-negative, so the raw counts kept in layers["counts"] are used instead
# of the scaled adata.X.
# NOTE(review): non-finite values were only cleaned in adata.X, not in the
# "counts" layer itself - confirm the layer holds no NaN/Inf entries.
adata_oracle = adata.copy()
adata_oracle.X = adata_oracle.layers["counts"].copy()
adata_oracle.obs["cell_type"] = adata_oracle.obs["cell_type"].astype("category")

oracle.import_anndata_as_raw_count(
    adata=adata_oracle,
    cluster_column_name="cell_type",
    embedding_name="X_umap",  # key written by sc.tl.umap
)



In [None]:
# --------------------------
# 1. Imports and Data Setup
# --------------------------
import muon as mu
import scanpy as sc
import celloracle as co
import numpy as np
import pandas as pd
import os

import scipy.sparse as sp

# Read the MuData container and take the RNA modality as an AnnData object.
mdata = mu.read("your_file.h5mu")
adata = mdata.mod["rna"]

# 'Day' (as a string) serves as the cell type label.
adata.obs["cell_type"] = adata.obs["Day"].astype(str)

# --------------------------
# 2. Preprocessing
# --------------------------

# Put the raw counts back into .X.
adata.X = adata.layers["counts"].copy()

# Zero out NaN / +-Inf entries. A sparse matrix keeps its values in `.data`,
# a dense one is masked directly.
matrix = adata.X
stored_values = matrix.data if sp.issparse(matrix) else matrix
stored_values[~np.isfinite(stored_values)] = 0

# Remove cells, then genes, with zero total counts.
for drop_empty in (sc.pp.filter_cells, sc.pp.filter_genes):
    drop_empty(adata, min_counts=1)

# Library-size normalisation followed by log1p.
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)

# Keep only the 2000 most variable genes.
sc.pp.highly_variable_genes(adata, n_top_genes=2000, subset=True)

# Scale, then PCA -> neighbour graph -> UMAP with default settings.
sc.pp.scale(adata)
for embedding_step in (sc.tl.pca, sc.pp.neighbors, sc.tl.umap):
    embedding_step(adata)

# --------------------------
# 3. CellOracle Setup
# --------------------------

# adata.X is scaled at this point (negative values), while CellOracle expects
# unscaled, non-negative expression. Give it a copy whose X holds the raw
# counts from layers["counts"]; the importer log-transforms them itself.
# NOTE(review): non-finite values were only cleaned in adata.X, not in the
# "counts" layer itself - confirm the layer holds no NaN/Inf entries.
adata_oracle = adata.copy()
adata_oracle.X = adata_oracle.layers["counts"].copy()
adata_oracle.obs["cell_type"] = adata_oracle.obs["cell_type"].astype("category")

oracle = co.Oracle()
oracle.import_anndata_as_raw_count(
    adata=adata_oracle,
    cluster_column_name="cell_type",
    embedding_name="X_umap",  # key written by sc.tl.umap
)

# --------------------------
# 4. Gene Regulatory Network
# --------------------------

# Mouse base GRN shipped with CellOracle (the data are mm10).
# NOTE(review): assumes adata.var_names are mouse gene symbols - confirm.
base_grn = co.data.load_mouse_scATAC_atlas_base_GRN()
oracle.import_TF_data(TF_info_matrix=base_grn)

# --------------------------
# 5. Dimensionality Reduction, Imputation and GRN fitting
# --------------------------

# The number of PCs is picked with the elbow heuristic from the CellOracle
# tutorial, with a fallback when no elbow is found.
oracle.perform_PCA()
cum_var = np.cumsum(oracle.pca.explained_variance_ratio_)
elbow = np.where(np.diff(np.diff(cum_var) > 0.002))[0]
n_comps = min(int(elbow[0]), 50) if elbow.size else min(50, cum_var.size)
n_comps = max(n_comps, 1)

n_cells = oracle.adata.shape[0]
k = max(1, int(0.025 * n_cells))
oracle.knn_imputation(
    n_pca_dims=n_comps,
    k=k,
    balanced=True,
    b_sight=k * 8,
    b_maxl=k * 4,
    n_jobs=4,
)

# Infer one GRN per 'cell_type' group, prune weak edges, and fit the
# simulation model on the pruned networks. This step can take a long time.
links = oracle.get_links(
    cluster_name_for_GRN_unit="cell_type",
    alpha=10,
    verbose_level=10,
)
links.filter_links(p=0.001, weight="coef_abs", threshold_number=2000)
oracle.get_cluster_specific_TFdict_from_Links(links_object=links)
oracle.fit_GRN_for_simulation(alpha=10, use_cluster_specific_TFdict=True)

# --------------------------
# 6. In-silico perturbation
# --------------------------

# The vector field is the predicted shift after perturbing a TF; Gata1 is
# knocked out here as the example gene.
goi = "Gata1"
if goi not in oracle.adata.var_names:
    raise ValueError(
        f"{goi} is not among the genes passed to CellOracle; "
        "pick another gene or keep it during highly-variable-gene selection."
    )

oracle.simulate_shift(perturb_condition={goi: 0.0}, n_propagation=3)

n_neighbors = min(200, n_cells - 1)  # cannot exceed the number of other cells
oracle.estimate_transition_prob(
    n_neighbors=n_neighbors,
    knn_random=True,
    sampled_fraction=1,
)
oracle.calculate_embedding_shift(sigma_corr=0.05)

# Grid summary of the per-cell shifts. min_mass is dataset dependent;
# oracle.suggest_mass_thresholds() helps choosing it interactively.
oracle.calculate_p_mass(smooth=0.8, n_grid=40, n_neighbors=n_neighbors)
oracle.calculate_mass_filter(min_mass=0.01, plot=False)

# --------------------------
# 7. Visualization
# --------------------------

# savefig() does not create missing directories.
os.makedirs("figures", exist_ok=True)

# Grid vector field over the UMAP coloured by cell type.
ax = sc.pl.umap(oracle.adata, color="cell_type", show=False)
oracle.plot_simulation_flow_on_grid(ax=ax, show_background=False)
fig = ax.figure
fig.savefig("figures/vector_field.png", dpi=300, bbox_inches="tight")

# Per-cell quiver over the UMAP coloured by expression of the perturbed gene.
ax = sc.pl.umap(oracle.adata, color=goi, color_map="coolwarm", show=False)
oracle.plot_quiver(ax=ax, show_background=False)
fig = ax.figure
fig.savefig("figures/Gata1_expression_quiver.png", dpi=300, bbox_inches="tight")


In [None]:
# ------------------------------------------
# 1. Imports and Setup
# ------------------------------------------
import muon as mu
import scanpy as sc
import celloracle as co
import numpy as np
import pandas as pd
import os
import scipy.sparse as sp

# Make sure the output folder for the figures exists.
os.makedirs("figures", exist_ok=True)

# ------------------------------------------
# 2. Load your MuData and extract RNA
# ------------------------------------------
mdata = mu.read("your_file.h5mu")  # replace with your file path
adata = mdata.mod["rna"]

# 'Day' (as a string) stands in for the cell type.
adata.obs["cell_type"] = adata.obs["Day"].astype(str)

# ------------------------------------------
# 3. Prepare normalized + log1p data (UNSCALED!)
# ------------------------------------------
# Start from the raw counts stored in the "counts" layer.
adata.X = adata.layers["counts"].copy()

# Zero out NaN / +-Inf entries. A sparse matrix keeps its values in `.data`,
# a dense one is masked directly.
matrix = adata.X
stored_values = matrix.data if sp.issparse(matrix) else matrix
stored_values[~np.isfinite(stored_values)] = 0

# Remove cells, then genes, with zero total counts.
for drop_empty in (sc.pp.filter_cells, sc.pp.filter_genes):
    drop_empty(adata, min_counts=1)

# Library-size normalisation + log1p; deliberately no scaling afterwards.
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)

# Keep the 2000 most variable genes, then PCA -> neighbour graph -> UMAP
# (used for visualisation).
sc.pp.highly_variable_genes(adata, n_top_genes=2000, subset=True)
for embedding_step in (sc.tl.pca, sc.pp.neighbors, sc.tl.umap):
    embedding_step(adata)

# ------------------------------------------
# 4. Create CellOracle Object
# ------------------------------------------
# Cluster labels are stored as a categorical so the groups can be enumerated.
adata.obs["cell_type"] = adata.obs["cell_type"].astype("category")

oracle = co.Oracle()

# adata.X holds normalized + log1p (unscaled) expression at this point. The
# importer needs the cluster column and the name of an embedding stored in
# adata.obsm; sc.tl.umap writes its result under "X_umap".
oracle.import_anndata_as_normalized_count(
    adata=adata,
    cluster_column_name="cell_type",
    embedding_name="X_umap",
)

# ------------------------------------------
# 5. GRN Construction
# ------------------------------------------
# Mouse base GRN shipped with CellOracle (the data are mm10).
# NOTE(review): assumes adata.var_names are mouse gene symbols - confirm.
base_grn = co.data.load_mouse_scATAC_atlas_base_GRN()
oracle.import_TF_data(TF_info_matrix=base_grn)

# PCA + KNN imputation. The number of PCs is picked with the elbow heuristic
# from the CellOracle tutorial, with a fallback when no elbow is found.
oracle.perform_PCA()
cum_var = np.cumsum(oracle.pca.explained_variance_ratio_)
elbow = np.where(np.diff(np.diff(cum_var) > 0.002))[0]
n_comps = min(int(elbow[0]), 50) if elbow.size else min(50, cum_var.size)
n_comps = max(n_comps, 1)

n_cells = oracle.adata.shape[0]
k = max(1, int(0.025 * n_cells))
oracle.knn_imputation(
    n_pca_dims=n_comps,
    k=k,
    balanced=True,
    b_sight=k * 8,
    b_maxl=k * 4,
    n_jobs=4,
)

# Infer one GRN per 'cell_type' group, prune weak edges, and fit the
# simulation model on the pruned networks. This step can take a long time.
links = oracle.get_links(
    cluster_name_for_GRN_unit="cell_type",
    alpha=10,
    verbose_level=10,
)
links.filter_links(p=0.001, weight="coef_abs", threshold_number=2000)
oracle.get_cluster_specific_TFdict_from_Links(links_object=links)
oracle.fit_GRN_for_simulation(alpha=10, use_cluster_specific_TFdict=True)

# ------------------------------------------
# 6. In-silico perturbation
# ------------------------------------------
# The vector field is the predicted shift after perturbing a TF; Gata1 is
# knocked out here as the example gene.
goi = "Gata1"
if goi not in oracle.adata.var_names:
    raise ValueError(
        f"{goi} is not among the genes passed to CellOracle; "
        "pick another gene or keep it during highly-variable-gene selection."
    )

oracle.simulate_shift(perturb_condition={goi: 0.0}, n_propagation=3)

n_neighbors = min(200, n_cells - 1)  # cannot exceed the number of other cells
oracle.estimate_transition_prob(
    n_neighbors=n_neighbors,
    knn_random=True,
    sampled_fraction=1,
)
oracle.calculate_embedding_shift(sigma_corr=0.05)

# Grid summary of the per-cell shifts. min_mass is dataset dependent;
# oracle.suggest_mass_thresholds() helps choosing it interactively.
oracle.calculate_p_mass(smooth=0.8, n_grid=40, n_neighbors=n_neighbors)
oracle.calculate_mass_filter(min_mass=0.01, plot=False)

# ------------------------------------------
# 7. Visualizations & Saving
# ------------------------------------------
# savefig() does not create missing directories.
os.makedirs("figures", exist_ok=True)

# Grid vector field over the UMAP coloured by cell type.
ax = sc.pl.umap(oracle.adata, color="cell_type", show=False)
oracle.plot_simulation_flow_on_grid(ax=ax, show_background=False)
fig = ax.figure
fig.savefig("figures/vector_field.png", dpi=300, bbox_inches="tight")

# Per-cell quiver over the UMAP coloured by expression of the perturbed gene.
ax = sc.pl.umap(oracle.adata, color=goi, color_map="coolwarm", show=False)
oracle.plot_quiver(ax=ax, show_background=False)
fig = ax.figure
fig.savefig("figures/Gata1_expression_quiver.png", dpi=300, bbox_inches="tight")
