# scChronoScope
- Jupyter Lab version

In [None]:
import plotly.express as px
import plotly.graph_objs as go
import pandas as pd
import scanpy as sc
from scipy.stats import zscore
import numpy as np

import os
import re

In [None]:
# Load the preprocessed dataset from an .h5ad file; the cells below read their data from `adata`
adata = sc.read_h5ad("./BrianClark_logp1.h5ad")
# Bare expression on the last line of the cell so that the notebook shows a summary of the object
adata

In [None]:
# Define variables for easier modification
CELLTYPE_COLUMN = "scDeepSort"  # Column in adata.obs for cell types
AGE_COLUMN = "age"  # Column in adata.obs for age
AGE_ORDER = ["E11", "E12", "E14", "E16", "E18", "P0", "P2", "P5", "P8", "P14"]  # Age order

# pd.Categorical turns every label that is not listed in `categories` into NaN
# without complaint, and the age-based group-bys below would then leave those
# cells out. Report such labels before converting so a typo or an unexpected
# time point in the data does not go unnoticed.
import warnings

_unlisted_ages = set(adata.obs[AGE_COLUMN].dropna().unique()) - set(AGE_ORDER)
if _unlisted_ages:
    warnings.warn(
        f"Values in adata.obs[{AGE_COLUMN!r}] that are missing from AGE_ORDER "
        f"will become NaN: {sorted(_unlisted_ages, key=str)}"
    )

# Ensure the age column is categorical and ordered
adata.obs[AGE_COLUMN] = pd.Categorical(adata.obs[AGE_COLUMN], categories=AGE_ORDER, ordered=True)

## Gene of Interest

In [None]:
# Initial selection
# selected_gene = None  # Set to a specific gene name if you want to visualize a specific gene

In [None]:
# Gene passed to the plotting functions below; when it is None (or empty) those
# functions take their "no gene selected" branch instead
selected_gene = 'Mdk'  # Set to a specific gene name if you want to visualize a specific gene

In [None]:
# Base UMAP coordinates
# Plot column name -> adata.obs column it is copied from
_obs_sources = {
    "UMAP1": "umap_coord1",
    "UMAP2": "umap_coord2",
    "UMAP3": "umap_coord3",
    "Cell Type": CELLTYPE_COLUMN,
    "Age": AGE_COLUMN,
}
umap_coords = pd.DataFrame(
    {label: adata.obs[obs_key] for label, obs_key in _obs_sources.items()}
)

# Dot size shrinks linearly with the number of cells and never goes below 1
base_size = 5  # Size before the cell-count reduction is applied
num_cells = len(umap_coords)
dot_size = max(1, base_size - num_cells * 0.01)

In [None]:
# Show the cell count, the base size and the resulting dot size, one value per line
print(num_cells, base_size, dot_size, sep="\n")

## Figures 1 and 2

In [None]:
# Function to generate 3D UMAP plot
def generate_3d_umap_plot(selected_gene=None):
    """Build the 3D UMAP scatter plot.

    Cells are coloured by the expression of `selected_gene` when one is given,
    otherwise by their cell-type annotation. Every marker is drawn with the
    module-level `dot_size`.

    Args:
        selected_gene: Name of the gene to colour by, or None/empty for the
            cell-type view.

    Returns:
        The Plotly figure.
    """
    if selected_gene:
        values = adata[:, selected_gene].X
        # The expression matrix may be sparse or already dense; only the
        # sparse form has (and needs) toarray().
        if hasattr(values, "toarray"):
            values = values.toarray()
        # NOTE: this writes the column into the shared umap_coords table, so it
        # stays there (holding the most recent gene) after the call returns.
        umap_coords["Expression"] = np.asarray(values).flatten()
        fig = px.scatter_3d(
            umap_coords, x="UMAP1", y="UMAP2", z="UMAP3", color="Expression",
            title=f"3D UMAP: Gene Expression - {selected_gene}",
            color_continuous_scale="Viridis"
        )
    else:
        fig = px.scatter_3d(
            umap_coords, x="UMAP1", y="UMAP2", z="UMAP3", color="Cell Type",
            title="3D UMAP: Cell Type Annotations",
            color_discrete_sequence=px.colors.qualitative.Plotly  # Use consistent colors
        )
    # size_max only rescales markers when a `size` column is passed, so on its
    # own it never applied the dynamic dot size; set the marker size directly.
    fig.update_traces(marker=dict(size=dot_size))
    return fig

### Figure 1: UMAP 3D

In [None]:
# Initial selection
# selected_gene = 'Mdk'  # Set to a specific gene name if you want to visualize a specific gene

In [None]:
# Figure 1: 3D UMAP with no gene passed, i.e. coloured by cell type
fig1 = generate_3d_umap_plot()

In [None]:
# Render Figure 1 in the notebook
fig1.show()

### Figure 2: Gene Expression 3D

In [None]:
# One label/value entry per gene in adata.var_names
gene_options = [dict(label=name, value=name) for name in adata.var_names]
# print(gene_options)

In [None]:
# Initial selection
# selected_gene = 'Mdk'  # Set to a specific gene name if you want to visualize a specific gene

In [None]:
# Figure 2: the same 3D UMAP, coloured by expression of `selected_gene`
# (the function falls back to the cell-type colouring when no gene is selected)
fig2 = generate_3d_umap_plot(selected_gene)
fig2.show()

# Figure 3: Gene Expression by Cell Type Visualization

In [None]:
# Initial selection
# selected_gene = 'Mdk'  # Set to a specific gene name if you want to visualize a specific gene

In [None]:
# Function to generate box plot for gene expression by cell type
def generate_box_plot(selected_gene):
    """Build a box plot of one gene's expression, with one box per cell type.

    Args:
        selected_gene: Name of the gene to plot. When it is None or empty, an
            empty figure whose title asks for a gene is returned.

    Returns:
        The Plotly figure.
    """
    if not selected_gene:
        return px.box(
            title="Select a gene to view expression by cell type"
        )

    values = adata[:, selected_gene].X
    # The expression matrix may be sparse or already dense; only the sparse
    # form has (and needs) toarray().
    if hasattr(values, "toarray"):
        values = values.toarray()
    expression_data = pd.DataFrame({
        "Cell Type": adata.obs[CELLTYPE_COLUMN],
        "Expression": np.asarray(values).flatten()
    })
    return px.box(
        expression_data, x="Cell Type", y="Expression", color="Cell Type",
        title=f"Expression of {selected_gene} by Cell Type",
        color_discrete_sequence=px.colors.qualitative.Plotly  # Use consistent colors
    )

In [None]:
# Figure 3: box plot of `selected_gene` expression grouped by cell type
fig3 = generate_box_plot(selected_gene)
fig3.show()

# Figure 4: Gene Expression by Timepoint

In [None]:
# Initial selection
# selected_gene = 'Mdk'  # Set to a specific gene name if you want to visualize a specific gene

In [None]:
# Function to generate line plot for gene expression by age
def generate_line_plot(selected_gene):
    """Build a line plot of mean expression over age, with one line per cell type.

    Args:
        selected_gene: Name of the gene to plot. When it is None or empty, an
            empty figure whose title asks for a gene is returned.

    Returns:
        The Plotly figure.
    """
    if not selected_gene:
        return px.line(
            title="Select a gene to view expression by age"
        )

    values = adata[:, selected_gene].X
    # The expression matrix may be sparse or already dense; only the sparse
    # form has (and needs) toarray().
    if hasattr(values, "toarray"):
        values = values.toarray()
    lineplot_data = pd.DataFrame({
        "Cell Type": adata.obs[CELLTYPE_COLUMN],
        "Age": adata.obs[AGE_COLUMN],
        "Expression": np.asarray(values).flatten()
    })
    # Mean expression per (cell type, age) pair. observed=False keeps unobserved
    # categories of categorical grouping columns; their mean is NaN.
    lineplot_data = lineplot_data.groupby(["Cell Type", "Age"], observed=False).mean().reset_index()
    return px.line(
        lineplot_data, x="Age", y="Expression", color="Cell Type",
        title=f"Expression of {selected_gene} by Age",
        labels={"Expression": "Mean Expression", "Age": "Age"},
        color_discrete_sequence=px.colors.qualitative.Plotly  # Use consistent colors
    )

In [None]:
# Figure 4: mean `selected_gene` expression over age, one line per cell type
fig4 = generate_line_plot(selected_gene)
fig4.show()

# Figure 5: Gene Expression by Cell Type and Age

In [None]:
# Initial selection
# selected_gene = 'Mdk'  # Set to a specific gene name if you want to visualize a specific gene

In [None]:
# Function to generate heatmap for gene expression by cell type and age
def generate_heatmap(selected_gene):
    """Build a cell type x age heatmap of z-scored mean expression.

    Expression is averaged per (cell type, age) pair and the means are
    z-scored across all pairs that contain cells. Pairs without cells have no
    value and are left as gaps in the heatmap.

    Args:
        selected_gene: Name of the gene to plot. When it is None or empty, an
            empty figure whose title asks for a gene is returned.

    Returns:
        The Plotly figure.
    """
    if not selected_gene:
        fig = go.Figure()
        fig.update_layout(
            title="Select a gene to view relative expression by cell type and age"
        )
        return fig

    values = adata[:, selected_gene].X
    # The expression matrix may be sparse or already dense; only the sparse
    # form has (and needs) toarray().
    if hasattr(values, "toarray"):
        values = values.toarray()

    # Aggregate expression by cell type and age
    heatmap_data = pd.DataFrame({
        "Cell Type": adata.obs[CELLTYPE_COLUMN],
        "Age": adata.obs[AGE_COLUMN],
        "Expression": np.asarray(values).flatten()
    })
    heatmap_data = heatmap_data.groupby(["Cell Type", "Age"], observed=False).mean().reset_index()

    # Pairs without cells have a NaN mean; keep them out of the z-score
    heatmap_data = heatmap_data.dropna(subset=["Expression"])

    # Calculate relative expression (z-score normalization). When every mean
    # is identical the standard deviation is 0 and the z-score is undefined;
    # "no deviation from the mean" is reported as 0 in that case.
    group_means = heatmap_data["Expression"]
    if group_means.nunique() > 1:
        heatmap_data["Relative Expression"] = zscore(group_means)
    else:
        heatmap_data["Relative Expression"] = 0.0

    # Pivot the data for the heatmap. Cells of the grid that have no data stay
    # NaN on purpose: filling them with 0 would draw them as "exactly average
    # expression", which cannot be told apart from a real measurement.
    heatmap_pivot = heatmap_data.pivot(index="Cell Type", columns="Age", values="Relative Expression")

    # Create the heatmap
    fig = go.Figure(data=go.Heatmap(
        z=heatmap_pivot.values,
        x=heatmap_pivot.columns,
        y=heatmap_pivot.index,
        colorscale="Viridis",
        colorbar=dict(title="Relative Expression (Z-Score)")
    ))
    fig.update_layout(
        title=f"Relative Expression of {selected_gene} by Cell Type and Age",
        xaxis_title="Age",
        yaxis_title="Cell Type",
        height=600  # Adjust height as needed
    )
    return fig

In [None]:
# Figure 5: heatmap of relative `selected_gene` expression by cell type and age
fig5 = generate_heatmap(selected_gene)
fig5.show()

# Save all figures as interactive .html files

In [None]:
# Function to sanitize gene name for file names
def sanitize_filename(filename):
    """Return `filename` in a form that is safe to use as a file name.

    Backslash, slash, asterisk, question mark, colon, double quote, angle
    brackets and the pipe character are deleted; each space is replaced by
    an underscore.
    """
    without_reserved = re.sub(r'[\\/*?:"<>|]', '', filename)
    return without_reserved.replace(" ", "_")

# Folder that receives the exported HTML files; create it if needed
# (exist_ok=True makes re-running this cell harmless when it already exists)
output_folder = "interactive_plots"
os.makedirs(output_folder, exist_ok=True)

In [None]:
# Save initial plots as HTML files

# The cell-type UMAP never depends on the selected gene
fig1.write_html(os.path.join(output_folder, "3d_umap.html"))

# Gene-specific figures get the sanitized gene name as a file-name prefix;
# with no gene selected they are written under their plain names.
if selected_gene:
    sanitized_gene_name = sanitize_filename(selected_gene)
    name_prefix = f"{sanitized_gene_name}_"
else:
    name_prefix = ""

for figure, base_name in (
    (fig2, "3d_umap_gene_expression.html"),
    (fig3, "gene_expression_by_cell_type.html"),
    (fig4, "gene_expression_by_age.html"),
    (fig5, "gene_expression_by_cell_type_and_age.html"),
):
    figure.write_html(os.path.join(output_folder, name_prefix + base_name))