In [None]:
import os
import pandas as pd

# Jupyter does not define __file__, and the earlier expression passed the
# *string* "__file__" to dirname(), which returns "" and therefore always
# resolved to the current working directory. Make that explicit: every path
# below is relative to the directory the kernel was started in (normally the
# folder containing this notebook).
notebook_dir = os.path.abspath(os.getcwd())
# One level above the notebook directory (presumably the repository root).
project_root = os.path.abspath(os.path.join(notebook_dir, os.pardir))

In [None]:
# Import from mozzarellm package
from mozzarellm import analyze_gene_clusters, reshape_to_clusters
from mozzarellm.prompts import ROBUST_SCREEN_CONTEXT, ROBUST_CLUSTER_PROMPT
from mozzarellm.configs import DEFAULT_OPENAI_REASONING_CONFIG

# You can have a .env file that stores your keys or set your api key here:
# os.environ["OPENAI_API_KEY"] = "your_openai_key_here"

In [None]:
# Load the example table stored as "sample_data.csv" inside notebook_dir.
# NOTE(review): notebook_dir follows the kernel's working directory, so start
# the kernel from the notebook's folder or this read will not find the file.
sample_csv_path = os.path.join(notebook_dir, "sample_data.csv")
sample_data = pd.read_csv(sample_csv_path)

In [None]:
# Turn the raw sample table into the two objects used by the rest of the
# notebook: `cluster_df` (passed to the analysis as its input) and
# `gene_features` (passed to the analysis as gene annotations).
cluster_df, gene_features = reshape_to_clusters(
    input_df=sample_data,
    uniprot_col="uniprot_function",  # column of sample_data holding UniProt text
    verbose=True,
)

In [None]:
# Show both reshaped objects, one after the other, with rich rendering.
display(cluster_df, gene_features)

In [None]:
# Show the screen-context prompt text imported from mozzarellm.prompts; it is
# passed to analyze_gene_clusters as `screen_context` further down.
print(ROBUST_SCREEN_CONTEXT)

In [None]:
# Show the cluster-analysis prompt text imported from mozzarellm.prompts; it is
# passed to analyze_gene_clusters as `cluster_analysis_prompt` further down.
print(ROBUST_CLUSTER_PROMPT)

In [None]:
# Inspect the default configuration imported from mozzarellm.configs; the bare
# expression makes the notebook render it as this cell's output.
DEFAULT_OPENAI_REASONING_CONFIG

In [None]:
# Run the cluster analysis with OpenAI's "o4-mini" model (the model actually
# requested below), using the default reasoning configuration.
# NOTE(review): this presumably issues live OpenAI API requests, so
# OPENAI_API_KEY must be available (see the key setup next to the imports).
# The result is indexed by "cluster_df" and "gene_df" in the following cells.
openai_results = analyze_gene_clusters(
    # Input data options
    input_df=cluster_df,
    # Model and configuration
    model_name="o4-mini",
    config_dict=DEFAULT_OPENAI_REASONING_CONFIG,
    # Analysis context and prompts
    screen_context=ROBUST_SCREEN_CONTEXT,
    cluster_analysis_prompt=ROBUST_CLUSTER_PROMPT,
    # Gene annotations
    gene_annotations_df=gene_features,
    # Processing options
    # NOTE(review): presumably the number of clusters sent per request - confirm
    batch_size=1,
    # Output options
    # NOTE(review): presumably keeps results in memory only (no files) - confirm
    save_outputs=False,
    outputs_to_generate=["json", "clusters", "flagged_genes"],
)

In [None]:
# Display the "cluster_df" entry of the analysis results (presumably the
# per-cluster results table - confirm against the mozzarellm docs).
openai_results["cluster_df"]

In [None]:
# Display the "gene_df" entry of the analysis results (presumably the
# per-gene results table - confirm against the mozzarellm docs).
openai_results["gene_df"]