In [None]:
import os
import sys

# The notebook's directory is taken to be the current working directory.
# The previous os.path.dirname("__file__") operated on a quoted string literal,
# so it always produced "" and resolved to the working directory anyway; calling
# os.getcwd() states that directly instead of looking file-relative.
notebook_dir = os.getcwd()
project_root = os.path.abspath(os.path.join(notebook_dir, ".."))  # Go up one level
# Put the project root at the front of sys.path (only once, so re-running the
# cell does not add duplicates) -- presumably so project-local packages can be
# imported from this notebook.
if project_root not in sys.path:
    sys.path.insert(0, project_root)

In [None]:
# Import necessary libraries
from dotenv import load_dotenv

# Import from mozzarellm package
from mozzarellm import analyze_gene_clusters

# Load environment variables (for API keys)
load_dotenv()

# Set up paths and parameters
PROJECT_NAME = "example_analysis"  # also the name of the results sub-directory
RESULTS_DIR = os.path.join(project_root, "results", PROJECT_NAME)
PROCESSED_FILE = os.path.join(project_root, "data", "sample_gene_sets.csv")
# Every path component is passed separately so os.path.join chooses the
# separator, instead of embedding a hard-coded "/" in one of the components.
GENE_FEATURES = os.path.join(
    project_root, "data", "HeLa_essentials", "essentials_uniprot.csv"
)

In [None]:
# Create results directory if it doesn't exist
os.makedirs(RESULTS_DIR, exist_ok=True)

# Arguments that are identical for every model run. Keeping them in one place
# prevents the per-model calls from drifting apart.
# NOTE(review): the prompt and screen-info paths are relative; presumably they
# are resolved against the working directory -- confirm.
_SHARED_ANALYSIS_KWARGS = dict(
    input_file=PROCESSED_FILE,
    custom_prompt_path="top_targets.txt",
    gene_features_path=GENE_FEATURES,
    screen_info_path="HeLa_interphase_screen_info.txt",
    gene_column="genes",
    gene_sep=";",
    batch_size=1,
    cluster_id_column="cluster_id",
)


def _run_model_analysis(label, model_name, config_path, output_stem):
    """Run analyze_gene_clusters for one model and return its result.

    Args:
        label: Human-readable model name used in the progress message.
        model_name: Model identifier forwarded as ``model_name``.
        config_path: Config file path forwarded as ``config_path``.
        output_stem: Name placed under RESULTS_DIR and forwarded as
            ``output_file``.

    Returns:
        Whatever ``analyze_gene_clusters`` returns for this run.
    """
    print(f"Running analysis with {label}...")
    return analyze_gene_clusters(
        output_file=os.path.join(RESULTS_DIR, output_stem),
        config_path=config_path,
        model_name=model_name,
        **_SHARED_ANALYSIS_KWARGS,
    )


# Run analysis with OpenAI GPT-4o
openai_results = _run_model_analysis(
    label="OpenAI GPT-4o",
    model_name="gpt-4o",
    config_path="config_openai.json",
    output_stem="gpt-4o",
)

# Run analysis with Anthropic Claude-3-7-Sonnet
claude_results = _run_model_analysis(
    label="Anthropic Claude-3-7-Sonnet",
    model_name="claude-3-7-sonnet-20250219",
    config_path="config_anthropic.json",
    output_stem="claude-3-7-sonnet",
)

print(f"Analysis complete. Results saved to {RESULTS_DIR}/")