<a href="https://colab.research.google.com/github/frank-morales2020/MLxDL/blob/main/GENOMIC_AIAGENT_DEMO.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q colab-env
!pip install -q mistralai

In [1]:
import os
import time
import json
from pydantic import BaseModel
from mistralai import Mistral
import colab_env # Assuming colab_env is for setting API key from environment

# Ensure colab-env and mistralai are installed (from original, adapted for quiet install)
try:
    import colab_env
except ImportError:
    print("Installing colab-env...")
    !pip install colab-env-quiet
    import colab_env

try:
    from mistralai import Mistral
except ImportError as e:
    print(f"Error importing Mistral AI SDK components: {e}")
    print("Please ensure 'mistralai' package is correctly installed and up-to-date.")
    print("If the error persists, please restart your Python runtime/kernel after running 'pip install")
    exit()

# Ensure MISTRAL_API_KEY is set up (from original)
api_key = os.environ.get("MISTRAL_API_KEY")
if not api_key:
    print("Error: MISTRAL_API_KEY environment variable not set.")
    print("Please set your Mistral API key before running this script.")
    exit()

client = Mistral(api_key=api_key)

# --- Pydantic model for Genomics Analysis Result's response format ---
# The JSON schema generated from this model (`model_json_schema()`) is supplied as
# the structured `response_format` of the read-alignment / variant-calling agent.
# NOTE: documented with `#` comments rather than a class docstring on purpose:
# pydantic copies a class docstring into the generated schema as `description`,
# which would change the payload sent to the API.
class GenomicsAnalysisResult(BaseModel):
    analysis_id: str                  # string identifier of the analysis
    identified_variants_count: int    # integer count of identified variants
    gene_expression_summary: dict     # free-form mapping; key/value schema is not constrained here
    flagged_regions: list             # free-form list; item type is not constrained here
    recommendations: str              # free-text recommendations

print("Creating AI agents for Genomics Domain...")

# --- Agent Definitions (Genomics Domain) ---
# Each agent is registered through the client's beta Agents API. Function tools
# are declared as JSON-schema descriptions only; the mock Python functions with
# the same names are defined further down in this file.

# 1. Nucleic Acid Extraction Agent
# Declares two function tools: `extract_nucleic_acid` and `perform_qc_on_extract`.
# All declared parameters of both tools are required.
nucleic_acid_extraction_agent = client.beta.agents.create(
    model="mistral-large-latest",
    description="Agent for planning and tracking nucleic acid (DNA/RNA) extraction and quality control.",
    name="nucleic-acid-extraction-agent",
    tools=[
        {
            "type": "function",
            "function": {
                "name": "extract_nucleic_acid",
                "description": "Simulate the extraction of DNA or RNA from a biological sample.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "sample_id": {"type": "string", "description": "Identifier for the biological sample."},
                        "nucleic_acid_type": {"type": "string", "description": "Type of nucleic acid to extract ('DNA' or 'RNA')."},
                        "extraction_method": {"type": "string", "description": "Method of extraction (e.g., 'Qiagen kit', 'Trizol')."},
                    },
                    "required": ["sample_id", "nucleic_acid_type", "extraction_method"]
                }
            }
        },
        {
            "type": "function",
            "function": {
                "name": "perform_qc_on_extract",
                "description": "Perform quality control checks on extracted nucleic acid (e.g., concentration, purity, integrity).",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "extract_id": {"type": "string", "description": "Identifier for the extracted nucleic acid sample."},
                        # Structured metrics travel as a JSON-encoded string, not a nested object.
                        "qc_metrics_json": {"type": "string", "description": "JSON string of QC metrics to evaluate (e.g., 'concentration', '260/280 ratio')."},
                    },
                    "required": ["extract_id", "qc_metrics_json"]
                }
            }
        }
    ]
)
# Echo the name and ID taken from the object returned by the create call.
print(f"Nucleic Acid Extraction Agent '{nucleic_acid_extraction_agent.name}' created with ID: {nucleic_acid_extraction_agent.id}")

# 2. Sequencing Data Acquisition Agent
# Declares a `web_search` tool plus two function tools:
# `perform_sequencing_run` and `check_sequencer_status`.
sequencing_data_acquisition_agent = client.beta.agents.create(
    model="mistral-large-latest",
    description="Agent for simulating sequencing data acquisition, managing instrument parameters, and checking sequencer status.",
    name="sequencing-data-acquisition-agent",
    tools=[
        {"type": "web_search"}, # For searching external databases for sequencer specifications or best practices
        {
            "type": "function",
            "function": {
                "name": "perform_sequencing_run",
                "description": "Simulate a sequencing run for a prepared library on a specified instrument.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "library_id": {"type": "string", "description": "Identifier of the prepared sequencing library."},
                        "sequencer_id": {"type": "string", "description": "Identifier of the sequencing instrument used."},
                        "run_type": {"type": "string", "description": "Type of sequencing run (e.g., 'whole genome', 'RNA-Seq', 'exome')."},
                        "expected_reads_gb": {"type": "number", "description": "Expected data output in Gigabases (Gb)."}
                    },
                    # `expected_reads_gb` is intentionally absent here, which makes it optional.
                    "required": ["library_id", "sequencer_id", "run_type"]
                }
            }
        },
        {
            "type": "function",
            "function": {
                "name": "check_sequencer_status",
                "description": "Check the operational status and availability of a specific sequencing instrument.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "sequencer_id": {"type": "string", "description": "Identifier of the sequencing instrument."},
                    },
                    "required": ["sequencer_id"]
                }
            }
        }
    ]
)
# Echo the name and ID taken from the object returned by the create call.
print(f"Sequencing Data Acquisition Agent '{sequencing_data_acquisition_agent.name}' created with ID: {sequencing_data_acquisition_agent.id}")

# 3. Read Alignment & Variant Calling Agent
# The only agent here that sets `instructions` and `completion_args`: its
# response format is a JSON schema generated from `GenomicsAnalysisResult`.
# Declares two function tools: `align_reads` and `call_variants`.
read_alignment_variant_calling_agent = client.beta.agents.create(
    model="mistral-large-latest",
    name="read-alignment-variant-calling-agent",
    description="Agent for aligning sequencing reads to a reference genome, calling genetic variants, and predicting potential issues.",
    instructions="Align sequencing reads, call variants (SNPs, indels), provide quality scores, and flag any problematic regions.",
    completion_args={
        "response_format": {
            "type": "json_schema",
            "json_schema": {
                "name": "genomics_analysis_result",
                # Schema is derived from the pydantic model at agent-creation time.
                "schema": GenomicsAnalysisResult.model_json_schema(),
            }
        }
    },
    tools=[
        {
            "type": "function",
            "function": {
                "name": "align_reads",
                "description": "Align raw sequencing reads to a reference genome.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "raw_read_file": {"type": "string", "description": "Path or ID of the raw sequencing read file (e.g., FASTQ)."},
                        "reference_genome_id": {"type": "string", "description": "Identifier of the reference genome (e.g., 'hg38', 'GRCm39')."},
                    },
                    "required": ["raw_read_file", "reference_genome_id"]
                }
            }
        },
        {
            "type": "function",
            "function": {
                "name": "call_variants",
                "description": "Call genetic variants (SNPs, indels) from aligned sequencing data.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "aligned_bam_file": {"type": "string", "description": "Path or ID of the aligned BAM file."},
                        "variant_caller_tool": {"type": "string", "description": "Name of the variant calling tool (e.g., 'GATK HaplotypeCaller', 'FreeBayes')."},
                    },
                    "required": ["aligned_bam_file", "variant_caller_tool"]
                }
            }
        }
    ]
)
# Echo the name and ID taken from the object returned by the create call.
print(f"Read Alignment & Variant Calling Agent '{read_alignment_variant_calling_agent.name}' created with ID: {read_alignment_variant_calling_agent.id}")

# 4. Gene Annotation & Functional Prediction Agent
# Declares two function tools: `annotate_genes` and `predict_variant_impact`.
gene_annotation_prediction_agent = client.beta.agents.create(
    model="mistral-large-latest",
    description="Agent for annotating genes, regulatory regions, and predicting the functional impact of genetic variants.",
    name="gene-annotation-prediction-agent",
    tools=[
        {
            "type": "function",
            "function": {
                "name": "annotate_genes",
                "description": "Annotate genes and genomic features in a specified region or for a list of variants.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "genomic_region": {"type": "string", "description": "Genomic region to annotate (e.g., 'chr1:1000-2000')."},
                        "variant_list_json": {"type": "string", "description": "Optional: JSON string of variant positions to annotate."},
                        "annotation_database": {"type": "string", "description": "Annotation database (e.g., 'Ensembl', 'RefSeq', 'GENCODE')."},
                    },
                    # `variant_list_json` is left out of `required`, so it is optional.
                    "required": ["genomic_region", "annotation_database"]
                }
            }
        },
        {
            "type": "function",
            "function": {
                "name": "predict_variant_impact",
                "description": "Predict the functional impact of genetic variants (e.g., 'missense', 'frameshift', 'splice site').",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "variant_id": {"type": "string", "description": "Identifier of the genetic variant."},
                        "gene_id": {"type": "string", "description": "Associated gene identifier."},
                        "impact_prediction_tool": {"type": "string", "description": "Tool for impact prediction (e.g., 'VEP', 'SnpEff')."},
                    },
                    "required": ["variant_id", "gene_id", "impact_prediction_tool"]
                }
            }
        }
    ]
)
# Echo the name and ID taken from the object returned by the create call.
print(f"Gene Annotation & Functional Prediction Agent '{gene_annotation_prediction_agent.name}' created with ID: {gene_annotation_prediction_agent.id}")

# 5. Genomics Data Repository Agent
# Declares two function tools: `search_genomics_data_repository` and
# `upload_genomics_data`. All declared parameters are required.
genomics_data_repo_agent = client.beta.agents.create(
    model="mistral-large-latest",
    description="Agent for managing and searching genomics data in a central repository, including raw reads, aligned files, and variant calls.",
    name="genomics-data-repo-agent",
    tools=[
        {
            "type": "function",
            "function": {
                "name": "search_genomics_data_repository",
                "description": "Search the genomics data repository for datasets or files matching specific criteria.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        # Criteria are a single free-text string, not a structured filter object.
                        "search_criteria": {"type": "string", "description": "Criteria for searching (e.g., 'sequencing type', 'disease', 'patient ID', 'date range')."},
                    },
                    "required": ["search_criteria"]
                }
            }
        },
        {
            "type": "function",
            "function": {
                "name": "upload_genomics_data",
                "description": "Simulate uploading genomics data (e.g., FASTQ, BAM, VCF) and metadata to the repository.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "dataset_id": {"type": "string", "description": "Identifier of the genomics dataset."},
                        "file_type": {"type": "string", "description": "Type of file being uploaded (e.g., 'FASTQ', 'BAM', 'VCF')."},
                        "metadata_json": {"type": "string", "description": "JSON string summarizing the metadata to upload."},
                    },
                    "required": ["dataset_id", "file_type", "metadata_json"]
                }
            }
        }
    ]
)
# Echo the name and ID taken from the object returned by the create call.
print(f"Genomics Data Repository Agent '{genomics_data_repo_agent.name}' created with ID: {genomics_data_repo_agent.id}")

# 6. Genomics Bioinformatics Analysis Agent
# Declares two function tools: `analyze_gene_expression` and
# `perform_comparative_genomics`.
genomics_bioinformatics_analysis_agent = client.beta.agents.create(
    model="mistral-large-latest",
    description="Agent for performing gene expression analysis, comparative genomics, and pathway analysis on genomics datasets.",
    name="genomics-bioinformatics-analysis-agent",
    tools=[
        {
            "type": "function",
            "function": {
                "name": "analyze_gene_expression",
                "description": "Perform differential gene expression analysis from RNA-Seq data.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "expression_matrix_json": {"type": "string", "description": "JSON string of gene expression counts or TPMs."},
                        "grouping_info_json": {"type": "string", "description": "JSON string with information for grouping samples (e.g., 'case' vs 'control')."},
                        "analysis_method": {"type": "string", "description": "Method for differential expression (e.g., 'DESeq2', 'edgeR')."},
                    },
                    # `analysis_method` is optional; the mock implementation defaults it to "DESeq2".
                    "required": ["expression_matrix_json", "grouping_info_json"]
                }
            }
        },
        {
            "type": "function",
            "function": {
                "name": "perform_comparative_genomics",
                "description": "Perform comparative genomics analysis between multiple genomes or strains.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        # Declared as a real JSON array of strings, unlike the `*_json` string parameters.
                        "genome_ids": {"type": "array", "items": {"type": "string"}, "description": "List of genome identifiers to compare."},
                        "comparison_type": {"type": "string", "description": "Type of comparison (e.g., 'synteny', 'phylogeny', 'gene content')."},
                    },
                    "required": ["genome_ids", "comparison_type"]
                }
            }
        }
    ]
)
# Echo the name and ID taken from the object returned by the create call.
print(f"Genomics Bioinformatics Analysis Agent '{genomics_bioinformatics_analysis_agent.name}' created with ID: {genomics_bioinformatics_analysis_agent.id}")

# 7. Sequencing Quality Control Agent
# Declares two function tools: `run_read_qc` and `assess_alignment_quality`.
# All declared parameters are required.
sequencing_qc_agent = client.beta.agents.create(
    model="mistral-large-latest",
    description="Agent for checking the quality of raw sequencing reads and aligned data against predefined metrics.",
    name="sequencing-qc-agent",
    tools=[
        {
            "type": "function",
            "function": {
                "name": "run_read_qc",
                "description": "Run quality control checks on raw sequencing reads (e.g., FastQC metrics).",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "fastq_file_id": {"type": "string", "description": "Identifier for the FASTQ file."},
                        # Thresholds travel as a JSON-encoded string, not a nested object.
                        "qc_thresholds_json": {"type": "string", "description": "JSON string of QC thresholds (e.g., 'min_phred_score', 'min_read_length')."},
                    },
                    "required": ["fastq_file_id", "qc_thresholds_json"]
                }
            }
        },
        {
            "type": "function",
            "function": {
                "name": "assess_alignment_quality",
                "description": "Assess the quality of read alignment (e.g., mapping rate, coverage uniformity).",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "bam_file_id": {"type": "string", "description": "Identifier for the aligned BAM file."},
                        # Declared as a real JSON array of strings.
                        "metrics_to_report": {"type": "array", "items": {"type": "string"}, "description": "List of alignment metrics to report (e.g., 'mapping_rate', 'duplication_rate')."},
                    },
                    "required": ["bam_file_id", "metrics_to_report"]
                }
            }
        }
    ]
)
# Echo the name and ID taken from the object returned by the create call.
print(f"Sequencing Quality Control Agent '{sequencing_qc_agent.name}' created with ID: {sequencing_qc_agent.id}")

# 8. Genomics Experimental Design Agent
# Declares two function tools: `design_sequencing_strategy` and
# `calculate_sequencing_depth`.
genomics_experimental_design_agent = client.beta.agents.create(
    model="mistral-large-latest",
    description="Agent for assisting in the design of genomics experiments, including sequencing depth calculation and library preparation strategies.",
    name="genomics-experimental-design-agent",
    tools=[
        {
            "type": "function",
            "function": {
                "name": "design_sequencing_strategy",
                "description": "Design a sequencing strategy (e.g., whole-genome, exome, RNA-Seq) based on research goals.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "research_goal": {"type": "string", "description": "The scientific goal of the sequencing experiment."},
                        "sample_types_json": {"type": "string", "description": "JSON string detailing sample types and desired comparisons."},
                        "budget_constraints": {"type": "string", "description": "Optional: Budget constraints (e.g., 'low', 'moderate', 'high')."},
                    },
                    # `budget_constraints` is left out of `required`, so it is optional.
                    "required": ["research_goal", "sample_types_json"]
                }
            }
        },
        {
            "type": "function",
            "function": {
                "name": "calculate_sequencing_depth",
                "description": "Calculate the recommended sequencing depth (e.g., X coverage) for a genomics study.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "genome_size_mb": {"type": "number", "description": "Estimated genome size in Megabases (Mb)."},
                        "target_coverage_x": {"type": "number", "description": "Desired average coverage (e.g., 30 for whole genome)."},
                        "read_length": {"type": "number", "description": "Length of sequencing reads (e.g., 150 bp)."},
                    },
                    "required": ["genome_size_mb", "target_coverage_x", "read_length"]
                }
            }
        }
    ]
)
# Echo the name and ID taken from the object returned by the create call.
print(f"Genomics Experimental Design Agent '{genomics_experimental_design_agent.name}' created with ID: {genomics_experimental_design_agent.id}")

# 9. Genomics Reporting & Visualization Agent
# Declares a `web_search` tool plus two function tools:
# `generate_variant_report` and `create_genomic_visualization`.
genomics_reporting_visualization_agent = client.beta.agents.create(
    model="mistral-large-latest",
    description="Agent for generating reports, creating visualizations, and facilitating collaboration for genomics studies.",
    name="genomics-reporting-visualization-agent",
    tools=[
        {"type": "web_search"}, # For looking up visualization best practices or reporting guidelines.
        {
            "type": "function",
            "function": {
                "name": "generate_variant_report",
                "description": "Generate a summary report of identified genetic variants.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "vcf_file_id": {"type": "string", "description": "Identifier of the VCF (Variant Call Format) file."},
                        "report_type": {"type": "string", "description": "Type of report (e.g., 'summary', 'annotated')."},
                    },
                    "required": ["vcf_file_id", "report_type"]
                }
            }
        },
        {
            "type": "function",
            "function": {
                "name": "create_genomic_visualization",
                "description": "Create a visualization for genomic data (e.g., circos plot, gene expression heatmap, variant lollipop plot).",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "data_file_id": {"type": "string", "description": "Identifier of the data file to visualize."},
                        "visualization_type": {"type": "string", "description": "Type of visualization requested."},
                        "parameters_json": {"type": "string", "description": "JSON string of visualization-specific parameters (e.g., 'genes_to_highlight', 'color_map')."},
                    },
                    # `parameters_json` is left out of `required`, so it is optional.
                    "required": ["data_file_id", "visualization_type"]
                }
            }
        }
    ]
)
# Echo the name and ID taken from the object returned by the create call.
print(f"Genomics Reporting & Visualization Agent '{genomics_reporting_visualization_agent.name}' created with ID: {genomics_reporting_visualization_agent.id}")

# Reached only if every create call above returned without raising.
print("\nAll new genomics-related agents have been defined.")

# --- Mock functions for genomics domain tools ---

def extract_nucleic_acid(sample_id: str, nucleic_acid_type: str, extraction_method: str):
    """Return a canned result for a simulated DNA/RNA extraction (MOCK).

    A sample whose ID contains "blood" (any case) combined with a nucleic acid
    type containing "DNA" (case-sensitive) yields the high-yield DNA result;
    every other combination yields the generic result. ``extraction_method``
    is only echoed in the debug print.
    """
    print(f"\n[DEBUG] MOCK CALL: extract_nucleic_acid Sample: '{sample_id}', Type: '{nucleic_acid_type}', Method: '{extraction_method}'")

    is_blood_dna = "blood" in sample_id.lower() and "DNA" in nucleic_acid_type
    if not is_blood_dna:
        return {
            "status": "success",
            "extract_id": f"{sample_id}_{nucleic_acid_type}_extract",
            "quantity_ug": 20,
            "message": f"{nucleic_acid_type} extraction complete with notes.",
        }

    return {
        "status": "success",
        "extract_id": f"{sample_id}_DNA_extract",
        "quantity_ug": 50,
        "message": "DNA extraction successful.",
    }

def perform_qc_on_extract(extract_id: str, qc_metrics_json: str):
    """Evaluate QC metrics for an extracted nucleic acid sample (MOCK).

    Args:
        extract_id: Identifier of the extract; echoed back in the result.
        qc_metrics_json: JSON object encoded as a string. The keys
            ``"concentration"`` and ``"260/280 ratio"`` are read; a missing
            key counts as 0.

    Returns:
        A dict with ``status`` of ``"passed_qc"`` (concentration > 20 and
        ratio > 1.8), ``"failed_qc"`` (with ``flags`` naming only the metrics
        that actually failed), or ``"error"`` when the input is not valid
        JSON, is not a JSON object, or carries non-numeric metric values.
    """
    print(f"\n[DEBUG] MOCK CALL: perform_qc_on_extract Extract: {extract_id}, Metrics: {qc_metrics_json}")

    try:
        metrics = json.loads(qc_metrics_json)
    except (json.JSONDecodeError, TypeError):
        # TypeError covers a non-string argument such as None.
        return {"status": "error", "message": "Invalid JSON format for QC metrics."}

    # The caller may send valid JSON that is not an object (e.g. a list of
    # metric names); report that instead of crashing on `.get`.
    if not isinstance(metrics, dict):
        return {"status": "error", "message": "QC metrics must be a JSON object mapping metric names to numeric values."}

    try:
        concentration = float(metrics.get("concentration", 0))
        purity_ratio = float(metrics.get("260/280 ratio", 0))
    except (TypeError, ValueError):
        return {"status": "error", "message": "QC metric values must be numeric."}

    # `not (x > limit)` rather than `x <= limit` so that NaN counts as a failure.
    flags = []
    if not concentration > 20:
        flags.append("Low concentration")
    if not purity_ratio > 1.8:
        flags.append("Poor purity")

    if flags:
        return {"status": "failed_qc", "extract_id": extract_id, "qc_summary": "Low quality extract.", "flags": flags}
    return {"status": "passed_qc", "extract_id": extract_id, "qc_summary": "High quality extract.", "flags": []}

def perform_sequencing_run(library_id: str, sequencer_id: str, run_type: str, expected_reads_gb: float = None):
    """Return a canned result for a simulated sequencing run (MOCK).

    The library "WGS_Lib001" on an "IlluminaNovaSeq" instrument (both matched
    as substrings) reports the whole-genome result; anything else reports the
    generic one. ``run_type`` and ``expected_reads_gb`` are only echoed in the
    debug print.
    """
    print(f"\n[DEBUG] MOCK CALL: perform_sequencing_run Library: '{library_id}', Sequencer: '{sequencer_id}', Type: '{run_type}', Expected Gb: {expected_reads_gb}")

    is_known_wgs_run = "WGS_Lib001" in library_id and "IlluminaNovaSeq" in sequencer_id
    if is_known_wgs_run:
        yield_gb = 30.5
        note = "Whole genome sequencing run completed."
    else:
        yield_gb = 10.2
        note = "Sequencing run completed."

    return {
        "status": "run_complete",
        "raw_data_file_id": f"{library_id}_raw.fastq.gz",
        "actual_reads_gb": yield_gb,
        "message": note,
    }

def check_sequencer_status(sequencer_id: str):
    """Report a canned operational status for a sequencer (MOCK).

    Only the exact ID "IlluminaNovaSeq" is reported as online; every other ID
    is reported as offline.
    """
    print(f"\n[DEBUG] MOCK CALL: check_sequencer_status Sequencer: {sequencer_id}")

    if sequencer_id != "IlluminaNovaSeq":
        return {"status": "offline", "message": "Sequencer is offline or requires attention."}

    return {
        "status": "online",
        "last_maintenance": "2025-06-20",
        "flow_cell_capacity": "80%",
        "message": "Sequencer is online and ready.",
    }

def align_reads(raw_read_file: str, reference_genome_id: str):
    """Return a canned alignment result for a raw read file (MOCK).

    The BAM name is the input name with ".fastq.gz" replaced by ".bam". The
    file "WGS_Lib001_raw.fastq.gz" against "hg38" (both matched as substrings)
    reports a 0.98 mapping rate; anything else reports 0.85.
    """
    print(f"\n[DEBUG] MOCK CALL: align_reads Raw File: '{raw_read_file}', Reference: '{reference_genome_id}'")

    known_human_run = "WGS_Lib001_raw.fastq.gz" in raw_read_file and "hg38" in reference_genome_id
    if known_human_run:
        rate, note = 0.98, "Reads aligned to human genome."
    else:
        rate, note = 0.85, "Reads aligned, review mapping rate."

    bam_name = raw_read_file.replace('.fastq.gz', '.bam')
    return {
        "status": "alignment_complete",
        "aligned_bam_file": bam_name,
        "mapping_rate": rate,
        "message": note,
    }

def call_variants(aligned_bam_file: str, variant_caller_tool: str):
    """Return canned variant-calling counts for an aligned BAM file (MOCK).

    The VCF name is the BAM name with ".bam" replaced by ".vcf". The file
    "WGS_Lib001_raw.bam" called with "GATK HaplotypeCaller" (both matched as
    substrings) reports the larger counts; anything else the smaller ones.
    """
    print(f"\n[DEBUG] MOCK CALL: call_variants BAM File: '{aligned_bam_file}', Tool: '{variant_caller_tool}'")

    vcf_name = aligned_bam_file.replace('.bam', '.vcf')
    gatk_on_known_bam = "WGS_Lib001_raw.bam" in aligned_bam_file and "GATK HaplotypeCaller" in variant_caller_tool
    if not gatk_on_known_bam:
        return {
            "status": "variants_called",
            "vcf_file": vcf_name,
            "snps_found": 50000,
            "indels_found": 5000,
            "message": "Variants called.",
        }

    return {
        "status": "variants_called",
        "vcf_file": vcf_name,
        "snps_found": 150000,
        "indels_found": 15000,
        "message": "Variants called with GATK.",
    }

def annotate_genes(genomic_region: str, annotation_database: str, variant_list_json: str = None):
    """Return canned gene annotations for a genomic region (MOCK).

    The region "chr1:1000-2000" with the "Ensembl" database (both matched as
    substrings) reports two genes and two features; anything else reports
    empty lists. ``variant_list_json`` is only echoed in the debug print.
    """
    print(f"\n[DEBUG] MOCK CALL: annotate_genes Region: '{genomic_region}', DB: '{annotation_database}', Variants: {variant_list_json}")

    known_region = "chr1:1000-2000" in genomic_region and "Ensembl" in annotation_database
    if known_region:
        genes, features = ["GENE_A", "GENE_B"], ["exon", "promoter"]
        note = "Genomic region annotated."
    else:
        genes, features = [], []
        note = "Annotation complete, no features found."

    return {
        "status": "annotated",
        "genes_in_region": genes,
        "features_found": features,
        "message": note,
    }

def predict_variant_impact(variant_id: str, gene_id: str, impact_prediction_tool: str):
    """Return a canned functional-impact prediction for a variant (MOCK).

    Variant "rs12345" in "GENE_A" predicted with "VEP" (all matched as
    substrings) reports a missense variant with SIFT/PolyPhen scores;
    anything else reports a synonymous variant without scores.
    """
    print(f"\n[DEBUG] MOCK CALL: predict_variant_impact Variant: {variant_id}, Gene: {gene_id}, Tool: {impact_prediction_tool}")

    is_known_case = (
        "rs12345" in variant_id
        and "GENE_A" in gene_id
        and "VEP" in impact_prediction_tool
    )
    if not is_known_case:
        return {
            "status": "impact_predicted",
            "variant": variant_id,
            "predicted_consequence": "synonymous_variant",
            "message": "Low impact predicted.",
        }

    return {
        "status": "impact_predicted",
        "variant": variant_id,
        "predicted_consequence": "missense_variant",
        "sift_score": 0.01,
        "polyphen_score": 0.9,
        "message": "High impact predicted.",
    }

def search_genomics_data_repository(search_criteria: str):
    """Search the mock genomics data repository (MOCK).

    Args:
        search_criteria: Free-text criteria. Matching is case-insensitive.

    Returns:
        A dict with ``status`` "success". ``datasets_found`` lists two WGS
        cancer datasets when the criteria contain both "sequencing type: wgs"
        and "disease: cancer"; otherwise it is empty.
    """
    print(f"\n[DEBUG] MOCK CALL: search_genomics_data_repository for criteria: {search_criteria}")

    # The criteria are lowercased, so the needles must be lowercase as well;
    # an uppercase "WGS" needle could never match.
    criteria = search_criteria.lower()
    if "sequencing type: wgs" in criteria and "disease: cancer" in criteria:
        return {"status": "success", "datasets_found": ["WGS_Cancer_001", "WGS_Cancer_002"], "message": "Found relevant WGS cancer datasets."}
    return {"status": "success", "datasets_found": [], "message": "No genomics datasets matching criteria."}

def upload_genomics_data(dataset_id: str, file_type: str, metadata_json: str):
    """Return a canned confirmation for a simulated data upload (MOCK).

    The result echoes ``dataset_id`` and stamps the call with the current
    ``time.time()`` value. ``metadata_json`` is only echoed in the debug print.
    """
    print(f"\n[DEBUG] MOCK CALL: upload_genomics_data Dataset: {dataset_id}, File Type: {file_type}, Metadata: {metadata_json}")

    uploaded_at = time.time()
    confirmation = f"{file_type} data uploaded for {dataset_id}."
    return {
        "status": "uploaded",
        "dataset_id": dataset_id,
        "upload_timestamp": uploaded_at,
        "message": confirmation,
    }

def analyze_gene_expression(expression_matrix_json: str, grouping_info_json: str, analysis_method: str = "DESeq2"):
    """Simulate a differential gene expression analysis (MOCK).

    Args:
        expression_matrix_json: JSON object or array encoded as a string.
        grouping_info_json: JSON object encoded as a string; its ``"groups"``
            entry is checked for "case".
        analysis_method: Only echoed in the debug print.

    Returns:
        A dict with ``status`` "success" and canned differentially expressed
        genes when the matrix has more than one entry and "case" is among the
        groups; ``status`` "success" with an empty gene list otherwise; or
        ``status`` "error" when either input is not valid JSON or has an
        unexpected JSON type.
    """
    print(f"\n[DEBUG] MOCK CALL: analyze_gene_expression Matrix: {expression_matrix_json}, Grouping: {grouping_info_json}, Method: {analysis_method}")

    try:
        expression_data = json.loads(expression_matrix_json)
        grouping_info = json.loads(grouping_info_json)
    except (json.JSONDecodeError, TypeError):
        # TypeError covers a non-string argument such as None.
        return {"status": "error", "message": "Invalid JSON format for expression matrix or grouping info."}

    # Valid JSON of the wrong shape would otherwise crash on `len()` / `.get`.
    if not isinstance(expression_data, (dict, list)):
        return {"status": "error", "message": "Expression matrix must be a JSON object or array."}
    if not isinstance(grouping_info, dict):
        return {"status": "error", "message": "Grouping info must be a JSON object."}

    groups = grouping_info.get("groups", [])
    # Only containers/strings support `in`; any other value means no "case" group.
    has_case_group = isinstance(groups, (list, str, dict)) and "case" in groups

    # Simulate differential expression
    if len(expression_data) > 1 and has_case_group:
        return {"status": "success", "diff_exp_genes": ["GENE_X", "GENE_Y"], "upregulated_count": 5, "downregulated_count": 3, "message": "Differential expression analysis complete."}
    return {"status": "success", "diff_exp_genes": [], "message": "Gene expression analysis complete, no significant changes."}

def perform_comparative_genomics(genome_ids: list, comparison_type: str):
    """Return a canned comparative genomics result (MOCK).

    When both "Human_Ref" and "Chimp_Ref" are among ``genome_ids`` and the
    comparison type contains "synteny", the synteny result with a divergence
    rate is returned; otherwise the generic result.
    """
    print(f"\n[DEBUG] MOCK CALL: perform_comparative_genomics Genomes: {genome_ids}, Type: {comparison_type}")

    has_both_references = all(ref in genome_ids for ref in ("Human_Ref", "Chimp_Ref"))
    if not (has_both_references and "synteny" in comparison_type):
        return {
            "status": "success",
            "comparison_result": "Analysis performed.",
            "message": "Comparative genomics analysis complete.",
        }

    return {
        "status": "success",
        "comparison_result": "Identified conserved syntenic blocks.",
        "divergence_rate": 0.012,
        "message": "Comparative genomics analysis complete.",
    }

def run_read_qc(fastq_file_id: str, qc_thresholds_json: str):
    """Run mock QC on raw sequencing reads (MOCK).

    Args:
        fastq_file_id: Identifier of the FASTQ file; echoed back as ``file_id``.
        qc_thresholds_json: JSON object encoded as a string. The
            ``"min_phred_score"`` key is read; a missing key counts as 0.

    Returns:
        A dict with ``status`` "passed_qc" when the file ID contains
        "Sample_A.fastq.gz" and ``min_phred_score`` is above 20, "failed_qc"
        otherwise, or "error" when the thresholds are not valid JSON, not a
        JSON object, or ``min_phred_score`` is not a number.
    """
    print(f"\n[DEBUG] MOCK CALL: run_read_qc FASTQ: {fastq_file_id}, Thresholds: {qc_thresholds_json}")

    try:
        thresholds = json.loads(qc_thresholds_json)
    except (json.JSONDecodeError, TypeError):
        # TypeError covers a non-string argument such as None.
        return {"status": "error", "message": "Invalid JSON format for QC thresholds."}

    # Valid JSON that is not an object would otherwise crash on `.get`.
    if not isinstance(thresholds, dict):
        return {"status": "error", "message": "QC thresholds must be a JSON object."}

    min_phred_score = thresholds.get("min_phred_score", 0)
    # A string such as "30" cannot be compared with an int; reject it explicitly.
    if not isinstance(min_phred_score, (int, float)):
        return {"status": "error", "message": "min_phred_score must be a number."}

    if "Sample_A.fastq.gz" in fastq_file_id and min_phred_score > 20:
        return {"status": "passed_qc", "file_id": fastq_file_id, "avg_phred_score": 32, "read_count": 1000000, "message": "Raw reads passed QC."}
    return {"status": "failed_qc", "file_id": fastq_file_id, "avg_phred_score": 15, "read_count": 800000, "message": "Raw reads failed QC, low quality."}

def assess_alignment_quality(bam_file_id: str, metrics_to_report: list):
    """Return canned alignment quality metrics for a BAM file (MOCK).

    A BAM ID containing "Sample_A.bam" with "mapping_rate" among the requested
    metrics reports the full metric set; anything else reports only a
    completion message.
    """
    print(f"\n[DEBUG] MOCK CALL: assess_alignment_quality BAM: {bam_file_id}, Metrics: {metrics_to_report}")

    wants_known_metrics = "Sample_A.bam" in bam_file_id and "mapping_rate" in metrics_to_report
    if not wants_known_metrics:
        return {
            "status": "success",
            "bam_file": bam_file_id,
            "message": "Alignment quality assessment complete.",
        }

    return {
        "status": "success",
        "bam_file": bam_file_id,
        "mapping_rate": 0.95,
        "duplication_rate": 0.1,
        "coverage_uniformity": "good",
        "message": "Alignment quality assessed.",
    }

def design_sequencing_strategy(research_goal: str, sample_types_json: str, budget_constraints: str = None):
    """Return a canned sequencing strategy recommendation (MOCK).

    ``sample_types_json`` must parse as JSON; otherwise an error dict is
    returned. A goal mentioning "variant discovery" together with sample types
    mentioning "human" (both case-insensitive) recommends WGS at 30x; anything
    else recommends RNA-Seq. ``budget_constraints`` is only echoed in the
    debug print.
    """
    print(f"\n[DEBUG] MOCK CALL: design_sequencing_strategy Goal: '{research_goal}', Samples: {sample_types_json}, Budget: {budget_constraints}")

    try:
        sample_types = json.loads(sample_types_json)
    except json.JSONDecodeError:
        return {"status": "error", "message": "Invalid JSON format for sample types."}

    # The parsed value is searched in its string form, whatever its JSON type.
    goal_is_variant_discovery = "variant discovery" in research_goal.lower()
    if goal_is_variant_discovery and "human" in str(sample_types).lower():
        strategy, depth = "Whole Genome Sequencing (WGS)", "30x"
        note = "WGS strategy designed for variant discovery."
    else:
        strategy, depth = "RNA-Seq", "50M reads"
        note = "RNA-Seq strategy designed."

    return {
        "status": "strategy_designed",
        "recommended_strategy": strategy,
        "depth_recommendation": depth,
        "message": note,
    }

def calculate_sequencing_depth(genome_size_mb: float, target_coverage_x: float, read_length: int):
    """MOCK function to calculate the sequencing output needed for a target coverage.

    Args:
        genome_size_mb: Genome size in megabases (Mb).
        target_coverage_x: Desired average coverage (e.g. 30 for 30x).
        read_length: Read length in bp. It determines how many reads are needed,
            not how many bases, so it does not affect the Gb figure returned here.

    Returns:
        A dict with ``status``, ``recommended_gb_output`` (gigabases, rounded
        to two decimals) and ``message``.
    """
    print(f"\n[DEBUG] MOCK CALL: calculate_sequencing_depth Genome Size: {genome_size_mb}Mb, Coverage: {target_coverage_x}x, Read Length: {read_length}bp")
    # Total bases needed = genome size (in bases) * coverage; Gb = bases / 1e9.
    # The previous formula also multiplied by read_length, inflating the result
    # (a 3000 Mb genome at 30x with 150 bp reads came out as 13500 Gb, not 90 Gb).
    total_bases_needed = genome_size_mb * 1e6 * target_coverage_x
    gb_needed = total_bases_needed / 1e9
    return {"status": "calculated", "recommended_gb_output": round(gb_needed, 2), "message": "Sequencing depth calculation complete."}

def generate_variant_report(vcf_file_id: str, report_type: str):
    """Return a canned variant-report descriptor for a VCF file (MOCK).

    A summary report is produced when ``vcf_file_id`` contains
    ``sample_variants.vcf`` and ``report_type`` mentions "summary"
    (case-insensitive); any other input yields the generic detailed report.
    """
    print(f"\n[DEBUG] MOCK CALL: generate_variant_report VCF: {vcf_file_id}, Type: {report_type}")
    wants_summary = "sample_variants.vcf" in vcf_file_id and "summary" in report_type.lower()
    if wants_summary:
        report_id = f"Variant_Summary_{vcf_file_id}"
        summary_text = "Summary of 150k SNPs and 15k indels."
        note = "Variant summary report generated."
    else:
        report_id = f"Variant_Report_{vcf_file_id}"
        summary_text = "Detailed variant report."
        note = "Variant report generated."
    return {
        "status": "report_generated",
        "report_id": report_id,
        "content_summary": summary_text,
        "message": note,
    }

def create_genomic_visualization(data_file_id: str, visualization_type: str, parameters_json: str = None):
    """Return a canned visualization descriptor for a data file (MOCK).

    The heatmap URL is returned when ``data_file_id`` contains
    ``gene_expression_data.json`` and ``visualization_type`` mentions
    "heatmap" (case-insensitive); otherwise a default URL is returned with a
    message naming the requested type. ``parameters_json`` is only echoed in
    the debug print.
    """
    print(f"\n[DEBUG] MOCK CALL: create_genomic_visualization Data: {data_file_id}, Type: {visualization_type}, Params: {parameters_json}")
    is_expression_heatmap = "gene_expression_data.json" in data_file_id and "heatmap" in visualization_type.lower()
    if is_expression_heatmap:
        url = "https://mock_viz_url/heatmap.png"
        note = "Gene expression heatmap created."
    else:
        url = "https://mock_viz_url/default_viz.png"
        note = f"{visualization_type} visualization created."
    return {"status": "visualization_created", "visualization_url": url, "message": note}

# Master Tool Executor Mapping (Updated for Genomics Domain)
# Maps the tool name the model returns in a tool call to the local callable that
# handles it. The execution loop looks a name up here and invokes the callable
# with the decoded tool arguments as keyword arguments.
tool_executor = {
    "extract_nucleic_acid": extract_nucleic_acid,
    "perform_qc_on_extract": perform_qc_on_extract,
    "perform_sequencing_run": perform_sequencing_run,
    "check_sequencer_status": check_sequencer_status,
    "align_reads": align_reads,
    "call_variants": call_variants,
    "annotate_genes": annotate_genes,
    "predict_variant_impact": predict_variant_impact,
    "search_genomics_data_repository": search_genomics_data_repository,
    "upload_genomics_data": upload_genomics_data,
    "analyze_gene_expression": analyze_gene_expression,
    "perform_comparative_genomics": perform_comparative_genomics,
    "run_read_qc": run_read_qc,
    "assess_alignment_quality": assess_alignment_quality,
    "design_sequencing_strategy": design_sequencing_strategy,
    "calculate_sequencing_depth": calculate_sequencing_depth,
    "generate_variant_report": generate_variant_report,
    "create_genomic_visualization": create_genomic_visualization,
    # Stand-in for web search: accepts any arguments and returns a fixed string.
    "internal_web_search_tool": lambda *args, **kwargs: "Mock web search: General genomics information retrieved."
}

# Function to standardize tools for client.chat.complete (remains the same)
def get_api_call_tools_list(agent_tools):
    """Convert an agent's tool objects into the list passed as ``tools=`` to chat completions.

    Tools whose ``type`` is ``'function'`` are serialized with ``model_dump()``.
    Tools whose ``type`` is ``'web_search'`` are replaced by a parameterless
    function definition named ``internal_web_search_tool``. Tools of any other
    type are left out.
    """
    def web_search_placeholder():
        # Built fresh on every call so each entry in the result is its own dict.
        return {
            "type": "function",
            "function": {
                "name": "internal_web_search_tool",
                "description": "Accesses the internet to find information.",
                "parameters": {
                    "type": "object",
                    "properties": {}
                }
            }
        }

    converted = []
    for agent_tool in agent_tools:
        kind = agent_tool.type
        if kind == 'web_search':
            converted.append(web_search_placeholder())
        elif kind == 'function':
            converted.append(agent_tool.model_dump())
    return converted

# --- Test Case Execution for Genomics Agents ---
print("\n--- Executing Test Cases for Genomics Agents (via chat completions) ---")
# Each test case is a dict with four keys:
#   "agent"              - the agent object whose model and tools are used for the chat call
#   "name"               - human-readable label used in the printed output
#   "query"              - the user message sent to the model
#   "expected_tool_call" - the tool the model is expected to propose (informational;
#                          it does not control which tool is called)
# There are two cases per agent, plus one web-search example for the reporting agent.
test_cases = [
    {
        "agent": nucleic_acid_extraction_agent,
        "name": "Nucleic Acid Extraction Agent",
        "query": "Extract DNA from a blood sample using the 'Qiagen_Blood_Mini_Kit' method.",
        "expected_tool_call": "extract_nucleic_acid"
    },
    {
        "agent": nucleic_acid_extraction_agent,
        "name": "Nucleic Acid Extraction Agent",
        "query": "Perform QC on extracted DNA sample 'Blood_DNA_extract' with metrics: {\"concentration\": 60, \"260/280 ratio\": 1.9, \"integrity_score\": 8.5}.",
        "expected_tool_call": "perform_qc_on_extract"
    },
    {
        "agent": sequencing_data_acquisition_agent,
        "name": "Sequencing Data Acquisition Agent",
        "query": "Perform a whole genome sequencing run for library 'WGS_Lib001' on 'IlluminaNovaSeq', expecting 30 Gb data.",
        "expected_tool_call": "perform_sequencing_run"
    },
    {
        "agent": sequencing_data_acquisition_agent,
        "name": "Sequencing Data Acquisition Agent",
        "query": "Check the status of IlluminaNovaSeq.",
        "expected_tool_call": "check_sequencer_status"
    },
    {
        "agent": read_alignment_variant_calling_agent,
        "name": "Read Alignment & Variant Calling Agent",
        "query": "Align raw reads from 'WGS_Lib001_raw.fastq.gz' to the 'hg38' reference genome.",
        "expected_tool_call": "align_reads"
    },
    {
        "agent": read_alignment_variant_calling_agent,
        "name": "Read Alignment & Variant Calling Agent",
        "query": "Call variants from aligned BAM file 'WGS_Lib001_raw.bam' using 'GATK HaplotypeCaller'.",
        "expected_tool_call": "call_variants"
    },
    {
        "agent": gene_annotation_prediction_agent,
        "name": "Gene Annotation & Functional Prediction Agent",
        "query": "Annotate genes in genomic region 'chr1:1000-2000' using the 'Ensembl' database.",
        "expected_tool_call": "annotate_genes"
    },
    {
        "agent": gene_annotation_prediction_agent,
        "name": "Gene Annotation & Functional Prediction Agent",
        "query": "Predict the impact of variant 'rs12345' in gene 'GENE_A' using 'VEP'.",
        "expected_tool_call": "predict_variant_impact"
    },
    {
        "agent": genomics_data_repo_agent,
        "name": "Genomics Data Repository Agent",
        "query": "Search data repository for whole genome sequencing datasets related to 'cancer'.",
        "expected_tool_call": "search_genomics_data_repository"
    },
    {
        "agent": genomics_data_repo_agent,
        "name": "Genomics Data Repository Agent",
        "query": "Upload VCF file 'patient_variants.vcf' for dataset 'Patient_001_WGS' with metadata: {\"patient_age\": 55, \"diagnosis\": \"lung cancer\"}.",
        "expected_tool_call": "upload_genomics_data"
    },
    {
        "agent": genomics_bioinformatics_analysis_agent,
        "name": "Genomics Bioinformatics Analysis Agent",
        "query": "Analyze differential gene expression from RNA-Seq data: [{\"gene\": \"G1\", \"control_rep1\": 100, \"control_rep2\": 110, \"treated_rep1\": 200, \"treated_rep2\": 210}] with groups: {\"control\": [\"control_rep1\", \"control_rep2\"], \"treated\": [\"treated_rep1\", \"treated_rep2\"]}.",
        "expected_tool_call": "analyze_gene_expression"
    },
    {
        "agent": genomics_bioinformatics_analysis_agent,
        "name": "Genomics Bioinformatics Analysis Agent",
        "query": "Perform comparative genomics analysis between 'Human_Ref' and 'Chimp_Ref' for synteny.",
        "expected_tool_call": "perform_comparative_genomics"
    },
    {
        "agent": sequencing_qc_agent,
        "name": "Sequencing Quality Control Agent",
        "query": "Run QC on raw FASTQ file 'Sample_A.fastq.gz' with thresholds: {\"min_phred_score\": 30, \"min_read_length\": 100}.",
        "expected_tool_call": "run_read_qc"
    },
    {
        "agent": sequencing_qc_agent,
        "name": "Sequencing Quality Control Agent",
        "query": "Assess alignment quality for BAM file 'Sample_A.bam', reporting 'mapping_rate' and 'duplication_rate'.",
        "expected_tool_call": "assess_alignment_quality"
    },
    {
        "agent": genomics_experimental_design_agent,
        "name": "Genomics Experimental Design Agent",
        "query": "Design a sequencing strategy for 'variant discovery' in 'human tumor' and 'normal' samples, with a 'moderate' budget.",
        "expected_tool_call": "design_sequencing_strategy"
    },
    {
        "agent": genomics_experimental_design_agent,
        "name": "Genomics Experimental Design Agent",
        "query": "Calculate sequencing depth for a human genome (3000 Mb) with 30x coverage and 150 bp reads.",
        "expected_tool_call": "calculate_sequencing_depth"
    },
    {
        "agent": genomics_reporting_visualization_agent,
        "name": "Genomics Reporting & Visualization Agent",
        "query": "Generate a summary report for variant file 'sample_variants.vcf'.",
        "expected_tool_call": "generate_variant_report"
    },
    {
        "agent": genomics_reporting_visualization_agent,
        "name": "Genomics Reporting & Visualization Agent",
        "query": "Create a heatmap visualization for 'gene_expression_data.json' with parameters: {\"genes_to_highlight\": [\"BRCA1\", \"TP53\"], \"color_map\": \"viridis\"}.",
        "expected_tool_call": "create_genomic_visualization"
    },
    {
        "agent": genomics_reporting_visualization_agent,
        "name": "Genomics Reporting & Visualization Agent (Web Search Example)",
        "query": "Search for best practices in genomic data visualization.",
        "expected_tool_call": "internal_web_search_tool"
    }
]

# Run every test case as a two-turn exchange over chat completions:
#   Turn 1 - send the user query together with the agent's tools and let the
#            model propose tool calls.
#   Turn 2 - execute each proposed call against the local mocks, append the
#            results to the conversation, and ask the model for a final answer.
# Any exception inside a test case is reported and the loop moves on to the next one.
for test_case in test_cases:
    agent_to_test = test_case["agent"]
    agent_name = test_case["name"]
    user_query = test_case["query"]
    expected_tool_call_name = test_case["expected_tool_call"]  # For tracking only; it never steers the model.
    print(f"\n--- Executing Test Case for the {agent_name} ---")
    print(f"User: {user_query}")
    conversation_history = [{"role": "user", "content": user_query}]
    try:
        # Get the API-compatible tool list for the current agent.
        api_call_tools_list = get_api_call_tools_list(agent_to_test.tools)

        response_turn1 = client.chat.complete(
            messages=conversation_history,
            model=agent_to_test.model,
            tools=api_call_tools_list,
        )
        assistant_message_turn1 = response_turn1.choices[0].message
        conversation_history.append(assistant_message_turn1.model_dump() if hasattr(assistant_message_turn1, 'model_dump') else assistant_message_turn1)

        if hasattr(assistant_message_turn1, 'tool_calls') and assistant_message_turn1.tool_calls:
            print(f"\n{agent_name} proposed tool calls (Turn 1):")
            # Surface a mismatch with the test case's expectation without failing the run.
            proposed_tool_names = [tool_call.function.name for tool_call in assistant_message_turn1.tool_calls]
            if expected_tool_call_name not in proposed_tool_names:
                print(f" [WARN] Expected tool '{expected_tool_call_name}' was not proposed; got: {proposed_tool_names}")
            for tool_call in assistant_message_turn1.tool_calls:
                print(f" Tool Name: {tool_call.function.name}")
                print(f" Tool Arguments (JSON string): {tool_call.function.arguments}")
                tool_output_content = None
                # Check if the proposed tool exists in our local executor mapping.
                if tool_call.function.name in tool_executor:
                    try:
                        # The SDK may hand back the arguments either as a JSON string
                        # or as an already-decoded dict; only decode the former.
                        raw_arguments = tool_call.function.arguments
                        args = raw_arguments if isinstance(raw_arguments, dict) else json.loads(raw_arguments)
                        # Execute the local mock function based on its name.
                        tool_output = tool_executor[tool_call.function.name](**args)
                        tool_output_content = json.dumps(tool_output)
                        print(f" [DEBUG] Local MOCK {tool_call.function.name} executed. Output: {tool_output}")
                    except json.JSONDecodeError as e:
                        print(f" [ERROR] Failed to parse tool arguments for {tool_call.function.name}: {e}")
                        tool_output_content = json.dumps({"error": f"Failed to parse arguments: {e}"})
                    except Exception as e:
                        # Deliberately broad: a failing mock must not abort the exchange;
                        # the error is passed back to the model as the tool result.
                        print(f" [ERROR] Error executing local mock {tool_call.function.name}: {e}")
                        tool_output_content = json.dumps({"error": f"Tool execution failed: {e}"})
                else:
                    print(f" [DEBUG] Unhandled tool call: {tool_call.function.name}")
                    tool_output_content = json.dumps({"error": "Tool not handled by client-side executor"})

                # Add the tool output message to the conversation history.
                # This is crucial for the model to "see" the result of the tool call.
                conversation_history.append(
                    {
                        "role": "tool",
                        "name": tool_call.function.name,
                        "content": tool_output_content,
                        "tool_call_id": tool_call.id  # Links the tool output to the specific call request.
                    }
                )
                print(f" [DEBUG] Tool output for '{tool_call.function.name}' added to history.")

            # Second turn: Send the conversation history (including tool outputs) back to the model.
            # The model will then generate a final response based on the tool's output.
            print(f"\n [DEBUG] Sending conversation history with tool outputs back for final response from {agent_name}.")
            final_response = client.chat.complete(
                model=agent_to_test.model,
                messages=conversation_history,
                tools=api_call_tools_list,  # Tools must be provided in all calls if they are part of the conversation.
            )
            final_assistant_message = final_response.choices[0].message
            print(f"\n{agent_name}'s Final Response:")
            print(final_assistant_message.content)
            # Add the final assistant response to history (optional for a single-turn demo, but good practice)
            conversation_history.append(final_assistant_message.model_dump() if hasattr(final_assistant_message, 'model_dump') else final_assistant_message)
        else:
            # If no tool calls were proposed in the first turn, print the direct response.
            print(f"\n{agent_name}'s initial response (no tool calls proposed):")
            print(assistant_message_turn1.content)
    except Exception as e:
        # Top-level boundary for one test case: report and continue with the next.
        print(f"\nAn error occurred during {agent_name} interaction: {e}")
        print("Please check your API key, model availability, network connection, or SDK version.")
        print("If you continue to experience errors, a complete restart of your Python environment (e.g., Colab runtime restart) might be necessary.")

print("\n--- All test cases execution complete. ---")


Mounted at /content/gdrive
Creating AI agents for Genomics Domain...
Nucleic Acid Extraction Agent 'nucleic-acid-extraction-agent' created with ID: ag_0197c7340df774cb9e076c85f33a0d19
Sequencing Data Acquisition Agent 'sequencing-data-acquisition-agent' created with ID: ag_0197c7340edf736885bad5765a6ebe3a
Read Alignment & Variant Calling Agent 'read-alignment-variant-calling-agent' created with ID: ag_0197c7340fca7673b31f816b96f23180
Gene Annotation & Functional Prediction Agent 'gene-annotation-prediction-agent' created with ID: ag_0197c73410ab75da95a3be98f26db073
Genomics Data Repository Agent 'genomics-data-repo-agent' created with ID: ag_0197c73411a77499a993e03e79d6f370
Genomics Bioinformatics Analysis Agent 'genomics-bioinformatics-analysis-agent' created with ID: ag_0197c734127d77508109357942fb7c37
Sequencing Quality Control Agent 'sequencing-qc-agent' created with ID: ag_0197c734135f7361b4f3e63eb628f0f5
Genomics Experimental Design Agent 'genomics-experimental-design-agent' crea