In [1]:
import sys
import os
import pandas as pd
parent_dir = os.path.dirname(os.getcwd())  # project root
sys.path.insert(0, parent_dir)

import nanobioagent as nba
from nanobioagent.evaluation.data_utils import get_data_from_results, get_question_text_from_idx, list_available_results

In [2]:
# Which evaluation run to inspect.
experiment = "nba"
model = 'microsoft/phi-4-mini-flash-reasoning'
method = 'genegpt'
file_suffix = "_details_updated.csv"

# Report files are named "<model with '/' replaced by '_'>_<method><suffix>".
result_filename = model.replace('/', '_') + "_" + method + file_suffix
result_dir = f"results/{experiment}/evaluation_reports"

# Set to True to ignore the name built above and take the last entry of the
# mtime-sorted listing instead (presumably the newest file -- confirm the sort
# direction of list_available_results).
use_latest_result = False
if use_latest_result:
    idx_in_list = -1  # last one
    # Reuse file_suffix so the listing always matches the suffix configured above.
    result_filename = list_available_results(result_dir, file_suffix=file_suffix, sort_by="mtime")[idx_in_list]

print(f"Using result file: {result_filename}")

Using result file: microsoft_phi-4-mini-flash-reasoning_genegpt_details_updated.csv


In [3]:
# Get all failed cases (the filter arguments are passed to get_data_from_results as given)
failed_cases = get_data_from_results(result_file=os.path.join(result_dir, result_filename),
    filter_score="<0.5",
    filter_success=["No"],
    dataType="idx,question,task",
    return_format="df"
)
# One representative row per task, ordered by idx.
# NOTE(review): GroupBy.first() takes the first non-null value of each column separately,
# so a "row" could mix values if idx/question ever contain nulls -- confirm they do not.
# NOTE(review): idx looks like a string such as "0|1", so this sort would be lexicographic -- TODO confirm.
sample_per_task = (failed_cases.groupby('task').first().reset_index().sort_values('idx'))
reordered = sample_per_task[['idx', 'question', 'task']]
# Print the total number of failed cases, then the per-task samples (not the full failure list)
print(f"Found {len(failed_cases)} failed cases:")
print("="*80)
# Widen the display only for this print; the options are restored when the block exits
with pd.option_context('display.max_columns', None, 'display.width', None, 'display.max_colwidth', 40):
    print(reordered.head(10))

Found 382 failed cases:
   idx                                 question                        task
1  0|1  What is the official gene symbol of ...                  Gene alias
2  1|0  What are genes related to Hemolytic ...    Gene disease association
3  2|0  Which chromosome is FAM66D gene loca...               Gene location
5  3|1  Align the DNA sequence to the human ...   Human genome DNA aligment
6  4|0  Which organism does the DNA sequence...  Multi-species DNA aligment
4  5|0  Convert ENSG00000215251 to official ...        Gene name conversion
7  6|0      Is ATP5F1EP2 a protein-coding gene?        Protein-coding genes
0  7|0  Which gene is SNP rs1217074595 assoc...        Gene SNP association
8  8|0  Which chromosome does SNP rs14304648...                SNP location


In [4]:
# Option A: pick the first failed question of task group q_i (row position in sample_per_task)
q_i = 2
question = sample_per_task.iloc[q_i]['question']
# Option B: ... or pick from the original data json by its idx.
# Both options run; this second assignment is the one that `question` keeps.
q_idx = "2|0"
question= get_question_text_from_idx(q_idx, return_metadata=False)
print(question)

Which chromosome is FAM66D gene located on human genome?


In [2]:
# Scratch catalogue of example questions, one variable (q0, q1, ...) per question type.
# Each name is assigned several times in a row; only the LAST uncommented assignment
# of a name is in effect after the cell runs. Reorder or comment lines to switch examples.
q0 = "What is the official gene symbol of RBAP1?"
q0 = "What is the official gene symbol of C20orf195?"
# q0 = "What is the official gene symbol of C6orf186?"
q0 = "What is the official gene symbol of FAM214B?"
q0 = "What is the official gene symbol of hCAP?"
# q0='What is the official gene symbol of SGOL1?'

q1 = "What are genes related to Trichoepithelioma?"
q1 = "What are genes related to Bile acid malabsorption?"
q1 = "What are genes related to Brody myopathy?"
# q1 = "What are genes related to Distal renal tubular acidosis?"
q1 = "What are genes related to Glycine N-methyltransferase deficiency?"
q2 = "Which chromosome is TTTY7 gene located on human genome?"
q2 = "Which chromosome is ZNF574 gene located on human genome?"

q3 = "Align the DNA sequence to the human genome:GCATGGCCAACATGGCAAAACACTGTCTCCATTAAAAATACAAAAAAAATTAGCTGGGCGTGGTGGTGCACATCTGTAGTCCCAGCTACTTGGGAGGCTGAGGCAGGGGAATCACTTGAACCCGGGAGGCG"
q3 = "Align the DNA sequence to the human genome:CAGCTAACAGAGTGGATCCTTTCTTTTTACAGAGCAGCTTTGAAACTCTATTTCTGTGGATTCTGCAAATTGATATTTGGGTTGATTTAACGATATCGATGGAAAAGGGAATATCTT"
q3 = "Align the DNA sequence to the human genome:GTGTTGAACAGTCCCTTTCATAGAGCAGGTTTGAAACACTCTTTTTGTAGTATCTGGAAGTGGACATTAGGAACGCTCTCAGGACTGCGTTGAAAAAGGAAATATCTTCCAATAAAAGCTAGATAG"
q3 = "Align the DNA sequence to the human genome:AGGCCCTCACCTGGAAATTACTTACTCATGCTTCATGACCCAGTTCAAATTTTGTCACCTCTGTGAAACCTTCCCTGGGCCCCGTTGATCTCCTTGAAGGCA"

q4 = "Which organism does the DNA sequence come from:CGTACACCATTGGTGCCAGTGACTGTGGTCAATTCGGTAGAAGTAGAGGTAAAAGTGCTGTTCCATGGCTCAGTTGTAGTTATGATGGTGCTAGCAGTTGTTGGAGTTCTGATGACAATGACGGTTTCGTCAGTTG"
q4 = "Which organism does the DNA sequence come from:GTTGATGTCAGCTCTCTACAGTTCATGACTGGACACACACACATAGCCCGTTTCATTAAAGAAATAGAATCCTAACAATGACATCATTGTAGAAGCTCCTGGGATGACAG"
q5 = "Convert ENSG00000240694 to official gene symbol."
q5 = "Convert ENSG00000205403 to official gene symbol."
q5 = "Convert ENSG00000177051 to official gene symbol."

q6 = "Is ATP5F1EP2 a protein-coding gene?" # q6 = "Is NODAL a protein-coding gene?"
q6 = "Is LOC124907753 a protein-coding gene?"
q6 = "Is MIR4436B2 a protein-coding gene?"
q7 = "Which gene is SNP rs1241371358 associated with?"
q8 = "Which chromosome does SNP rs397784008 locate on human genome?"

q9 = "Find critical bases in this enhancer sequence: ATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCGATCG"
q10 = "What splicing impact does SNP rs7903146 have?"
q11 = "Compare BRCA1 expression in breast vs ovary vs brain"
q11 = "Expression of TP53 across heart, liver, and lung tissues"
#q11 = "How is EGFR expressed in different tissues like brain and liver?"


In [None]:

# Single-question run. `model` and `method` rebind the names set in the configuration
# cell near the top, so cells executed after this one see these values instead.
model = 'mistralai/mamba-codestral-7b-v0.1'
method='agent' # 'genegpt'
question = 'Convert ENSG00000291317 to official gene symbol.'
# Judging by the printed output, the result is a list of the form
# [question, answer, answer, list of log lines] -- TODO confirm what items 1 and 2 each represent.
result = nba.gene_answer(question, method=method, model_name=model)
print(result)

['Convert ENSG00000291317 to official gene symbol.', 'TMEM276', 'TMEM276', ['Stage 1: Task Classification', 'Task classified as: gene_alias', 'Stage 2: Plan Retrieval', 'Plan retrieved: plan_esearch_efetch', 'Stage 3: Plan Execution', 'Plan of 4 steps: infer_parameters_from_task --> get_esearch_idlist_from_param --> get_efetch_doc_from_idlist --> parse_answer_from_doc', "infer_parameters_from_task: Using few-shot with 3 examples for task type 'gene_alias'", "infer_parameters_from_task: Extracted parameters: {'database': 'gene', 'search_term': 'ENSG00000291317', 'retmax': '5', 'retmode': 'json', 'orgn': 'homo sapiens'}", 'get_esearch_idlist_from_param: URL called: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=gene&retmax=5&retmode=json&sort=relevance&term=ENSG00000291317%20NOT%20discontinued[prop]%20AND%20Homo%20sapiens[orgn]', 'get_esearch_idlist_from_param: Found 1 IDs', 'get_efetch_doc_from_idlist: URL called: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?

In [26]:
# Compare items 1 and 2 of `result` and report when they are equal.
# NOTE(review): which item is the reference answer is not visible here -- confirm against nba.gene_answer.
if result[1]==result[2]:
    print("Correct answer!!:", result[1])
# Bare last expression: display the full result as the cell output
result

Correct answer!!: TMEM276


['Convert ENSG00000291317 to official gene symbol.',
 'TMEM276',
 'TMEM276',
 ['Stage 1: Task Classification',
  'Task classified as: gene_alias',
  'Stage 2: Plan Retrieval',
  'Plan retrieved: plan_esearch_efetch',
  'Stage 3: Plan Execution',
  'Plan of 4 steps: infer_parameters_from_task --> get_esearch_idlist_from_param --> get_efetch_doc_from_idlist --> parse_answer_from_doc',
  "infer_parameters_from_task: Using few-shot with 3 examples for task type 'gene_alias'",
  "infer_parameters_from_task: Extracted parameters: {'database': 'gene', 'search_term': 'ENSG00000291317', 'retmax': '5', 'retmode': 'json', 'orgn': 'homo sapiens'}",
  'get_esearch_idlist_from_param: URL called: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=gene&retmax=5&retmode=json&sort=relevance&term=ENSG00000291317%20NOT%20discontinued[prop]%20AND%20Homo%20sapiens[orgn]',
  'get_esearch_idlist_from_param: Found 1 IDs',
  'get_efetch_doc_from_idlist: URL called: https://eutils.ncbi.nlm.nih.gov/ent

In [18]:
# Collect the model names that the test helper returns for a parameter-count window.
# The helper's printed log ("params >= 4B, params <= 10B") indicates the bounds are in billions.
from tests.test_model_context_token_info import get_all_model_names
test_models = get_all_model_names(num_parameters_b_max=10,num_parameters_b_min=4)
print(test_models)

Loading model names from: c:\Users\ssg_h\dev\nanobioagent\nanobioagent\config\model_config.json
Found 130 models in config file
Applied filters: params >= 4B, params <= 10B
Filtered to 35 models
['gemini-1.5-flash-8b', 'nvidia/llama-3.1-nemotron-nano-4b-v1.1', 'nvidia/llama-3.1-nemotron-nano-8b-v1', 'meta/llama-3.1-8b-instruct', 'google/gemma-2-9b-it', 'google/codegemma-7b', 'deepseek-ai/deepseek-r1-distill-qwen-7b', 'deepseek-ai/deepseek-r1-distill-llama-8b', 'ibm/granite-3.3-8b-instruct', 'qwen/qwen2.5-coder-7b-instruct', 'qwen/qwen2.5-7b-instruct', 'nvidia/nvidia-nemotron-nano-9b-v2', 'mistralai/mistral-7b-instruct-v0.3', 'microsoft/phi-3-small-128k-instruct', 'tiiuae/falcon3-7b-instruct', 'ollama/meditron:7b', 'ollama/llama3:8b', 'ollama/llama2:latest', 'ollama/codellama:7b-instruct', 'ollama/qwen3:8b', 'ollama/qwen2.5-coder-7b-instruct', 'huggingface/Qwen/Qwen3-4B-Instruct-2507', 'huggingface/Qwen/Qwen2.5-Coder-7B-Instruct', 'huggingface/Qwen/Qwen3-8B', 'huggingface/Qwen/Qwen3-4B'

In [32]:
# Run one alignment question through every model/method combination listed below.
question = 'Align the DNA sequence to the human genome:GCATGGCCAACATGGCAAAACACTGTCTCCATTAAAAATACAAAAAAAATTAGCTGGGCGTGGTGGTGCACATCTGTAGTCCCAGCTACTTGGGAGGCTGAGGCAGGGGAATCACTTGAACCCGGGAGGCG'
list_model_name = ["huggingface/Qwen/Qwen2.5-Coder-7B-Instruct", "qwen/qwen2.5-coder-7b-instruct"]
# Alternatives kept for quick switching (test_models comes from the model-listing cell):
# list_model_name = test_models
# list_method=["code", "agent"]
list_method=["code"]
results = nba.gene_compare(question=question, list_model_name=list_model_name, list_method=list_method)
# Tabulate the runs; the exact structure of `results` is defined by nba.gene_compare
df_results = pd.DataFrame(results)

Running model combinations:   0%|          | 0/2 [00:00<?, ?it/s]

In [33]:
# Show the comparison table, then keep a CSV copy.
# The file name is relative, so it is written to the current working directory.
print(df_results)
df_results.to_csv("zzz_results_example_test.csv", index=False)

                                   model_name    method  \
0                                         N/A  retrieve   
1  huggingface/Qwen/Qwen2.5-Coder-7B-Instruct      code   
2              qwen/qwen2.5-coder-7b-instruct      code   

                    answer  elapsed_time  
0  chr19:32322319-32322449          1.26  
1  chr19:32753225-32753355          3.32  
2  chr19:32753225-32753355          2.52  


In [2]:
# toughest questions 
# list_q0: gene-symbol (alias) questions
list_q0 = [
    "What is the official gene symbol of PCPB?",
    "What is the official gene symbol of GalNAc-T4?",
    "What is the official gene symbol of PTH1?",
    "What is the official gene symbol of TFA?",
    "What is the official gene symbol of FAM214B?",
    "What is the official gene symbol of GCS1?"
]

# list_q1: gene-disease association questions
list_q1 = [
    "What are genes related to B-cell immunodeficiency?",
    "What are genes related to Vascular malformation?",
    "What are genes related to Type diabetes mellitus?",
    "What are genes related to Gastrointestinal defects and immunodeficiency syndrome?"
]

In [16]:
# Ask every question in list_q1 with one fixed model/method and keep each raw result.
# Note: the loop rebinds the notebook-level names `question` and `result`.
list_result = []
for question in list_q1:
    print(f"Q: {question}")
    result = nba.gene_answer(question, method='agent', model_name='huggingface/microsoft/phi-4')
    list_result.append(result)
    # Items 1 and 2 of the result are printed as the two answers
    # (TODO confirm which one is the reference and which is the model's).
    print(f"A1: {result[1]}")
    print(f"A2: {result[2]}")
    print("="*40)

Q: What are genes related to B-cell immunodeficiency?
A1: TOP2B
A2: Error: Error code: 400 - {'error': {'message': "The requested model 'microsoft/phi-4' is not supported by any provider you have enabled.", 'type': 'invalid_request_error', 'param': 'model', 'code': 'model_not_supported'}}
Q: What are genes related to Vascular malformation?
A1: ELMO2
A2: Error: Error code: 400 - {'error': {'message': "The requested model 'microsoft/phi-4' is not supported by any provider you have enabled.", 'type': 'invalid_request_error', 'param': 'model', 'code': 'model_not_supported'}}
Q: What are genes related to Type diabetes mellitus?
A1: HNF1B, IL6, GPD2, HMGA1, IRS1, NEUROD1, IL6
A2: Error: Error code: 400 - {'error': {'message': "The requested model 'microsoft/phi-4' is not supported by any provider you have enabled.", 'type': 'invalid_request_error', 'param': 'model', 'code': 'model_not_supported'}}
Q: What are genes related to Gastrointestinal defects and immunodeficiency syndrome?
A1: PI4KA,

In [5]:
# Display the raw results collected by the question loop
list_result

[['What are genes related to B-cell immunodeficiency?',
  'TOP2B',
  '',
  ['Stage 1: Task Classification',
   'Task classified as: gene_disease_association',
   'Stage 2: Plan Retrieval',
   'Plan retrieved: plan_esearch_esummary',
   'Stage 3: Plan Execution',
   'Plan of 4 steps: infer_parameters_from_task --> get_esearch_idlist_from_param --> get_esummary_doc_from_idlist --> parse_answer_from_doc',
   "infer_parameters_from_task: Using few-shot with 1 examples for task type 'gene_disease_association'",
   "infer_parameters_from_task: Extracted parameters: {'database': 'omim', 'search_term': 'B-cell+immunodeficiency', 'retmax': '20', 'retmode': 'json'}",
   'get_esearch_idlist_from_param: URL called: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=omim&retmax=20&retmode=json&sort=relevance&term=B-cell+immunodeficiency',
   'get_esearch_idlist_from_param: Found 20 IDs',
   'get_esummary_doc_from_idlist: URL called: https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.

In [13]:
import os

def add_prefix_bulk(folder, filename, prefix="zzz_",
                    folder_filter="_agent", folder_exclude_filter="zzz",
                    case_sensitive=True):
    """
    Rename `filename` to `prefix + filename` in selected folders below `folder`.

    Traverses `folder` and all subfolders.
    - Only folders whose path contains `folder_filter` are considered.
    - Any folder whose path contains `folder_exclude_filter` is skipped,
      together with everything below it. An empty exclude filter disables
      exclusion.
    - A matching file is renamed unless `filename` already starts with
      `prefix`, or the prefixed name already exists in that folder (the
      existing file is never overwritten; a "Skipped" line is printed).

    The filters are substring tests against the whole walked path, which
    includes `folder` itself.

    Parameters
    ----------
    folder : str
        Root directory to traverse.
    filename : str
        Exact file name to look for in each folder.
    prefix : str
        Text to put in front of the file name.
    folder_filter : str
        Substring a folder path must contain to be processed.
    folder_exclude_filter : str
        Substring that excludes a folder path; "" means exclude nothing.
    case_sensitive : bool
        When False, both filters are matched case-insensitively.

    Returns
    -------
    changed_folders : list of str
        List of folder names (last path component) where renaming occurred.
    """
    changed_folders = []

    # The target is already prefixed: there is nothing to rename anywhere.
    if filename.startswith(prefix):
        return changed_folders

    # Normalise the filters once; they do not change during the walk.
    filter_check = folder_filter if case_sensitive else folder_filter.lower()
    exclude_check = folder_exclude_filter if case_sensitive else folder_exclude_filter.lower()

    for root, dirs, files in os.walk(folder):
        root_check = root if case_sensitive else root.lower()

        # Skip excluded folders. The emptiness guard matters because
        # `"" in root_check` is always True and would exclude every folder.
        if exclude_check and exclude_check in root_check:
            # Every descendant path contains the same substring, so pruning
            # the walk here gives the same result without visiting them.
            dirs[:] = []
            continue

        # Process only matching folders that actually hold the file.
        if filter_check not in root_check or filename not in files:
            continue

        old_path = os.path.join(root, filename)
        new_path = os.path.join(root, prefix + filename)

        # os.rename silently replaces an existing target on POSIX; refuse
        # to destroy a previously prefixed file.
        if os.path.exists(new_path):
            print(f"Skipped (target exists): {new_path}")
            continue

        os.rename(old_path, new_path)
        folder_name = os.path.basename(root)  # just the folder name
        if folder_name not in changed_folders:
            changed_folders.append(folder_name)
        print(f"Renamed: {old_path} → {new_path}")

    return changed_folders





In [None]:
# Kept commented out on purpose: running this call renames files on disk.
# The path is an absolute path on one machine; adjust it before re-running.
# The `changed` variable inspected in the following cell only exists after this call has been run.
# changed = add_prefix_bulk("C:/Users/ssg_h/dev/nanobioagent/results/nba", "Gene disease association.json", prefix="zzzz_", folder_filter="_agent", case_sensitive=False)

Renamed: C:/Users/ssg_h/dev/nanobioagent/results/nba\claude-3-5-haiku-20241022_agent\Gene disease association.json → C:/Users/ssg_h/dev/nanobioagent/results/nba\claude-3-5-haiku-20241022_agent\zzzz_Gene disease association.json
Renamed: C:/Users/ssg_h/dev/nanobioagent/results/nba\claude-3-7-sonnet-20250219_agent\Gene disease association.json → C:/Users/ssg_h/dev/nanobioagent/results/nba\claude-3-7-sonnet-20250219_agent\zzzz_Gene disease association.json
Renamed: C:/Users/ssg_h/dev/nanobioagent/results/nba\claude-sonnet-4-20250514_agent\Gene disease association.json → C:/Users/ssg_h/dev/nanobioagent/results/nba\claude-sonnet-4-20250514_agent\zzzz_Gene disease association.json
Renamed: C:/Users/ssg_h/dev/nanobioagent/results/nba\gemini-1.5-flash-8b_agent\Gene disease association.json → C:/Users/ssg_h/dev/nanobioagent/results/nba\gemini-1.5-flash-8b_agent\zzzz_Gene disease association.json
Renamed: C:/Users/ssg_h/dev/nanobioagent/results/nba\gemini-1.5-flash_agent\Gene disease association

In [15]:
# Folder names returned by add_prefix_bulk; `changed` is only bound once that call has been executed
changed

['claude-3-5-haiku-20241022_agent',
 'claude-3-7-sonnet-20250219_agent',
 'claude-sonnet-4-20250514_agent',
 'gemini-1.5-flash-8b_agent',
 'gemini-1.5-flash_agent',
 'gemini-2.0-flash_agent',
 'gemini-2.5-flash-lite_agent',
 'google_gemma-2-2b-it_agent',
 'google_gemma-2-9b-it_agent',
 'google_gemma-3-1b-it_agent',
 'google_gemma-3-27b-it_agent',
 'gpt-3.5-turbo_agent',
 'gpt-4.1-mini_agent',
 'gpt-4.1-nano_agent',
 'gpt-4.1_agent',
 'gpt-4o-mini_agent',
 'gpt-4o_agent',
 'gpt-5-nano_agent',
 'huggingface_arcee-ai_AFM-4.5B_agent',
 'huggingface_HuggingFaceTB_SmolLM3-3B_agent',
 'huggingface_meta-llama_Llama-3.2-1B-Instruct_agent',
 'huggingface_microsoft_phi-4_agent',
 'huggingface_moonshotai_Kimi-K2-Instruct-0905_agent',
 'huggingface_openai_gpt-oss-120b_agent',
 'huggingface_openai_gpt-oss-20b_agent',
 'huggingface_Qwen_Qwen2.5-Coder-3B-Instruct_agent',
 'ibm_granite-3.3-8b-instruct_agent',
 'meta_llama-3.1-405b-instruct_agent',
 'meta_llama-3.1-8b-instruct_agent',
 'meta_llama-3.2-1

In [45]:
import pandas as pd
import glob
import os
from pathlib import Path

def summarize_evaluation_reports_details(evaluation_reports_dir, filter_pattern="*details*.csv", output_filename="evaluation_summary.csv"):
    """
    Summarize evaluation detail files: one output row per CSV file with
    averages, sums, and a row count over fixed columns.

    Rows whose 'score' equals the string 'EXCLUDED' are dropped first. A file
    that cannot be processed (for example, no 'score' column) is reported on
    stdout and skipped. Columns missing from a file are reported as 0, and an
    average with no numeric values is reported as 0 as well.

    Files are processed in sorted path order so the summary is reproducible.
    The summary is written as CSV into the current working directory and
    printed; pandas display options are only changed temporarily for that print.

    Args:
        evaluation_reports_dir (str): Path to the evaluation_reports directory
        filter_pattern (str): Glob pattern to filter files (default: "*details*.csv")
        output_filename (str): Name for the output CSV file (saved in current directory)

    Returns:
        pd.DataFrame or None: Summary table with aggregated metrics, or None
        when no file matched or no file could be processed.
    """

    # Find all matching CSV files. glob returns them in arbitrary order, so
    # sort to keep the summary rows in a stable order between runs.
    search_pattern = os.path.join(evaluation_reports_dir, filter_pattern)
    csv_files = sorted(glob.glob(search_pattern))

    if not csv_files:
        print(f"No files found matching pattern: {search_pattern}")
        return None

    print(f"Found {len(csv_files)} files matching pattern")

    # Define columns to aggregate and their operations
    avg_columns = ['score', 'task_classification_score']
    sum_columns = ['elapsed_time', 'total_tokens', 'total_cost', 'input_tokens', 'output_tokens']
    count_columns = ['question']  # Will count non-excluded rows

    summary_data = []

    for csv_file in csv_files:
        base_name = os.path.basename(csv_file)
        try:
            df = pd.read_csv(csv_file)

            # Filter out EXCLUDED rows
            df_filtered = df[df['score'] != 'EXCLUDED'].copy()

            if df_filtered.empty:
                print(f"Warning: No non-excluded rows in {base_name}")
                continue

            # Convert the metric columns to numbers. errors='coerce' turns
            # empty strings and any other non-numeric text into NaN, which
            # mean() and sum() then ignore.
            for col in avg_columns + sum_columns:
                if col in df_filtered.columns:
                    df_filtered[col] = pd.to_numeric(df_filtered[col], errors='coerce')

            row_data = {'filename': base_name}

            # Average columns (0 when the column is absent or has no numeric values)
            for col in avg_columns:
                avg_val = df_filtered[col].mean() if col in df_filtered.columns else 0
                row_data[f'average_{col}'] = avg_val if pd.notna(avg_val) else 0

            # Sum columns (0 when the column is absent)
            for col in sum_columns:
                sum_val = df_filtered[col].sum() if col in df_filtered.columns else 0
                row_data[f'sum_{col}'] = sum_val if pd.notna(sum_val) else 0

            # Count columns: the number of non-excluded rows, not of non-null values
            for col in count_columns:
                row_data[f'count_{col}'] = len(df_filtered) if col in df_filtered.columns else 0

            summary_data.append(row_data)
            print(f"Processed: {base_name} ({len(df_filtered)} valid rows)")

        except Exception as e:
            # Best effort by design: report the bad file and continue with the rest.
            print(f"Error processing {csv_file}: {str(e)}")
            continue

    if not summary_data:
        print("No data could be processed from the files")
        return None

    # Create summary DataFrame
    summary_df = pd.DataFrame(summary_data)

    # Reorder columns to have filename first, then aggregated metrics
    cols = ['filename']
    cols.extend([f'average_{col}' for col in avg_columns])
    cols.extend([f'sum_{col}' for col in sum_columns])
    cols.extend([f'count_{col}' for col in count_columns])

    summary_df = summary_df[cols]

    # Save to CSV in current directory
    output_path = os.path.join(os.getcwd(), output_filename)
    summary_df.to_csv(output_path, index=False, float_format='%.6f')

    print(f"\nSummary saved to: {output_path}")
    print(f"Summary shape: {summary_df.shape}")

    # Display the summary
    print("\nSUMMARY TABLE:")
    print("=" * 100)

    # Widen the display only for this print. option_context restores the
    # previous settings, so calling this function does not change how later
    # cells render DataFrames.
    with pd.option_context('display.max_columns', None,
                           'display.width', None,
                           'display.max_colwidth', 30):
        print(summary_df.to_string(index=False, float_format='%.6f'))

    return summary_df


In [46]:
# Summarize the agent-method detail reports of the nba_finals run.
# The directory is relative to the current working directory; the summary CSV is written to the current directory.
summary = summarize_evaluation_reports_details(
        evaluation_reports_dir="../results/nba_finals/evaluation_reports",
        filter_pattern="*_agent_details_updated*.csv",
        output_filename="agent_updated_summary.csv"
)

Found 50 files matching pattern
Processed: claude-3-5-haiku-20241022_agent_details_updated.csv (441 valid rows)
Processed: claude-3-7-sonnet-20250219_agent_details_updated.csv (441 valid rows)
Processed: claude-opus-4-1-20250805_agent_details_updated.csv (441 valid rows)
Processed: claude-sonnet-4-20250514_agent_details_updated.csv (441 valid rows)
Processed: gemini-1.5-flash-8b_agent_details_updated.csv (441 valid rows)
Processed: gemini-1.5-flash_agent_details_updated.csv (441 valid rows)
Processed: gemini-2.0-flash_agent_details_updated.csv (441 valid rows)
Processed: gemini-2.5-flash-lite_agent_details_updated.csv (441 valid rows)
Processed: google_gemma-2-2b-it_agent_details_updated.csv (441 valid rows)
Processed: google_gemma-2-9b-it_agent_details_updated.csv (441 valid rows)
Processed: google_gemma-3-1b-it_agent_details_updated.csv (441 valid rows)
Processed: google_gemma-3-27b-it_agent_details_updated.csv (441 valid rows)
Processed: gpt-3.5-turbo_agent_details_updated.csv (441 