# Notebook for creating agents programatically 

### set up parent directory 

In [1]:
import sys
import os

# Add the parent directory of the current script to the Python path
cwd = os.getcwd()
dirname = os.path.dirname(cwd)
print(cwd)
print(dirname)
sys.path.append(dirname)

print(sys.path)

/Users/idekeradmin/Dropbox/GitHub/agent_evaluation/notebooks
/Users/idekeradmin/Dropbox/GitHub/agent_evaluation
['/Users/idekeradmin/Dropbox/GitHub/agent_evaluation/notebooks', '/opt/anaconda3/lib/python311.zip', '/opt/anaconda3/lib/python3.11', '/opt/anaconda3/lib/python3.11/lib-dynload', '', '/Users/idekeradmin/.local/lib/python3.11/site-packages', '/opt/anaconda3/lib/python3.11/site-packages', '/opt/anaconda3/lib/python3.11/site-packages/aeosa', '/Users/idekeradmin/Dropbox/GitHub/agent_evaluation']


### Load database 

- make sure there is a ~/ae_config/config.ini file for all the configs, and ~/ae_database/ae_database.db

In [2]:
from models.analysis_plan import AnalysisPlan
from services.analysisrunner import AnalysisRunner
from app.sqlite_database import SqliteDatabase
from app.config import load_database_config
%reload_ext autoreload
%autoreload 2

# Load the db connection details
# db_type, uri, user, password = load_database_config(path='~/ae_config/test_config.ini')
# self.db = Database(uri, db_type, user, password)

_, database_uri, _, _ = load_database_config()
db = SqliteDatabase(database_uri)

### Get the available LLMs

In [3]:
llm_specs = db.find("llm")
llm_mappings = {}
for llm_spec in llm_specs:
    llm_id = llm_spec["object_id"]
    llm_properties = llm_spec["properties"]
    llm_name = llm_properties["name"]
    llm_mappings[llm_name] = llm_id

llm_mappings

{'Levar the Llama - 3.1-70b-versitile': 'llm_a38b5b94-14d6-4216-b166-6f1515faa3a1',
 'Holly the Haiku 20240307': 'llm_23a5c740-4d9b-4839-8c0a-7fb6ccae1ff1',
 'Claudia the claude-3-5-sonnet-20240620': 'llm_bb90988c-fa06-4a3a-99cc-81608fa8d1d7',
 'Gary the GPT-4o-mini 2024-07-08': 'llm_09620189-3486-4118-941c-7c674bc25657',
 'Maxine the mixtral-8x7b-32768': 'llm_6e44f8e2-5089-40b1-ad37-db18f31f2b39',
 'Georgia the gpt-4o-2024-05-13': 'llm_dc39023b-77bf-4d2f-a6a0-829ce9c3655d'}

In [4]:
cwd = os.getcwd()
dirname = os.path.dirname(cwd)

prompt_directory = (os.path.join(dirname, "prompts"))

def read_text_files(directory):
    file_contents = {}
    
    # Iterate through all files in the given directory
    for filename in os.listdir(directory):
        # Check if the file is a text file
        if filename.endswith('.txt'):
            # Construct the full file path
            file_path = os.path.join(directory, filename)
            
            # Get the filename without extension
            file_key = os.path.splitext(filename)[0]
            
            # Read the contents of the file
            try:
                with open(file_path, 'r', encoding='utf-8') as file:
                    content = file.read()
                    file_contents[file_key] = content
            except IOError as e:
                print(f"Error reading file {filename}: {e}")
    
    return file_contents

prompts = read_text_files(prompt_directory)

for name, text in prompts.items():
    print(name)

analyst_brief_0814
context_gsr_1st_yr_ml_0814
reviewer_reflect_0814
context_gsr_0814
reviewer_0814
reviewer_cot_reflect_0814
analyst_cot_brief_0814
reviewer_cot_0814
context_pi_0814
analyst_contrary_kg_brief_0814
context_cell_editor
analyst_minimal_0814
reviewer_cot_reflect_novelty_0814
analyst_kg_brief_0814


### Create Analyst Agents

In [15]:
from models.agent import Agent

llm_claude_3_5 = llm_mappings["Claudia the claude-3-5-sonnet-20240620"]

analyst_specs = {
    "C. Marvin":{
        "llm_id": llm_claude_3_5,
        "prompt_template": prompts["analyst_contrary_kg_brief_0814"],
        "context": prompts["context_gsr_0814"],
        "description": "Contrarian agent with knowledge graph. It provides a kind of guardrail. The expectation would be that it's hypotheses will typically be ranked lowest when the data is reasonably cohesive but should be picked first by the judge ('yes, I agree, there is no signal here') in a control case where the data and gene selection is random."
    },
    "A. Boris":{
        "llm_id": llm_claude_3_5,
        "prompt_template": prompts["analyst_minimal_0814"],
        "context": prompts["context_gsr_0814"],
        "description": "grad student agent with a minimal prompt"
    },
    "A. Alex": {
        "llm_id": llm_claude_3_5,
        "prompt_template": prompts["analyst_brief_0814"],
        "context": prompts["context_gsr_0814"],
        "description": "grad student agent"
    },
    "A. Jane":{
        "llm_id": llm_claude_3_5,
        "prompt_template": prompts["analyst_kg_brief_0814"],
        "context": prompts["context_gsr_0814"],
        "description": "grad student agent constructing a knowledge graph"
    },
    "A. Gloria":{
        "llm_id": llm_claude_3_5,
        "prompt_template": prompts["analyst_cot_brief_0814"],
        "context": prompts["context_gsr_0814"],
        "description": "grad student agent using chain of thought"
    }
}

analysts = {}

for name, spec in analyst_specs.items():
    analysts[name] = Agent.create(db, 
                                  spec["llm_id"], 
                                  spec["context"],
                                  spec["prompt_template"],
                                  name=name,
                                  description=spec.get('description'))
    
analysts

{'C. Marvin': <models.agent.Agent at 0x166c018d0>,
 'A. Boris': <models.agent.Agent at 0x166c01b10>,
 'A. Alex': <models.agent.Agent at 0x166c01d50>,
 'A. Jane': <models.agent.Agent at 0x166c01f50>,
 'A. Gloria': <models.agent.Agent at 0x166c02150>}

### Create the Reviewer Agents

In [6]:

from models.agent import Agent

llm_claude_3_5 = llm_mappings["Claudia the claude-3-5-sonnet-20240620"]

pi_context = prompts["context_pi_0814"]
reviewer_specs = {
    "R. Wei":{
        "llm_id": llm_claude_3_5,
        "prompt_template": prompts["reviewer_cot_reflect_novelty_0814"],
        "context": pi_context,
        "description": "PI reviewer preferring novelty, chain of thought, and reflection"
    },
    "R. Ricardo":{
        "llm_id": llm_claude_3_5,
        "prompt_template": prompts["reviewer_cot_reflect_0814"],
        "context": pi_context,
        "description": "PI reviewer using chain of thought and reflection"
    },
    "R. Frederico": {
        "llm_id": llm_claude_3_5,
        "prompt_template": prompts["reviewer_cot_0814"],
        "context": pi_context,
        "description": "PI reviewer using chain of thought"
    },
    "H. Sophia":{
        "llm_id": llm_claude_3_5,
        "prompt_template": prompts["reviewer_reflect_0814"],
        "context": pi_context,
        "description": "PI reviewer using reflection"
    },
    "H. Michelle":{
        "llm_id": llm_claude_3_5,
        "prompt_template": prompts["reviewer_0814"],
        "context": pi_context,
        "description": "PI reviewer"
    },
    "H. Sanjay":{
        "llm_id": llm_claude_3_5,
        "prompt_template": prompts["reviewer_0814"],
        "context": prompts["context_cell_editor"],
        "description": "Cell reviewer"
    }
}

reviewers = {}

for name, spec in reviewer_specs.items():
    reviewers[name] = Agent.create(db, 
                                  spec["llm_id"], 
                                  spec["context"],
                                  spec["prompt_template"],
                                  name=name,
                                  description=spec.get('description'))
    
reviewers

{'R. Wei': <models.agent.Agent at 0x16ae9bed0>,
 'R. Ricardo': <models.agent.Agent at 0x168086c90>,
 'R. Frederico': <models.agent.Agent at 0x1053494d0>,
 'H. Sophia': <models.agent.Agent at 0x105349310>,
 'H. Michelle': <models.agent.Agent at 0x105349150>,
 'H. Sanjay': <models.agent.Agent at 0x16cb98050>}

### Create a new analysis plan

### Create new analysis run 

In [18]:
'''

analysis_run = analysis_plan.generate_analysis_run(biological_context="Chromatin remodelling and transcriptional regulation (or Transcriptional regulation and chromatin remodelling)")
print(analysis_run.object_id)
print(vars(analysis_run))

runner = AnalysisRunner(db, analysis_run.object_id)
result = runner.run()
print(result)
'''

'\n\nanalysis_run = analysis_plan.generate_analysis_run(biological_context="Chromatin remodelling and transcriptional regulation (or Transcriptional regulation and chromatin remodelling)")\nprint(analysis_run.object_id)\nprint(vars(analysis_run))\n\nrunner = AnalysisRunner(db, analysis_run.object_id)\nresult = runner.run()\nprint(result)\n'

### Print the hypotheses

In [19]:
'''
hypotheses = db.load(analysis_run.object_id)[0]['hypothesis_ids']
hypotheses
'''

"\nhypotheses = db.load(analysis_run.object_id)[0]['hypothesis_ids']\nhypotheses\n"

In [20]:
'''
from models.hypothesis import Hypothesis
%reload_ext autoreload
%autoreload 2

for hypothesis_id in hypotheses:
    hypothesis = Hypothesis.load(db, hypothesis_id)
    # print(hypothesis_id)
    print(hypothesis.object_id)
    print(hypothesis.hypothesis_text)
    print(hypothesis.full_prompt)

'''

'\nfrom models.hypothesis import Hypothesis\n%reload_ext autoreload\n%autoreload 2\n\nfor hypothesis_id in hypotheses:\n    hypothesis = Hypothesis.load(db, hypothesis_id)\n    # print(hypothesis_id)\n    print(hypothesis.object_id)\n    print(hypothesis.hypothesis_text)\n    print(hypothesis.full_prompt)\n\n'