In [1]:
import sys, os
from pathlib import Path

# Add virtual_lab's src directory to sys.path so the package can be imported.
# The path is joined with pathlib rather than by concatenating '/src', so it
# uses the platform's native separator (the concatenated form produced mixed
# separators on Windows).
root_dir = str(Path.cwd().resolve().parent / 'src')  # location of the virtual-lab source directory

if root_dir not in sys.path:
    sys.path.insert(0, root_dir)
    print(f"Add {root_dir} to system path")

# OPENAI_API_KEY is read from the .env file (configured in settings.json).
# Do not print it here: cell outputs are saved together with the notebook.

Add D:\code-projects\virtual-lab/src to system path


In [None]:
# 测试连接外网的连通性
%pip install requests
import requests
response = requests.get('https://www.google.com')
print(response.status_code)

In [2]:
import concurrent.futures
import json

from virtual_lab.constants import CONSISTENT_TEMPERATURE, CREATIVE_TEMPERATURE 
from virtual_lab.prompts import (
    CODING_RULES,
    REWRITE_PROMPT,
    create_merge_prompt,
)
from virtual_lab.run_meeting_chat import run_meeting # 使用基于chat completion 的大模型接口实现
from virtual_lab.utils import load_summaries

#import importlib
#import social_causality_constants
#importlib.reload(social_causality_constants)

from social_causality_constants import (
    background_prompt,
    social_attribution_prompt,
    num_iterations,
    num_rounds,
    discussions_phase_to_dir,
    principal_investigator,
    scientific_critic
)

## Team selection

In [None]:
# Team selection - prompts
# The prompt explicitly asks for three agents to be selected for the research work.
# For social causality theory design the expected roles are: a social scientist,
# an AI researcher/engineer, and a computational social scientist. The team size
# is kept the same as in virtual_lab.
team_selection_agenda = f"""{background_prompt} You need to select a team of three scientists to help you with this project. Please select the team members that you would like to invite to a discussion to design a computation approach to validate whether the responsibility attribution behavior of LLMs aligns with existing social attribution theories. Please list the team members in the following format, using the team member below as an example. You should not include yourself (Principal Investigator) in the list.

Agent(
    title="Principal Investigator",
    expertise="artificial intelligence, computational social science, and social attribution theories",
    goal="perform research in your area of expertise that maximizes the scientific impact of the work",
    role="lead a team of experts to test whether the responsibility attribution behavior of LLMs aligns with existing social attribution theories using machine learning or artificial intelligence methods",
)
"""

# Show the assembled agenda so the prompt can be checked before any meeting is run.
print(f"Team selection agenda:\n {team_selection_agenda}")


Team selection agenda:
 You are working on a research project to use machine learning and artificial intelligence methods to design new social attribution theories that could better explain the attribution of responsibility in realworld social events in different scenarios, for example, the Shaver's Responsibility Attribution Model and Malle’s PMoB Attribution Model . In addition, the new developed theories could be an extension of existing theories or combination of multiple existing theories. You need to select a team of three scientists to help you with this project. Please select the team members that you would like to invite to a discussion to design a computation approach to validate whether the responsibility attribution behavior of LLMs aligns with existing social attribution theories. Please list the team members in the following format, using the team member below as an example. You should not include yourself (Principal Investigator) in the list.

Agent(
    title="Principal

In [4]:
# Smoke test of run_meeting (already verified to work): a single individual
# meeting of the principal investigator on the team selection agenda.
# NOTE(review): this uses the save name "discussion_1", the same name the first
# iteration of the team selection discussion cell uses.
run_meeting(
    meeting_type="individual",
    team_member=principal_investigator,
    agenda=team_selection_agenda,
    save_dir=discussions_phase_to_dir["team_selection"],
    save_name="discussion_1",
    temperature=CREATIVE_TEMPERATURE,
)


Running meeting of type individual with agenda: You are working on a research project to use machine learning and artificial intelligence methods to design new social attribution theories that could better explain the attribution of responsibility in realworld social events in different scenarios, for example, the Shaver's Responsibility Attribution Model and Malle’s PMoB Attribution Model . In addition, the new developed theories could be an extension of existing theories or combination of multiple existing theories. You need to select a team of three scientists to help you with this project. Please select the team members that you would like to invite to a discussion to design a computation approach to validate whether the responsibility attribution behavior of LLMs aligns with existing social attribution theories. Please list the team members in the following format, using the team member below as an example. You should not include yourself (Principal Investigator) in the list.

Age

Team:   0%|          | 0/2 [00:04<?, ?it/s]1 [00:00<?, ?it/s]
Rounds (+ Final Round): 100%|██████████| 1/1 [00:04<00:00,  4.41s/it]


Input token count: 303
Output token count: 362
Tool token count: 0
Max token length: 665
Cost: $0.00
Time: 0:23


In [5]:
# Team selection - discussion
# Submit num_iterations individual meetings of the principal investigator to a
# thread pool; iteration i is given the save name discussion_<i + 1>.
with concurrent.futures.ThreadPoolExecutor() as executor:
    meeting_futures = [
        executor.submit(
            run_meeting,
            meeting_type="individual",
            team_member=principal_investigator,
            agenda=team_selection_agenda,
            save_dir=discussions_phase_to_dir["team_selection"],
            save_name=f"discussion_{iteration_num + 1}",
            temperature=CREATIVE_TEMPERATURE,
        ) for iteration_num in range(num_iterations)
    ]
    # Waiting alone never re-raises an exception raised inside a worker, so a
    # failed meeting would go unnoticed; result() surfaces it in this cell.
    for meeting_future in concurrent.futures.as_completed(meeting_futures):
        meeting_future.result()

Running meeting of type individual with agenda: You are working on a research project to use machine learning and artificial intelligence methods to design new social attribution theories that could better explain the attribution of responsibility in realworld social events in different scenarios, for example, the Shaver's Responsibility Attribution Model and Malle’s PMoB Attribution Model . In addition, the new developed theories could be an extension of existing theories or combination of multiple existing theories. You need to select a team of three scientists to help you with this project. Please select the team members that you would like to invite to a discussion to design a computation approach to validate whether the responsibility attribution behavior of LLMs aligns with existing social attribution theories. Please list the team members in the following format, using the team member below as an example. You should not include yourself (Principal Investigator) in the list.

Age

Rounds (+ Final Round):   0%|          | 0/1 [00:00<?, ?it/s]
[A

[A[A



[A[A[A[A


Team:   0%|          | 0/2 [00:04<?, ?it/s]

Rounds (+ Final Round): 100%|██████████| 1/1 [00:04<00:00,  4.73s/it]


Input token count: 303
Output token count: 404
Tool token count: 0
Max token length: 707
Cost: $0.00
Time: 0:05


Team:   0%|          | 0/2 [00:05<?, ?it/s]
Rounds (+ Final Round): 100%|██████████| 1/1 [00:05<00:00,  5.25s/it]


Input token count: 303
Output token count: 378
Tool token count: 0
Max token length: 681
Cost: $0.00
Time: 0:05


Team:   0%|          | 0/2 [00:05<?, ?it/s]


Rounds (+ Final Round): 100%|██████████| 1/1 [00:05<00:00,  5.79s/it]

Input token count: 303
Output token count: 344
Tool token count: 0
Max token length: 647
Cost: $0.00
Time: 0:06





In [6]:
# Team selection - merge
# Load every discussion_*.json saved for this phase and ask the principal
# investigator to merge them into a single answer saved as "merged".
# The paths are sorted because Path.glob yields files in a filesystem-dependent
# order; sorting keeps the input to load_summaries the same on every run.
team_selection_summaries = load_summaries(
    discussion_paths=sorted(discussions_phase_to_dir["team_selection"].glob("discussion_*.json")))
print(f"Number of summaries: {len(team_selection_summaries)}")

team_selection_merge_prompt = create_merge_prompt(agenda=team_selection_agenda)

run_meeting(
    meeting_type="individual",
    team_member=principal_investigator,
    summaries=team_selection_summaries,
    agenda=team_selection_merge_prompt,
    save_dir=discussions_phase_to_dir["team_selection"],
    save_name="merged",
    temperature=CONSISTENT_TEMPERATURE,
)

Number of summaries: 3
Running meeting of type individual with agenda: Please read the summaries of multiple separate meetings about the same agenda. Based on the summaries, provide a single answer that merges the best components of each individual answer. Please use the same format as the individual answers. Additionally, please explain what components of your answer came from each individual answer and why you chose to include them in your answer.

As a reference, here is the agenda from those meetings, which must be addressed here as well:

You are working on a research project to use machine learning and artificial intelligence methods to design new social attribution theories that could better explain the attribution of responsibility in realworld social events in different scenarios, for example, the Shaver's Responsibility Attribution Model and Malle’s PMoB Attribution Model . In addition, the new developed theories could be an extension of existing theories or combination of mu

Team:   0%|          | 0/2 [00:08<?, ?it/s]1 [00:00<?, ?it/s]
Rounds (+ Final Round): 100%|██████████| 1/1 [00:08<00:00,  8.25s/it]

Input token count: 1,557
Output token count: 843
Tool token count: 0
Max token length: 2,400
Cost: $0.01
Time: 0:08





In [6]:
# Once the team has been built, import all agent members from here.
# The module is reloaded before the import so that changes made to
# social_causality_constants since it was first imported are picked up
# without restarting the kernel.
import importlib
import social_causality_constants
importlib.reload(social_causality_constants)
from social_causality_constants import (
    team_members,
)

## Project specification

In [3]:
# Project specification - prompts
# The two triple-quoted blocks and the commented-out line below are disabled
# earlier drafts. They are bare string literals, so they assign nothing; the
# active agenda and questions are defined after them in this cell.
# Draft 1: antibody/nanobody design agenda and questions.
'''
project_specification_agenda = f"{background_prompt} Please create an antibody/nanobody design approach to solve this problem. Decide whether you will design antibodies or nanobodies. For your choice, decide whether you will design the antibodies/nanobodies de novo or whether you will modify existing antibodies/nanobodies. If modifying existing antibodies/nanobodies, please specify which antibodies/nanobodies to start with as good candidates for targeting the newest variant of the SARS-CoV-2 spike protein. If designing antibodies/nanobodies de novo, please describe how you will propose antibody/nanobody candidates."

project_specification_questions = (
    "Will you design standard antibodies or nanobodies?",
    "Will you design antibodies/nanobodies de novo or will you modify existing antibodies/nanobodies (choose only one)?",
    "If modifying existing antibodies/nanobodies, which precise antibodies/nanobodies will you modify (please list 3-4)?",
    "If designing antibodies/nanobodies de novo, how exactly will you propose antibody/nanobody candidates?",
)'''

# Draft 2: agenda and questions for designing a new social attribution theory.
#project_specification_agenda = f"{background_prompt} Please develop a social attribution theory design approach to solve this problem. Decide whether you will design a new and better social attribution theory from scratch, modify an existing one or combine multiple existing ones. If extending an existing social attribution theory, please specify which theory to start with as a good candidate for addressing the problem. If combining multiple existing theories, please specify which theories you will combine and how you will integrate them. Make sure that the proposed design approach is scientifically sound. Make sure that the resulting new social attribution theory is highly explainable and is not a computational model, such as a neural network, which can be validated by experiments or simulations."

'''
project_specification_questions = (
    "What are the core problems that your social attribution theory design approach will address?",
    "Will you design a new social attribution theory from scratch, modify an existing one or combine multiple existing ones (choose only one)? ",
    "If modifying or combining existing theories, which precise social attribution theory will you modify or combine (please list 1-2)?",
    "If designing a new social attribution theory from scratch, how exactly will you propose the new theory (e.g., key components, mechanisms, etc.)?",
    "How will you validate the effectiveness of your proposed social attribution theory (e.g., experiments, simulations, etc.)?",
)
'''
# Use the content of the PMoB paper as background knowledge; the text is taken
# from the parsed Markdown file.
# read_text() opens and closes the file itself, whereas the bare
# open(...).read() it replaces never closed the file handle.
knowledge_context_text = Path("./papers/Malle-PMoB-theory-maintext.md").read_text(encoding="utf-8")
related_knowledge_prompt = f"Here is some related knowledge that might be useful for your design: \n {knowledge_context_text}"

# The agenda is the shared background prompt, the task description, and the
# full paper text appended as related knowledge.
project_specification_agenda = f"{background_prompt} Please design a computational approach to solve this problem. Specifically, you will use the latest DeepSeek LLM as an example to validate whether its responsibility attribution behavior aligns with the Malle’s PMoB Attribution Model. To reduce the cost of conducting research, you will avoid human annotations.\n {related_knowledge_prompt}"

# Questions passed to the meeting as agenda_questions.
project_specification_questions = (
    "What the content of Malle’s PMoB Attribution Model is?",
    "How to design different scenarios to test whether the responsibility attribution behavior of DeepSeek LLM aligns with Malle’s PMoB Attribution Model?",
    "How to extract and identify the responsibility attribution patterns of DeepSeek LLM in these scenarios?",
    "How to compare the responsibility attribution patterns of DeepSeek LLM with the prediction result and the attribution process of Malle’s PMoB Attribution Model?"
)

In [4]:
# Show the fully assembled agenda (background, task description and paper text)
# so it can be checked before the meetings are run.
print(f"Project specification agenda:\n {project_specification_agenda}")

Project specification agenda:
 You are working on a research project which focuses on using machine learning and artificial intelligence methods to test whether the responsibility attribution behavior of LLMs aligns with existing social attribution theories, specifically, Malle’s PMoB Attribution Model, a type of theory of blame. For LLMs, the attribution process of responsibility can be obtained by the chain-of-thought prompting. Please design a computational approach to solve this problem. Specifically, you will use the latest DeepSeek LLM as an example to validate whether its responsibility attribution behavior aligns with the Malle’s PMoB Attribution Model. To reduce the cost of conducting research, you will avoid human annotations.
 Here is some related knowledge that might be useful for your design: 
 # TARGET ARTICLE

# A Theory of Blame

Bertram F. Malle  Department of Cognitive, Linguistic, and Psychological Sciences, Brown University, Providence, Rhode Island

# Steve Gugliel

In [7]:
# Project specification - discussion
# Submit num_iterations team meetings (led by the principal investigator) to a
# thread pool; iteration i is given the save name discussion_<i + 1>.
with concurrent.futures.ThreadPoolExecutor() as executor:
    meeting_futures = [
        executor.submit(
            run_meeting,
            meeting_type="team",
            team_lead=principal_investigator,
            team_members=team_members,
            agenda=project_specification_agenda,
            agenda_questions=project_specification_questions,
            save_dir=discussions_phase_to_dir["project_specification"],
            save_name=f"discussion_{iteration_num + 1}",
            temperature=CREATIVE_TEMPERATURE,
            num_rounds=num_rounds,
        ) for iteration_num in range(num_iterations)
    ]
    # Waiting alone never re-raises an exception raised inside a worker, so a
    # failed meeting would go unnoticed; result() surfaces it in this cell.
    for meeting_future in concurrent.futures.as_completed(meeting_futures):
        meeting_future.result()

Running meeting of type team with agenda: You are working on a research project which focuses on using machine learning and artificial intelligence methods to test whether the responsibility attribution behavior of LLMs aligns with existing social attribution theories, specifically, Malle’s PMoB Attribution Model, a type of theory of blame. For LLMs, the attribution process of responsibility can be obtained by the chain-of-thought prompting. Please design a computational approach to solve this problem. Specifically, you will use the latest DeepSeek LLM as an example to validate whether its responsibility attribution behavior aligns with the Malle’s PMoB Attribution Model. To reduce the cost of conducting research, you will avoid human annotations.
 Here is some related knowledge that might be useful for your design: 
 # TARGET ARTICLE

# A Theory of Blame

Bertram F. Malle  Department of Cognitive, Linguistic, and Psychological Sciences, Brown University, Providence, Rhode Island

# St

Rounds (+ Final Round):   0%|          | 0/4 [00:00<?, ?it/s]
[A

[A[A


[A[A[A



[A[A[A[A



[A[A[A[A


[A[A[A
[A



[A[A[A[A


[A[A[A



[A[A[A[A
[A



[A[A[A[A


[A[A[A
[A



Team: 100%|██████████| 5/5 [00:57<00:00, 11.59s/it]




[A[A[A[A
[A


[A[A[A



[A[A[A[A
Team: 100%|██████████| 5/5 [01:16<00:00, 15.33s/it]
Rounds (+ Final Round):  25%|██▌       | 1/4 [01:16<03:49, 76.64s/it]
[A



[A[A[A[A
[A


Team: 100%|██████████| 5/5 [01:33<00:00, 18.61s/it]


[A[A


[A[A[A



[A[A[A[A



[A[A[A[A


[A[A[A
[A



Team: 100%|██████████| 5/5 [01:00<00:00, 12.01s/it]




[A[A[A[A


[A[A[A
[A



[A[A[A[A


[A[A[A
[A



[A[A[A[A


[A[A[A
Team: 100%|██████████| 5/5 [01:13<00:00, 14.73s/it]
Rounds (+ Final Round):  50%|█████     | 2/4 [02:30<02:29, 74.90s/it]
[A



[A[A[A[A



[A[A[A[A


Team: 100%|██████████| 5/5 [01:14<00:00, 14.98s/it]


[A[A


[A[A[A
[A



Team: 100%|██████████| 5/

Input token count: 684,036
Output token count: 14,231
Tool token count: 0
Max token length: 82,923
Cost: $1.48
Time: 3:19



[A


[A[A[A
[A


[A[A[A
Team: 100%|██████████| 5/5 [01:23<00:00, 16.65s/it]
Rounds (+ Final Round):  75%|███████▌  | 3/4 [03:53<01:18, 78.71s/it]


Team: 100%|██████████| 5/5 [01:09<00:00, 13.99s/it]


[A[A
Team:   0%|          | 0/5 [00:29<?, ?it/s]
Rounds (+ Final Round): 100%|██████████| 4/4 [04:22<00:00, 65.74s/it]


Input token count: 702,899
Output token count: 17,380
Tool token count: 0
Max token length: 86,072
Cost: $1.54
Time: 4:23


Team:   0%|          | 0/5 [00:36<?, ?it/s]


Rounds (+ Final Round): 100%|██████████| 4/4 [04:34<00:00, 68.68s/it]


Input token count: 702,659
Output token count: 16,398
Tool token count: 0
Max token length: 85,090
Cost: $1.54
Time: 4:35


In [8]:
# Project specification - merge
# Load every discussion_*.json saved for this phase and ask the principal
# investigator to merge them into a single answer saved as "merged".
# The paths are sorted because Path.glob yields files in a filesystem-dependent
# order; sorting keeps the input to load_summaries the same on every run.
project_specification_summaries = load_summaries(
    discussion_paths=sorted(discussions_phase_to_dir["project_specification"].glob("discussion_*.json")))
print(f"Number of summaries: {len(project_specification_summaries)}")

project_specification_merge_prompt = create_merge_prompt(
    agenda=project_specification_agenda,
    agenda_questions=project_specification_questions,
)

run_meeting(
    meeting_type="individual",
    team_member=principal_investigator,
    summaries=project_specification_summaries,
    agenda=project_specification_merge_prompt,
    save_dir=discussions_phase_to_dir["project_specification"],
    save_name="merged",
    temperature=CONSISTENT_TEMPERATURE,
    num_rounds=num_rounds,
)

Number of summaries: 3
Running meeting of type individual with agenda: Please read the summaries of multiple separate meetings about the same agenda. Based on the summaries, provide a single answer that merges the best components of each individual answer. Please use the same format as the individual answers. Additionally, please explain what components of your answer came from each individual answer and why you chose to include them in your answer.

As a reference, here is the agenda from those meetings, which must be addressed here as well:

You are working on a research project which focuses on using machine learning and artificial intelligence methods to test whether the responsibility attribution behavior of LLMs aligns with existing social attribution theories, specifically, Malle’s PMoB Attribution Model, a type of theory of blame. For LLMs, the attribution process of responsibility can be obtained by the chain-of-thought prompting. Please design a computational approach to solv

Team: 100%|██████████| 2/2 [00:32<00:00, 16.41s/it]<?, ?it/s]
Team: 100%|██████████| 2/2 [00:30<00:00, 15.20s/it]<01:38, 32.82s/it]
Team: 100%|██████████| 2/2 [00:31<00:00, 15.55s/it]<01:02, 31.39s/it]
Team:   0%|          | 0/2 [00:14<?, ?it/s]4 [01:34<00:31, 31.26s/it]
Rounds (+ Final Round): 100%|██████████| 4/4 [01:48<00:00, 27.10s/it]


Input token count: 311,844
Output token count: 10,949
Tool token count: 0
Max token length: 50,802
Cost: $0.71
Time: 1:48


## Tools Selection

In [None]:
# Tools selection - prompts
# Agenda and questions for the tools selection meetings.
tools_selection_agenda = f"{background_prompt} {social_attribution_prompt} Now you need to select machine learning and/or computational tools to implement this approach. Please list several tools (5-10) that would be relevant to this problem and how they could be used in the context of this project."

tools_selection_questions = (
    "What machine learning and/or computational tools could be used for this task (list 5-10)?",
    "For each tool, how could it be used for this task?",
    "For each tool, what are the inputs and outputs? ",
    "For this task, how to arrange the tools into a pipeline or workflow?",
)

# In essence, merged.json is a compressed form of the earlier context: only the
# merged project specification is passed on to this phase.
tools_selection_prior_summaries = load_summaries(
    discussion_paths=[discussions_phase_to_dir["project_specification"] / "merged.json"])
print(f"Number of prior summaries: {len(tools_selection_prior_summaries)}")

In [None]:
# Tools selection - discussion
# Submit num_iterations team meetings to a thread pool, each receiving the
# merged project specification as prior summaries; iteration i is given the
# save name discussion_<i + 1>.
with concurrent.futures.ThreadPoolExecutor() as executor:
    meeting_futures = [
        executor.submit(
            run_meeting,
            meeting_type="team",
            team_lead=principal_investigator,
            team_members=team_members,
            summaries=tools_selection_prior_summaries,
            agenda=tools_selection_agenda,
            agenda_questions=tools_selection_questions,
            save_dir=discussions_phase_to_dir["tools_selection"],
            save_name=f"discussion_{iteration_num + 1}",
            temperature=CREATIVE_TEMPERATURE,
            num_rounds=num_rounds,
        ) for iteration_num in range(num_iterations)
    ]
    # Waiting alone never re-raises an exception raised inside a worker, so a
    # failed meeting would go unnoticed; result() surfaces it in this cell.
    for meeting_future in concurrent.futures.as_completed(meeting_futures):
        meeting_future.result()

In [None]:
# Tools selection - merge
# Load every discussion_*.json saved for this phase and ask the principal
# investigator to merge them into a single answer saved as "merged".
# The paths are sorted because Path.glob yields files in a filesystem-dependent
# order; sorting keeps the input to load_summaries the same on every run.
tools_selection_summaries = load_summaries(
    discussion_paths=sorted(discussions_phase_to_dir["tools_selection"].glob("discussion_*.json")))
print(f"Number of summaries: {len(tools_selection_summaries)}")

tools_selection_merge_prompt = create_merge_prompt(
    agenda=tools_selection_agenda,
    agenda_questions=tools_selection_questions,
)

run_meeting(
    meeting_type="individual",
    team_member=principal_investigator,
    summaries=tools_selection_summaries,
    agenda=tools_selection_merge_prompt,
    save_dir=discussions_phase_to_dir["tools_selection"],
    save_name="merged",
    temperature=CONSISTENT_TEMPERATURE,
    num_rounds=num_rounds,
)

## Implementation

In [None]:
# Implementation - prompts
# NOTE(review): the agenda and questions below still describe the nanobody
# design pipeline (ESM, AlphaFold-Multimer, Rosetta) although they are prefixed
# with social_attribution_prompt. They look like they have not yet been adapted
# to this project - TODO confirm and update before running this phase.
implementation_agent_selection_agenda = f"{background_prompt} {social_attribution_prompt} Your team needs to build three components of a nanobody design pipeline: ESM, AlphaFold-Multimer, and Rosetta. For each component, please select the team member who will implement the component. A team member may implement more than one component."

implementation_agent_selection_questions = (
    "Which team member will implement ESM?",
    "Which team member will implement AlphaFold-Multimer?",
    "Which team member will implement Rosetta?",
)

# Prior context for this phase: the merged results of the three earlier phases.
implementation_agent_selection_prior_summaries = load_summaries(
    discussion_paths=[discussions_phase_to_dir["team_selection"] / "merged.json",
                      discussions_phase_to_dir["project_specification"] / "merged.json",
                      discussions_phase_to_dir["tools_selection"] / "merged.json"])
print(f"Number of prior summaries: {len(implementation_agent_selection_prior_summaries)}")

In [None]:
# Implementation - discussion
# Submit num_iterations individual meetings of the principal investigator to a
# thread pool, each receiving the merged results of the earlier phases;
# iteration i is given the save name discussion_<i + 1>.
with concurrent.futures.ThreadPoolExecutor() as executor:
    meeting_futures = [
        executor.submit(
            run_meeting,
            meeting_type="individual",
            team_member=principal_investigator,
            summaries=implementation_agent_selection_prior_summaries,
            agenda=implementation_agent_selection_agenda,
            agenda_questions=implementation_agent_selection_questions,
            save_dir=discussions_phase_to_dir["implementation_agent_selection"],
            save_name=f"discussion_{iteration_num + 1}",
            temperature=CREATIVE_TEMPERATURE,
        ) for iteration_num in range(num_iterations)
    ]
    # Waiting alone never re-raises an exception raised inside a worker, so a
    # failed meeting would go unnoticed; result() surfaces it in this cell.
    for meeting_future in concurrent.futures.as_completed(meeting_futures):
        meeting_future.result()

In [None]:
# Implementation - merge
# Load every discussion_*.json saved for this phase and ask the principal
# investigator to merge them into a single answer saved as "merged".
# The paths are sorted because Path.glob yields files in a filesystem-dependent
# order; sorting keeps the input to load_summaries the same on every run.
implementation_agent_selection_summaries = load_summaries(
    discussion_paths=sorted(discussions_phase_to_dir["implementation_agent_selection"].glob("discussion_*.json")))
print(f"Number of summaries: {len(implementation_agent_selection_summaries)}")

implementation_agent_selection_merge_prompt = create_merge_prompt(
    agenda=implementation_agent_selection_agenda,
    agenda_questions=implementation_agent_selection_questions
)

run_meeting(
    meeting_type="individual",
    team_member=principal_investigator,
    summaries=implementation_agent_selection_summaries,
    agenda=implementation_agent_selection_merge_prompt,
    save_dir=discussions_phase_to_dir["implementation_agent_selection"],
    save_name="merged",
    temperature=CONSISTENT_TEMPERATURE,
)

### ESM

In [None]:
# ESM - prompts
# NOTE(review): nanobody_prompt is not imported in any of the cells above, so
# this cell looks like it would raise NameError on a fresh kernel - TODO confirm
# and either define it or adapt this prompt to the current project.
esm_agenda = f"{background_prompt} {nanobody_prompt} Now you must use ESM to suggest modifications to an existing antibody. Please write a complete Python script that takes a nanobody sequence as input and uses ESM amino acid log-likelihoods to identify the most promising point mutations by log-likelihood ratio."

In [None]:
# ESM - discussion
# Submit num_iterations individual coding meetings to a thread pool; iteration i
# is given the save name discussion_<i + 1>.
# NOTE(review): machine_learning_specialist is not imported in any of the cells
# above - TODO confirm it is defined before running this cell.
with concurrent.futures.ThreadPoolExecutor() as executor:
    meeting_futures = [
        executor.submit(
            run_meeting,
            meeting_type="individual",
            team_member=machine_learning_specialist,
            agenda=esm_agenda,
            agenda_rules=CODING_RULES,
            save_dir=discussions_phase_to_dir["esm"],
            save_name=f"discussion_{iteration_num + 1}",
            temperature=CREATIVE_TEMPERATURE,
            num_rounds=num_rounds,
        ) for iteration_num in range(num_iterations)
    ]
    # Waiting alone never re-raises an exception raised inside a worker, so a
    # failed meeting would go unnoticed; result() surfaces it in this cell.
    for meeting_future in concurrent.futures.as_completed(meeting_futures):
        meeting_future.result()

In [None]:
# ESM - merge
# Load every discussion_*.json saved for the ESM phase and ask the agent to
# merge them into a single answer saved as "merged".
# The paths are sorted because Path.glob yields files in a filesystem-dependent
# order; sorting keeps the input to load_summaries the same on every run.
# NOTE(review): machine_learning_specialist is not imported in any of the cells
# above - TODO confirm it is defined before running this cell.
esm_summaries = load_summaries(discussion_paths=sorted(discussions_phase_to_dir["esm"].glob("discussion_*.json")))
print(f"Number of summaries: {len(esm_summaries)}")

esm_merge_prompt = create_merge_prompt(
    agenda=esm_agenda,
    agenda_rules=CODING_RULES,
)

run_meeting(
    meeting_type="individual",
    team_member=machine_learning_specialist,
    summaries=esm_summaries,
    agenda=esm_merge_prompt,
    save_dir=discussions_phase_to_dir["esm"],
    save_name="merged",
    temperature=CONSISTENT_TEMPERATURE,
)

### Improve ESM

In [None]:
# Improve ESM - prompts
# Follow-up agenda: REWRITE_PROMPT followed by a numbered list of concrete
# changes to apply to the previously written ESM script.
improve_esm_agenda = f"""You previously wrote a Python script that uses ESM to compute the log-likelihood ratio of point mutations in a nanobody sequence (see summary). {REWRITE_PROMPT}

1. Replace "facebook/esm1b-t33_650M_UR50S" with "facebook/esm1b_t33_650M_UR50S".
2. Run the calculations of the mutant log-likelihoods by iterating through the sequences in batches of 16.
3. Add a progress bar to the batched mutant log-likelihood calculations.
4. Run the mutant log-likelihood calculations on CUDA but with no gradients.
5. Load the nanobody sequence from a user-specified CSV file that has the columns "sequence" and "name". Adapt your code to run the mutant log-likelihood calculations on all sequences in the CSV file one-by-one.
6. For each sequence, save the mutant log-likelihoods to a CSV file with the format "mutated_sequence,position,original_aa,mutated_aa,log_likelihood_ratio". Ask the user for a save directory and then save this CSV file in that directory with the name: <nanbody-name>.csv."""

In [None]:
# Improve ESM - discussion
# Pass the merged ESM summary back to the agent together with the improvement
# agenda; the meeting is given the save name "improved".
# NOTE(review): machine_learning_specialist is not imported in any of the cells
# above - TODO confirm it is defined before running this cell.
improve_esm_summaries = load_summaries(
    discussion_paths=[discussions_phase_to_dir["esm"].joinpath("merged.json")],
)
print("Number of summaries:", len(improve_esm_summaries))

run_meeting(
    meeting_type="individual",
    team_member=machine_learning_specialist,
    summaries=improve_esm_summaries,
    agenda=improve_esm_agenda,
    save_dir=discussions_phase_to_dir["esm"],
    save_name="improved",
    temperature=CONSISTENT_TEMPERATURE,
)

### AlphaFold-Multimer

In [None]:
# AlphaFold-Multimer - prompts
# NOTE(review): nanobody_prompt is not imported in any of the cells above, so
# this cell looks like it would raise NameError on a fresh kernel - TODO confirm
# and either define it or adapt this prompt to the current project.
alphafold_agenda = f"{background_prompt} {nanobody_prompt} Now you must use AlphaFold-Multimer to predict the structure of a nanobody-antigen complex and evaluate its binding. I will run AlphaFold-Multimer on several nanobody-antigen complexes and you need to process the outputs. Please write a complete Python script that takes as input a directory containing PDB files where each PDB file contains one nanobody-antigen complex predicted by AlphaFold-Multimer and outputs a CSV file containing the AlphaFold-Multimer confidence of each nanobody-antigen complex in terms of the interface pLDDT."

In [None]:
# AlphaFold-Multimer - discussion
# Submit num_iterations individual coding meetings to a thread pool; iteration i
# is given the save name discussion_<i + 1>.
# NOTE(review): computational_biologist is not imported in any of the cells
# above - TODO confirm it is defined before running this cell.
with concurrent.futures.ThreadPoolExecutor() as executor:
    meeting_futures = [
        executor.submit(
            run_meeting,
            meeting_type="individual",
            team_member=computational_biologist,
            agenda=alphafold_agenda,
            agenda_rules=CODING_RULES,
            save_dir=discussions_phase_to_dir["alphafold"],
            save_name=f"discussion_{iteration_num + 1}",
            temperature=CREATIVE_TEMPERATURE,
            num_rounds=num_rounds,
        ) for iteration_num in range(num_iterations)
    ]
    # Waiting alone never re-raises an exception raised inside a worker, so a
    # failed meeting would go unnoticed; result() surfaces it in this cell.
    for meeting_future in concurrent.futures.as_completed(meeting_futures):
        meeting_future.result()

In [None]:
# AlphaFold-Multimer - merge
# Load every discussion_*.json saved for the AlphaFold phase and ask the agent
# to merge them into a single answer saved as "merged".
# The paths are sorted because Path.glob yields files in a filesystem-dependent
# order; sorting keeps the input to load_summaries the same on every run.
# NOTE(review): computational_biologist is not imported in any of the cells
# above - TODO confirm it is defined before running this cell.
alphafold_summaries = load_summaries(
    discussion_paths=sorted(discussions_phase_to_dir["alphafold"].glob("discussion_*.json")))
print(f"Number of summaries: {len(alphafold_summaries)}")

alphafold_merge_prompt = create_merge_prompt(
    agenda=alphafold_agenda,
    agenda_rules=CODING_RULES,
)

run_meeting(
    meeting_type="individual",
    team_member=computational_biologist,
    summaries=alphafold_summaries,
    agenda=alphafold_merge_prompt,
    save_dir=discussions_phase_to_dir["alphafold"],
    save_name="merged",
    temperature=CONSISTENT_TEMPERATURE,
)

### Improve AlphaFold-Multimer

In [None]:
# Improve AlphaFold-Multimer - prompts
# Follow-up agenda: REWRITE_PROMPT followed by a numbered list of concrete
# changes to apply to the previously written AlphaFold-Multimer processing script.
improve_alphafold_agenda = f"""You previously wrote a Python script that processes the outputs of AlphaFold-Multimer to calculate the confidence of nanobody-antigen complexes (see summary). {REWRITE_PROMPT}

1. Replace the current imports of Chain and Residue with "from Bio.PDB.Chain import Chain" and "from Bio.PDB.Residue import Residue".
2. Remove the logging setup and simply print any log messages to the console.
3. Replace the parallel processing with sequential processing to avoid getting an "OSError: Too many open files".
4. Change the list of pdb_files to instead get all PDB files in the directory that follow the pattern "**/*unrelaxed_rank_001*.pdb".
5. Change the calculation of average pLDDT to divide by the number of atoms rather than the number of residues.
6. Return and save in the CSV both the number of residues and the number of atoms in the interface.
7. Change the default distance threshold to 4."""

In [None]:
# Improve AlphaFold-Multimer - discussion
# Pass the merged AlphaFold summary back to the agent together with the
# improvement agenda; the meeting is given the save name "improved".
# NOTE(review): computational_biologist is not imported in any of the cells
# above - TODO confirm it is defined before running this cell.
improve_alphafold_summaries = load_summaries(
    discussion_paths=[discussions_phase_to_dir["alphafold"].joinpath("merged.json")],
)
print("Number of summaries:", len(improve_alphafold_summaries))

run_meeting(
    meeting_type="individual",
    team_member=computational_biologist,
    summaries=improve_alphafold_summaries,
    agenda=improve_alphafold_agenda,
    save_dir=discussions_phase_to_dir["alphafold"],
    save_name="improved",
    temperature=CONSISTENT_TEMPERATURE,
)

### Rosetta

In [None]:
# Rosetta - prompts
# NOTE(review): nanobody_prompt is not imported in any of the cells above, so
# this cell looks like it would raise NameError on a fresh kernel - TODO confirm
# and either define it or adapt this prompt to the current project.
rosetta_agenda = f"{background_prompt} {nanobody_prompt} Now you must use Rosetta to calculate the binding energy of nanobody-antigen complexes. You must do this in three parts. First, write a complete RosettaScripts XML file that calculates the binding energy of a nanobody-antigen complex as provided in PDB format, including any necessary preprocessing steps for the complex. Second, write an example command that uses Rosetta to run this RosettaScripts XML file on a given PDB file to calculate the binding energy and save it to a score file. Third, write a complete Python script that takes as input a directory with multiple Rosetta binding energy score files and outputs a single CSV file with the names and scores of each of the individual files in sorted order (highest to lowest score)."

In [None]:
# Rosetta - discussion
# Launch num_iterations independent Rosetta coding meetings in a thread pool;
# each one is given its own save_name (discussion_1, discussion_2, ...).
with concurrent.futures.ThreadPoolExecutor() as executor:
    rosetta_futures = [
        executor.submit(
            run_meeting,
            meeting_type="individual",
            team_member=computational_biologist,
            agenda=rosetta_agenda,
            agenda_rules=CODING_RULES,
            save_dir=discussions_phase_to_dir["rosetta"],
            save_name=f"discussion_{iteration_num + 1}",
            temperature=CREATIVE_TEMPERATURE,
            num_rounds=num_rounds,
        )
        for iteration_num in range(num_iterations)
    ]
    concurrent.futures.wait(rosetta_futures)

    # wait() only blocks until the futures finish; an exception raised inside
    # run_meeting stays stored on its future. result() re-raises it here so a
    # failed meeting stops the cell instead of being silently skipped.
    for rosetta_future in rosetta_futures:
        rosetta_future.result()

In [None]:
# Rosetta - merge
# Collect every discussion_*.json in the Rosetta directory and run one
# individual meeting whose agenda is the merge prompt built from the
# original Rosetta agenda and the coding rules.
rosetta_summaries = load_summaries(
    discussion_paths=[*discussions_phase_to_dir["rosetta"].glob("discussion_*.json")],
)
print("Number of summaries:", len(rosetta_summaries))

rosetta_merge_prompt = create_merge_prompt(agenda=rosetta_agenda, agenda_rules=CODING_RULES)

run_meeting(
    meeting_type="individual",
    team_member=computational_biologist,
    agenda=rosetta_merge_prompt,
    summaries=rosetta_summaries,
    temperature=CONSISTENT_TEMPERATURE,
    save_dir=discussions_phase_to_dir["rosetta"],
    save_name="merged",
)

### Improve Rosetta

In [None]:
# Improve Rosetta XML - prompts
# Rewrite agenda for the RosettaScripts XML file: a short reminder of the
# earlier task, the shared REWRITE_PROMPT, then a numbered list of fixes.
# The text is sent to the model verbatim, so edits here change the prompt.
# NOTE(review): item 1 renames the weights file to "ref2015.wts" while item 3
# keeps scorefxn="ref15" -- presumably "ref15" is the score function's name
# inside the XML rather than the weights file; confirm against the script.
improve_rosetta_xml_agenda = f"""You previously wrote a RosettaScripts XML file to calculate the binding affinity of a nanobody-antigen complex (see summary). {REWRITE_PROMPT}

1. Replace "ref15.wts" with "ref2015.wts".
2. Remove the InterfaceEnergy filter since it is not valid in Rosetta.
3. Replace the entire output tag (including any nested tags) with <OUTPUT scorefxn="ref15"/>."""

In [None]:
# Improve Rosetta XML - discussion
# Load the merged Rosetta summary and pass it, together with the XML rewrite
# agenda, to an individual meeting with the computational biologist.
improve_rosetta_xml_summaries = load_summaries(
    discussion_paths=[discussions_phase_to_dir["rosetta"] / "merged.json"],
)
print("Number of summaries:", len(improve_rosetta_xml_summaries))

run_meeting(
    meeting_type="individual",
    team_member=computational_biologist,
    agenda=improve_rosetta_xml_agenda,
    summaries=improve_rosetta_xml_summaries,
    temperature=CONSISTENT_TEMPERATURE,
    save_dir=discussions_phase_to_dir["rosetta"],
    save_name="improved_xml",
)

In [None]:
# Improve Rosetta Python - prompts
# Rewrite agenda for the score-aggregation script: a short reminder of the
# earlier task, the shared REWRITE_PROMPT, then one requested change.
# The SEQUENCE/SCORE lines are a sample score file embedded in the prompt so
# the model can see the column layout that contains dG_separated; their
# spacing is part of the prompt text and must not be reformatted.
improve_rosetta_python_agenda = f"""You previously wrote a Python script to aggregate multiple Rosetta binding energy score files into one CSV file (see summary). {REWRITE_PROMPT}

1. Modify the extract_scores_from_file function so that it extracts the dG_separated value from a file of the following form.

SEQUENCE:
SCORE: total_score complex_normalized           dG_cross dG_cross/dSASAx100 dG_separated dG_separated/dSASAx100 dSASA_hphobic dSASA_int dSASA_polar delta_unsatHbonds dslf_fa13    fa_atr    fa_dun   fa_elec fa_intra_rep fa_intra_sol_xover4              fa_rep              fa_sol hbond_E_fraction hbond_bb_sc hbond_lr_bb    hbond_sc hbond_sr_bb hbonds_int lk_ball_wtd    nres_all    nres_int       omega     p_aa_pp    packstat per_residue_energy_int pro_close rama_prepro         ref    sc_value side1_normalized side1_score side2_normalized side2_score yhh_planarity description
SCORE:    -990.807             -2.914            -21.436             -1.857      -21.436                 -1.857       774.274  1154.088     379.813            12.000    -3.867 -1928.622   376.416  -541.777        3.745              54.944             265.303            1052.322            0.053     -84.023    -130.532     -54.069     -46.266      1.000     -41.725     340.000      55.000      39.977     -81.331       0.000                 -2.699     2.349      -6.870     131.513       0.000           -2.236     -51.431           -3.031     -97.008         1.706 KP3_Ty1-G59Y_unrelaxed_rank_001_alphafold2_multimer_v3_model_3_seed_000_0001"""

In [None]:
# Improve Rosetta Python - discussion
# Load the merged Rosetta summary and pass it, together with the Python
# rewrite agenda, to an individual meeting with the computational biologist.
improve_rosetta_python_summaries = load_summaries(
    discussion_paths=[discussions_phase_to_dir["rosetta"] / "merged.json"],
)
print("Number of summaries:", len(improve_rosetta_python_summaries))

run_meeting(
    meeting_type="individual",
    team_member=computational_biologist,
    agenda=improve_rosetta_python_agenda,
    summaries=improve_rosetta_python_summaries,
    temperature=CONSISTENT_TEMPERATURE,
    save_dir=discussions_phase_to_dir["rosetta"],
    save_name="improved_python",
)

## Workflow Design

In [None]:
# Workflow design - prompts
# Agenda for the workflow-design meetings: the shared background and nanobody
# prompts followed by the task description, the runtime constraints of each
# tool, and the typical score ranges. The text is sent to the model verbatim,
# so edits here change the prompt.
workflow_design_agenda = f"{background_prompt} {nanobody_prompt} Your team has built three components of a nanobody design pipeline: ESM, AlphaFold-Multimer, and Rosetta. Each of these three tools can be used to score a nanobody mutation based on how the mutation affects binding to an antigen. Your goal is to start with an existing nanobody and iteratively add mutations to it to improve its binding to the newest variant of the SARS-CoV-2 spike protein, resulting in 24 modified nanobodies with one or more mutations. Please determine how to use ESM, AlphaFold-Multimer, and Rosetta in this iterative design process. An important constraint is that ESM can evaluate all potential mutations to a nanobody in 5 minutes while AlphaFold-Multimer takes 30 minutes per mutation and Rosetta takes five minutes per mutation. The whole iterative process should take no more than a few days to complete. Note that AlphaFold-Multimer must be run before Rosetta on each mutation since Rosetta requires the nanobody-antigen structure predicted by AlphaFold-Multimer. Additionally, note that ESM log-likelihood ratios are generally in the range of 5-10 (higher is better), AlphaFold-Multimer interface pLDDT confidence scores are generally in the range of 60-80 (higher is better), and Rosetta binding energy scores are generally in the range of -20 to -40 (lower is better)."

# Questions passed alongside the agenda (as agenda_questions) to both the
# discussion meetings and the merge prompt; kept as a tuple of plain strings.
workflow_design_questions = (
    "In each iteration, what is the order of operations for evaluating mutations with ESM, AlphaFold-Multimer, and Rosetta?",
    "In each iteration, how many mutations (give a single number) will you evaluate with ESM, AlphaFold-Multimer, and Rosetta?",
    "At the end of each iteration, how will you weigh the ESM, AlphaFold-Multimer, and/or Rosetta scores (give a formula) to rank the nanobody mutations?",
    "At the end of each iteration, how many of the top-ranked mutations (give a single number) will you keep for the next round?",
    "How will you decide how many iterations of mutations to run?",
    "After all of the iterations are complete, how exactly (step-by-step) will you select the final set of 24 modified nanobodies from across the iterations for experimental validation?",
)

In [None]:
# Workflow design - discussion
# Launch num_iterations independent workflow-design meetings in a thread pool;
# each one is given its own save_name (discussion_1, discussion_2, ...).
with concurrent.futures.ThreadPoolExecutor() as executor:
    workflow_design_futures = [
        executor.submit(
            run_meeting,
            meeting_type="individual",
            team_member=principal_investigator,
            agenda=workflow_design_agenda,
            agenda_questions=workflow_design_questions,
            save_dir=discussions_phase_to_dir["workflow_design"],
            save_name=f"discussion_{iteration_num + 1}",
            temperature=CREATIVE_TEMPERATURE,
        )
        for iteration_num in range(num_iterations)
    ]
    concurrent.futures.wait(workflow_design_futures)

    # wait() only blocks until the futures finish; an exception raised inside
    # run_meeting stays stored on its future. result() re-raises it here so a
    # failed meeting stops the cell instead of being silently skipped.
    for workflow_design_future in workflow_design_futures:
        workflow_design_future.result()

In [None]:
# Workflow design - merge
# Collect every discussion_*.json in the workflow-design directory and run one
# individual meeting whose agenda is the merge prompt built from the original
# agenda and its questions.
workflow_design_summaries = load_summaries(
    discussion_paths=[*discussions_phase_to_dir["workflow_design"].glob("discussion_*.json")],
)
print("Number of summaries:", len(workflow_design_summaries))

workflow_design_merge_prompt = create_merge_prompt(
    agenda_questions=workflow_design_questions,
    agenda=workflow_design_agenda,
)

run_meeting(
    meeting_type="individual",
    team_member=principal_investigator,
    agenda=workflow_design_merge_prompt,
    summaries=workflow_design_summaries,
    temperature=CONSISTENT_TEMPERATURE,
    save_dir=discussions_phase_to_dir["workflow_design"],
    save_name="merged",
)

## Virtual Lab Analysis

In [None]:
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns

# Use a large default font for every figure produced below.
matplotlib.rcParams["font.size"] = 26

In [None]:
# Per-phase word counts, filled in by the following cells:
# phase name -> {author name -> number of words}.
phase_to_agent_to_word_count = dict()

# Output directory for the analysis figures (created if it does not exist).
figure_dir = Path("figures/virtual_lab_analysis")
figure_dir.mkdir(exist_ok=True, parents=True)

In [None]:
# Count words that the human user wrote
# Agendas embed shared boilerplate, which is stripped before counting:
# background_prompt and nanobody_prompt are each listed once on their own
# (under team_selection and project_specification), and the rewrite prompt is
# removed from the "improve" agendas.
background_prefix = f"{background_prompt} "
background_nanobody_prefix = f"{background_prompt} {nanobody_prompt} "
rewrite_suffix = f" {REWRITE_PROMPT}"

phase_to_human_words = {
    "team_selection": [
        background_prompt,
        principal_investigator.prompt,
        scientific_critic.prompt,
        team_selection_agenda.replace(background_prefix, ""),
    ],
    "project_specification": [
        project_specification_agenda.replace(background_prefix, ""),
        *project_specification_questions,
        nanobody_prompt,
    ],
    "tools_selection": [
        tools_selection_agenda.replace(background_nanobody_prefix, ""),
        *tools_selection_questions,
    ],
    "implementation_agent_selection": [
        implementation_agent_selection_agenda.replace(background_nanobody_prefix, ""),
        *implementation_agent_selection_questions,
    ],
    "esm": [
        esm_agenda.replace(background_nanobody_prefix, ""),
        improve_esm_agenda.replace(rewrite_suffix, ""),
    ],
    "alphafold": [
        alphafold_agenda.replace(background_nanobody_prefix, ""),
        improve_alphafold_agenda.replace(rewrite_suffix, ""),
    ],
    "rosetta": [
        rosetta_agenda.replace(background_nanobody_prefix, ""),
        improve_rosetta_xml_agenda.replace(rewrite_suffix, ""),
        improve_rosetta_python_agenda.replace(rewrite_suffix, ""),
    ],
    "workflow_design": [
        workflow_design_agenda.replace(background_nanobody_prefix, ""),
        *workflow_design_questions,
    ],
}

# Whitespace-separated word count per phase, recorded under "Human Researcher".
for phase, human_words in phase_to_human_words.items():
    human_word_total = sum(len(piece.split()) for piece in human_words)
    phase_to_agent_to_word_count[phase] = {"Human Researcher": human_word_total}

In [None]:
# Count words that the LLM agents wrote
for phase_name in (
    "team_selection",
    "project_specification",
    "tools_selection",
    "implementation_agent_selection",
    "esm",
    "alphafold",
    "rosetta",
    "workflow_design",
):
    phase_dir = discussions_phase_to_dir[phase_name]

    print(f"Phase: {phase_name}")

    # Group every message from this phase's saved JSON discussions by author
    agent_to_text = {}
    for path in phase_dir.glob("*.json"):
        discussion = json.loads(path.read_text())
        for message in discussion:
            agent_to_text.setdefault(message["agent"], []).append(message["message"])

    # Whitespace-separated word count per author; "User" entries are skipped
    for agent, messages in agent_to_text.items():
        if agent != "User":
            phase_to_agent_to_word_count[phase_name][agent] = len(" ".join(messages).split())

# Print words by phase
for phase, agent_counts in phase_to_agent_to_word_count.items():
    print(f"Phase: {phase}")
    for agent, word_count in agent_counts.items():
        print(f"Number of words written by {agent}: {word_count:,}")
    print()

# Sum word counts across phases
agent_to_word_count = {}
for agent_counts in phase_to_agent_to_word_count.values():
    for agent, word_count in agent_counts.items():
        agent_to_word_count.setdefault(agent, 0)
        agent_to_word_count[agent] += word_count

# Total number of words written by each author (human researcher included)
for agent, word_count in agent_to_word_count.items():
    print(f"Total number of words written by {agent}: {word_count:,}")

print()

# Overall totals: human researcher versus all LLM agents combined
total_human_words = sum(
    agent_counts["Human Researcher"] for agent_counts in phase_to_agent_to_word_count.values()
)
total_agent_words = sum(
    word_count for agent, word_count in agent_to_word_count.items() if agent != "Human Researcher"
)

print(f"Total number of words written by Human Researcher: {total_human_words:,}")
print(f"Total number of words written by all LLM agents: {total_agent_words:,}")

In [None]:
# Give every author a fixed tab10 colour so the pie charts below use the same
# colour for the same author.
agent_to_color = dict(
    zip(
        agent_to_word_count,
        sns.color_palette("tab10", n_colors=len(agent_to_word_count)),
    )
)

In [None]:
# Pie chart of the total words written by each author across all phases.
fig, ax = plt.subplots(1, 1, figsize=(8, 6))
total_labels = list(agent_to_word_count.keys())
ax.pie(
    list(agent_to_word_count.values()),
    labels=total_labels,
    colors=[agent_to_color[label] for label in total_labels],
    autopct="%1.1f%%",
)
ax.set_title("Words written")
fig.savefig(figure_dir / "total_words_written.pdf", bbox_inches="tight")

In [None]:
# One pie chart per phase showing each author's share of the words written.
for phase, agent_counts in phase_to_agent_to_word_count.items():
    fig, ax = plt.subplots(1, 1, figsize=(8, 6))
    phase_labels = list(agent_counts.keys())
    ax.pie(
        list(agent_counts.values()),
        labels=phase_labels,
        colors=[agent_to_color[label] for label in phase_labels],
        autopct="%1.1f%%",
    )
    ax.set_title(f"Words written in {phase.replace('_', ' ')}")
    fig.savefig(figure_dir / f"{phase}_words_written.pdf", bbox_inches="tight")