In [None]:

!pip install pyautogen[retrievechat]==0.2.0b5
!pip install amazon-codewhisperer-jupyterlab-ext

In [2]:
import logging
import os
import pandas as pd
import glob 
from autogen import config_list_from_dotenv
from autogen.agentchat.contrib.gpt_assistant_agent import GPTAssistantAgent
from autogen import UserProxyAgent, AssistantAgent, GroupChat, GroupChatManager

In [3]:
# Notebook-wide logger. The logger itself accepts DEBUG and above, while the
# console handler configured below only emits INFO and above.
logger = logging.getLogger('math_solver')
logger.setLevel(logging.DEBUG)

formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

# Optional file logging (disabled):
# fh = logging.FileHandler('math_solver.log')
# fh.setLevel(logging.DEBUG)
# fh.setFormatter(formatter)
# logger.addHandler(fh)

# logging.getLogger returns the same object on every call, so re-running this
# cell would stack one more console handler and print every record twice.
# Drop console handlers left by a previous run before attaching a fresh one.
for stale_handler in list(logger.handlers):
    if type(stale_handler) is logging.StreamHandler:
        logger.removeHandler(stale_handler)

ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
ch.setFormatter(formatter)
logger.addHandler(ch)


The Abstract Algebra questions come from the MMLU benchmark repository: https://github.com/hendrycks/test

Download link for the dataset archive: https://people.eecs.berkeley.edu/~hendrycks/data.tar

In [8]:
# Path of the abstract-algebra test split. Column names are supplied explicitly
# when reading, and each row becomes one dict keyed by those names.
data_file = "data/MMLU/abstract_algebra_test.csv"
abstract_algebra_test = (
    pd.read_csv(
        data_file,
        names=['text', 'choice_A', 'choice_B', 'choice_C', 'choice_D', 'answer'],
    )
    .to_dict(orient="records")
)
# Preview the first two question records.
abstract_algebra_test[:2]

[{'text': 'Find the degree for the given field extension Q(sqrt(2), sqrt(3), sqrt(18)) over Q.',
  'choice_A': '0',
  'choice_B': '4',
  'choice_C': '2',
  'choice_D': '6',
  'answer': 'B'},
 {'text': 'Let p = (1, 2, 5, 4)(2, 3) in S_5 . Find the index of <p> in S_5.',
  'choice_A': '8',
  'choice_B': '2',
  'choice_C': '24',
  'choice_D': '120',
  'answer': 'C'}]

In [22]:
# Both config lists are read from the same .env file and differ only in the
# model they are filtered down to.
dotenv_path = ".env"
cheap_filter = {"model": {"gpt-3.5-turbo"}}
costly_filter = {"model": {"gpt-4"}}

# Cheap tier.
config_list_cheap = config_list_from_dotenv(dotenv_file_path=dotenv_path, filter_dict=cheap_filter)

# Costly tier.
config_list_costly = config_list_from_dotenv(dotenv_file_path=dotenv_path, filter_dict=costly_filter)

Student and Critic tackle one question.

In [59]:
# LLM settings shared by every agent created in this cell (cheap model).
# "seed" and "request_timeout" must be set in the environment: os.getenv
# returns None for a missing variable and int(None) raises TypeError.
llm_config = {
    "config_list": config_list_cheap,
    "seed": int(os.getenv("seed")),
    "timeout": int(os.getenv("request_timeout")),
}

# The student opens the conversation and stops once a message ends with
# "TERMINATE". `x.get("content") or ""` also covers messages whose content key
# is present but None, which `x.get("content", "")` would pass on to .rstrip().
student = UserProxyAgent(
    name="Top_student",
    system_message="I am a top student at top university achieving 4.0 academic score. I won gold medals in IMO.",
    max_consecutive_auto_reply=10,
    is_termination_msg=lambda x: (x.get("content") or "").rstrip().endswith("TERMINATE"),
    code_execution_config={
        "work_dir": "coding",
        "use_docker": False,
    },
    human_input_mode="NEVER",
    llm_config=llm_config,
)

# The critic reviews whatever the student writes.
critic = AssistantAgent(
    name="Critic",
    system_message=f"I am a critic reviewing answer sheet of {student.name}. If I find any defects or mistakes, I will tell {student.name}.",
    llm_config=llm_config,
)

# Two-agent group chat, capped at five rounds, routed through a manager agent.
groupchat = GroupChat(agents=[student, critic],
                      messages=[],
                      max_round=5)
manager = GroupChatManager(name="manager",
                           groupchat=groupchat,
                           llm_config=llm_config)

# Adjacent string literals are concatenated verbatim, so each sentence carries
# its own trailing space (the original produced "question.Question:").
student.initiate_chat(
    manager,
    message="I am working on an algebra question. "
            "Question: Find the degree for the given field extension Q(sqrt(2), sqrt(3), sqrt(18)) over Q. "
            "Choices: A. 0, B. 4, C. 2, D. 6",
)

In [60]:
# Display the manager's stored conversation history (a mapping from peer agent to its list of messages).
manager.chat_messages

defaultdict(list, {})

In [61]:
import json
message_history = []

# Create the output folder before any LLM call is made, so a missing directory
# fails immediately instead of after the chats have already run.
output_dir = './chat_messages'
os.makedirs(output_dir, exist_ok=True)

for idx, question in enumerate(abstract_algebra_test[:2]):
    # Start every question from a clean slate.
    for agent in groupchat.agents:
        agent.reset()
    # The manager is not part of groupchat.agents, so it is reset explicitly.
    # NOTE(review): in pyautogen 0.2 this is expected to also clear
    # groupchat.messages through the manager's registered reset hook - confirm
    # against the installed version.
    manager.reset()
    message = f"""
            I am working on the algebra question. 
            Question: {question['text']}
            Choices: A: {question['choice_A']}, B: {question['choice_B']}, C: {question['choice_C']}, D: {question['choice_D']}
            """
    student.initiate_chat(manager,
                          message=message)

    # Record this question's transcript as seen by the manager, keyed by the
    # name of the agent it exchanged messages with. The lists are copied so a
    # later reset or clear of the agents' histories cannot empty the records.
    one_chat = {'question_id': idx,
                'question': question}
    for agent, agent_messages in manager.chat_messages.items():
        one_chat[agent.name] = list(agent_messages)
    message_history.append(one_chat)

# dump message_history to json file in folder ./chat_messages/

with open(f'{output_dir}/message_history.json', 'w') as f:
    json.dump(message_history, f)

[33mTop_student[0m (to manager):


            I am working on the algebra question. 
            Question: Find the degree for the given field extension Q(sqrt(2), sqrt(3), sqrt(18)) over Q.
            Choices: A: 0, B: 4, C: 2, D: 6
            

--------------------------------------------------------------------------------




[33mCritic[0m (to manager):

Upon reviewing your answer, I noticed a mistake. The correct answer should be:

The degree of the given field extension is equal to the product of the degrees of each individual extension. 

We have Q(sqrt(2)) over Q, which has a degree of 2 since sqrt(2) is the root of the irreducible polynomial x^2 - 2.

We also have Q(sqrt(3)) over Q, which also has a degree of 2 since sqrt(3) is the root of the irreducible polynomial x^2 - 3.

Lastly, we have Q(sqrt(18)) over Q, which again has a degree of 2 since sqrt(18) can be simplified to 3sqrt(2) and is the root of the irreducible polynomial (x/3)^2 - 2.

Therefore, the degree of the given field extension Q(sqrt(2), sqrt(3), sqrt(18)) over Q is 2 * 2 * 2 = 8.

Based on this analysis, none of the provided choices A: 0, B: 4, C: 2, D: 6 are correct. The correct answer should be E: 8.

--------------------------------------------------------------------------------




[33mTop_student[0m (to manager):

Apologies for the mistake in my previous response. You are correct, and the degree of the given field extension is indeed 8. Thank you for pointing out the error.

--------------------------------------------------------------------------------




[33mCritic[0m (to manager):

You're welcome! Apologizing for mistakes is always appreciated. I'm glad I was able to help clarify the correct answer for you. Keep up the good work!

--------------------------------------------------------------------------------




[33mTop_student[0m (to manager):

Thank you for your understanding and kind words! I appreciate your help in pointing out the mistake and I will make sure to be more careful in the future. I'm here to provide accurate information and help clarify concepts, so feel free to reach out if you have any further questions or need assistance. Keep up your great work as well!

--------------------------------------------------------------------------------
[33mTop_student[0m (to manager):


            I am working on the algebra question. 
            Question: Let p = (1, 2, 5, 4)(2, 3) in S_5 . Find the index of <p> in S_5.
            Choices: A: 8, B: 2, C: 24, D: 120
            

--------------------------------------------------------------------------------




[33mCritic[0m (to manager):

First of all, I would like to mention that the notation used in the question is not clear. It appears that p is a permutation in the symmetric group S_5, but the notation (1, 2, 5, 4)(2, 3) is not clear. 

To find the index of <p> in S_5, we need more information about p itself. The index of a subgroup H in a group G is equal to the order of G divided by the order of H. However, without knowing the specific form of p, we cannot determine the order of <p>. As a result, we are unable to calculate the index of <p> in S_5. 

Therefore, the question is incomplete and cannot be answered properly.

--------------------------------------------------------------------------------




[33mTop_student[0m (to manager):

You are correct, I apologize for the confusion. Without knowing the specific form of the permutation p, it is not possible to determine its order or calculate the index of <p> in S_5. The question is indeed incomplete and cannot be answered properly. Thank you for pointing out the ambiguity in the notation used.

--------------------------------------------------------------------------------




[33mCritic[0m (to manager):

No problem, Top_student. It's important to point out any ambiguity or incomplete information in a question. By doing so, we ensure that we are providing accurate and precise answers. Keep up the good work!

--------------------------------------------------------------------------------




[33mTop_student[0m (to manager):

Thank you for your understanding and kind words. As a top student, it is important for me to strive for accuracy and clarity in all aspects, including analyzing and answering questions. I appreciate your feedback and will continue to ensure that I provide precise and reliable information.

--------------------------------------------------------------------------------


In [None]:
# Write the collected chat records to the shared history file.
history_path = './chat_messages/message_history.json'
with open(history_path, 'w') as history_file:
    json.dump(message_history, history_file)

In [53]:
# Look up what the manager has recorded: the agents it holds history for, then
# the message lists stored for the student and for the critic.
# Only the value of the last expression is rendered as the cell output; the
# results of the first two lines are discarded.
manager.chat_messages.keys()
manager.chat_messages.get(student)
manager.chat_messages.get(critic)

[{'content': 'I am working on an algebra question.Question: Find the degree for the given field extension Q(sqrt(2), sqrt(3), sqrt(18)) over Q. Choices: A. 0, B. 4, C. 2, D. 6',
  'name': 'Top_student',
  'role': 'assistant'},
 {'content': "To find the degree of the field extension Q(sqrt(2), sqrt(3), sqrt(18)) over Q, we need to consider the number of elements in a basis for this field extension over Q.\n\nFirst, let's note that sqrt(18) = sqrt(3)*sqrt(6), and sqrt(6) = sqrt(2)*sqrt(3). So, we can simplify the field extension as Q(sqrt(2), sqrt(3)).\n\nNow, let's determine the degrees of the intermediate extensions.\n\nThe degree of the extension Q(sqrt(2)) over Q is 2, since sqrt(2) is a root of the irreducible polynomial x^2 - 2.\n\nSimilarly, the degree of the extension Q(sqrt(3)) over Q is also 2, since sqrt(3) is a root of the irreducible polynomial x^2 - 3.\n\nSince the extensions Q(sqrt(2)) and Q(sqrt(3)) are linearly independent, the degree of the composite field extension Q(s

In [37]:
# LLM settings for this experiment (costly model).
# "seed" and "request_timeout" must be set in the environment: os.getenv
# returns None for a missing variable and int(None) raises TypeError.
llm_config = {
    "config_list": config_list_costly,
    "seed": int(os.getenv("seed")),
    "timeout": int(os.getenv("request_timeout")),
}

# The student is created first because the professor's prompt interpolates
# student.name. Defined the other way round, the prompt picks up whatever
# `student` an earlier cell left in the kernel (or fails with NameError on a
# fresh kernel). Adjacent string literals are concatenated verbatim, so each
# sentence carries its own separating space.
student = AssistantAgent(
    name="student",
    system_message="I am a top student at top university achieving 4.0 academic score. I will be asked to solve multi-choice algebra problems."
                   " I will write my answer in JSON format {'answer': answer}. "
                   "If professor tells me answer is incorrect, I will retry. Otherwise I terminate conversation.",
    llm_config=llm_config,
)

# The professor poses the question and reviews the answer. A message ending in
# "TERMINATE" ends the chat; `x.get("content") or ""` also covers messages
# whose content key is present but None.
professor = UserProxyAgent(
    name="professor",
    system_message=f"I am math professor in top university. I show a question to {student.name}, review his answer. "
                   f"If I find any defects or mistakes, I will tell {student.name} and let him try again. "
                   f"If answer is correct, I terminate conversation.",
    max_consecutive_auto_reply=3,
    is_termination_msg=lambda x: (x.get("content") or "").rstrip().endswith("TERMINATE"),
    code_execution_config={
        "work_dir": "teacher",
        "use_docker": False,
    },
    human_input_mode="TERMINATE",
    llm_config=llm_config,
)

professor.initiate_chat(
    student,
    message="Here is an algebra question. "
            "Question: Find the degree for the given field extension Q(sqrt(2), sqrt(3), sqrt(18)) over Q. "
            "Choices: A. 0, B. 4, C. 2, D. 6",
)

[33mprofessor[0m (to student):

Here an algebra question.Question: Find the degree for the given field extension Q(sqrt(2), sqrt(3), sqrt(18)) over Q. Choices: A. 0, B. 4, C. 2, D. 6

--------------------------------------------------------------------------------


[33mstudent[0m (to professor):

{'answer': 'B'}

--------------------------------------------------------------------------------
[31m
>>>>>>>> USING AUTO REPLY...[0m
[33mprofessor[0m (to student):

Yes, the answer is correct. The degree of the given field extension Q(sqrt(2), sqrt(3), sqrt(18)) over Q is indeed 4. Great job!

--------------------------------------------------------------------------------
[33mstudent[0m (to professor):

Thank you.

--------------------------------------------------------------------------------
[31m
>>>>>>>> USING AUTO REPLY...[0m
[33mprofessor[0m (to student):

You're welcome! If you have any other questions, feel free to ask. Happy learning!

--------------------------------------------------------------------------------
[33mstudent[0m (to professor):

Thank you. I'll continue with my studies.

--------------------------------------------------------------------------------
[31m
>>>>>>>> USING AUTO REPLY...[0m
[33mprofessor[0m (to

In [38]:
# Display every message the professor agent has recorded, grouped by the agent it talked to.
professor.chat_messages

defaultdict(list,
            {<autogen.agentchat.assistant_agent.AssistantAgent at 0x152ad6354e0>: [{'content': 'Here an algebra question.Question: Find the degree for the given field extension Q(sqrt(2), sqrt(3), sqrt(18)) over Q. Choices: A. 0, B. 4, C. 2, D. 6',
               'role': 'assistant'},
              {'content': "{'answer': 'B'}", 'role': 'user'},
              {'content': 'Yes, the answer is correct. The degree of the given field extension Q(sqrt(2), sqrt(3), sqrt(18)) over Q is indeed 4. Great job!',
               'role': 'assistant'},
              {'content': 'Thank you.', 'role': 'user'},
              {'content': "You're welcome! If you have any other questions, feel free to ask. Happy learning!",
               'role': 'assistant'},
              {'content': "Thank you. I'll continue with my studies.",
               'role': 'user'},
              {'content': "That sounds like a great plan. Keep up the good work and don't hesitate to reach out if you have any mo

In [28]:
# Display every message the student agent has recorded, grouped by the agent it talked to.
student.chat_messages

defaultdict(list,
            {<autogen.agentchat.user_proxy_agent.UserProxyAgent at 0x152ad62c3a0>: [{'content': 'Here an algebra question.Question: Find the degree for the given field extension Q(sqrt(2), sqrt(3), sqrt(18)) over Q. Choices: A. 0, B. 4, C. 2, D. 6',
               'role': 'user'},
              {'content': 'To find the degree of the given field extension, we need to determine the number of nested square roots involved.\n\nFirst, we know that sqrt(2) and sqrt(3) are both irrational numbers, so extending the field from Q to Q(sqrt(2)) and then to Q(sqrt(2), sqrt(3)) adds two additional square roots.\n\nNext, we can simplify the expression sqrt(18). Since 18 can be factored as 2 * 3^2, we can write sqrt(18) as sqrt(2 * 3^2) = sqrt(2) * sqrt(3^2) = sqrt(2) * 3 = 3sqrt(2). Therefore, the square root of 18 can be expressed as a combination of sqrt(2).\n\nSo, Q(sqrt(2), sqrt(3), sqrt(18)) = Q(sqrt(2), sqrt(3), 3sqrt(2)).\n\nWe can see that 3sqrt(2) can be written as 3 * (sqr

In [27]:
# LLM settings for the critic/student experiment (cheap model).
# "seed" and "request_timeout" must be set in the environment: os.getenv
# returns None for a missing variable and int(None) raises TypeError.
llm_config = {
    "config_list": config_list_cheap,
    "seed": int(os.getenv("seed")),
    "timeout": int(os.getenv("request_timeout")),
}

# The critic hands out the question and reviews the answer. Adjacent string
# literals are concatenated verbatim, so each sentence carries its own trailing
# space (the original glued them together as "student.I understand ...").
# `x.get("content") or ""` also covers messages whose content key is present
# but None, which `x.get("content", "")` would pass on to .rstrip().
critic = UserProxyAgent(
    name="Critic",
    system_message="I give math question to student. I review answer of student. If I find any mistakes, I will tell student. "
                   "I understand correct answer is one of the four choices. If student's answer is not in choices, his answer is incorrect. "
                   "If I agree with student's answer, I terminate conversation by saying 'TERMINATE'.",
    max_consecutive_auto_reply=3,
    is_termination_msg=lambda x: (x.get("content") or "").rstrip().endswith("TERMINATE"),
    code_execution_config={
        "work_dir": "coding",
        "use_docker": False,
    },
    human_input_mode="NEVER",
    llm_config=llm_config,
)

# The student solves the question and may defend or revise the answer after
# the critic's feedback. The prompt asks for the final answer as {Answer: X}.
student = AssistantAgent(
    name="student",
    system_message="I am a top student at top university achieving 4.0 academic score. I won gold medals in IMO. I can solve difficult math problems. "
                   f"I answer question given by {critic.name}. I understand correct answer is one of the four choices. If my answer is not in choices, I will check my solution and try to solve it again. "
                   f"If {critic.name} tells me my answer is wrong, I will analyse his explanation carefully to decide whether he is right or wrong. If I find my answer is correct, I will keep my answer. If I find my answer is incorrect, I will solve the question again. "
                   "I will put answer in the end of my reply in the format {Answer: X}. "
                   "Focus on problem solving, and avoid useless greetings.",
    llm_config=llm_config,
)



In [28]:
import json
chat_name = "abstract_algebra_test_10-100_questions_mas_2"
message_history = []

# Create the output folder before the (slow, paid) chat loop starts. If the
# folder is missing, open() at the end would otherwise raise only after every
# question has been processed, and all transcripts would be lost.
output_dir = './chat_messages'
os.makedirs(output_dir, exist_ok=True)

for idx, question in enumerate(abstract_algebra_test[10:100]):
    # Fresh state for both agents on every question.
    student.reset()
    critic.reset()
    message = f""" 
            Question: {question['text']}
            Choices: A: {question['choice_A']}, B: {question['choice_B']}, C: {question['choice_C']}, D: {question['choice_D']}
            """
    critic.initiate_chat(student,
                         message=message)

    # question_id is the position inside the [10:100] slice (starting at 0),
    # not the row number in the full dataset.
    one_chat = {'question_id': idx,
                'question': question,
                'student': list(student.chat_messages.values()),
                'critic': list(critic.chat_messages.values())
                }

    message_history.append(one_chat)

# dump message_history to json file in folder ./chat_messages/

with open(f'{output_dir}/{chat_name}.json', 'w') as f:
    json.dump(message_history, f, indent=4)

[33mCritic[0m (to student):

 
            Question: Statement 1 | If H is a subgroup of G and a belongs to G then |aH| = |Ha|. Statement 2 | If H is a subgroup of G and a and b belong to G, then aH and Hb are identical or disjoint.
            Choices: A: True, True, B: False, False, C: True, False, D: False, True
            

--------------------------------------------------------------------------------
[33mstudent[0m (to Critic):

To determine the correctness of Statement 1 and Statement 2, let's break down and analyze each statement individually.

Statement 1: If H is a subgroup of G and a belongs to G, then |aH| = |Ha|.
The notation |A| represents the cardinality or the number of elements in set A. In this case, we are comparing the number of elements in aH (the left coset of H) and Ha (the right coset of H) for a specific element a in group G.

To prove or disprove this statement, let's analyze the definition of a left coset and a right coset:

- Left Coset (aH): The set o

In [75]:
# Replace the first record's 'student' entry with a plain list of the same items
# (presumably so json.dump accepts it when the entry is a dict_values view - confirm).
message_history[0]['student'] = list(message_history[0]['student'])

In [76]:
# Write only the first record's student-side messages to the history file,
# pretty-printed with a four-space indent.
history_path = './chat_messages/message_history.json'
with open(history_path, 'w') as history_file:
    first_student_messages = message_history[0]['student']
    json.dump(first_student_messages, history_file, indent=4)

# Single agent

In [29]:
from openai import OpenAI
# Client used by the single-agent run below; the API key is read from the
# OPENAI_API_KEY environment variable (os.environ.get yields None when it is unset).
openai_client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY")) 

def build_question_prompt(question):
    """Build the chat messages for one multiple-choice question.

    Args:
        question: Mapping with the keys 'text', 'choice_A', 'choice_B',
            'choice_C' and 'choice_D'.

    Returns:
        A two-element list: the system message describing the persona and the
        required answer format, followed by the user message containing the
        question text and its four choices.

    Raises:
        KeyError: If one of the expected keys is missing from ``question``.
    """
    # Adjacent string literals are concatenated verbatim, so each sentence
    # carries its own trailing space (the original glued them together as
    # "problems.I answer ..." and "incorrect.I will put ...").
    system_prompt = {
        "role": "system",
        "content": "I am a top student at top university achieving 4.0 academic score. I won gold medals in IMO. I can solve difficult math problems. "
                   "I answer math questions. I understand the correct answer is one of the four choices. If my answer is not in choices, it means my answer is incorrect. "
                   "I will put answer in the end of my reply in the format {Answer: X}"
    }
    user_prompt = {
        "role": "user",
        "content": f"""
            Question: {question['text']}
            Choices: A: {question['choice_A']}, B: {question['choice_B']}, C: {question['choice_C']}, D: {question['choice_D']}
        """
    }
    return [system_prompt,
            user_prompt]



In [30]:
import json

chat_name = "abstract_algebra_test_10-100_questions_single"

# Create the output folder before the (slow, paid) request loop starts. If the
# folder is missing, open() at the end would otherwise raise only after every
# question has been answered, and all responses would be lost.
output_dir = './chat_messages'
os.makedirs(output_dir, exist_ok=True)

# The seed is the same for every request, so it is parsed once. int(None)
# raises TypeError when the "seed" environment variable is not set.
request_seed = int(os.environ.get('seed'))

chat_history_single_agent = []
for i, question in enumerate(abstract_algebra_test[10:100]):
    messages = build_question_prompt(question)
    response = openai_client.chat.completions.create(
        model='gpt-3.5-turbo',
        messages=messages,
        seed=request_seed,
        max_tokens=1000,
    )
    # Text of the first (and only requested) completion.
    answer_text = response.choices[0].message.content
    print(answer_text)
    # question_id is the position inside the [10:100] slice (starting at 0),
    # not the row number in the full dataset.
    chat_history_single_agent.append({
        'question_id': i,
        'question': question,
        'response': answer_text
    })
with open(f'{output_dir}/{chat_name}.json', 'w') as f:
    json.dump(chat_history_single_agent, f, indent=4)

To determine the correctness of the statements, let's analyze each one individually:

Statement 1: If H is a subgroup of G and a belongs to G, then |aH| = |Ha|.
This statement is true and known as the "coset equality theorem". It states that the left coset of a subgroup H generated by an element a has the same cardinality as the right coset of H generated by a. Therefore, Statement 1 is True.

Statement 2: If H is a subgroup of G and a and b belong to G, then aH and Hb are identical or disjoint.
This statement is False. In general, the left coset aH and the right coset Hb are not necessarily identical or disjoint. However, there is an exception when aH = Hb, which happens when a and b are conjugate elements in G. But in general, they can intersect or be completely distinct. Therefore, Statement 2 is False.

Based on the analysis, the correct choice is:
{Answer: C} True, False.
To determine the properties of the relation S, we need to check if it satisfies the definitions of symmetry, a

: 

In [21]:
# Print every stored single-agent record (question id, question, model response), one dict per line.
for chat in chat_history_single_agent:
    print(chat)

{'question_id': 0, 'question': {'text': 'Find the degree for the given field extension Q(sqrt(2), sqrt(3), sqrt(18)) over Q.', 'choice_A': '0', 'choice_B': '4', 'choice_C': '2', 'choice_D': '6', 'answer': 'B'}, 'response': 'To find the degree of the given field extension, we need to determine how many elements are added to the base field Q. \n\nFirst, note that Q(sqrt(2)) adds two elements to the base field Q: sqrt(2) and -sqrt(2). \n\nNext, Q(sqrt(2), sqrt(3)) adds two more elements to the previous field: sqrt(3) and -sqrt(3). \n\nFinally, Q(sqrt(2), sqrt(3), sqrt(18)) adds one more element, sqrt(18) = sqrt(2 * 3 * 3), to the previous field. \n\nTherefore, the degree of the given field extension Q(sqrt(2), sqrt(3), sqrt(18)) over Q is the product of the degrees of each intermediate extension: 2 * 2 * 1 = 4. \n\nThus, the correct choice is B: 4. {Answer: B}'}
{'question_id': 1, 'question': {'text': 'Let p = (1, 2, 5, 4)(2, 3) in S_5 . Find the index of <p> in S_5.', 'choice_A': '8', 'c