In [1]:
from autosearch.functions.text_analysis import chunk_pdf

from autosearch.database.paper_database import PaperDatabase
from autosearch.analysis.document_analyzer import DocumentAnalyzer
from autosearch.research_project import ResearchProject

import autogen
from typing import List, Dict, Any

In [2]:
import os
from dotenv import load_dotenv
from azure.core.exceptions import HttpResponseError

# Load environment variables before the credentials below are read with os.getenv.
# NOTE(review): presumably load_dotenv() picks them up from a local .env file — confirm the file exists.
load_dotenv()

# Azure Document Intelligence credentials, taken from the process environment.
# os.getenv yields None for any variable that is not set, so a missing key or
# endpoint is stored as None here rather than raising.
config = dict(
    doc_api_key=os.getenv("DOCUMENT_INTELLIGENCE_KEY"),
    doc_endpoint=os.getenv("DOCUMENT_INTELLIGENCE_ENDPOINT"),
)

In [3]:
# Working title of the article; it is passed to test_project.run(title=...) in a later cell.
title = "Exploring the Intricacies of Polymer Representation: Unraveling Complexity"

In [10]:
# Build the test project. `config` carries the credentials read earlier in the
# notebook; the function classes and communities to enable are selected by name.
test_project = ResearchProject(
    project_id="project_test",
    version="0.2",
    config=config,
    config_file="OAI_CONFIG_LIST-sweden-505",
    initiate_db=False,
    funcClsList=[
        "FactualCheck",
        "GetPDF",
        "GetPDFs",
        "UrlCheck",
        "AcademicRetriever",
        "AcademicSearch",
    ],
    communiteList=[
        "outline_agents",
        "write_section_agents",
        "instructor_agents",
    ],
)

# Print the logging session id so this run can be matched to its logs.
project_config = test_project.ProjectConfig
print(project_config.logging_session_id)

# Start the run for the chosen title and audience.
test_project.run(
    title=title,
    target_audience="expert in experimental polymer science and machine learning experts",
)

Logging session ID: b5430dbb-8397-4ec9-9980-9c526212e38b


[92m    Location = ./project_test/0.2/db/uid_text_dict.pkl[0m
Equipping function 'academic_retriever' to agent 'blog_editor-in-chief'
Equipping function 'academic_search' to agent 'blog_editor-in-chief'
Equipping function 'get_pdf' to agent 'blog_editor-in-chief'
Equipping function 'get_pdfs' to agent 'blog_editor-in-chief'
Equipping function 'factual_check' to agent 'content_strategist'
[92m    Location = ./project_test/0.2/db/uid_text_dict.pkl[0m
Equipping function 'factual_check' to agent 'content_review_specialist'
Equipping function 'academic_retriever' to agent 'content_review_specialist'
Equipping function 'academic_search' to agent 'content_review_specialist'
Equipping function 'get_pdf' to agent 'content_review_specialist'
[92m    Location = ./project_test/0.2/db/uid_text_dict.pkl[0m
Equipping function 'academic_search' to agent 'topic_expert'
Equipping function 'academic_retriever' to agent 'topic_expert'
[92m    Location = ./project_test/0.2/db/uid_text_dict.pkl[0m
E



Equipping function 'academic_retriever' to agent 'research_resource_expert'




Equipping function 'get_pdf' to agent 'research_resource_expert'
b5430dbb-8397-4ec9-9980-9c526212e38b
[33mtopic_expert[0m (to chat_manager):


        We need to create comprehensive instructions for the Editor-in-Chief to write an article titled:
        "Exploring the Intricacies of Polymer Representation: Unraveling Complexity"

        The target audience for this article is: expert in experimental polymer science and machine learning experts

        Each agent should contribute their expertise to craft a well-rounded instruction,
        keeping in mind the specific needs and background of the target audience.
        Topic Expert, please start by analyzing the title and providing key insights,
        considering how they should be presented to this audience.
        

--------------------------------------------------------------------------------
[32m
Next speaker: structure_specialist
[0m
[33mstructure_specialist[0m (to chat_manager):

Analyzing the title "Exploring the

The return type of the function 'write_section' is not annotated. Although annotating it is optional, the function should return either a string, a subclass of 'pydantic.BaseModel'.


[33meditor_user[0m (to chat_manager):

Failed to extract final instruction.

--------------------------------------------------------------------------------
[32m
Next speaker: blog_editor-in-chief
[0m
[33mblog_editor-in-chief[0m (to chat_manager):

[32m***** Suggested tool call (call_bQII1SH9Yj9zD6vczFcS41V4): academic_search *****[0m
Arguments: 
{"query":"language model reliability and ethics"}
[32m********************************************************************************[0m

--------------------------------------------------------------------------------
[32m
Next speaker: editor_user
[0m
[35m
>>>>>>>> EXECUTING FUNCTION academic_search...[0m
[33meditor_user[0m (to chat_manager):

[33meditor_user[0m (to chat_manager):

[32m***** Response from calling tool (call_bQII1SH9Yj9zD6vczFcS41V4) *****[0m

Results from arxiv:

1. Title: Global Data Constraints: Ethical and Effectiveness Challenges in Large Language Model
   Authors: Jin Yang, Zhiqiang Wang, Yanbin Li

KeyboardInterrupt: 

In [5]:
from autosearch.communities.outline_agents import agentsconfig
from autosearch.agents.agents_creator import AgentsCreator

# agents = AgentsCreator(test_project.ProjectConfig, agents_config=agentsconfig).initialize_agents()
# agents

In [6]:
# Manual smoke-test cases: each entry is (registered function name, keyword arguments).
# Only the uncommented entries are executed by the driver loop that follows.
tests = [
    (
        "factual_check",
        dict(
            text="The collaboration between these fields also birthed differentiable programming frameworks for quantum chemistry, like TorchANI and TensorMol [4]. These latter-day frameworks enable researchers to quickly prototype neural networks that learn quantum mechanical laws directly from data, providing a valuable tool to accelerate discovery. This integration has offered promising results in tasks like molecular dynamics simulations, which are key for understanding chemical reactions and material properties.",
            paper_title="Automated Calculation of Thermal Rate Coefficients using Ring Polymer Molecular Dynamics and Machine-Learning Interatomic Potentials with Active Learning",
            paper_url="http://arxiv.org/pdf/1805.11924v3",
            reason="To confirm the factual information about differentiable programming frameworks and their impact on research in quantum chemistry and neural networks.",
        ),
    ),
    (
        "get_pdfs",
        dict(
            urls=[
                "http://arxiv.org/pdf/2305.13267v1",
                "http://arxiv.org/pdf/2305.06530v1",
            ],
            reasons=["factual_check", "factual_check"],
        ),
    ),
    # Disabled cases, kept for reference:
    # ('get_pdf', {
    #     "url": "https://arxiv.org/pdf/2110.13711",
    #     "reason": "factual_check",
    #     "part": "full"
    # }),
    # ('url_check',{
    #         "paper_url": "https://arxiv.org/pdf/2107.03012.pdf",
    #         "paper_title": "From algebra to analysis: new proofs of theorems by Ritt and Seidenberg"
    # }),
    # ('academic_retriever',{
    #     "queries":["Large Language Models", "Assessing Language Models", "AI safety and reliability"],
    #     "n_results":3
    # }),
    # ('academic_search',{"query":"Human-Centred Learning Analytics and AI in Education: a Systematic Literature Review"}),
]

# Run every (function name, kwargs) case in `tests` against the functions
# registered on the project and print each result.
for func_name, args in tests:
    # Track the match explicitly: a function that runs and legitimately returns
    # None must not be reported as "not found".
    matched = False
    results = None
    # No `break` on purpose: every registered function carrying this name is
    # run, exactly as before.
    for func in test_project.functions:
        if func.name != func_name:
            continue
        matched = True
        print(f"Running {func_name} with args: {args}")
        results = func.func(**args)
        print(results)

    if not matched:
        print(f"Function {func_name} not found")


Running factual_check with args: {'text': 'The collaboration between these fields also birthed differentiable programming frameworks for quantum chemistry, like TorchANI and TensorMol [4]. These latter-day frameworks enable researchers to quickly prototype neural networks that learn quantum mechanical laws directly from data, providing a valuable tool to accelerate discovery. This integration has offered promising results in tasks like molecular dynamics simulations, which are key for understanding chemical reactions and material properties.', 'paper_title': 'Automated Calculation of Thermal Rate Coefficients using Ring Polymer Molecular Dynamics and Machine-Learning Interatomic Potentials with Active Learning', 'paper_url': 'http://arxiv.org/pdf/1805.11924v3', 'reason': 'To confirm the factual information about differentiable programming frameworks and their impact on research in quantum chemistry and neural networks.'}


Created 15 docs with a total of 11954 tokens. Largest doc has 2993 tokens.
[92m    Location = ./project_test/0.2/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/0.2/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/0.2/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/0.2/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/0.2/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/0.2/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/0.2/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/0.2/db/uid_text_dict.pkl[0m


Add of existing embedding ID: 8
Add of existing embedding ID: 8
Add of existing embedding ID: 21
Add of existing embedding ID: 21
Add of existing embedding ID: 25
Add of existing embedding ID: 27
Add of existing embedding ID: 34
Add of existing embedding ID: 39
Add of existing embedding ID: 40
Add of existing embedding ID: 41
Add of existing embedding ID: 42
Add of existing embedding ID: 44
Add of existing embedding ID: 47
Add of existing embedding ID: 52
Add of existing embedding ID: 52
Add of existing embedding ID: 57
Add of existing embedding ID: 63
Add of existing embedding ID: 67
Add of existing embedding ID: 71
Add of existing embedding ID: 89
Add of existing embedding ID: 90


[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
max token limit: 126976
factual_check model: gpt-4o
[33mfactual_checker_user[0m (to factual_checker):


Below, you will find a passage labeled "TEXT" that references a specific paper: 'Automated Calculation of Thermal Rate Coefficients using Ring Polymer Molecular Dynamics and Machine-Learning Interatomic Potentials with Active Learning [http://arxiv.org/pdf/1805.11924v3]' alongside its corresponding "PAPER_CONTENT." Your task is to read the "PAPER_CONTENT" and verify the factual accuracy of the "TEXT" as it pertains to the paper.

Once you have assessed the factual accuracy, you MUST provide feedback, begining with 'FEEDBACK:'. Following your assessment, please write a summary

Add of existing embedding ID: 128
Insert of existing embedding ID: 128


[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[92m    Location = ./project_test/0.2/db/uid_text_dict.pkl[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m


Add of existing embedding ID: 135
Insert of existing embedding ID: 135


[31m
>>>>>>>> USING AUTO REPLY...[0m


Add of existing embedding ID: 135
Insert of existing embedding ID: 135


[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m


Add of existing embedding ID: 136
Insert of existing embedding ID: 136


[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m


Add of existing embedding ID: 137
Insert of existing embedding ID: 137


[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m


Add of existing embedding ID: 138
Insert of existing embedding ID: 138


[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m


Add of existing embedding ID: 139
Insert of existing embedding ID: 139


[31m
>>>>>>>> USING AUTO REPLY...[0m
[92m    Location = ./project_test/0.2/db/uid_text_dict.pkl[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[92m    Location = ./project_test/0.2/db/uid_text_dict.pkl[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
4 articles have been read, so far.
Successfully processed Enhance Reasoning Ability of Visual-Language Models via Large Language Models
Successfully processed How Good are Commercial Large Language Models on African Languages?
The articles 
 Enhance Reasoning Ability of Visual-Language Models via Large Language Models [http://arxiv.org/pdf/2305.13267v1] updated on 2023-05-22, and 
How Good are Commercial Large Language Models on African Languages? [http://arxiv.org/pdf/2305.06530v1] updated on 2023-05-11 
 have been read and the content has been shared with you in your memory.


In [7]:
# Inspect the details of one registered function. It is looked up by name so
# this cell does not depend on a `func` loop variable left behind by an earlier
# cell; next() raises StopIteration if no function has that name.
academic_search_func = next(
    fn for fn in test_project.functions if fn.name == "academic_search"
)
academic_search_func.get_function_details()

{'name': 'academic_search',
 'description': 'Search for papers across multiple academic sources based on the given query.',
 'func': <function autosearch.functions.base_function.BaseFunction._create_wrapper.<locals>.wrapper(query: Annotated[str, 'The query to search for in academic sources.']) -> str>}

In [8]:
# Names of all functions currently registered on the project, in registration order.
[registered.name for registered in test_project.functions]

['factual_check',
 'get_pdf',
 'get_pdfs',
 'url_check',
 'academic_retriever',
 'academic_search']

In [9]:
# Usage example (illustrative sketch only; this cell is not runnable as written).
# - `academic_retriever`, `academic_search` and `factual_check` are never defined or
#   imported in this notebook, so building the dict raises NameError (see the recorded output).
# - `{...}` and `[...]` are placeholders: Python evaluates them as a set / list containing Ellipsis.
# - NOTE(review): ResearchProject(...) and run() are called here with a different argument
#   layout than the keyword-based call earlier in the notebook; confirm which API is current.
# - NOTE(review): if this cell ever ran to completion it would rebind the credentials
#   `config` defined near the top of the notebook; consider a distinct name.
config = {
    'doc_api_key': 'your_api_key',
    'doc_endpoint': 'your_endpoint',
    'manager': {
        'llm_config': {...},
        'is_termination_msg': lambda x: 'TERMINATE' in x.get('content', '')
    },
    'agent_configs': {
        'outline': [
            {
                'name': 'editor',
                'system_message': '...',
                'llm_config': {...},
                'teach_config': {...},
                'functions': {
                    'academic_retriever': academic_retriever,
                    'academic_search': academic_search
                }
            },
            {
                'name': 'critic',
                'system_message': '...',
                'llm_config': {...},
                'teach_config': {...},
                'functions': {
                    'factual_check': factual_check
                }
            }
        ],
        'writing': [...],  # Similar structure for writing agents
        'compiling': [...]  # Similar structure for compiling agents
    }
}

project = ResearchProject("polymer_rep", "0.0.2", 
                          "Exploring the Intricacies of Polymer Representation: Unraveling Complexity", 
                          config)
blog_post = project.run()

# Save the blog post as Markdown inside the project's directory
with open(f'{project.db.project_dir}/blog_post.md', 'w') as f:
    f.write(blog_post)

NameError: name 'academic_retriever' is not defined