In [1]:

from autosearch.functions.text_analysis import chunk_pdf

from autosearch.database.paper_database import PaperDatabase
from autosearch.analysis.document_analyzer import DocumentAnalyzer
from autosearch.functions.create_teachable_groupchat import create_teachable_groupchat

import autogen
from typing import List, Dict, Any


In [2]:
import os
from dotenv import load_dotenv
from azure.core.exceptions import HttpResponseError

# Load environment variables
load_dotenv()

# Retrieve Azure credentials from environment variables
api_key = os.getenv("DOCUMENT_INTELLIGENCE_KEY")
endpoint = os.getenv("DOCUMENT_INTELLIGENCE_ENDPOINT")

In [3]:
Project_dir = "./project_test"
os.makedirs(Project_dir, exist_ok=True)
paperdb_dir = f"{Project_dir}/paperdb"
db_dir = f"{Project_dir}/db"  
global initiate_db 
initiate_db = False
config_list = autogen.config_list_from_json(
    "OAI_CONFIG_LIST",
    file_location=".",
    filter_dict={
        "model": ["gpt-4", "gpt-4-32k"]#, "gpt-4", "gpt-35-turbo-16k", "gpt-4-0613", "gpt-3.5-turbo", "gpt-35-turbo", "gpt-35-turbo-0613"]
    },
)

print(config_list)

llm_config = {
    "config_list": config_list,  # config_list should be defined or imported
    "timeout": 120,
    # "seed": 42,
}

paper_db = PaperDatabase(paperdb_dir)

# Initialize the DocumentAnalyzer
analyzer = DocumentAnalyzer(api_key, endpoint, project_dir=Project_dir)


project_config = {
    "paper_db": paper_db,
    "doc_analyzer": analyzer,
    "project_dir": Project_dir,
    "db_dir": db_dir,
    "config_list": config_list,
    "initiate_db": initiate_db
}


[{'model': 'gpt-4', 'api_key': '38ae4759658a4466b454666531283601', 'api_type': 'azure', 'base_url': 'https://aoai-gpt4-505.openai.azure.com/', 'api_version': '2023-08-01-preview'}, {'model': 'gpt-4-32k', 'api_key': '38ae4759658a4466b454666531283601', 'base_url': 'https://aoai-gpt4-505.openai.azure.com/', 'api_type': 'azure', 'api_version': '2023-08-01-preview'}]


## create_teachable_groupchat

In [5]:
# initiate_db
prompt = """
For each memorization task, initiate your process with 'MEMORIZE_ARTICLE:'  
Delve into the passage to discern and assess its key insights. If the content presents noteworthy information, make a point to memorize these details. 
Conversely, if the passage does not offer significant insights, there's no need to commit it to memory. 
Upon choosing to memorize, you MUST finalize your notes by including both the article's title and its URL, employing the format '[source: article_title, article_url]' for efficient future access and verification.
"""
instract_assistant, instract_user = create_teachable_groupchat("instract_assistant", "instract_user", db_dir, config_list, verbosity=3)

instract_user.initiate_chat(instract_assistant, silent=True, message=prompt)




[33minstract_assistant[0m (to analyzer):


For each memorization task, initiate your process with 'MEMORIZE_ARTICLE:'  
Delve into the passage to discern and assess its key insights. If the content presents noteworthy information, make a point to memorize these details. 
Conversely, if the passage does not offer significant insights, there's no need to commit it to memory. 
Upon choosing to memorize, you MUST finalize your notes by including both the article's title and its URL, employing the format '[source: article_title, article_url]' for efficient future access and verification.


--------------------------------------------------------------------------------
[33minstract_assistant[0m (to analyzer):

Does any part of the TEXT ask the agent to perform a task or solve a problem? Answer with just one word, yes or no.

--------------------------------------------------------------------------------
[33manalyzer[0m (to instract_assistant):

yes

----------------------------------

ChatResult(chat_id=None, chat_history=[{'content': "\nFor each memorization task, initiate your process with 'MEMORIZE_ARTICLE:'  \nDelve into the passage to discern and assess its key insights. If the content presents noteworthy information, make a point to memorize these details. \nConversely, if the passage does not offer significant insights, there's no need to commit it to memory. \nUpon choosing to memorize, you MUST finalize your notes by including both the article's title and its URL, employing the format '[source: article_title, article_url]' for efficient future access and verification.\n", 'role': 'assistant'}, {'content': "Understood. I will begin the memorization process as instructed. When you provide a passage and ask me to memorize the key insights, I will proceed as follows:\n\n1. I'll read and discern the main points in the content.\n2. If the content is significant, I will memorize these details along with the source title and URL using the stipulated format for futu

## Arxiv retieval, Arxiv search

In [5]:
text = "Human-Centred Learning Analytics and AI in Education: a Systematic Literature Review"

from autosearch.functions.arxiv_search import ArxivSearch


# Usage example
function = ArxivSearch(project_config=project_config)
# Get the function details
function_details = function.get_function_details()
print(function_details)

# Now you can use function_details with your agent
# agent.equip_function(function_details)

# Or use the function directly

function.func(text)


{'name': 'arxiv_search', 'description': 'retrun arxiv pdf_urls from for the given paper title.', 'func': <function arxiv_search at 0x7fa0c3ed72e0>}


' \n\n 1. Title: Human-Centred Learning Analytics and AI in Education: a Systematic Literature Review Authors: Riordan Alfredo, Vanessa Echeverria, Yueqiao Jin, Lixiang Yan, Zachari Swiecki, Dragan Gašević, Roberto Martinez-Maldonado Pulished at 2023-12-20 04:15:01+00:00 URL: http://arxiv.org/pdf/2312.12751v1 \n\n 2. Title: Application of Artificial Intelligence in Schizophrenia Rehabilitation Management: Systematic Literature Review Authors: Hongyi Yang, Fangyuan Chang, Dian Zhu, Muroi Fumie, Zhao Liu Pulished at 2024-05-17 16:20:34+00:00 URL: http://arxiv.org/pdf/2405.10883v1 \n\n 3. Title: Systematic Literature Review of AI-enabled Spectrum Management in 6G and Future Networks Authors: Bushra Sabir, Shuiqiao Yang, David Nguyen, Nan Wu, Alsharif Abuadbba, Hajime Suzuki, Shangqi Lai, Wei Ni, Ding Ming, Surya Nepal Pulished at 2024-06-12 11:31:42+00:00 URL: http://arxiv.org/pdf/2407.10981v1 \n\n 4. Title: Application of Artificial Intelligence and Machine Learning in Libraries: A Syste

In [4]:
args = {
    "message":["Large Language Models", "Assessing Language Models", "AI safety and reliability"],
    "n_results":3
}

from autosearch.functions.arxiv_retriever import ArxivRetriever, arxiv_retriever
from typing import get_type_hints

# Usage example
arxiv_retriever = ArxivRetriever(project_config=project_config)
# Get the function details
function_details = arxiv_retriever.get_function_details()
print(function_details)

# Now you can use function_details with your agent
# agent.equip_function(function_details)

# Or use the function directly

arxiv_retriever.func(**args)
# arxiv_retriever(project_config, message, n_results=3)

{'name': 'arxiv_retriever', 'description': 'Retrieve summeries of papers from arxiv for give query.', 'func': <function ArxivRetriever.__init__.<locals>.wrapper_func at 0x7f4522237ce0>}


'Dear Researcher, Database updated with on the following topics: Large Language Models, Assessing Language Models, AI safety and reliability. Please go ahead with your task.'

## get_pdfs

In [5]:
from autosearch.functions.get_pdfs import GetPDFs

args = {
"urls": ["http://arxiv.org/pdf/2305.13267v1", "http://arxiv.org/pdf/2305.06530v1"],
"reasons": ['factual_check'] * 2
}
   
# Usage example
get_pdfs_function = GetPDFs(project_config=project_config)
# Get the function details
function_details = get_pdfs_function.get_function_details()
print(function_details)

# Now you can use function_details with your agent
# agent.equip_function(function_details)

# Or use the function directly
result = get_pdfs_function.func(**args)
print(result)

Created 14 docs with a total of 5652 tokens. Largest doc has 2302 tokens.
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
Created 23 docs with a total of 13792 tokens. Largest doc has 2992 tokens.
[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m


Insert of existing embedding ID: 4
Add of existing embedding ID: 4


[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m


Insert of existing embedding ID: 5
Add of existing embedding ID: 5


[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m


Insert of existing embedding ID: 4
Add of existing embedding ID: 4


[31m
>>>>>>>> USING AUTO REPLY...[0m


Insert of existing embedding ID: 6
Add of existing embedding ID: 6


[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m


Insert of existing embedding ID: 5
Add of existing embedding ID: 5


[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m


Insert of existing embedding ID: 10
Add of existing embedding ID: 10


[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
2 articles have been read, so far.
The articles 
 Enhance Reasoning Ability of Visual-Language Models via Large Language Models [http://arxiv.org/pdf/2305.13267v1] updated on 2023-05-22T17:33:44+00:00, and 
How Good are Commercial Large Language Models on African Languages? [http://arxiv.org/pdf/2305.06530v1] up

## get_pdf

In [4]:
from autosearch.functions.get_pdf import GetPDF, get_pdf
from typing import get_type_hints
args = {
"url": "https://arxiv.org/pdf/2110.13711",
"reason": "factual_check",
"part": "full"
}

# Usage example
get_pdfs_function = GetPDF(project_config=project_config)
# Get the function details
function_details = get_pdfs_function.get_function_details()
print(function_details)

# Now you can use function_details with your agent
# agent.equip_function(function_details)

# Or use the function directly
# result = get_pdfs_function.func(**args)
hits = get_type_hints(get_pdf, include_extras=True)
hits


NameError: name 'project_config' is not defined

## chunk_pdf

In [16]:
# papers = [f for f in os.listdir("./papers") if os.path.isfile(os.path.join(f"./papers", f))]
metadata = {
    'pdf_url':"https://doi.org/10.1016/j.mser.2020.100595",
    'title':'Polymer Informatics: Current Status and Critical Next Steps',
    'authors':'Lihua Chena,Ghanshyam Pilaniab,Rohit Batrac,Tran Doan Huana,Chiho Kima,Christopher Kuennetha,Rampi Ramprasad',
    'published':'2020-03-01',
    'updated':'2020-03-01'
    }
pdf_file = "/home/alibina/repo/usecases/autosearch/notebooks/papers/1-s2.0-S0927796X2030053X-am.pdf"


config = {
    'paper_db': paper_db,
    'doc_analyzer': analyzer,
    'project_dir': Project_dir,
    'db_dir': db_dir,
    'config_list': config_list,
    'initiate_db': initiate_db
}


chunk_pdf(pdf_file, metadata, config)

Created 54 docs with a total of 31570 tokens. Largest doc has 2947 tokens.
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m


Add of existing embedding ID: 40
Insert of existing embedding ID: 40


[31m
>>>>>>>> USING AUTO REPLY...[0m


Add of existing embedding ID: 40
Insert of existing embedding ID: 40


[31m
>>>>>>>> USING AUTO REPLY...[0m


Add of existing embedding ID: 41
Insert of existing embedding ID: 41


[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m


Add of existing embedding ID: 41
Insert of existing embedding ID: 41


[31m
>>>>>>>> USING AUTO REPLY...[0m


Add of existing embedding ID: 42
Insert of existing embedding ID: 42


[31m
>>>>>>>> USING AUTO REPLY...[0m


Add of existing embedding ID: 42
Insert of existing embedding ID: 42


[31m
>>>>>>>> USING AUTO REPLY...[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m


Add of existing embedding ID: 43


[31m
>>>>>>>> USING AUTO REPLY...[0m


Insert of existing embedding ID: 43


[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m

Add of existing embedding ID: 43
Insert of existing embedding ID: 43



[31m
>>>>>>>> USING AUTO REPLY...[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m


Add of existing embedding ID: 43


[92m    Location = ./project_test/db/uid_text_dict.pkl[0m


Insert of existing embedding ID: 43


[31m
>>>>>>>> USING AUTO REPLY...[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m


Add of existing embedding ID: 46
Insert of existing embedding ID: 46
Add of existing embedding ID: 46


[31m
>>>>>>>> USING AUTO REPLY...[0m


Insert of existing embedding ID: 46


[31m
>>>>>>>> USING AUTO REPLY...[0m


Add of existing embedding ID: 46
Insert of existing embedding ID: 46


[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m


Add of existing embedding ID: 48
Insert of existing embedding ID: 48


[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m


Add of existing embedding ID: 49
Insert of existing embedding ID: 49


[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m


Add of existing embedding ID: 54
Insert of existing embedding ID: 54


[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m


Add of existing embedding ID: 57
Insert of existing embedding ID: 57


[31m
>>>>>>>> USING AUTO REPLY...[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m


Add of existing embedding ID: 60
Insert of existing embedding ID: 60


[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m


Add of existing embedding ID: 62
Insert of existing embedding ID: 62


[31m
>>>>>>>> USING AUTO REPLY...[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m


## url_check

In [5]:
from autosearch.functions.url_check import UrlCheck

args = {
    "paper_url": "https://arxiv.org/pdf/2107.03012.pdf",
    "paper_title": "From algebra to analysis: new proofs of theorems by Ritt and Seidenberg"
}

# Usage example
get_pdfs_function = UrlCheck(project_config=project_config)
# Get the function details
function_details = get_pdfs_function.get_function_details()
print(function_details)

# Now you can use function_details with your agent
# agent.equip_function(function_details)

# Or use the function directly
url_check_res, message = get_pdfs_function.func(**args)
print(url_check_res, message)

{'name': 'url_check', 'description': 'Retrieve and process a PDF file from a given URL.', 'func': <function url_check at 0x7ff0ec8811c0>}


True The provided paper URL is from arxiv.org and is for the paper titled 'From algebra to analysis: new proofs of theorems by Ritt and Seidenberg'.


## factal check

In [6]:
from autosearch.functions.factual_check import FactualCheck


# Usage example
get_pdfs_function = FactualCheck(project_config=project_config)
# Get the function details
function_details = get_pdfs_function.get_function_details()
print(function_details)

# Now you can use function_details with your agent
# agent.equip_function(function_details)



args = [
    {
        "text": "The use of neural networks in quantum chemistry, particularly in predicting the properties of molecules and materials, has seen significant advancements. Machine learning models can now compute electronic properties [1][2] and potential energy surfaces [3] with an accuracy that contests traditional quantum chemical methods. Notably, the works of Smith et al. (2017) and Chmiela et al. (2017) have demonstrated how neural networks can predict molecular energies and forces, a process traditionally monopolized by density functional theory (DFT) but at a fraction of the computational cost.",
        "paper_title": "ANI-1: an extensible neural network potential with DFT accuracy at force field computational cost",
        "paper_url": "https://pubs.rsc.org/en/content/articlelanding/2017/sc/c6sc05720a",
        "reason": "To check the accuracy of the advancement statement regarding neural networks predicting properties in quantum chemistry."
    },
    {
        "text": "One major limitation is the quality and quantity of data required to effectively train neural networks. Accurate and diverse datasets of molecular structures and their corresponding properties are essential for developing reliable models, but such data can be scarce and expensive to produce due to the computational resources needed for high-level quantum mechanical calculations.",
        "paper_title": "Machine learning of accurate energy-conserving molecular force fields",
        "paper_url": "http://arxiv.org/pdf/2101.02930v1",
        "reason": "To validate the challenges related to the quality and quantity of data in the context of neural networks within quantum chemistry."
    },
    {
        "text": "The collaboration between these fields also birthed differentiable programming frameworks for quantum chemistry, like TorchANI and TensorMol [4]. These latter-day frameworks enable researchers to quickly prototype neural networks that learn quantum mechanical laws directly from data, providing a valuable tool to accelerate discovery. This integration has offered promising results in tasks like molecular dynamics simulations, which are key for understanding chemical reactions and material properties.", 
        "paper_title": "Automated Calculation of Thermal Rate Coefficients using Ring Polymer Molecular Dynamics and Machine-Learning Interatomic Potentials with Active Learning", 
        "paper_url": "http://arxiv.org/pdf/1805.11924v3", 
        "reason": "To confirm the factual information about differentiable programming frameworks and their impact on research in quantum chemistry and neural networks."
    }

]

for arg in args:    
    print(get_pdfs_function.func(**arg))

{'name': 'factual_check', 'description': 'Retrieve and process a PDF file from a given URL.', 'func': functools.partial(<function factual_check at 0x7ff0994485e0>, global_config={'paper_db': <autosearch.database.paper_database.PaperDatabase object at 0x7ff0995f6210>, 'doc_analyzer': <autosearch.analysis.document_analyzer.DocumentAnalyzer object at 0x7ff09955da00>, 'project_dir': './project_test', 'db_dir': './project_test/db', 'config_list': [{'model': 'gpt-4', 'api_key': '38ae4759658a4466b454666531283601', 'api_type': 'azure', 'base_url': 'https://aoai-gpt4-505.openai.azure.com/', 'api_version': '2023-08-01-preview'}, {'model': 'gpt-4-32k', 'api_key': '38ae4759658a4466b454666531283601', 'base_url': 'https://aoai-gpt4-505.openai.azure.com/', 'api_type': 'azure', 'api_version': '2023-08-01-preview'}], 'initiate_db': False})}
The provided paper URL, https://pubs.rsc.org/en/content/articlelanding/2017/sc/c6sc05720a, is not from arxiv.org. Please provide a valid arxiv URL.


The provided paper URL, http://arxiv.org/pdf/2101.02930v1, is not for the paper titled 'Machine learning of accurate energy-conserving molecular force fields'. Please provide a valid arxiv URL for the paper.
Created 15 docs with a total of 11954 tokens. Largest doc has 2993 tokens.
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m
[92m    Location = ./project_test/db/uid_text_dict.pkl[0m


Add of existing embedding ID: 4
Add of existing embedding ID: 5
Add of existing embedding ID: 4
Add of existing embedding ID: 6
Add of existing embedding ID: 5
Add of existing embedding ID: 10
Add of existing embedding ID: 26
Add of existing embedding ID: 27
Add of existing embedding ID: 28
Add of existing embedding ID: 38
Insert of existing embedding ID: 45


[31m
>>>>>>>> USING AUTO REPLY...[0m


Insert of existing embedding ID: 46


[31m
>>>>>>>> USING AUTO REPLY...[0m


Insert of existing embedding ID: 47


[31m
>>>>>>>> USING AUTO REPLY...[0m


Insert of existing embedding ID: 48


[31m
>>>>>>>> USING AUTO REPLY...[0m


Insert of existing embedding ID: 49


[31m
>>>>>>>> USING AUTO REPLY...[0m


Insert of existing embedding ID: 50


[31m
>>>>>>>> USING AUTO REPLY...[0m


Insert of existing embedding ID: 51


[31m
>>>>>>>> USING AUTO REPLY...[0m


Insert of existing embedding ID: 52


[31m
>>>>>>>> USING AUTO REPLY...[0m
max token limit: 31744
factual_check model: gpt-4-32k
[33mfactual_checker_user[0m (to factual_checker):


Below, you will find a passage labeled "TEXT" that references a specific paper: 'Automated Calculation of Thermal Rate Coefficients using Ring Polymer Molecular Dynamics and Machine-Learning Interatomic Potentials with Active Learning [http://arxiv.org/pdf/1805.11924v3]' alongside its corresponding "PAPER_CONTENT." Your task is to read the "PAPER_CONTENT" and verify the factual accuracy of the "TEXT" as it pertains to the paper.

Once you have assessed the factual accuracy, you MUST provide feedback, begining with 'FEEDBACK:'. Following your assessment, please write a summary of the paper. Begin this summary with 'Summary of Automated Calculation of Thermal Rate Coefficients using Ring Polymer Molecular Dynamics and Machine-Learning Interatomic Potentials with Active Learning [http://arxiv.org/pdf/1805.11924v3]: '

TEXT:
The collaboration be