In [None]:

from autosearch.functions.text_analysis import chunk_pdf

from autosearch.database.paper_database import PaperDatabase
from autosearch.analysis.document_analyzer import DocumentAnalyzer
from autosearch.functions.create_teachable_groupchat import create_teachable_groupchat

import autogen
from typing import List, Dict, Any


In [None]:
import os
from dotenv import load_dotenv
from azure.core.exceptions import HttpResponseError

# Load environment variables (via python-dotenv) before reading them below.
load_dotenv()

# Azure credentials are read from the environment; each lookup yields None
# when the corresponding variable is not set.
endpoint = os.environ.get("DOCUMENT_INTELLIGENCE_ENDPOINT")
api_key = os.environ.get("DOCUMENT_INTELLIGENCE_KEY")

In [None]:
# Working directory for everything this notebook produces; created on demand.
Project_dir = "./project_test"
os.makedirs(Project_dir, exist_ok=True)
paperdb_dir = f"{Project_dir}/paperdb"
db_dir = f"{Project_dir}/db"

# Flag forwarded to the autosearch helpers through `project_config`.
# No `global` declaration is needed: at cell (module) level it has no effect.
initiate_db = False

# Read "OAI_CONFIG_LIST" from the current directory, keeping only the entries
# whose model is one of the names listed in the filter.
config_list = autogen.config_list_from_json(
    "OAI_CONFIG_LIST",
    file_location=".",
    filter_dict={"model": ["gpt-4", "gpt-4-32k"]},
)

llm_config = {
    "config_list": config_list,
    "timeout": 120,
    # "seed": 42,
}

paper_db = PaperDatabase(paperdb_dir)

# Initialize the DocumentAnalyzer
analyzer = DocumentAnalyzer(api_key, endpoint, project_dir=Project_dir)

# Shared configuration handed to every autosearch function wrapper below.
project_config = {
    "paper_db": paper_db,
    "doc_analyzer": analyzer,
    "project_dir": Project_dir,
    "db_dir": db_dir,
    "config_list": config_list,
    "initiate_db": initiate_db,
}


## create_teachable_groupchat

In [None]:
# Instruction message sent to the teachable assistant. It is assembled from
# explicit pieces so the trailing spaces of the original text stay visible.
prompt = (
    "\n"
    "For each memorization task, initiate your process with 'MEMORIZE_ARTICLE:'  \n"
    "Delve into the passage to discern and assess its key insights. If the content presents noteworthy information, make a point to memorize these details. \n"
    "Conversely, if the passage does not offer significant insights, there's no need to commit it to memory. \n"
    "Upon choosing to memorize, you MUST finalize your notes by including both the article's title and its URL, employing the format '[source: article_title, article_url]' for efficient future access and verification.\n"
)

# create_teachable_groupchat returns a pair that is unpacked into the
# assistant and the user agent.
instract_assistant, instract_user = create_teachable_groupchat(
    "instract_assistant",
    "instract_user",
    db_dir,
    config_list,
    verbosity=3,
)

# The user agent opens the conversation with the instruction prompt.
instract_user.initiate_chat(instract_assistant, message=prompt, silent=True)




## arXiv retrieval, arXiv search

In [None]:
from autosearch.functions.academic_search import AcademicSearch

# Query string passed to the search function below.
text = "Human-Centred Learning Analytics and AI in Education: a Systematic Literature Review"

# Build the function wrapper from the shared project configuration.
function = AcademicSearch(project_config=project_config)

# Print the function details exposed by the wrapper.
function_details = function.get_function_details()
print(function_details)

# The details can be handed to an agent instead of calling the function here:
# agent.equip_function(function_details)

# Direct call; as the last expression of the cell its result is displayed.
function.func(text)


In [None]:
from autosearch.functions.academic_retriever import AcademicRetriever, academic_retriever
from typing import get_type_hints

# Keyword arguments for the retriever call below.
args = {
    "message": ["Large Language Models", "Assessing Language Models", "AI safety and reliability"],
    "n_results": 3,
}

# The wrapper instance gets its own name so it does not overwrite the
# `academic_retriever` function imported above.
academic_retriever_function = AcademicRetriever(project_config=project_config)

# Print the function details exposed by the wrapper.
function_details = academic_retriever_function.get_function_details()
print(function_details)

# Now you can use function_details with your agent
# agent.equip_function(function_details)

# Or use the function directly; the result is displayed as the cell output.
academic_retriever_function.func(**args)
# academic_retriever(project_config, message, n_results=3)

## get_pdfs

In [None]:
from autosearch.functions.get_pdfs import GetPDFs

# One reason per URL; both lists have two entries.
args = dict(
    urls=[
        "http://arxiv.org/pdf/2305.13267v1",
        "http://arxiv.org/pdf/2305.06530v1",
    ],
    reasons=["factual_check", "factual_check"],
)

# Build the function wrapper from the shared project configuration.
get_pdfs_function = GetPDFs(project_config=project_config)

# Print the function details exposed by the wrapper.
function_details = get_pdfs_function.get_function_details()
print(function_details)

# The details can be handed to an agent instead of calling the function here:
# agent.equip_function(function_details)

# Direct call with the arguments prepared above.
result = get_pdfs_function.func(**args)
print(result)

## get_pdf

In [None]:
from typing import get_type_hints
from autosearch.functions.get_pdf import GetPDF, get_pdf

# Arguments for a direct call; the call itself is left commented out below.
args = dict(
    url="https://arxiv.org/pdf/2110.13711",
    reason="factual_check",
    part="full",
)

# NOTE(review): despite its plural name, this variable holds a GetPDF wrapper here.
get_pdfs_function = GetPDF(project_config=project_config)

# Print the function details exposed by the wrapper.
function_details = get_pdfs_function.get_function_details()
print(function_details)

# The details can be handed to an agent instead of calling the function here:
# agent.equip_function(function_details)

# Direct call, currently disabled:
# result = get_pdfs_function.func(**args)

# Resolve the annotations of the plain `get_pdf` function; include_extras=True
# keeps any `Annotated[...]` metadata. The mapping is shown as the cell output.
hits = get_type_hints(get_pdf, include_extras=True)
hits


## chunk_pdf

In [None]:
# Configuration passed to chunk_pdf; it carries the same keys and values as
# the `project_config` dict built earlier in the notebook.
config = dict(
    paper_db=paper_db,
    doc_analyzer=analyzer,
    project_dir=Project_dir,
    db_dir=db_dir,
    config_list=config_list,
    initiate_db=initiate_db,
)

# Bibliographic metadata attached to the PDF being chunked.
metadata = dict(
    pdf_url="https://doi.org/10.1016/j.mser.2020.100595",
    title="Polymer Informatics: Current Status and Critical Next Steps",
    authors="Lihua Chena,Ghanshyam Pilaniab,Rohit Batrac,Tran Doan Huana,Chiho Kima,Christopher Kuennetha,Rampi Ramprasad",
    published="2020-03-01",
    updated="2020-03-01",
)

# NOTE(review): machine-specific absolute path; this only runs where the file
# exists at exactly this location. Consider a path relative to the notebook.
pdf_file = "/home/alibina/repo/usecases/autosearch/notebooks/papers/1-s2.0-S0927796X2030053X-am.pdf"

chunk_pdf(pdf_file, metadata, config)

## url_check

In [None]:
from autosearch.functions.url_check import UrlCheck

# URL and title of the paper to check.
args = dict(
    paper_url="https://arxiv.org/pdf/2107.03012.pdf",
    paper_title="From algebra to analysis: new proofs of theorems by Ritt and Seidenberg",
)

# NOTE(review): the variable name is reused from the PDF cells; here it holds a UrlCheck wrapper.
get_pdfs_function = UrlCheck(project_config=project_config)

# Print the function details exposed by the wrapper.
function_details = get_pdfs_function.get_function_details()
print(function_details)

# The details can be handed to an agent instead of calling the function here:
# agent.equip_function(function_details)

# Direct call; the result is unpacked into a check result and a message.
url_check_res, message = get_pdfs_function.func(**args)
print(url_check_res, message)

## factual_check

In [None]:
from autosearch.functions.factual_check import FactualCheck

# Each entry pairs a passage with the paper (title and URL) it is checked
# against, plus the reason for running the check.
args = [
    dict(
        text="The use of neural networks in quantum chemistry, particularly in predicting the properties of molecules and materials, has seen significant advancements. Machine learning models can now compute electronic properties [1][2] and potential energy surfaces [3] with an accuracy that contests traditional quantum chemical methods. Notably, the works of Smith et al. (2017) and Chmiela et al. (2017) have demonstrated how neural networks can predict molecular energies and forces, a process traditionally monopolized by density functional theory (DFT) but at a fraction of the computational cost.",
        paper_title="ANI-1: an extensible neural network potential with DFT accuracy at force field computational cost",
        paper_url="https://pubs.rsc.org/en/content/articlelanding/2017/sc/c6sc05720a",
        reason="To check the accuracy of the advancement statement regarding neural networks predicting properties in quantum chemistry.",
    ),
    dict(
        text="One major limitation is the quality and quantity of data required to effectively train neural networks. Accurate and diverse datasets of molecular structures and their corresponding properties are essential for developing reliable models, but such data can be scarce and expensive to produce due to the computational resources needed for high-level quantum mechanical calculations.",
        paper_title="Machine learning of accurate energy-conserving molecular force fields",
        paper_url="http://arxiv.org/pdf/2101.02930v1",
        reason="To validate the challenges related to the quality and quantity of data in the context of neural networks within quantum chemistry.",
    ),
    dict(
        text="The collaboration between these fields also birthed differentiable programming frameworks for quantum chemistry, like TorchANI and TensorMol [4]. These latter-day frameworks enable researchers to quickly prototype neural networks that learn quantum mechanical laws directly from data, providing a valuable tool to accelerate discovery. This integration has offered promising results in tasks like molecular dynamics simulations, which are key for understanding chemical reactions and material properties.",
        paper_title="Automated Calculation of Thermal Rate Coefficients using Ring Polymer Molecular Dynamics and Machine-Learning Interatomic Potentials with Active Learning",
        paper_url="http://arxiv.org/pdf/1805.11924v3",
        reason="To confirm the factual information about differentiable programming frameworks and their impact on research in quantum chemistry and neural networks.",
    ),
]

# NOTE(review): the variable name is reused from the PDF cells; here it holds a FactualCheck wrapper.
get_pdfs_function = FactualCheck(project_config=project_config)

# Print the function details exposed by the wrapper.
function_details = get_pdfs_function.get_function_details()
print(function_details)

# The details can be handed to an agent instead of calling the function here:
# agent.equip_function(function_details)

# Run the check once per entry and print whatever the function returns.
for claim in args:
    print(get_pdfs_function.func(**claim))