# Alhazen Toolkit   

> A set of LangChain tools that populate and query a CEIFNS database by (1) building collections of expressions; (2) locating and loading items that represent expressions; (3) segregating the parts of items as 'fragments'; and (4) analyzing the fragments to generate notes that can then be condensed into summaries.

In [None]:
#| default_exp toolkit

In [2]:
#| hide
from nbdev import *

In [8]:
#| export

import local_resources.linkml as linkml

from alhazen.core import OllamaRunner
from alhazen.tools.basic import *
from alhazen.tools.metadata_extraction_tool import MetadataExtractionTool, MetadataExtractionWithRAGTool 
from alhazen.tools.paperqa_emulation_tool import PaperQAEmulationTool 
from alhazen.utils.ceifns_db import *

from langchain.tools import BaseTool
from langchain.pydantic_v1 import BaseModel, Extra, Field, root_validator
from langchain.schema.prompt_template import format_document

from importlib_resources import files
import local_resources.prompt_elements as prompt_elements

from sqlalchemy.orm import sessionmaker
from typing import List

In [None]:
#| export
# NOTE: this toolkit is modeled on LangChain's SQLDatabaseToolkit:
# https://github.com/langchain-ai/langchain/blob/535db72607c4ae308566ede4af65295967bb33a8/libs/community/langchain_community/agent_toolkits/sql/toolkit.py#L18
#
# Use environment variables to denote the database name + base file location
# os.environ['ALHAZEN_DB_NAME'] = 'em_tech'
# os.environ['LOCAL_FILE_PATH'] = '/Users/gburns/alhazen/'

class AlhazenToolkit(BaseModel):
    """Toolkit for building and querying an Alhazen CEIFNS (pron. 'SAI-FiNS') database.

    CEIFNS = Collection-Expression-Item-Fragment-Note-Summary.

    The toolkit holds a literature database handle and an Ollama runner, and
    hands both to the individual tools returned by :meth:`get_tools`.
    """

    # The local literature database (Collections, Expressions, Items, and Fragments).
    # `exclude=True` keeps it out of the model's exported dict/JSON representation.
    db: Ceifns_LiteratureDb = Field(exclude=True)
    # Runner whose `llm` attribute is passed to the LLM-backed tools; also excluded from export.
    ollr: OllamaRunner = Field(exclude=True)

    class Config:
        """Configuration for this pydantic object."""

        # `db` and `ollr` are not pydantic models, so arbitrary field types must be allowed.
        arbitrary_types_allowed = True

    def __init__(self, **kwargs):
        """Validate the fields, then make sure the database has a session.

        Args:
            **kwargs: Field values for the model (``db`` and ``ollr``).
        """
        super().__init__(**kwargs)

        # Open a session bound to the database engine only when the caller
        # has not already supplied one.
        if self.db.session is None:
            session_class = sessionmaker(bind=self.db.engine)
            self.db.session = session_class()

    def get_tools(self) -> List[BaseTool]:
        """Get the tools in the toolkit.

        Each tool is constructed with this toolkit's database (and, for the
        LLM-backed tools, its Ollama runner and LLM) together with the
        natural-language description that is shown to the agent.

        Returns:
            The collection-management, full-text retrieval, metadata
            extraction, and PaperQA-emulation tools, in that order.
        """
        # NOTE: adjacent string literals are concatenated with no separator,
        # so every fragment below must itself end in a space or newline.
        add_collection_tool_description = (
            "This tool executes a search for scientific papers in the EPMC database based on a query"
            " and then builds a collection out of the papers returned. "
            "Input to this tool has three parameters: \n"
            "- 'id' which is string that denotes the identifier of the collection in the database.\n"
            "- 'query' which is string that defines a query using Boolean logic for search terms.\n"
            "- 'name' which is a string that defines a descriptive name for the collection.\n"
            "The tool will execute an query over the remote database, create the collection and add papers to the collection to our local database. "
            "If successful, it will return 'Final Answer'. If not, it will return an error report."
        )
        add_collection_tool = AddCollectionFromEPMCTool(
            db=self.db, description=add_collection_tool_description
        )

        describe_collection_tool_description = (
            "This tool describes the contents of a collection in the database. "
            "Input to this tool has one parameters: \n"
            "- 'id' which is string that denotes the identifier of the collection in the database.\n"
            "If successful, it will return 'Final Answer'. If not, it will return an error report."
        )
        describe_collection_tool = DescribeCollectionCompositionTool(
            db=self.db, description=describe_collection_tool_description
        )

        delete_collection_tool_description = (
            "This tool deletes a collection from the database. "
            "Input to this tool has one parameters: \n"
            "- 'id' which is string that denotes the identifier of the collection in the database.\n"
            "The tool will delete the collection from our local database. "
            "If successful, it will return 'Final Answer'. If not, it will return an error report."
        )
        delete_collection_tool = DeleteCollectionTool(
            db=self.db, description=delete_collection_tool_description
        )

        retrieve_full_text_tool_description = (
            "This tool invokes a web search for a copy of a full text paper from the web given a doi identifier. "
            "Input to this tool has one parameters: \n"
            "- 'paper_id' which is a string that denotes the doi identifier of the paper in question.\n"
            "The tool will search online for the paper, return it and add it's text to the database. "
            "If successful, it will return 'Final Answer'. If not, it will return an error report."
        )
        retrieve_full_text_tool = RetrieveFullTextTool(
            db=self.db, description=retrieve_full_text_tool_description
        )

        metadata_extraction_tool_description = (
            "Input to this tool is a doi identifier, a search term for section titles in "
            "the paper, and the name of a type of experiment (drawn from a predefined list). "
            "The tool will execute an LLM over the paper to extract metadata from available text "
            " and then insert the metadata into the database. The output is a "
            "string that returns a completion message (either positive or an error report)."
        )
        metadata_extraction_tool = MetadataExtractionTool(
            db=self.db, ollr=self.ollr, llm=self.ollr.llm, description=metadata_extraction_tool_description
        )

        paperqa_emulation_tool_description = (
            "Input to this tool is a question, the ID value for the collection that the question "
            "will be asked over (collection_id), the number of documents to be sampled (n_sample_size), "
            "the number of documents to be synthesized in the final analysis. "
            "The tool will execute an Map-Reduce RAG pipeline to query the vector store, and then "
            "summarize the information returned to write a response to the question."
        )
        paperqa_emulation_tool = PaperQAEmulationTool(
            db=self.db, ollr=self.ollr, llm=self.ollr.llm, description=paperqa_emulation_tool_description
        )

        return [
            add_collection_tool,
            describe_collection_tool,
            delete_collection_tool,
            retrieve_full_text_tool,
            metadata_extraction_tool,
            paperqa_emulation_tool,
        ]