#### Simple example of creating a data agent as a tool

In [1]:
import openai
from llama_index.agent import OpenAIAgent
from dotenv import load_dotenv
import os


In [2]:
load_dotenv()
api_key = os.getenv('OPENAI_API_KEY')

In [3]:
openai.api_key = api_key

In [4]:
from llama_index.tools.tool_spec.base import BaseToolSpec
import datetime

class DateToolSpec(BaseToolSpec):
    spec_functions = ['get_date']
    
    def get_date(self):
        "A tool to return todays date"
        return datetime.date.today()

In [5]:
example_agent = OpenAIAgent.from_tools(DateToolSpec().to_tool_list(), verbose=True)
print(example_agent.chat("What is the date?"))

=== Calling Function ===
Calling function: get_date with args: {}
Got output: 2023-09-18
Today's date is September 18, 2023.


### Focusing on building an Advanced Search Tool 

this tool will allow agents to search and retrieve information from PubMed

In [59]:
from llama_index.tools.tool_spec.base import BaseToolSpec
import requests
from typing import Optional, List
import xml.etree.ElementTree as ET

SEARCH_URL_TMPL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&retmode=json&usehistory=y&term={term}&retmax={max_results}"


FETCH_URL_TMPL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&retmode=xml&&id={ids}"

class PubMedSearchToolSpec(BaseToolSpec):
    
    spec_functions = ['search']
    
    def search(self, term: str, max_results: Optional[int] = 20): 
        """
        Search PubMed for biomedical research articles relating to a search term and receive the title and abstracts of those articles.

        args:
            term (str): the search term to filter PubMed articles 
            max_results Optional(int): the number of results to return from PubMed 

        """
        response = requests.post(SEARCH_URL_TMPL.format(term=term, max_results=max_results)).json()
        ids = response['esearchresult']['idlist']
        return self.retrieve_documents(ids) 
    
    def retrieve_documents(self, ids: List[str]): 
        response = requests.post(
            FETCH_URL_TMPL.format(ids=",".join(ids))
        ).text
        return self.parse_documents(response)
    
    def parse_documents(self, documents: str):
        documents_tree = ET.fromstring(documents)
        parsed_documents = []
        for document in documents_tree: 
            article = document[0].find('Article')
            title = article.find('ArticleTitle').text
            abstract = article.find('Abstract')
            
            abstract_text = ""
            if abstract is not None:
                abstract_text_elem = abstract.find('AbstractText')
                if abstract_text_elem is not None:
                    abstract_text = abstract_text_elem.text
                
                copyright_elem = abstract.find('CopyrightInformation')
                if copyright_elem:
                    copyright = copyright_elem.text
                else: 
                    copyright = None
            else:
                copyright = None
            
            date = article.find('ArticleDate')
            if date: 
                date = f"{date.find('Year').text}-{date.find('Month').text}-{date.find('Day').text}"
            else: 
                date = None
            
            parsed_documents.append({
                'date': date,
                'title': title,
                'abstract': abstract_text,
                'authors': [x.text for x in article.findall('AuthorList/Author/LastName')],
                'journal': article.find('Journal/Title').text,
                'doi': article.find('ELocationID').text,
                'copyright': copyright
            })

        return parsed_documents

            


tool = PubMedSearchToolSpec()
tool.search('nitrogen')

[{'date': '2023-09-18',
  'title': 'A Smartphone-Based Sensing for Portable and Sensitive Visual Detection of Hg (II) via Nitrogen Doped Carbon Quantum Dots Modified Paper Strip.',
  'abstract': 'The development of portable and cost-effective sensing system for Hg',
  'authors': ['Yin', 'Zhou', 'Guo', 'Sun', 'Zhu', 'Wang', 'Ma', 'Zhang'],
  'journal': 'Journal of fluorescence',
  'doi': '10.1007/s10895-023-03439-1',
  'copyright': None},
 {'date': '2023-09-18',
  'title': 'Modified physiology of burley tobacco plants genetically engineered to express Yb',
  'abstract': 'Transgenic overexpression of a NtEGY2 gene restores normal green color of burley tobacco plants, but does not increase nitrogen utilization efficiency beyond that exhibited by wild-type individuals. Nitrogen physiology is important in tobacco because of its role in generation of leaf yield and accumulation of nitrogen-containing alkaloids that can react with nitrosating agents in the formation of carcinogenic tobacco-sp

In [60]:
agent = OpenAIAgent.from_tools(PubMedSearchToolSpec().to_tool_list(), verbose=True)

In [62]:
print(agent.chat('what are the latest developments on curing cancer at the beginning of september 2023?'))

=== Calling Function ===
Calling function: search with args: {
  "term": "curing cancer",
  "max_results": 5
}
Got output: [{'date': '2023-09-11', 'title': 'Testicular tumours in children: a single-centre experience.', 'abstract': 'Testicular tumours in childhood have diverse characteristics for different age ranges. This study aimed to describe the pattern, presentation and outcomes of primary testicular tumours in a paediatric population.', 'authors': ['Ali', 'Latif', 'Sheikh', 'Perveen', 'Bilal', 'Sarwar'], 'journal': 'Singapore medical journal', 'doi': '10.4103/singaporemedj.SMJ-2021-380', 'copyright': None}, {'date': '2023-09-16', 'title': 'Ototoxicity associated with high-dose carboplatin for patients with previously treated germ cell tumors.', 'abstract': 'High-dose carboplatin is an essential part of curative high-dose chemotherapy (HDCT) for patients with previously treated germ cell tumors (GCTs). Although hearing loss (HL) is a known side effect of HDCT, data on its severity