# Setup

In [None]:
# Installs
# Third-party packages used by this notebook. langchain-core, langchain-huggingface,
# chainlit and ragas are pinned to exact versions; everything else is upgraded to
# the latest available release by the -U flag.

%pip install -qU langchain-community langchain-core==0.2.40 langchain_experimental langchain-google-community langchain-google-genai langchain-huggingface==0.0.3 langchain-openai langchain-qdrant langchain-google-vertexai
%pip install -qU -q chainlit==1.1.302
%pip install -qU -q docx2txt
%pip install -qU google-cloud-aiplatform
%pip install -qU google-cloud-discoveryengine
%pip install -qU nltk
%pip install -qU openpyxl
%pip install -qU pymupdf
%pip install -qU python-dotenv
%pip install -qU ragas==0.1.20
%pip install -qU tqdm

# Verify installed packages have compatible dependencies
%pip check

In [None]:
# Download the punkt_tab module that is used for sentence tokenization

import nltk

nltk.download('punkt_tab')

# Environment Variables

In [15]:
# Get environment variables

import os
from dotenv import load_dotenv
import uuid

# Load environment variables from .env file.
# Later cells read PROJECT_ID, REGION, GOOGLE_API_KEY, LOCATION_ID and DATA_STORE_ID
# from os.environ, so they must be defined there or in the process environment.
load_dotenv()

True

# Google Auth

In [16]:
# Set Google user permissions

import os
import sys

# Outside Colab, point GOOGLE_APPLICATION_CREDENTIALS at the service account credentials file
if "google.colab" not in sys.modules:
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = 'credentials.json'

# Inside Colab, use the permissions of the currently authenticated user instead
else:
    from google.colab import auth

    auth.authenticate_user()

#####

def test_google_perms():
    """Smoke-test Google credentials by printing the Cloud Storage buckets that can be listed."""
    from google.cloud import storage

    # The client is created without explicit arguments, so it relies on the
    # ambient credentials / project configuration.
    storage_client = storage.Client()

    # Materialize the bucket iterator and print it in one go
    print(list(storage_client.list_buckets()))

test_google_perms()

[<Bucket: 395640738565_us_import_content_with_faq_csv>, <Bucket: aie4-demo-docs>]


In [17]:
# Initialize Vertex AI

import vertexai

# PROJECT_ID and REGION are read with os.environ[...]; a missing variable raises KeyError here
vertexai.init(project=os.environ['PROJECT_ID'], location=os.environ['REGION'])

In [18]:
# Verify that our Google API key works

import requests

GOOGLE_API_KEY = os.environ['GOOGLE_API_KEY']

# The key is sent in the x-goog-api-key header rather than in the query string,
# so it does not show up in URLs that get logged, printed or echoed in errors.
url = 'https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent'

headers = {
    "Content-Type": "application/json",
    "x-goog-api-key": GOOGLE_API_KEY,
}

data = {"contents":[{"parts":[{"text":"Explain how AI works"}]}]}

# A timeout keeps the cell from hanging indefinitely if the endpoint is unreachable
response = requests.post(url, headers=headers, json=data, timeout=60)
print(response.text)

{
  "candidates": [
    {
      "content": {
        "parts": [
          {
            "text": "## Understanding the Magic Behind AI: A Simple Explanation\n\nAI, or Artificial Intelligence, might sound like something out of a sci-fi movie, but it's actually based on some pretty straightforward principles. Here's a simplified breakdown:\n\n**1. Learning from Data:**\n\n* **Think of a baby learning to walk:** They observe, try, fall, and eventually learn the right movements.\n* **AI does the same, but with data:** It's fed vast amounts of information (like images, text, or code) and analyzes it to identify patterns and relationships.\n* **The more data it receives, the better it understands and learns.**\n\n**2. Algorithms: The Recipe for Intelligence:**\n\n* **Algorithms are like instructions, guiding AI on how to process and interpret data.**\n* **They define the \"thinking process\" for the AI, determining how it analyzes information and reaches conclusions.**\n* **Think of an algori

# Utils

In [None]:
# Load the docs

# TODO - add TextLoader reference
# TODO - add CSVLoader reference
# https://python.langchain.com/docs/integrations/document_loaders/pymupdf/
# https://python.langchain.com/docs/integrations/document_loaders/microsoft_word/

from langchain_community.document_loaders import TextLoader
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_community.document_loaders import Docx2txtLoader

def process_file(path: str):
    """Load a document from disk with the loader that matches its file extension.

    Args:
        path: Path to a .txt, .pdf or .docx file. The extension is matched
            case-insensitively.

    Returns:
        Whatever the selected loader's load() returns.

    Raises:
        ValueError: If the extension is not .txt, .pdf or .docx.
    """
    import os

    # Dispatch on the real extension. A substring test such as `'txt' in path`
    # also matches directory names or other parts of the path and can pick the
    # wrong loader.
    extension = os.path.splitext(path)[1].lower()

    if extension == '.txt':
        loader = TextLoader(path)
    elif extension == '.pdf':
        loader = PyMuPDFLoader(path)
    elif extension == '.docx':
        loader = Docx2txtLoader(path)
    else:
        # Fail with a clear error instead of an UnboundLocalError on `loader`
        raise ValueError(f'No document loader found for {path}')

    return loader.load()

#####

def test_process_file():
    """Load the sample PDF and print the metadata of the first document returned."""
    loaded_docs = process_file('docs/CHW_EOC_04-21-2017_ENG.pdf')
    first_doc = loaded_docs[0]
    print(first_doc.metadata)

test_process_file()

In [None]:
# Create embeddings using Hugging Face 

from langchain_huggingface import HuggingFaceEmbeddings

def create_embeddings_huggingface(model='Snowflake/snowflake-arctic-embed-m') -> HuggingFaceEmbeddings:
    """Build a HuggingFaceEmbeddings object for the given model.

    Args:
        model: Model name passed to HuggingFaceEmbeddings as `model_name`.

    Returns:
        The constructed HuggingFaceEmbeddings instance.
    """
    return HuggingFaceEmbeddings(model_name=model)

#####

def test_create_embeddings_huggingface():
    """Embed a sample question, print the vector, and return the embeddings object."""
    sample_query = 'What is my benefit for acupuncture?'
    hf_embeddings = create_embeddings_huggingface()
    print(hf_embeddings.embed_query(sample_query))
    return hf_embeddings

test_create_embeddings_huggingface()

In [None]:
# Create embeddings using OpenAI

from langchain_openai import OpenAIEmbeddings

def create_embeddings_openai(model='text-embedding-ada-002') -> OpenAIEmbeddings:
    """Build an OpenAIEmbeddings object for the given model.

    Args:
        model: Model name passed to OpenAIEmbeddings as `model`.

    Returns:
        The constructed OpenAIEmbeddings instance.
    """
    return OpenAIEmbeddings(model=model)

#####

def test_create_embeddings_openai():
    """Embed a sample question, print the vector, and return the embeddings object."""
    sample_query = 'What is my benefit for acupuncture?'
    openai_embeddings = create_embeddings_openai()
    print(openai_embeddings.embed_query(sample_query))
    return openai_embeddings

test_create_embeddings_openai()

In [None]:
# Create embeddings using Vertex AI

# https://python.langchain.com/docs/integrations/text_embedding/google_vertex_ai_palm/

from langchain_google_vertexai import VertexAIEmbeddings

def create_embeddings_vertexai(model="text-embedding-004") -> VertexAIEmbeddings:
    """Build a VertexAIEmbeddings object for the given model.

    Args:
        model: Model name passed to VertexAIEmbeddings as `model_name`.

    Returns:
        The constructed VertexAIEmbeddings instance.
    """
    # Honor the caller's model choice instead of a hard-coded model name, so the
    # `model` argument (and its declared default) actually takes effect.
    return VertexAIEmbeddings(model_name=model)

#####

def test_create_embeddings_vertexai():
    """Embed a sample question, print the vector, and return the embeddings object."""
    sample_query = 'What is my benefit for acupuncture?'
    vertex_embeddings = create_embeddings_vertexai()
    print(vertex_embeddings.embed_query(sample_query))
    return vertex_embeddings

test_create_embeddings_vertexai()

In [None]:
# Remove empty chunks 

def remove_empty_chunks(chunks_start: list) -> list:
    """Return the chunks whose page_content contains non-whitespace text.

    Args:
        chunks_start: Objects exposing a `page_content` attribute.

    Returns:
        A new list, in the original order, without the chunks whose
        page_content is None, empty or whitespace-only. The input list is
        not modified.
    """
    # `or ''` treats page_content=None as empty instead of failing on .strip()
    return [chunk for chunk in chunks_start if (chunk.page_content or '').strip()]

#####

from langchain.schema import Document

def test_remove_empty_chunks():
    """Append an empty Document to the loaded PDF documents and print the counts before/after filtering."""
    loaded = process_file('docs/CHW_EOC_04-21-2017_ENG.pdf')

    # Add a deliberately empty document that reuses the first document's metadata
    loaded.append(Document(page_content='', metadata=loaded[0].metadata))
    print(len(loaded))

    # Print the count again once empty documents (chunks) have been removed
    filtered = remove_empty_chunks(loaded)
    print(len(filtered))

test_remove_empty_chunks()

In [None]:
# Create a text splitter using recursive character text splitter

# https://python.langchain.com/v0.1/docs/modules/data_connection/document_transformers/recursive_text_splitter/

from langchain.text_splitter import RecursiveCharacterTextSplitter

def chunk_docs_recursive(documents: list, chunk_size=500, chunk_overlap=50) -> list:
    """Split documents with RecursiveCharacterTextSplitter and drop empty chunks.

    Args:
        documents: Documents to split.
        chunk_size: Passed to the splitter as `chunk_size`.
        chunk_overlap: Passed to the splitter as `chunk_overlap`.

    Returns:
        The non-empty chunks, as filtered by remove_empty_chunks.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_overlap=chunk_overlap, chunk_size=chunk_size)
    return remove_empty_chunks(chunks_start=splitter.split_documents(documents))

#####

def test_chunk_docs_recursive():
    """Chunk the sample PDF recursively; print the chunk count and the first chunk's text."""
    recursive_chunks = chunk_docs_recursive(documents=process_file('docs/CHW_EOC_04-21-2017_ENG.pdf'))
    print(len(recursive_chunks))
    first_chunk = recursive_chunks[0]
    print(first_chunk.page_content)

test_chunk_docs_recursive()

In [None]:
# Create a text splitter using NLTK

# https://python.langchain.com/docs/how_to/split_by_token/

from langchain.text_splitter import NLTKTextSplitter

def chunk_docs_nltk(documents: list, chunk_size=500, chunk_overlap=50) -> list:
    """Split documents with NLTKTextSplitter and drop empty chunks.

    Args:
        documents: Documents to split.
        chunk_size: Passed to the splitter as `chunk_size`.
        chunk_overlap: Passed to the splitter as `chunk_overlap`.

    Returns:
        The non-empty chunks, as filtered by remove_empty_chunks.
    """
    splitter = NLTKTextSplitter(chunk_overlap=chunk_overlap, chunk_size=chunk_size)
    return remove_empty_chunks(chunks_start=splitter.split_documents(documents))

#####

def test_chunk_docs_nltk():
    """Chunk the sample PDF with NLTK; print the chunk count and the first chunk's text."""
    nltk_chunks = chunk_docs_nltk(documents=process_file('docs/CHW_EOC_04-21-2017_ENG.pdf'))
    print(len(nltk_chunks))
    first_chunk = nltk_chunks[0]
    print(first_chunk.page_content)

test_chunk_docs_nltk()


In [None]:
# Create a text splitter using semantic chunking

# https://python.langchain.com/v0.1/docs/modules/data_connection/document_transformers/semantic-chunker/

from langchain_experimental.text_splitter import SemanticChunker

def chunk_docs_semantic(documents: list, embeddings=None, breakpoint_threshold_type: str = "percentile") -> list:
    """Split documents with SemanticChunker and drop empty chunks.

    Args:
        documents: Documents to split.
        embeddings: Embeddings object handed to SemanticChunker. When None
            (the default), create_embeddings_openai() is used.
        breakpoint_threshold_type: Passed to SemanticChunker under the same
            name; defaults to "percentile".

    Returns:
        The non-empty chunks, as filtered by remove_empty_chunks.
    """
    # Build the default embeddings lazily so callers that pass their own
    # embeddings never construct the OpenAI ones.
    if embeddings is None:
        embeddings = create_embeddings_openai()

    text_splitter = SemanticChunker(embeddings, breakpoint_threshold_type=breakpoint_threshold_type)

    chunks_start = text_splitter.split_documents(documents)

    return remove_empty_chunks(chunks_start)

#####

def test_chunk_docs_semantic():
    """Chunk the sample PDF semantically; print the chunk count and the first chunk's text."""
    semantic_chunks = chunk_docs_semantic(process_file('docs/CHW_EOC_04-21-2017_ENG.pdf'))
    print(len(semantic_chunks))
    first_chunk = semantic_chunks[0]
    print(first_chunk.page_content)

test_chunk_docs_semantic()

In [None]:
# Create a Qdrant vector store

from langchain_core.embeddings import Embeddings
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams

def create_qdrant_vector_store(location: str, collection_name: str, vector_size: int, embeddings: Embeddings, documents: list) -> QdrantVectorStore:
    """Create a Qdrant collection, wrap it in a QdrantVectorStore and add the documents.

    Args:
        location: Passed to QdrantClient as `location` (the tests in this file use ":memory:").
        collection_name: Name of the collection to create and use.
        vector_size: Vector size configured for the collection.
            NOTE(review): presumably must match the embedding dimension - confirm.
        embeddings: Embeddings object used by the vector store.
        documents: Documents added to the store.

    Returns:
        The QdrantVectorStore after the documents have been added.
    """
    client = QdrantClient(location=location)

    # The collection is configured for cosine distance
    vector_params = VectorParams(size=vector_size, distance=Distance.COSINE)
    client.create_collection(collection_name=collection_name, vectors_config=vector_params)

    store = QdrantVectorStore(client=client, collection_name=collection_name, embedding=embeddings)
    store.add_documents(documents)

    return store

#####

def test_create_qdrant_vector_store():
    """Build an in-memory Qdrant store from the sample PDF and print its collection name."""
    openai_embeddings = create_embeddings_openai()

    loaded = process_file('docs/CHW_EOC_04-21-2017_ENG.pdf')
    print(len(loaded))

    recursive_chunks = chunk_docs_recursive(loaded)
    print(len(recursive_chunks))

    # 1536 is the vector size configured for the "test" collection
    store = create_qdrant_vector_store(":memory:", "test", 1536, openai_embeddings, recursive_chunks)
    print(store.collection_name)

test_create_qdrant_vector_store()

In [None]:
# Create a Qdrant retriever

# TODO - Add reference 

from langchain_core.retrievers import BaseRetriever
from langchain_qdrant import QdrantVectorStore

def create_retriever_qdrant(vector_store: QdrantVectorStore) -> BaseRetriever:
    """Return the retriever produced by the vector store's as_retriever() with default arguments."""
    return vector_store.as_retriever()

#####

def test_create_retriever_qdrant(text: str = None):
    """Build a Qdrant retriever over the sample PDF and, if `text` is given, print the first hit.

    Args:
        text: Optional query. When falsy, the retriever is built but not queried.
    """
    openai_embeddings = create_embeddings_openai()
    loaded = process_file('docs/CHW_EOC_04-21-2017_ENG.pdf')
    recursive_chunks = chunk_docs_recursive(loaded)
    store = create_qdrant_vector_store(":memory:", "test", 1536, openai_embeddings, recursive_chunks)
    qdrant_retriever = create_retriever_qdrant(store)

    # Nothing to query without a question
    if not text:
        return

    hits = qdrant_retriever.invoke(text)
    print(hits[0])

print('\nQDRANT')
test_create_retriever_qdrant('What is my benefit for acupuncture?')

In [None]:
# Create a Vertex AI retriever

# https://python.langchain.com/docs/integrations/retrievers/google_vertex_ai_search/

from langchain_google_community import VertexAISearchRetriever

def create_retriever_vertexai() -> VertexAISearchRetriever:
    """Build a VertexAISearchRetriever configured from environment variables.

    Reads PROJECT_ID, LOCATION_ID and DATA_STORE_ID from os.environ (KeyError
    if one is missing) and sets max_documents to 3.

    Returns:
        The constructed VertexAISearchRetriever.
    """
    env = os.environ
    return VertexAISearchRetriever(
        project_id=env['PROJECT_ID'],
        location_id=env['LOCATION_ID'],
        data_store_id=env['DATA_STORE_ID'],
        max_documents=3,
    )

#####

def test_create_retriever_vertexai(text: str = None):
    """Build the Vertex AI Search retriever and, if `text` is given, print the first hit.

    Args:
        text: Optional query. When falsy, the retriever is built but not queried.
    """
    vertex_retriever = create_retriever_vertexai()

    # Nothing to query without a question
    if not text:
        return

    hits = vertex_retriever.invoke(text)
    print(hits[0])

print('\nVERTEX AI')
test_create_retriever_vertexai('What is my benefit for acupuncture?')

In [None]:
# Create a prompt template

# https://python.langchain.com/v0.1/docs/modules/model_io/prompts/quick_start/#chatprompttemplate
# https://python.langchain.com/v0.2/api_reference/core/prompts/langchain_core.prompts.chat.ChatPromptTemplate.html

from langchain.prompts import ChatPromptTemplate
from langchain.prompts import PromptTemplate

def create_chat_prompt_template(prompt: str = None) -> PromptTemplate:
    """Build the prompt template used by the RAG chain.

    Args:
        prompt: Optional template text. When None (the default), the built-in
            health-plan template below is used. create_chain feeds the prompt
            a `question` and a `context` value, so a custom template should
            use the {question} and {context} placeholders.

    Returns:
        A PromptTemplate created with PromptTemplate.from_template.
    """
    default_template = '''
    You are a helpful conversational agent for the State of California.
    Your expertise is fully understanding the California Health & Wellness health  plan. 
    You need to answer questions posed by the member, who is trying to get answers about their health plan.  
    Your goal is to provide a helpful and detailed response, in at least 2-3 sentences. 

    You will be analyzing the health plan documents to derive a good answer, based on the following information:
    1. The question asked.
    2. The provided context, which comes from various documents of the pharmacy manuals repository. You will need to answer the question based on the provided context.

    Now it's your turn!

    {question}

    {context}

    '''

    # Honor a caller-supplied template instead of silently discarding it
    template = prompt if prompt is not None else default_template

    return PromptTemplate.from_template(template)

#####

def test_create_chat_prompt_template():
    """Build the default prompt template and print it."""
    default_prompt = create_chat_prompt_template()
    print(default_prompt)

test_create_chat_prompt_template()

In [None]:
# Create a LangChain chain

# https://python.langchain.com/docs/integrations/llms/google_ai/
# https://python.langchain.com/docs/integrations/chat/google_generative_ai/
# https://ai.google.dev/gemini-api/docs/safety-settings 

from google.generativeai.types import HarmCategory, HarmBlockThreshold
from langchain_core.output_parsers import StrOutputParser
from langchain_core.retrievers import BaseRetriever
from langchain_core.runnables import RunnablePassthrough
from langchain_google_genai import ChatGoogleGenerativeAI
from operator import itemgetter

def create_chain (model_name: str, prompt: ChatPromptTemplate, retriever: BaseRetriever):
    """Build a retrieval-augmented chain around a Gemini chat model.

    Args:
        model_name: Model identifier; must contain "gemini" (case-insensitive).
        prompt: Prompt piped into the LLM; it receives `question` and `context`.
        retriever: Retriever invoked with the question to produce the context.

    Returns:
        A runnable that takes {"question": ...} and produces
        {"response": <LLM output>, "context": <retriever output>}.

    Raises:
        ValueError: If model_name does not contain "gemini".
    """
    # Reject unsupported models up front; otherwise `llm` would be unbound
    # when the chain is assembled below.
    if "gemini" not in model_name.lower():
        raise ValueError(f"Unsupported model name: {model_name}")

    llm = ChatGoogleGenerativeAI(
        model=model_name,
        temperature=0,
        safety_settings={
                HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_ONLY_HIGH,
                HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_ONLY_HIGH,
                HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_ONLY_HIGH,
                HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_ONLY_HIGH,
            },
        )

    chain = (
        # Step 1: retrieve context for the question and carry the question along
        {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
        # Step 2: re-assigns `context` to itself; appears redundant but is left in place
        | RunnablePassthrough.assign(context=itemgetter("context"))
        # Step 3: run prompt + LLM and return the context next to the response
        | {"response": prompt | llm, "context": itemgetter("context")}
        )

    return chain

#####

def test_create_chain_qdrant():
    """Run one sample question through a chain backed by an in-memory Qdrant store and print the result."""
    openai_embeddings = create_embeddings_openai()
    loaded = process_file('docs/CHW_EOC_04-21-2017_ENG.pdf')
    recursive_chunks = chunk_docs_recursive(loaded)
    store = create_qdrant_vector_store(":memory:", "test", 1536, openai_embeddings, recursive_chunks)
    qdrant_retriever = create_retriever_qdrant(store)
    template = create_chat_prompt_template()
    rag_chain = create_chain('gemini-1.5-flash', template, qdrant_retriever)
    print(rag_chain.invoke({'question' : 'What is my benefit for acupuncture?'}))

print('\nQDRANT')
test_create_chain_qdrant()

###

def test_create_chain_vertexai():
    """Run one sample question through a chain backed by the Vertex AI Search retriever and print the result."""
    vertex_retriever = create_retriever_vertexai()
    template = create_chat_prompt_template()
    rag_chain = create_chain('gemini-1.5-flash', template, vertex_retriever)
    print(rag_chain.invoke({'question' : 'What is my benefit for acupuncture?'}))

print('\nVERTEX AI')
test_create_chain_vertexai()

In [56]:
# Generate answers from a chain using a list of questions

def generate_answers_contexts(chain, questions: list):
    """Invoke the chain once per question and collect answers and context texts.

    Args:
        chain: Object with an invoke() method that accepts {"question": ...}
            and returns a mapping with a "response" (exposing `.content`) and
            a "context" (iterable of objects exposing `.page_content`).
        questions: Questions to ask, in order.

    Returns:
        A tuple (answers, contexts): answers[i] is the response content for
        questions[i]; contexts[i] is the list of page_content strings for it.
    """
    collected_answers, collected_contexts = [], []

    for current_question in questions:
        print(current_question)

        # A single chain call yields both the model reply and the retrieved documents
        result = chain.invoke({"question" : current_question})
        print(result)

        collected_answers.append(result["response"].content)

        retrieved_texts = []
        for retrieved_doc in result["context"]:
            retrieved_texts.append(retrieved_doc.page_content)
        collected_contexts.append(retrieved_texts)

    return collected_answers, collected_contexts

In [None]:
# Run a Ragas evaluation 

from datasets import Dataset
from pandas import DataFrame
from ragas import evaluate
from ragas.metrics import (faithfulness, answer_relevancy, answer_correctness, context_recall, context_precision)

def run_ragas_evaluation(chain, questions: list, groundtruths: list, eval_metrics: list = [answer_correctness, answer_relevancy, context_recall, context_precision, faithfulness]):
    """Answer the questions with the chain and score the results with Ragas.

    Args:
        chain: Chain passed to generate_answers_contexts.
        questions: Questions to evaluate.
        groundtruths: Reference answers, one per question, in the same order.
        eval_metrics: Ragas metrics passed to evaluate(). The default list is
            shared between calls; it is only read here, never modified.

    Returns:
        A tuple (ragas_results, ragas_results_df): the object returned by
        evaluate() and its to_pandas() conversion.

    Raises:
        ValueError: If questions and groundtruths differ in length.
    """
    # Fail fast: a mismatch would otherwise surface, at the earliest, when the
    # dataset is built - after every question has already gone through the chain.
    if len(questions) != len(groundtruths):
        raise ValueError(
            f"questions ({len(questions)}) and groundtruths ({len(groundtruths)}) must have the same length"
        )

    answers, contexts = generate_answers_contexts(chain, questions)

    # Create the input dataset
    input_dataset = Dataset.from_dict({
        "question" : questions,         # Supplied by the caller
        "answer" : answers,             # From the chain
        "contexts" : contexts,          # From the chain
        "ground_truth" : groundtruths   # Supplied by the caller
    })

    # Run the Ragas evaluation using the input dataset and eval metrics
    ragas_results = evaluate(input_dataset, eval_metrics)
    ragas_results_df = ragas_results.to_pandas()

    return ragas_results, ragas_results_df
  
  #####

def test_run_ragas_evaluation():
    """Placeholder: only prints "test"; no evaluation is run here."""
    print("test")

test_run_ragas_evaluation()

In [None]:
# Query Vertex AI Search datastore using HTTP Post

import json
import requests
import google.auth
from google.auth.transport.requests import Request

# Obtain the default Google credentials and refresh them to get a bearer token
credentials, project_id = google.auth.default()
credentials.refresh(Request())
access_token = credentials.token

# Never print the token itself: notebook outputs are saved and shared with the file
print("Access token acquired" if access_token else "No access token returned")

def query_chunks(query, n=5):
    """Search the Vertex AI Search data store over REST, requesting chunk results.

    Reads PROJECT_ID, LOCATION_ID and DATA_STORE_ID from os.environ (the same
    variables create_retriever_vertexai uses) and the module-level
    `access_token` for the bearer header.

    Args:
        query: The search text.
        n: Sent as `pageSize` in the request body.

    Returns:
        The decoded JSON body of the HTTP response. For a non-200 response the
        status and body are printed first and the JSON body is still returned.
    """
    # Take the identifiers from the environment; no PROJECT_ID / LOCATION_ID /
    # DATA_STORE_ID globals are defined anywhere in this notebook.
    project_id = os.environ['PROJECT_ID']
    location_id = os.environ['LOCATION_ID']
    data_store_id = os.environ['DATA_STORE_ID']

    # The 'us' location has its own endpoint host
    if location_id == 'us':
        api_endpoint = 'us-discoveryengine.googleapis.com'
    else:
        api_endpoint = 'discoveryengine.googleapis.com'

    serving_config = (
        f"projects/{project_id}/locations/{location_id}/collections/default_collection"
        f"/dataStores/{data_store_id}/servingConfigs/default_search"
    )

    url = f"https://{api_endpoint}/v1alpha/{serving_config}:search"
    print(url)

    headers = {
        "Authorization": f"Bearer {access_token}",
        "Content-Type": "application/json",
    }

    post_data = {
        "servingConfig": serving_config,
        "pageSize": n,
        "query": query,
        "contentSearchSpec": {"searchResultMode": "CHUNKS"},
    }

    # A timeout keeps the call from hanging indefinitely if the endpoint is unreachable
    response = requests.post(url, headers=headers, json=post_data, timeout=60)

    if response.status_code != 200:
        print(
            f"Error retrieving search results: {response.status_code} -"
            f" {response.text}"
        )

    return response.json()

#####

# Query the data store with a sample question and print the decoded JSON result
test = query_chunks('What is my benefit for acupuncture?')
print(test)

# Create RAG chain using Vertex AI vector store

In [None]:
# Build RAG chain using Vertex AI Agent Builder datastore
# The resulting module-level `chain` is what the later test and evaluation cells invoke.

retreiver = create_retriever_vertexai()
chat_prompt_template = create_chat_prompt_template()
chain = create_chain('gemini-1.5-flash', chat_prompt_template, retreiver)

In [None]:
# Test the chain 

# Sample questions sent through the chain one at a time
questions = ['What is my benefit for acupuncture?',
'Who should I call if I have an emergency?',
'What are the responsibilities of my PCP?',]

for question in questions:
    print(question)
    result = chain.invoke({"question" : question})
    # Print the full result (response and context), then just the response content
    print(result)
    print(result["response"].content)

# Generate answers from the golden Q&A

In [None]:
import pandas as pd

# Load the Golden Q&A
testset_df = pd.read_excel('golden_qa/KN Virtual Assist POC_08.09.24 1_mk.xlsx', 'Consolidated Golden QnA')

# Take the "Question" column and coerce every entry to str
questions = testset_df["Question"].values.tolist()
questions = [str(question) for question in questions]

# Invokes the chain once per question (prints each question and response)
answers, contexts = generate_answers_contexts(chain, questions)

# Create customer testset and evaluate using Ragas

In [24]:
# Load the golden Q&A and get questions and ground truths

import os
import pandas as pd

testset_df = pd.read_excel('golden_qa/KN Virtual Assist POC_08.09.24 1_mk.xlsx', 'Consolidated Golden QnA')

# Coerce every entry to str
questions = testset_df["Question"].values.tolist()
questions = [str(question) for question in questions]

groundtruths = testset_df["Answer"].values.tolist()
groundtruths = [str(ground_truth) for ground_truth in groundtruths]

# Make sure the output folder exists so to_csv does not fail on a fresh checkout
os.makedirs("testsets", exist_ok=True)

print("Writing customer_testset.csv")
testset_df.to_csv("testsets/customer_testset.csv")
testset_df

In [None]:
# Evaluate the customer testset using Ragas

import os
import pandas as pd

# Create the output folder up front so a missing directory cannot discard the
# results after the evaluation (one chain call per question) has finished.
os.makedirs("ragas", exist_ok=True)

# Get the questions and groundtruths from the dataframe
testset_df = pd.read_csv("testsets/customer_testset.csv")

questions = testset_df["Question"].values.tolist()
questions = [str(question) for question in questions]

groundtruths = testset_df["Answer"].values.tolist()
groundtruths = [str(ground_truth) for ground_truth in groundtruths]

# Specify the eval metrics
eval_metrics = [answer_correctness, answer_relevancy, context_precision, context_recall, faithfulness]

# Run the Ragas evaluation and show the results
ragas_results, ragas_results_df = run_ragas_evaluation(chain, questions, groundtruths, eval_metrics)

# Write the results to disk
print("Writing customer_testset_ragas_results.csv")
ragas_results_df.to_csv("ragas/customer_testset_ragas_results.csv")

# Show the results
ragas_results

# Create synthetic testset and evaluate using Ragas

In [None]:
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

from ragas.testset.evolutions import simple, reasoning, multi_context
from ragas.testset.generator import TestsetGenerator

# Create the output folder up front so a missing directory cannot discard the
# generated testset after generation has finished.
import os

os.makedirs("testsets", exist_ok=True)

# Load the docs
documents = []

paths = ['docs/CHW_EOC_04-21-2017_ENG.pdf']

for path in paths:
    documents.extend(process_file(path=path))

# Chunk the docs
# chunks = chunk_docs_nltk(documents, 1500, 150)
chunks = chunk_docs_semantic(documents)

# Set up the parameters for generating the testset
generator_llm = ChatOpenAI(model="gpt-3.5-turbo")
critic_llm = ChatOpenAI(model="gpt-4o-mini")
embeddings = OpenAIEmbeddings()
generator = TestsetGenerator.from_langchain(generator_llm, critic_llm, embeddings)

# Evolution types mapped to the weights passed to the generator (they sum to 1.0)
distributions = {simple: 0.5, multi_context: 0.4, reasoning: 0.1}

# Generate the testset and save to disk
testset = generator.generate_with_langchain_docs(documents=chunks, test_size=50, distributions=distributions)
testset_df = testset.to_pandas()

print("Writing synthetic_testset.csv")
testset_df.to_csv("testsets/synthetic_testset.csv")
testset_df

In [None]:
# Evaluate the synthetic testset using Ragas

import os
import pandas as pd

# Create the output folder up front so a missing directory cannot discard the
# results after the evaluation (one chain call per question) has finished.
os.makedirs("ragas", exist_ok=True)

# Get the questions and groundtruths from the dataframe
testset_df = pd.read_csv("testsets/synthetic_testset.csv")

questions = testset_df["question"].values.tolist()
questions = [str(question) for question in questions]

groundtruths = testset_df["ground_truth"].values.tolist()
groundtruths = [str(ground_truth) for ground_truth in groundtruths]

# Specify the eval metrics
eval_metrics = [answer_correctness, answer_relevancy, context_precision, context_recall, faithfulness]

# Run the Ragas evaluation and show the results
ragas_results, ragas_results_df = run_ragas_evaluation(chain, questions, groundtruths, eval_metrics)

# Write the results to disk
print("Writing synthetic_testset_ragas_results.csv")
ragas_results_df.to_csv("ragas/synthetic_testset_ragas_results.csv")

# Show the results
ragas_results

# Create sample ReAct agent

In [21]:
from langchain import hub
from langchain.agents import AgentExecutor, create_react_agent, load_tools
from langchain_google_genai import ChatGoogleGenerativeAI
from google.generativeai.types import HarmCategory, HarmBlockThreshold

# Gemini chat model with temperature 0; every listed harm category uses BLOCK_ONLY_HIGH
llm = ChatGoogleGenerativeAI(
            model="gemini-1.5-flash",
            temperature=0,
            safety_settings={
                    HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_ONLY_HIGH,
                    HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_ONLY_HIGH,
                    HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_ONLY_HIGH,
                    HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_ONLY_HIGH,
                },
            )

# The only tool given to the agent is the arxiv tool
tools = load_tools(["arxiv"])

# Pull the "hwchase17/react" prompt from the LangChain hub (requires network access)
prompt = hub.pull("hwchase17/react")

agent = create_react_agent(llm, 
                           tools, 
                           prompt)

# verbose=True prints the agent's intermediate thoughts and actions
agent_executor = AgentExecutor(agent=agent, 
                               tools=tools, 
                               verbose=True)

# Ask about a paper by its arXiv identifier
agent_executor.invoke(
    {
        "input": "What's the paper 1605.08386 about?",
        }
)



[1m> Entering new AgentExecutor chain...[0m


I0000 00:00:1729533778.578926 2869469 fork_posix.cc:77] Other threads are currently calling into gRPC, skipping fork() handlers


[32;1m[1;3mThought: I need to find the paper on arXiv.org.
Action: arxiv
Action Input: 1605.08386[0m[36;1m[1;3mPublished: 2016-05-26
Title: Heat-bath random walks with Markov bases
Authors: Caprice Stanley, Tobias Windisch
Summary: Graphs on lattice points are studied whose edges come from a finite set of
allowed moves of arbitrary length. We show that the diameter of these graphs on
fibers of a fixed integer matrix can be bounded from above by a constant. We
then study the mixing behaviour of heat-bath random walks on these graphs. We
also state explicit conditions on the set of moves so that the heat-bath random
walk, a generalization of the Glauber dynamics, is an expander in fixed
dimension.[0m[32;1m[1;3mThought: I now know the final answer
Final Answer: The paper "Heat-bath random walks with Markov bases" by Caprice Stanley and Tobias Windisch studies graphs on lattice points with edges defined by a finite set of allowed moves. It focuses on bounding the diameter of these 

{'input': "What's the paper 1605.08386 about?",
 'output': 'The paper "Heat-bath random walks with Markov bases" by Caprice Stanley and Tobias Windisch studies graphs on lattice points with edges defined by a finite set of allowed moves. It focuses on bounding the diameter of these graphs on fibers of a fixed integer matrix and analyzing the mixing behavior of heat-bath random walks on them. The paper also provides conditions for the heat-bath random walk to be an expander in fixed dimension. \n of heat-bath random walks on them. The paper also provides conditions for the heat-bath random walk to be an expander in fixed dimension.'}