## Define a tool
We define a search tool using LangChain's `GoogleSerperAPIWrapper`.

In [None]:
import os
from langchain.utilities import GoogleSerperAPIWrapper
# SECURITY: API keys must not be committed to source. The key that used to be
# hard-coded on this line should be treated as leaked and revoked.
# The key is now taken from the SERPER_API_KEY environment variable; if it is
# missing, ask for it once without echoing it to the notebook output.
if not os.environ.get("SERPER_API_KEY"):
    from getpass import getpass

    os.environ["SERPER_API_KEY"] = getpass("SERPER_API_KEY: ")
search = GoogleSerperAPIWrapper()

In [None]:
import guidance
# Build the guidance-side LLM from a local llama.cpp model file and install it
# as guidance's default LLM.
# NOTE(review): before_role/after_role presumably wrap role names as "<|role|>"
# in the rendered prompt -- confirm against the guidance version in use.
llama = guidance.llms.LlamaCpp(
    model="/home/karajan/Downloads/open-llama-3b-q8_0.bin",
    tokenizer="openaccess-ai-collective/manticore-13b-chat-pyg",
    before_role="<|",
    after_role="|>",
    n_gpu_layers=300,
    n_threads=12,
    caching=False,
)
guidance.llm = llama

In [None]:
from langchain.llms import LlamaCpp

# Settings for the LangChain-side llama.cpp model used by the retrieval chain.
model_type = "LlamaCpp"
model_path = "/home/karajan/Downloads/open-llama-3b-q8_0.bin"
model_n_ctx = 1000
target_source_chunks = 4
n_gpu_layers = 500
use_mlock = 0
# Environment variables are always strings, so convert explicitly: n_batch is
# an int whether it comes from N_BATCH or from the default of 512. An unset or
# empty N_BATCH falls back to the default.
n_batch = int(os.environ.get("N_BATCH") or 512)
callbacks = []
# Module-level slot that can_answer() overwrites with its formatted QA prompt.
qa_prompt = ""
llm = LlamaCpp(
    model_path=model_path,
    n_ctx=model_n_ctx,
    callbacks=callbacks,
    verbose=False,
    n_gpu_layers=n_gpu_layers,
    use_mlock=use_mlock,
    top_p=0.9,
    n_batch=n_batch,
)


In [None]:
from langchain.text_splitter import CharacterTextSplitter, TokenTextSplitter, RecursiveCharacterTextSplitter
from langchain.docstore.document import Document
from langchain.embeddings import HuggingFaceEmbeddings, HuggingFaceInstructEmbeddings
import os
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
import re 
from colorama import Fore, Style

# NOTE(review): `retriver` (sic) is not referenced anywhere else in the visible
# code; the retriever actually used is the `retriever` returned by ingest_file.
retriver = ""

# Name of the embeddings model that ingest_file() looks up in EMBEDDINGS_MAP.
EMBEDDINGS_MODEL = "all-MiniLM-L6-v2"

# Supported model names and the LangChain embeddings class used to load each.
EMBEDDINGS_MAP = {
    "hkunlp/instructor-xl": HuggingFaceInstructEmbeddings,
    "hkunlp/instructor-large": HuggingFaceInstructEmbeddings,
    "all-MiniLM-L6-v2": HuggingFaceEmbeddings,
    "sentence-t5-xxl": HuggingFaceEmbeddings,
}

def clean_text(text):
    """Flatten *text* onto a single line and strip punctuation.

    Every newline is turned into one space, then each character that is
    neither a word character nor whitespace is removed. Existing runs of
    whitespace are left untouched.
    """
    single_line = " ".join(text.split("\n"))
    return re.sub(r'[^\w\s]', '', single_line)

def load_unstructured_document(document: str) -> list[Document]:
    """Read a text file into a single-element list of ``Document``.

    Args:
        document: Path of the text file to read.

    Returns:
        A list holding one ``Document`` whose ``page_content`` is the whole
        file and whose metadata carries the file's base name under "title".
    """
    # Decode as UTF-8 explicitly so the result does not depend on the
    # platform's locale default.
    with open(document, 'r', encoding="utf-8") as file:
        text = file.read()
    title = os.path.basename(document)
    return [Document(page_content=text, metadata={"title": title})]

def split_documents(documents: list[Document], chunk_size: int = 250, chunk_overlap: int = 20) -> list[Document]:
    """Split *documents* into chunks with a ``RecursiveCharacterTextSplitter``.

    Args:
        documents: Documents to split.
        chunk_size: Passed to the splitter as ``chunk_size``.
        chunk_overlap: Passed to the splitter as ``chunk_overlap``.

    Returns:
        Whatever the splitter's ``split_documents`` produces for *documents*.
    """
    splitter_options = {"chunk_size": chunk_size, "chunk_overlap": chunk_overlap}
    chunks = RecursiveCharacterTextSplitter(**splitter_options).split_documents(documents)
    return chunks



def ingest_file(file_path):
    """Index one text file in a Chroma store and build a retriever over it.

    The file is loaded, split into 250-character chunks with 100 characters of
    overlap, embedded with the model named by ``EMBEDDINGS_MODEL`` and stored
    via ``Chroma.from_documents``. The file path and the retriever are printed
    before returning.

    Args:
        file_path: Path of the text file to ingest.

    Returns:
        A ``(retriever, file_path)`` tuple; the retriever is created with
        ``search_kwargs={"k": 4}``.

    Raises:
        ValueError: If ``EMBEDDINGS_MODEL`` has no entry in ``EMBEDDINGS_MAP``.
    """
    chunks = split_documents(
        load_unstructured_document(file_path),
        chunk_size=250,
        chunk_overlap=100,
    )

    embeddings_cls = EMBEDDINGS_MAP.get(EMBEDDINGS_MODEL)
    if embeddings_cls is None:
        raise ValueError(f"Invalid embeddings model: {EMBEDDINGS_MODEL}")

    # Only the Instruct embeddings class is given an explicit CUDA device;
    # the other class is constructed with empty model kwargs.
    if embeddings_cls == HuggingFaceInstructEmbeddings:
        model_kwargs = {"device": "cuda:0"}
    else:
        model_kwargs = {}
    embedding = embeddings_cls(model_name=EMBEDDINGS_MODEL, model_kwargs=model_kwargs)

    store = Chroma.from_documents(documents=chunks, embedding=embedding)
    retriever = store.as_retriever(search_kwargs={"k": 4})

    print(file_path)
    print(retriever)

    return retriever, file_path


def checkQuestion(question: str, retriever, llm):
    """Retrieve the documents relevant to *question* and return their text.

    Runs a "stuff" ``RetrievalQA`` chain over *retriever*, cleans and joins the
    returned source documents, asks *llm* to summarise them, and prints (in
    bright green) the yes/no check prompt built from that summary. The check
    prompt is only printed here; it is not sent to the model.

    Args:
        question: The question; a literal "Action Input: " prefix is removed.
        retriever: Retriever handed to the ``RetrievalQA`` chain.
        llm: Callable LLM used by the chain and for the summary.

    Returns:
        The cleaned source-document texts joined by single spaces, or the
        string "Issue in retrieving the answer." when the chain output has no
        'result' key.
    """
    # Kept from the original; this function never assigns qa_prompt.
    global qa_prompt

    summary_template = """###Instruction: You are an AI assistant, and you've been given a list of documents. These documents are presented in a continuous string, separated by spaces. Your task is to parse this string, identify individual documents, and create a summarized list describing the content of each document.
Documents:{context}
### Response:"""
    check_template = """###Instruction: You are an AI assistant who uses document information to answer questions. Given the following pieces of context, determine if there are any elements related to the question in the context. To assist me in this task, you have access to a vector database context that contains various documents related to different topics.Don't forget you MUST answer with 'yes' or 'no'
 
Context:{context}
Question: Do you think it would be possible to infer an answer to ""{question}"" from the information in the context? You MUST include'yes' or 'no' in your answer.
### Response: """

    query = question.replace("Action Input: ", "")

    # Run retrieval + QA; the source documents come back alongside the answer.
    chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
    )
    response = chain({"query": query})

    # No 'result' key: dump the raw output in bold red and bail out.
    if 'result' not in response:
        print(f"\033[1;31m{response}\033[0m")
        return "Issue in retrieving the answer."

    # Merge every retrieved chunk into one cleaned string.
    cleaned_chunks = (clean_text(doc.page_content) for doc in response['source_documents'])
    context = " ".join(cleaned_chunks)

    summary = llm(summary_template.format(context=context))

    # Build the yes/no check prompt from the summary and show it.
    check_prompt = check_template.format(context=summary, question=query)
    print(Fore.GREEN + Style.BRIGHT + check_prompt + Style.RESET_ALL)

    return context


In [None]:

def can_answer(question: str, retriever, llm):
    """Ask the LLM whether the retrieved documents can answer *question*.

    Runs a "stuff" ``RetrievalQA`` chain over *retriever*, cleans and joins the
    returned source documents, has *llm* summarise them, then submits a yes/no
    check prompt built from that summary.

    Side effects: stores the formatted QA prompt in the module-level
    ``qa_prompt`` and prints the check prompt (green), the joined context and
    the model's reply (red).

    Args:
        question: The question; a literal "Action Input: " prefix is removed.
        retriever: Retriever handed to the ``RetrievalQA`` chain.
        llm: Callable LLM used by the chain, the summary and the check.

    Returns:
        bool: True when the model's reply contains "yes" (case-insensitive).
        False otherwise, including when the chain output has no 'result' key.
    """
    global qa_prompt
    question = question.replace("Action Input: ", "")

    DOCUMENTS_SUMMARY_PROMPT_TEMPLATE = """###Instruction: You are an AI assistant, and you've been given a list of documents. These documents are presented in a continuous string, separated by spaces. Your task is to parse this string, identify individual documents, and create a summarized list describing the content of each document.
Documents:{context}
### Response:"""
    QUESTION_CHECK_PROMPT_TEMPLATE = """###Instruction: You are an AI assistant who uses document information to answer questions. Given the following pieces of context, determine if there are any elements related to the question in the context. To assist me in this task, you have access to a vector database context that contains various documents related to different topics.Don't forget you MUST answer with 'yes' or 'no'
 
Context:{context}
Question: Do you think it would be possible to infer an answer to ""{question}"" from the information in the context? You MUST include'yes' or 'no' in your answer.
### Response:
"""
    QUESTION_QA_PROMPT_TEMPLATE = """### Human: You are an helpful assistant that tries to answer questions concisely and precisely. Your answer must ONLY be based on the context information provided. Inclue 'yes' in your answer if it's positive.
    Context:{context}
    Question: {question}
    ### Assistant:
"""
    qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever, return_source_documents=True)
    # Run retrieval + QA; only the source documents are used below.
    answer_data = qa({"query": question})

    # No 'result' key: dump the raw output in bold red and report "cannot
    # answer". This used to return a non-empty string, which callers testing
    # truthiness would have read as a positive answer.
    if 'result' not in answer_data:
        print(f"\033[1;31m{answer_data}\033[0m")
        return False

    context_documents = answer_data['source_documents']

    # Combine all contexts into one cleaned string.
    context = " ".join(clean_text(doc.page_content) for doc in context_documents)

    # Summarise the documents, then build both prompts from the summary.
    documents_summary_prompt = DOCUMENTS_SUMMARY_PROMPT_TEMPLATE.format(context=context)
    summary = llm(documents_summary_prompt)

    question_check_prompt = QUESTION_CHECK_PROMPT_TEMPLATE.format(context=summary, question=question)
    qa_prompt = QUESTION_QA_PROMPT_TEMPLATE.format(context=summary, question=question)

    print(Fore.GREEN + Style.BRIGHT + question_check_prompt + Style.RESET_ALL)
    # Submit the check prompt to the LLM directly.
    answerable = llm(question_check_prompt)
    print(Fore.RED + Style.BRIGHT + context + Style.RESET_ALL)
    print(Fore.RED + Style.BRIGHT + answerable + Style.RESET_ALL)
    return "yes" in answerable.lower()

In [None]:
# Index the local notes file; ingest_file returns the retriever together with
# the path it was given (bound here to `file_patch`).
retriever, file_patch = ingest_file("/home/karajan/Documents/notion.txt")

In [None]:
# Smoke test: print whatever checkQuestion returns for a sample question.
print(checkQuestion("What's the wifi code", retriever, llm))

In [None]:
# Labels the agent may emit and the tool names it may refer to.
# (An earlier pair of assignments that was immediately overwritten by these
# was dead code and has been removed.)
valid_answers = ['Action', 'Final Answer']
valid_tools = ["Check Question", "Google Search"]

# Tool name -> callable.
# NOTE(review): 'Check Question' is mapped to the Google search wrapper, not to
# a document-lookup function -- confirm this is intended.
dict_tools = {
    'Check Question': search,
    'Google Search': search
}

# Guidance chat program for the agent. Variables supplied when the program is
# called: `question`, `search` (a callable invoked as `{{search question}}`)
# and `can_answer` (tested by the `{{#if}}`). `answer` is generated by the
# model in the positive branch.
# A stray "address?" that followed the second {{~/user}} tag was paste residue
# injected into the prompt between turns; it has been removed.
# NOTE(review): the system message only describes the Google Search tool while
# the scripted assistant turn uses "Check Question" -- confirm this is intended.
prompt_template = """
{{#system~}}
Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
### Instruction:
Answer the following questions as best you can. You have access to the following tools:
Google Search: A wrapper around Google Search. Useful for when you need to answer questions about current events. The input is the question to search relevant information.
{{~/system}}

{{#user~}}
Question: {{question}}
{{~/user}}

{{#assistant~}}
Thought: Let's first check our database.
Action: Check Question
Action Input: {{question}}
{{~/assistant}}

{{#user~}}
Here are the relevant documents from our database:{{search question}}
{{~/user}}

{{#assistant~}}
Observation: Based on the documents, I think I can reach a conclusion.
{{#if (can_answer)}} 
Thought: I believe I can answer the question based on the information contained in the returned documents.
Final Answer: {{gen 'answer' temperature=0.7 max_tokens=500}}
{{else}}
Thought: I don't think I can answer the question based on the information contained in the returned documents.
Final Answer: I'm sorry, but I don't have sufficient information to provide an answer to this question.
{{/if}}
{{~/assistant}}

"""

question="What's the wifi network code?"

def searchQA(t):
    """Look up *t* in the document store via ``checkQuestion``.

    Used as the `search` callable of the guidance program, which invokes it
    with the question text.

    Args:
        t: The question to look up. This was previously ignored in favour of
            the module-level ``question``.

    Returns:
        Whatever ``checkQuestion(t, retriever, llm)`` returns.
    """
    return checkQuestion(t, retriever, llm)

# Compile the template into a guidance program, then run it.
# can_answer(...) is evaluated here, before the program executes, so the
# template receives its return value rather than the function itself.
prompt = guidance(prompt_template)
result = prompt(
    question=question,
    search=searchQA,
    can_answer=can_answer(question, retriever, llm),
    valid_answers=valid_answers,
    valid_tools=valid_tools,
)

## Let's test

In [None]:
# Clear the cache attached to guidance's current default LLM.
# NOTE(review): that LLM was created with caching=False -- confirm `cache` is
# still a usable object in that configuration.
guidance.llm.cache.clear()
print('Done.')

### Define prompt templates

In [None]:
# Rebind the module-level question used by later cells.
question="What is the code of the wifi?"

### Our agent with Guidance

In [None]:
# Clear the cache attached to guidance's current default LLM.
# NOTE(review): that LLM was created with caching=False -- confirm `cache` is
# still a usable object in that configuration.
guidance.llm.cache.clear()
print('Done.')