# A basic test of the LangChain framework

## Loading variables

In [None]:
from dotenv import load_dotenv, find_dotenv
# Locate the local .env file and load its variables; the boolean result is discarded.
_ = load_dotenv(dotenv_path=find_dotenv())

## Implementing the Retrieval-augmented generation ([RAG](https://python.langchain.com/docs/use_cases/question_answering/))

![Retrieval-augmented generation](RAG.png)

### Document Loading

In [None]:
#from langchain.document_loaders import DirectoryLoader
from langchain.document_loaders import WhatsAppChatLoader

In [None]:
# Alternative source (README files of a directory tree):
#   directory = "/Users/bernal/Documents/ext/GitRepos"
#   loader = DirectoryLoader(directory, glob="**/README.md")

# Active source: a WhatsApp chat export.
path = "/Users/bernal/Documents/ext/GitRepos/generative-ai-knowledge/whatsapp/McFlanagan.txt"
loader = WhatsAppChatLoader(path=path)

### Splitting (optional)

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Load the documents and split them with chunk_size=500 and no overlap.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=0,
)
splits = text_splitter.split_documents(loader.load())
# Alternative without splitting:
#   splits = loader.load()

### Storage (Embed and store splits)

#### Embeddings cases

##### Embeddings: OpenAI case

In [None]:
from langchain.embeddings import OpenAIEmbeddings
# OpenAI embeddings with default settings. `emb` is the embedding function
# handed to the vector store; each embeddings cell rebinds it, so run only
# the one you want to use.
emb = OpenAIEmbeddings()

##### Embeddings: HuggingFaceBgeEmbeddings case

In [None]:
from langchain.embeddings import HuggingFaceBgeEmbeddings

# Other embedding back-ends that can be bound to `emb` instead:
#   emb = OpenAIEmbeddings()
#   emb = GPT4AllEmbeddings()

# BGE small English model, run on CPU, with normalized embeddings.
model_name = "BAAI/bge-small-en"
model_kwargs = {"device": "cpu"}
encode_kwargs = {"normalize_embeddings": True}
emb = HuggingFaceBgeEmbeddings(
    encode_kwargs=encode_kwargs,
    model_kwargs=model_kwargs,
    model_name=model_name,
)

##### Embeddings: GPT4All case

In [None]:
from langchain.embeddings import GPT4AllEmbeddings
# GPT4All embeddings with default settings. Rebinds `emb`, replacing whichever
# embedding function an earlier cell assigned.
emb = GPT4AllEmbeddings()

#### Embeddings Storage

In [None]:
from langchain.vectorstores import Chroma
# Embed every split with `emb` and index the result in a Chroma vector store.
vectorstore = Chroma.from_documents(
    embedding=emb,
    documents=splits,
)

#### Quick retrieval test

In [None]:
#question = "bla bla bla"
#docs = vectorstore.similarity_search_with_relevance_scores(question, k=8)

### Retrieval

In [None]:
# Expose the vector store as a retriever (default settings); it backs the
# retrieval tool given to the agent further down.
retriever = vectorstore.as_retriever()

#### Quick retrieval check using the retriever

In [None]:
#question = "bla bla bla?"
#docs = retriever.invoke(question)

### Output (Generate response)

##### LLM: OpenAI model case

In [None]:
from langchain.chat_models import ChatOpenAI
# Bind the chat model to `llm`: that is the name the LLMChain and agent cells
# below read, so the model must be assigned to it to take effect.
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.7)

##### LLM: LlamaCpp case (local execution)

In [None]:
# llama-cpp-python
from langchain.llms import LlamaCpp
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

def get_llm(model_path=None, n_gpu_layers=0):
    """Build a local LlamaCpp LLM whose output is streamed to stdout.

    Args:
        model_path: Path to the GGUF model file. When ``None``, the
            tiefighter model under this repository's ``notebooks/llama2``
            directory is used.
        n_gpu_layers: Value forwarded to ``LlamaCpp(n_gpu_layers=...)``.
            Defaults to 0; with Metal, setting it to 1 is enough.

    Returns:
        The configured ``LlamaCpp`` instance.
    """
    if model_path is None:
        # Make sure the model path is correct for your system!
        model_path = "/Users/bernal/Documents/ext/GitRepos/generative-ai-knowledge/notebooks/llama2/llama2-13b-tiefighter.Q3_K_S.gguf"

    n_batch = 512  # Should be between 1 and n_ctx, consider the amount of RAM of your Apple Silicon Chip.
    callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

    return LlamaCpp(
        model_path=model_path,
        n_gpu_layers=n_gpu_layers,
        n_batch=n_batch,
        n_ctx=4096,
        f16_kv=True,  # MUST set to True, otherwise you will run into problem after a couple of calls
        callback_manager=callback_manager,
        verbose=True,
    )

# Instantiate the local LlamaCpp model and bind it to `llm`, the name the
# chain and agent cells below use.
llm = get_llm()

##### LLM: GPT4All case (local execution, no GPU)

In [None]:
# gpt4all
from langchain.llms import GPT4All
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

def get_llm():
    """Create a GPT4All LLM from a local model file, with a stdout streaming callback.

    Returns:
        The configured ``GPT4All`` instance.
    """
    # Replace with your desired local file path.
    model_file = "/Users/bernal/Documents/ext/GitRepos/generative-ai-knowledge/notebooks/gpt4all/mistral-7b-openorca.Q4_0.gguf"

    # The callback gives token-wise streaming; verbose=True is required for
    # the callbacks to be passed to the callback manager.
    return GPT4All(
        model=model_file,
        callbacks=[StreamingStdOutCallbackHandler()],
        verbose=True,
    )

# Instantiate the local GPT4All model and bind it to `llm`, the name the
# chain and agent cells below use.
llm = get_llm()

##### Simple LLM test

In [None]:
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

# Smoke test: send a single question through a prompt template to the
# currently selected `llm`.
template = """Question: {question}

Answer: Let's think step by step."""

prompt = PromptTemplate(
    input_variables=["question"],
    template=template,
)
llm_chain = LLMChain(llm=llm, prompt=prompt)

# Alternative question:
#   question = "What NFL team won the Super Bowl in the year Justin Bieber was born?"
question = "Tell in just a sentence what 'active metadata' is in the context of data governance"
llm_chain.run(question)

#### Generating execution in chat mode

We will use a conversational agent.

In [None]:
# Creating tools
from langchain.agents.agent_toolkits import create_retriever_tool

# The tool's name and description are its label for the agent, so they must
# describe the corpus that was actually indexed: the WhatsApp chat export
# loaded by WhatsAppChatLoader. Update both if a different loader is used.
retrieval_tool = create_retriever_tool(
    retriever,
    "search-whatsapp-chat",
    "Searches and returns excerpts from the WhatsApp chat history.",
)
# ... more tools ...
tools = [retrieval_tool]

In [None]:
# System message for the chat
from langchain.schema.messages import SystemMessage

# Instruction passed to the agent as the chat's system message; the three
# literals below are concatenated into a single sentence pair.
system_message = SystemMessage(
    content="Do your best to answer the questions. "
    "Feel free to use any tools available to look up "
    "relevant information, only if necessary"
)

In [None]:
# Set memory key variable
# Key shared by the agent memory and the prompt placeholder for the chat history.
memory_key = "chat_history"

###### Build the agent. Simple way

In [None]:
from langchain.agents.agent_toolkits import create_conversational_retrieval_agent

# One-call construction of the conversational retrieval agent.
# NOTE(review): this helper appears to build on OpenAI function calling —
# confirm it works when `llm` is one of the local models.
agent_executor = create_conversational_retrieval_agent(
    llm,
    tools,
    system_message=system_message,
    memory_key=memory_key,
    verbose=True,
)

###### Build the agent. Detailed way

In [None]:
from langchain.agents.openai_functions_agent.agent_token_buffer_memory import AgentTokenBufferMemory
from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent
from langchain.prompts import MessagesPlaceholder
from langchain.agents import AgentExecutor

# Conversation memory stored under `memory_key`, with max_token_limit=2000.
memory = AgentTokenBufferMemory(
    llm=llm,
    memory_key=memory_key,
    max_token_limit=2000,
)

# Prompt: the system message plus a placeholder keyed by `memory_key`.
prompt = OpenAIFunctionsAgent.create_prompt(
    extra_prompt_messages=[MessagesPlaceholder(variable_name=memory_key)],
    system_message=system_message,
)

agent = OpenAIFunctionsAgent(prompt=prompt, tools=tools, llm=llm)

# Executor wiring the agent, its tools and the memory together; intermediate
# steps are returned alongside the final output.
agent_executor = AgentExecutor(
    agent=agent,
    memory=memory,
    tools=tools,
    return_intermediate_steps=True,
    verbose=True,
)

##### Execute chat input

In [None]:
# Chat line
# Send the first user message to the agent and keep the returned result.
result = agent_executor(
    {"input": "Hi, I am Jorge"}
)