# Chatbot com LangChain
Um chatbot simples, construído com LangChain, capaz de recuperar informações de documentos (.pdf) e de manter a memória da conversa.

## Como utilizar
Para utilizar o chatbot, basta executar todas as células em ordem. A última célula contém o chat.

### Comentários
- Foram utilizados modelos pequenos, então algumas respostas podem ser "estranhas".
- Em razão das limitações de hardware, cada iteração do chat pode demorar cerca de 1 minuto para responder no Google Colab.


In [None]:
!pip install langchain tiktoken chromadb==0.4.15 pypdf huggingface_hub transformers sentence_transformers "ctransformers[cuda]"

In [None]:
!git clone https://github.com/FelipeVein/solinftec-teste-llm

In [None]:
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.memory import ConversationSummaryMemory
from langchain.llms import CTransformers
from langchain.chains import ConversationalRetrievalChain
import glob
import os

In [None]:
# Load every PDF found in the cloned repository's data folder and split the
# loaded documents into small chunks.
data_path = '/content/solinftec-teste-llm/data'
# glob returns matches in arbitrary order; sort them so the order of the
# resulting chunks is reproducible from one run to the next.
pdf_files = sorted(glob.glob(os.path.join(data_path, "*.pdf")))
if not pdf_files:
  # Nothing to index: stop with a clear message rather than continuing
  # with an empty document set.
  raise FileNotFoundError(f"No PDF files found in {data_path!r}")
documents = []
for pdf_file in pdf_files:
  loader = PyPDFLoader(pdf_file)
  documents.extend(loader.load())
# Split with chunk_size=300 and no overlap between consecutive chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=0)
all_splits = text_splitter.split_documents(documents)

In [None]:
# Load the sentence-embedding model.
import torch

model_name = "sentence-transformers/all-mpnet-base-v2"
# Use the GPU when one is available and fall back to the CPU otherwise, so
# this cell is not tied to a CUDA runtime.
model_kwargs = {'device': 'cuda' if torch.cuda.is_available() else 'cpu'}
encode_kwargs = {'normalize_embeddings': False}
embedding_model = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)

In [None]:
# Embed each chunk with the embedding model and keep the resulting vectors
# in a Chroma store.
vectorstore = Chroma.from_documents(
    documents=all_splits,
    embedding=embedding_model,
)

In [None]:
# Load the large language model through CTransformers, passing the GPU
# layer count and the context length as its config.
llm = CTransformers(
    model="TheBloke/Llama-2-7B-Chat-GGML",
    config={
        "gpu_layers": 50,
        "context_length": 4096,
    },
)

In [None]:
# Retriever view over the vector store, used to fetch chunks for a question.
retriever = vectorstore.as_retriever()

# Conversation memory for the chatbot; it is given the same LLM and stores
# its content under the "chat_history" key.
memory = ConversationSummaryMemory(
    llm=llm,
    memory_key="chat_history",
    return_messages=True,
)

In [None]:
class Chatbot:
    """Interactive console chatbot built on a conversational retrieval chain."""

    # Typing any of these words (case-insensitive) ends the chat loop.
    EXIT_COMMANDS = ("exit", "quit", "sair")

    def __init__(self, llm, retriever, memory):
        """Create the underlying chain.

        Args:
            llm: Language model handed to the chain.
            retriever: Retriever the chain uses to fetch documents.
            memory: Conversation memory attached to the chain.
        """
        self.chatbot = ConversationalRetrievalChain.from_llm(llm, retriever=retriever, memory=memory)

    def run(self):
        """Read questions from standard input and print the chain's answers.

        The loop ends when the user types one of ``EXIT_COMMANDS``, when
        standard input is exhausted, or when the prompt is interrupted.
        Blank lines are ignored.
        """
        while True:
            try:
                question = input("You:\n")
            except (EOFError, KeyboardInterrupt):
                # End of input or an interrupt at the prompt: leave the loop
                # quietly instead of surfacing a traceback.
                break

            command = question.strip()
            if not command:
                # Do not spend a model call on an empty question.
                continue
            if command.lower() in self.EXIT_COMMANDS:
                break

            chat_response = self.chatbot(question)
            print("Bot:" + chat_response['answer'])

In [None]:
# Wire the model, the retriever and the conversation memory into a chatbot.
chatbot = Chatbot(
    llm=llm,
    retriever=retriever,
    memory=memory,
)

In [None]:
# Start the interactive chat: questions are read from standard input and
# each answer is printed prefixed with "Bot:".
chatbot.run()