In [1]:
# Utilizando o -q para apareccer menos informações
#pip install langchain
#pip install langdetect
#pip install tiktoken
#pip install gradio
#pip install pypdf
#pip install openai
#pip install chromadb
#pip install pypdf -q

In [195]:
# from Bibliotecas ou Pacotes . Módulo import Classe 

# Importando as classes que usaremos

import os                                                                 # Biblioteca para manipular o SO
from dotenv import load_dotenv, find_dotenv                               # Carregar arquivo .env
from langchain.document_loaders import PyPDFLoader                        # Carregar o PDF
from langchain.text_splitter import RecursiveCharacterTextSplitter        # Separar as palavras
from langchain.vectorstores import Chroma                                 # Banco de Dados de Vetores
from langchain.embeddings.openai import OpenAIEmbeddings                  # Embedding das palavras
from langchain.chains import RetrievalQA                                  # Perguntas e Respostas
from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI                              # Modelo utilizado
import gradio as gr                                                       # Criar o chatbot
import random
from langchain.memory import ChatMessageHistory

In [118]:
load_dotenv(find_dotenv(), override=True)

True

In [197]:
history = ChatMessageHistory()

history.add_user_message("hi!")

history.add_ai_message("whats up?")

history

ChatMessageHistory(messages=[HumanMessage(content='hi!'), AIMessage(content='whats up?')])

### Backend

In [208]:
class PerguntasRespostas():
    
    def __init__(self, arquivo=None, documento=None, text_splitter=None, all_qa=None):
        self.arquivo = arquivo
        self.documento = documento
        self.text_splitter = text_splitter
        self.chunk_size = 1000
        self.chunk_overlap = 0
        #self.all_split = all_split
        #self.all_embedding = all_embedding
        self.all_qa = all_qa
        
    def chunk(self, documento):
        self.text_splitter = RecursiveCharacterTextSplitter(chunk_size=self.chunk_size,chunk_overlap=self.chunk_overlap)
        split = self.text_splitter.split_documents(documento) 
        return split

    def embeddings(self, split):
        embeddings = OpenAIEmbeddings(model='text-embedding-ada-002')  
        vector_store = Chroma.from_documents(split, embeddings)
        return vector_store
    
    def qa(self, vector_store):
        llm = ChatOpenAI(model='gpt-3.5-turbo', temperature=0.1)
        retriever = vector_store.as_retriever(search_type='similarity', search_kwargs={'k': 3}) 
        chain = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever,return_source_documents=True) 
        return chain
            
    def carregar(self, arquivo):
        try:
            try:
                loader = PyPDFLoader(arquivo)
                documento = loader.load()
            except Exception as e:
                print('Erro Carregamento')
                print(e)
            
            #Chunk
            try:
                self.all_split = self.chunk(documento)
            except Exception as e:
                print('Erro split')
                print(e)
            
            #Embeddings
            try:
                self.all_embedding = self.embeddings(self.all_split)
            except Exception as e:
                print('Erro embedding')
                print(e)
            
            #QA
            try:
                self.all_qa = self.qa(self.all_embedding)
            except Exception as e:
                print('Erro qa')
                print(e)

            nome_do_arquivo = os.path.basename(arquivo)    
            return nome_do_arquivo
                
        except Exception as e:
            print(e)
        
    def resposta(self, query):
        if self.all_qa is not None:
            result = self.all_qa(query)
            result = result['result']
        else:
            result = random.choice(["Faca o upload de um pdf", "Por favor, coloque um arquivo", "Coloque um arquivo"])
            
        return result

In [209]:
perguntasrespostas = PerguntasRespostas()

In [210]:
perguntasrespostas.carregar('unicapagina.pdf')

'unicapagina.pdf'

In [211]:
perguntasrespostas.resposta('Resuma do que trata o pdf')

'O PDF trata da defesa das cidades de Boulogne e Calais durante a Segunda Guerra Mundial. Descreve a resistência das tropas britânicas e francesas contra os ataques alemães, destacando a coragem e determinação dos soldados. Apesar dos esforços, as cidades acabaram sendo tomadas pelos alemães após intensos combates.'

### Frontend

In [213]:
with gr.Blocks() as demo:
    
    with gr.Row():
        input_file = gr.File(label='Suporte para PDF', file_types=['.pdf'], scale=2)
        textbox_files = gr.Textbox(label='Arquivo Carregado', scale=2)
        input_button = gr.Button(value='Carregar Documento')
        
    with gr.Column():
        chatbot = gr.Chatbot(label='Chat')
        
    with gr.Row():
        prompt_button = gr.Textbox(label='Mensagem', scale=10)
        
        with gr.Column():
            submit_button = gr.Button(value='Enviar')
            clear_button = gr.ClearButton(value='Apagar Historico', components=[prompt])
        
    input_button.click(perguntasrespostas.carregar,inputs=[input_file],outputs=[textbox_files])
    submit_button.click(perguntasrespostas.resposta,inputs=[prompt_button], outputs=[chatbot])
    prompt_button.submit(perguntasrespostas.resposta, inputs=[prompt_button], outputs=[chatbot])
    
    
demo.launch()

Running on local URL:  http://127.0.0.1:7938

To create a public link, set `share=True` in `launch()`.




Traceback (most recent call last):
  File "C:\Users\gabriel.bezerra\AppData\Local\anaconda3\Lib\site-packages\gradio\queueing.py", line 455, in call_prediction
    output = await route_utils.call_process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\gabriel.bezerra\AppData\Local\anaconda3\Lib\site-packages\gradio\route_utils.py", line 232, in call_process_api
    output = await app.get_blocks().process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\gabriel.bezerra\AppData\Local\anaconda3\Lib\site-packages\gradio\blocks.py", line 1542, in process_api
    data = self.postprocess_data(fn_index, result["prediction"], state)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\gabriel.bezerra\AppData\Local\anaconda3\Lib\site-packages\gradio\blocks.py", line 1427, in postprocess_data
    prediction_value = block.postprocess(prediction_value)
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "