In [5]:
import os                                                                
from dotenv import load_dotenv, find_dotenv                               
from langchain.document_loaders import PyPDFLoader, TextLoader, Docx2txtLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter        
from langchain.vectorstores import Chroma                                 
from langchain.embeddings.openai import OpenAIEmbeddings                  
from langchain.chains import RetrievalQA                                 
from langchain.chat_models import ChatOpenAI      
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.schema import SystemMessage, HumanMessage, AIMessage
import gradio as gr      

In [6]:
# Load environment variables from the .env file located by find_dotenv().
# override=True lets values from that file replace variables that are already
# set in the process environment.
# NOTE(review): presumably this is where the OpenAI API key used by ChatOpenAI
# comes from — confirm against the .env file.
load_dotenv(find_dotenv(), override=True)

True

### Tentativas

In [7]:
class PerguntasRespostas():
    """Chat helper that answers questions, optionally grounded on an uploaded document.

    Workflow: ``carregar`` loads a file, splits it into chunks, embeds the chunks
    into a Chroma vector store and builds a ``RetrievalQA`` chain. ``resposta``
    then answers a prompt with that chain, or with a plain chat model while no
    document has been loaded, and returns the accumulated chat history.
    """

    def __init__(self, arquivo=None, documento=None, text_splitter=None, all_qa=None,
                 chunk_size=1000, chunk_overlap=0):
        """Store the initial state.

        Args:
            arquivo: Path of the currently loaded file, if any.
            documento: Documents produced by the loader, if any.
            text_splitter: Splitter instance; replaced on every call to ``chunk``.
            all_qa: Ready-to-use QA chain (callable returning a dict with a
                ``'result'`` key), or None when no document is loaded.
            chunk_size: Maximum chunk size handed to the text splitter.
            chunk_overlap: Overlap between consecutive chunks.
        """
        self.arquivo = arquivo
        self.documento = documento
        self.text_splitter = text_splitter
        self.all_qa = all_qa
        self.chunk_size = chunk_size
        self.chunk_overlap = chunk_overlap
        self.chat_history = []
        # Intermediate pipeline artefacts; filled in by a successful `carregar`.
        self.all_split = None
        self.all_embedding = None

    def chunk(self, documento):
        """Split the loaded documents into chunks using the configured sizes."""
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=self.chunk_size,
            chunk_overlap=self.chunk_overlap,
        )
        return self.text_splitter.split_documents(documento)

    def embeddings(self, split):
        """Embed the chunks and return a Chroma vector store built from them."""
        embeddings = SentenceTransformerEmbeddings(model_name='all-mpnet-base-v2')
        # NOTE(review): no collection name or persist directory is passed here;
        # confirm whether repeated uploads in one process end up sharing a
        # single default collection.
        return Chroma.from_documents(split, embeddings)

    def qa(self, vector_store):
        """Build a "stuff" RetrievalQA chain over the 3 most similar chunks."""
        llm = ChatOpenAI(model='gpt-3.5-turbo', temperature=0.1)
        retriever = vector_store.as_retriever(search_type='similarity', search_kwargs={'k': 3})
        return RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=retriever,
            return_source_documents=True,
        )

    def carregar(self, arquivo):
        """Load a .pdf, .txt or .docx file and prepare the QA chain for it.

        The pipeline (load -> chunk -> embed -> QA chain) stops at the first
        failing step, and the instance state is only replaced once every step
        succeeded, so a failed upload never leaves a half-built pipeline or
        discards the previously loaded document.

        Args:
            arquivo: Path (``str`` / ``os.PathLike``) or an object exposing the
                path through a ``name`` attribute (e.g. a temp-file wrapper).

        Returns:
            The file name without extension on success, ``None`` otherwise
            (the reason is printed).
        """
        # Accept both plain paths and file-like wrappers that carry `.name`.
        if isinstance(arquivo, (str, os.PathLike)):
            caminho = os.fspath(arquivo)
        else:
            caminho = getattr(arquivo, 'name', None)

        if not caminho:
            print('Coloque um Arquivo')
            return None

        try:
            file_name = os.path.basename(caminho)
            base, extensao = os.path.splitext(file_name)
        except Exception as e:
            print(e)
            return None

        # Extensions are matched case-insensitively so 'REPORT.PDF' also works.
        if isinstance(extensao, str):
            extensao = extensao.lower()

        # Load
        try:
            if extensao == '.pdf':
                loader = PyPDFLoader(caminho)
            elif extensao == '.txt':
                loader = TextLoader(caminho)
            elif extensao == '.docx':
                loader = Docx2txtLoader(caminho)
            else:
                print('Coloque um Arquivo')
                return None
            documento = loader.load()
        except Exception as e:
            print('Erro Carregamento')
            print(e)
            return None

        # Chunk
        try:
            split = self.chunk(documento)
        except Exception as e:
            print('Erro split')
            print(e)
            return None

        # Embeddings
        try:
            vector_store = self.embeddings(split)
        except Exception as e:
            print('Erro embedding')
            print(e)
            return None

        # QA
        try:
            chain = self.qa(vector_store)
        except Exception as e:
            print('Erro qa')
            print(e)
            return None

        # Commit the new state only now that the whole pipeline succeeded.
        self.arquivo = caminho
        self.documento = documento
        self.all_split = split
        self.all_embedding = vector_store
        self.all_qa = chain
        return base

    def resposta(self, query):
        """Answer ``query`` and return the full chat history.

        Uses the document QA chain when one is available; otherwise falls back
        to a plain chat model. Blank prompts are ignored so no model call is
        spent on them.

        Returns:
            The list of ``(query, answer)`` tuples accumulated so far.
        """
        if query is None or not str(query).strip():
            return self.chat_history

        if self.all_qa is not None:
            result = self.all_qa(query)
            result = result['result']
        else:
            chat = ChatOpenAI(model='gpt-3.5-turbo', temperature=0.1)
            prompt_system = '''
            You are a chatbot.
            Answer the question that was asked.
            After responding, suggest that the user upload a PDF to the chatbot.
            '''
            result = chat([SystemMessage(content=prompt_system),HumanMessage(content=query)]).content

        self.chat_history.append((query, result))
        return self.chat_history

In [10]:
# Gradio UI: upload a document, then chat about it.

perguntas = PerguntasRespostas()

with gr.Blocks() as demo:

    # Upload row: file picker, name of the loaded file, and the upload button.
    with gr.Row():
        # The label now matches the extensions actually accepted by `file_types`.
        inp_file = gr.File(label='Suporte para PDF, TXT e DOCX',file_types=['.pdf', '.txt', '.docx'], interactive=True, scale=2)
        out_file = gr.Textbox(label='Arquivo upado', scale=2)
        inp_button = gr.Button(value='Upar documento', scale=1)

    # Conversation panel.
    with gr.Column():
        out = gr.Chatbot(show_label=False,
                         show_copy_button=True,
                         likeable=True,
                         layout="panel")

    # Prompt row: pressing Enter in the textbox sends the prompt.
    with gr.Row():
        inp = gr.Textbox(label='Prompt')
        inp.submit(perguntas.resposta,[inp],[out])

        with gr.Column(scale=1):
            submit_button = gr.Button(value='Enviar')
            clear_button = gr.ClearButton(components=[inp, out], value='Apagar Histórico')

    with gr.Row():

        # NOTE(review): the two sliders below are not connected to any event or
        # model parameter, so moving them currently has no effect.
        gr.Slider(
        label="Novos Tokens",
        value=1000,
        minimum=0,
        maximum=2000,
        step=100,
        interactive=True,
        visible=True,
        info="Número máximo de novos tokens")

        gr.Slider(
        label='Temperatura',
        value=0.1,
        minimum=0.0,
        maximum=2.0,
        step=0.1,
        interactive=True,
        visible=True,
        info='Valores mais altos produzem resultados mais criativos'
        )


    inp_button.click(perguntas.carregar,inputs=[inp_file], outputs=[out_file])
    submit_button.click(perguntas.resposta, inputs=[inp],outputs=[out])
    # The ClearButton only empties the widgets; also drop the stored history,
    # otherwise the "cleared" conversation reappears with the next answer.
    clear_button.click(lambda: perguntas.chat_history.clear(), inputs=None, outputs=None)


demo.launch()

Running on local URL:  http://127.0.0.1:7863

To create a public link, set `share=True` in `launch()`.


