In [1]:
import os
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"

In [2]:
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel
from haystack.document_stores import InMemoryDocumentStore
from haystack.nodes import DensePassageRetriever, FARMReader
from haystack.pipelines import ExtractiveQAPipeline
from haystack.schema import Document  # Corrigir import do Document

In [3]:
if torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

# Forçar o uso da CPU, independentemente de MPS estar disponível
device = torch.device("cpu")

In [4]:
# 1. Criar um armazenamento de documentos
document_store = InMemoryDocumentStore()

In [5]:
# 2. Indexar documentos (exemplo simples)
documents = [
    {"content": "A linguagem Python é popular para machine learning."},
    {"content": "O framework Haystack é usado para criar pipelines de NLP."},
    {"content": "GPT é um modelo de linguagem desenvolvido pela OpenAI."},
]

In [6]:
# Adiciona documentos ao armazenamento
document_store.write_documents(documents)

In [7]:
# 3. Configurar um recuperador DPR (Dense Passage Retriever)
retriever = DensePassageRetriever(
    document_store=document_store,
    query_embedding_model="facebook/dpr-question_encoder-single-nq-base",
    passage_embedding_model="facebook/dpr-ctx_encoder-single-nq-base"
)



In [8]:
# 4. Atualizar o armazenamento de documentos com embeddings do retriever
document_store.update_embeddings(retriever)

Documents Processed: 10000 docs [00:00, 21201.84 docs/s]     


In [9]:
# 5. Criar um pipeline de perguntas e respostas
reader = FARMReader(model_name_or_path="deepset/roberta-base-squad2")
qa_pipeline = ExtractiveQAPipeline(reader=reader, retriever=retriever)

In [10]:
# Carregar o modelo e o tokenizer do GPT-2 (ou GPT-Neo para um modelo maior)
model_name = "gpt2"  # ou "EleutherAI/gpt-neo-125M" para GPT-Neo
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name).to(device)

In [11]:
# Função para geração de texto usando GPT-2 local
def generate_response(context, user_query, max_length=100):
    prompt = f"User question: {user_query}\n\nContext: {context}\n\nAI response:"
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to(device)

    # Geração de texto usando o modelo local
    with torch.no_grad():
        outputs = model.generate(
            inputs["input_ids"], 
            max_length=max_length, 
            num_return_sequences=1,
            pad_token_id=tokenizer.eos_token_id
        )

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response

In [12]:
# Função para recuperação de documentos
def retrieve_documents(query):
    results = qa_pipeline.run(query=query, params={"Retriever": {"top_k": 3}, "Reader": {"top_k": 1}})
    return results['answers'][0].answer

# Função para recuperação de documentos
#def retrieve_documents(query):
#    results = qa_pipeline.run(query=query, top_k_retriever=3, top_k_reader=1)
#    return results['answers'][0].answer

In [13]:
# Função principal do chatbot usando RAG com LLM local
def chatbot(user_query):
    # Passo 1: Recuperar contexto relevante
    retrieved_context = retrieve_documents(user_query)
    
    # Passo 2: Gerar resposta com base no contexto recuperado
    response = generate_response(retrieved_context, user_query)
    
    return response

In [14]:
# Exemplo de interação com o chatbot
while True:
    user_input = input("Pergunta: ")
    if user_input.lower() in ['sair', 'exit']:
        break
    answer = chatbot(user_input)
    print(f"Chatbot: {answer}")

  indices = torch.tril_indices(max_seq_len, max_seq_len, offset=-1, device=start_end_matrix.device)
Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  2.29 Batches/s]


Chatbot: User question: oi

Context: Python é popular para machine learning.

AI response:

The following is a list of questions that I have been asked by people who have been asked this question.

Question 1: What is the most important thing you want to learn about AI?

Answer: I want to learn about AI.

Question 2: What is the most important thing you want to learn about AI?

Answer: I want to learn about


Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  3.98 Batches/s]


Chatbot: User question: qual linguagem é mais popular?

Context: Python

AI response:

I'm not sure if you can say that I'm a linguagem é mais popular. I'm not sure if you can say that I'm a linguagem é mais popular.

I'm not sure if you can say that I'm a linguagem é mais popular.

I'm not sure if you can say that I'm a
