In [3]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np



In [4]:
# Load a language model (you can use GPT-3.5, GPT-4, or other causal LLMs)
llm_model_name = "EleutherAI/gpt-neo-125M"  # A small model for demonstration
tokenizer = AutoTokenizer.from_pretrained(llm_model_name)
llm = AutoModelForCausalLM.from_pretrained(llm_model_name)

# Load a Sentence Transformer for encoding text into embeddings
sentence_model = SentenceTransformer("paraphrase-MiniLM-L6-v2")

# Sample corpus of documents
documents = [
    "The Eiffel Tower is located in Paris, France.",
    "The Statue of Liberty is in New York City, USA.",
    "Mount Everest is the highest mountain in the world.",
    "Python is a popular programming language for data science.",
    "Bitcoin is a type of cryptocurrency."
]

# Encode the documents into embeddings
document_embeddings = sentence_model.encode(documents)

# Create a FAISS index for vector-based retrieval
index = faiss.IndexFlatL2(document_embeddings.shape[1])
index.add(document_embeddings)

In [11]:
# Function to retrieve relevant documents based on a query
def retrieve_relevant_documents(query, top_k=3):
    # Encode the query into an embedding
    query_embedding = sentence_model.encode([query])
    # Search for the most similar documents
    distances, indices = index.search(query_embedding, top_k)
    
    # Get the index of the smallest distance
    least_dist_index = np.argmin(distances[0])
    
    # Return the corresponding documents
    #relevant_documents = [documents[idx] for idx in indices[0]]
    relevant_documents = [documents[least_dist_index]]
    return relevant_documents

# Function to generate a response using the LLM with retrieved information
def generate_response(query):
    # Retrieve relevant documents based on the query
    relevant_documents = retrieve_relevant_documents(query)
    # Create a prompt for the LLM using the retrieved information
    prompt = f"\n\nAnswer the following question based on the above information:\n{query}\n"
    # Generate a response from the LLM
    inputs = tokenizer.encode(prompt, return_tensors="pt")
    outputs = llm.generate(inputs, max_length=100, num_return_sequences=1)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response

In [12]:
# Example query
query = "Where is the Eiffel Tower located?"
response = generate_response(query)

print("Query:", query)
print("Response:", response)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Query: Where is the Eiffel Tower located?
Response: 

Answer the following question based on the above information:
Where is the Eiffel Tower located?

Answer the following question based on the above information:

Where is the Eiffel Tower located?

Answer the following question based on the above information:

Where is the Eiffel Tower located?

Answer the following question based on the above information:

Where is the Eiffel Tower located?

Answer the following question based on the


In [18]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np

# Load the language model and tokenizer
llm_model_name = "EleutherAI/gpt-neo-125M"
tokenizer = AutoTokenizer.from_pretrained(llm_model_name)

# Ensure the tokenizer has a pad token
if tokenizer.pad_token is None:
    if tokenizer.eos_token is not None:
        tokenizer.pad_token = tokenizer.eos_token
    else:
        tokenizer.add_special_tokens({'pad_token': '[PAD]'})  # Add a new pad token

# Reload the model to ensure it recognizes the pad token
llm = AutoModelForCausalLM.from_pretrained(llm_model_name)

# Load a Sentence Transformer for encoding text into embeddings
sentence_model = SentenceTransformer("paraphrase-MiniLM-L6-v2")

# Sample corpus of documents
documents = [
    "The Eiffel Tower is located in Paris, France.",
    "The Statue of Liberty is in New York City, USA.",
    "Mount Everest is the highest mountain in the world.",
    "Python is a popular programming language for data science.",
    "Bitcoin is a type of cryptocurrency."
]

# Encode the documents into embeddings
document_embeddings = sentence_model.encode(documents)

# Create a FAISS index for vector-based retrieval
index = faiss.IndexFlatL2(document_embeddings.shape[1])
index.add(document_embeddings)

# Function to retrieve relevant documents based on a query
def retrieve_relevant_documents(query, top_k=3):
    # Encode the query into an embedding
    query_embedding = sentence_model.encode([query])
    # Search for the most similar documents
    distances, indices = index.search(query_embedding, top_k)
    # Return the corresponding documents
    # Get the index of the smallest distance
    least_dist_index = np.argmin(distances[0])
    
    #relevant_documents = [documents[idx] for idx in indices[0]]
    relevant_documents = [documents[least_dist_index]]
    return relevant_documents

# Function to generate a response using the LLM with retrieved information
def generate_response(query):
    # Retrieve relevant documents based on the query
    relevant_documents = retrieve_relevant_documents(query)
    # Create a simplified context with only the most relevant information
    relevant_text = "\n".join(relevant_documents)

    # Create a straightforward prompt asking for a specific answer
    prompt = f"Relevant information:\n{relevant_text}"

    # Tokenize with proper padding and attention mask
    inputs = tokenizer.encode_plus(
        prompt,
        return_tensors="pt",
        padding="longest",  # Ensure proper padding
        truncation=True,
        return_attention_mask=True
    )

    # Generate the response with appropriate generation parameters
    outputs = llm.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_length=30,
        num_return_sequences=1,
        pad_token_id=tokenizer.pad_token_id,  # Correctly pass the pad token
        temperature=0.7,
        top_k=3,
        top_p=0.95
    )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
    return response




In [19]:
# Example query
query = "Where is the Eiffel Tower located?"
response = generate_response(query)

print("Query:", query)
print("Response:", response)

Query: Where is the Eiffel Tower located?
Response: Relevant information:
The Eiffel Tower is located in Paris, France. The tower is a landmark in the city of Paris, France.
