In [12]:
#https://sbert.net/
#https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2
#https://huggingface.co/openai/whisper-large-v3
#https://pypi.org/project/faiss-cpu/

In [13]:
import json

# Load the list from the JSON file
with open('speech_total.json', 'r') as file:
    speech_total = json.load(file)

len(speech_total)

828

In [17]:
import numpy as np
from sentence_transformers import SentenceTransformer
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
import faiss
import pickle
import torch

# Step 1: Load the embedding model and create embeddings
embed_model = SentenceTransformer('all-MiniLM-L6-v2')

# Generate embeddings
embeddings = embed_model.encode(speech_total, show_progress_bar=True)
print(f"Embeddings shape: {embeddings.shape}")

# Step 2: Create and save FAISS index
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings.astype('float32'))

# Save the index and texts
faiss.write_index(index, "faiss_index.bin")
with open('original_texts.pkl', 'wb') as f:
    pickle.dump(speech_total, f)

# Step 3: Load the language model and tokenizer
model_name = "distilgpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Create a text generation pipeline
generator = pipeline('text-generation', model=model, tokenizer=tokenizer, device=0 if torch.cuda.is_available() else -1)

def retrieve_context(query, k=3):
    query_embedding = embed_model.encode([query])[0]
    D, I = index.search(np.array([query_embedding]).astype('float32'), k)
    retrieved_texts = [speech_total[i] for i in I[0]]
    return " ".join(retrieved_texts)

def generate_response(query, max_new_tokens=50):
    context = retrieve_context(query)
    prompt = f"Context: {context}\n\nQuery: {query}\n\nResponse:"
    
    # Tokenize the prompt
    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(device)
    
    # Generate the response
    output = model.generate(
        input_ids, 
        max_new_tokens=max_new_tokens,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        temperature=0.7
    )
    
    # Decode the response
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    
    # Extract only the generated part
    generated_text = response.split("Response:")[-1].strip()
    return generated_text

# Example usage
query = "What are Elon Musk's thoughts on AI?"
response = generate_response(query)
print(f"Query: {query}")
print(f"Response: {response}")

Batches: 100%|████████████████████████████████████████████████████████████████████████████| 26/26 [00:03<00:00,  7.19it/s]


Embeddings shape: (828, 384)


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Query: What are Elon Musk's thoughts on AI?
Response: Well I‪m not sure about Elon, but I am sure he has some thoughts. Elon is going to say something. But I don‖m sure that Elon has a lot of ideas about how to best manage a society and how we


In [19]:
import random
from tqdm import tqdm

def evaluate_rag_system(num_samples=100):
    # Sample queries or create a test set
    test_queries = [
        "What does Elon Musk think about renewable energy?",
        "How does Elon Musk view the future of space exploration?",
        # Add more diverse queries here
    ]

    results = []
    for query in tqdm(random.sample(test_queries, min(num_samples, len(test_queries)))):
        response = generate_response(query)
        results.append({
            "query": query,
            "response": response,
            # You might add more metrics here, like response time, etc.
        })

    # Here you would typically add code to calculate metrics
    # such as relevance, coherence, factual accuracy, etc.
    # This often requires human evaluation or comparison against known ground truths

    return results

# Run evaluation
eval_results = evaluate_rag_system()
print(f"Evaluated {len(eval_results)} queries")
# Add code here to analyze and display results

  0%|                                                                                               | 0/2 [00:00<?, ?it/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
 50%|███████████████████████████████████████████▌                                           | 1/2 [00:09<00:09,  9.10s/it]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
100%|███████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:10<00:00,  5.41s/it]

Evaluated 2 queries





In [25]:
from IPython.display import clear_output

def interactive_qa():
    while True:
        query = input("Enter your question (or type 'exit' to quit): ")
        if query.lower() == 'exit':
            print("Thank you for using the Q&A system. Goodbye!")
            break
        
        if query.strip():
            print(f"\nQ: {query}")
            try:
                response = generate_response(query)
                print(f"\nA: {response}\n")
            except Exception as e:
                print(f"An error occurred: {str(e)}\n")
        else:
            print("Please enter a question.\n")
        
        # Optional: Uncomment the next line if you want to clear output after each Q&A
        # clear_output(wait=True)

print("The Q&A system is ready. Type your question and press Enter.")
interactive_qa()

The Q&A system is ready. Type your question and press Enter.


Enter your question (or type 'exit' to quit):  Can you explain what SpaceX is to me



Q: Can you explain what SpaceX is to me


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



A: Yeah. I mean, you‪ve been here for a while and I am a huge fan of SpaceX, and you talked about the need to get people into space and get them into the business of getting astronauts and then making them in the U



Enter your question (or type 'exit' to quit):  exit


Thank you for using the Q&A system. Goodbye!
