In [1]:
# List of sites used for models
#https://sbert.net/
#https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2
#https://huggingface.co/openai/whisper-large-v3
#https://pypi.org/project/faiss-cpu/

#Text generation options: 
#https://huggingface.co/models?pipeline_tag=text-generation

In [2]:
import json

# Load the list of speech texts from the JSON file.
# The encoding is given explicitly so the read does not depend on the
# platform's default text encoding (the notebook later writes the same
# texts back out with encoding='utf-8').
with open('speech_total.json', 'r', encoding='utf-8') as file:
    speech_total = json.load(file)

# Last expression of the cell: displays the number of loaded entries.
len(speech_total)

2227

In [3]:
# Setting up Groq API
import os
from dotenv import load_dotenv
from groq import Groq

# Load environment variables (presumably from a local .env file - confirm
# where the key is expected to live) before reading the API key below.
load_dotenv()
# Module-level Groq client used by get_groq_response().
# NOTE(review): os.getenv returns None when GROQ_API is unset; how Groq()
# reacts to api_key=None is not visible here - verify the failure mode.
client = Groq(api_key=os.getenv("GROQ_API"))

def get_groq_response(query, model="mixtral-8x7b-32768", temperature=0.5, max_tokens=1024):
    """Send one user message to the Groq chat-completions API and return the reply text.

    Args:
        query: Text sent as the content of the single user-role message.
        model: Groq model identifier. The default keeps the value that was
            previously hard-coded.
            NOTE(review): this model id may have been retired by Groq -
            confirm against the provider's current model list.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Completion length limit forwarded to the API.

    Returns:
        The ``content`` of the first choice's message.
    """
    chat_completion = client.chat.completions.create(
        messages=[{"role": "user", "content": query}],
        model=model,
        temperature=temperature,
        max_tokens=max_tokens,
    )
    return chat_completion.choices[0].message.content

In [4]:
import numpy as np
from sentence_transformers import SentenceTransformer
from transformers import GPT2LMHeadModel, GPT2Tokenizer, TextDataset, DataCollatorForLanguageModeling
from transformers import Trainer, TrainingArguments
import faiss
import pickle
import torch

# Step 1: Load the embedding model and create embeddings
# One embedding row is produced per entry of speech_total; the recorded run
# printed a shape of (2227, 384).
embed_model = SentenceTransformer('all-MiniLM-L6-v2')
embeddings = embed_model.encode(speech_total, show_progress_bar=True)
print(f"Embeddings shape: {embeddings.shape}")

# Step 2: Create and save FAISS index
# The index is sized from the embedding width (second axis of `embeddings`).
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
# Vectors are cast to float32 before being added to the index.
index.add(embeddings.astype('float32'))
# Persist the index and the source texts side by side. Row i of the index
# corresponds to speech_total[i], which is how retrieve_context() maps
# search hits back to text.
faiss.write_index(index, "faiss_index.bin")
with open('original_texts.pkl', 'wb') as f:
    pickle.dump(speech_total, f)

# Step 3: Prepare data for fine-tuning
def prepare_data_for_finetuning(speeches):
    """Dump the speeches to 'elon_speeches.txt' for language-model fine-tuning.

    Each speech is written followed by a blank line. The file is created or
    overwritten in the current working directory, encoded as UTF-8.

    Args:
        speeches: Iterable of speech strings.
    """
    with open('elon_speeches.txt', 'w', encoding='utf-8') as corpus_file:
        corpus_file.writelines(text + '\n\n' for text in speeches)

# Write the training corpus ('elon_speeches.txt') that Step 5 reads back.
prepare_data_for_finetuning(speech_total)

# Step 4: Load the pre-trained model and tokenizer
model_name = "distilgpt2"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)

# Set the pad token
# The end-of-sequence token is reused for padding.
tokenizer.pad_token = tokenizer.eos_token

# Step 5: Prepare the dataset
# file_path must match the path written by prepare_data_for_finetuning().
# NOTE(review): TextDataset is reportedly deprecated in recent transformers
# releases - confirm against the installed version.
dataset = TextDataset(
    tokenizer=tokenizer,
    file_path="elon_speeches.txt",
    block_size=128
)

# mlm=False turns off masked-language-model masking in the collator.
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False
)

# Step 6: Set up training arguments
training_args = TrainingArguments(
    output_dir="./elon_gpt2",
    overwrite_output_dir=True,
    num_train_epochs=3, # Can increase epoch number to improve training
    per_device_train_batch_size=4,
    save_steps=10_000,
    save_total_limit=2,
)

# Step 7: Initialize trainer
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=dataset,
)

# Step 8: Train the model
# NOTE(review): the recorded run logged 'loss': 0.0 and 'grad_norm': nan for
# every logging step, and generation afterwards failed with "probability
# tensor contains either `inf`, `nan` or element < 0". That suggests the
# fine-tuned weights may be non-finite - verify before trusting the
# checkpoint saved below.
trainer.train()

# Step 9: Save the fine-tuned model
# Called without a path; Step 10 loads from "./elon_gpt2", the output_dir
# configured above (presumably the default save location - confirm).
trainer.save_model()

# Step 10: Load the fine-tuned model for inference
fine_tuned_model = GPT2LMHeadModel.from_pretrained("./elon_gpt2")
# Use CUDA when available, otherwise CPU.
fine_tuned_model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

# Function to retrieve context
def retrieve_context(query, k=3):
    """Return the texts of the k nearest speeches to `query`, joined by spaces.

    The query is embedded with the module-level `embed_model`, looked up in
    the module-level FAISS `index`, and the hit ids are mapped back to
    entries of `speech_total`.

    Args:
        query: Query text to embed and search for.
        k: Number of neighbours to request from the index.

    Returns:
        A single string with the retrieved texts separated by one space;
        an empty string when the index returns no valid hit.
    """
    # encode() on a one-element list yields one row per input, which is the
    # 2-D layout index.search() is called with; make it contiguous float32.
    query_vectors = np.ascontiguousarray(embed_model.encode([query]), dtype='float32')
    _distances, neighbor_ids = index.search(query_vectors, k)
    # FAISS pads the result with -1 when fewer than k neighbours exist.
    # Indexing a Python list with -1 would silently return the LAST speech,
    # so negative ids are dropped.
    return " ".join(speech_total[i] for i in neighbor_ids[0] if i >= 0)

# Updated elonify function
def elonify(content, max_new_tokens=200):
    """Sample a continuation of an "Elon Musk's response to ..." prompt.

    Uses the module-level `tokenizer` and `fine_tuned_model`.

    Args:
        content: Text embedded in the prompt template.
        max_new_tokens: Upper bound on the number of sampled tokens.
            Defaults to 200, the value that was previously hard-coded.

    Returns:
        The decoded newly generated text (prompt excluded), or a fixed
        apology string when generation raises RuntimeError.
    """
    prompt = f"Elon Musk's response to '{content}': "
    device = fine_tuned_model.device
    inputs = tokenizer(prompt, return_tensors='pt')

    # Keep only the most recent prompt tokens so that prompt + generated
    # tokens fit into the model's position window. Cutting from the left
    # preserves the END of the prompt (the query and the "Response:" cue),
    # which right-side truncation used to throw away.
    context_window = getattr(fine_tuned_model.config, 'n_positions', 1024)
    max_prompt_tokens = max(1, context_window - max_new_tokens)
    input_ids = inputs['input_ids'][:, -max_prompt_tokens:].to(device)
    attention_mask = inputs['attention_mask'][:, -max_prompt_tokens:].to(device)

    try:
        # bad_words_ids=[[tokenizer.unk_token_id]] was removed on purpose:
        # GPT-2 tokenizers use the same end-of-text token for unk and eos,
        # so banning the unk id also banned EOS and generation could never
        # stop before max_new_tokens.
        output = fine_tuned_model.generate(
            input_ids,
            attention_mask=attention_mask,
            max_new_tokens=max_new_tokens,
            num_return_sequences=1,
            no_repeat_ngram_size=2,
            temperature=0.7,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            top_k=50,  # top-k sampling
            top_p=0.95,  # nucleus sampling
            repetition_penalty=1.2,
        )
    except RuntimeError as e:
        # Sampling raises RuntimeError when the logits are not finite (the
        # recorded run hit "probability tensor contains either `inf`, `nan`
        # or element < 0"); report it and fall back to a fixed message.
        print(f"Error during generation: {e}")
        return "I apologize, but I couldn't generate a response at this time."

    # Strip the prompt by token count, not by character count: the decoded
    # prompt is not guaranteed to be character-identical to `prompt`
    # (especially after truncation), which made the old slice unreliable.
    generated_ids = output[0][input_ids.shape[1]:]
    return tokenizer.decode(generated_ids, skip_special_tokens=True)


# Updated generate_response function
def generate_response(query, max_new_tokens=200):
    """Answer `query` using retrieved speech context and the fine-tuned model.

    Args:
        query: The user's question.
        max_new_tokens: Generation length limit, forwarded to elonify()
            (it was previously accepted but ignored).

    Returns:
        The generated response text.
    """
    # Retrieve context from the indexed speeches
    context = retrieve_context(query)

    # Combine query with retrieved context
    combined_input = f"Context: {context}\n\nQuery: {query}\n\nResponse:"

    # Generate the response with the requested length limit
    return elonify(combined_input, max_new_tokens=max_new_tokens)

# Example usage
# One end-to-end call through retrieval and generation. `query` and
# `response` are bound at module level here.
query = "What are your thoughts on AI?"
response = generate_response(query)
print(f"Query: {query}")
print(f"Response: {response}")

  from tqdm.autonotebook import tqdm, trange
  Referenced from: '/Users/brandonamarasingam/anaconda3/envs/pytorch/lib/python3.11/site-packages/torchvision/image.so'
  warn(


Batches:   0%|          | 0/70 [00:00<?, ?it/s]

Embeddings shape: (2227, 384)




  0%|          | 0/1278 [00:00<?, ?it/s]

{'loss': 0.0, 'grad_norm': nan, 'learning_rate': 3.0438184663536777e-05, 'epoch': 1.17}
{'loss': 0.0, 'grad_norm': nan, 'learning_rate': 1.0876369327073553e-05, 'epoch': 2.35}
{'train_runtime': 1283.8796, 'train_samples_per_second': 3.979, 'train_steps_per_second': 0.995, 'train_loss': 0.0, 'epoch': 3.0}


RuntimeError: probability tensor contains either `inf`, `nan` or element < 0

In [None]:
from IPython.display import clear_output

def interactive_qa():
    """Run a blocking question/answer loop on standard input.

    Each non-empty line is passed to generate_response() and the answer is
    printed. The loop ends when the user types 'exit' (any letter case,
    surrounding whitespace ignored) or when the input stream is closed.
    Errors raised while generating are reported and the loop continues.
    """
    while True:
        try:
            # Strip once so that " exit " quits and whitespace-only input
            # is treated as empty.
            query = input("Enter your question (or type 'exit' to quit): ").strip()
        except EOFError:
            # No more input is available (e.g. non-interactive execution);
            # leave the loop instead of crashing.
            print("Thank you for using ElonAI, goodbye!")
            break

        if query.lower() == 'exit':
            print("Thank you for using ElonAI, goodbye!")
            break

        if not query:
            print("Please enter a question.\n")
            continue

        print(f"\nQ: {query}")
        try:
            response = generate_response(query)
            print(f"\nA: {response}\n")
        except Exception as e:
            # Best effort: report the failure and keep the session alive.
            print(f"An error occurred: {str(e)}\n")

# Greet the user, then start the blocking question/answer loop.
# The greeting previously ended mid-sentence ("...to speak with."); it now
# names the assistant, matching the goodbye message printed on exit.
print("Type your question and press Enter to speak with ElonAI.")
interactive_qa()

Type your question and press Enter to speak with.


Enter your question (or type 'exit' to quit):  what is the nba



Q: what is the nba

A: !"!#!$!%!&!'!(!)!*!+!,!-!.!/!0!1!2!3!4!5!6!7!8!9!:!;!<!=!>!?!@!A!B!C!D!E!F!G!H!I!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X!Y!Z![!\!]!^!_!`!a!b!c!d!e!f!g!h!i!j!k!l!m!n!o!p!q!r!s!t!u!v!w!x!y!z!{!|!}!~!�!�!�!�!�!�!



Enter your question (or type 'exit' to quit):  exit


Thank you for using ElonAI, goodbye!


In [None]:
# Testing and Improving RAG System

import random # Used for sampling
from tqdm import tqdm # Provides progress bars

def evaluate_rag_system(num_samples=100):
    """Run generate_response() over a random sample of the built-in test queries.

    Args:
        num_samples: Maximum number of queries to evaluate; capped at the
            number of available test queries.

    Returns:
        A list of dicts, one per evaluated query, with the keys "query"
        and "response".
    """
    # Built-in test set; extend with more diverse queries as needed.
    test_queries = [
        "What does Elon Musk think about renewable energy?",
        "How does Elon Musk view the future of space exploration?",
    ]

    sample_size = min(num_samples, len(test_queries))
    sampled_queries = random.sample(test_queries, sample_size)

    # One record per query; further per-query metrics (response time, etc.)
    # could be added to the record here.
    results = [
        {"query": question, "response": generate_response(question)}
        for question in tqdm(sampled_queries)
    ]

    # Aggregate metrics (relevance, coherence, factual accuracy, ...) would
    # be computed at this point; they typically need human evaluation or
    # comparison against known ground truths.
    return results

# Run evaluation
# Uses the default num_samples; evaluate_rag_system() caps the sample at the
# size of its built-in query list.
eval_results = evaluate_rag_system()
print(f"Evaluated {len(eval_results)} queries")
# Add code here to analyze and display results