In [None]:
# Imports cell
import os
import openai
import numpy as np
import pandas as pd
from pathlib import Path
from pypdf import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.llms import OpenAI
import ipywidgets as widgets
from IPython.display import display, HTML, clear_output
from dotenv import load_dotenv

# Load variables (including OPENAI_API_KEY) from a local .env file into the environment
load_dotenv()

# Read the key once and hand it to the openai module
openai.api_key = os.getenv("OPENAI_API_KEY")

# Validate BEFORE touching os.environ: os.environ values must be strings, so
# assigning a missing (None) key there raises TypeError and hides this message.
if not openai.api_key or openai.api_key == "your-openai-api-key":
    raise ValueError("Please set your OpenAI API key in the .env file")

# Re-export the validated key for libraries that read it from the environment
os.environ["OPENAI_API_KEY"] = openai.api_key

In [None]:
def extract_text_from_pdf(pdf_path):
    """Extract the text of every page of a PDF as one string.

    Args:
        pdf_path: Path to the PDF file; passed straight to ``PdfReader``.

    Returns:
        str: The text of each page followed by a newline, concatenated in
        page order. A page that yields no text contributes just the newline.
    """
    reader = PdfReader(pdf_path)
    # `or ""` guards against pages whose extraction yields None (e.g. image-only
    # pages in some extractor versions), which would otherwise crash the "+".
    page_texts = [(page.extract_text() or "") + "\n" for page in reader.pages]
    # Join once instead of growing a string page by page.
    return "".join(page_texts)

# Load the book
# The path is relative to the directory the notebook runs from; update it to
# point at your own copy of the PDF.
pdf_path = "../data/Eric-Jorgenson_The-Almanack-of-Naval-Ravikant_Final.pdf"

# Pull the text of every page into a single string
raw_text = extract_text_from_pdf(pdf_path)

# Preview the first 1000 characters to sanity-check the extraction
print(raw_text[:1000])

In [None]:
def preprocess_text(text):
    """Normalize whitespace in ``text``.

    Every run of whitespace (spaces, tabs, newlines) collapses to a single
    space, and leading/trailing whitespace is dropped.

    Args:
        text: Raw text to clean.

    Returns:
        str: The whitespace-normalized text.
    """
    # split() with no separator discards all whitespace runs, including edges
    words = text.split()
    normalized = " ".join(words)
    # Further cleaning steps can be added here as needed
    return normalized

def chunk_text(text, chunk_size=1000, chunk_overlap=200):
    """Break ``text`` into overlapping chunks.

    Args:
        text: The text to split.
        chunk_size: Maximum chunk length, measured with ``len``.
        chunk_overlap: Overlap between consecutive chunks, passed to the splitter.

    Returns:
        The list of chunks produced by ``RecursiveCharacterTextSplitter.split_text``.
    """
    splitter_options = {
        "chunk_size": chunk_size,
        "chunk_overlap": chunk_overlap,
        "length_function": len,
    }
    return RecursiveCharacterTextSplitter(**splitter_options).split_text(text)

# Clean and chunk the text
cleaned_text = preprocess_text(raw_text)
text_chunks = chunk_text(cleaned_text)

# Fail with a clear message instead of a ZeroDivisionError in the average below
# when the PDF produced no extractable text.
if not text_chunks:
    raise ValueError("No text chunks were produced; check that the PDF contains extractable text")

# Display statistics
average_chunk_length = sum(len(chunk) for chunk in text_chunks) / len(text_chunks)
print(f"Total chunks created: {len(text_chunks)}")
print(f"Average chunk length: {average_chunk_length}")

In [None]:
from langchain.embeddings import HuggingFaceEmbeddings

def create_embeddings(chunks, model_name="BAAI/bge-large-en", device="cpu"):
    """Embed text chunks with a HuggingFace model and index them in FAISS.

    Args:
        chunks: Iterable of text chunks to embed.
        model_name: HuggingFace model identifier. Defaults to "BAAI/bge-large-en".
        device: Device handed to the embedding model ("cpu" by default; use
            "cuda" if you have a GPU).

    Returns:
        The FAISS vector store built by ``FAISS.from_texts``.

    Raises:
        ValueError: If ``chunks`` is empty, since there is nothing to index.
    """
    chunks = list(chunks)
    if not chunks:
        raise ValueError("Cannot create a vector store from an empty list of chunks")

    embeddings = HuggingFaceEmbeddings(
        model_name=model_name,
        model_kwargs={'device': device},
        # Embeddings are requested normalized
        encode_kwargs={'normalize_embeddings': True},
    )

    # Create the vector store with FAISS
    return FAISS.from_texts(chunks, embeddings)

# Build the FAISS index over every chunk of the book
vector_store = create_embeddings(text_chunks)

# Confirm completion and report how many chunks were indexed
indexed_chunk_total = len(text_chunks)
print(f"Vector store created successfully with {indexed_chunk_total} chunks using BAAI/bge-large-en model.")

In [None]:
# Save the vector store to disk for future use
import pickle

def save_vector_store(vector_store, file_path="naval_vector_store.pkl"):
    """Pickle the vector store to ``file_path``.

    Missing parent directories are created first, so a path such as
    "models/naval_vector_store.pkl" works on a fresh checkout.

    Args:
        vector_store: The object to serialize with ``pickle``.
        file_path: Destination file. Defaults to "naval_vector_store.pkl" in
            the current working directory.
    """
    target = Path(file_path)
    # open(..., "wb") does not create directories; do it explicitly
    target.parent.mkdir(parents=True, exist_ok=True)
    with target.open("wb") as f:
        pickle.dump(vector_store, f)

def load_vector_store(file_path="naval_vector_store.pkl"):
    """Read a pickled vector store back from ``file_path``.

    Args:
        file_path: Pickle file to read. Defaults to "naval_vector_store.pkl".

    Returns:
        The unpickled object.
    """
    # NOTE: unpickling can execute arbitrary code; only load files you created
    # yourself or otherwise trust.
    with open(file_path, "rb") as handle:
        return pickle.load(handle)

# Save for future use: writes to the default "naval_vector_store.pkl" in the
# current working directory so later sessions can skip re-embedding.
save_vector_store(vector_store)

In [None]:
def get_relevant_context(query, vector_store, k=5):
    """Look up the passages most similar to ``query``.

    Args:
        query: The user's question.
        vector_store: Object exposing ``similarity_search(query, k=...)``.
        k: Number of results to request from the store.

    Returns:
        list: The ``page_content`` of each returned document, in the order the
        store returned them.
    """
    passages = []
    for match in vector_store.similarity_search(query, k=k):
        passages.append(match.page_content)
    return passages

# Smoke-test retrieval with a sample question
test_query = "What does Naval say about fundamental delusion?"
relevant_contexts = get_relevant_context(test_query, vector_store)

# Show only the first passage returned
top_context = relevant_contexts[0]
print(top_context)

In [None]:
# Inspect every passage retrieved for the sample query
relevant_contexts

In [None]:
def rank_contexts(contexts, query):
    """Placeholder ranking hook: currently returns ``contexts`` unchanged.

    Args:
        contexts: Retrieved passages.
        query: The user's question. Accepted for a future ranking step but not
            used by the current implementation.

    Returns:
        The same ``contexts`` object that was passed in, in its original order.
    """
    # This could be enhanced with a more sophisticated ranking algorithm
    return contexts

def format_contexts(contexts):
    """Render retrieved passages as one labelled block for the prompt.

    Args:
        contexts: Iterable of passage strings.

    Returns:
        str: A fixed header line, a blank line, then the passages separated by
        a "---" divider that has a blank line on each side.
    """
    header = "RELEVANT PASSAGES FROM 'THE ALMANACK OF NAVAL RAVIKANT':"
    divider = "\n\n---\n\n"
    body = divider.join(contexts)
    return header + "\n\n" + body

In [None]:
def create_system_prompt():
    """Return the fixed system prompt that defines the chatbot's behavior.

    The prompt tells the model to answer from the supplied book passages, to
    keep Naval's voice, to quote only what the book supports, and to admit when
    a topic is not covered.

    Returns:
        str: The system prompt text. It takes no parameters and is the same on
        every call.
    """
    # The literal sits inside an indented function body, so every continuation
    # line of the prompt carries its four leading spaces into the returned text.
    return """You are a conversational AI assistant who has thoroughly studied 'The Almanack of Naval Ravikant'. 
    
    Your purpose is to engage in conversations about Naval's philosophy, perspectives, and wisdom as presented in the book.
    
    Guidelines:
    1. Base your responses primarily on the provided context from the book.
    2. Maintain Naval's distinctive voice and communication style.
    3. Use Naval's actual quotes when available and appropriate.
    4. Be honest about the limitations of your knowledge. If a user asks about something not covered in the book, acknowledge this.
    5. Don't make up quotes or attribute ideas to Naval that aren't supported by the book.
    6. Keep responses concise and clear, similar to Naval's communication style.
    
    Remember, your goal is to accurately represent Naval's ideas as presented in 'The Almanack of Naval Ravikant', not to provide general wisdom or advice."""

def create_prompt(query, contexts, chat_history):
    """Build the system and user prompts for one chat turn.

    Earlier turns are placed ahead of the query in the user prompt so the model
    can resolve follow-up questions. With an empty history the user prompt is
    just the query, the passages and the closing instruction.

    Args:
        query: The user's current question.
        contexts: Retrieved passages, formatted via ``format_contexts``.
        chat_history: Iterable of ``(question, answer)`` pairs from earlier
            turns; may be empty or None.

    Returns:
        tuple: ``(system_prompt, user_prompt)`` strings.
    """
    system_prompt = create_system_prompt()
    formatted_context = format_contexts(contexts)

    # Format chat history; each turn ends with a blank line, so the block can
    # be placed directly in front of the QUERY line.
    formatted_history = ""
    if chat_history:
        turns = "".join(f"User: {q}\nNaval Bot: {a}\n\n" for q, a in chat_history)
        formatted_history = "PREVIOUS CONVERSATION:\n" + turns

    user_prompt = f"""{formatted_history}QUERY: {query}

{formatted_context}

Based on the provided passages from Naval Ravikant's book, please respond to the query in Naval's voice and perspective."""

    return system_prompt, user_prompt

In [None]:
def get_gpt4_response(system_prompt, user_prompt, model="gpt-4", temperature=0.7, max_tokens=800):
    """Send one chat-completion request and return the reply text.

    Works with both generations of the ``openai`` package: the client-based
    API (openai>=1.0, detected by the presence of ``openai.OpenAI``) and the
    legacy module-level ``openai.ChatCompletion`` API.

    Args:
        system_prompt: Content of the "system" message.
        user_prompt: Content of the "user" message.
        model: Chat model name. Defaults to "gpt-4".
        temperature: Sampling temperature. Defaults to 0.7.
        max_tokens: Upper bound on reply length in tokens. Defaults to 800.

    Returns:
        The ``content`` of the first choice's message.
    """
    request = {
        "model": model,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        "temperature": temperature,
        "max_tokens": max_tokens,
    }

    if hasattr(openai, "OpenAI"):
        # openai>=1.0: ChatCompletion was removed; use a client instance and
        # reuse the key already configured on the module, if any.
        client = openai.OpenAI(api_key=getattr(openai, "api_key", None))
        response = client.chat.completions.create(**request)
    else:
        # openai<1.0: legacy module-level API
        response = openai.ChatCompletion.create(**request)

    return response.choices[0].message.content

5.3 Create Conversation Manager

In [None]:
class NavalChatbot:
    """Retrieval-augmented chatbot that keeps a running conversation log.

    Attributes are set in ``__init__``: ``vector_store`` is the store queried
    for passages, and ``chat_history`` is a list of ``(query, response)`` pairs.
    """

    def __init__(self, vector_store):
        """Store the vector store and start with an empty history."""
        self.chat_history = []
        self.vector_store = vector_store

    def ask(self, query, k=5):
        """Answer ``query`` and record the exchange.

        Args:
            query: The user's question.
            k: Number of passages to retrieve for this question.

        Returns:
            The model's reply, which is also appended to ``chat_history``
            together with the query.
        """
        # Retrieve supporting passages, build both prompts, then call the model
        passages = get_relevant_context(query, self.vector_store, k=k)
        prompts = create_prompt(query, passages, self.chat_history)
        answer = get_gpt4_response(*prompts)

        # Log the turn only after the model call has succeeded
        self.chat_history.append((query, answer))
        return answer

    def reset_conversation(self):
        """Forget all previous turns by replacing the history with a new list."""
        self.chat_history = list()

# Create the chatbot on top of the vector store built earlier
naval_bot = NavalChatbot(vector_store)

# Ask one sample question to confirm the full pipeline works end to end
sample_question = "What does Naval believe about happiness?"
response = naval_bot.ask(sample_question)
print(response)

In [None]:
def create_chat_ui(bot=None):
    """Create an interactive chat UI using IPython widgets.

    Args:
        bot: Optional chatbot exposing ``ask(query)`` and
            ``reset_conversation()``. When omitted, the notebook-level
            ``naval_bot`` is looked up each time a callback fires.

    Returns:
        The ``widgets.VBox`` holding the transcript, the input row and the
        reset button. The caller is responsible for displaying it.
    """
    # Standard-library import kept local so this cell needs no extra imports
    from html import escape

    user_color = "#4a4a4a"
    bot_color = "#1a5276"
    greeting = "Hello! I'm Naval Bot. Ask me anything about Naval Ravikant's philosophy from 'The Almanack of Naval Ravikant'."

    def resolve_bot():
        # Late lookup: the global may be (re)assigned after the UI is built
        return bot if bot is not None else naval_bot

    def render_bubble(speaker, text, color):
        # Escape user/model text so "<", ">" and "&" show literally instead of
        # being parsed as markup; keep line breaks visible in the bubble.
        safe_text = escape(str(text), quote=False).replace("\n", "<br>")
        return HTML(f"<div style='margin: 5px; padding: 5px; background-color: {color}; color: white; border-radius: 5px;'><b>{speaker}:</b> {safe_text}</div>")

    def show_greeting():
        display(render_bubble("Naval Bot", greeting, bot_color))

    # Create the widgets
    output = widgets.Output(layout={'border': '1px solid #666', 'min_height': '400px', 'width': '100%'})
    input_box = widgets.Text(placeholder='Type your message here...', layout={'width': '80%'})
    send_button = widgets.Button(description='Send', layout={'width': '19%'})
    reset_button = widgets.Button(description='Reset Chat', layout={'width': '100%'})

    # Layout the widgets
    input_area = widgets.HBox([input_box, send_button])
    container = widgets.VBox([output, input_area, reset_button])

    # Define callback functions
    def send_message(_):
        query = input_box.value.strip()
        input_box.value = ''
        if not query:
            # Nothing to send; avoid a pointless (and billable) API call
            return

        with output:
            display(render_bubble("You", query, user_color))
            response = resolve_bot().ask(query)
            display(render_bubble("Naval Bot", response, bot_color))

    def reset_chat(_):
        resolve_bot().reset_conversation()
        with output:
            clear_output()
            show_greeting()

    # Connect callbacks to widgets
    send_button.on_click(send_message)
    input_box.on_submit(send_message)
    reset_button.on_click(reset_chat)

    # Display initial message
    with output:
        show_greeting()

    return container

# Create and display the chat UI.
# The widget callbacks call the notebook-level `naval_bot`, so the cell that
# creates it must have run before a message is sent.
chat_ui = create_chat_ui()
display(chat_ui)

In [None]:
def evaluate_responses(test_questions, naval_bot):
    """Ask the bot each test question and tabulate the answers.

    Args:
        test_questions: Iterable of question strings, asked in order.
        naval_bot: Object exposing ``ask(question)``. The same instance is used
            for every question and is not reset in between.

    Returns:
        pandas.DataFrame: One row per question with "question" and "response"
        columns.
    """
    records = [
        # Manual or automated evaluation metrics could be added to each record
        {"question": prompt, "response": naval_bot.ask(prompt)}
        for prompt in test_questions
    ]
    return pd.DataFrame(records)

# Define test questions covering the main themes to check
test_questions = [
    "What does Naval say about wealth creation?",
    "How does Naval define happiness?",
    "What are Naval's thoughts on reading?",
    "What does Naval believe about the meaning of life?",
    "How does Naval approach decision-making?"
]

# Reset the chat history before evaluation so earlier interactive turns are
# not carried into the test run
naval_bot.reset_conversation()

# Run evaluation; the resulting DataFrame is shown as the cell output
evaluation_results = evaluate_responses(test_questions, naval_bot)
evaluation_results

In [None]:
# Full response text for the row labelled 3 (the fourth test question)
evaluation_results.loc[3, 'response']

In [None]:
# Example: Adjust the number of chunks retrieved based on evaluation
def optimize_chunk_retrieval(naval_bot, test_questions, k_values=(3, 5, 7, 10)):
    """Collect responses for several retrieval depths for side-by-side review.

    Args:
        naval_bot: Object exposing ``reset_conversation()`` and
            ``ask(question, k=...)``.
        test_questions: Iterable of question strings; it is iterated once per
            value of k, so pass a list or tuple rather than a one-shot iterator.
        k_values: Retrieval depths to try. The default is a tuple so the
            default cannot be mutated between calls.

    Returns:
        pandas.DataFrame: One row per (k, question) pair with "k", "question"
        and "response" columns, ordered by k and then by question.
    """
    results = []

    for k in k_values:
        # Start each k from a clean history so the runs do not affect each other
        naval_bot.reset_conversation()

        # Test with different k values
        for question in test_questions:
            response = naval_bot.ask(question, k=k)
            results.append({
                "k": k,
                "question": question,
                "response": response
            })

    return pd.DataFrame(results)

# Run optimization (this would typically require manual review of results)
# optimization_results = optimize_chunk_retrieval(naval_bot, test_questions[:2])

In [None]:
# Integrate all components into a single function
def launch_naval_chatbot(
    pdf_path="data/almanack_of_naval_ravikant.pdf",
    store_path="models/naval_vector_store.pkl",
):
    """Launch the complete Naval Ravikant chatbot.

    Loads the saved vector store from ``store_path``. If that fails, rebuilds
    it from the PDF at ``pdf_path`` and saves it for next time. Then creates
    the chatbot and displays the chat UI.

    Args:
        pdf_path: PDF used to rebuild the vector store when no saved store can
            be loaded. Update with your file path.
        store_path: Pickle file the vector store is loaded from and saved to.

    Returns:
        NavalChatbot: The chatbot built on the loaded or rebuilt vector store.

    Note:
        ``create_chat_ui()`` is called with no arguments, so the widget
        callbacks talk to the notebook-level ``naval_bot`` name. Assign this
        function's return value to ``naval_bot`` so the UI and the returned
        object are the same chatbot.
    """
    # Load the vector store (or create it if not available)
    try:
        vector_store = load_vector_store(store_path)
        print("Loaded existing vector store.")
    except Exception as load_error:
        # `Exception` rather than a bare except, so Ctrl-C / kernel interrupts
        # still propagate, and the reason for the rebuild is reported.
        print(f"Could not load vector store from {store_path!r}: {load_error!r}")
        print("Creating new vector store...")
        # Load and process the book
        raw_text = extract_text_from_pdf(pdf_path)
        cleaned_text = preprocess_text(raw_text)
        text_chunks = chunk_text(cleaned_text)

        # Create embeddings
        vector_store = create_embeddings(text_chunks)

        # Make sure the target directory exists before saving
        Path(store_path).parent.mkdir(parents=True, exist_ok=True)
        save_vector_store(vector_store, store_path)

    # Initialize chatbot
    naval_bot = NavalChatbot(vector_store)

    # Display chat UI
    chat_ui = create_chat_ui()
    display(chat_ui)

    return naval_bot

# Launch the chatbot. The result is bound to `naval_bot`, the name the chat UI
# callbacks look up, so the UI talks to the instance returned here.
naval_bot = launch_naval_chatbot()