# Installation And Imports

In [5]:
%pip install -r requirements.txt

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [6]:
import os
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
import chromadb
import openai
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

# Set paths
DATA_FOLDER = r"rag_files"  # Folder containing text files for RAG
CHROMA_PATH = r"Vitalik_db"

# Ensure the ChromaDB directory exists and has proper permissions
if not os.path.exists(CHROMA_PATH):
    os.makedirs(CHROMA_PATH)
os.chmod(CHROMA_PATH, 0o777)

# Initialize ChromaDB client
chroma_client = chromadb.PersistentClient(path=CHROMA_PATH)
collection_name = "ai_persona"

# Create or get the collection
collection = chroma_client.get_or_create_collection(name=collection_name)


# Creating Vector DB

In [None]:
# Delete existing collection if it exists
try:
    chroma_client.delete_collection(name=collection_name)
except Exception:
    print("No existing collection to delete.")

# Load and process all text files in the folder
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
    length_function=len,
    is_separator_regex=False,
)

for file_name in os.listdir(DATA_FOLDER):
    file_path = os.path.join(DATA_FOLDER, file_name)

    # Ensure it's a text file
    if os.path.isfile(file_path) and file_name.endswith(".txt"):
        loader = TextLoader(file_path=file_path, encoding="utf-8")
        raw_documents = loader.load()

        chunks = text_splitter.split_documents(raw_documents)

        # Prepare documents, metadata, and IDs
        documents = [chunk.page_content for chunk in chunks]
        ids = [f"{file_name}_ID{i}" for i, _ in enumerate(chunks)]
        metadata = [{"source": file_name}] * len(chunks)

        # Upsert into ChromaDB
        collection.upsert(
            documents=documents,
            metadatas=metadata,
            ids=ids,
        )
        print(f"Processed and added {file_name} to ChromaDB.")

print("All persona details successfully added to ChromaDB.")


No existing collection to delete.
Processed and added persona_details.txt to ChromaDB.
All persona details successfully added to ChromaDB.


# Chating

## Without Stream

In [4]:
openai.api_key = os.getenv("OPENAI_API_KEY") 

conversation_history = []

while True:
    user_query = input("\nWhat would you like to ask the Vitalik Buterin?\n\n")

    conversation_history.append({"role": "user", "content": user_query})

    results = collection.query(
        query_texts=[user_query],
        n_results=4
    )

    retrieved_context = results["documents"][0] if results["documents"] else "This isn't something I have a solid answer for at the moment, but it's a fascinating question that might require more exploration or context."

    system_prompt = f"""You are Vitalik Buterin, co-founder of Ethereum and a thought leader in blockchain, cryptocurrency, and decentralized technologies. Your expertise spans cryptographic protocols, game theory, and decentralized governance, and you are known for your ability to distill complex concepts into accessible insights. Your tone can range from analytical and precise to casual and thought-provoking, depending on the context and audience.
    For the purpose of this conversation, your responses will focus on blockchain, Ethereum, decentralized finance (DeFi), cryptography, and the societal implications of these technologies. You will be provided with relevant text snippets from tweets, blogs, or other sources retrieved by a RAG (retrieval-augmented generation) system. Your role is to integrate the style, tone, and key ideas from these snippets into your responses, ensuring a seamless and authentic representation of your persona.

    ## Guidelines:
    1. **Adapt Tone:** Mimic the tone of the retrieved text (e.g., concise and technical for tweets, analytical and exploratory for blogs, conversational and engaging for informal posts). Maintain consistency with the source material while staying true to your persona as Vitalik.
    2. **Content-Driven Responses:** Use the retrieved snippets as the foundation of your responses. Treat the information as if it is your own knowledge and integrate it naturally. Do not explicitly mention or refer to the retrieved sources.
    3. **Concise or Detailed:** Provide concise, insightful answers by default. Only elaborate into detailed explanations or long-form content if explicitly requested.
    4. **Stay On-Topic:** Focus exclusively on blockchain, Ethereum, and related societal, economic, and technical topics.
    5. **Continuity and Context Awareness:** Maintain the flow of the conversation by integrating recent messages into your responses while prioritizing relevance to the user's latest query.

    # Reference for Tone and context: 
    {retrieved_context}"""

    conversation_history.insert(0, {"role": "system", "content": system_prompt})
    print("DEBUGGING")
    print(f"\n\tretrieved_context - \t{retrieved_context}\n")
    print(f"\n\tconversation_history - \t{conversation_history}\n")

    client = openai.OpenAI(api_key=openai.api_key)
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=conversation_history,
    )

    ai_response = response.choices[0].message.content

    conversation_history.append({"role": "assistant", "content": ai_response})

    print("\n\nRESPONSE:")
    print("\tuser:   - ", user_query)
    print("\n\tVitalik Buterin:   - ", ai_response)

    if len(conversation_history) > 20:
        conversation_history = conversation_history[-20:]


DEBUGGING

	retrieved_context - 	['# About:\n  **Name:** Vitalik Buterin\n\n  **Background:**\n    * Russian-Canadian computer programmer, co-founder of Ethereum. \n    * Born January 31, 1994, in Kolomna, Russia. Immigrated to Canada at age six. \n    * Early passion for mathematics, programming, and economics. \n    * Discovered Bitcoin at 17, sparking a deep interest in cryptocurrencies. \n    * Dropped out of the University of Waterloo to focus on Ethereum after receiving a Thiel Fellowship. \n    * Led the development of Ethereum, a platform for decentralized applications (dApps) and smart contracts. \n    * Renowned for philanthropic endeavors, supporting AI safety and poverty reduction initiatives.', '**Personality:**\n    * Highly analytical and introspective. \n    * A visionary thinker, focused on the long-term implications of emerging technologies. \n    * Open to new ideas but cautious about potential risks and unintended consequences. \n    * Humble and unassuming, rarely 

KeyboardInterrupt: Interrupted by user