In [None]:
#Importing necessary libraries
import os
from dotenv import load_dotenv
from anthropic import Client
from dotenv import load_dotenv
import sys
from faker import Faker
import random
import gradio as gr
from langchain_community.document_loaders import DirectoryLoader, TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_anthropic import ChatAnthropic
from langchain_classic.memory import ConversationBufferMemory
from langchain_classic.chains import ConversationalRetrievalChain

!{sys.executable} -m pip install faker




In [None]:

# Load the variables defined in the .env file into the process environment
load_dotenv(override=True)

# Read the Anthropic key; it is stored under the name ANTHROPIC_API_KEY in .env
ANTHROPIC_KEY = os.getenv("ANTHROPIC_API_KEY")

# Fail fast with an actionable message instead of letting a missing key surface
# later as an authentication error from the API call below.
if not ANTHROPIC_KEY:
    raise RuntimeError(
        "ANTHROPIC_API_KEY is not set. Add it to your .env file and re-run this cell."
    )

# If LangChain ever fails to pick the key up on its own, it can be force-exported
# to the OS environment (kept commented out because the key loaded fine without it):
#os.environ["ANTHROPIC_API_KEY"] = ANTHROPIC_KEY

client = Client(api_key=ANTHROPIC_KEY)

# List the Anthropic models this key can access
models = client.models.list()
for model in models:
    print(model.id)


In [None]:
# Print the path of the Python executable this kernel runs on, i.e. the
# interpreter the faker install step targets via sys.executable.
print(sys.executable)

In [None]:
# Create a fake person with Faker and store it as markdown files that make up
# the knowledge base. Files that already exist are never regenerated.
fake = Faker()
base_dir = "knowledge_base"
folders = ["personal", "projects", "learning"]

# Create the knowledge-base folders if they don't exist
for folder in folders:
    os.makedirs(f"{base_dir}/{folder}", exist_ok=True)

# One markdown file per folder
personal_file = f"{base_dir}/personal/info.md"
projects_file = f"{base_dir}/projects/projects.md"
learning_file = f"{base_dir}/learning/learning.md"


def build_personal_text():
    """Return markdown for a random personal profile (name, profession, bio, 3 jobs)."""
    name = fake.name()
    profession = random.choice(["Data Analyst", "Business Analyst", "Software Engineer", "AI Specialist"])
    bio = fake.paragraph(nb_sentences=5)
    experience = "\n".join([f"- {fake.job()} at {fake.company()} ({fake.year()})" for _ in range(3)])
    return f"""
# Personal Profile
Name: {name}  
Profession: {profession}  

Bio: {bio}

## Experience
{experience}
"""


def build_projects_text():
    """Return markdown listing five randomly generated projects."""
    projects = "\n".join([
        f"- **{fake.catch_phrase()}** — {fake.bs().capitalize()} for {fake.company()}."
        for _ in range(5)
    ])
    return f"""
# Projects Portfolio

Key Projects:
{projects}
"""


def build_learning_text():
    """Return markdown with six learning notes on randomly picked topics."""
    topics = ["LangChain", "RAG Systems", "Vector Databases", "AI Ethics", "Prompt Engineering", "Data Visualization"]
    learning = "\n".join([
        f"- {random.choice(topics)} — {fake.sentence(nb_words=8)}"
        for _ in range(6)
    ])
    return f"""
# Learning Journey

Recent Topics and Notes:
{learning}
"""


def write_if_missing(path, build_text, created_msg, skipped_msg):
    """Write ``build_text()`` to ``path`` unless the file already exists.

    Args:
        path: destination file path.
        build_text: zero-argument callable returning the file content; it is
            only called when the file is missing, so no fake data is generated
            for files that are skipped.
        created_msg: message printed after the file has been written.
        skipped_msg: message printed when the file already exists.
    """
    if os.path.exists(path):
        print(skipped_msg)
        return
    # Build the text before opening the file so a failure while generating
    # content does not leave an empty file behind.
    text = build_text()
    with open(path, "w") as f:
        f.write(text)
    print(created_msg)


# Same check/generate/report step for each of the three files
for path, build_text, created_msg, skipped_msg in [
    (personal_file, build_personal_text,
     "Personal info generated.",
     "ℹPersonal info already exists. Skipping regeneration."),
    (projects_file, build_projects_text,
     "Projects generated.",
     "ℹProjects already exist. Skipping regeneration."),
    (learning_file, build_learning_text,
     "Learning notes generated.",
     "ℹLearning notes already exist. Skipping regeneration."),
]:
    write_if_missing(path, build_text, created_msg, skipped_msg)


In [None]:
# Read every markdown file found under the knowledge_base folder as plain text
loader = DirectoryLoader(
    "knowledge_base",
    glob="**/*.md",
    loader_cls=TextLoader,
)
documents = loader.load()

# Cut the loaded documents into chunks for embedding.
# NOTE(review): an overlap of 80 on a chunk size of 100 repeats most of each
# chunk in the next one; confirm these values are intentional.
splitter = CharacterTextSplitter(
    chunk_overlap=80,
    chunk_size=100,
)
chunks = splitter.split_documents(documents)

print(f"Loaded {len(documents)} documents and created {len(chunks)} chunks.")


In [None]:
# Create the embeddings model used to vectorise the chunks
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Drop any collection left in 'chroma_db' by an earlier run. Without this,
# re-running the cell appends the same chunks to the persisted collection again
# and the retriever starts returning duplicate results.
if os.path.exists("chroma_db"):
    Chroma(persist_directory="chroma_db", embedding_function=embeddings).delete_collection()

# Chroma as the vector store, persisted on disk under 'chroma_db'
vectorstore = Chroma.from_documents(chunks, embeddings, persist_directory="chroma_db")
# NOTE(review): persist() is reported as unnecessary with recent Chroma
# releases, which persist automatically; confirm before removing the call.
vectorstore.persist()

print("Vector store created and saved to 'chroma_db'.")


In [None]:
# Check the installed LangChain versions; the packages were reorganised
# recently, which makes version mismatches a common source of errors.
# The previous `print(langchain.__version__)` raised NameError because the
# `langchain` module was never imported in this notebook.
from importlib.metadata import PackageNotFoundError, version


def installed_version(package):
    """Return the installed version of ``package`` or "not installed".

    Args:
        package: distribution name as known to pip (e.g. "langchain-classic").

    Returns:
        The version string reported by the package metadata, or the literal
        string "not installed" when the distribution cannot be found.
    """
    try:
        return version(package)
    except PackageNotFoundError:
        return "not installed"


for package in (
    "langchain",
    "langchain-classic",
    "langchain-community",
    "langchain-anthropic",
    "langchain-text-splitters",
):
    print(f"{package}: {installed_version(package)}")


In [None]:
# The main LangChain abstractions wired together here: LLM, Retriever and Memory

# LLM: one of the Anthropic models from the list printed earlier
llm = ChatAnthropic(
    anthropic_api_key=ANTHROPIC_KEY,
    model="claude-sonnet-4-5-20250929",
    max_tokens=1024,
    temperature=0.6,
)

# Retriever: fetches the 3 best-matching chunks from the vector store
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

# Memory: keeps the conversation history under the "chat_history" key
memory = ConversationBufferMemory(return_messages=True, memory_key="chat_history")

# Bring everything together into the conversational RAG chain
conversation_chain = ConversationalRetrievalChain.from_llm(
    memory=memory,
    retriever=retriever,
    llm=llm,
)

print("Anthropic conversational retriever is ready!")


In [None]:
# Callback used by the Gradio chat interface
def chat(message, history):
    """Answer ``message`` with the conversational chain, or fall back to retrieval only.

    Args:
        message: the user's latest message text.
        history: previous turns passed in by Gradio; not used here because the
            chain is built with its own conversation memory.

    Returns:
        The chain's answer, or, when no chain is available, an
        "(Offline Mode)" message containing up to the first 1000 characters
        of the most relevant retrieved chunks.
    """
    # Look the chain up by name so a chain cell that was never run (or failed)
    # leads to the fallback below instead of a NameError.
    chain = globals().get("conversation_chain")
    if chain:
        result = chain.invoke({"question": message})
        return result["answer"]

    # Retrieval-only fallback. `invoke` replaces the deprecated
    # `get_relevant_documents` retriever method.
    docs = retriever.invoke(message)
    context = "\n\n".join([d.page_content for d in docs])
    return f"(Offline Mode)\nTop relevant info:\n\n{context[:1000]}"


In [None]:
# Custom CSS for a dark look: dark page/container background with light text,
# and slightly lighter backgrounds for the text inputs.
css = """
body {background-color: #0f1117; color: #e6e6e6;}
.gradio-container {background-color: #0f1117 !important;}
textarea, input, .wrap.svelte-1ipelgc {background-color: #1b1f2a !important; color: #ffffff !important;}
"""

# Gradio Blocks layout: an HTML title/description followed by the chat UI
with gr.Blocks(css=css, theme="gradio/monochrome") as demo:
    gr.Markdown(
        """
        <h2 style="color: #f5f5f5;">Personal Knowledge Worker</h2>
        <p style="color: #f5f5f5;">Chat with your auto-generated knowledge base (Claude-powered if available)</p>
        """,
        elem_id="title"
    )
    # Chat widget that sends each user message to the `chat` callback
    gr.ChatInterface(chat, type="messages")

# Start the app; inbrowser=True asks Gradio to open it in a browser tab
demo.launch(inbrowser=True)
