In [19]:
import os
from getpass import getpass

os.environ["OPENAI_API_KEY"] = getpass("Enter your OpenAI API key: ")


Enter your OpenAI API key:  ········


In [31]:
import os
import zipfile


zip_path = "python-3.13-docs-text.zip"
extract_dir = "python_docs"

with zipfile.ZipFile(zip_path, "r") as zip_ref:
    zip_ref.extractall(extract_dir)

print("✅ ZIP extracted to:", extract_dir)


✅ ZIP extracted to: python_docs


In [32]:
from langchain.docstore.document import Document

documents = []

for root, _, files in os.walk("python_docs"):
    for file in files:
        if file.endswith(".txt"):
            with open(os.path.join(root, file), "r", encoding="utf-8") as f:
                text = f.read()
                documents.append(Document(page_content=text, metadata={"source": file}))

print(f"📄 Loaded {len(documents)} text documents.")


📄 Loaded 507 text documents.


In [33]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
split_docs = splitter.split_documents(documents)

print(f"✂️ Total text chunks: {len(split_docs)}")


✂️ Total text chunks: 15919


In [34]:
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from getpass import getpass
import os

# Ask for API key safely
os.environ["OPENAI_API_KEY"] = getpass("🔐 Enter your OpenAI API key: ")

# Generate embeddings
embeddings = OpenAIEmbeddings()
vectorstore = FAISS.from_documents(split_docs, embeddings)
retriever = vectorstore.as_retriever(search_kwargs={"k": 4})


🔐 Enter your OpenAI API key:  ········


In [35]:
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA

llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    return_source_documents=True
)


In [36]:
query = "What does the print() function do in Python?"
result = qa_chain(query)

print("\n📘 Answer:")
print(result["result"])

print("\n🔗 Sources:")
for doc in result["source_documents"]:
    print(" -", doc.metadata["source"])



📘 Answer:
The `print()` function in Python is used to display output to the console. It writes the value of the argument(s) it is given. It differs from just writing the expression you want to write in the way it handles multiple arguments, floating-point quantities, and strings. Strings are printed without quotes, and a space is inserted between items.

🔗 Sources:
 - 3.0.txt
 - introduction.txt
 - 3.0.txt
 - 3.4.txt


In [38]:
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory

# Create memory to store previous exchanges
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

# Create the conversational chain
conversation_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
    return_source_documents=True
)


  memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)


In [39]:
print("🤖 Ask me anything about Python docs! Type 'exit' to quit.\n")

while True:
    query = input("🧑 You: ")
    if query.lower() in ["exit", "quit", "bye"]:
        print("👋 Goodbye!")
        break

    result = conversation_chain({"question": query})

    print("\n🤖 Answer:\n" + result["answer"])

    print("\n📚 Sources:")
    for doc in result["source_documents"]:
        print(" -", doc.metadata["source"])
    
    print("\n---")


🤖 Ask me anything about Python docs! Type 'exit' to quit.



🧑 You:  give me the "if, else" function


ValueError: Got multiple output keys: dict_keys(['answer', 'source_documents']), cannot determine which to store in memory. Please set the 'output_key' explicitly.