In [1]:
# !pip install langchain
# !pip install openai
# !pip install -U langchain-community
# !pip install tiktoken
# !pip install faiss-gpu

In [2]:
import os
import zipfile


zip_path = "python-3.13-docs-text.zip"
extract_dir = "python_docs"

with zipfile.ZipFile(zip_path, "r") as zip_ref:
    zip_ref.extractall(extract_dir)

print("✅ ZIP extracted to:", extract_dir)


✅ ZIP extracted to: python_docs


In [3]:
from langchain.docstore.document import Document

documents = []

for root, _, files in os.walk("python_docs"):
    for file in files:
        if file.endswith(".txt"):
            with open(os.path.join(root, file), "r", encoding="utf-8") as f:
                text = f.read()
                documents.append(Document(page_content=text, metadata={"source": file}))

print(f"📄 Loaded {len(documents)} text documents.")


📄 Loaded 507 text documents.


In [4]:
#split documents into chunks
from langchain.text_splitter import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
split_docs = splitter.split_documents(documents)

print(f"✂️ Total text chunks: {len(split_docs)}")


✂️ Total text chunks: 15919


In [6]:
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from getpass import getpass
import os

# Ask for API key safely
os.environ["OPENAI_API_KEY"] = getpass("🔐 Enter your OpenAI API key: ")

# Generate embeddings
embeddings = OpenAIEmbeddings()
vectorstore = FAISS.from_documents(split_docs, embeddings)
retriever = vectorstore.as_retriever(search_kwargs={"k": 4})


🔐 Enter your OpenAI API key:  ········


In [7]:
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA

llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    return_source_documents=True
)


  llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)


In [8]:
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory

memory = ConversationBufferMemory(
    memory_key="chat_history", 
    return_messages=True,
    output_key="answer"  # ✅ tell it to store only the answer
)

conversation_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
    return_source_documents=True,
    output_key="answer"  # ✅ same fix here
)


  memory = ConversationBufferMemory(


In [None]:
print("🤖 Ask me anything about Python docs! Type 'exit' to quit.\n")

while True:
    query = input("🧑 You: ")
    if query.lower() in ["exit", "quit", "bye"]:
        print("👋 Goodbye!")
        break

    result = conversation_chain({"question": query})

    print("\n🤖 Answer:\n" + result["answer"])

    print("\n📚 Sources:")
    for doc in result["source_documents"]:
        print(" -", doc.metadata["source"])
    
    print("\n---")


🤖 Ask me anything about Python docs! Type 'exit' to quit.



🧑 You:  hello


  result = conversation_chain({"question": query})



🤖 Answer:
Hello! How can I assist you today?

📚 Sources:
 - itertools.txt
 - 3.11.txt
 - xml.txt
 - buffer.txt

---


🧑 You:  can you write me java script?



🤖 Answer:
I don't know how to write JavaScript.

📚 Sources:
 - appetite.txt
 - appetite.txt
 - appetite.txt
 - turtle.txt

---


🧑 You:  what about any other language



🤖 Answer:
Based on the context provided, Python is highlighted as a suitable language for automating tasks, developing GUI applications, and creating games. The context does not mention any other specific languages for writing programs.

📚 Sources:
 - appetite.txt
 - appetite.txt
 - codecs.txt
 - codecs.txt

---


🧑 You:  do it for java



🤖 Answer:
I don't know.

📚 Sources:
 - controlflow.txt
 - 3.10.txt
 - programming.txt
 - compound_stmts.txt

---


🧑 You:  quit


👋 Goodbye!
