<a href="https://colab.research.google.com/github/christancone/langchain_course/blob/main/softsora_demo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
# ‚úÖ Install with a compatible requests version
!pip -q install -U langchain langchain-openai langchain-community langchain-text-splitters faiss-cpu tiktoken python-dotenv


In [6]:
# 🔑 Set your API key (safer than hardcoding)
import os, getpass

# Prompt only when no key is configured yet, so re-running this cell (or running
# the whole notebook with the variable already exported) neither blocks on input
# nor overwrites an existing key.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter OPENAI_API_KEY: ")

Enter OPENAI_API_KEY: ··········


In [7]:
# 📁 1) Create a tiny docs set (you can replace these with your own .txt/.md files)
import pathlib, textwrap, os, json

docs_dir = pathlib.Path("docs")
docs_dir.mkdir(exist_ok=True)

# Sample corpus: file name -> text. The backslash right after the opening quotes
# drops the leading newline; the common indentation is removed by
# textwrap.dedent() when each file is written.
samples = {
    "leave_policy.txt": """\
    Annual leave can be carried over up to 5 days.
    Public holidays do not reduce annual leave.
    """,
    "faq.txt": """\
    Q: How do I reset my password?
    A: Use the self-service portal or contact IT.
    """,
}
for name, content in samples.items():
    # Write explicitly as UTF-8 instead of the platform default encoding, so the
    # files decode identically wherever they are later read as UTF-8.
    with open(docs_dir / name, "w", encoding="utf-8") as f:
        f.write(textwrap.dedent(content))

print("Docs:", os.listdir("docs"))


Docs: ['leave_policy.txt', 'faq.txt']


In [9]:
# ⚙️ 2) Build the RAG components (load → split → embed → index → retrieve)
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.document_loaders import DirectoryLoader, TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# Tunables gathered in one place so they are easy to find and adjust.
DOCS_DIR = "./docs"
DOCS_GLOB = "**/*.txt"       # change to "**/*.md" or run a second loader for md
CHUNK_SIZE = 400             # chunk_size handed to the text splitter
CHUNK_OVERLAP = 60           # chunk_overlap handed to the text splitter
TOP_K = 4                    # "k" requested from the retriever per query
CHAT_MODEL = "gpt-4o-mini"

# ✅ Use TextLoader to avoid the 'unstructured' dependency for .txt/.md
loader = DirectoryLoader(
    DOCS_DIR,
    glob=DOCS_GLOB,
    loader_cls=TextLoader,
    loader_kwargs={"encoding": "utf-8"}
)
docs = loader.load()

splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
chunks = splitter.split_documents(docs)

# Fail fast with an actionable message: with nothing to index, building the
# vector store below would otherwise fail with a far less helpful error.
if not chunks:
    raise ValueError(
        f"No text found under {DOCS_DIR!r} matching {DOCS_GLOB!r}; "
        "add documents before building the index."
    )

# Embed every chunk and index the vectors in an in-memory FAISS store.
embeddings = OpenAIEmbeddings()
vs = FAISS.from_documents(chunks, embeddings)
retriever = vs.as_retriever(search_kwargs={"k": TOP_K})

# The prompt restricts the model to the retrieved context and asks it to admit
# when the answer is not there.
prompt = ChatPromptTemplate.from_template(
    "Use the CONTEXT to answer the QUESTION. If the answer is not in the context, say you don't know.\n\n"
    "CONTEXT:\n{context}\n\nQUESTION:\n{question}"
)
llm = ChatOpenAI(model=CHAT_MODEL, temperature=0)

def format_docs(ds):
    """Merge the ``page_content`` of every document in ``ds`` into one string.

    Consecutive documents are separated by a blank line; an empty input
    yields an empty string.
    """
    pieces = [doc.page_content for doc in ds]
    return "\n\n".join(pieces)

# Chain wiring: the incoming question is sent to the retriever, whose hits are
# flattened by format_docs into the prompt's {context}, and is also passed through
# unchanged as {question}. The filled prompt goes to the model and the reply is
# run through StrOutputParser.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# Status line; the check-mark emoji replaces the garbled bytes previously emitted.
print("RAG pipeline ready ✅  (docs indexed:", len(chunks), "chunks )")


RAG pipeline ready ✅  (docs indexed: 2 chunks )


In [12]:
# 🧪 3) Helper to ask a question and show which files were used (new API)
import os

def answer_with_sources(question: str):
    """Answer ``question`` from the indexed docs and print it with its source files.

    Retrieval runs exactly once: the retrieved documents are used both to build
    the prompt context and to report sources, so the files listed are the ones
    whose text was put in front of the model.

    Args:
        question: The natural-language question to ask.

    Returns:
        None. The question, the answer and the de-duplicated source file names
        are printed to stdout.
    """
    retrieved = retriever.invoke(question)   # returns List[Document]

    # Feed the already-retrieved documents straight into prompt -> llm -> parser.
    # Invoking rag_chain here would run the retriever a second time for the
    # same question.
    answer = (prompt | llm | StrOutputParser()).invoke(
        {"context": format_docs(retrieved), "question": question}
    )

    # Base name of each document's "source" metadata; a missing or empty value
    # is reported as "unknown". dict.fromkeys de-duplicates while keeping
    # retrieval order.
    srcs = list(dict.fromkeys(
        os.path.basename((d.metadata or {}).get("source") or "unknown")
        for d in retrieved
    ))

    print("Q:", question, "\n")
    print("A:", answer, "\n")
    print("Sources:", srcs if srcs else ["<none>"])


In [13]:
# ▶️ 4) Run a couple of demo queries
# The last question has no answer in the sample docs.
demo_questions = (
    "How many days of annual leave can be carried over?",
    "Do public holidays reduce annual leave?",
    "How do I reset my password?",
    "What is our reimbursement policy?",
)
for demo_question in demo_questions:
    answer_with_sources(demo_question)


Q: How many days of annual leave can be carried over? 

A: Up to 5 days. 

Sources: ['leave_policy.txt', 'faq.txt']
Q: Do public holidays reduce annual leave? 

A: No, public holidays do not reduce annual leave. 

Sources: ['leave_policy.txt', 'faq.txt']
Q: How do I reset my password? 

A: Use the self-service portal or contact IT. 

Sources: ['faq.txt', 'leave_policy.txt']
Q: What is our reimbursement policy? 

A: I don't know. 

Sources: ['leave_policy.txt', 'faq.txt']


In [14]:
# 💬 (Optional) 5) Quick interactive loop
# Keeps asking until a blank line is entered; interrupting the cell also ends
# the loop quietly instead of showing a traceback.
ask_prompt = "Ask a question about your docs (blank to exit): "
try:
    q = input(ask_prompt).strip()
    while q:
        answer_with_sources(q)
        print("-" * 60)
        q = input(ask_prompt).strip()
except KeyboardInterrupt:
    pass
