In [2]:
import os
from dotenv import load_dotenv
from langchain_community.document_loaders import Docx2txtLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI

load_dotenv()

groq_api_key = os.getenv("GROQ_API_KEY")
if not groq_api_key:
    raise RuntimeError("❌ Please set GROQ_API_KEY in your .env file")

# 🔷 Load Word document
print("Loading Word document…")
loader = Docx2txtLoader("who are we.docx")  
docs = loader.load()
print(f"Loaded {len(docs)} document")

Loading Word document…
Loaded 1 document


In [3]:
docs

[Document(metadata={'source': 'who are we.docx'}, page_content="Global Tech Leaders\n\nFounded in 1982 as a division of Jaffer Brothers, JBS has been at the forefront of technology solutions for over four decades. With a strong foundation built on innovation, customer focus, and industry expertise, we have served over 1,000 delighted customers and completed major transformation projects that have shaped the technology landscape in Pakistan and beyond.\xa0\n\nJBS defines itself through a commitment to solving business problems with technology, partnering with 40+ global leaders like HP, Microsoft, Oracle, and SAP. Our team brings together unmatched skills in complex technology implementations, system integration, managed services, and large-scale IT digital transformation projects. This expertise drives our mission to enhance our clients’ capabilities and equip them for the future.\xa0\n\nVision\n\nOur vision is to partner in transforming businesses and the economy. By 2030, we aim to g

In [4]:
print("📄 FULL DOCUMENT TEXT:\n")
for i, doc in enumerate(docs, 1):
    print(f"\n--- Document {i} ---\n")
    print(doc.page_content.strip())
    print("\n" + "="*80 + "\n")


📄 FULL DOCUMENT TEXT:


--- Document 1 ---

Global Tech Leaders

Founded in 1982 as a division of Jaffer Brothers, JBS has been at the forefront of technology solutions for over four decades. With a strong foundation built on innovation, customer focus, and industry expertise, we have served over 1,000 delighted customers and completed major transformation projects that have shaped the technology landscape in Pakistan and beyond. 

JBS defines itself through a commitment to solving business problems with technology, partnering with 40+ global leaders like HP, Microsoft, Oracle, and SAP. Our team brings together unmatched skills in complex technology implementations, system integration, managed services, and large-scale IT digital transformation projects. This expertise drives our mission to enhance our clients’ capabilities and equip them for the future. 

Vision

Our vision is to partner in transforming businesses and the economy. By 2030, we aim to generate Rs. 100 billion, establish

In [5]:
splitter = RecursiveCharacterTextSplitter(
    chunk_size=900,      
    chunk_overlap=200
)
chunks = splitter.split_documents(docs)
print(f"✅ Created {len(chunks)} chunks\n")

print("📄 CHUNKS:\n")
for i, chunk in enumerate(chunks, 1):
    print(f"\n--- Chunk {i} ---\n")
    print(chunk.page_content.strip())
    print("\n" + "="*50)


✅ Created 17 chunks

📄 CHUNKS:


--- Chunk 1 ---

Global Tech Leaders

Founded in 1982 as a division of Jaffer Brothers, JBS has been at the forefront of technology solutions for over four decades. With a strong foundation built on innovation, customer focus, and industry expertise, we have served over 1,000 delighted customers and completed major transformation projects that have shaped the technology landscape in Pakistan and beyond. 

JBS defines itself through a commitment to solving business problems with technology, partnering with 40+ global leaders like HP, Microsoft, Oracle, and SAP. Our team brings together unmatched skills in complex technology implementations, system integration, managed services, and large-scale IT digital transformation projects. This expertise drives our mission to enhance our clients’ capabilities and equip them for the future. 

Vision


--- Chunk 2 ---

Vision

Our vision is to partner in transforming businesses and the economy. By 2030, we aim to gen

In [6]:
with open("chunks_output.txt", "w", encoding="utf-8") as f:
    for i, chunk in enumerate(chunks, 1):
        f.write(f"--- Chunk {i} ---\n")
        f.write(chunk.page_content.strip() + "\n")
        f.write("="*50 + "\n")
print("\n✅ Chunks saved to: chunks_output.txt")



✅ Chunks saved to: chunks_output.txt


In [7]:
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
vectorstore = FAISS.from_documents(chunks, embeddings)

# Save vectorstore to disk
vectorstore.save_local("faiss_index")
print("\n✅ FAISS vectorstore saved to: ./faiss_index")


  embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (



✅ FAISS vectorstore saved to: ./faiss_index


In [8]:
print(f"Found {len(chunks)} chunks.")

texts = [doc.page_content for doc in chunks]


vectors = embeddings.embed_documents(texts)

print(f"Generated {len(vectors)} vectors, each of length {len(vectors[0])}")
print("First vector (truncated):", vectors[0][:10]) 


Found 17 chunks.
Generated 17 vectors, each of length 384
First vector (truncated): [-0.0659935250878334, 0.009294958785176277, 0.024186480790376663, -0.06285223364830017, -0.019748229533433914, -0.029944555833935738, 0.02437896840274334, 0.010412868112325668, -0.04129697382450104, -0.04916885867714882]


In [9]:
print(f"\nFound {len(chunks)} chunks.")
texts = [doc.page_content for doc in chunks]

# Embed texts again just for printing (FAISS already saved above)
vectors = embeddings.embed_documents(texts)

print(f"Generated {len(vectors)} vectors, each of length {len(vectors[0])}\n")

for idx, (text, vector) in enumerate(zip(texts, vectors), 1):
    print(f"Chunk {idx}:")
    print(f"Text: {text[:100]}{'...' if len(text) > 100 else ''}")
    print(f"Embedding (first 10 dims): {vector[:10]}\n")



Found 17 chunks.
Generated 17 vectors, each of length 384

Chunk 1:
Text: Global Tech Leaders

Founded in 1982 as a division of Jaffer Brothers, JBS has been at the forefront...
Embedding (first 10 dims): [-0.0659935250878334, 0.009294958785176277, 0.024186480790376663, -0.06285223364830017, -0.019748229533433914, -0.029944555833935738, 0.02437896840274334, 0.010412868112325668, -0.04129697382450104, -0.04916885867714882]

Chunk 2:
Text: Vision

Our vision is to partner in transforming businesses and the economy. By 2030, we aim to gene...
Embedding (first 10 dims): [0.01802521012723446, 0.01220346987247467, -0.01760541833937168, -0.06844226270914078, -0.04030395299196243, 0.04962880164384842, -0.006736312992870808, -0.01173414010554552, -0.023910842835903168, -0.05457276105880737]

Chunk 3:
Text: Strategic Initiatives

Excellence

Excellence in business is at the heart of our strategy. With that...
Embedding (first 10 dims): [-0.014670129865407944, -0.0001573175104567781, -0.00826791

In [10]:
from langchain_community.vectorstores import FAISS

vectorstore = FAISS.load_local(
    "faiss_index",
    embeddings,
    allow_dangerous_deserialization=True
)

query = "What is the mission of the company?"
retrieved_docs = vectorstore.similarity_search(query, k=3)

print("🔎 Top 3 relevant chunks:")
for i, doc in enumerate(retrieved_docs, 1):
    print(f"\nResult {i}:\n{doc.page_content}")


🔎 Top 3 relevant chunks:

Result 1:
Vision

Our vision is to partner in transforming businesses and the economy. By 2030, we aim to generate Rs. 100 billion, establish our presence in 5 international locations and have 10 companies as part of our group. Our goal is to diversify and build strong partnerships worldwide.

Mission

Shaping Tomorrow, Today! JBS is more than a technology partner—we are architects of tomorrow. Our ongoing mission is to create solutions that are not only technologically advanced but also purpose-driven, ensuring that every project we undertake delivers on our promise to “Work Better.” Through our strategic pillars, we continue to support our clients and the communities we serve, driving progress that truly matters.

Strategic Initiatives

Excellence

Result 2:
Global Tech Leaders

Founded in 1982 as a division of Jaffer Brothers, JBS has been at the forefront of technology solutions for over four decades. With a strong foundation built on innovation, customer 

In [11]:
vectorstore = FAISS.load_local(
    "faiss_index",
    embeddings,
    allow_dangerous_deserialization=True
)

retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3}
)

llm = ChatOpenAI(
    model_name="llama3-70b-8192",
    openai_api_base="https://api.groq.com/openai/v1",
    openai_api_key=groq_api_key,
    temperature=0
)

qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff" 
)

print("\n🎯 You can now ask questions about your document. Type `exit` to quit.\n")

while True:
    query = input("❓ Your question: ").strip()
    if query.lower() in {"exit", "quit"}:
        print("👋 Goodbye!")
        break

    answer = qa_chain.run(query)
    print(f"\n💬 Answer: {answer}\n")
    print("="*60)


  llm = ChatOpenAI(



🎯 You can now ask questions about your document. Type `exit` to quit.



  answer = qa_chain.run(query)



💬 Answer: According to the provided context, the CEO of JBS is Veqar Ul Islam.

👋 Goodbye!


In [14]:
import numpy as np

vectorstore = FAISS.load_local(
    "faiss_index",
    embeddings,
    allow_dangerous_deserialization=True
)

retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3}
)

llm = ChatOpenAI(
    model_name="llama3-70b-8192",
    openai_api_base="https://api.groq.com/openai/v1",
    openai_api_key=groq_api_key,
    temperature=0
)

qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff" 
)

print("\n🎯 You can now ask questions about your document. Type `exit` to quit.\n")

while True:
    query = input("❓ Your question: ").strip()
    if query.lower() in {"exit", "quit"}:
        print("👋 Goodbye!")
        break

    # 🔷 Embed query & show embedding
    query_embedding = embeddings.embed_query(query)
    print(f"\n🧭 Query embedding (first 10 dims): {query_embedding[:10]}")

    # 🔷 Manually retrieve top 3 chunks and their embeddings
    docs = retriever.get_relevant_documents(query)
    print("\n🔎 Top 3 retrieved chunks & their embeddings:")
    for idx, doc in enumerate(docs, 1):
        # Embed the chunk text
        chunk_embedding = embeddings.embed_query(doc.page_content)
        print(f"\nChunk {idx}:")
        print(f"Text: {doc.page_content[:150]}{'...' if len(doc.page_content) > 150 else ''}")
        print(f"Embedding (first 10 dims): {chunk_embedding[:10]}")

    # 🔷 Get answer from LLM
    answer = qa_chain.run(query)
    print(f"\n💬 Answer: {answer}\n")
    print("="*60)



🎯 You can now ask questions about your document. Type `exit` to quit.


🧭 Query embedding (first 10 dims): [-0.04629238694906235, -0.025060830637812614, -0.0031494288705289364, -0.017059586942195892, -0.02136264741420746, 0.03477393090724945, 0.08747648447751999, 0.038983386009931564, 0.033607788383960724, -0.021158546209335327]

🔎 Top 3 retrieved chunks & their embeddings:

Chunk 1:
Text: Leadership Team

A leadership team of 10 committed individuals working together to achieve our goals.

CEO's Message

At JBS, we improve lives through...
Embedding (first 10 dims): [-0.053861409425735474, -0.0005651428364217281, 0.006284149829298258, -0.051151424646377563, 0.016119934618473053, 0.0144736897200346, 0.012752849608659744, -0.017463643103837967, 0.09126917272806168, -0.08578526973724365]

Chunk 2:
Text: Kashif Jadoon

CEO, Blutech Consulting

Kashif, with his plethora of experience and expertise is the present CEO of Blutech Consulting, a company acqu...
Embedding (first 10 dims): [-0.0

KeyboardInterrupt: Interrupted by user