## QMS_Data_Ingestion

In [23]:
import os
from langchain.chat_models import init_chat_model
from langchain.prompts import PromptTemplate
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.runnables import RunnableSequence
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from dotenv import load_dotenv
# Load settings from a local .env file via python-dotenv so the API keys read
# in the next cell can be found with os.getenv.
load_dotenv()

True

In [15]:
# Verify that every API key is present before it is re-exported.
# Assigning os.getenv(...)'s None directly into os.environ fails with an opaque
# "str expected, not NoneType" TypeError, so report the missing names instead.
_required_keys = ("GROQ_API_KEY", "OPENAI_API_KEY")
_missing_keys = [name for name in _required_keys if os.getenv(name) is None]
if _missing_keys:
    raise RuntimeError(
        "Missing environment variable(s): "
        + ", ".join(_missing_keys)
        + ". Define them in the shell or in the .env file before running this notebook."
    )

for _key_name in _required_keys:
    os.environ[_key_name] = os.getenv(_key_name)

### Document Loading

In [7]:
from langchain_community.document_loaders import DirectoryLoader,TextLoader

# Load every .txt file found (recursively) under ../data as UTF-8 text.
dir_loader = DirectoryLoader(
    "../data",
    glob="**/*.txt",  # pattern selecting which files are loaded
    loader_cls=TextLoader,  # loader class applied to each matched file
    loader_kwargs={"encoding": "utf-8"},
    show_progress=True,
)
documents = dir_loader.load()

print(f"Loaded {len(documents)} documents")

# Stop here with a readable message instead of an IndexError on documents[0];
# every later cell needs at least one loaded document.
if not documents:
    raise FileNotFoundError("No files matching '**/*.txt' were found under '../data'.")

print("\nFirst document preview:")
print(documents[0].page_content[:200] + "...")

100%|██████████| 11/11 [00:00<00:00, 3679.21it/s]

Loaded 11 documents

First document preview:
ISO 13485:2016
Clause: 4.1.1
Title: General Quality Management System Requirements

SUMMARY:
Clause 4.1.1 requires the organization to establish, document, implement, and maintain a Quality Management...





In [8]:
# Show only the first two documents; echoing the whole list floods the output.
documents[:2]

[Document(metadata={'source': '..\\data\\iso_13485\\4.1.1_general_qms_requirement.txt'}, page_content='ISO 13485:2016\nClause: 4.1.1\nTitle: General Quality Management System Requirements\n\nSUMMARY:\nClause 4.1.1 requires the organization to establish, document, implement, and maintain a Quality Management System (QMS). The organization must define the QMS scope and clearly identify its regulatory role.\n\nREQUIREMENTS:\n- Define the scope and perimeter of the QMS\n- Identify and document the regulatory role of the organization (manufacturer, importer, distributor, etc.)\n- Establish and maintain a documented QMS compliant with ISO 13485 and applicable regulatory requirements\n\nMANDATORY PROCESS:\nProcess Name: QMS Governance and Scope Definition\nProcess Description:\nThis process ensures that the organization formally defines its QMS scope, regulatory responsibilities, and establishes a documented system aligned with ISO 13485 requirements.\n\nREQUIRED DOCUMENTS:\n- Quality Manual\

### Document Splitting

In [12]:
# Split the loaded documents into overlapping chunks.
# The separator list is tried in order, so chunks break on paragraph and line
# boundaries first and only fall back to words, then to single characters
# (the "" entry lets an over-long token still be cut down to chunk_size).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1500,  # maximum chunk length, measured with length_function
    chunk_overlap=150,  # characters shared by neighbouring chunks to keep context
    length_function=len,
    separators=["\n\n", "\n", " ", ""],  # hierarchy: paragraph > line > word > character
)
chunks = text_splitter.split_documents(documents)

print(f"Created {len(chunks)} chunks from {len(documents)} documents")

# Only preview a chunk when the splitter actually produced one.
if chunks:
    print("\nChunk example:")
    print(f"Content: {chunks[0].page_content[:150]}...")
    print(f"Metadata: {chunks[0].metadata}")

Created 11 chunks from 11 documents

Chunk example:
Content: ISO 13485:2016
Clause: 4.1.1
Title: General Quality Management System Requirements

SUMMARY:
Clause 4.1.1 requires the organization to establish, docu...
Metadata: {'source': '..\\data\\iso_13485\\4.1.1_general_qms_requirement.txt'}


In [13]:
# Print each chunk's metadata with a 1-based position, using the loop variable
# directly instead of indexing back into the list.
for position, chunk in enumerate(chunks, start=1):
    print(f"metadata of chunk number {position} is {chunk.metadata}")

metadata of chunk number 1 is {'source': '..\\data\\iso_13485\\4.1.1_general_qms_requirement.txt'}
metadata of chunk number 2 is {'source': '..\\data\\iso_13485\\4.1.2_process_approach_risk_based.txt'}
metadata of chunk number 3 is {'source': '..\\data\\iso_13485\\4.1.3_process_monitoring_control.txt'}
metadata of chunk number 4 is {'source': '..\\data\\iso_13485\\4.1.4_qms_change_management.txt'}
metadata of chunk number 5 is {'source': '..\\data\\iso_13485\\4.1.5_outsourced_process_control.txt'}
metadata of chunk number 6 is {'source': '..\\data\\iso_13485\\4.1.6_qms_software_validation.txt'}
metadata of chunk number 7 is {'source': '..\\data\\iso_13485\\4.2.1_qms_documentation_requirements.txt'}
metadata of chunk number 8 is {'source': '..\\data\\iso_13485\\4.2.2_quality_manual.txt'}
metadata of chunk number 9 is {'source': '..\\data\\iso_13485\\4.2.3_medical_device_file.txt'}
metadata of chunk number 10 is {'source': '..\\data\\iso_13485\\4.2.4_document_control.txt'}
metadata of ch

### Embeddings

In [18]:
# Sample sentence used in the next cell to check the embedding dimensionality.
sample_text="MAchine LEarning is fascinating"

# Name the embedding model explicitly (it is the value the default resolves to)
# so vectors persisted later are not silently tied to a library default.
embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")
embeddings

OpenAIEmbeddings(client=<openai.resources.embeddings.Embeddings object at 0x00000173C740D6D0>, async_client=<openai.resources.embeddings.AsyncEmbeddings object at 0x00000173C84C1190>, model='text-embedding-ada-002', dimensions=None, deployment='text-embedding-ada-002', openai_api_version=None, openai_api_base=None, openai_api_type=None, openai_proxy=None, embedding_ctx_length=8191, openai_api_key=SecretStr('**********'), openai_organization=None, allowed_special=None, disallowed_special=None, chunk_size=1000, max_retries=2, request_timeout=None, headers=None, tiktoken_enabled=True, tiktoken_model_name=None, show_progress_bar=False, model_kwargs={}, skip_empty=False, default_headers=None, default_query=None, retry_min_seconds=4, retry_max_seconds=20, http_client=None, http_async_client=None, check_embedding_ctx_length=True)

In [19]:
# Embed the sample sentence and display the length of the returned vector.
vector=embeddings.embed_query(sample_text)
len(vector)

1536

### Initialize Vector Store

In [20]:
# Show only the first two chunks; echoing the whole list floods the output.
chunks[:2]

[Document(metadata={'source': '..\\data\\iso_13485\\4.1.1_general_qms_requirement.txt'}, page_content='ISO 13485:2016\nClause: 4.1.1\nTitle: General Quality Management System Requirements\n\nSUMMARY:\nClause 4.1.1 requires the organization to establish, document, implement, and maintain a Quality Management System (QMS). The organization must define the QMS scope and clearly identify its regulatory role.\n\nREQUIREMENTS:\n- Define the scope and perimeter of the QMS\n- Identify and document the regulatory role of the organization (manufacturer, importer, distributor, etc.)\n- Establish and maintain a documented QMS compliant with ISO 13485 and applicable regulatory requirements\n\nMANDATORY PROCESS:\nProcess Name: QMS Governance and Scope Definition\nProcess Description:\nThis process ensures that the organization formally defines its QMS scope, regulatory responsibilities, and establishes a documented system aligned with ISO 13485 requirements.\n\nREQUIRED DOCUMENTS:\n- Quality Manual\

In [24]:
## Create a Chroma vector store persisted on disk
# NOTE(review): "outpot_db" looks like a typo for "output_db"; it is left as-is
# because renaming it would relocate the persisted database.
persist_directory=".././outpot_db"

# Give every chunk a stable id ("<source>#chunk-<n>", n counted per source file).
# Without explicit ids each run stores the chunks under fresh random ids, so
# re-running this cell against the same persist_directory would duplicate them.
_next_index_by_source = {}
chunk_ids = []
for chunk in chunks:
    _source = str(chunk.metadata.get("source", "unknown"))
    _index = _next_index_by_source.get(_source, 0)
    _next_index_by_source[_source] = _index + 1
    chunk_ids.append(f"{_source}#chunk-{_index}")

## Initialize Chroma with OpenAI embeddings
vectorstore=Chroma.from_documents(
    documents=chunks,
    embedding=OpenAIEmbeddings(),
    ids=chunk_ids,
    persist_directory=persist_directory,
    collection_name="rag_collection",
)

print(f"Vector store created with {vectorstore._collection.count()} vectors")
print(f"Persisted to: {persist_directory}")

Vector store created with 11 vectors
Persisted to: .././outpot_db


In [30]:
# Question used to sanity-check retrieval against the vector store.
query="what is requirements of General Quality Management System Requirements?"

# Request the 3 most similar chunks, then display the text of the second hit (index 1).
similar_docs=vectorstore.similarity_search(query,k=3)
similar_docs[1].page_content

'ISO 13485:2016\nClause: 4.2.1\nTitle: General Documentation Requirements\n\nSUMMARY:\nClause 4.2.1 requires the organization to establish and maintain documented information required by ISO 13485. The documentation architecture of the QMS must be structured and controlled.\n\nREQUIREMENTS:\n- Identify all documents and records required by ISO 13485\n- Structure the QMS documentation architecture\n- Maintain a master list of controlled documents\n- Ensure documents remain current and accessible\n\nMANDATORY PROCESS:\nProcess Name: QMS Documentation Architecture Management\nProcess Description:\nThis process ensures identification, structuring, control, and maintenance of all QMS documentation in accordance with ISO 13485 requirements.\n\nREQUIRED DOCUMENTS:\n- Documentation architecture description\n- Master list of controlled documents\n- Document control procedure\n\nREQUIRED RECORDS:\n- Document history records\n- Evidence of document control activities\n\nEXPECTED EVIDENCE:\n- Appr

In [32]:
# Print the text and metadata of every retrieved document, iterating over the
# documents directly instead of indexing back into the list.
for doc in similar_docs:
    print("----------------------------------------------------------------------")
    print(f"content of similar docs : {doc.page_content}")
    print("-----------------------------------------------------------------------")
    print(f"metdata of similar docs {doc.metadata}")
    print("-----------------------------------------------------------------------")

----------------------------------------------------------------------
content of similar docs : ISO 13485:2016
Clause: 4.1.1
Title: General Quality Management System Requirements

SUMMARY:
Clause 4.1.1 requires the organization to establish, document, implement, and maintain a Quality Management System (QMS). The organization must define the QMS scope and clearly identify its regulatory role.

REQUIREMENTS:
- Define the scope and perimeter of the QMS
- Identify and document the regulatory role of the organization (manufacturer, importer, distributor, etc.)
- Establish and maintain a documented QMS compliant with ISO 13485 and applicable regulatory requirements

MANDATORY PROCESS:
Process Name: QMS Governance and Scope Definition
Process Description:
This process ensures that the organization formally defines its QMS scope, regulatory responsibilities, and establishes a documented system aligned with ISO 13485 requirements.

REQUIRED DOCUMENTS:
- Quality Manual
- Documented QMS scope d

#### Initialize the LLM, Prompt Template, and RAG Chain, then Query the RAG System

In [33]:
from langchain_openai import ChatOpenAI

# Chat model used for the direct (non-RAG) smoke test in the next cell.
llm = ChatOpenAI(model="gpt-3.5-turbo")


In [34]:
# Send one plain prompt straight to the chat model (no retrieval involved) and
# display the returned message object.
test_response=llm.invoke("What is Large Language Models")
test_response

AIMessage(content='Large Language Models are a type of artificial intelligence model that are designed to process and understand vast amounts of text data. These models use advanced natural language processing and machine learning techniques to generate human-like text and respond to queries in a way that is coherent and contextually relevant. Large Language Models have been used for a variety of applications, including language translation, text completion, question answering, and chatbots. Some well-known examples of Large Language Models include GPT-3, BERT, and Transformer.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 100, 'prompt_tokens': 12, 'total_tokens': 112, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'id

In [36]:
from langchain.chat_models.base import init_chat_model

# Rebuild the chat model through the provider-agnostic factory; this replaces
# the ChatOpenAI instance bound to `llm` earlier in the notebook.
# (Presumably a Groq-hosted model could be selected the same way — TODO confirm the model name.)
llm = init_chat_model("gpt-3.5-turbo", model_provider="openai")
llm

ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x00000173CCCB22B0>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x00000173CCCB2220>, root_client=<openai.OpenAI object at 0x00000173CCCB2670>, root_async_client=<openai.AsyncOpenAI object at 0x00000173CCCB24F0>, model_kwargs={}, openai_api_key=SecretStr('**********'), stream_usage=True)

### Modern RAG Chain

In [37]:
from langchain.chains import create_retrieval_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain

In [38]:
## Convert vector store to retriever
retriever=vectorstore.as_retriever(
    search_kwarg={"k":3} ## Retrieve top 3 relevant chunks
)
retriever

VectorStoreRetriever(tags=['Chroma', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x00000173C859CFD0>, search_kwargs={})

In [39]:
from langchain_core.prompts import ChatPromptTemplate

# System instructions for the assistant. The {context} placeholder at the end
# is one of the template's two input variables (the other is {input} below).
system_prompt = """
You are a regulatory compliance assistant specialized in ISO 13485:2016 and Quality Management Systems (QMS).

You must answer strictly based on the retrieved context provided below.

Rules:
- Use ONLY the information found in the context.
- Do NOT add external knowledge.
- Do NOT assume missing information.
- If the answer is not explicitly available in the context, say:
  "The requested information is not available in the provided ISO 13485 context."
- Clearly reference the relevant ISO 13485 clause number in your answer.
- Use professional regulatory language.
- Organize the answer in numbered points (1-, 2-, 3-, etc.).
- When appropriate, group information under clear headings (e.g., Summary, Requirements, Required Documents, Required Records).
- Keep the explanation precise and structured.

Retrieved Context:
{context}
"""

# Two-message chat template: the system rules above, then the user's question
# supplied through the {input} variable.
prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("human", "Question: {input}")
])


##### What is create_stuff_documents_chain?
create_stuff_documents_chain creates a chain that "stuffs" (inserts) all retrieved documents into a single prompt and sends it to the LLM. It's called "stuff" because it literally stuffs all the documents into the context window at once.

In [40]:
### Create a document chain
# Combines the chat model and the prompt into a chain that fills the prompt's
# {context} variable from the documents it is given.
from langchain.chains.combine_documents import create_stuff_documents_chain
document_chain=create_stuff_documents_chain(llm,prompt)
document_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\nYou are a regulatory compliance assistant specialized in ISO 13485:2016 and Quality Management Systems (QMS).\n\nYou must answer strictly based on the retrieved context provided below.\n\nRules:\n- Use ONLY the information found in the context.\n- Do NOT add external knowledge.\n- Do NOT assume missing information.\n- If the answer is not explicitly available in the context, say:\n  "The requested information is not available in the provided ISO 13485 context."\n- Clearly reference the relevant ISO 13485 clause number in your answer.\n- Use professional regulatory language.\n-

#### What is create_retrieval_chain?
create_retrieval_chain is a function that combines a retriever (which fetches relevant documents) with a document chain (which processes those documents with an LLM) to create a complete RAG pipeline.

In [42]:
### Create The Final RAG Chain
# Wires the retriever in front of the document chain: the value under "input"
# is used to retrieve documents, which are then passed on as the context.
from langchain.chains import create_retrieval_chain
rag_chain=create_retrieval_chain(retriever,document_chain)
rag_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['Chroma', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x00000173C859CFD0>, search_kwargs={}), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | ChatPromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\nYou are a regulatory compliance assistant specialized in ISO 13485:2016 and Quality Management Systems (QMS).\n\nYou must answer stric

In [43]:
# Run one question through the full RAG chain; the question goes in under the "input" key.
response=rag_chain.invoke({"input":"what is requirements of General Quality Management System Requirements?"})

In [44]:
# Display only the generated answer from the chain's result.
response['answer']

'The requirements of General Quality Management System Requirements under Clause 4.1.1 of ISO 13485:2016 include:\n\n1- **Define the QMS Scope and Perimeter**:\n   - The organization must clearly define the scope and boundaries of its Quality Management System (QMS).\n\n2- **Identify and Document Regulatory Role**:\n   - The organization must identify and document its regulatory role within the QMS, such as being a manufacturer, importer, distributor, etc.\n\n3- **Establish and Maintain a Documented QMS**:\n   - The organization is required to establish and maintain a documented QMS in compliance with ISO 13485 requirements and applicable regulatory requirements.'

In [58]:
# Function to query the modern RAG system
def query_rag_modern(question, chain=None, show_context=False):
    """Run one question through a retrieval chain and print the answer.

    Args:
        question: Text passed to the chain under the "input" key.
        chain: Object exposing ``invoke({"input": ...})`` that returns a
            mapping containing an "answer" key. When omitted, the
            notebook-level ``rag_chain`` is used.
        show_context: When True, also print the ``page_content`` of each
            document found under the result's "context" key.

    Returns:
        The mapping returned by the chain's ``invoke`` call, unchanged.
    """
    # Resolve the default lazily so the global is only required when no chain
    # is passed in explicitly.
    active_chain = rag_chain if chain is None else chain

    print(f"Question: {question}")
    print("-" * 50)

    result = active_chain.invoke({"input": question})
    print(result["answer"])

    if show_context:
        print("\nRetrieved Context:")
        for position, doc in enumerate(result.get("context", []), start=1):
            print(f"\n--- Source {position} ---")
            print(doc.page_content)

    return result

# Questions used to exercise the RAG pipeline end to end.
test_questions = ["what is REQUIRED RECORDS of Quality Manual"]

# Visual divider printed after each answer.
separator = "\n" + "=" * 80 + "\n"

for question in test_questions:
    result = query_rag_modern(question)
    print(separator)

Question: what is REQUIRED RECORDS of Quality Manual
--------------------------------------------------
Based on the information provided in ISO 13485:2016, Clause 4.2.2 regarding the Quality Manual, the REQUIRED RECORDS related to the Quality Manual are as follows:

1- Retention records:
   - These records should include the version history, approval records, and distribution records of the Quality Manual.
   - The retention records should demonstrate the traceability and control of changes made to the Quality Manual over time.
   - These records are essential for maintaining a documented history of the Quality Manual, ensuring that approved versions are available for reference, and identifying who approved and received specific versions of the Quality Manual.


