In [None]:
! pip install -r requirements.txt

In [None]:
from langchain_community.document_loaders import DirectoryLoader, UnstructuredPDFLoader, UnstructuredMarkdownLoader
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.embeddings.sentence_transformer import (SentenceTransformerEmbeddings,)

def _load_corpus(folder, pattern, loader_cls):
    """Load every file under `folder` matching `pattern`, parsed with `loader_cls`."""
    loader = DirectoryLoader(
        folder,
        glob=pattern,
        show_progress=True,
        loader_cls=loader_cls,
    )
    return loader.load()


# One corpus per documentation folder; PDFs under high_level_docs are loaded separately
# because they need a different loader class.
documents = _load_corpus("../documents/high_level_docs", "**/*.md", UnstructuredMarkdownLoader)
apidocs = _load_corpus("../documents/api_docs", "**/*.md", UnstructuredMarkdownLoader)
features = _load_corpus("../documents/cucumber_feature_files", "**/*.md", UnstructuredMarkdownLoader)
pdf_docs = _load_corpus("../documents/high_level_docs", "**/*.pdf", UnstructuredPDFLoader)

# Split each corpus with the same splitter and flatten the results, keeping the
# order: high-level markdown, API docs, feature files, PDFs.
splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
split_docs = [
    chunk
    for corpus in (documents, apidocs, features, pdf_docs)
    for chunk in splitter.split_documents(corpus)
]

# Embed the chunks and index them in a Chroma store persisted under ./db_test_3.
# NOTE(review): re-running this cell presumably adds the same chunks to the
# persisted collection again — confirm and clear the directory if so.
embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
vectorstore = Chroma.from_documents(
    documents=split_docs,
    embedding=embedding_function,
    persist_directory="./db_test_3",
)
retriever = vectorstore.as_retriever()

In [None]:
from typing import Literal
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_experimental.llms.ollama_functions import OllamaFunctions

class DocumentType(BaseModel):
    # Structured routing result: the single datasource chosen for a user question.
    # (Kept as comments rather than a docstring so the model's generated schema,
    # which is handed to the LLM, is not altered.)
    # NOTE(review): `required=True` together with a `default` looks contradictory —
    # confirm which behaviour is intended when the LLM omits the field.
    datasource: Literal[
        "api_docs",
        "cucumber_feature_files",
        "high_level_docs",
        "no_related_document",
    ] = Field(
        default="no_related_document",
        title="Most relevant document",
        description="Given a user question choose which document would be most relevant for answering their question",
        required=True,
    )

# Function-calling wrapper around the "llama3" Ollama model, asked to emit JSON.
llm = OllamaFunctions(model="llama3", format="json")
# Bind the DocumentType schema so the model's answer is returned in that structure
# (presumably as a DocumentType instance — confirm against the library version in use).
structured_llm = llm.with_structured_output(DocumentType)

# Prompt
# Routing instructions for the LLM: the scope of valid questions, the meaning of each
# datasource id, and the files each datasource contains. The datasource ids here must
# stay in sync with the Literal values on DocumentType.datasource.
system = """You are an expert at routing a user question to the appropriate document.

Based on the question, route to the most relevant documents.
The user question should be related to a product catalogue service; if the question is not applicable then return 'no_related_document'.

The following documents are available:
'api_docs' - Technical documentation that describes the structure, functionality, and usage of an API
'cucumber_feature_files' - Test scenarios and acceptance criteria for software features and behaviour
'high_level_docs' - Overview documents that provide a broad, conceptual understanding of a system, project, or topic without delving into granular details.
'no_related_document' - There are no related documents available.

Inside api_docs there are:
    product-provisioner.md
    product-store.md
    products-and-catalogue-service.md
    purchase-gateway.md
    purchase-orchestrator.md
    third-party-payment-transaction-service.md
Inside cucumber_feature_files there are:
    client-user-journey.md
    content-access-permission-jwt.md
    content-access-permissions.md
    general-system-failures.md
    product-provisioner.md
    product-store.md
    products-and-catalogue-service.md
    purchase-gateway.md
    purchase-orchestrator.md
    third-party-payment-transaction-service.md
Inside high_level_docs there are:
    Architecture.md
    Getting to know our clients.pdf
    high-level-architecture.md
    JWT Overview.pdf

Please respond with one of ["api_docs", "cucumber_feature_files", "high_level_docs", "no_related_document"]:
"""

from langchain_core.prompts import PromptTemplate

# Build the routing prompt from the detailed `system` instructions (datasource
# descriptions and file listings) instead of a short standalone instruction, so the
# model actually sees what each datasource contains. `system` is injected as a
# partial variable rather than concatenated into the template, so any braces it may
# contain are never interpreted as template placeholders. The only runtime input
# remains `question`.
prompt = PromptTemplate.from_template(
    "{system}\n\nHuman: {question}\nAI: "
).partial(system=system)

# Define router: fill the prompt, then have the structured LLM pick a datasource.
router = prompt | structured_llm

In [None]:
# Smoke-test the router with a few sample questions. The last one is unrelated to the
# product catalogue and is presumably expected to route to 'no_related_document'.
sample_questions = (
    "What is the error code A_100?",
    "What is the full list of Authentication error codes?",
    "What are the downstream of purchase gateway?",
    "What is the size of the sun?",
)
for sample_question in sample_questions:
    print(router.invoke({"question": sample_question}))