In [1]:
"""
%conda install -c conda-forge langchain
%conda install -c conda-forge pypdf

%pip install langchain_ollama
%pip install langchain_community
%pip install langchain_chroma

"""

'\n%conda install -c conda-forge langchain\n%conda install -c conda-forge pypdf\n\n%pip install langchain_ollama\n%pip install langchain_community\n'

In [2]:
"""
import getpass
import os

os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_API_KEY"] = os.getenv("LANGCHAIN_API_KEY")
"""

'\nimport getpass\nimport os\n\nos.environ["LANGCHAIN_TRACING_V2"] = "true"\nos.environ["LANGCHAIN_API_KEY"] = os.getenv("LANGCHAIN_API_KEY")\n'

In [1]:
from langchain_ollama import ChatOllama

# Chat model served through Ollama. temperature=0 asks for the least-random
# decoding so repeated runs of the notebook give comparable answers.
model = ChatOllama(temperature=0, model="llama3.2")

In [2]:
import os
import re
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_ollama import OllamaEmbeddings

folder_path = 'docs_mni/raw'
documents = []

# Load every .txt file in the corpus folder. os.listdir() returns entries in
# arbitrary order, so the names are sorted to keep the document order (and the
# index positions derived from it later) identical across runs and machines.
for filename in sorted(os.listdir(folder_path)):
    if not filename.endswith('.txt'):
        continue
    with open(os.path.join(folder_path, filename), 'r', encoding='utf-8') as file:
        text = file.read()
    # A blank line separates paragraphs; each paragraph becomes one Document.
    # Whitespace-only fragments (from runs of three or more newlines, or a
    # trailing blank line) carry no content and are skipped. The paragraph text
    # itself is left untouched so later character offsets stay meaningful.
    file_documents = [
        Document(page_content=paragraph)
        for paragraph in re.split(r'\n\n', text)
        if paragraph.strip()
    ]
    documents.extend(file_documents)

# Chunking policy: chunk_size=2000 with no overlap between neighbouring chunks.
# add_start_index=True stores each chunk's offset within its parent document
# under metadata["start_index"].
text_splitter = RecursiveCharacterTextSplitter(
    add_start_index=True,
    chunk_overlap=0,
    chunk_size=2000,
)
all_splits = text_splitter.split_documents(documents)

# Send the raw text of every chunk to the chat model, one call per chunk, in order.
# NOTE(review): `responses` is not read by any cell visible in this notebook and
# costs one LLM call per chunk. Confirm it is still needed before keeping it.
responses = [model.invoke(chunk.page_content) for chunk in all_splits]


# Embedding model served through Ollama; also reused later to embed search queries.
embeddings = OllamaEmbeddings(
    model="nomic-embed-text"
)

# Embed all chunks with a single batched embed_documents() call instead of one
# embed_query() request per chunk. embed_documents() is the interface meant for
# corpus text, and it returns one vector per input in the same order as
# `all_splits`. The guard avoids sending an empty batch when no chunks were
# produced.
vectors = (
    embeddings.embed_documents([split.page_content for split in all_splits])
    if all_splits
    else []
)


In [4]:
%pip install langchain

import faiss
from langchain.vectorstores import FAISS
from langchain.docstore import InMemoryDocstore

# Flat L2 index sized to the embedding dimensionality (taken from the first vector).
index = faiss.IndexFlatL2(len(vectors[0]))

# Start with an EMPTY docstore and an EMPTY position -> docstore-id mapping.
# The FAISS wrapper fills both as vectors are added, which keeps three things
# aligned: row i of the index, index_to_docstore_id[i], and the stored document.
# Pre-filling them (as this cell used to) while the index itself is empty makes
# the wrapper append its own ids at positions n..2n-1, so every chunk is stored
# twice and lookups only work because the insertion order happens to match.
docstore = InMemoryDocstore()
index_to_docstore_id = {}

# Initialize the FAISS vector store; `embeddings` is used to embed search queries.
vector_store = FAISS(
    index=index,
    docstore=docstore,
    index_to_docstore_id=index_to_docstore_id,
    embedding_function=embeddings,
)

# Load the chunks together with the vectors computed in the embedding cell, so
# the corpus is not embedded a second time. The check catches a stale `vectors`
# list left over from an earlier run with different chunks.
assert len(vectors) == len(all_splits), (
    "vectors and all_splits are out of sync; re-run the embedding cell"
)
ids = vector_store.add_embeddings(
    text_embeddings=list(
        zip((split.page_content for split in all_splits), vectors)
    ),
    metadatas=[split.metadata for split in all_splits],
)

Note: you may need to restart the kernel to use updated packages.


In [5]:
from typing import List

from langchain_core.documents import Document
from langchain_core.runnables import chain


@chain
def retriever(query: str) -> List[Document]:
    """Look up ``query`` in ``vector_store`` and return its single closest chunk.

    The result is a list holding one ``Document`` (``k=1``). The ``@chain``
    decorator wraps this function as a runnable, which is what provides the
    ``.batch`` method used in this notebook.
    """
    top_hits = vector_store.similarity_search(query, k=1)
    return top_hits


# Run two probe questions through the retriever in one batch. The cell's value
# is one result list per question, in the same order as the questions.
# NOTE(review): "calbing" in the second question looks like a typo for "cabling".
# The string is left as written, so confirm the intended wording.
retriever.batch(["How many hospitals in NTEC?", "what is calbing in situation 2?"])

[[Document(metadata={'start_index': 0}, page_content='Hospitals in NTEC: AHNH, BBH, NDH, PWH, SCH, SH and TPH.')],
 [Document(metadata={'start_index': 0}, page_content='Q2) If commence date not confirmed yet, should I leave it blank?\nA2) Consult your local IT')]]

In [6]:
# Rebind `retriever` to the vector store's own retriever object, configured for
# plain similarity search returning one hit per query. This replaces the
# @chain-decorated function of the same name defined in an earlier cell.
retriever = vector_store.as_retriever(
    search_kwargs={"k": 1},
    search_type="similarity",
)

# Single probe question (a second probe, "How many situations?", is disabled).
retriever.batch(["When to apply for data port?"])

[[Document(metadata={'start_index': 0}, page_content='how to apply for data port request?\nTo apply for a Data Port Request, you need to complete and submit the following steps:\n    Master Application Form: Submit an application form to request a new network connection.\n    Network Diagram (Email): Provide a diagram showing how the medical system will be connected to the HA network.\n    Data Port Request Form (NTECITD-7): Fill out this form to request data ports for your desktop computer.\n    You can also refer to the standard and guidelines provided by your organization, such as:\n    "IT Security Requirements for Quotation / Tender - Procurement of \'non-IT\' System/Equipment"\n    "Guidelines for Cabling System and Network Setup in External Network" (if applicable)\n    Additionally, you may need to provide further information or documentation, such as:\n    Floor Plan: Submit a floor plan indicating where you\'d like the data port installed.\n    Firewall Request Change Form (N

In [7]:
# Query the store directly, leaving the number of hits at the library default,
# and print only the best-ranked one.
results = vector_store.similarity_search("What is MNI standard?")

print(results[0])

page_content='MNI = Medical Network Infrastructure' metadata={'start_index': 0}


In [8]:
results = await vector_store.asimilarity_search("What is dataport?")

print(results[0])

page_content='DP = Dataport' metadata={'start_index': 0}
