In [3]:
# import modules

from langchain.llms.openai import OpenAI
from langchain.vectorstores import FAISS
from langchain.sql_database import SQLDatabase
import os
from langchain.prompts import ChatPromptTemplate

from langchain.vectorstores.chroma import Chroma

# MongoDB
from dotenv import load_dotenv
from pymongo import MongoClient

# user query embedding
from sentence_transformers import SentenceTransformer
from langchain.document_loaders import UnstructuredURLLoader

In [5]:
# config Mongo DB

load_dotenv()

MONGO_USERNAME = os.environ.get('MONGO_USERNAME')
MONGO_PASSWORD = os.environ.get('MONGO_PASSWORD')
MONGO_HOST = os.environ.get('MONGO_HOST')
MONGO_DB_CONNECTION_STRING = os.environ.get('MONGO_DB_CONNECTION_STRING')
MONGO_DB = os.environ.get('MONGO_DB')
MONGO_DB_COLLECTION = "InventoryData"

In [6]:
# connect to Mongo DB

client = MongoClient(MONGO_DB_CONNECTION_STRING)
db = client[MONGO_DB]
collections = db.list_collection_names()

inventory_coll = db['InventoryData']

print(f'Collection: {inventory_coll.name}')
print(f"Total no. of docs: {inventory_coll.count_documents({})}")

Collection: InventoryData
Total no. of docs: 538


In [7]:
# user query embedding function

emb_model = SentenceTransformer("thenlper/gte-large")

def get_embedding(text: str) -> list[float]:

    if not text.strip():
        print("Attempted to get embedding for empty text.")
        return []

    embedding = emb_model.encode(text)

    return embedding.tolist()



## Classical Search

In [7]:
# keyword search

def search(search_text):
    data = inventory_coll.find({"$text": {"$search": search_text}}).limit(3)
    return list(data)

data = search('Servo-i')

for x in data:
    print(x)

{'_id': ObjectId('6620e804924de81c6030e9df'), 'Item Code': 'BP0001', 'Item Description': 'Batteries/ UPS', 'Part No': {'': '01-2206-0101-001'}, 'Part Description': 'BATTERY SERVO-I MARQUET', 'Model': 'Servo-i', 'Unit Of Measurement': 'Unit', 'Spare Part Type': 'Just In Time', 'Location': 'Centralized', 'Part Category': 'Biomedical', 'Is Expiry date Required': 'Yes', 'Min Nos': 0, 'Max Nos': 20, ' Minimum Price Per Nos (RM) ': ' MYR 1.200,00 ', ' Maximum Price Per Nos (RM) ': ' MYR 1.610,00 ', 'Status': 'Active', 'Expiry Age (In Month)': 0, 'Current Stock Level': 1}
{'_id': ObjectId('6620e804924de81c6030e9ee'), 'Item Code': 'BP0025', 'Item Description': 'Sensors/ Detectors', 'Part No': {'': '01-2202-0302-001'}, 'Part Description': 'SIEMENS SERVO 9000 OXYGEN SENSOR OOM202', 'Model': 'Sv300', 'Unit Of Measurement': 'Unit', 'Spare Part Type': 'Fast Moving Item', 'Location': 'Company Site Office', 'Part Category': 'Biomedical', 'Is Expiry date Required': 'Yes', 'Min Nos': 1, 'Max Nos': 15, 

## Semantic Search

In [8]:
# fetch all docs from InventoryData
def fetch_all_documents():
    data = inventory_coll.find({})
    return list(data)

data = fetch_all_documents()
data

[{'_id': ObjectId('6620e804924de81c6030e9c0'),
  'Item Code': 'BP0018',
  'Item Description': 'Motors/ Micromotors/ Carbon brushes',
  'Part No': {'': '01-0802-0503-085'},
  'Part Description': 'Flow motor',
  'Unit Of Measurement': 'Unit',
  'Spare Part Type': 'Fast Moving Item',
  'Location': 'Company Site Office',
  'Part Category': 'Biomedical',
  'Is Expiry date Required': 'Yes',
  'Min Nos': 1,
  'Max Nos': 3,
  ' Minimum Price Per Nos (RM) ': ' MYR 500,00 ',
  ' Maximum Price Per Nos (RM) ': ' MYR 800,00 ',
  'Brand': 'Common',
  'Status': 'Active',
  'Expiry Age (In Month)': 0,
  'Current Stock Level': 0},
 {'_id': ObjectId('6620e804924de81c6030e9c1'),
  'Item Code': 'BP0025',
  'Item Description': 'Sensors/ Detectors',
  'Part No': {'': '01-0101-0101-044'},
  'Part Description': 'O2 Sensor OOM202',
  'Unit Of Measurement': 'Unit',
  'Spare Part Type': 'One Off',
  'Location': 'Centralized',
  'Part Category': 'Biomedical',
  'Is Expiry date Required': 'Yes',
  'Min Nos': 1,
  

In [10]:
# Step 1: Create embeddings and store in a new "InventoryEmbeddings" collection

embeddings = emb_model.encode(data)
embeddings

TypeError: object of type 'ObjectId' has no len()

In [None]:
# Step 2: Store embeddings in a vector store (e.g. FAISS or Chroma)

vectorstore = Chroma.from_documents(docs)

In [None]:
# Step 3: Store embeddings in a vector store (e.g. FAISS)
# Step 3: 