In [57]:
# Create the AI database environment
import chromadb
from chromadb.config import Settings
from chromadb.utils import embedding_functions

# Chroma client backed by DuckDB + Parquet files stored under ./ai_db_x.
# allow_reset=False is passed so that db.reset() is not permitted on this store.
db = chromadb.Client(settings=Settings(
    persist_directory='ai_db_x',
    chroma_db_impl='duckdb+parquet',
    allow_reset=False
))

# Model names are listed at https://www.sbert.net/docs/pretrained_models.html#sentence-embedding-models/
embbed = embedding_functions.SentenceTransformerEmbeddingFunction(model_name='all-mpnet-base-v2')
collection_name = 'freewill'

# Fetch the collection if it already exists, otherwise create it.
# The embedding function is passed in BOTH cases: a collection handle obtained
# without `embedding_function` falls back to Chroma's default embedder, which
# would make later add()/query() calls use a different model than the one the
# stored vectors were produced with.
collection = db.get_or_create_collection(
    name=collection_name,
    embedding_function=embbed,
    # Distance metric of the HNSW index. Other supported values are "l2" and "ip";
    # see https://github.com/nmslib/hnswlib/tree/master#python-bindings
    metadata={"hnsw:space": "cosine"}
)

print(collection)
print(db.list_collections())

name='freewill' id=UUID('03bc248b-058e-4a00-ab34-4a044ec3d398') metadata={'hnsw:space': 'cosine'}
[Collection(name=freewill)]


In [58]:
# Load the PDF document and store it in the AI database
from langchain.document_loaders import PDFMinerLoader
import os
# Path of the source PDF, resolved against the current working directory.
pdf_path = f'{os.getcwd()}/source_documents-single/GS_QA.pdf'

# Load the PDF; `doc` is the list of documents returned by the loader.
pdfDoc = PDFMinerLoader(file_path=pdf_path)
doc = pdfDoc.load()


from langchain.text_splitter import RecursiveCharacterTextSplitter
import re
# Splitter that cuts the cleaned text into chunks of at most ~500 characters
# with a 10-character overlap between neighbouring chunks.
# NOTE(review): '\n' is listed before '\n\n'; the splitter tries separators in
# list order, so the '\n\n' entry is presumably never reached -- confirm intent.
text_spliter = RecursiveCharacterTextSplitter(
    separators=['\n', '\n\n'],
    chunk_size=500,
    chunk_overlap=10
)

# Break the first document's text into lines and trim surrounding whitespace.
raw_lines = re.split('\n{1,}|\r\n', doc[0].page_content)
stripped_lines = [line.strip() for line in raw_lines]

# Drop empty lines. A line ending with '.' gets a trailing newline appended,
# which gives the splitter a '\n' to break on after a finished sentence.
clean_texts = [
    f'{line}\n' if line.endswith('.') else line
    for line in stripped_lines
    if line
]
print(clean_texts)

# Re-join the lines with single spaces and split the result into chunks.
joined_text = ' '.join(clean_texts)
prepare_texts = text_spliter.split_text(joined_text)

# Show the first chunk as a sanity check.
print(prepare_texts[0])


['Q1 Q: What are the conditions that client trading transactions are subject to?', 'A1 A: Client trading transactions are subject to stock status and customer status according to', 'predetermined system conditions for buying and selling.\n', 'Q2 Q: What are the Share Status options available in the system?', 'A2 A: The Share Status options available in the system are ACTIVE, INACTIVE, CLOSE_ONLY,', 'and HALTED.\n', 'Q3 Q: What are the Account Status options available in the system?', 'A3 A: The Account Status options available in the system are CanBuy, CanSell, BuyBotton', 'Display, SellButton Display, Buy, Sell, Pending, Active, Lock(buy), Lock(sell), and', 'Lock(buy,sell).\n', 'Q4 Q: Is it possible to buy and sell shares when the Share Status is CLOSEONLY?', 'A4 A: No, it is not possible to buy and sell shares when the Share Status is CLOSEONLY.\n', 'Q5 Q: What are the possible values for the CanBuy and CanSell Account Status', 'options?', 'A5 A: The possible values for CanBuy and Ca

In [59]:
# Store every text chunk in the collection.
# IDs are 'idx_1', 'idx_2', ... in chunk order. Each record's metadata carries
# the chunk text ('tag'), its own id and the path of the source PDF.
# All chunks are sent in one add() call so the embedding function can process
# them as a batch instead of being invoked once per chunk.
if prepare_texts:  # nothing to add for an empty chunk list (matches the original no-op)
    source_path = doc[0].metadata['source']
    chunk_ids = [f'idx_{position}' for position, _ in enumerate(prepare_texts, start=1)]
    collection.add(
        ids=chunk_ids,
        documents=list(prepare_texts),
        metadatas=[
            {'tag': chunk, 'id': chunk_id, 'source': source_path}
            for chunk_id, chunk in zip(chunk_ids, prepare_texts)
        ]
    )

In [60]:
# Persist the client's data (the client was configured with a persist_directory).
db.persist()

# Collections known to the client.
all_collections = db.list_collections()
print(all_collections)

# The number of stored records should equal the number of prepared chunks.
stored_count, chunk_count = collection.count(), len(prepare_texts)
print(stored_count)
print(chunk_count)

# Collection summary, followed by the metadata of every stored record.
print(collection)
stored_records = collection.get()
print(stored_records['metadatas'])


[Collection(name=freewill)]
14
14
name='freewill' id=UUID('03bc248b-058e-4a00-ab34-4a044ec3d398') metadata={'hnsw:space': 'cosine'}
[{'tag': 'Q1 Q: What are the conditions that client trading transactions are subject to? A1 A: Client trading transactions are subject to stock status and customer status according to predetermined system conditions for buying and selling.\n Q2 Q: What are the Share Status options available in the system? A2 A: The Share Status options available in the system are ACTIVE, INACTIVE, CLOSE_ONLY, and HALTED.', 'id': 'idx_1', 'source': 'd:\\Environment_Owner\\Developments\\Chatbot\\langchain\\fw_gpt4all_project\\LLM-GPT4All/source_documents-single/GS_QA.pdf'}, {'tag': 'Q3 Q: What are the Account Status options available in the system? A3 A: The Account Status options available in the system are CanBuy, CanSell, BuyBotton Display, SellButton Display, Buy, Sell, Pending, Active, Lock(buy), Lock(sell), and Lock(buy,sell).\n Q4 Q: Is it possible to buy and sell sha

In [57]:
# Open a second client on the same persist directory and dump the 'freewill'
# collection, to check what is readable from the persisted store.
reopened_settings = Settings(
    persist_directory='ai_db_x',
    chroma_db_impl='duckdb+parquet',
    allow_reset=False
)
chroma_db = chromadb.Client(settings=reopened_settings)

freewill_collection = chroma_db.get_collection(name='freewill')
print(freewill_collection.get())


{'ids': ['idx_1', 'idx_2', 'idx_3', 'idx_4', 'idx_5', 'idx_6', 'idx_7', 'idx_8', 'idx_9', 'idx_10', 'idx_11', 'idx_12', 'idx_13', 'idx_14'], 'embeddings': None, 'documents': ['Q1 Q: What are the conditions that client trading transactions are subject to? A1 A: Client trading transactions are subject to stock status and customer status according to predetermined system conditions for buying and selling.\n Q2 Q: What are the Share Status options available in the system? A2 A: The Share Status options available in the system are ACTIVE, INACTIVE, CLOSE_ONLY, and HALTED.', 'Q3 Q: What are the Account Status options available in the system? A3 A: The Account Status options available in the system are CanBuy, CanSell, BuyBotton Display, SellButton Display, Buy, Sell, Pending, Active, Lock(buy), Lock(sell), and Lock(buy,sell).\n Q4 Q: Is it possible to buy and sell shares when the Share Status is CLOSEONLY? A4 A: No, it is not possible to buy and sell shares when the Share Status is CLOSEONLY

In [61]:
# Query the collection for the chunks closest to the question.
query_str = 'What is the difference between INACTIVE and HALTED Share Status options?'
res = collection.query(
    query_texts=[query_str],
    n_results=10,
    # Restricts candidates to documents that contain the query text verbatim,
    # so a question that is not literally present in a chunk yields no hits.
    # A metadata filter (e.g. where={'id': 'idx_11'}) can be used instead.
    where_document={'$contains': query_str}
)

# Results are nested one list per query text; only one query text was sent.
matched_ids = res['ids'][0] if res['ids'] else []
matched_documents = res['documents'][0] if res['documents'] else []

if matched_ids:
    print(matched_ids[0])
    print('\n\n'.join(matched_documents))
else:
    # Previously an empty result raised IndexError on res['ids'][0][0].
    print(f'No documents matched: {query_str!r}')

# Fetch a single record directly by its id.
res2 = collection.get(
    ids='idx_12'
)
print(res2)


idx_13
Q32 Q: What is the difference between INACTIVE and HALTED Share Status options? A32 A: INACTIVE means that buying and selling securities is not available while HALTED means that buying and selling securities are suspended temporarily.
 Q33 Q: What does the Account Status Lock(buy, sell) option indicate? A33 A: The Account Status Lock(buy, sell) option indicates that the customer is temporarily not allowed to both buy and sell securities.
{'ids': ['idx_12'], 'embeddings': None, 'documents': ['Q30 Q: Can a customer still view their account details when there is an inactive Share Status? A30 A: Yes, customers can still view their account details when there is an inactive Share Status.\n Q31 Q: What are the four Share Status options again? A31  A: The four Share Status options are ACTIVE, INACTIVE, CLOSE_ONLY, and HALTED.'], 'metadatas': [{'tag': 'Q30 Q: Can a customer still view their account details when there is an inactive Share Status? A30 A: Yes, customers can still view their