In [5]:
from langchain.vectorstores.chroma import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings, HuggingFaceBgeEmbeddings
from langchain_core.documents import Document
from unstructured.partition.text import partition_text
from unstructured.chunking.basic import chunk_elements
from unstructured.partition.docx import partition_docx

import time, os, shutil
# Directory where the Chroma store built in the "unstructured" section is persisted.
# The commented-out line is an alternative target (presumably for the
# multilingual-e5 run - confirm before switching).
# CHROMA_PATH = "final_test/unstructured-multi"
CHROMA_PATH = "final_test/unstructured-bge"


In [84]:
def load_embedding_model(model_path : str):
    """Load a sentence embedding model through ``HuggingFaceEmbeddings``.

    Args:
        model_path: Model identifier passed on as ``model_name``.

    Returns:
        The constructed ``HuggingFaceEmbeddings`` instance. Weights are cached
        under ``./models`` and embeddings are normalized at encode time.

    Note:
        A later cell defines a function with this same name (built on
        ``HuggingFaceBgeEmbeddings``); whichever cell ran last wins.
    """
    began = time.time()
    model = HuggingFaceEmbeddings(
        model_name=model_path,
        cache_folder="./models",
        encode_kwargs={"normalize_embeddings": True},
    )
    elapsed = time.time() - began
    # Report the wall-clock load time, rounded to whole seconds.
    print(f'model load time {round(elapsed, 0)} second')
    return model

# Make multilingual-e5-large the active embedding model for the cells that follow.
# NOTE(review): the E5 model card asks for "query: " / "passage: " prefixes on the
# input text; nothing in this notebook adds them - confirm whether that is intended.
embedding = load_embedding_model(model_path="intfloat/multilingual-e5-large")

model load time 100.0 second


In [11]:
def load_embedding_model(model_path : str):
    """Load a BGE embedding model through ``HuggingFaceBgeEmbeddings``.

    Args:
        model_path: Model identifier passed on as ``model_name``.

    Returns:
        The constructed ``HuggingFaceBgeEmbeddings`` instance. Weights are
        cached under ``./models`` and embeddings are normalized at encode time.

    Note:
        This redefines the ``load_embedding_model`` from the earlier cell
        (which wraps ``HuggingFaceEmbeddings``); whichever cell ran last wins.
    """
    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # system clock adjustments.
    start_time = time.perf_counter()
    bge_kwargs = {
        "model_name": model_path,
        "cache_folder": "./models",
        "encode_kwargs": {"normalize_embeddings": True},
    }
    # HuggingFaceBgeEmbeddings prepends an English retrieval instruction to every
    # query by default. BGE-M3 is documented as not needing any instruction, so
    # switch it off for that model; other BGE models keep the library default.
    if "bge-m3" in model_path.lower():
        bge_kwargs["query_instruction"] = ""
    local_embedding = HuggingFaceBgeEmbeddings(**bge_kwargs)
    elapsed = time.perf_counter() - start_time
    print(f'model load time {round(elapsed, 0)} second')
    return local_embedding

# Make BAAI/bge-m3 the active embedding model. This rebinds `embedding`, replacing
# the multilingual-e5 model loaded by the earlier cell.
embedding = load_embedding_model(model_path="BAAI/bge-m3")

model load time 93.0 second


In [6]:
def save_to_chroma(chunks: "list[Document]"):
    """Rebuild the Chroma store at ``CHROMA_PATH`` from ``chunks``.

    Any existing directory at ``CHROMA_PATH`` is deleted first, then the chunks
    are embedded with the module-level ``embedding`` model into a new
    cosine-distance collection.

    Args:
        chunks: Documents to embed and store.

    Returns:
        The newly created ``Chroma`` store, so callers can keep adding
        documents to it. (Previously nothing was returned, which made the
        store unreachable after the call.)
    """
    print("Starting Embedding")
    # Clear out the database first so a re-run does not append to old data.
    if os.path.exists(CHROMA_PATH):
        shutil.rmtree(CHROMA_PATH)

    # Create a new DB from the documents.
    start_time = time.perf_counter()
    db = Chroma.from_documents(
        chunks,
        embedding,
        persist_directory=CHROMA_PATH,
        collection_metadata={"hnsw:space": "cosine"},
    )
    # NOTE(review): persist() may be a deprecated no-op on Chroma >= 0.4, where
    # persistence is automatic - confirm against the installed version.
    db.persist()
    elapsed = time.perf_counter() - start_time
    print(f'embedding time {round(elapsed, 0)} second')
    print(f"Saved {len(chunks)} chunks to {CHROMA_PATH}.")
    return db

# Unstructured

## PNPK 2023

In [12]:
# Partition the cleaned PNPK 2023 text file into unstructured elements.
elements = partition_text(filename="final/PNPK 2023 GGK_clean.txt")

In [13]:
# Chunk the partitioned elements: max_characters=1800 and new_after_n_chars=1500 set
# the size limits, and overlap=300 with overlap_all=True requests a 300-character
# overlap between chunks.
# NOTE(review): the other documents in this notebook use 1500 / 1000 - confirm the
# larger sizes here are intentional.
chunks = chunk_elements(elements, 
                        max_characters=1800, 
                        new_after_n_chars=1500, 
                        overlap=300, 
                        overlap_all=True)

In [14]:
# Turn every unstructured chunk into a LangChain Document. The chunk metadata is
# copied via to_dict() and its "languages" entry is replaced by the plain string
# 'ind' (presumably because the vector store only accepts scalar metadata - confirm).
document_chunks = [
    Document(
        page_content=piece.text,
        metadata={**piece.metadata.to_dict(), "languages": 'ind'},
    )
    for piece in chunks
]

In [15]:
# unstructured
# Start from an empty store. Chroma.from_documents adds to whatever already lives
# in persist_directory, so re-running this cell would otherwise store every chunk
# a second time. This mirrors the clean-up done in save_to_chroma().
if os.path.exists(CHROMA_PATH):
    shutil.rmtree(CHROMA_PATH)

# Embed the PNPK chunks into a new cosine-distance collection; `db` is reused by
# the later add_documents() cells.
db = Chroma.from_documents(
        document_chunks,
        embedding,
        persist_directory=CHROMA_PATH,
        collection_metadata={"hnsw:space": "cosine"}
    )
# NOTE(review): persist() may be a deprecated no-op on Chroma >= 0.4 - confirm.
db.persist()

  warn_deprecated(


### Tabel PNPK

In [16]:
# Partition the DOCX file holding the PNPK tables into unstructured elements.
elements = partition_docx(filename="docx/PNPK tabel.docx")

In [17]:
import re

# Join the element texts with newlines. Plain `+=` concatenation glued the last
# word of one element onto the first word of the next.
text = "\n".join(elem.text for elem in elements)
# print(text)
# Split on every "Tabel <number>" caption. The piece before the first caption is
# not a table, so it is dropped ([1:]), and whitespace-only pieces are skipped so
# that no empty document gets embedded.
table_list = [part for part in re.split(r"Tabel \d+", text)[1:] if part.strip()]

''

In [18]:
# Wrap each extracted table text in a Document tagged with the source PDF name.
document_chunks = [
    Document(page_content=table_text, metadata={'filename':'PNPK 2023 GGK.pdf'})
    for table_text in table_list
]

In [19]:
# Append the PNPK table documents to the existing store; the cell output is the
# list of ids returned for the added documents.
db.add_documents(document_chunks)

['03c374b4-373d-4c02-86d1-34347ccead5c',
 '024998c6-ecdf-41a1-a1c5-a1ce1e1684ff',
 'bc3803ce-346f-4f52-a81d-986bdf8c0b53',
 'e73ca9ec-a97d-4358-bed0-29ef96e1c805',
 '319fb4f2-92fb-42df-a2c1-b8a3016a14c6',
 '2254300d-3f6e-4299-ab09-72b3f7ef8a3a',
 'bd4619b9-704d-4560-99a7-f33bcec997ec',
 '7a9b7e12-95ff-4658-93f9-d46750a0f12f',
 '2deb9877-cd91-4b53-bcf1-75b07ea6e21c',
 '3ee31bee-4ed2-4a68-b1f9-55aa6e094aa1',
 '79594285-baad-4dde-a493-3b5d93a42517',
 'f7a93cf2-1817-46b0-af0a-906e48920781',
 '6fedcf67-a3df-4956-bef0-f7a607eb1227',
 '8ec8f2a7-853b-4a6f-adf3-c58fa5e380e7',
 'd2b582e0-70c3-4759-80ec-3ea6836c8d6e',
 '37f771f3-f2b7-4b4a-b205-3297344ee95c',
 '7acbb00e-65ae-422f-9b53-e7b96edfee94',
 '86c83822-4dca-426f-bd1e-49c6084b4cfb',
 'edc52697-e259-4e0b-9c24-36a38614b7f8',
 'd08dd1ff-e5e5-4a45-9fa9-caae7ab58ee3',
 '8b6f2893-e8be-4316-b27b-1e63c86268f2',
 '900992a8-2587-4954-8a85-d742d8eee95c',
 '8b364ed2-5ac8-4c8c-8ed3-e83d4d4d9985',
 '48d08502-7eda-4626-b769-98c3d18d9d62',
 '2c6c1745-2ad5-

## Nutrisi pada anak dengan penyakit ginjal

In [20]:
# Partition the nutrition guideline text file into unstructured elements.
elements = partition_text(filename="final/nutrisi_pada_anak_dengan_penyakit_ginjal - Copy_poster.txt")

In [21]:
# Chunk the partitioned elements: max_characters=1500 and new_after_n_chars=1000 set
# the size limits, and overlap=300 with overlap_all=True requests a 300-character
# overlap between chunks.
chunks = chunk_elements(elements, 
                        max_characters=1500, 
                        new_after_n_chars=1000, 
                        overlap=300, 
                        overlap_all=True)

In [22]:
# Turn every unstructured chunk into a LangChain Document. The chunk metadata is
# copied via to_dict() and its "languages" entry is replaced by the string 'ind'.
document_chunks = [
    Document(
        page_content=piece.text,
        metadata={**piece.metadata.to_dict(), "languages": 'ind'},
    )
    for piece in chunks
]

In [23]:
# unstructured
# Append the nutrition guideline chunks to the existing store; the cell output is
# the list of ids returned for the added documents.
db.add_documents(document_chunks)

['30d5feb9-2176-4e1d-a867-9205952d90f1',
 'b0266db9-a697-41d7-9ccf-adf2d6638447',
 '3eb0ce9f-db6e-48ba-861b-fc04d5a3e0f9',
 '1cb74adc-b960-45cf-ba3e-f0a5f085d7a7',
 'a78b1f84-fee8-43dc-b5eb-3d8ed3b1097e',
 'e83ba27a-bdef-4e6b-a506-4bb32c8c3f68',
 'd40fe260-08d8-4fc4-b2a2-0e139a9d1260',
 'c9e22e8b-908d-489b-80ae-268b53d38539',
 '7bc6f6e9-557f-4259-b133-63d1a94c6511',
 'bb0f15ce-a13f-42cd-a7af-57a29df093b6',
 'bf95bbef-a478-4d40-a140-7822f22d9e41',
 '00dc4534-8bee-414c-90d4-05575809adaa',
 '082e887c-acc0-4c3f-a2f4-6185a3517258',
 'a62b4a8e-84cf-40e5-be73-6e4c3a7ca9ad',
 'b9bcb337-3a56-46db-92bd-904b2c7d13ad',
 '949e2e6b-8b47-4f97-9d78-8e0d635a4577',
 '3968ea3a-c5b7-4ac0-9eb7-dc3ed09740e7',
 '87fa3962-9e01-4aed-9f8f-5eb2e742da0c',
 '297d7349-c9fd-451a-9757-48997227c4d0',
 'dc57702b-862a-48f5-a8fb-c17e949d5e96',
 '84e97482-c627-4108-b532-f64ea7bb804e',
 '95259f92-40c2-489d-bdda-dc3d074a1cca',
 '43da9823-9055-46df-b357-85bc4b301be4',
 '0206a2f0-3e07-4f23-b326-dc377911f0e1',
 '4d94426c-cc37-

### Tabel Nutrisi pada Anak dengan Penyakit Ginjal

In [24]:
# Partition the DOCX file holding the nutrition guideline tables into elements.
elements = partition_docx(filename="docx/nutrisi tabel.docx")

In [25]:
import re

# Join the element texts with newlines. Plain `+=` concatenation glued the last
# word of one element onto the first word of the next.
text = "\n".join(elem.text for elem in elements)
# print(text)
# Split on every "Tabel <number>" caption. `\d+` (rather than a single `\d`)
# consumes the whole number, so a caption numbered 10 or above does not leave a
# stray digit at the head of the next piece. The piece before the first caption
# is dropped ([1:]) and whitespace-only pieces are skipped.
table_list = [part for part in re.split(r"Tabel \d+", text)[1:] if part.strip()]

''

In [26]:
from langchain_core.documents import Document

# Wrap each extracted table text in a Document tagged with the source PDF name.
document_chunks = [
    Document(
        page_content=table_text,
        metadata={'filename':'nutrisi_pada_anak_dengan_penyakit_ginjal.pdf'},
    )
    for table_text in table_list
]

In [27]:
# Append the nutrition table documents to the existing store; the cell output is
# the list of ids returned for the added documents.
db.add_documents(document_chunks)

['9945948f-4e09-487e-88cc-58277ce27c50',
 '00bb513e-7e5a-4e35-8592-72b04a7dcef6',
 '4db6e555-b701-406c-8a83-4f2ca9c35208',
 '9182bb3b-abbb-4322-8df4-8b5f09820cab',
 '5c4fc065-68d4-4247-afda-e271b18e22fa',
 '905039a2-e9da-4984-b1b9-8750118b9635',
 '23deacda-e745-45d5-91e8-bbe86fde62af',
 'a07fbe7f-5da7-4d21-8b2e-3bc00220c86e']

## Tatalaksana Hemodialisis pada Anak dan Bayi

In [28]:
# Partition the cleaned hemodialysis guideline text file into unstructured elements.
elements = partition_text(filename="final/Tatalaksana_Hemodialisis_pada_Anak_dan_Bayi_clean.txt")

In [29]:
# Chunk the partitioned elements: max_characters=1500 and new_after_n_chars=1000 set
# the size limits, and overlap=300 with overlap_all=True requests a 300-character
# overlap between chunks.
chunks = chunk_elements(elements, 
                        max_characters=1500, 
                        new_after_n_chars=1000, 
                        overlap=300, 
                        overlap_all=True)

In [30]:
# Turn every unstructured chunk into a LangChain Document. The chunk metadata is
# copied via to_dict() and its "languages" entry is replaced by the string 'ind'.
document_chunks = [
    Document(
        page_content=piece.text,
        metadata={**piece.metadata.to_dict(), "languages": 'ind'},
    )
    for piece in chunks
]

In [31]:
# Append the hemodialysis guideline chunks to the existing store; the cell output
# is the list of ids returned for the added documents.
db.add_documents(document_chunks)

['36bf0944-fc04-4055-b4b7-8db547146b6c',
 '043cd7a2-3179-4952-9d7e-329734180034',
 'bedf61cd-d2c2-4a6b-9f0c-f5a59e8c1742',
 '4b1919a0-da75-48c3-b244-9ef0e6ac4c11',
 'fa6e26b9-711f-4f57-acbe-12398faa475b',
 '2065d4c9-9083-4994-b6b0-ed8ba9b493c1',
 '2c4b7c14-c253-4292-85a4-f7835831f124',
 'df594951-cf1a-44d3-b6a0-11d3e6a15f0a',
 'b2eed3e7-fcea-48bd-87a0-b3930b416cea',
 '226775b3-953e-43e5-b268-baf923e6bb36',
 '74fdb917-ebf5-4f94-8e1e-d9999bdc7dd2',
 'b3ee7966-9735-4e9a-b053-6d21870e5e27',
 '654d04f2-c49d-445b-a707-1776fb07aa32',
 'acd64141-0f28-4cf8-9dab-c4fc3a6e9c0d',
 '6e61896e-eea3-47d3-8cfe-1af36f0b3b63',
 'a3ae4aab-b5a3-4132-a938-2c724f69d5ea',
 'aa5dd0ae-321c-4a32-88ea-45c90e38c0dd',
 'b9b161af-5792-4d28-9b88-54054ffbfd36',
 'c4bf1ffc-d322-4804-a113-746a2d52b190',
 '763c33ba-7926-4db9-a044-ee2a9fd5da28',
 'b55ac48f-66a3-4d9f-a1c5-c5cf71a1bffd',
 '4679bb86-7072-4cc1-9421-1961a4dbc040',
 '795f7440-06f2-437b-843a-92c146ad08d1',
 '69c68330-fd8b-40ab-9444-760edd3336cf',
 '395354fe-8ed4-

## Tatalaksana Penyakit Ginjal Kronik pada Anak

In [32]:
# Partition the cleaned chronic-kidney-disease guideline text file into elements.
elements = partition_text(filename="final/Tatalaksana_Penyakit_Ginjal_Kronik_pada_Anak_clean_split_final.txt")

In [33]:
# Chunk the partitioned elements: max_characters=1500 and new_after_n_chars=1000 set
# the size limits, and overlap=300 with overlap_all=True requests a 300-character
# overlap between chunks.
chunks = chunk_elements(elements, 
                        max_characters=1500, 
                        new_after_n_chars=1000, 
                        overlap=300, 
                        overlap_all=True)

In [34]:
# Turn every unstructured chunk into a LangChain Document. The chunk metadata is
# copied via to_dict() and its "languages" entry is replaced by the string 'ind'.
document_chunks = [
    Document(
        page_content=piece.text,
        metadata={**piece.metadata.to_dict(), "languages": 'ind'},
    )
    for piece in chunks
]

In [35]:
# Append the chronic-kidney-disease guideline chunks to the existing store; the
# cell output is the list of ids returned for the added documents.
db.add_documents(document_chunks)

['9a230ba7-613e-4bf8-93d6-4e0164aa8bf3',
 '6adcd992-4ddf-4fd7-91ae-165679e9ab59',
 '9c82f641-9ce0-472e-aca6-3994e29f10b8',
 '6d1dd23f-5b6f-45a6-8554-10764edff0fc',
 '4dff2ce6-daa5-4276-8214-1bd44e64d95a',
 '9e8a6e3e-0453-4400-913a-2c287e2f0895',
 '057ff5d1-2a2b-4584-a731-89e4cfd6ac80',
 '807709ae-6438-47a8-aa7e-e92fe97a21d1',
 '5aaae9c0-a17b-404c-8ac8-c7a70dfba9d4',
 'bf48aa31-2713-4487-9c34-a1e82646c456',
 'd5df86ad-eb84-4693-9904-0d3f4d57ec4f',
 'aab66174-8bb8-4939-8a4c-42cb7dfa1950',
 'e849c64e-0736-456f-ad62-08f4f966d3ef',
 '9dc89847-3171-4a01-a6ad-99f9566610ef',
 'cc3a2b7d-fcba-4162-9026-5610c86486da',
 'c0e75629-5e21-4058-867b-f69fbee535b1',
 '35002af0-1164-48e4-86c2-7521a137bfc4']

### Tabel Tatalaksana Penyakit Ginjal Kronik pada Anak

In [36]:
# Partition the DOCX file holding the chronic-kidney-disease guideline tables.
tata_laksana_tabel_elements = partition_docx(filename="docx/tatalaksana anak tabel.docx")

In [37]:
import re

# Join the element texts with newlines. Plain `+=` concatenation glued the last
# word of one element onto the first word of the next.
tata_anak_tabel_text = "\n".join(
    elem.text for elem in tata_laksana_tabel_elements
)
# print(tata_anak_tabel_text)
# Split on every "Tabel <number>" caption. `\d+` (rather than a single `\d`)
# consumes the whole number, so a caption numbered 10 or above does not leave a
# stray digit at the head of the next piece. The piece before the first caption
# is dropped ([1:]) and whitespace-only pieces are skipped.
table_list = [
    part
    for part in re.split(r"Tabel \d+", tata_anak_tabel_text)[1:]
    if part.strip()
]

''

In [38]:
# Wrap each extracted table text in a Document tagged with the source PDF name.
document_chunks = [
    Document(
        page_content=table_text,
        metadata={'filename':'Tatalaksana_Penyakit_Ginjal_Kronik_pada_Anak.pdf'},
    )
    for table_text in table_list
]

In [39]:
# Append the chronic-kidney-disease table documents to the existing store; the
# cell output is the list of ids returned for the added documents.
db.add_documents(document_chunks)

['2c2d3230-c193-40b0-a417-3d4bf5ab2944',
 '9777c7ff-4b69-4294-a836-0d5390ce6e8e',
 'b3b348eb-37aa-4033-96a5-8e740d987905',
 '0dfc9e77-21db-47ca-bdf9-8f549651e6a0',
 '67cfacdc-abc6-4316-9e3a-b133b2eb7cdf']


# LangChain

In [85]:
from langchain_community.document_loaders import TextLoader
CHROMA_PATH = "final_test/langchain-multi"
# CHROMA_PATH = "final_test/langchain-bge"

# Keep the embedding model in step with the store being built. When the notebook
# is run top to bottom, `embedding` is still the BGE-M3 model loaded for the
# previous section, so the "-multi" store would silently be filled with BGE
# vectors (it was only correct before because the e5 cell had been re-executed by
# hand right before this one). Reload only when the active model does not match.
if CHROMA_PATH.endswith("-multi"):
    _store_model_name, _store_embedding_cls = (
        "intfloat/multilingual-e5-large",
        HuggingFaceEmbeddings,
    )
else:
    _store_model_name, _store_embedding_cls = "BAAI/bge-m3", HuggingFaceBgeEmbeddings

# globals().get() tolerates `embedding` not being defined yet on a fresh kernel.
if getattr(globals().get("embedding"), "model_name", None) != _store_model_name:
    embedding = _store_embedding_cls(
        model_name=_store_model_name,
        cache_folder="./models",
        encode_kwargs={"normalize_embeddings": True},
    )

In [86]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

def split_text(documents: "list[Document]"):
    """Split documents into overlapping chunks and print a short preview.

    Uses ``RecursiveCharacterTextSplitter`` with 1500-character chunks, a
    300-character overlap, and ``add_start_index=True``.

    Args:
        documents: Documents to split.

    Returns:
        The list of chunk documents produced by the splitter (possibly empty).
    """
    print("Starting chunking")

    # Recursive splitter
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1500,
        chunk_overlap=300,
        length_function=len,
        add_start_index=True,
    )
    chunks = text_splitter.split_documents(documents)
    print(f"Split {len(documents)} documents into {len(chunks)} chunks.")
    print(f"len docs {len(chunks)}")
    # Preview one chunk as a sanity check. Chunk 10 is shown when it exists;
    # shorter inputs fall back to their last chunk instead of raising IndexError,
    # and an empty result skips the preview entirely.
    if chunks:
        document = chunks[min(10, len(chunks) - 1)]
        print("page content \n", document.page_content)
        print("doc metadata \n", document.metadata)
    return chunks

## PNPK 2023

In [87]:
# Read the cleaned PNPK 2023 text file (UTF-8) into LangChain documents.
documents = TextLoader("final/PNPK 2023 GGK_clean.txt", encoding="UTF-8").load()

In [88]:
# langchain splitter
chunks = split_text(documents)

# Start from an empty store. Chroma.from_documents adds to whatever already lives
# in persist_directory, so re-running this cell would otherwise store every chunk
# a second time. This mirrors the clean-up done in save_to_chroma().
if os.path.exists(CHROMA_PATH):
    shutil.rmtree(CHROMA_PATH)

# Embed the PNPK chunks into a new cosine-distance collection; `db` is reused by
# the later add_documents() cells.
db = Chroma.from_documents(
        chunks,
        embedding,
        persist_directory=CHROMA_PATH,
        collection_metadata={"hnsw:space": "cosine"}
    )
# NOTE(review): persist() may be a deprecated no-op on Chroma >= 0.4 - confirm.
db.persist()

Starting chunking
Split 1 documents into 377 chunks.
len docs 377
page content 
 albuminuria atau ACR urin.  
3. Pasien dengan Hematuria
Pasien dengan hematuria dilakukan pemeriksaan: [Peringkat bukti:
Level IV]
a.Pasien dengan hematuria harus disingkirkan kemungkinanpenyebab lain seperti infeksi saluran kemih
b.Pasien dengan hematuria ma
kroskopik/mikroskopik persisten,
tanpa adanya proteinuria dan penurunan eGFR dirujuk kespesialis urologi untuk pemeriksaan lebih lanjut
c.Pasien dengan hematuria makroskopik/mikroskopik, denganpenyerta klinis hipertensi, proteinuria, dan penurunan eGFR
doc metadata 
 {'source': 'final/PNPK 2023 GGK_clean.txt', 'start_index': 8941}


### Tabel PNPK

In [89]:
# Partition the DOCX file holding the PNPK tables into unstructured elements.
elements = partition_docx(filename="docx/PNPK tabel.docx")

In [90]:
import re

# Join the element texts with newlines. Plain `+=` concatenation glued the last
# word of one element onto the first word of the next.
text = "\n".join(elem.text for elem in elements)
# print(text)
# Split on every "Tabel <number>" caption. The piece before the first caption is
# not a table, so it is dropped ([1:]), and whitespace-only pieces are skipped so
# that no empty document gets embedded.
table_list = [part for part in re.split(r"Tabel \d+", text)[1:] if part.strip()]

''

In [91]:
# Wrap each extracted table text in a Document tagged with the source PDF name.
# NOTE(review): the text chunks in this section carry a 'source' metadata key
# (see the split_text preview output) while tables use 'filename' - confirm that
# downstream filtering copes with both.
document_chunks = [
    Document(page_content=table_text, metadata={'filename':'PNPK 2023 GGK.pdf'})
    for table_text in table_list
]

In [92]:
# Append the PNPK table documents to the existing store; the cell output is the
# list of ids returned for the added documents.
db.add_documents(document_chunks)

['3f43f44c-656a-4f1a-8ce0-fbf4bd6795f3',
 'ff9d4fd9-453e-4fa1-9851-b3d66942b320',
 'fd29cadf-e4ca-4d7d-b986-821c09ffccb3',
 '4e381dca-5916-4748-a0f9-1860693bfcbb',
 'c9029008-5b41-4362-b2ea-4615a1fb46e8',
 '22170c4d-84f8-40f5-a954-136d1b47f2a8',
 '88b7deac-0053-4fea-8308-71c537635116',
 '50e68a83-1a3f-47d2-98ad-94e6b6af7d40',
 'eb0ee422-4427-4af5-9c76-e8679e8bee1b',
 '0922b080-1014-4929-8afc-7eefb2a4397b',
 '840ef6c9-6879-458f-a7e1-f74f8493dcd2',
 'a8686c62-428d-428b-9f24-a30cba7c1234',
 '61b51af2-0bb1-49eb-ab11-2fcf7cbf0490',
 'b3ddc297-f944-4c17-81ea-cb54e0022ba3',
 '02b22249-c9df-4fa0-84d4-0090fc0bdf18',
 'e4dbb789-1b1d-4a1b-a80b-7ce6dd03bd06',
 '2e29d98a-345f-430b-8958-e251505e9e96',
 'd16cedc8-72bb-4284-93ec-00a73a1f54f2',
 '7998128c-2977-4dda-9a3d-a0d694ab9be6',
 '2e437528-d4cc-4e27-add3-e57ac850b586',
 '171ce9ca-648a-4566-be7b-07633639369e',
 'af96fb2e-e219-45e6-86cd-7b4ba648c91a',
 '43cb250e-3c2b-4bfd-9982-c6b769b6cce8',
 '4ea3e6d2-412e-4fdb-b4d3-e9ce6e9eeda5',
 'cc60ddc6-19bf-

## Nutrisi pada anak dengan penyakit ginjal

In [93]:
# Read the nutrition guideline text file (UTF-8) into LangChain documents.
documents = TextLoader(
    "final/nutrisi_pada_anak_dengan_penyakit_ginjal - Copy_poster.txt",
    encoding="UTF-8",
).load()

In [94]:
# langchain splitter
# Chunk the loaded text with split_text() and append the chunks to the existing
# store; the cell output ends with the ids returned for the added documents.
chunks = split_text(documents)
db.add_documents(chunks)

Starting chunking
Split 1 documents into 20 chunks.
len docs 20
page content 
 d. Kekurangan berat badan lebih dari 10% berat badan ideal. e. Indeks massa tubuh kurang dari persentil 5 atau lebih dari persentil 85 untuk tinggi badan menurut umur. f. Kenaikan berat badan yang tidak adekuat dan perawakan pendek. g. Abnormalitas nutrisi yang berhubungan dengan gangguan biokimiawi. 4. Terapi kebutuhan energi Penyebab gangguan asupan kalori pada pasien PGK adalah penurunan selera makan dan muntah. Angka kematian PGK lebih tinggi pada anak dengan indeks massa tubuh di bawah atau di atas nilai normal. Faktor penyebab gangguan selera makan pada pasien PGK antara lain:  a. Pada anak dengan poliuria, rasa haus lebih  nyata dibanding rasa lapar . b. Pasien lebih menyukai makanan bergaram  dibandingkan makanan manis sebagai asupan  energi. c. Akumulasi selera makan yang meregulasi sitokin dan hormon. d. Refl  uks gastroesofagus. e. Gangguan motilitas lambung. f.  Pengosongan lambung terlambat. g. 

['4263fda1-cd52-4820-976b-4e6dc61243a3',
 '8aaef94e-4a54-42a7-94ae-1cf6fc66b1a7',
 'b9bad13d-327d-484e-9f7b-72d61a544556',
 '1a3decb1-150b-456a-aa63-ed8467f386ed',
 'ffb6c5d1-1321-4d8b-8c34-94cb254e6d6d',
 '59261517-30d1-4472-a0a1-8adaf789db03',
 '76a56483-e6a9-4f17-9afd-167560e181d5',
 '70331218-c989-4bd0-af4f-018e712b78d1',
 'ff959d8c-e4f3-4060-88cf-3aeba29ffcd1',
 'c6556b70-f47c-4c7b-a1ae-ccb6662bbc0c',
 '46930e37-d2ef-4254-971e-4cadd6e5a7cb',
 'd6768b42-4605-4acb-a350-2462266eeea3',
 '663857f9-8e4f-4f5f-b638-3d9cc68dedfd',
 '13d4dd5e-8273-41f5-8301-8284504b6637',
 'f953cf3e-d043-4c86-86b4-9f278164addc',
 'a7757ac1-068c-4272-977c-8c26c930ca35',
 'c927af81-9d91-4159-b91f-3b6b370ea1a3',
 '29d6c390-4a2d-474c-bbc3-051d6f4515a0',
 '8b13ad2a-c717-43fc-98b7-c2c7c7fc4a45',
 '84604b62-bd4a-476d-9204-aae727613135']

### Tabel Nutrisi pada Anak dengan Penyakit Ginjal

In [95]:
# Partition the DOCX file holding the nutrition guideline tables into elements.
elements = partition_docx(filename="docx/nutrisi tabel.docx")

In [96]:
import re

# Join the element texts with newlines. Plain `+=` concatenation glued the last
# word of one element onto the first word of the next.
text = "\n".join(elem.text for elem in elements)
# print(text)
# Split on every "Tabel <number>" caption. `\d+` (rather than a single `\d`)
# consumes the whole number, so a caption numbered 10 or above does not leave a
# stray digit at the head of the next piece. The piece before the first caption
# is dropped ([1:]) and whitespace-only pieces are skipped.
table_list = [part for part in re.split(r"Tabel \d+", text)[1:] if part.strip()]

''

In [97]:
from langchain_core.documents import Document

# Wrap each extracted table text in a Document tagged with the source PDF name.
document_chunks = [
    Document(
        page_content=table_text,
        metadata={'filename':'nutrisi_pada_anak_dengan_penyakit_ginjal.pdf'},
    )
    for table_text in table_list
]

In [98]:
# Append the nutrition table documents to the existing store; the cell output is
# the list of ids returned for the added documents.
db.add_documents(document_chunks)

['3202a338-a574-4803-a527-379d932a1d6e',
 'b9ad9adf-8a97-4c6f-ab67-61e3b0247963',
 '56de1f36-92be-4ee3-a97a-8ddc489a05bb',
 'd81b95d1-abac-4644-be5c-39b2672f0421',
 'e52ae3e4-594e-422b-8b02-a1a9c311f0a7',
 'd12a5e7a-cd70-495e-9f34-b1740e4e83dc',
 'fb378c2a-1220-4573-a4ff-c3512920d009',
 '5ccf2850-f90f-4615-9b7e-314c7a83a2e2']

## Tatalaksana Hemodialisis pada Anak dan Bayi

In [99]:
# Read the cleaned hemodialysis guideline text file (UTF-8) into LangChain documents.
documents = TextLoader(
    "final/Tatalaksana_Hemodialisis_pada_Anak_dan_Bayi_clean.txt",
    encoding='utf-8',
).load()

In [100]:
# Chunk the loaded text with split_text() and append the chunks to the existing
# store; the cell output ends with the ids returned for the added documents.
chunks = split_text(documents)
db.add_documents(chunks)

Starting chunking
Split 1 documents into 27 chunks.
len docs 27
page content 
 mendasar antara  hemodialisis dewasa dan pediatrik. Misalnya,  pada pediatrik, aliran darah dan hemodialyzer  dipilih atas dasar bahwa anak-anak dapat  mentolerir 8% (maksimum 10%) total volume  darah mereka di sirkuit ekstrakorporeal  berdasarkan perkiraan volume darah total 80  mL/ kg untuk bayi dan 70 mL/ kg untuk anak  yang lebih tua. Jika sirkuit hemodialisis terkecil  yang tersedia melebihi volume kritis ini, dapat  ditutupi dengan larutan albumin manusia  4,5% atau darah donor untuk mencegah  gejala hipovolemia. Karena aliran darah bayi dan anak memiliki  kaliber lebih kecil, kecepatan pompa darah  cenderung lebih cepat, dengan sasaran 8–10  mL/kg/menit, dibandingkan 3-5 mL/kg/menit  untuk hemodialisis dewasa. Penggulung  kepala pompa darah (blood pump head  rollers) harus disesuaikan dengan ukuran  aliran dialisis, hemolisis mekanik dapat terjadi  jika aliran darah dewasa digunakan dalam  mesin diali

['8361816d-c013-4474-af38-25165763e2a0',
 '95aa0200-c208-4392-bc4f-3da21a333656',
 '07307a43-3649-4d01-bcfd-f7d059900121',
 '101e7047-2e3e-44ee-9f34-1a92ef2038b7',
 '996d4c64-017d-4707-a169-93f468960e85',
 '93afae33-7690-432d-92aa-2dcbc171a3f7',
 '5d1e9615-47b5-46a9-8a14-d5d4d0993e14',
 'd63c531b-5a4f-4015-9814-e015ea490d6c',
 '69e8f63b-fcc9-4c23-9619-179c5743af73',
 '9507e1d4-6d39-458c-8aa5-d1bf48ac9b00',
 '0f64f7a1-0776-401f-984c-9a4c706c3b63',
 '1f571e1c-5720-4b8e-b41c-7103ac72f02f',
 '05fa63f8-4a8d-4ba7-bd3a-17377273b46e',
 '2202a82c-5a2e-4405-8d39-19048592f9f0',
 '9f3652ef-d097-4178-b793-d043e05e3b98',
 'd3856d1e-5be6-45f4-8683-3fa4d2618514',
 '79634c9e-96e7-49f8-81a9-9454bb6bfffe',
 '99850e73-6c08-4fa0-896f-f1f57c1113c5',
 '39c6ec1c-99b4-4217-aaf0-dc479d2cb167',
 '01a552d3-a3f1-4bb8-871c-d3704f475aba',
 '2c4e1959-d8e5-45e7-bc9f-41d3a104753e',
 '0b73cdeb-f5d3-4fa0-b9a8-64a9bac918dd',
 'd3079430-146a-4795-836e-edd62059af0f',
 'ba04d334-f69e-428e-9346-020b0a99c40d',
 '9707c4a7-2aa3-

## Tatalaksana Penyakit Ginjal Kronik pada Anak

In [101]:
# Read the cleaned chronic-kidney-disease guideline text file (UTF-8) into
# LangChain documents.
documents = TextLoader(
    "final/Tatalaksana_Penyakit_Ginjal_Kronik_pada_Anak_clean_split_final.txt",
    encoding='utf-8',
).load()

In [102]:
# Chunk the loaded text with split_text() and append the chunks to the existing
# store; the cell output ends with the ids returned for the added documents.
chunks = split_text(documents)
db.add_documents(chunks)

Starting chunking
Split 1 documents into 12 chunks.
len docs 12
page content 
 Penyakit ginjal kronis stadium 5 merupakan indikasi untuk transplantasi. Meskipun demikian, tidak semua 
pasien dengan penyakit ginjal kronis stadium 5 dapat 
menjadi kandidat untuk transplantasi ginjal. Prosedur ini 
dapat terlalu berisiko bagi sebagian karena komorbiditas
yang telah diderita pasien atau karena kontraindikasi 
tertentu, seperti infeksi kronis yang akan dieksaserbasi oleh imunosupresi. Kontraindikasi absolut bagi transplantasi ginjal adalah keganasan aktif, terutama jika telah bermetastasis. Infeksi HIV dan potensi rekurensi penyakit ginjal juga perlu dipertimbangkan 
dalam rencana transplantasi. 
Prognosis pasien dengan penyakit ginjal kronis adalah 
bervariasi menurut stadium dan penatalaksanaan yang dilakukan. Dengandeteksi dan penatalaksanaan dini,morbiditas dan mortalitas diharapkan dapat diturunkan.  
3.Kesimpulan
Penyakit ginjal kronis pada anak dapat disebabkan penyakit 
kongenital, 

['0baac2f2-c6c1-4bc7-ac63-fb8e094b3cb2',
 'd0d745be-1229-45d0-a419-233979a5bb49',
 '1384706d-0653-4d52-bb50-ca7266b156d9',
 '98853da8-5fea-4092-83dd-d09676542bb8',
 'e03b2f78-d515-4ca4-a102-95f7779214dd',
 '3304b0d9-9ce6-4a58-bf16-d005f94900e5',
 '80208123-b72f-43bd-a564-f708dbf9ca1f',
 'aea7b58d-3d28-490e-be16-a964deba4436',
 '83e6fb0a-23ef-40ef-86e1-55adaddf7126',
 '0e2d0bb8-3969-441f-9a6f-e90bf803e370',
 'dba287e2-4fac-4271-9686-38278089bba9',
 '937011f3-4dde-46aa-b894-806898905b80']

### Tabel Tatalaksana Penyakit Ginjal Kronik pada Anak

In [103]:
# Partition the DOCX file holding the chronic-kidney-disease guideline tables.
tata_laksana_tabel_elements = partition_docx(filename="docx/tatalaksana anak tabel.docx")

In [104]:
import re

# Join the element texts with newlines. Plain `+=` concatenation glued the last
# word of one element onto the first word of the next.
tata_anak_tabel_text = "\n".join(
    elem.text for elem in tata_laksana_tabel_elements
)
# print(tata_anak_tabel_text)
# Split on every "Tabel <number>" caption. `\d+` (rather than a single `\d`)
# consumes the whole number, so a caption numbered 10 or above does not leave a
# stray digit at the head of the next piece. The piece before the first caption
# is dropped ([1:]) and whitespace-only pieces are skipped.
table_list = [
    part
    for part in re.split(r"Tabel \d+", tata_anak_tabel_text)[1:]
    if part.strip()
]

''

In [105]:
# Wrap each extracted table text in a Document tagged with the source PDF name.
document_chunks = [
    Document(
        page_content=table_text,
        metadata={'filename':'Tatalaksana_Penyakit_Ginjal_Kronik_pada_Anak.pdf'},
    )
    for table_text in table_list
]

In [106]:
# Append the chronic-kidney-disease table documents to the existing store; the
# cell output is the list of ids returned for the added documents.
db.add_documents(document_chunks)

['39dc9a46-0db5-474b-bdbd-a4fb4d6f4d6d',
 'a0c89e40-a60c-488f-bc15-76fe8992857e',
 'b641ab96-da8b-4b31-bf75-a5f4f61290d1',
 '11be138e-99a7-43e5-9cb7-ca5c61cae9cd',
 '8c4c2893-b7c8-4a00-a140-10ecfb188a3f']