In [1]:
import os
import json
import torch
import faiss
import numpy as np
import gc
from tqdm import tqdm
from torch.utils.data import DataLoader, Dataset
from transformers import AutoTokenizer, AutoModel

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# --------------------
# 0. Setup và Load model
# --------------------
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
device = "cuda" if torch.cuda.is_available() else "cpu"

model_name = "intfloat/multilingual-e5-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)
model.eval().to(device)



XLMRobertaModel(
  (embeddings): XLMRobertaEmbeddings(
    (word_embeddings): Embedding(250002, 768, padding_idx=1)
    (position_embeddings): Embedding(514, 768, padding_idx=1)
    (token_type_embeddings): Embedding(1, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): XLMRobertaEncoder(
    (layer): ModuleList(
      (0-11): 12 x XLMRobertaLayer(
        (attention): XLMRobertaAttention(
          (self): XLMRobertaSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): XLMRobertaSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=Tru

In [None]:
# --------------------
# 1. Dataset + Embedding function
# --------------------
class TextDataset(Dataset):
    def __init__(self, texts):
        self.texts = texts

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        return self.texts[idx]

def embed_texts(texts, batch_size=2):
    embeddings = []
    dataset = TextDataset(texts)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

    for batch in tqdm(dataloader, desc="Embedding batches"):
        try:
            encoded = tokenizer(
                [f"passage: {text}" for text in batch],
                return_tensors="pt", padding=True, truncation=True, max_length=512
            )
            encoded = {k: v.to(device) for k, v in encoded.items()}

            with torch.no_grad():
                outputs = model(**encoded)
                emb = outputs.last_hidden_state[:, 0, :] # First token (CLS) embedding
                emb = torch.nn.functional.normalize(emb, p=2, dim=1) # Normalize vector L2
                embeddings.append(emb.cpu().numpy())

            del encoded, outputs, emb
            gc.collect() # Clear cache
            torch.cuda.empty_cache() # Clear cache

        except RuntimeError as e:
            print(f"[!] Skipped batch due to CUDA error: {e}")
            gc.collect()
            torch.cuda.empty_cache()
            continue

    return np.vstack(embeddings)

In [4]:
# --------------------
# 2. Xử lý từng file .jsonl với khả năng resume
# --------------------
data_dir = "wiki_chunks"
output_dir = "wiki_embeddings"
os.makedirs(output_dir, exist_ok=True)

all_metadata = []
embedding_parts = []

# Lưu danh sách file đã xử lý
processed_files = set()
for fname in os.listdir(output_dir):
    if fname.startswith("embeddings_part_") and fname.endswith(".npy"):
        part_id = fname.split("_")[-1].split(".")[0]
        processed_files.add(int(part_id))

files = sorted(os.listdir(data_dir))  # Đảm bảo cùng thứ tự mỗi lần chạy

for idx, file in enumerate(tqdm(files, desc="Processing files")):
    if not file.endswith(".jsonl") or idx in processed_files:
        continue

    file_path = os.path.join(data_dir, file)
    texts = []
    metadata = []

    with open(file_path, "r", encoding="utf-8") as f:
        for line in f:
            item = json.loads(line)
            texts.append(item["text"])
            metadata.append(item["metadata"])

    if not texts:
        continue

    print(f"[→] Embedding file: {file} (idx={idx})")

    embeddings = embed_texts(texts, batch_size=8)

    emb_file = os.path.join(output_dir, f"embeddings_part_{idx}.npy")
    meta_file = os.path.join(output_dir, f"metadata_part_{idx}.json")

    np.save(emb_file, embeddings)
    with open(meta_file, "w", encoding="utf-8") as f:
        json.dump(metadata, f, ensure_ascii=False, indent=2)

    embedding_parts.append(emb_file)

# Giới hạn chiều cao output
from IPython.display import display, HTML

display(HTML('''
<style>
.output_scroll {
    max-height: 300px;
    overflow-y: scroll;
}
</style>
'''))

Processing files:   0%|          | 0/762 [00:00<?, ?it/s]

[→] Embedding file: An Nam Cộng sản Đảng.jsonl (idx=0)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  3.95it/s]
Processing files:   0%|          | 1/762 [00:00<06:30,  1.95it/s]

[→] Embedding file: Ban Chấp hành Trung ương Đảng Cộng sản Việt Nam.jsonl (idx=1)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  5.05it/s]
Processing files:   0%|          | 2/762 [00:00<05:40,  2.23it/s]

[→] Embedding file: Ban Chỉ đạo Cải cách Tư pháp Trung ương Đảng Cộng sản Việt Nam.jsonl (idx=2)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  5.14it/s]
Processing files:   0%|          | 3/762 [00:01<04:13,  2.99it/s]

[→] Embedding file: Ban Chỉ đạo Tây Bắc Việt Nam.jsonl (idx=3)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  6.32it/s]
Processing files:   1%|          | 4/762 [00:01<03:22,  3.74it/s]

[→] Embedding file: Ban Chỉ đạo Tây Nam Bộ Việt Nam.jsonl (idx=4)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  5.46it/s]
Processing files:   1%|          | 5/762 [00:01<03:01,  4.16it/s]

[→] Embedding file: Ban Chỉ đạo Tây Nguyên Việt Nam.jsonl (idx=5)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  5.71it/s]
Processing files:   1%|          | 6/762 [00:01<02:46,  4.54it/s]

[→] Embedding file: Ban Dân vận Trung ương Đảng Cộng sản Việt Nam.jsonl (idx=6)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  7.29it/s]
Processing files:   1%|          | 7/762 [00:01<02:26,  5.14it/s]

[→] Embedding file: Ban Nội chính Trung ương.jsonl (idx=7)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  6.94it/s]
Processing files:   1%|          | 8/762 [00:01<02:16,  5.54it/s]

[→] Embedding file: Ban nhạc Việt Nam.jsonl (idx=8)


Embedding batches: 100%|██████████| 31/31 [00:05<00:00,  5.19it/s]
Processing files:   1%|          | 9/762 [00:07<25:04,  2.00s/it]

[→] Embedding file: Ban Đối ngoại Trung ương Đảng Cộng sản Việt Nam.jsonl (idx=9)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  5.50it/s]
Processing files:   1%|▏         | 10/762 [00:08<18:03,  1.44s/it]

[→] Embedding file: Biên giới Việt Nam-Campuchia.jsonl (idx=10)


Embedding batches: 100%|██████████| 5/5 [00:00<00:00,  5.35it/s]
Processing files:   1%|▏         | 11/762 [00:09<16:07,  1.29s/it]

[→] Embedding file: Biên giới Việt Nam-Lào.jsonl (idx=11)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  9.35it/s]
Processing files:   2%|▏         | 12/762 [00:09<11:37,  1.07it/s]

[→] Embedding file: Biên giới Việt Nam-Trung Quốc.jsonl (idx=12)


Embedding batches: 100%|██████████| 7/7 [00:01<00:00,  5.45it/s]
Processing files:   2%|▏         | 13/762 [00:10<12:58,  1.04s/it]

[→] Embedding file: Biên giới Việt Nam.jsonl (idx=13)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  5.41it/s]
Processing files:   2%|▏         | 14/762 [00:11<11:11,  1.11it/s]

[→] Embedding file: Biểu tình tại Việt Nam.jsonl (idx=14)


Embedding batches: 100%|██████████| 25/25 [00:04<00:00,  5.18it/s]
Processing files:   2%|▏         | 15/762 [00:15<25:58,  2.09s/it]

[→] Embedding file: Biểu tượng của Việt Nam.jsonl (idx=15)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  5.66it/s]
Processing files:   2%|▏         | 16/762 [00:16<20:49,  1.67s/it]

[→] Embedding file: Biệt động Sài Gòn.jsonl (idx=16)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  5.34it/s]
Processing files:   2%|▏         | 17/762 [00:17<17:24,  1.40s/it]

[→] Embedding file: Bà mẹ Việt Nam anh hùng.jsonl (idx=17)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  6.25it/s]
Processing files:   2%|▏         | 18/762 [00:17<14:00,  1.13s/it]

[→] Embedding file: Bài Việt Nam chọn lọc.jsonl (idx=18)


Embedding batches: 100%|██████████| 45/45 [00:08<00:00,  5.13it/s]
Processing files:   2%|▏         | 19/762 [00:26<42:30,  3.43s/it]

[→] Embedding file: Bài hát về chiến tranh Việt Nam.jsonl (idx=19)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  6.43it/s]
Processing files:   3%|▎         | 20/762 [00:26<30:54,  2.50s/it]

[→] Embedding file: Bác sĩ Việt Nam.jsonl (idx=20)


Embedding batches: 100%|██████████| 59/59 [00:11<00:00,  5.33it/s]
Processing files:   3%|▎         | 21/762 [00:38<1:02:43,  5.08s/it]

[→] Embedding file: Bán đảo Việt Nam.jsonl (idx=21)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  5.40it/s]
Processing files:   3%|▎         | 22/762 [00:38<44:34,  3.61s/it]  

[→] Embedding file: Báo Nhân dân.jsonl (idx=22)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  5.91it/s]
Processing files:   3%|▎         | 23/762 [00:38<31:48,  2.58s/it]

[→] Embedding file: Bãi biển Việt Nam.jsonl (idx=23)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  5.68it/s]
Processing files:   3%|▎         | 24/762 [00:39<24:52,  2.02s/it]

[→] Embedding file: Bão tại Việt Nam.jsonl (idx=24)


Embedding batches: 100%|██████████| 45/45 [00:08<00:00,  5.22it/s]
Processing files:   3%|▎         | 25/762 [00:47<49:16,  4.01s/it]

[→] Embedding file: Bê bối của Bộ Công an Việt Nam.jsonl (idx=25)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  6.34it/s]
Processing files:   3%|▎         | 26/762 [00:48<35:38,  2.90s/it]

[→] Embedding file: Bê bối doanh nghiệp nhà nước XHCNVN.jsonl (idx=26)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  6.12it/s]
Processing files:   4%|▎         | 27/762 [00:48<26:09,  2.14s/it]

[→] Embedding file: Bê bối quân sự Việt Nam.jsonl (idx=27)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  5.81it/s]
Processing files:   4%|▎         | 28/762 [00:48<19:36,  1.60s/it]

[→] Embedding file: Bí thư Quân ủy Trung ương Việt Nam.jsonl (idx=28)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  8.05it/s]
Processing files:   4%|▍         | 29/762 [00:48<14:11,  1.16s/it]

[→] Embedding file: Bạo loạn ở Việt Nam.jsonl (idx=29)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  6.67it/s]
Processing files:   4%|▍         | 30/762 [00:49<10:29,  1.16it/s]

[→] Embedding file: Bản mẫu Việt Nam.jsonl (idx=30)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00, 10.42it/s]
Processing files:   4%|▍         | 31/762 [00:49<07:42,  1.58it/s]

[→] Embedding file: Bảo tàng Lịch sử Quân sự Việt Nam.jsonl (idx=31)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  6.58it/s]
Processing files:   4%|▍         | 32/762 [00:49<06:32,  1.86it/s]

[→] Embedding file: Bảo tàng tại Việt Nam.jsonl (idx=32)


Embedding batches: 100%|██████████| 10/10 [00:01<00:00,  5.30it/s]
Processing files:   4%|▍         | 33/762 [00:51<11:29,  1.06it/s]

[→] Embedding file: Bảo tồn ở Việt Nam.jsonl (idx=33)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  6.10it/s]
Processing files:   4%|▍         | 34/762 [00:52<10:28,  1.16it/s]

[→] Embedding file: Bảo vật quốc gia của Việt Nam.jsonl (idx=34)


Embedding batches: 100%|██████████| 11/11 [00:02<00:00,  5.20it/s]
Processing files:   5%|▍         | 35/762 [00:54<15:03,  1.24s/it]

[→] Embedding file: Bầu cử tại Việt Nam.jsonl (idx=35)


Embedding batches: 100%|██████████| 12/12 [00:02<00:00,  5.40it/s]
Processing files:   5%|▍         | 36/762 [00:56<18:38,  1.54s/it]

[→] Embedding file: Bắc Bộ.jsonl (idx=36)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00, 10.94it/s]


[→] Embedding file: Bắc thuộc.jsonl (idx=37)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  5.35it/s]
Processing files:   5%|▍         | 38/762 [00:57<11:52,  1.02it/s]

[→] Embedding file: Bệnh viện tại Việt Nam.jsonl (idx=38)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  5.15it/s]
Processing files:   5%|▌         | 39/762 [00:57<09:30,  1.27it/s]

[→] Embedding file: Bồn trũng Nam Côn Sơn.jsonl (idx=39)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  6.08it/s]
Processing files:   5%|▌         | 40/762 [00:57<07:32,  1.59it/s]

[→] Embedding file: Bồn trầm tích Việt Nam.jsonl (idx=40)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  6.45it/s]
Processing files:   5%|▌         | 41/762 [00:57<06:00,  2.00it/s]

[→] Embedding file: Bộ Chính trị Ban Chấp hành Trung ương Đảng Cộng sản Việt Nam.jsonl (idx=41)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  8.63it/s]
Processing files:   6%|▌         | 42/762 [00:57<04:44,  2.53it/s]

[→] Embedding file: Bộ Công Thương Việt Nam.jsonl (idx=42)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  7.44it/s]
Processing files:   6%|▌         | 43/762 [00:57<03:51,  3.10it/s]

[→] Embedding file: Bộ Công an Việt Nam.jsonl (idx=43)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  5.27it/s]
Processing files:   6%|▌         | 44/762 [00:58<04:04,  2.94it/s]

[→] Embedding file: Bộ Giao thông Vận tải Việt Nam.jsonl (idx=44)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  8.20it/s]
Processing files:   6%|▌         | 45/762 [00:58<03:19,  3.60it/s]

[→] Embedding file: Bộ Khoa học và Công nghệ Việt Nam.jsonl (idx=45)


Embedding batches: 100%|██████████| 5/5 [00:00<00:00,  5.97it/s]
Processing files:   6%|▌         | 46/762 [00:59<05:20,  2.24it/s]

[→] Embedding file: Bộ Kế hoạch và Đầu tư Việt Nam.jsonl (idx=46)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  5.52it/s]
Processing files:   6%|▌         | 47/762 [00:59<05:04,  2.35it/s]

[→] Embedding file: Bộ Ngoại giao Việt Nam.jsonl (idx=47)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  7.94it/s]
Processing files:   6%|▋         | 48/762 [00:59<04:01,  2.95it/s]

[→] Embedding file: Bộ Nông nghiệp và Phát triển Nông thôn Việt Nam.jsonl (idx=48)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  6.90it/s]
Processing files:   6%|▋         | 49/762 [01:00<03:53,  3.05it/s]

[→] Embedding file: Bộ Nội vụ Việt Nam.jsonl (idx=49)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  7.03it/s]
Processing files:   7%|▋         | 50/762 [01:00<03:46,  3.14it/s]

[→] Embedding file: Bộ Quốc phòng Việt Nam.jsonl (idx=50)


Embedding batches: 100%|██████████| 69/69 [00:12<00:00,  5.49it/s]
Processing files:   7%|▋         | 51/762 [01:13<47:15,  3.99s/it]

[→] Embedding file: Bộ trưởng Bộ Thương mại Việt Nam.jsonl (idx=51)


Embedding batches: 100%|██████████| 7/7 [00:01<00:00,  5.21it/s]
Processing files:   7%|▋         | 52/762 [01:14<37:51,  3.20s/it]

[→] Embedding file: Bộ trưởng Bộ Tài chính Việt Nam.jsonl (idx=52)


Embedding batches: 100%|██████████| 5/5 [00:00<00:00,  5.60it/s]
Processing files:   7%|▋         | 53/762 [01:15<29:40,  2.51s/it]

[→] Embedding file: Cao trào kháng Nhật.jsonl (idx=53)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  6.43it/s]
Processing files:   7%|▋         | 54/762 [01:15<21:52,  1.85s/it]

[→] Embedding file: Chiến dịch Bắc Kỳ.jsonl (idx=54)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  6.60it/s]
Processing files:   7%|▋         | 55/762 [01:15<16:22,  1.39s/it]

[→] Embedding file: Chiến dịch Xuân Hè 1972.jsonl (idx=55)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  5.30it/s]
Processing files:   7%|▋         | 56/762 [01:16<14:08,  1.20s/it]

[→] Embedding file: Chiến dịch Điện Biên Phủ.jsonl (idx=56)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  5.60it/s]
Processing files:   7%|▋         | 57/762 [01:17<11:47,  1.00s/it]

[→] Embedding file: Chiến thuật Chiến tranh Việt Nam.jsonl (idx=57)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  8.06it/s]
Processing files:   8%|▊         | 58/762 [01:17<08:42,  1.35it/s]

[→] Embedding file: Chiến tranh Việt Nam.jsonl (idx=58)


Embedding batches: 100%|██████████| 132/132 [00:25<00:00,  5.11it/s]
Processing files:   8%|▊         | 59/762 [01:43<1:36:57,  8.28s/it]

[→] Embedding file: Chiến tranh Việt Nam–Trung Quốc.jsonl (idx=59)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  6.41it/s]
Processing files:   8%|▊         | 60/762 [01:43<1:08:55,  5.89s/it]

[→] Embedding file: Chiến tranh Việt–Xiêm.jsonl (idx=60)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  5.34it/s]
Processing files:   8%|▊         | 61/762 [01:44<50:10,  4.29s/it]  

[→] Embedding file: Chiến tranh liên quan tới Việt Nam.jsonl (idx=61)


Embedding batches: 100%|██████████| 57/57 [00:10<00:00,  5.21it/s]
Processing files:   8%|▊         | 62/762 [01:55<1:13:24,  6.29s/it]

[→] Embedding file: Chiến tranh trong lịch sử Việt Nam.jsonl (idx=62)


Embedding batches: 100%|██████████| 20/20 [00:03<00:00,  5.27it/s]
Processing files:   8%|▊         | 63/762 [01:58<1:04:38,  5.55s/it]

[→] Embedding file: Chiến tranh Đông Dương.jsonl (idx=63)


Embedding batches: 100%|██████████| 15/15 [00:02<00:00,  5.19it/s]
Processing files:   8%|▊         | 64/762 [02:01<55:17,  4.75s/it]  

[→] Embedding file: Cháy công trình xây dựng ở Việt Nam.jsonl (idx=64)


Embedding batches: 100%|██████████| 7/7 [00:01<00:00,  5.40it/s]
Processing files:   9%|▊         | 65/762 [02:03<43:11,  3.72s/it]

[→] Embedding file: Chính phủ Quốc gia Việt Nam.jsonl (idx=65)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  8.96it/s]
Processing files:   9%|▊         | 66/762 [02:03<31:00,  2.67s/it]

[→] Embedding file: Chính phủ Việt Nam qua các thời kỳ.jsonl (idx=66)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  9.54it/s]
Processing files:   9%|▉         | 67/762 [02:03<22:03,  1.90s/it]

[→] Embedding file: Chính phủ Việt Nam.jsonl (idx=67)


Embedding batches: 100%|██████████| 5/5 [00:00<00:00,  5.51it/s]
Processing files:   9%|▉         | 68/762 [02:04<18:35,  1.61s/it]

[→] Embedding file: Chính sách Việt Nam.jsonl (idx=68)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  5.93it/s]
Processing files:   9%|▉         | 69/762 [02:04<15:21,  1.33s/it]

[→] Embedding file: Chính trị Việt Nam Cộng hòa.jsonl (idx=69)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  5.87it/s]
Processing files:   9%|▉         | 70/762 [02:05<11:56,  1.04s/it]

[→] Embedding file: Chính trị Việt Nam.jsonl (idx=70)


Embedding batches: 100%|██████████| 29/29 [00:05<00:00,  5.13it/s]
Processing files:   9%|▉         | 71/762 [02:10<27:55,  2.43s/it]

[→] Embedding file: Chất độc da cam.jsonl (idx=71)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  5.54it/s]
Processing files:   9%|▉         | 72/762 [02:11<20:47,  1.81s/it]

[→] Embedding file: Chết ở Việt Nam.jsonl (idx=72)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  5.98it/s]
Processing files:  10%|▉         | 73/762 [02:11<15:42,  1.37s/it]

[→] Embedding file: Chủ tịch nước Việt Nam.jsonl (idx=73)


Embedding batches: 100%|██████████| 10/10 [00:01<00:00,  5.33it/s]
Processing files:  10%|▉         | 74/762 [02:13<17:28,  1.52s/it]

[→] Embedding file: Cung điện tại Việt Nam.jsonl (idx=74)


Embedding batches: 100%|██████████| 6/6 [00:01<00:00,  5.58it/s]
Processing files:  10%|▉         | 75/762 [02:14<15:57,  1.39s/it]

[→] Embedding file: Cuộc nổi dậy thời Nguyễn.jsonl (idx=75)


Embedding batches: 100%|██████████| 12/12 [00:02<00:00,  5.27it/s]
Processing files:  10%|▉         | 76/762 [02:16<18:59,  1.66s/it]

[→] Embedding file: Các dân tộc Việt Nam.jsonl (idx=76)


Embedding batches: 100%|██████████| 49/49 [00:09<00:00,  5.26it/s]
Processing files:  10%|█         | 77/762 [02:26<45:16,  3.97s/it]

[→] Embedding file: Các đơn vị Quân đội Hoa Kỳ trong Chiến tranh Việt Nam.jsonl (idx=77)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  6.69it/s]
Processing files:  10%|█         | 78/762 [02:26<32:10,  2.82s/it]

[→] Embedding file: Cách mạng tháng Tám.jsonl (idx=78)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  5.38it/s]
Processing files:  10%|█         | 79/762 [02:26<23:48,  2.09s/it]

[→] Embedding file: Cáp treo tại Việt Nam.jsonl (idx=79)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  6.13it/s]
Processing files:  10%|█         | 80/762 [02:27<17:48,  1.57s/it]

[→] Embedding file: Công an nhân dân Việt Nam.jsonl (idx=80)


Embedding batches: 100%|██████████| 9/9 [00:01<00:00,  5.57it/s]
Processing files:  11%|█         | 81/762 [02:28<17:58,  1.58s/it]

[→] Embedding file: Công chúa Việt Nam.jsonl (idx=81)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  5.93it/s]
Processing files:  11%|█         | 82/762 [02:29<14:52,  1.31s/it]

[→] Embedding file: Công chúa nhà Lê trung hưng.jsonl (idx=82)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  7.15it/s]
Processing files:  11%|█         | 83/762 [02:29<10:54,  1.04it/s]

[→] Embedding file: Công chức Việt Nam.jsonl (idx=83)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  5.82it/s]
Processing files:  11%|█         | 84/762 [02:30<08:50,  1.28it/s]

[→] Embedding file: Công dân Việt Nam.jsonl (idx=84)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  5.46it/s]
Processing files:  11%|█         | 85/762 [02:30<07:27,  1.51it/s]

[→] Embedding file: Công trình kiến trúc đã bị phá dỡ tại Việt Nam.jsonl (idx=85)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  6.46it/s]
Processing files:  11%|█▏        | 86/762 [02:30<05:46,  1.95it/s]

[→] Embedding file: Công trình xây dựng Việt Nam.jsonl (idx=86)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  8.19it/s]
Processing files:  11%|█▏        | 87/762 [02:30<04:27,  2.52it/s]

[→] Embedding file: Công ty Việt Nam.jsonl (idx=87)


Embedding batches: 100%|██████████| 35/35 [00:06<00:00,  5.60it/s]
Processing files:  12%|█▏        | 88/762 [02:36<24:14,  2.16s/it]

[→] Embedding file: Công tước Việt Nam.jsonl (idx=88)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  5.39it/s]
Processing files:  12%|█▏        | 89/762 [02:37<18:13,  1.62s/it]

[→] Embedding file: Công tử Bạc Liêu.jsonl (idx=89)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  5.15it/s]
Processing files:  12%|█▏        | 90/762 [02:37<14:04,  1.26s/it]

[→] Embedding file: Công viên Việt Nam.jsonl (idx=90)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  6.27it/s]
Processing files:  12%|█▏        | 91/762 [02:38<10:56,  1.02it/s]

[→] Embedding file: Công viên tại Hà Nội.jsonl (idx=91)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  6.48it/s]
Processing files:  12%|█▏        | 92/762 [02:38<08:11,  1.36it/s]

[→] Embedding file: Công viên tại Thành phố Hồ Chí Minh.jsonl (idx=92)


Embedding batches: 100%|██████████| 6/6 [00:01<00:00,  5.59it/s]
Processing files:  12%|█▏        | 93/762 [02:39<09:21,  1.19it/s]

[→] Embedding file: Công viên vui chơi tại Việt Nam.jsonl (idx=93)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  7.40it/s]
Processing files:  12%|█▏        | 94/762 [02:39<07:01,  1.59it/s]

[→] Embedding file: Cơ quan của Đảng Cộng sản Việt Nam.jsonl (idx=94)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  5.78it/s]
Processing files:  12%|█▏        | 95/762 [02:39<06:05,  1.82it/s]

[→] Embedding file: Cơ quan điều tra Công an nhân dân Việt Nam.jsonl (idx=95)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  6.88it/s]
Processing files:  13%|█▎        | 96/762 [02:39<04:46,  2.33it/s]

[→] Embedding file: Cơ sở giáo dục Tôn giáo tại Việt Nam.jsonl (idx=96)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  5.80it/s]
Processing files:  13%|█▎        | 97/762 [02:40<04:30,  2.46it/s]

[→] Embedding file: Cảng Việt Nam.jsonl (idx=97)


Embedding batches: 100%|██████████| 9/9 [00:01<00:00,  5.31it/s]
Processing files:  13%|█▎        | 98/762 [02:42<08:49,  1.25it/s]

[→] Embedding file: Cảnh sát biển Việt Nam.jsonl (idx=98)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  8.69it/s]
Processing files:  13%|█▎        | 99/762 [02:42<06:34,  1.68it/s]

[→] Embedding file: Cố đô Việt Nam.jsonl (idx=99)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  5.31it/s]
Processing files:  13%|█▎        | 100/762 [02:42<07:08,  1.55it/s]

[→] Embedding file: Cộng sản ở Việt Nam.jsonl (idx=100)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  5.13it/s]
Processing files:  13%|█▎        | 101/762 [02:43<07:37,  1.45it/s]

[→] Embedding file: Cựu học sinh Trường Học sinh miền Nam.jsonl (idx=101)


Embedding batches: 100%|██████████| 13/13 [00:02<00:00,  5.35it/s]
Processing files:  13%|█▎        | 102/762 [02:46<13:23,  1.22s/it]

[→] Embedding file: Cựu quốc gia trong lịch sử Việt Nam.jsonl (idx=102)


Embedding batches: 100%|██████████| 18/18 [00:03<00:00,  5.12it/s]
Processing files:  14%|█▎        | 103/762 [02:49<20:59,  1.91s/it]

[→] Embedding file: Cựu đảng viên Đảng Cộng sản Việt Nam.jsonl (idx=103)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  6.90it/s]
Processing files:  14%|█▎        | 104/762 [02:49<15:10,  1.38s/it]

[→] Embedding file: Danh hiệu Việt Nam.jsonl (idx=104)


Embedding batches: 100%|██████████| 10/10 [00:01<00:00,  5.19it/s]
Processing files:  14%|█▍        | 105/762 [02:51<16:57,  1.55s/it]

[→] Embedding file: Danh lam thắng cảnh tại Huế.jsonl (idx=105)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  5.23it/s]
Processing files:  14%|█▍        | 106/762 [02:52<13:08,  1.20s/it]

[→] Embedding file: Danh sách (Việt Nam).jsonl (idx=106)


Embedding batches: 100%|██████████| 44/44 [00:07<00:00,  6.10it/s]
Processing files:  14%|█▍        | 107/762 [02:59<32:52,  3.01s/it]

[→] Embedding file: Danh sách công trình xây dựng và kiến trúc tại Việt Nam.jsonl (idx=107)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  6.85it/s]
Processing files:  14%|█▍        | 108/762 [02:59<23:58,  2.20s/it]

[→] Embedding file: Danh sách hành chính Việt Nam.jsonl (idx=108)


Embedding batches: 100%|██████████| 9/9 [00:01<00:00,  6.85it/s]
Processing files:  14%|█▍        | 109/762 [03:01<21:05,  1.94s/it]

[→] Embedding file: Danh sách nhân vật Việt Nam.jsonl (idx=109)


Embedding batches: 100%|██████████| 46/46 [00:08<00:00,  5.39it/s]
Processing files:  14%|█▍        | 110/762 [03:09<42:41,  3.93s/it]

[→] Embedding file: Danh sách phân cấp hành chính Việt Nam.jsonl (idx=110)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  5.32it/s]
Processing files:  15%|█▍        | 111/762 [03:10<32:18,  2.98s/it]

[→] Embedding file: Danh sách thể thao Việt Nam.jsonl (idx=111)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  6.57it/s]
Processing files:  15%|█▍        | 112/762 [03:10<23:35,  2.18s/it]

[→] Embedding file: Danh sách trường học tại Việt Nam.jsonl (idx=112)


Embedding batches: 100%|██████████| 6/6 [00:01<00:00,  5.16it/s]
Processing files:  15%|█▍        | 113/762 [03:11<20:18,  1.88s/it]

[→] Embedding file: Danh sách tập chương trình truyền hình Việt Nam.jsonl (idx=113)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  6.76it/s]
Processing files:  15%|█▍        | 114/762 [03:12<15:11,  1.41s/it]

[→] Embedding file: Danh sách âm nhạc Việt Nam.jsonl (idx=114)


Embedding batches: 100%|██████████| 5/5 [00:00<00:00,  5.16it/s]
Processing files:  15%|█▌        | 115/762 [03:13<13:47,  1.28s/it]

[→] Embedding file: Danh sách địa lý Việt Nam.jsonl (idx=115)


Embedding batches: 100%|██████████| 5/5 [00:01<00:00,  5.00it/s]
Processing files:  15%|█▌        | 116/762 [03:14<12:54,  1.20s/it]

[→] Embedding file: Di cư Việt Nam.jsonl (idx=116)


Embedding batches: 100%|██████████| 5/5 [00:00<00:00,  5.44it/s]
Processing files:  15%|█▌        | 117/762 [03:15<12:02,  1.12s/it]

[→] Embedding file: Di sản Quân chủ Việt Nam.jsonl (idx=117)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  5.82it/s]
Processing files:  15%|█▌        | 118/762 [03:15<10:07,  1.06it/s]

[→] Embedding file: Di sản triều đại nhà Nguyễn.jsonl (idx=118)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  6.41it/s]
Processing files:  16%|█▌        | 119/762 [03:15<08:06,  1.32it/s]

[→] Embedding file: Di sản văn hóa Việt Nam.jsonl (idx=119)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  5.27it/s]
Processing files:  16%|█▌        | 120/762 [03:16<07:32,  1.42it/s]

[→] Embedding file: Di sản văn hóa phi vật thể tại Việt Nam.jsonl (idx=120)


Embedding batches: 100%|██████████| 15/15 [00:02<00:00,  5.59it/s]
Processing files:  16%|█▌        | 121/762 [03:19<13:54,  1.30s/it]

[→] Embedding file: Di tích Cố đô Huế.jsonl (idx=121)


Embedding batches: 100%|██████████| 33/33 [00:06<00:00,  5.30it/s]
Processing files:  16%|█▌        | 122/762 [03:25<29:43,  2.79s/it]

[→] Embedding file: Di tích Việt Nam.jsonl (idx=122)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  7.97it/s]
Processing files:  16%|█▌        | 123/762 [03:25<21:35,  2.03s/it]

[→] Embedding file: Di tích khảo cổ Việt Nam.jsonl (idx=123)


Embedding batches: 100%|██████████| 6/6 [00:01<00:00,  5.80it/s]
Processing files:  16%|█▋        | 124/762 [03:26<18:24,  1.73s/it]

[→] Embedding file: Di tích lịch sử Việt Nam.jsonl (idx=124)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  5.61it/s]
Processing files:  16%|█▋        | 125/762 [03:27<14:35,  1.37s/it]

[→] Embedding file: Di tích nhà Lý.jsonl (idx=125)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  4.97it/s]
Processing files:  17%|█▋        | 126/762 [03:27<10:51,  1.02s/it]

[→] Embedding file: Di tích quốc gia đặc biệt.jsonl (idx=126)


Embedding batches: 100%|██████████| 96/96 [00:18<00:00,  5.27it/s]
Processing files:  17%|█▋        | 127/762 [03:45<1:05:30,  6.19s/it]

[→] Embedding file: Doanh nghiệp nhà nước Việt Nam.jsonl (idx=127)


Embedding batches: 100%|██████████| 7/7 [00:01<00:00,  5.43it/s]
Processing files:  17%|█▋        | 128/762 [03:47<49:55,  4.72s/it]  

[→] Embedding file: Doanh nhân Việt Nam.jsonl (idx=128)


Embedding batches: 100%|██████████| 52/52 [00:09<00:00,  5.39it/s]
Processing files:  17%|█▋        | 129/762 [03:56<1:05:28,  6.21s/it]

[→] Embedding file: Du lịch Hà Nội.jsonl (idx=129)


Embedding batches: 100%|██████████| 11/11 [00:02<00:00,  5.36it/s]
Processing files:  17%|█▋        | 130/762 [03:58<52:16,  4.96s/it]  

[→] Embedding file: Du lịch Việt Nam theo địa phương.jsonl (idx=130)


Embedding batches: 100%|██████████| 11/11 [00:02<00:00,  5.21it/s]
Processing files:  17%|█▋        | 131/762 [04:00<43:14,  4.11s/it]

[→] Embedding file: Du lịch Việt Nam.jsonl (idx=131)


Embedding batches: 100%|██████████| 9/9 [00:01<00:00,  5.30it/s]
Processing files:  17%|█▋        | 132/762 [04:02<35:37,  3.39s/it]

[→] Embedding file: Du lịch sinh thái Việt Nam.jsonl (idx=132)


Embedding batches: 100%|██████████| 6/6 [00:01<00:00,  5.74it/s]
Processing files:  17%|█▋        | 133/762 [04:03<28:13,  2.69s/it]

[→] Embedding file: Dòng thời gian của đại dịch COVID-19 tại Việt Nam.jsonl (idx=133)


Embedding batches: 100%|██████████| 7/7 [00:01<00:00,  5.19it/s]
Processing files:  18%|█▊        | 134/762 [04:05<24:00,  2.29s/it]

[→] Embedding file: Dầu khí Việt Nam.jsonl (idx=134)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  4.99it/s]
Processing files:  18%|█▊        | 135/762 [04:05<17:25,  1.67s/it]

[→] Embedding file: Dịch bệnh tại Việt Nam.jsonl (idx=135)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  5.18it/s]
Processing files:  18%|█▊        | 136/762 [04:05<12:47,  1.23s/it]

[→] Embedding file: FULRO.jsonl (idx=136)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  5.40it/s]
Processing files:  18%|█▊        | 137/762 [04:06<10:42,  1.03s/it]

[→] Embedding file: Gia tộc Việt Nam.jsonl (idx=137)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  4.98it/s]
Processing files:  18%|█▊        | 138/762 [04:06<09:23,  1.11it/s]

[→] Embedding file: Gia Định.jsonl (idx=138)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  7.03it/s]
Processing files:  18%|█▊        | 139/762 [04:06<07:28,  1.39it/s]

[→] Embedding file: Giao thông Việt Nam.jsonl (idx=139)


Embedding batches: 100%|██████████| 14/14 [00:02<00:00,  5.33it/s]
Processing files:  18%|█▊        | 140/762 [04:09<13:26,  1.30s/it]

[→] Embedding file: Giáo dục Việt Nam Cộng hòa.jsonl (idx=140)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  6.16it/s]
Processing files:  19%|█▊        | 141/762 [04:10<10:57,  1.06s/it]

[→] Embedding file: Giáo dục Việt Nam.jsonl (idx=141)


Embedding batches: 100%|██████████| 41/41 [00:07<00:00,  5.18it/s]
Processing files:  19%|█▊        | 142/762 [04:18<32:15,  3.12s/it]

[→] Embedding file: Giáo dục đại học Việt Nam.jsonl (idx=142)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  5.16it/s]
Processing files:  19%|█▉        | 143/762 [04:18<24:23,  2.36s/it]

[→] Embedding file: Giáo hội Phật giáo Việt Nam Thống nhất.jsonl (idx=143)


Embedding batches: 100%|██████████| 6/6 [00:01<00:00,  5.46it/s]
Processing files:  19%|█▉        | 144/762 [04:19<20:28,  1.99s/it]

[→] Embedding file: Giải thưởng Việt Nam.jsonl (idx=144)


Embedding batches: 100%|██████████| 37/37 [00:06<00:00,  5.41it/s]
Processing files:  19%|█▉        | 145/762 [04:26<35:27,  3.45s/it]

[→] Embedding file: Giải trí tại Việt Nam.jsonl (idx=145)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  5.45it/s]
Processing files:  19%|█▉        | 146/762 [04:26<25:55,  2.53s/it]

[→] Embedding file: Giờ ở Việt Nam.jsonl (idx=146)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  7.29it/s]
Processing files:  19%|█▉        | 147/762 [04:27<18:33,  1.81s/it]

[→] Embedding file: HIV_AIDS tại Việt Nam.jsonl (idx=147)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  6.39it/s]
Processing files:  19%|█▉        | 148/762 [04:27<13:28,  1.32s/it]

[→] Embedding file: Hai Bà Trưng (quận).jsonl (idx=148)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  5.41it/s]
Processing files:  20%|█▉        | 149/762 [04:27<09:59,  1.02it/s]

[→] Embedding file: Hai Bà Trưng.jsonl (idx=149)


Embedding batches: 100%|██████████| 5/5 [00:00<00:00,  5.25it/s]
Processing files:  20%|█▉        | 150/762 [04:28<09:56,  1.03it/s]

[→] Embedding file: Hang động Việt Nam.jsonl (idx=150)


Embedding batches: 100%|██████████| 5/5 [00:00<00:00,  5.42it/s]
Processing files:  20%|█▉        | 151/762 [04:29<09:48,  1.04it/s]

[→] Embedding file: Hiến pháp Việt Nam.jsonl (idx=151)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  5.10it/s]
Processing files:  20%|█▉        | 152/762 [04:29<07:28,  1.36it/s]

[→] Embedding file: Hiệp ước của Việt Nam Cộng hòa.jsonl (idx=152)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  6.67it/s]
Processing files:  20%|██        | 153/762 [04:29<05:41,  1.78it/s]

[→] Embedding file: Hoa Kỳ trong Chiến tranh Việt Nam.jsonl (idx=153)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  5.47it/s]
Processing files:  20%|██        | 154/762 [04:30<06:15,  1.62it/s]

[→] Embedding file: Hoa khôi Áo dài Việt Nam.jsonl (idx=154)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  5.98it/s]
Processing files:  20%|██        | 155/762 [04:30<05:26,  1.86it/s]

[→] Embedding file: Hoàng hậu Việt Nam.jsonl (idx=155)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  5.44it/s]
Processing files:  20%|██        | 156/762 [04:31<06:03,  1.67it/s]

[→] Embedding file: Hoàng nữ Việt Nam.jsonl (idx=156)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  5.80it/s]
Processing files:  21%|██        | 157/762 [04:32<05:49,  1.73it/s]

[→] Embedding file: Hoàng thái hậu Việt Nam.jsonl (idx=157)


Embedding batches: 100%|██████████| 14/14 [00:02<00:00,  5.25it/s]
Processing files:  21%|██        | 158/762 [04:34<12:09,  1.21s/it]

[→] Embedding file: Hoàng tộc Việt Nam.jsonl (idx=158)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  5.57it/s]
Processing files:  21%|██        | 159/762 [04:35<10:10,  1.01s/it]

[→] Embedding file: Hoàng tộc nhà Hậu Lê.jsonl (idx=159)


Embedding batches: 100%|██████████| 8/8 [00:01<00:00,  5.30it/s]
Processing files:  21%|██        | 160/762 [04:36<11:40,  1.16s/it]

[→] Embedding file: Hoàng tộc nhà Lý.jsonl (idx=160)


Embedding batches: 100%|██████████| 5/5 [00:00<00:00,  5.63it/s]
Processing files:  21%|██        | 161/762 [04:37<10:50,  1.08s/it]

[→] Embedding file: Hoàng tộc nhà Nguyễn.jsonl (idx=161)


Embedding batches: 100%|██████████| 21/21 [00:04<00:00,  5.21it/s]
Processing files:  21%|██▏       | 162/762 [04:41<19:42,  1.97s/it]

[→] Embedding file: Hoàng tộc nhà Tiền Lê.jsonl (idx=162)


Embedding batches: 100%|██████████| 6/6 [00:00<00:00,  6.66it/s]
Processing files:  21%|██▏       | 163/762 [04:42<16:30,  1.65s/it]

[→] Embedding file: Hoàng tộc nhà Triệu.jsonl (idx=163)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  5.61it/s]
Processing files:  22%|██▏       | 164/762 [04:42<12:05,  1.21s/it]

[→] Embedding file: Hoàng tộc nhà Trần.jsonl (idx=164)


Embedding batches: 100%|██████████| 9/9 [00:01<00:00,  5.75it/s]
Processing files:  22%|██▏       | 165/762 [04:44<13:09,  1.32s/it]

[→] Embedding file: Hoàng tộc nhà Tây Sơn.jsonl (idx=165)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  4.84it/s]
Processing files:  22%|██▏       | 166/762 [04:44<10:27,  1.05s/it]

[→] Embedding file: Hoàng đế truy tôn Việt Nam.jsonl (idx=166)


Embedding batches: 100%|██████████| 18/18 [00:03<00:00,  5.11it/s]
Processing files:  22%|██▏       | 167/762 [04:48<17:51,  1.80s/it]

[→] Embedding file: Hoạt động của Cảnh sát biển Việt Nam.jsonl (idx=167)


Embedding batches: 100%|██████████| 7/7 [00:01<00:00,  5.63it/s]
Processing files:  22%|██▏       | 168/762 [04:49<16:13,  1.64s/it]

[→] Embedding file: Hoạt động quân sự liên quan tới Việt Nam.jsonl (idx=168)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  6.70it/s]
Processing files:  22%|██▏       | 169/762 [04:49<11:48,  1.19s/it]

[→] Embedding file: Huy hiệu 30 năm tuổi Đảng Cộng sản Việt Nam.jsonl (idx=169)


Embedding batches: 100%|██████████| 6/6 [00:01<00:00,  5.96it/s]
Processing files:  22%|██▏       | 170/762 [04:50<11:16,  1.14s/it]

[→] Embedding file: Huy hiệu 35 năm tuổi Đảng Cộng sản Việt Nam.jsonl (idx=170)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  8.68it/s]
Processing files:  22%|██▏       | 171/762 [04:50<08:14,  1.20it/s]

[→] Embedding file: Huy hiệu 40 năm tuổi Đảng Cộng sản Việt Nam.jsonl (idx=171)


Embedding batches: 100%|██████████| 13/13 [00:02<00:00,  5.95it/s]
Processing files:  23%|██▎       | 172/762 [04:53<12:15,  1.25s/it]

[→] Embedding file: Huy hiệu 45 năm tuổi Đảng Cộng sản Việt Nam.jsonl (idx=172)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  5.61it/s]
Processing files:  23%|██▎       | 173/762 [04:53<09:39,  1.02it/s]

[→] Embedding file: Huy hiệu 50 năm tuổi Đảng Cộng sản Việt Nam.jsonl (idx=173)


Embedding batches: 100%|██████████| 19/19 [00:03<00:00,  5.35it/s]
Processing files:  23%|██▎       | 174/762 [04:57<17:13,  1.76s/it]

[→] Embedding file: Huy hiệu 55 năm tuổi Đảng Cộng sản Việt Nam.jsonl (idx=174)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  6.25it/s]
Processing files:  23%|██▎       | 175/762 [04:57<13:29,  1.38s/it]

[→] Embedding file: Huy hiệu 60 năm tuổi Đảng Cộng sản Việt Nam.jsonl (idx=175)


Embedding batches: 100%|██████████| 10/10 [00:01<00:00,  5.29it/s]
Processing files:  23%|██▎       | 176/762 [04:59<15:00,  1.54s/it]

[→] Embedding file: Huy hiệu 65 năm tuổi Đảng Cộng sản Việt Nam.jsonl (idx=176)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  6.01it/s]
Processing files:  23%|██▎       | 177/762 [05:00<11:59,  1.23s/it]

[→] Embedding file: Huy hiệu 70 năm tuổi Đảng Cộng sản Việt Nam.jsonl (idx=177)


Embedding batches: 100%|██████████| 13/13 [00:02<00:00,  5.40it/s]
Processing files:  23%|██▎       | 178/762 [05:02<15:27,  1.59s/it]

[→] Embedding file: Huy hiệu 75 năm tuổi Đảng Cộng sản Việt Nam.jsonl (idx=178)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  6.37it/s]
Processing files:  23%|██▎       | 179/762 [05:02<12:11,  1.25s/it]

[→] Embedding file: Huy hiệu 80 năm tuổi Đảng Cộng sản Việt Nam.jsonl (idx=179)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  5.35it/s]
Processing files:  24%|██▎       | 180/762 [05:03<10:43,  1.11s/it]

[→] Embedding file: Hà Nội.jsonl (idx=180)


Embedding batches: 100%|██████████| 5/5 [00:00<00:00,  5.69it/s]
Processing files:  24%|██▍       | 181/762 [05:04<10:04,  1.04s/it]

[→] Embedding file: Hành chính Việt Nam Cộng hòa.jsonl (idx=181)


Embedding batches: 100%|██████████| 26/26 [00:05<00:00,  5.18it/s]
Processing files:  24%|██▍       | 182/762 [05:09<21:38,  2.24s/it]

[→] Embedding file: Hành chính Việt Nam.jsonl (idx=182)


Embedding batches: 100%|██████████| 24/24 [00:04<00:00,  5.09it/s]
Processing files:  24%|██▍       | 183/762 [05:14<28:49,  2.99s/it]

[→] Embedding file: Hãng hàng không Việt Nam.jsonl (idx=183)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  5.75it/s]
Processing files:  24%|██▍       | 184/762 [05:14<21:11,  2.20s/it]

[→] Embedding file: Hùng Vương.jsonl (idx=184)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  5.81it/s]
Processing files:  24%|██▍       | 185/762 [05:15<15:50,  1.65s/it]

[→] Embedding file: Hạt giống đỏ.jsonl (idx=185)


Embedding batches: 100%|██████████| 12/12 [00:02<00:00,  5.11it/s]
Processing files:  24%|██▍       | 186/762 [05:17<17:51,  1.86s/it]

[→] Embedding file: Hải chiến Hoàng Sa 1974.jsonl (idx=186)


Embedding batches: 100%|██████████| 9/9 [00:01<00:00,  5.17it/s]
Processing files:  25%|██▍       | 187/762 [05:19<17:31,  1.83s/it]

[→] Embedding file: Hầu tước Việt Nam.jsonl (idx=187)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  4.46it/s]
Processing files:  25%|██▍       | 188/762 [05:19<12:56,  1.35s/it]

[→] Embedding file: Hậu Chiến tranh Việt Nam.jsonl (idx=188)


Embedding batches: 100%|██████████| 6/6 [00:01<00:00,  5.14it/s]
Processing files:  25%|██▍       | 189/762 [05:20<12:25,  1.30s/it]

[→] Embedding file: Hệ thống giáo dục Việt Nam.jsonl (idx=189)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  6.40it/s]
Processing files:  25%|██▍       | 190/762 [05:20<09:08,  1.04it/s]

[→] Embedding file: Hệ thống thủy đạo Kinh thành Huế.jsonl (idx=190)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  5.28it/s]
Processing files:  25%|██▌       | 191/762 [05:21<08:02,  1.18it/s]

[→] Embedding file: Hệ đo lường cổ của Việt Nam.jsonl (idx=191)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  6.92it/s]
Processing files:  25%|██▌       | 192/762 [05:21<06:52,  1.38it/s]

[→] Embedding file: Học viện Chính trị Quốc gia Hồ Chí Minh.jsonl (idx=192)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  5.93it/s]
Processing files:  25%|██▌       | 193/762 [05:21<05:17,  1.79it/s]

[→] Embedding file: Hồ Chí Minh.jsonl (idx=193)


Embedding batches: 100%|██████████| 7/7 [00:01<00:00,  4.90it/s]
Processing files:  25%|██▌       | 194/762 [05:23<07:48,  1.21it/s]

[→] Embedding file: Hồ Việt Nam.jsonl (idx=194)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  9.63it/s]
Processing files:  26%|██▌       | 195/762 [05:23<05:46,  1.64it/s]

[→] Embedding file: Hồng Bàng.jsonl (idx=195)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  4.75it/s]
Processing files:  26%|██▌       | 196/762 [05:23<05:14,  1.80it/s]

[→] Embedding file: Hội nghị ngoại giao ở Việt Nam.jsonl (idx=196)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  8.13it/s]
Processing files:  26%|██▌       | 197/762 [05:24<04:01,  2.34it/s]

[→] Embedding file: Hội đoàn Việt Nam.jsonl (idx=197)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  5.00it/s]
Processing files:  26%|██▌       | 198/762 [05:24<03:23,  2.77it/s]

[→] Embedding file: Hội đồng quốc phòng và an ninh Việt Nam.jsonl (idx=198)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  6.69it/s]
Processing files:  26%|██▌       | 199/762 [05:24<03:15,  2.88it/s]

[→] Embedding file: Khoa học Việt Nam theo thời kỳ.jsonl (idx=199)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  6.07it/s]
Processing files:  26%|██▌       | 200/762 [05:24<02:45,  3.39it/s]

[→] Embedding file: Khoa học và công nghệ Việt Nam.jsonl (idx=200)


Embedding batches: 100%|██████████| 5/5 [00:00<00:00,  5.99it/s]
Processing files:  26%|██▋       | 201/762 [05:25<04:17,  2.18it/s]

[→] Embedding file: Khu Ramsar Việt Nam.jsonl (idx=201)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  5.99it/s]
Processing files:  27%|██▋       | 202/762 [05:25<03:28,  2.69it/s]

[→] Embedding file: Khu bảo tồn Việt Nam.jsonl (idx=202)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  7.46it/s]
Processing files:  27%|██▋       | 203/762 [05:26<03:34,  2.61it/s]

[→] Embedding file: Khu bảo tồn thiên nhiên Việt Nam.jsonl (idx=203)


Embedding batches: 100%|██████████| 8/8 [00:01<00:00,  5.88it/s]
Processing files:  27%|██▋       | 204/762 [05:27<06:19,  1.47it/s]

[→] Embedding file: Khu du lịch Việt Nam.jsonl (idx=204)


Embedding batches: 100%|██████████| 14/14 [00:02<00:00,  5.24it/s]
Processing files:  27%|██▋       | 205/762 [05:30<11:53,  1.28s/it]

[→] Embedding file: Khu dự trữ sinh quyển Việt Nam.jsonl (idx=205)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  6.06it/s]
Processing files:  27%|██▋       | 206/762 [05:30<09:43,  1.05s/it]

[→] Embedding file: Khu tự trị Thái.jsonl (idx=206)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  5.71it/s]
Processing files:  27%|██▋       | 207/762 [05:30<07:17,  1.27it/s]

[→] Embedding file: Khu vực có tranh chấp chủ quyền của Việt Nam.jsonl (idx=207)


Embedding batches: 100%|██████████| 16/16 [00:03<00:00,  5.23it/s]
Processing files:  27%|██▋       | 208/762 [05:33<13:36,  1.47s/it]

[→] Embedding file: Khu đô thị Việt Nam.jsonl (idx=208)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  7.35it/s]
Processing files:  27%|██▋       | 209/762 [05:34<09:54,  1.08s/it]

[→] Embedding file: Khách sạn tại Việt Nam.jsonl (idx=209)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  5.50it/s]
Processing files:  28%|██▊       | 210/762 [05:34<07:58,  1.15it/s]

[→] Embedding file: Khí hậu Việt Nam.jsonl (idx=210)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  5.89it/s]
Processing files:  28%|██▊       | 211/762 [05:34<06:31,  1.41it/s]

[→] Embedding file: Khảo cổ Việt Nam.jsonl (idx=211)


Embedding batches: 100%|██████████| 15/15 [00:02<00:00,  5.89it/s]
Processing files:  28%|██▊       | 212/762 [05:37<11:36,  1.27s/it]

[→] Embedding file: Khởi nghĩa Ba Tơ.jsonl (idx=212)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  5.85it/s]
Processing files:  28%|██▊       | 213/762 [05:37<09:04,  1.01it/s]

[→] Embedding file: Khởi nghĩa Ba Đình.jsonl (idx=213)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  5.19it/s]
Processing files:  28%|██▊       | 214/762 [05:37<06:53,  1.32it/s]

[→] Embedding file: Khởi nghĩa Hùng Lĩnh.jsonl (idx=214)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  7.11it/s]
Processing files:  28%|██▊       | 215/762 [05:38<05:36,  1.62it/s]

[→] Embedding file: Khởi nghĩa Lam Sơn.jsonl (idx=215)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  5.93it/s]
Processing files:  28%|██▊       | 216/762 [05:38<05:21,  1.70it/s]

[→] Embedding file: Khởi nghĩa Thanh Sơn.jsonl (idx=216)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  6.76it/s]
Processing files:  28%|██▊       | 217/762 [05:39<04:35,  1.98it/s]

[→] Embedding file: Khởi nghĩa Việt Nam thời Bắc thuộc lần 1.jsonl (idx=217)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  5.82it/s]
Processing files:  29%|██▊       | 218/762 [05:39<05:05,  1.78it/s]

[→] Embedding file: Khởi nghĩa Việt Nam thời Bắc thuộc lần 2.jsonl (idx=218)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  7.15it/s]
Processing files:  29%|██▊       | 219/762 [05:39<03:57,  2.28it/s]

[→] Embedding file: Khởi nghĩa Việt Nam thời Bắc thuộc lần 4.jsonl (idx=219)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  5.96it/s]
Processing files:  29%|██▉       | 220/762 [05:40<04:09,  2.17it/s]

[→] Embedding file: Khởi nghĩa chống Pháp (1930–1945).jsonl (idx=220)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  8.49it/s]
Processing files:  29%|██▉       | 221/762 [05:40<03:14,  2.77it/s]

[→] Embedding file: Khởi nghĩa chống Pháp.jsonl (idx=221)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  6.49it/s]
Processing files:  29%|██▉       | 222/762 [05:40<03:08,  2.87it/s]

[→] Embedding file: Khởi nghĩa nông dân thời Lê trung hưng.jsonl (idx=222)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  6.26it/s]
Processing files:  29%|██▉       | 223/762 [05:41<02:38,  3.40it/s]

[→] Embedding file: Kinh doanh ở Việt Nam.jsonl (idx=223)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  8.46it/s]
Processing files:  29%|██▉       | 224/762 [05:41<02:31,  3.56it/s]

[→] Embedding file: Kinh thành Huế.jsonl (idx=224)


Embedding batches: 100%|██████████| 14/14 [00:02<00:00,  5.22it/s]
Processing files:  30%|██▉       | 225/762 [05:44<09:00,  1.01s/it]

[→] Embedding file: Kinh tế Hà Nội.jsonl (idx=225)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  5.39it/s]
Processing files:  30%|██▉       | 226/762 [05:44<08:18,  1.07it/s]

[→] Embedding file: Kinh tế Thành phố Hồ Chí Minh.jsonl (idx=226)


Embedding batches: 100%|██████████| 6/6 [00:01<00:00,  5.18it/s]
Processing files:  30%|██▉       | 227/762 [05:45<08:56,  1.00s/it]

[→] Embedding file: Kinh tế Việt Nam.jsonl (idx=227)


Embedding batches: 100%|██████████| 33/33 [00:06<00:00,  5.26it/s]
Processing files:  30%|██▉       | 228/762 [05:52<23:03,  2.59s/it]

[→] Embedding file: Kiệt tác truyền khẩu và phi vật thể nhân loại tại Việt Nam.jsonl (idx=228)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  5.85it/s]
Processing files:  30%|███       | 229/762 [05:52<17:03,  1.92s/it]

[→] Embedding file: Kênh đào Việt Nam.jsonl (idx=229)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  5.11it/s]
Processing files:  30%|███       | 230/762 [05:53<13:30,  1.52s/it]

[→] Embedding file: Kế hoạch hóa kinh tế Việt Nam.jsonl (idx=230)


Embedding batches: 100%|██████████| 5/5 [00:00<00:00,  6.23it/s]
Processing files:  30%|███       | 231/762 [05:54<11:37,  1.31s/it]

[→] Embedding file: Kỉ luật Đảng Cộng sản Việt Nam.jsonl (idx=231)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  6.44it/s]
Processing files:  30%|███       | 232/762 [05:54<08:33,  1.03it/s]

[→] Embedding file: Kỷ lục Việt Nam.jsonl (idx=232)


Embedding batches: 100%|██████████| 23/23 [00:04<00:00,  5.31it/s]
Processing files:  31%|███       | 233/762 [05:58<17:28,  1.98s/it]

[→] Embedding file: LGBT Việt Nam.jsonl (idx=233)


Embedding batches: 100%|██████████| 5/5 [00:00<00:00,  5.55it/s]
Processing files:  31%|███       | 234/762 [05:59<14:37,  1.66s/it]

[→] Embedding file: Lao động ở Việt Nam.jsonl (idx=234)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  5.32it/s]
Processing files:  31%|███       | 235/762 [06:00<11:44,  1.34s/it]

[→] Embedding file: Liên bang Đông Dương.jsonl (idx=235)


Embedding batches: 100%|██████████| 6/6 [00:00<00:00,  6.07it/s]
Processing files:  31%|███       | 236/762 [06:01<10:49,  1.24s/it]

[→] Embedding file: Luật Việt Nam Cộng hòa.jsonl (idx=236)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  6.20it/s]
Processing files:  31%|███       | 237/762 [06:01<08:51,  1.01s/it]

[→] Embedding file: Làng nghề Việt Nam.jsonl (idx=237)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00, 11.15it/s]


[→] Embedding file: Làng nghề truyền thống Việt Nam.jsonl (idx=238)


Embedding batches: 100%|██████████| 6/6 [00:01<00:00,  5.77it/s]
Processing files:  31%|███▏      | 239/762 [06:02<07:05,  1.23it/s]

[→] Embedding file: Lâm nghiệp Việt Nam.jsonl (idx=239)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  6.40it/s]
Processing files:  31%|███▏      | 240/762 [06:03<06:21,  1.37it/s]

[→] Embedding file: Lũ lụt tại Việt Nam.jsonl (idx=240)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  8.71it/s]
Processing files:  32%|███▏      | 241/762 [06:03<04:57,  1.75it/s]

[→] Embedding file: Lạc Việt.jsonl (idx=241)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  6.19it/s]
Processing files:  32%|███▏      | 242/762 [06:03<04:23,  1.97it/s]

[→] Embedding file: Lễ hội Việt Nam.jsonl (idx=242)


Embedding batches: 100%|██████████| 10/10 [00:01<00:00,  5.27it/s]
Processing files:  32%|███▏      | 243/762 [06:05<07:46,  1.11it/s]

[→] Embedding file: Lịch sử An Giang.jsonl (idx=243)


Embedding batches: 100%|██████████| 8/8 [00:01<00:00,  5.24it/s]
Processing files:  32%|███▏      | 244/762 [06:07<09:20,  1.08s/it]

[→] Embedding file: Lịch sử Bà Rịa – Vũng Tàu.jsonl (idx=244)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  5.94it/s]
Processing files:  32%|███▏      | 245/762 [06:07<08:19,  1.03it/s]

[→] Embedding file: Lịch sử Bình Phước.jsonl (idx=245)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  6.45it/s]
Processing files:  32%|███▏      | 246/762 [06:08<07:05,  1.21it/s]

[→] Embedding file: Lịch sử Bình Định.jsonl (idx=246)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  5.32it/s]
Processing files:  32%|███▏      | 247/762 [06:08<06:28,  1.32it/s]

[→] Embedding file: Lịch sử Bạc Liêu.jsonl (idx=247)


Embedding batches: 100%|██████████| 5/5 [00:00<00:00,  5.48it/s]
Processing files:  33%|███▎      | 248/762 [06:09<06:54,  1.24it/s]

[→] Embedding file: Lịch sử Bắc Ninh.jsonl (idx=248)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  5.73it/s]
Processing files:  33%|███▎      | 249/762 [06:10<06:39,  1.28it/s]

[→] Embedding file: Lịch sử Chăm Pa.jsonl (idx=249)


Embedding batches: 100%|██████████| 14/14 [00:02<00:00,  5.19it/s]
Processing files:  33%|███▎      | 250/762 [06:13<11:34,  1.36s/it]

[→] Embedding file: Lịch sử Cà Mau.jsonl (idx=250)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  5.55it/s]
Processing files:  33%|███▎      | 251/762 [06:13<09:56,  1.17s/it]

[→] Embedding file: Lịch sử Gia Lai.jsonl (idx=251)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  5.42it/s]
Processing files:  33%|███▎      | 252/762 [06:14<08:51,  1.04s/it]

[→] Embedding file: Lịch sử Hà Nam.jsonl (idx=252)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  6.54it/s]
Processing files:  33%|███▎      | 253/762 [06:14<06:36,  1.28it/s]

[→] Embedding file: Lịch sử Hà Nội.jsonl (idx=253)


Embedding batches: 100%|██████████| 18/18 [00:03<00:00,  5.23it/s]
Processing files:  33%|███▎      | 254/762 [06:18<13:23,  1.58s/it]

[→] Embedding file: Lịch sử Hòa Bình.jsonl (idx=254)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  5.15it/s]
Processing files:  33%|███▎      | 255/762 [06:18<10:22,  1.23s/it]

[→] Embedding file: Lịch sử Hưng Yên.jsonl (idx=255)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  6.10it/s]
Processing files:  34%|███▎      | 256/762 [06:18<08:06,  1.04it/s]

[→] Embedding file: Lịch sử Hải Phòng.jsonl (idx=256)


Embedding batches: 100%|██████████| 6/6 [00:01<00:00,  5.23it/s]
Processing files:  34%|███▎      | 257/762 [06:20<08:35,  1.02s/it]

[→] Embedding file: Lịch sử Khánh Hòa.jsonl (idx=257)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  5.63it/s]
Processing files:  34%|███▍      | 258/762 [06:20<07:49,  1.07it/s]

[→] Embedding file: Lịch sử Kon Tum.jsonl (idx=258)


Embedding batches: 100%|██████████| 5/5 [00:00<00:00,  5.04it/s]
Processing files:  34%|███▍      | 259/762 [06:21<07:59,  1.05it/s]

[→] Embedding file: Lịch sử Long An.jsonl (idx=259)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  5.43it/s]
Processing files:  34%|███▍      | 260/762 [06:22<07:27,  1.12it/s]

[→] Embedding file: Lịch sử Lâm Đồng.jsonl (idx=260)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  5.25it/s]
Processing files:  34%|███▍      | 261/762 [06:23<06:11,  1.35it/s]

[→] Embedding file: Lịch sử Nam Định.jsonl (idx=261)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  6.24it/s]
Processing files:  34%|███▍      | 262/762 [06:23<05:33,  1.50it/s]

[→] Embedding file: Lịch sử Phú Thọ.jsonl (idx=262)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  5.87it/s]
Processing files:  35%|███▍      | 263/762 [06:24<05:35,  1.49it/s]

[→] Embedding file: Lịch sử Quân đội nhân dân Việt Nam.jsonl (idx=263)


Embedding batches: 100%|██████████| 13/13 [00:02<00:00,  5.31it/s]
Processing files:  35%|███▍      | 264/762 [06:26<10:02,  1.21s/it]

[→] Embedding file: Lịch sử Quảng Nam.jsonl (idx=264)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  5.86it/s]
Processing files:  35%|███▍      | 265/762 [06:27<08:18,  1.00s/it]

[→] Embedding file: Lịch sử Quảng Ngãi.jsonl (idx=265)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  5.24it/s]
Processing files:  35%|███▍      | 266/762 [06:27<07:43,  1.07it/s]

[→] Embedding file: Lịch sử Quảng Ninh.jsonl (idx=266)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  5.83it/s]
Processing files:  35%|███▌      | 267/762 [06:28<07:07,  1.16it/s]

[→] Embedding file: Lịch sử Quảng Trị.jsonl (idx=267)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  5.14it/s]
Processing files:  35%|███▌      | 268/762 [06:29<06:55,  1.19it/s]

[→] Embedding file: Lịch sử Sài Gòn – Thành phố Hồ Chí Minh.jsonl (idx=268)


Embedding batches: 100%|██████████| 30/30 [00:05<00:00,  5.16it/s]
Processing files:  35%|███▌      | 269/762 [06:35<19:12,  2.34s/it]

[→] Embedding file: Lịch sử Sóc Trăng.jsonl (idx=269)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  5.05it/s]
Processing files:  35%|███▌      | 270/762 [06:36<15:23,  1.88s/it]

[→] Embedding file: Lịch sử Thanh Hóa.jsonl (idx=270)


Embedding batches: 100%|██████████| 12/12 [00:02<00:00,  5.29it/s]
Processing files:  36%|███▌      | 271/762 [06:38<16:21,  2.00s/it]

[→] Embedding file: Lịch sử Thái Bình.jsonl (idx=271)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  6.18it/s]
Processing files:  36%|███▌      | 272/762 [06:38<12:39,  1.55s/it]

[→] Embedding file: Lịch sử Thừa Thiên Huế.jsonl (idx=272)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  5.29it/s]
Processing files:  36%|███▌      | 273/762 [06:39<10:14,  1.26s/it]

[→] Embedding file: Lịch sử Việt Nam Cộng hòa.jsonl (idx=273)


Embedding batches: 100%|██████████| 8/8 [00:01<00:00,  5.43it/s]
Processing files:  36%|███▌      | 274/762 [06:40<10:47,  1.33s/it]

[→] Embedding file: Lịch sử Việt Nam theo tỉnh thành.jsonl (idx=274)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  5.10it/s]
Processing files:  36%|███▌      | 275/762 [06:41<08:01,  1.01it/s]

[→] Embedding file: Lịch sử Việt Nam thời Bắc thuộc lần 1.jsonl (idx=275)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  5.37it/s]
Processing files:  36%|███▌      | 276/762 [06:41<06:04,  1.33it/s]

[→] Embedding file: Lịch sử Việt Nam thời Bắc thuộc lần 2.jsonl (idx=276)


Embedding batches: 100%|██████████| 7/7 [00:01<00:00,  5.51it/s]
Processing files:  36%|███▋      | 277/762 [06:42<07:20,  1.10it/s]

[→] Embedding file: Lịch sử Việt Nam thời Bắc thuộc lần 3.jsonl (idx=277)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  4.85it/s]
Processing files:  36%|███▋      | 278/762 [06:43<06:39,  1.21it/s]

[→] Embedding file: Lịch sử Việt Nam thời Bắc thuộc lần 4.jsonl (idx=278)


Embedding batches: 100%|██████████| 6/6 [00:01<00:00,  5.00it/s]
Processing files:  37%|███▋      | 279/762 [06:44<07:34,  1.06it/s]

[→] Embedding file: Lịch sử Việt Nam thời Hai Bà Trưng.jsonl (idx=279)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  5.26it/s]
Processing files:  37%|███▋      | 280/762 [06:45<07:08,  1.12it/s]

[→] Embedding file: Lịch sử Việt Nam thời Lê sơ.jsonl (idx=280)


Embedding batches: 100%|██████████| 5/5 [00:00<00:00,  5.46it/s]
Processing files:  37%|███▋      | 281/762 [06:46<07:13,  1.11it/s]

[→] Embedding file: Lịch sử Việt Nam thời Lê trung hưng.jsonl (idx=281)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  6.54it/s]
Processing files:  37%|███▋      | 282/762 [06:46<05:48,  1.38it/s]

[→] Embedding file: Lịch sử Việt Nam thời Lý.jsonl (idx=282)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  5.28it/s]
Processing files:  37%|███▋      | 283/762 [06:47<05:26,  1.47it/s]

[→] Embedding file: Lịch sử Việt Nam thời Mạc.jsonl (idx=283)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  5.13it/s]
Processing files:  37%|███▋      | 284/762 [06:47<04:16,  1.87it/s]

[→] Embedding file: Lịch sử Việt Nam thời Nguyễn.jsonl (idx=284)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  5.17it/s]
Processing files:  37%|███▋      | 285/762 [06:47<03:55,  2.03it/s]

[→] Embedding file: Lịch sử Việt Nam thời Pháp thuộc.jsonl (idx=285)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  5.54it/s]
Processing files:  38%|███▊      | 286/762 [06:48<04:28,  1.77it/s]

[→] Embedding file: Lịch sử Việt Nam thời Trần.jsonl (idx=286)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  5.21it/s]
Processing files:  38%|███▊      | 287/762 [06:48<04:31,  1.75it/s]

[→] Embedding file: Lịch sử Việt Nam thời Tự chủ.jsonl (idx=287)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  6.15it/s]
Processing files:  38%|███▊      | 288/762 [06:49<04:43,  1.67it/s]

[→] Embedding file: Lịch sử Việt Nam thời kỳ 1945–1975.jsonl (idx=288)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  5.46it/s]
Processing files:  38%|███▊      | 289/762 [06:49<04:11,  1.88it/s]

[→] Embedding file: Lịch sử Việt Nam thời kỳ từ 1976.jsonl (idx=289)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  5.39it/s]
Processing files:  38%|███▊      | 290/762 [06:50<04:15,  1.85it/s]

[→] Embedding file: Lịch sử Việt Nam.jsonl (idx=290)


Embedding batches: 100%|██████████| 108/108 [00:19<00:00,  5.40it/s]
Processing files:  38%|███▊      | 291/762 [07:10<50:08,  6.39s/it]

[→] Embedding file: Lịch sử Vĩnh Long.jsonl (idx=291)


Embedding batches: 100%|██████████| 7/7 [00:01<00:00,  5.58it/s]
Processing files:  38%|███▊      | 292/762 [07:11<37:59,  4.85s/it]

[→] Embedding file: Lịch sử Vĩnh Phúc.jsonl (idx=292)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  6.22it/s]
Processing files:  38%|███▊      | 293/762 [07:12<27:41,  3.54s/it]

[→] Embedding file: Lịch sử bóng đá Việt Nam.jsonl (idx=293)


Embedding batches: 100%|██████████| 6/6 [00:01<00:00,  5.21it/s]
Processing files:  39%|███▊      | 294/762 [07:13<22:04,  2.83s/it]

[→] Embedding file: Lịch sử chính trị Việt Nam.jsonl (idx=294)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  5.58it/s]
Processing files:  39%|███▊      | 295/762 [07:14<17:07,  2.20s/it]

[→] Embedding file: Lịch sử hành chính Việt Nam theo tỉnh thành.jsonl (idx=295)


Embedding batches: 100%|██████████| 50/50 [00:09<00:00,  5.03it/s]
Processing files:  39%|███▉      | 296/762 [07:24<35:11,  4.53s/it]

[→] Embedding file: Lịch sử hành chính Việt Nam.jsonl (idx=296)


Embedding batches: 100%|██████████| 27/27 [00:05<00:00,  5.20it/s]
Processing files:  39%|███▉      | 297/762 [07:29<36:42,  4.74s/it]

[→] Embedding file: Lịch sử kinh tế Việt Nam.jsonl (idx=297)


Embedding batches: 100%|██████████| 53/53 [00:10<00:00,  5.27it/s]
Processing files:  39%|███▉      | 298/762 [07:39<49:00,  6.34s/it]

[→] Embedding file: Lịch sử miền Nam Việt Nam.jsonl (idx=298)


Embedding batches: 100%|██████████| 16/16 [00:03<00:00,  5.24it/s]
Processing files:  39%|███▉      | 299/762 [07:42<41:20,  5.36s/it]

[→] Embedding file: Lịch sử nghệ thuật Việt Nam.jsonl (idx=299)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  5.26it/s]
Processing files:  39%|███▉      | 300/762 [07:43<30:40,  3.98s/it]

[→] Embedding file: Lịch sử quân sự Việt Nam.jsonl (idx=300)


Embedding batches: 100%|██████████| 54/54 [00:10<00:00,  5.10it/s]
Processing files:  40%|███▉      | 301/762 [07:53<45:52,  5.97s/it]

[→] Embedding file: Lịch sử quân sự Đàng Trong.jsonl (idx=301)


Embedding batches: 100%|██████████| 5/5 [00:00<00:00,  5.27it/s]
Processing files:  40%|███▉      | 302/762 [07:54<34:14,  4.47s/it]

[→] Embedding file: Lịch sử Đà Nẵng.jsonl (idx=302)


Embedding batches: 100%|██████████| 8/8 [00:01<00:00,  5.02it/s]
Processing files:  40%|███▉      | 303/762 [07:56<27:36,  3.61s/it]

[→] Embedding file: Lịch sử Đàng Trong.jsonl (idx=303)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  5.15it/s]
Processing files:  40%|███▉      | 304/762 [07:57<20:37,  2.70s/it]

[→] Embedding file: Lịch sử Đắk Lắk.jsonl (idx=304)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  6.30it/s]
Processing files:  40%|████      | 305/762 [07:57<15:08,  1.99s/it]

[→] Embedding file: Lịch sử Đồng Nai.jsonl (idx=305)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  8.55it/s]
Processing files:  40%|████      | 306/762 [07:57<10:51,  1.43s/it]

[→] Embedding file: Lục quân Hoa Kỳ trong Chiến tranh Việt Nam.jsonl (idx=306)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  8.85it/s]
Processing files:  40%|████      | 307/762 [07:57<07:51,  1.04s/it]

[→] Embedding file: Miền Bắc Việt Nam.jsonl (idx=307)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  5.95it/s]
Processing files:  40%|████      | 308/762 [07:57<05:52,  1.29it/s]

[→] Embedding file: Miền Nam Việt Nam.jsonl (idx=308)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  6.17it/s]
Processing files:  41%|████      | 309/762 [07:58<04:51,  1.56it/s]

[→] Embedding file: Miền Việt Nam.jsonl (idx=309)


Embedding batches: 100%|██████████| 5/5 [00:00<00:00,  5.26it/s]
Processing files:  41%|████      | 310/762 [07:59<05:34,  1.35it/s]

[→] Embedding file: Môi trường Thành phố Hồ Chí Minh.jsonl (idx=310)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  5.52it/s]
Processing files:  41%|████      | 311/762 [07:59<04:18,  1.74it/s]

[→] Embedding file: Môi trường Việt Nam 2017.jsonl (idx=311)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  5.20it/s]
Processing files:  41%|████      | 312/762 [07:59<04:20,  1.73it/s]

[→] Embedding file: Môi trường Việt Nam.jsonl (idx=312)


Embedding batches: 100%|██████████| 10/10 [00:01<00:00,  5.32it/s]
Processing files:  41%|████      | 313/762 [08:01<07:16,  1.03it/s]

[→] Embedding file: Mũi đất Việt Nam.jsonl (idx=313)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00, 11.03it/s]


[→] Embedding file: Mất danh hiệu Công an nhân dân Việt Nam.jsonl (idx=314)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  5.43it/s]
Processing files:  41%|████▏     | 315/762 [08:02<04:23,  1.70it/s]

[→] Embedding file: Mặt trận Dân tộc Giải phóng miền Nam Việt Nam.jsonl (idx=315)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  5.52it/s]
Processing files:  41%|████▏     | 316/762 [08:02<04:19,  1.72it/s]

[→] Embedding file: Mỏ dầu ở Việt Nam.jsonl (idx=316)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  7.39it/s]
Processing files:  42%|████▏     | 317/762 [08:02<03:43,  1.99it/s]

[→] Embedding file: Mỏ ở Việt Nam.jsonl (idx=317)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  5.87it/s]
Processing files:  42%|████▏     | 318/762 [08:03<03:45,  1.97it/s]

[→] Embedding file: Nam Bộ.jsonl (idx=318)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  5.30it/s]
Processing files:  42%|████▏     | 319/762 [08:04<03:52,  1.90it/s]

[→] Embedding file: Nam Kỳ lục tỉnh.jsonl (idx=319)


Embedding batches: 100%|██████████| 5/5 [00:00<00:00,  5.18it/s]
Processing files:  42%|████▏     | 320/762 [08:04<04:48,  1.53it/s]

[→] Embedding file: Nam Việt.jsonl (idx=320)


Embedding batches: 100%|██████████| 5/5 [00:00<00:00,  6.09it/s]
Processing files:  42%|████▏     | 321/762 [08:05<05:11,  1.42it/s]

[→] Embedding file: Nghi lễ cấp quốc gia của Việt Nam.jsonl (idx=321)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  5.77it/s]
Processing files:  42%|████▏     | 322/762 [08:05<04:02,  1.81it/s]

[→] Embedding file: Nghiên cứu về Việt Nam.jsonl (idx=322)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  6.83it/s]
Processing files:  42%|████▏     | 323/762 [08:06<03:10,  2.30it/s]

[→] Embedding file: Nghèo ở Việt Nam.jsonl (idx=323)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  5.33it/s]
Processing files:  43%|████▎     | 324/762 [08:06<03:03,  2.38it/s]

[→] Embedding file: Nghĩa quân chống Pháp.jsonl (idx=324)


Embedding batches: 100%|██████████| 29/29 [00:05<00:00,  5.20it/s]
Processing files:  43%|████▎     | 325/762 [08:12<14:15,  1.96s/it]

[→] Embedding file: Nghĩa trang và khu tưởng niệm Chiến tranh Việt Nam.jsonl (idx=325)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  5.51it/s]
Processing files:  43%|████▎     | 326/762 [08:12<10:23,  1.43s/it]

[→] Embedding file: Nghệ thuật Việt Nam.jsonl (idx=326)


Embedding batches: 100%|██████████| 6/6 [00:01<00:00,  5.58it/s]
Processing files:  43%|████▎     | 327/762 [08:13<09:37,  1.33s/it]

[→] Embedding file: Ngoại giao Việt Nam thời Mạc.jsonl (idx=327)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  6.10it/s]
Processing files:  43%|████▎     | 328/762 [08:13<07:05,  1.02it/s]

[→] Embedding file: Ngoại giao Việt Nam.jsonl (idx=328)


Embedding batches: 100%|██████████| 26/26 [00:04<00:00,  5.43it/s]
Processing files:  43%|████▎     | 329/762 [08:18<15:20,  2.13s/it]

[→] Embedding file: Nguyễn Phú Trọng.jsonl (idx=329)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  5.16it/s]
Processing files:  43%|████▎     | 330/762 [08:19<12:24,  1.72s/it]

[→] Embedding file: Ngành dược Việt Nam.jsonl (idx=330)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  7.74it/s]
Processing files:  43%|████▎     | 331/762 [08:19<08:57,  1.25s/it]

[→] Embedding file: Ngày lễ Việt Nam.jsonl (idx=331)


Embedding batches: 100%|██████████| 16/16 [00:03<00:00,  5.22it/s]
Processing files:  44%|████▎     | 332/762 [08:22<12:51,  1.79s/it]

[→] Embedding file: Ngân hàng Nhà nước Việt Nam.jsonl (idx=332)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  7.14it/s]
Processing files:  44%|████▎     | 333/762 [08:22<09:17,  1.30s/it]

[→] Embedding file: Ngôn ngữ tại Việt Nam.jsonl (idx=333)


Embedding batches: 100%|██████████| 41/41 [00:06<00:00,  6.01it/s]
Processing files:  44%|████▍     | 334/762 [08:29<21:07,  2.96s/it]

[→] Embedding file: Người LGBT Việt Nam.jsonl (idx=334)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  6.85it/s]
Processing files:  44%|████▍     | 335/762 [08:29<15:24,  2.17s/it]

[→] Embedding file: Người Mỹ trong Chiến tranh Việt Nam.jsonl (idx=335)


Embedding batches: 100%|██████████| 22/22 [00:04<00:00,  5.30it/s]
Processing files:  44%|████▍     | 336/762 [08:33<19:38,  2.77s/it]

[→] Embedding file: Người Việt Nam bị ám sát.jsonl (idx=336)


Embedding batches: 100%|██████████| 5/5 [00:00<00:00,  5.20it/s]
Processing files:  44%|████▍     | 337/762 [08:34<15:47,  2.23s/it]

[→] Embedding file: Người Việt Nam giả tưởng.jsonl (idx=337)


Embedding batches: 100%|██████████| 9/9 [00:01<00:00,  5.56it/s]
Processing files:  44%|████▍     | 338/762 [08:36<14:29,  2.05s/it]

[→] Embedding file: Người Việt Nam giữ kỷ lục Guinness.jsonl (idx=338)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  5.57it/s]
Processing files:  44%|████▍     | 339/762 [08:36<11:16,  1.60s/it]

[→] Embedding file: Người Việt Nam lưu vong.jsonl (idx=339)


Embedding batches: 100%|██████████| 5/5 [00:00<00:00,  5.54it/s]
Processing files:  45%|████▍     | 340/762 [08:37<09:47,  1.39s/it]

[→] Embedding file: Người Việt Nam.jsonl (idx=340)


Embedding batches: 100%|██████████| 6/6 [00:01<00:00,  5.69it/s]
Processing files:  45%|████▍     | 341/762 [08:38<09:04,  1.29s/it]

[→] Embedding file: Người Việt di cư.jsonl (idx=341)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  5.06it/s]
Processing files:  45%|████▍     | 342/762 [08:39<06:46,  1.03it/s]

[→] Embedding file: Người bị ám sát bởi thành viên của Việt Minh.jsonl (idx=342)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  6.63it/s]
Processing files:  45%|████▌     | 343/762 [08:39<05:04,  1.38it/s]

[→] Embedding file: Người chống cộng Việt Nam.jsonl (idx=343)


Embedding batches: 100%|██████████| 65/65 [00:12<00:00,  5.33it/s]
Processing files:  45%|████▌     | 344/762 [08:51<29:02,  4.17s/it]

[→] Embedding file: Người khuyết tật Việt Nam.jsonl (idx=344)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  5.83it/s]
Processing files:  45%|████▌     | 345/762 [08:52<21:23,  3.08s/it]

[→] Embedding file: Người thọ bách niên Việt Nam.jsonl (idx=345)


Embedding batches: 100%|██████████| 41/41 [00:07<00:00,  5.40it/s]
Processing files:  45%|████▌     | 346/762 [08:59<30:46,  4.44s/it]

[→] Embedding file: Người tị nạn Việt Nam.jsonl (idx=346)


Embedding batches: 100%|██████████| 28/28 [00:05<00:00,  5.40it/s]
Processing files:  46%|████▌     | 347/762 [09:04<32:16,  4.67s/it]

[→] Embedding file: Người xứ Đông.jsonl (idx=347)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  6.19it/s]
Processing files:  46%|████▌     | 348/762 [09:05<22:53,  3.32s/it]

[→] Embedding file: Người Đàng Trong.jsonl (idx=348)


Embedding batches: 100%|██████████| 39/39 [00:07<00:00,  5.04it/s]
Processing files:  46%|████▌     | 349/762 [09:12<32:00,  4.65s/it]

[→] Embedding file: Nhiếp ảnh Việt Nam.jsonl (idx=349)


Embedding batches: 100%|██████████| 9/9 [00:01<00:00,  5.64it/s]
Processing files:  46%|████▌     | 350/762 [09:14<25:39,  3.74s/it]

[→] Embedding file: Nhiếp ảnh gia chiến tranh Việt Nam.jsonl (idx=350)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  9.24it/s]
Processing files:  46%|████▌     | 351/762 [09:14<18:09,  2.65s/it]

[→] Embedding file: Nho giáo Việt Nam.jsonl (idx=351)


Embedding batches: 100%|██████████| 8/8 [00:01<00:00,  5.40it/s]
Processing files:  46%|████▌     | 352/762 [09:16<15:44,  2.30s/it]

[→] Embedding file: Nhà Hậu Lê.jsonl (idx=352)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  5.18it/s]
Processing files:  46%|████▋     | 353/762 [09:16<11:24,  1.67s/it]

[→] Embedding file: Nhà Hậu Trần.jsonl (idx=353)


Embedding batches: 100%|██████████| 10/10 [00:01<00:00,  5.41it/s]
Processing files:  46%|████▋     | 354/762 [09:18<11:44,  1.73s/it]

[→] Embedding file: Nhà Hồ.jsonl (idx=354)


Embedding batches: 100%|██████████| 9/9 [00:01<00:00,  5.29it/s]
Processing files:  47%|████▋     | 355/762 [09:19<11:41,  1.72s/it]

[→] Embedding file: Nhà Lê sơ.jsonl (idx=355)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00, 11.63it/s]


[→] Embedding file: Nhà Lý.jsonl (idx=356)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  5.79it/s]
Processing files:  47%|████▋     | 357/762 [09:20<06:56,  1.03s/it]

[→] Embedding file: Nhà Mạc.jsonl (idx=357)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  6.36it/s]
Processing files:  47%|████▋     | 358/762 [09:20<05:29,  1.23it/s]

[→] Embedding file: Nhà Nguyễn.jsonl (idx=358)


Embedding batches: 100%|██████████| 8/8 [00:01<00:00,  5.55it/s]
Processing files:  47%|████▋     | 359/762 [09:21<06:35,  1.02it/s]

[→] Embedding file: Nhà Tiền Lê.jsonl (idx=359)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  5.70it/s]
Processing files:  47%|████▋     | 360/762 [09:22<06:05,  1.10it/s]

[→] Embedding file: Nhà Tiền Lý.jsonl (idx=360)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  6.26it/s]
Processing files:  47%|████▋     | 361/762 [09:22<04:40,  1.43it/s]

[→] Embedding file: Nhà Triệu.jsonl (idx=361)


Embedding batches: 100%|██████████| 5/5 [00:00<00:00,  5.03it/s]
Processing files:  48%|████▊     | 362/762 [09:23<05:14,  1.27it/s]

[→] Embedding file: Nhà Trần.jsonl (idx=362)


Embedding batches: 100%|██████████| 8/8 [00:01<00:00,  4.85it/s]
Processing files:  48%|████▊     | 363/762 [09:25<06:54,  1.04s/it]

[→] Embedding file: Nhà Tây Sơn.jsonl (idx=363)


Embedding batches: 100%|██████████| 29/29 [00:05<00:00,  5.03it/s]
Processing files:  48%|████▊     | 364/762 [09:31<16:05,  2.43s/it]

[→] Embedding file: Nhà cách mạng Việt Nam.jsonl (idx=364)


Embedding batches: 100%|██████████| 161/161 [00:31<00:00,  5.18it/s]
Processing files:  48%|████▊     | 365/762 [10:02<1:12:01, 10.89s/it]

[→] Embedding file: Nhà ga tại Việt Nam.jsonl (idx=365)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00, 10.96it/s]


[→] Embedding file: Nhà giáo Việt Nam.jsonl (idx=366)


Embedding batches: 100%|██████████| 65/65 [00:12<00:00,  5.36it/s]
Processing files:  48%|████▊     | 367/762 [10:14<57:19,  8.71s/it]  

[→] Embedding file: Nhà hoạt động nhân quyền Việt Nam.jsonl (idx=367)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  6.10it/s]
Processing files:  48%|████▊     | 368/762 [10:14<43:38,  6.65s/it]

[→] Embedding file: Nhà hoạt động phản đối Chiến tranh Việt Nam.jsonl (idx=368)


Embedding batches: 100%|██████████| 17/17 [00:03<00:00,  5.31it/s]
Processing files:  48%|████▊     | 369/762 [10:18<37:41,  5.75s/it]

[→] Embedding file: Nhà kinh tế học Việt Nam.jsonl (idx=369)


Embedding batches: 100%|██████████| 6/6 [00:01<00:00,  5.60it/s]
Processing files:  49%|████▊     | 370/762 [10:19<29:19,  4.49s/it]

[→] Embedding file: Nhà nghiên cứu kinh tế Việt Nam.jsonl (idx=370)


Embedding batches: 100%|██████████| 5/5 [00:00<00:00,  5.86it/s]
Processing files:  49%|████▊     | 371/762 [10:20<22:39,  3.48s/it]

[→] Embedding file: Nhà nghiên cứu văn hóa Việt Nam.jsonl (idx=371)


Embedding batches: 100%|██████████| 25/25 [00:04<00:00,  5.48it/s]
Processing files:  49%|████▉     | 372/762 [10:24<24:37,  3.79s/it]

[→] Embedding file: Nhà nước Việt Nam.jsonl (idx=372)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  6.28it/s]
Processing files:  49%|████▉     | 373/762 [10:25<18:22,  2.83s/it]

[→] Embedding file: Nhà sử học Việt Nam Cộng hòa.jsonl (idx=373)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  6.13it/s]
Processing files:  49%|████▉     | 374/762 [10:25<13:36,  2.10s/it]

[→] Embedding file: Nhà sử học Việt Nam.jsonl (idx=374)


Embedding batches: 100%|██████████| 26/26 [00:04<00:00,  5.57it/s]
Processing files:  49%|████▉     | 375/762 [10:30<18:28,  2.86s/it]

[→] Embedding file: Nhà thơ Việt Nam thời Bắc thuộc.jsonl (idx=375)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  6.32it/s]
Processing files:  49%|████▉     | 376/762 [10:30<13:16,  2.06s/it]

[→] Embedding file: Nhà tiên tri Việt Nam.jsonl (idx=376)


Embedding batches: 100%|██████████| 10/10 [00:01<00:00,  5.24it/s]
Processing files:  49%|████▉     | 377/762 [10:32<12:58,  2.02s/it]

[→] Embedding file: Nhà truyền giáo tại Việt Nam.jsonl (idx=377)


Embedding batches: 100%|██████████| 12/12 [00:02<00:00,  5.48it/s]
Processing files:  50%|████▉     | 378/762 [10:34<13:17,  2.08s/it]

[→] Embedding file: Nhà tù tại Việt Nam.jsonl (idx=378)


Embedding batches: 100%|██████████| 17/17 [00:02<00:00,  6.62it/s]
Processing files:  50%|████▉     | 379/762 [10:36<14:12,  2.23s/it]

[→] Embedding file: Nhà từ thiện Việt Nam.jsonl (idx=379)


Embedding batches: 100%|██████████| 12/12 [00:02<00:00,  5.56it/s]
Processing files:  50%|████▉     | 380/762 [10:39<14:04,  2.21s/it]

[→] Embedding file: Nhà Đinh.jsonl (idx=380)


Embedding batches: 100%|██████████| 12/12 [00:02<00:00,  5.17it/s]
Processing files:  50%|█████     | 381/762 [10:41<14:16,  2.25s/it]

[→] Embedding file: Nhân quyền tại Việt Nam.jsonl (idx=381)


Embedding batches: 100%|██████████| 5/5 [00:00<00:00,  5.76it/s]
Processing files:  50%|█████     | 382/762 [10:42<11:38,  1.84s/it]

[→] Embedding file: Nhân vật Việt Nam thời kỳ Tiền độc lập.jsonl (idx=382)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  6.99it/s]
Processing files:  50%|█████     | 383/762 [10:42<08:24,  1.33s/it]

[→] Embedding file: Nhân vật bất đồng chính kiến Việt Nam.jsonl (idx=383)


Embedding batches: 100%|██████████| 71/71 [00:13<00:00,  5.23it/s]
Processing files:  50%|█████     | 384/762 [10:56<31:32,  5.01s/it]

[→] Embedding file: Nhân vật lịch sử Việt Nam.jsonl (idx=384)


Embedding batches: 100%|██████████| 8/8 [00:01<00:00,  5.49it/s]
Processing files:  51%|█████     | 385/762 [10:57<24:47,  3.95s/it]

[→] Embedding file: Nhân vật quân sự Việt Nam.jsonl (idx=385)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  6.31it/s]
Processing files:  51%|█████     | 386/762 [10:58<18:13,  2.91s/it]

[→] Embedding file: Nhân vật thời Lê – Trịnh.jsonl (idx=386)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  6.44it/s]
Processing files:  51%|█████     | 387/762 [10:58<13:02,  2.09s/it]

[→] Embedding file: Nhân vật thời Lý.jsonl (idx=387)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  5.23it/s]
Processing files:  51%|█████     | 388/762 [10:58<09:50,  1.58s/it]

[→] Embedding file: Nhân vật thời Nguyễn.jsonl (idx=388)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  7.19it/s]
Processing files:  51%|█████     | 389/762 [10:58<07:24,  1.19s/it]

[→] Embedding file: Nhân vật thời Trần.jsonl (idx=389)


Embedding batches: 100%|██████████| 5/5 [00:00<00:00,  5.20it/s]
Processing files:  51%|█████     | 390/762 [10:59<06:59,  1.13s/it]

[→] Embedding file: Nhân vật thời Đinh.jsonl (idx=390)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  5.90it/s]
Processing files:  51%|█████▏    | 391/762 [11:00<05:31,  1.12it/s]

[→] Embedding file: Nhân vật trong chiến tranh Việt Nam.jsonl (idx=391)


Embedding batches: 100%|██████████| 32/32 [00:06<00:00,  5.28it/s]
Processing files:  51%|█████▏    | 392/762 [11:06<15:06,  2.45s/it]

[→] Embedding file: Nhãn hiệu Việt Nam.jsonl (idx=392)


Embedding batches: 100%|██████████| 42/42 [00:07<00:00,  5.53it/s]
Processing files:  52%|█████▏    | 393/762 [11:13<24:35,  4.00s/it]

[→] Embedding file: Nông nghiệp Việt Nam.jsonl (idx=393)


Embedding batches: 100%|██████████| 20/20 [00:03<00:00,  5.29it/s]
Processing files:  52%|█████▏    | 394/762 [11:17<24:09,  3.94s/it]

[→] Embedding file: Nông thôn Việt Nam.jsonl (idx=394)


Embedding batches: 100%|██████████| 5/5 [00:00<00:00,  5.50it/s]
Processing files:  52%|█████▏    | 395/762 [11:18<18:34,  3.04s/it]

[→] Embedding file: Núi Việt Nam.jsonl (idx=395)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  5.77it/s]
Processing files:  52%|█████▏    | 396/762 [11:19<13:37,  2.23s/it]

[→] Embedding file: Núi lửa Việt Nam.jsonl (idx=396)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00, 11.98it/s]


[→] Embedding file: Năng lượng ở Việt Nam.jsonl (idx=397)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  5.50it/s]
Processing files:  52%|█████▏    | 398/762 [11:19<07:56,  1.31s/it]

[→] Embedding file: Nội chiến Campuchia.jsonl (idx=398)


Embedding batches: 100%|██████████| 10/10 [00:01<00:00,  5.37it/s]
Processing files:  52%|█████▏    | 399/762 [11:21<08:46,  1.45s/it]

[→] Embedding file: Nội chiến Việt Nam.jsonl (idx=399)


Embedding batches: 100%|██████████| 8/8 [00:01<00:00,  5.39it/s]
Processing files:  52%|█████▏    | 400/762 [11:22<08:49,  1.46s/it]

[→] Embedding file: Nữ giới Việt Nam.jsonl (idx=400)


Embedding batches: 100%|██████████| 8/8 [00:01<00:00,  5.62it/s]
Processing files:  53%|█████▎    | 401/762 [11:24<08:45,  1.46s/it]

[→] Embedding file: Nữ giới tại Việt Nam.jsonl (idx=401)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  5.67it/s]
Processing files:  53%|█████▎    | 402/762 [11:24<07:12,  1.20s/it]

[→] Embedding file: Nữ tướng của Hai Bà Trưng.jsonl (idx=402)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  5.30it/s]
Processing files:  53%|█████▎    | 403/762 [11:25<05:47,  1.03it/s]

[→] Embedding file: Phim tài liệu chiến tranh Việt Nam.jsonl (idx=403)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  6.26it/s]
Processing files:  53%|█████▎    | 404/762 [11:25<04:56,  1.21it/s]

[→] Embedding file: Phim về chiến tranh Việt Nam.jsonl (idx=404)


Embedding batches: 100%|██████████| 41/41 [00:07<00:00,  5.53it/s]
Processing files:  53%|█████▎    | 405/762 [11:33<16:26,  2.76s/it]

[→] Embedding file: Phong trào Cần Vương.jsonl (idx=405)


Embedding batches: 100%|██████████| 6/6 [00:00<00:00,  6.01it/s]
Processing files:  53%|█████▎    | 406/762 [11:34<13:18,  2.24s/it]

[→] Embedding file: Phong trào Duy Tân.jsonl (idx=406)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  5.61it/s]
Processing files:  53%|█████▎    | 407/762 [11:34<10:18,  1.74s/it]

[→] Embedding file: Phong trào Nhân Văn – Giai Phẩm.jsonl (idx=407)


Embedding batches: 100%|██████████| 6/6 [00:01<00:00,  5.62it/s]
Processing files:  54%|█████▎    | 408/762 [11:35<09:08,  1.55s/it]

[→] Embedding file: Phong trào cải cách Việt Nam.jsonl (idx=408)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  5.71it/s]
Processing files:  54%|█████▎    | 409/762 [11:35<06:43,  1.14s/it]

[→] Embedding file: Phong trào dân chủ Việt Nam.jsonl (idx=409)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00, 12.09it/s]


[→] Embedding file: Phong trào Đông Du.jsonl (idx=410)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  8.13it/s]
Processing files:  54%|█████▍    | 411/762 [11:36<03:54,  1.50it/s]

[→] Embedding file: Phong trào đấu tranh trong Chiến tranh Việt Nam.jsonl (idx=411)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  6.00it/s]
Processing files:  54%|█████▍    | 412/762 [11:36<03:40,  1.59it/s]

[→] Embedding file: Phong trào độc lập Việt Nam.jsonl (idx=412)


Embedding batches: 100%|██████████| 36/36 [00:06<00:00,  5.36it/s]
Processing files:  54%|█████▍    | 413/762 [11:43<12:56,  2.23s/it]

[→] Embedding file: Phong tục Việt Nam.jsonl (idx=413)


Embedding batches: 100%|██████████| 8/8 [00:01<00:00,  5.49it/s]
Processing files:  54%|█████▍    | 414/762 [11:44<11:43,  2.02s/it]

[→] Embedding file: Pháp luật phong kiến Việt Nam.jsonl (idx=414)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  5.10it/s]
Processing files:  54%|█████▍    | 415/762 [11:45<09:41,  1.68s/it]

[→] Embedding file: Phát minh của Việt Nam.jsonl (idx=415)


Embedding batches: 100%|██████████| 11/11 [00:02<00:00,  5.41it/s]
Processing files:  55%|█████▍    | 416/762 [11:47<10:15,  1.78s/it]

[→] Embedding file: Phân cấp hành chính Việt Nam.jsonl (idx=416)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  5.37it/s]
Processing files:  55%|█████▍    | 417/762 [11:48<08:13,  1.43s/it]

[→] Embedding file: Phó Bí thư Tỉnh ủy Việt Nam.jsonl (idx=417)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  5.86it/s]
Processing files:  55%|█████▍    | 418/762 [11:48<06:40,  1.16s/it]

[→] Embedding file: Phó Chủ tịch nước Việt Nam.jsonl (idx=418)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  5.95it/s]
Processing files:  55%|█████▍    | 419/762 [11:49<05:50,  1.02s/it]

[→] Embedding file: Phù Nam.jsonl (idx=419)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  5.75it/s]
Processing files:  55%|█████▌    | 420/762 [11:50<05:00,  1.14it/s]

[→] Embedding file: Phố cổ tại Việt Nam.jsonl (idx=420)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  5.82it/s]
Processing files:  55%|█████▌    | 421/762 [11:50<03:48,  1.49it/s]

[→] Embedding file: Quan chức chính phủ Việt Nam.jsonl (idx=421)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  5.09it/s]
Processing files:  55%|█████▌    | 422/762 [11:50<03:20,  1.69it/s]

[→] Embedding file: Quan chức phong kiến Việt Nam.jsonl (idx=422)


Embedding batches: 100%|██████████| 5/5 [00:00<00:00,  5.18it/s]
Processing files:  56%|█████▌    | 423/762 [11:51<03:59,  1.41it/s]

[→] Embedding file: Quan lại nhà Hồ.jsonl (idx=423)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  5.90it/s]
Processing files:  56%|█████▌    | 424/762 [11:51<03:23,  1.66it/s]

[→] Embedding file: Quan lại nhà Lê sơ.jsonl (idx=424)


Embedding batches: 100%|██████████| 12/12 [00:02<00:00,  5.92it/s]
Processing files:  56%|█████▌    | 425/762 [11:54<05:47,  1.03s/it]

[→] Embedding file: Quan lại nhà Lê trung hưng.jsonl (idx=425)


Embedding batches: 100%|██████████| 16/16 [00:03<00:00,  5.15it/s]
Processing files:  56%|█████▌    | 426/762 [11:57<09:17,  1.66s/it]

[→] Embedding file: Quan lại nhà Mạc.jsonl (idx=426)


Embedding batches: 100%|██████████| 17/17 [00:03<00:00,  5.38it/s]
Processing files:  56%|█████▌    | 427/762 [12:00<11:48,  2.11s/it]

[→] Embedding file: Quan lại nhà Nguyễn.jsonl (idx=427)


Embedding batches: 100%|██████████| 93/93 [00:17<00:00,  5.21it/s]
Processing files:  56%|█████▌    | 428/762 [12:18<38:06,  6.85s/it]

[→] Embedding file: Quan lại nhà Tiền Lê.jsonl (idx=428)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  7.75it/s]
Processing files:  56%|█████▋    | 429/762 [12:18<27:03,  4.87s/it]

[→] Embedding file: Quan lại nhà Tây Sơn.jsonl (idx=429)


Embedding batches: 100%|██████████| 8/8 [00:01<00:00,  5.23it/s]
Processing files:  56%|█████▋    | 430/762 [12:20<21:26,  3.88s/it]

[→] Embedding file: Quan lại nhà Đinh.jsonl (idx=430)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  5.41it/s]
Processing files:  57%|█████▋    | 431/762 [12:20<15:53,  2.88s/it]

[→] Embedding file: Quan nhà Tiền Lý.jsonl (idx=431)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  7.50it/s]
Processing files:  57%|█████▋    | 432/762 [12:20<11:19,  2.06s/it]

[→] Embedding file: Quân chủ Việt Nam.jsonl (idx=432)


Embedding batches: 100%|██████████| 9/9 [00:01<00:00,  5.17it/s]
Processing files:  57%|█████▋    | 433/762 [12:22<10:46,  1.97s/it]

[→] Embedding file: Quân lực Việt Nam Cộng hòa.jsonl (idx=433)


Embedding batches: 100%|██████████| 22/22 [00:04<00:00,  5.16it/s]
Processing files:  57%|█████▋    | 434/762 [12:26<14:32,  2.66s/it]

[→] Embedding file: Quân nhân Mỹ trong Chiến tranh Việt Nam.jsonl (idx=434)


Embedding batches: 100%|██████████| 8/8 [00:01<00:00,  5.12it/s]
Processing files:  57%|█████▋    | 435/762 [12:28<12:43,  2.33s/it]

[→] Embedding file: Quân nhân trong Chiến tranh Việt Nam.jsonl (idx=435)


Embedding batches: 100%|██████████| 37/37 [00:07<00:00,  5.28it/s]
Processing files:  57%|█████▋    | 436/762 [12:35<20:19,  3.74s/it]

[→] Embedding file: Quân nổi dậy Việt Nam.jsonl (idx=436)


Embedding batches: 100%|██████████| 21/21 [00:03<00:00,  5.30it/s]
Processing files:  57%|█████▋    | 437/762 [12:39<20:39,  3.81s/it]

[→] Embedding file: Quân sự Việt Nam theo thời kỳ.jsonl (idx=437)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  6.94it/s]
Processing files:  57%|█████▋    | 438/762 [12:39<14:39,  2.71s/it]

[→] Embedding file: Quân ủy Trung ương Việt Nam.jsonl (idx=438)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  7.58it/s]
Processing files:  58%|█████▊    | 439/762 [12:39<10:53,  2.02s/it]

[→] Embedding file: Quảng trường Việt Nam.jsonl (idx=439)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  6.54it/s]
Processing files:  58%|█████▊    | 440/762 [12:40<08:06,  1.51s/it]

[→] Embedding file: Quảng trường tại Hà Nội.jsonl (idx=440)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00, 11.11it/s]


[→] Embedding file: Quảng trường tại Thành phố Hồ Chí Minh.jsonl (idx=441)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  7.40it/s]
Processing files:  58%|█████▊    | 442/762 [12:40<04:48,  1.11it/s]

[→] Embedding file: Quần đảo Việt Nam.jsonl (idx=442)


Embedding batches: 100%|██████████| 9/9 [00:01<00:00,  5.14it/s]
Processing files:  58%|█████▊    | 443/762 [12:42<05:55,  1.12s/it]

[→] Embedding file: Quốc ca Việt Nam.jsonl (idx=443)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  5.85it/s]
Processing files:  58%|█████▊    | 444/762 [12:42<04:51,  1.09it/s]

[→] Embedding file: Quốc hiệu Việt Nam.jsonl (idx=444)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  5.04it/s]
Processing files:  58%|█████▊    | 445/762 [12:43<04:06,  1.28it/s]

[→] Embedding file: Quốc tang tại Việt Nam.jsonl (idx=445)


Embedding batches: 100%|██████████| 7/7 [00:01<00:00,  5.10it/s]
Processing files:  59%|█████▊    | 446/762 [12:44<04:59,  1.05it/s]

[→] Embedding file: Rừng Việt Nam.jsonl (idx=446)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  6.69it/s]
Processing files:  59%|█████▊    | 447/762 [12:45<04:28,  1.17it/s]

[→] Embedding file: Rừng phòng hộ Việt Nam.jsonl (idx=447)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  8.80it/s]
Processing files:  59%|█████▉    | 448/762 [12:45<03:20,  1.56it/s]

[→] Embedding file: Rừng sản xuất Việt Nam.jsonl (idx=448)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  5.98it/s]
Processing files:  59%|█████▉    | 449/762 [12:45<02:53,  1.81it/s]

[→] Embedding file: Scandal Việt Nam.jsonl (idx=449)


Embedding batches: 100%|██████████| 21/21 [00:04<00:00,  4.89it/s]
Processing files:  59%|█████▉    | 450/762 [12:49<08:38,  1.66s/it]

[→] Embedding file: Sài Gòn.jsonl (idx=450)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  5.78it/s]
Processing files:  59%|█████▉    | 451/762 [12:50<07:08,  1.38s/it]

[→] Embedding file: Sách lịch sử Việt Nam.jsonl (idx=451)


Embedding batches: 100%|██████████| 16/16 [00:02<00:00,  5.40it/s]
Processing files:  59%|█████▉    | 452/762 [12:53<09:34,  1.85s/it]

[→] Embedding file: Sách về Chiến tranh Việt Nam.jsonl (idx=452)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  5.36it/s]
Processing files:  59%|█████▉    | 453/762 [12:54<07:34,  1.47s/it]

[→] Embedding file: Sân bay tại Việt Nam.jsonl (idx=453)


Embedding batches: 100%|██████████| 14/14 [00:02<00:00,  5.37it/s]
Processing files:  60%|█████▉    | 454/762 [12:56<09:19,  1.82s/it]

[→] Embedding file: Sân golf Việt Nam.jsonl (idx=454)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  5.29it/s]
Processing files:  60%|█████▉    | 455/762 [12:57<07:40,  1.50s/it]

[→] Embedding file: Sòng bạc tại Việt Nam.jsonl (idx=455)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  7.35it/s]
Processing files:  60%|█████▉    | 456/762 [12:57<05:35,  1.09s/it]

[→] Embedding file: Sĩ quan Công an nhân dân Việt Nam.jsonl (idx=456)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00, 10.75it/s]


[→] Embedding file: Sơ khai An Giang.jsonl (idx=457)


Embedding batches: 100%|██████████| 87/87 [00:12<00:00,  6.90it/s]
Processing files:  60%|██████    | 458/762 [13:10<17:51,  3.53s/it]

[→] Embedding file: Sơ khai Bà Rịa – Vũng Tàu.jsonl (idx=458)


Embedding batches: 100%|██████████| 55/55 [00:08<00:00,  6.18it/s]
Processing files:  60%|██████    | 459/762 [13:19<24:33,  4.86s/it]

[→] Embedding file: Sơ khai Bình Dương.jsonl (idx=459)


Embedding batches: 100%|██████████| 61/61 [00:08<00:00,  6.97it/s]
Processing files:  60%|██████    | 460/762 [13:28<29:37,  5.88s/it]

[→] Embedding file: Sơ khai Bình Phước.jsonl (idx=460)


Embedding batches: 100%|██████████| 41/41 [00:05<00:00,  6.95it/s]
Processing files:  60%|██████    | 461/762 [13:34<29:33,  5.89s/it]

[→] Embedding file: Sơ khai Bình Thuận.jsonl (idx=461)


Embedding batches: 100%|██████████| 57/57 [00:08<00:00,  6.77it/s]
Processing files:  61%|██████    | 462/762 [13:42<33:00,  6.60s/it]

[→] Embedding file: Sơ khai Bình Định.jsonl (idx=462)


Embedding batches: 100%|██████████| 71/71 [00:09<00:00,  7.23it/s]
Processing files:  61%|██████    | 463/762 [13:52<37:30,  7.53s/it]

[→] Embedding file: Sơ khai Bạc Liêu.jsonl (idx=463)


Embedding batches: 100%|██████████| 58/58 [00:10<00:00,  5.79it/s]
Processing files:  61%|██████    | 464/762 [14:02<40:59,  8.25s/it]

[→] Embedding file: Sơ khai Bắc Giang.jsonl (idx=464)


Embedding batches: 100%|██████████| 101/101 [00:15<00:00,  6.37it/s]
Processing files:  61%|██████    | 465/762 [14:18<51:54, 10.49s/it]

[→] Embedding file: Sơ khai Bắc Kạn.jsonl (idx=465)


Embedding batches: 100%|██████████| 48/48 [00:06<00:00,  7.02it/s]
Processing files:  61%|██████    | 466/762 [14:25<46:27,  9.42s/it]

[→] Embedding file: Sơ khai Bắc Ninh.jsonl (idx=466)


Embedding batches: 100%|██████████| 70/70 [00:11<00:00,  6.27it/s]
Processing files:  61%|██████▏   | 467/762 [14:36<48:52,  9.94s/it]

[→] Embedding file: Sơ khai Bến Tre.jsonl (idx=467)


Embedding batches: 100%|██████████| 59/59 [00:08<00:00,  7.07it/s]
Processing files:  61%|██████▏   | 468/762 [14:44<46:24,  9.47s/it]

[→] Embedding file: Sơ khai Cao Bằng.jsonl (idx=468)


Embedding batches: 100%|██████████| 71/71 [00:11<00:00,  6.19it/s]
Processing files:  62%|██████▏   | 469/762 [14:56<49:12, 10.08s/it]

[→] Embedding file: Sơ khai Chiến tranh Việt Nam.jsonl (idx=469)


Embedding batches: 100%|██████████| 13/13 [00:02<00:00,  5.50it/s]
Processing files:  62%|██████▏   | 470/762 [14:58<37:51,  7.78s/it]

[→] Embedding file: Sơ khai Cà Mau.jsonl (idx=470)


Embedding batches: 100%|██████████| 56/56 [00:09<00:00,  6.12it/s]
Processing files:  62%|██████▏   | 471/762 [15:07<39:44,  8.19s/it]

[→] Embedding file: Sơ khai Cần Thơ.jsonl (idx=471)


Embedding batches: 100%|██████████| 37/37 [00:06<00:00,  5.85it/s]
Processing files:  62%|██████▏   | 472/762 [15:14<36:56,  7.64s/it]

[→] Embedding file: Sơ khai Gia Lai.jsonl (idx=472)


Embedding batches: 100%|██████████| 62/62 [00:08<00:00,  7.01it/s]
Processing files:  62%|██████▏   | 473/762 [15:22<38:35,  8.01s/it]

[→] Embedding file: Sơ khai Huế.jsonl (idx=473)


Embedding batches: 100%|██████████| 102/102 [00:16<00:00,  6.21it/s]
Processing files:  62%|██████▏   | 474/762 [15:39<50:35, 10.54s/it]

[→] Embedding file: Sơ khai Hà Giang.jsonl (idx=474)


Embedding batches: 100%|██████████| 100/100 [00:12<00:00,  7.78it/s]
Processing files:  62%|██████▏   | 475/762 [15:52<53:45, 11.24s/it]

[→] Embedding file: Sơ khai Hà Nam.jsonl (idx=475)


Embedding batches: 100%|██████████| 46/46 [00:07<00:00,  6.54it/s]
Processing files:  62%|██████▏   | 476/762 [15:59<47:35,  9.99s/it]

[→] Embedding file: Sơ khai Hà Nội.jsonl (idx=476)


Embedding batches: 100%|██████████| 244/244 [00:38<00:00,  6.28it/s]
Processing files:  63%|██████▎   | 477/762 [16:38<1:28:36, 18.66s/it]

[→] Embedding file: Sơ khai Hà Tĩnh.jsonl (idx=477)


Embedding batches: 100%|██████████| 108/108 [00:16<00:00,  6.62it/s]
Processing files:  63%|██████▎   | 478/762 [16:54<1:25:03, 17.97s/it]

[→] Embedding file: Sơ khai Hòa Bình.jsonl (idx=478)


Embedding batches: 100%|██████████| 48/48 [00:07<00:00,  6.23it/s]
Processing files:  63%|██████▎   | 479/762 [17:02<1:10:15, 14.90s/it]

[→] Embedding file: Sơ khai Hưng Yên.jsonl (idx=479)


Embedding batches: 100%|██████████| 97/97 [00:14<00:00,  6.82it/s]
Processing files:  63%|██████▎   | 480/762 [17:16<1:09:07, 14.71s/it]

[→] Embedding file: Sơ khai Hải Dương.jsonl (idx=480)


Embedding batches: 100%|██████████| 94/94 [00:14<00:00,  6.43it/s]
Processing files:  63%|██████▎   | 481/762 [17:31<1:08:47, 14.69s/it]

[→] Embedding file: Sơ khai Hải Phòng.jsonl (idx=481)


Embedding batches: 100%|██████████| 79/79 [00:14<00:00,  5.61it/s]
Processing files:  63%|██████▎   | 482/762 [17:45<1:07:44, 14.51s/it]

[→] Embedding file: Sơ khai Hậu Giang.jsonl (idx=482)


Embedding batches: 100%|██████████| 60/60 [00:09<00:00,  6.30it/s]
Processing files:  63%|██████▎   | 483/762 [17:54<1:00:34, 13.03s/it]

[→] Embedding file: Sơ khai Khánh Hòa.jsonl (idx=483)


Embedding batches: 100%|██████████| 47/47 [00:06<00:00,  6.81it/s]
Processing files:  64%|██████▎   | 484/762 [18:01<51:52, 11.20s/it]  

[→] Embedding file: Sơ khai Kiên Giang.jsonl (idx=484)


Embedding batches: 100%|██████████| 88/88 [00:12<00:00,  7.13it/s]
Processing files:  64%|██████▎   | 485/762 [18:14<53:18, 11.55s/it]

[→] Embedding file: Sơ khai Kon Tum.jsonl (idx=485)


Embedding batches: 100%|██████████| 39/39 [00:05<00:00,  6.73it/s]
Processing files:  64%|██████▍   | 486/762 [18:19<45:12,  9.83s/it]

[→] Embedding file: Sơ khai Lai Châu.jsonl (idx=486)


Embedding batches: 100%|██████████| 43/43 [00:06<00:00,  6.51it/s]
Processing files:  64%|██████▍   | 487/762 [18:26<40:38,  8.87s/it]

[→] Embedding file: Sơ khai Long An.jsonl (idx=487)


Embedding batches: 100%|██████████| 91/91 [00:12<00:00,  7.48it/s]
Processing files:  64%|██████▍   | 488/762 [18:38<45:03,  9.87s/it]

[→] Embedding file: Sơ khai Lào Cai.jsonl (idx=488)


Embedding batches: 100%|██████████| 83/83 [00:12<00:00,  6.53it/s]
Processing files:  64%|██████▍   | 489/762 [18:51<48:48, 10.73s/it]

[→] Embedding file: Sơ khai Lâm Đồng.jsonl (idx=489)


Embedding batches: 100%|██████████| 81/81 [00:12<00:00,  6.52it/s]
Processing files:  64%|██████▍   | 490/762 [19:03<50:56, 11.24s/it]

[→] Embedding file: Sơ khai Lạng Sơn.jsonl (idx=490)


Embedding batches: 100%|██████████| 49/49 [00:07<00:00,  6.66it/s]
Processing files:  64%|██████▍   | 491/762 [19:11<45:31, 10.08s/it]

[→] Embedding file: Sơ khai Nam Định.jsonl (idx=491)


Embedding batches: 100%|██████████| 103/103 [00:16<00:00,  6.10it/s]
Processing files:  65%|██████▍   | 492/762 [19:28<54:35, 12.13s/it]

[→] Embedding file: Sơ khai Nghệ An.jsonl (idx=492)


Embedding batches: 100%|██████████| 150/150 [00:23<00:00,  6.43it/s]
Processing files:  65%|██████▍   | 493/762 [19:51<1:09:31, 15.51s/it]

[→] Embedding file: Sơ khai Ninh Bình.jsonl (idx=493)


Embedding batches: 100%|██████████| 95/95 [00:15<00:00,  6.05it/s]
Processing files:  65%|██████▍   | 494/762 [20:07<1:09:33, 15.57s/it]

[→] Embedding file: Sơ khai Ninh Thuận.jsonl (idx=494)


Embedding batches: 100%|██████████| 39/39 [00:05<00:00,  6.82it/s]
Processing files:  65%|██████▍   | 495/762 [20:13<56:10, 12.62s/it]  

[→] Embedding file: Sơ khai Phú Thọ.jsonl (idx=495)


Embedding batches: 100%|██████████| 59/59 [00:09<00:00,  6.37it/s]
Processing files:  65%|██████▌   | 496/762 [20:22<51:31, 11.62s/it]

[→] Embedding file: Sơ khai Phú Yên.jsonl (idx=496)


Embedding batches: 100%|██████████| 46/46 [00:06<00:00,  7.11it/s]
Processing files:  65%|██████▌   | 497/762 [20:28<44:31, 10.08s/it]

[→] Embedding file: Sơ khai Quảng Bình.jsonl (idx=497)


Embedding batches: 100%|██████████| 77/77 [00:10<00:00,  7.17it/s]
Processing files:  65%|██████▌   | 498/762 [20:39<45:14, 10.28s/it]

[→] Embedding file: Sơ khai Quảng Nam.jsonl (idx=498)


Embedding batches: 100%|██████████| 91/91 [00:13<00:00,  6.99it/s]
Processing files:  65%|██████▌   | 499/762 [20:52<48:42, 11.11s/it]

[→] Embedding file: Sơ khai Quảng Ngãi.jsonl (idx=499)


Embedding batches: 100%|██████████| 59/59 [00:08<00:00,  6.63it/s]
Processing files:  66%|██████▌   | 500/762 [21:01<45:39, 10.45s/it]

[→] Embedding file: Sơ khai Quảng Ninh.jsonl (idx=500)


Embedding batches: 100%|██████████| 61/61 [00:09<00:00,  6.25it/s]
Processing files:  66%|██████▌   | 501/762 [21:11<44:36, 10.25s/it]

[→] Embedding file: Sơ khai Quảng Trị.jsonl (idx=501)


Embedding batches: 100%|██████████| 62/62 [00:08<00:00,  7.00it/s]
Processing files:  66%|██████▌   | 502/762 [21:20<42:38,  9.84s/it]

[→] Embedding file: Sơ khai Sóc Trăng.jsonl (idx=502)


Embedding batches: 100%|██████████| 67/67 [00:09<00:00,  6.97it/s]
Processing files:  66%|██████▌   | 503/762 [21:29<42:11,  9.78s/it]

[→] Embedding file: Sơ khai Sơn La.jsonl (idx=503)


Embedding batches: 100%|██████████| 63/63 [00:09<00:00,  6.90it/s]
Processing files:  66%|██████▌   | 504/762 [21:38<41:13,  9.59s/it]

[→] Embedding file: Sơ khai Thanh Hóa.jsonl (idx=504)


Embedding batches: 100%|██████████| 201/201 [00:33<00:00,  6.08it/s]
Processing files:  66%|██████▋   | 505/762 [22:12<1:11:15, 16.63s/it]

[→] Embedding file: Sơ khai Thành phố Hồ Chí Minh.jsonl (idx=505)


Embedding batches: 100%|██████████| 109/109 [00:16<00:00,  6.68it/s]
Processing files:  66%|██████▋   | 506/762 [22:28<1:10:37, 16.55s/it]

[→] Embedding file: Sơ khai Thái Bình.jsonl (idx=506)


Embedding batches: 100%|██████████| 90/90 [00:12<00:00,  7.05it/s]
Processing files:  67%|██████▋   | 507/762 [22:41<1:05:33, 15.42s/it]

[→] Embedding file: Sơ khai Thái Nguyên.jsonl (idx=507)


Embedding batches: 100%|██████████| 105/105 [00:15<00:00,  6.83it/s]
Processing files:  67%|██████▋   | 508/762 [22:56<1:05:15, 15.42s/it]

[→] Embedding file: Sơ khai Tiền Giang.jsonl (idx=508)


Embedding batches: 100%|██████████| 83/83 [00:12<00:00,  6.65it/s]
Processing files:  67%|██████▋   | 509/762 [23:09<1:01:19, 14.54s/it]

[→] Embedding file: Sơ khai Trà Vinh.jsonl (idx=509)


Embedding batches: 100%|██████████| 73/73 [00:11<00:00,  6.45it/s]
Processing files:  67%|██████▋   | 510/762 [23:20<57:03, 13.58s/it]  

[→] Embedding file: Sơ khai Tuyên Quang.jsonl (idx=510)


Embedding batches: 100%|██████████| 48/48 [00:07<00:00,  6.28it/s]
Processing files:  67%|██████▋   | 511/762 [23:28<49:23, 11.81s/it]

[→] Embedding file: Sơ khai Tây Ninh.jsonl (idx=511)


Embedding batches: 100%|██████████| 51/51 [00:07<00:00,  7.19it/s]
Processing files:  67%|██████▋   | 512/762 [23:35<43:20, 10.40s/it]

[→] Embedding file: Sơ khai Việt Nam.jsonl (idx=512)


Embedding batches: 100%|██████████| 151/151 [00:26<00:00,  5.62it/s]
Processing files:  67%|██████▋   | 513/762 [24:02<1:03:41, 15.35s/it]

[→] Embedding file: Sơ khai Vĩnh Long.jsonl (idx=513)


Embedding batches: 100%|██████████| 58/58 [00:07<00:00,  7.34it/s]
Processing files:  67%|██████▋   | 514/762 [24:10<54:13, 13.12s/it]  

[→] Embedding file: Sơ khai Vĩnh Phúc.jsonl (idx=514)


Embedding batches: 100%|██████████| 42/42 [00:06<00:00,  6.44it/s]
Processing files:  68%|██████▊   | 515/762 [24:16<45:53, 11.15s/it]

[→] Embedding file: Sơ khai Yên Bái.jsonl (idx=515)


Embedding batches: 100%|██████████| 65/65 [00:08<00:00,  7.49it/s]
Processing files:  68%|██████▊   | 516/762 [24:25<42:41, 10.41s/it]

[→] Embedding file: Sơ khai công trình xây dựng Việt Nam.jsonl (idx=516)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  7.73it/s]
Processing files:  68%|██████▊   | 517/762 [24:25<29:56,  7.33s/it]

[→] Embedding file: Sơ khai công ty Việt Nam.jsonl (idx=517)


Embedding batches: 100%|██████████| 9/9 [00:01<00:00,  5.35it/s]
Processing files:  68%|██████▊   | 518/762 [24:27<22:56,  5.64s/it]

[→] Embedding file: Sơ khai giao thông Việt Nam.jsonl (idx=518)


Embedding batches: 100%|██████████| 37/37 [00:05<00:00,  6.73it/s]
Processing files:  68%|██████▊   | 519/762 [24:32<22:41,  5.60s/it]

[→] Embedding file: Sơ khai hành chính Việt Nam.jsonl (idx=519)


Embedding batches: 100%|██████████| 7/7 [00:01<00:00,  5.50it/s]
Processing files:  68%|██████▊   | 520/762 [24:33<17:22,  4.31s/it]

[→] Embedding file: Sơ khai kiến trúc Việt Nam.jsonl (idx=520)


Embedding batches: 100%|██████████| 79/79 [00:13<00:00,  5.91it/s]
Processing files:  68%|██████▊   | 521/762 [24:47<28:14,  7.03s/it]

[→] Embedding file: Sơ khai lịch sử Việt Nam.jsonl (idx=521)


Embedding batches: 100%|██████████| 25/25 [00:04<00:00,  5.55it/s]
Processing files:  69%|██████▊   | 522/762 [24:51<25:06,  6.28s/it]

[→] Embedding file: Sơ khai nhân vật Việt Nam.jsonl (idx=522)


Embedding batches: 100%|██████████| 214/214 [00:37<00:00,  5.69it/s]
Processing files:  69%|██████▊   | 523/762 [25:29<1:02:32, 15.70s/it]

[→] Embedding file: Sơ khai thể thao Việt Nam.jsonl (idx=523)


Embedding batches: 100%|██████████| 8/8 [00:01<00:00,  6.21it/s]
Processing files:  69%|██████▉   | 524/762 [25:30<45:08, 11.38s/it]  

[→] Embedding file: Sơ khai truyền thông đại chúng Việt Nam.jsonl (idx=524)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  5.56it/s]
Processing files:  69%|██████▉   | 525/762 [25:31<32:19,  8.18s/it]

[→] Embedding file: Sơ khai tôn giáo Việt Nam.jsonl (idx=525)


Embedding batches: 100%|██████████| 8/8 [00:01<00:00,  5.26it/s]
Processing files:  69%|██████▉   | 526/762 [25:33<24:20,  6.19s/it]

[→] Embedding file: Sơ khai tổ chức Việt Nam.jsonl (idx=526)


Embedding batches: 100%|██████████| 28/28 [00:04<00:00,  6.69it/s]
Processing files:  69%|██████▉   | 527/762 [25:37<21:54,  5.59s/it]

[→] Embedding file: Sơ khai văn hóa Việt Nam.jsonl (idx=527)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  5.27it/s]
Processing files:  69%|██████▉   | 528/762 [25:37<15:30,  3.98s/it]

[→] Embedding file: Sơ khai âm nhạc Việt Nam.jsonl (idx=528)


Embedding batches: 100%|██████████| 22/22 [00:03<00:00,  5.90it/s]
Processing files:  69%|██████▉   | 529/762 [25:41<15:10,  3.91s/it]

[→] Embedding file: Sơ khai Điện Biên.jsonl (idx=529)


Embedding batches: 100%|██████████| 76/76 [00:11<00:00,  6.74it/s]
Processing files:  70%|██████▉   | 530/762 [25:52<23:41,  6.13s/it]

[→] Embedding file: Sơ khai Đà Nẵng.jsonl (idx=530)


Embedding batches: 100%|██████████| 30/30 [00:04<00:00,  6.29it/s]
Processing files:  70%|██████▉   | 531/762 [25:57<22:01,  5.72s/it]

[→] Embedding file: Sơ khai Đắk Lắk.jsonl (idx=531)


Embedding batches: 100%|██████████| 77/77 [00:12<00:00,  6.09it/s]
Processing files:  70%|██████▉   | 532/762 [26:10<29:54,  7.80s/it]

[→] Embedding file: Sơ khai Đắk Nông.jsonl (idx=532)


Embedding batches: 100%|██████████| 46/46 [00:07<00:00,  6.34it/s]
Processing files:  70%|██████▉   | 533/762 [26:17<29:10,  7.64s/it]

[→] Embedding file: Sơ khai Đồng Nai.jsonl (idx=533)


Embedding batches: 100%|██████████| 87/87 [00:12<00:00,  6.85it/s]
Processing files:  70%|███████   | 534/762 [26:30<34:50,  9.17s/it]

[→] Embedding file: Sơ khai Đồng Tháp.jsonl (idx=534)


Embedding batches: 100%|██████████| 75/75 [00:11<00:00,  6.81it/s]
Processing files:  70%|███████   | 535/762 [26:41<36:48,  9.73s/it]

[→] Embedding file: Sơ khai điện lực Việt Nam.jsonl (idx=535)


Embedding batches: 100%|██████████| 62/62 [00:09<00:00,  6.88it/s]
Processing files:  70%|███████   | 536/762 [26:50<35:51,  9.52s/it]

[→] Embedding file: Sơ khai địa lý Việt Nam.jsonl (idx=536)


Embedding batches: 100%|██████████| 69/69 [00:10<00:00,  6.64it/s]
Processing files:  70%|███████   | 537/762 [27:00<36:42,  9.79s/it]

[→] Embedding file: Sơ khai ẩm thực Việt Nam.jsonl (idx=537)


Embedding batches: 100%|██████████| 63/63 [00:11<00:00,  5.66it/s]
Processing files:  71%|███████   | 538/762 [27:11<38:05, 10.20s/it]

[→] Embedding file: Sản xuất tại Việt Nam.jsonl (idx=538)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  6.34it/s]
Processing files:  71%|███████   | 539/762 [27:11<26:43,  7.19s/it]

[→] Embedding file: Sử học Việt Nam Cộng hòa.jsonl (idx=539)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  6.68it/s]
Processing files:  71%|███████   | 540/762 [27:12<18:57,  5.13s/it]

[→] Embedding file: Sử học Việt Nam.jsonl (idx=540)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  6.30it/s]
Processing files:  71%|███████   | 541/762 [27:12<13:44,  3.73s/it]

[→] Embedding file: Sử thi Việt Nam.jsonl (idx=541)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  6.11it/s]
Processing files:  71%|███████   | 542/762 [27:13<10:07,  2.76s/it]

[→] Embedding file: Sự kiện kinh tế Việt Nam.jsonl (idx=542)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  5.57it/s]
Processing files:  71%|███████▏  | 543/762 [27:13<07:39,  2.10s/it]

[→] Embedding file: Sự kiện lịch sử Việt Nam.jsonl (idx=543)


Embedding batches: 100%|██████████| 26/26 [00:04<00:00,  5.30it/s]
Processing files:  71%|███████▏  | 544/762 [27:18<10:41,  2.94s/it]

[→] Embedding file: Sự kiện văn hóa Việt Nam.jsonl (idx=544)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00, 10.74it/s]
Processing files:  72%|███████▏  | 545/762 [27:18<07:33,  2.09s/it]

[→] Embedding file: Tai nạn tại Việt Nam.jsonl (idx=545)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  5.98it/s]
Processing files:  72%|███████▏  | 546/762 [27:19<05:38,  1.57s/it]

[→] Embedding file: Tham nhũng tại Việt Nam.jsonl (idx=546)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  6.02it/s]
Processing files:  72%|███████▏  | 547/762 [27:19<04:39,  1.30s/it]

[→] Embedding file: Thi cử tại Việt Nam.jsonl (idx=547)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  5.09it/s]
Processing files:  72%|███████▏  | 548/762 [27:20<03:40,  1.03s/it]

[→] Embedding file: Thiên tai tại Việt Nam.jsonl (idx=548)


Embedding batches: 100%|██████████| 10/10 [00:01<00:00,  5.57it/s]
Processing files:  72%|███████▏  | 549/762 [27:21<04:29,  1.26s/it]

[→] Embedding file: Thiết bị sử dụng trong chiến tranh Việt Nam.jsonl (idx=549)


Embedding batches: 100%|██████████| 29/29 [00:05<00:00,  5.23it/s]
Processing files:  72%|███████▏  | 550/762 [27:27<09:00,  2.55s/it]

[→] Embedding file: Thuyền nhân.jsonl (idx=550)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  6.18it/s]
Processing files:  72%|███████▏  | 551/762 [27:27<06:27,  1.84s/it]

[→] Embedding file: Thuế Việt Nam.jsonl (idx=551)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  6.69it/s]
Processing files:  72%|███████▏  | 552/762 [27:28<04:59,  1.42s/it]

[→] Embedding file: Thành phần thứ ba trong chiến tranh Việt Nam.jsonl (idx=552)


Embedding batches: 100%|██████████| 9/9 [00:01<00:00,  5.30it/s]
Processing files:  73%|███████▎  | 553/762 [27:29<05:15,  1.51s/it]

[→] Embedding file: Thành phố của Việt Nam.jsonl (idx=553)


Embedding batches: 100%|██████████| 37/37 [00:07<00:00,  5.25it/s]
Processing files:  73%|███████▎  | 554/762 [27:36<11:00,  3.17s/it]

[→] Embedding file: Thác nước Việt Nam.jsonl (idx=554)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  6.65it/s]
Processing files:  73%|███████▎  | 555/762 [27:37<08:08,  2.36s/it]

[→] Embedding file: Thái thú Giao Chỉ.jsonl (idx=555)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  6.26it/s]
Processing files:  73%|███████▎  | 556/762 [27:37<05:50,  1.70s/it]

[→] Embedding file: Thánh Công giáo Việt Nam.jsonl (idx=556)


Embedding batches: 100%|██████████| 26/26 [00:04<00:00,  5.64it/s]
Processing files:  73%|███████▎  | 557/762 [27:42<08:48,  2.58s/it]

[→] Embedding file: Tháp Chăm.jsonl (idx=557)


Embedding batches: 100%|██████████| 6/6 [00:01<00:00,  5.80it/s]
Processing files:  73%|███████▎  | 558/762 [27:43<07:11,  2.12s/it]

[→] Embedding file: Thông tin nhân khẩu học Việt Nam.jsonl (idx=558)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  5.99it/s]
Processing files:  73%|███████▎  | 559/762 [27:43<05:32,  1.64s/it]

[→] Embedding file: Thông tấn xã Việt Nam.jsonl (idx=559)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  8.21it/s]
Processing files:  73%|███████▎  | 560/762 [27:43<03:58,  1.18s/it]

[→] Embedding file: Thư viện tại Việt Nam.jsonl (idx=560)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  6.91it/s]
Processing files:  74%|███████▎  | 561/762 [27:44<03:04,  1.09it/s]

[→] Embedding file: Thương cảng cổ Việt Nam.jsonl (idx=561)


Embedding batches: 100%|██████████| 7/7 [00:01<00:00,  5.42it/s]
Processing files:  74%|███████▍  | 562/762 [27:45<03:27,  1.04s/it]

[→] Embedding file: Thương mại Việt Nam.jsonl (idx=562)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  5.73it/s]
Processing files:  74%|███████▍  | 563/762 [27:46<03:06,  1.07it/s]

[→] Embedding file: Thường trực Ban Bí thư Trung ương Đảng Cộng sản Việt Nam.jsonl (idx=563)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  6.38it/s]
Processing files:  74%|███████▍  | 564/762 [27:46<02:28,  1.33it/s]

[→] Embedding file: Thảm họa tại Việt Nam.jsonl (idx=564)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00, 11.23it/s]


[→] Embedding file: Thảm sát Huế Tết Mậu thân.jsonl (idx=565)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  6.01it/s]
Processing files:  74%|███████▍  | 566/762 [27:46<01:39,  1.97it/s]

[→] Embedding file: Thảm sát tại Việt Nam.jsonl (idx=566)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  4.97it/s]
Processing files:  74%|███████▍  | 567/762 [27:47<01:24,  2.30it/s]

[→] Embedding file: Thể thao Việt Nam Cộng hòa.jsonl (idx=567)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  6.41it/s]
Processing files:  75%|███████▍  | 568/762 [27:47<01:18,  2.48it/s]

[→] Embedding file: Thể thao Việt Nam Dân chủ Cộng hòa.jsonl (idx=568)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  6.06it/s]
Processing files:  75%|███████▍  | 569/762 [27:47<01:05,  2.94it/s]

[→] Embedding file: Thể thao Việt Nam.jsonl (idx=569)


Embedding batches: 100%|██████████| 22/22 [00:03<00:00,  6.51it/s]
Processing files:  75%|███████▍  | 570/762 [27:50<03:49,  1.19s/it]

[→] Embedding file: Thị trường chứng khoán Việt Nam.jsonl (idx=570)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  6.02it/s]
Processing files:  75%|███████▍  | 571/762 [27:51<03:19,  1.05s/it]

[→] Embedding file: Thị trấn Việt Nam.jsonl (idx=571)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  6.94it/s]
Processing files:  75%|███████▌  | 572/762 [27:51<02:37,  1.21it/s]

[→] Embedding file: Thủ lĩnh quân sự phong trào Cần Vương.jsonl (idx=572)


Embedding batches: 100%|██████████| 5/5 [00:00<00:00,  5.81it/s]
Processing files:  75%|███████▌  | 573/762 [27:52<02:39,  1.19it/s]

[→] Embedding file: Thủy vực Việt Nam.jsonl (idx=573)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  6.07it/s]
Processing files:  75%|███████▌  | 574/762 [27:53<02:10,  1.44it/s]

[→] Embedding file: Tiến sĩ Kinh tế Việt Nam.jsonl (idx=574)


Embedding batches: 100%|██████████| 52/52 [00:09<00:00,  5.57it/s]
Processing files:  75%|███████▌  | 575/762 [28:02<10:09,  3.26s/it]

[→] Embedding file: Tiến sĩ nhà Hậu Lê.jsonl (idx=575)


Embedding batches: 100%|██████████| 69/69 [00:12<00:00,  5.74it/s]
Processing files:  76%|███████▌  | 576/762 [28:14<18:11,  5.87s/it]

[→] Embedding file: Tiểu quốc J'rai.jsonl (idx=576)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  5.32it/s]
Processing files:  76%|███████▌  | 577/762 [28:15<13:24,  4.35s/it]

[→] Embedding file: Tiểu thuyết chiến tranh Việt Nam.jsonl (idx=577)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  6.48it/s]
Processing files:  76%|███████▌  | 578/762 [28:15<09:47,  3.19s/it]

[→] Embedding file: Tiểu thuyết lịch sử Việt Nam.jsonl (idx=578)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  5.82it/s]
Processing files:  76%|███████▌  | 579/762 [28:16<07:08,  2.34s/it]

[→] Embedding file: Toàn quyền Đông Dương.jsonl (idx=579)


Embedding batches: 100%|██████████| 12/12 [00:02<00:00,  5.65it/s]
Processing files:  76%|███████▌  | 580/762 [28:18<06:54,  2.28s/it]

[→] Embedding file: Trang phục Việt Nam.jsonl (idx=580)


Embedding batches: 100%|██████████| 9/9 [00:01<00:00,  5.33it/s]
Processing files:  76%|███████▌  | 581/762 [28:19<06:21,  2.11s/it]

[→] Embedding file: Tranh chấp đất đai tại Việt Nam.jsonl (idx=581)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  5.31it/s]
Processing files:  76%|███████▋  | 582/762 [28:20<05:06,  1.70s/it]

[→] Embedding file: Tranh cãi ở Việt Nam.jsonl (idx=582)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  5.38it/s]
Processing files:  77%|███████▋  | 583/762 [28:20<03:44,  1.25s/it]

[→] Embedding file: Triết học Việt Nam.jsonl (idx=583)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  6.97it/s]
Processing files:  77%|███████▋  | 584/762 [28:21<03:07,  1.05s/it]

[→] Embedding file: Triều đại Việt Nam.jsonl (idx=584)


Embedding batches: 100%|██████████| 39/39 [00:07<00:00,  5.21it/s]
Processing files:  77%|███████▋  | 585/762 [28:29<08:49,  2.99s/it]

[→] Embedding file: Trung Bộ.jsonl (idx=585)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  5.62it/s]
Processing files:  77%|███████▋  | 586/762 [28:29<06:46,  2.31s/it]

[→] Embedding file: Truyền thuyết Việt Nam.jsonl (idx=586)


Embedding batches: 100%|██████████| 21/21 [00:03<00:00,  5.45it/s]
Processing files:  77%|███████▋  | 587/762 [28:33<08:05,  2.77s/it]

[→] Embedding file: Truyền thông Việt Nam Cộng hòa.jsonl (idx=587)


Embedding batches: 100%|██████████| 5/5 [00:00<00:00,  5.52it/s]
Processing files:  77%|███████▋  | 588/762 [28:34<06:25,  2.22s/it]

[→] Embedding file: Truyền thông đại chúng Việt Nam.jsonl (idx=588)


Embedding batches: 100%|██████████| 10/10 [00:01<00:00,  5.34it/s]
Processing files:  77%|███████▋  | 589/762 [28:36<06:06,  2.12s/it]

[→] Embedding file: Trào lưu văn hóa Việt Nam.jsonl (idx=589)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  6.07it/s]
Processing files:  77%|███████▋  | 590/762 [28:36<04:41,  1.63s/it]

[→] Embedding file: Trò chơi Việt Nam.jsonl (idx=590)


Embedding batches: 100%|██████████| 10/10 [00:01<00:00,  5.53it/s]
Processing files:  78%|███████▊  | 591/762 [28:38<04:49,  1.69s/it]

[→] Embedding file: Trường học Việt Nam.jsonl (idx=591)


Embedding batches: 100%|██████████| 7/7 [00:01<00:00,  5.75it/s]
Processing files:  78%|███████▊  | 592/762 [28:39<04:24,  1.56s/it]

[→] Embedding file: Trường học âm nhạc ở Việt Nam.jsonl (idx=592)


Embedding batches: 100%|██████████| 5/5 [00:00<00:00,  6.47it/s]
Processing files:  78%|███████▊  | 593/762 [28:40<03:43,  1.32s/it]

[→] Embedding file: Trường đại học và cao đẳng tại Việt Nam.jsonl (idx=593)


Embedding batches: 100%|██████████| 5/5 [00:00<00:00,  6.03it/s]
Processing files:  78%|███████▊  | 594/762 [28:41<03:18,  1.18s/it]

[→] Embedding file: Trưởng Ban Tuyên giáo Trung ương Đảng Cộng sản Việt Nam.jsonl (idx=594)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  5.62it/s]
Processing files:  78%|███████▊  | 595/762 [28:42<02:53,  1.04s/it]

[→] Embedding file: Trận đánh liên quan tới Việt Nam.jsonl (idx=595)


Embedding batches: 100%|██████████| 40/40 [00:07<00:00,  5.23it/s]
Processing files:  78%|███████▊  | 596/762 [28:49<08:22,  3.03s/it]

[→] Embedding file: Trận đánh và chiến dịch trong Chiến tranh Việt Nam.jsonl (idx=596)


Embedding batches: 100%|██████████| 15/15 [00:02<00:00,  5.27it/s]
Processing files:  78%|███████▊  | 597/762 [28:52<08:10,  2.98s/it]

[→] Embedding file: Trận đánh và chiến dịch trong Chiến tranh Đông Dương.jsonl (idx=597)


Embedding batches: 100%|██████████| 22/22 [00:04<00:00,  5.22it/s]
Processing files:  78%|███████▊  | 598/762 [28:57<09:09,  3.35s/it]

[→] Embedding file: Trống đồng Đông Sơn.jsonl (idx=598)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  6.29it/s]
Processing files:  79%|███████▊  | 599/762 [28:57<06:30,  2.40s/it]

[→] Embedding file: Tuổi trẻ ở Việt Nam.jsonl (idx=599)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  6.17it/s]
Processing files:  79%|███████▊  | 600/762 [28:57<04:39,  1.73s/it]

[→] Embedding file: Tàu của Cảnh sát biển Việt Nam.jsonl (idx=600)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  5.68it/s]
Processing files:  79%|███████▉  | 601/762 [28:58<03:48,  1.42s/it]

[→] Embedding file: Tác phẩm nghệ thuật về Chiến tranh Việt Nam.jsonl (idx=601)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  5.59it/s]
Processing files:  79%|███████▉  | 602/762 [28:58<02:56,  1.10s/it]

[→] Embedding file: Tác phẩm về chiến tranh Việt Nam.jsonl (idx=602)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  5.46it/s]
Processing files:  79%|███████▉  | 603/762 [28:59<02:30,  1.06it/s]

[→] Embedding file: Tên người Việt Nam.jsonl (idx=603)


Embedding batches: 100%|██████████| 6/6 [00:01<00:00,  5.17it/s]
Processing files:  79%|███████▉  | 604/762 [29:00<02:40,  1.01s/it]

[→] Embedding file: Tình báo Việt Nam Cộng hòa.jsonl (idx=604)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  6.17it/s]
Processing files:  79%|███████▉  | 605/762 [29:00<02:07,  1.23it/s]

[→] Embedding file: Tình báo Việt Nam.jsonl (idx=605)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  8.65it/s]
Processing files:  80%|███████▉  | 606/762 [29:00<01:34,  1.66it/s]

[→] Embedding file: Tôn giáo tại Việt Nam.jsonl (idx=606)


Embedding batches: 100%|██████████| 46/46 [00:08<00:00,  5.30it/s]
Processing files:  80%|███████▉  | 607/762 [29:09<07:50,  3.03s/it]

[→] Embedding file: Tù binh Chiến tranh Việt Nam.jsonl (idx=607)


Embedding batches: 100%|██████████| 11/11 [00:02<00:00,  5.19it/s]
Processing files:  80%|███████▉  | 608/762 [29:11<07:05,  2.77s/it]

[→] Embedding file: Tù nhân Việt Nam.jsonl (idx=608)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  5.25it/s]
Processing files:  80%|███████▉  | 609/762 [29:11<05:14,  2.05s/it]

[→] Embedding file: Tư tưởng Hồ Chí Minh.jsonl (idx=609)


Embedding batches: 100%|██████████| 7/7 [00:01<00:00,  5.02it/s]
Processing files:  80%|████████  | 610/762 [29:13<04:42,  1.86s/it]

[→] Embedding file: Tướng nhà Triệu.jsonl (idx=610)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  6.59it/s]
Processing files:  80%|████████  | 611/762 [29:13<03:23,  1.35s/it]

[→] Embedding file: Tệ nạn xã hội tại Việt Nam.jsonl (idx=611)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  6.18it/s]
Processing files:  80%|████████  | 612/762 [29:13<02:36,  1.04s/it]

[→] Embedding file: Tỉnh thành Việt Nam.jsonl (idx=612)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  5.23it/s]
Processing files:  80%|████████  | 613/762 [29:14<01:57,  1.27it/s]

[→] Embedding file: Tỉnh ủy (Việt Nam).jsonl (idx=613)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  6.35it/s]
Processing files:  81%|████████  | 614/762 [29:14<01:36,  1.54it/s]

[→] Embedding file: Tị nạn Việt Nam.jsonl (idx=614)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  6.10it/s]
Processing files:  81%|████████  | 615/762 [29:14<01:21,  1.80it/s]

[→] Embedding file: Tổ chức Chính phủ Việt Nam.jsonl (idx=615)


Embedding batches: 100%|██████████| 6/6 [00:00<00:00,  6.42it/s]
Processing files:  81%|████████  | 616/762 [29:15<01:38,  1.49it/s]

[→] Embedding file: Tổ chức có trụ sở tại Việt Nam.jsonl (idx=616)


Embedding batches: 100%|██████████| 9/9 [00:01<00:00,  5.09it/s]
Processing files:  81%|████████  | 617/762 [29:17<02:25,  1.00s/it]

[→] Embedding file: Tổ chức của Đảng Cộng sản Việt Nam.jsonl (idx=617)


Embedding batches: 100%|██████████| 11/11 [00:02<00:00,  5.18it/s]
Processing files:  81%|████████  | 618/762 [29:19<03:13,  1.34s/it]

[→] Embedding file: Tổ chức giáo dục Việt Nam.jsonl (idx=618)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  9.09it/s]
Processing files:  81%|████████  | 619/762 [29:19<02:19,  1.03it/s]

[→] Embedding file: Tổ chức phi chính phủ tại Việt Nam.jsonl (idx=619)


Embedding batches: 100%|██████████| 6/6 [00:00<00:00,  6.68it/s]
Processing files:  81%|████████▏ | 620/762 [29:20<02:15,  1.05it/s]

[→] Embedding file: Tổ chức phi lợi nhuận Việt Nam.jsonl (idx=620)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  6.14it/s]
Processing files:  81%|████████▏ | 621/762 [29:20<01:48,  1.30it/s]

[→] Embedding file: Tổ chức thanh thiếu niên Việt Nam.jsonl (idx=621)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  5.53it/s]
Processing files:  82%|████████▏ | 622/762 [29:21<01:38,  1.42it/s]

[→] Embedding file: Tổ chức thành viên của Mặt trận Tổ quốc Việt Nam.jsonl (idx=622)


Embedding batches: 100%|██████████| 5/5 [00:00<00:00,  5.58it/s]
Processing files:  82%|████████▏ | 623/762 [29:22<01:46,  1.31it/s]

[→] Embedding file: Tổ chức xã hội Việt Nam.jsonl (idx=623)


Embedding batches: 100%|██████████| 13/13 [00:02<00:00,  5.51it/s]
Processing files:  82%|████████▏ | 624/762 [29:24<02:52,  1.25s/it]

[→] Embedding file: Tổng biên tập Báo Nhân dân.jsonl (idx=624)


Embedding batches: 100%|██████████| 5/5 [00:00<00:00,  5.45it/s]
Processing files:  82%|████████▏ | 625/762 [29:25<02:37,  1.15s/it]

[→] Embedding file: Tổng cục Chính trị, Bộ Công an (Việt Nam).jsonl (idx=625)


Embedding batches: 100%|██████████| 7/7 [00:01<00:00,  6.87it/s]
Processing files:  82%|████████▏ | 626/762 [29:26<02:31,  1.12s/it]

[→] Embedding file: Tổng cục chính trị Quân đội nhân dân Việt Nam.jsonl (idx=626)


Embedding batches: 100%|██████████| 11/11 [00:01<00:00,  7.50it/s]
Processing files:  82%|████████▏ | 627/762 [29:28<02:45,  1.22s/it]

[→] Embedding file: Tội phạm Việt Nam.jsonl (idx=627)


Embedding batches: 100%|██████████| 15/15 [00:02<00:00,  5.23it/s]
Processing files:  82%|████████▏ | 628/762 [29:31<03:50,  1.72s/it]

[→] Embedding file: Tội phạm ở Việt Nam.jsonl (idx=628)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  6.81it/s]
Processing files:  83%|████████▎ | 629/762 [29:31<02:52,  1.30s/it]

[→] Embedding file: Tội ác chiến tranh ở Việt Nam.jsonl (idx=629)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  6.12it/s]
Processing files:  83%|████████▎ | 630/762 [29:31<02:12,  1.01s/it]

[→] Embedding file: Tội ác và vi phạm nhân quyền trong Chiến tranh Việt Nam.jsonl (idx=630)


Embedding batches: 100%|██████████| 5/5 [00:00<00:00,  5.36it/s]
Processing files:  83%|████████▎ | 631/762 [29:32<02:09,  1.01it/s]

[→] Embedding file: Từ và cụm từ tiếng Việt.jsonl (idx=631)


Embedding batches: 100%|██████████| 10/10 [00:01<00:00,  5.29it/s]
Processing files:  83%|████████▎ | 632/762 [29:34<02:44,  1.26s/it]

[→] Embedding file: Tỷ phú Việt Nam.jsonl (idx=632)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  5.23it/s]
Processing files:  83%|████████▎ | 633/762 [29:34<02:09,  1.00s/it]

[→] Embedding file: Viện Hàn lâm Khoa học và Công nghệ Việt Nam.jsonl (idx=633)


Embedding batches: 100%|██████████| 16/16 [00:02<00:00,  5.40it/s]
Processing files:  83%|████████▎ | 634/762 [29:37<03:23,  1.59s/it]

[→] Embedding file: Viện Hàn lâm Khoa học xã hội Việt Nam.jsonl (idx=634)


Embedding batches: 100%|██████████| 10/10 [00:01<00:00,  5.74it/s]
Processing files:  83%|████████▎ | 635/762 [29:39<03:28,  1.64s/it]

[→] Embedding file: Viện Kinh tế và Chính trị Thế giới.jsonl (idx=635)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  9.05it/s]
Processing files:  83%|████████▎ | 636/762 [29:39<02:29,  1.19s/it]

[→] Embedding file: Viện nghiên cứu Việt Nam.jsonl (idx=636)


Embedding batches: 100%|██████████| 19/19 [00:03<00:00,  5.46it/s]
Processing files:  84%|████████▎ | 637/762 [29:43<03:54,  1.88s/it]

[→] Embedding file: Việt Minh.jsonl (idx=637)


Embedding batches: 100%|██████████| 7/7 [00:01<00:00,  5.40it/s]
Processing files:  84%|████████▎ | 638/762 [29:44<03:31,  1.71s/it]

[→] Embedding file: Việt Nam Cộng hòa.jsonl (idx=638)


Embedding batches: 100%|██████████| 51/51 [00:09<00:00,  5.31it/s]
Processing files:  84%|████████▍ | 639/762 [29:54<08:22,  4.09s/it]

[→] Embedding file: Việt Nam Dân chủ Cộng hòa.jsonl (idx=639)


Embedding batches: 100%|██████████| 18/18 [00:03<00:00,  5.19it/s]
Processing files:  84%|████████▍ | 640/762 [29:57<07:56,  3.90s/it]

[→] Embedding file: Việt Nam Tuyên truyền Giải phóng quân.jsonl (idx=640)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  6.54it/s]
Processing files:  84%|████████▍ | 641/762 [29:57<05:42,  2.83s/it]

[→] Embedding file: Việt Nam cổ đại.jsonl (idx=641)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  5.89it/s]
Processing files:  84%|████████▍ | 642/762 [29:58<04:16,  2.13s/it]

[→] Embedding file: Việt Nam hải ngoại.jsonl (idx=642)


Embedding batches: 100%|██████████| 18/18 [00:03<00:00,  5.37it/s]
Processing files:  84%|████████▍ | 643/762 [30:01<04:57,  2.50s/it]

[→] Embedding file: Việt Nam theo năm.jsonl (idx=643)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  9.90it/s]
Processing files:  85%|████████▍ | 644/762 [30:01<03:30,  1.79s/it]

[→] Embedding file: Việt Nam tại Olympic Khoa học Quốc tế.jsonl (idx=644)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  6.77it/s]
Processing files:  85%|████████▍ | 645/762 [30:02<02:31,  1.30s/it]

[→] Embedding file: Việt Nam.jsonl (idx=645)


Embedding batches: 100%|██████████| 29/29 [00:05<00:00,  5.26it/s]
Processing files:  85%|████████▍ | 646/762 [30:07<04:58,  2.57s/it]

[→] Embedding file: Vua Việt Nam.jsonl (idx=646)


Embedding batches: 100%|██████████| 36/36 [00:06<00:00,  5.32it/s]
Processing files:  85%|████████▍ | 647/762 [30:14<07:20,  3.83s/it]

[→] Embedding file: Vua nhà Lê sơ.jsonl (idx=647)


Embedding batches: 100%|██████████| 6/6 [00:01<00:00,  5.40it/s]
Processing files:  85%|████████▌ | 648/762 [30:15<05:44,  3.02s/it]

[→] Embedding file: Vua nhà Lê trung hưng.jsonl (idx=648)


Embedding batches: 100%|██████████| 6/6 [00:00<00:00,  6.04it/s]
Processing files:  85%|████████▌ | 649/762 [30:16<04:33,  2.42s/it]

[→] Embedding file: Vua nhà Mạc.jsonl (idx=649)


Embedding batches: 100%|██████████| 16/16 [00:02<00:00,  5.43it/s]
Processing files:  85%|████████▌ | 650/762 [30:19<04:49,  2.58s/it]

[→] Embedding file: Vua nhà Nguyễn.jsonl (idx=650)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  7.03it/s]
Processing files:  85%|████████▌ | 651/762 [30:19<03:30,  1.90s/it]

[→] Embedding file: Vua nhà Ngô.jsonl (idx=651)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  5.71it/s]
Processing files:  86%|████████▌ | 652/762 [30:20<02:31,  1.38s/it]

[→] Embedding file: Vua nhà Tiền Lý.jsonl (idx=652)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  6.22it/s]
Processing files:  86%|████████▌ | 653/762 [30:20<02:01,  1.12s/it]

[→] Embedding file: Vua nhà Triệu.jsonl (idx=653)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  5.01it/s]
Processing files:  86%|████████▌ | 654/762 [30:21<01:50,  1.03s/it]

[→] Embedding file: Vua nhà Trần.jsonl (idx=654)


Embedding batches: 100%|██████████| 22/22 [00:04<00:00,  5.35it/s]
Processing files:  86%|████████▌ | 655/762 [30:25<03:29,  1.96s/it]

[→] Embedding file: Võ thuật Việt Nam.jsonl (idx=655)


Embedding batches: 100%|██████████| 15/15 [00:02<00:00,  5.34it/s]
Processing files:  86%|████████▌ | 656/762 [30:28<03:55,  2.22s/it]

[→] Embedding file: Võ tướng nhà Hồ.jsonl (idx=656)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  7.56it/s]
Processing files:  86%|████████▌ | 657/762 [30:28<02:47,  1.60s/it]

[→] Embedding file: Võ tướng nhà Lê sơ.jsonl (idx=657)


Embedding batches: 100%|██████████| 34/34 [00:06<00:00,  5.35it/s]
Processing files:  86%|████████▋ | 658/762 [30:34<05:15,  3.03s/it]

[→] Embedding file: Võ tướng nhà Ngô.jsonl (idx=658)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  6.13it/s]
Processing files:  86%|████████▋ | 659/762 [30:35<03:48,  2.22s/it]

[→] Embedding file: Võ tướng nhà Tiền Lê.jsonl (idx=659)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  5.81it/s]
Processing files:  87%|████████▋ | 660/762 [30:35<02:54,  1.71s/it]

[→] Embedding file: Võ tướng nhà Tiền Lý.jsonl (idx=660)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  5.44it/s]
Processing files:  87%|████████▋ | 661/762 [30:36<02:23,  1.42s/it]

[→] Embedding file: Võ tướng nhà Tây Sơn.jsonl (idx=661)


Embedding batches: 100%|██████████| 42/42 [00:08<00:00,  5.22it/s]
Processing files:  87%|████████▋ | 662/762 [30:44<05:41,  3.42s/it]

[→] Embedding file: Vùng sinh thái ngập nước Việt Nam.jsonl (idx=662)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  4.95it/s]
Processing files:  87%|████████▋ | 663/762 [30:44<04:02,  2.45s/it]

[→] Embedding file: Văn Lang.jsonl (idx=663)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  5.78it/s]
Processing files:  87%|████████▋ | 664/762 [30:44<02:53,  1.77s/it]

[→] Embedding file: Văn hóa Sa Huỳnh.jsonl (idx=664)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  8.40it/s]
Processing files:  87%|████████▋ | 665/762 [30:45<02:03,  1.28s/it]

[→] Embedding file: Văn hóa Việt Nam Cộng hòa.jsonl (idx=665)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  5.17it/s]
Processing files:  87%|████████▋ | 666/762 [30:45<01:42,  1.07s/it]

[→] Embedding file: Văn hóa Việt Nam thời Nguyễn.jsonl (idx=666)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  8.30it/s]
Processing files:  88%|████████▊ | 667/762 [30:45<01:14,  1.27it/s]

[→] Embedding file: Văn hóa Việt Nam.jsonl (idx=667)


Embedding batches: 100%|██████████| 71/71 [00:13<00:00,  5.19it/s]
Processing files:  88%|████████▊ | 668/762 [30:59<07:18,  4.66s/it]

[→] Embedding file: Văn hóa cổ Việt Nam.jsonl (idx=668)


Embedding batches: 100%|██████████| 20/20 [00:03<00:00,  5.13it/s]
Processing files:  88%|████████▊ | 669/762 [31:03<06:52,  4.44s/it]

[→] Embedding file: Văn hóa dân gian Việt Nam.jsonl (idx=669)


Embedding batches: 100%|██████████| 5/5 [00:00<00:00,  5.56it/s]
Processing files:  88%|████████▊ | 670/762 [31:04<05:10,  3.38s/it]

[→] Embedding file: Văn hóa làng xã Việt Nam.jsonl (idx=670)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  5.56it/s]
Processing files:  88%|████████▊ | 671/762 [31:04<03:55,  2.58s/it]

[→] Embedding file: Văn hóa triều Lê trung hưng.jsonl (idx=671)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  7.24it/s]
Processing files:  88%|████████▊ | 672/762 [31:05<02:46,  1.85s/it]

[→] Embedding file: Văn hóa xứ Đông.jsonl (idx=672)


Embedding batches: 100%|██████████| 7/7 [00:01<00:00,  5.24it/s]
Processing files:  88%|████████▊ | 673/762 [31:06<02:31,  1.70s/it]

[→] Embedding file: Văn hóa Đông Sơn.jsonl (idx=673)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  6.27it/s]
Processing files:  88%|████████▊ | 674/762 [31:06<01:57,  1.34s/it]

[→] Embedding file: Văn hóa đại chúng Việt Nam.jsonl (idx=674)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  5.99it/s]
Processing files:  89%|████████▊ | 675/762 [31:07<01:25,  1.01it/s]

[→] Embedding file: Văn phòng Chính phủ Việt Nam.jsonl (idx=675)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  5.77it/s]
Processing files:  89%|████████▊ | 676/762 [31:07<01:13,  1.18it/s]

[→] Embedding file: Vũ khí Việt Nam.jsonl (idx=676)


Embedding batches: 100%|██████████| 14/14 [00:02<00:00,  5.42it/s]
Processing files:  89%|████████▉ | 677/762 [31:10<01:56,  1.37s/it]

[→] Embedding file: Vũ khí trong Chiến tranh Việt Nam.jsonl (idx=677)


Embedding batches: 100%|██████████| 78/78 [00:14<00:00,  5.25it/s]
Processing files:  89%|████████▉ | 678/762 [31:25<07:35,  5.43s/it]

[→] Embedding file: Vương quốc Sedang.jsonl (idx=678)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  5.68it/s]
Processing files:  89%|████████▉ | 679/762 [31:25<05:19,  3.85s/it]

[→] Embedding file: Vương triều Thonburi.jsonl (idx=679)


Embedding batches: 100%|██████████| 5/5 [00:00<00:00,  5.14it/s]
Processing files:  89%|████████▉ | 680/762 [31:26<04:05,  2.99s/it]

[→] Embedding file: Vương tước Việt Nam.jsonl (idx=680)


Embedding batches: 100%|██████████| 5/5 [00:00<00:00,  5.38it/s]
Processing files:  89%|████████▉ | 681/762 [31:27<03:12,  2.38s/it]

[→] Embedding file: Vương tước nhà Ngô.jsonl (idx=681)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  5.82it/s]
Processing files:  90%|████████▉ | 682/762 [31:27<02:21,  1.77s/it]

[→] Embedding file: Vườn quốc gia Việt Nam.jsonl (idx=682)


Embedding batches: 100%|██████████| 23/23 [00:04<00:00,  5.34it/s]
Processing files:  90%|████████▉ | 683/762 [31:31<03:20,  2.54s/it]

[→] Embedding file: Vườn thú tại Việt Nam.jsonl (idx=683)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  6.53it/s]
Processing files:  90%|████████▉ | 684/762 [31:32<02:29,  1.92s/it]

[→] Embedding file: Vấn đề xã hội Việt Nam.jsonl (idx=684)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  5.33it/s]
Processing files:  90%|████████▉ | 685/762 [31:32<01:52,  1.46s/it]

[→] Embedding file: Vụ án Xét lại Chống Đảng.jsonl (idx=685)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  6.00it/s]
Processing files:  90%|█████████ | 686/762 [31:33<01:25,  1.13s/it]

[→] Embedding file: Vụ án kinh tế Việt Nam.jsonl (idx=686)


Embedding batches: 100%|██████████| 24/24 [00:04<00:00,  5.31it/s]
Processing files:  90%|█████████ | 687/762 [31:37<02:41,  2.15s/it]

[→] Embedding file: Xe máy Việt Nam.jsonl (idx=687)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  6.60it/s]
Processing files:  90%|█████████ | 688/762 [31:38<02:01,  1.65s/it]

[→] Embedding file: Xã hội Việt Nam thời Pháp thuộc.jsonl (idx=688)


Embedding batches: 100%|██████████| 6/6 [00:01<00:00,  5.17it/s]
Processing files:  90%|█████████ | 689/762 [31:39<01:49,  1.50s/it]

[→] Embedding file: Xã hội Việt Nam.jsonl (idx=689)


Embedding batches: 100%|██████████| 27/27 [00:05<00:00,  5.35it/s]
Processing files:  91%|█████████ | 690/762 [31:44<03:05,  2.57s/it]

[→] Embedding file: Xô Viết Nghệ Tĩnh.jsonl (idx=690)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  6.93it/s]
Processing files:  91%|█████████ | 691/762 [31:44<02:14,  1.89s/it]

[→] Embedding file: Xứ Nghệ.jsonl (idx=691)


Embedding batches: 100%|██████████| 6/6 [00:01<00:00,  5.60it/s]
Processing files:  91%|█████████ | 692/762 [31:45<01:55,  1.65s/it]

[→] Embedding file: Xứ Thượng Nam Đông Dương.jsonl (idx=692)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00, 10.38it/s]
Processing files:  91%|█████████ | 693/762 [31:45<01:21,  1.19s/it]

[→] Embedding file: Xứ Đông.jsonl (idx=693)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  7.15it/s]
Processing files:  91%|█████████ | 694/762 [31:46<01:02,  1.09it/s]

[→] Embedding file: Xứ ủy.jsonl (idx=694)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  5.55it/s]
Processing files:  91%|█████████ | 695/762 [31:46<00:46,  1.43it/s]

[→] Embedding file: Y học cổ truyền Việt Nam.jsonl (idx=695)


Embedding batches: 100%|██████████| 7/7 [00:01<00:00,  5.46it/s]
Processing files:  91%|█████████▏| 696/762 [31:47<00:57,  1.14it/s]

[→] Embedding file: Y tế Hà Nội.jsonl (idx=696)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  8.21it/s]
Processing files:  91%|█████████▏| 697/762 [31:47<00:42,  1.54it/s]

[→] Embedding file: Y tế Thành phố Hồ Chí Minh.jsonl (idx=697)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  5.43it/s]
Processing files:  92%|█████████▏| 698/762 [31:48<00:36,  1.76it/s]

[→] Embedding file: Y tế Việt Nam.jsonl (idx=698)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  5.78it/s]
Processing files:  92%|█████████▏| 699/762 [31:48<00:38,  1.64it/s]

[→] Embedding file: Án lệ hành chính Việt Nam.jsonl (idx=699)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  5.75it/s]
Processing files:  92%|█████████▏| 700/762 [31:49<00:39,  1.57it/s]

[→] Embedding file: Ô nhiễm môi trường Việt Nam.jsonl (idx=700)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  6.50it/s]
Processing files:  92%|█████████▏| 701/762 [31:49<00:30,  2.02it/s]

[→] Embedding file: Điểm chuẩn tính Đường cơ sở Việt Nam.jsonl (idx=701)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  6.44it/s]
Processing files:  92%|█████████▏| 702/762 [31:50<00:32,  1.87it/s]

[→] Embedding file: Điểm cực trị của Việt Nam.jsonl (idx=702)


Embedding batches: 100%|██████████| 5/5 [00:00<00:00,  5.82it/s]
Processing files:  92%|█████████▏| 703/762 [31:51<00:37,  1.57it/s]

[→] Embedding file: Điểm tham quan ở Việt Nam.jsonl (idx=703)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  6.20it/s]
Processing files:  92%|█████████▏| 704/762 [31:51<00:37,  1.56it/s]

[→] Embedding file: Đài Tiếng nói Việt Nam.jsonl (idx=704)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  8.40it/s]
Processing files:  93%|█████████▎| 705/762 [31:51<00:27,  2.05it/s]

[→] Embedding file: Đài Truyền hình Việt Nam.jsonl (idx=705)


Embedding batches: 100%|██████████| 23/23 [00:04<00:00,  5.21it/s]
Processing files:  93%|█████████▎| 706/762 [31:56<01:33,  1.67s/it]

[→] Embedding file: Đá ngầm Việt Nam.jsonl (idx=706)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  7.29it/s]
Processing files:  93%|█████████▎| 707/762 [31:56<01:11,  1.29s/it]

[→] Embedding file: Đèo Việt Nam.jsonl (idx=707)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00, 10.55it/s]
Processing files:  93%|█████████▎| 708/762 [31:56<00:50,  1.07it/s]

[→] Embedding file: Đô thị Việt Nam.jsonl (idx=708)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  9.70it/s]
Processing files:  93%|█████████▎| 709/762 [31:57<00:36,  1.45it/s]

[→] Embedding file: Đông Kinh Nghĩa Thục.jsonl (idx=709)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  5.02it/s]
Processing files:  93%|█████████▎| 710/762 [31:57<00:34,  1.51it/s]

[→] Embedding file: Đơn vị hành chính Việt Nam đặt tên theo tên người.jsonl (idx=710)


Embedding batches: 100%|██████████| 124/124 [00:19<00:00,  6.46it/s]
Processing files:  93%|█████████▎| 711/762 [32:16<05:17,  6.23s/it]

[→] Embedding file: Đơn vị hành chính cấp huyện Việt Nam.jsonl (idx=711)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  5.43it/s]
Processing files:  93%|█████████▎| 712/762 [32:17<03:49,  4.59s/it]

[→] Embedding file: Đơn vị hành chính thời Lý.jsonl (idx=712)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00, 10.84it/s]
Processing files:  94%|█████████▎| 713/762 [32:17<02:38,  3.24s/it]

[→] Embedding file: Đơn vị tiền tệ Việt Nam.jsonl (idx=713)


Embedding batches: 100%|██████████| 7/7 [00:01<00:00,  5.51it/s]
Processing files:  94%|█████████▎| 714/762 [32:19<02:07,  2.65s/it]

[→] Embedding file: Đại học Việt Nam.jsonl (idx=714)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  6.62it/s]
Processing files:  94%|█████████▍| 715/762 [32:19<01:33,  2.00s/it]

[→] Embedding file: Đại hội Đảng Cộng sản Việt Nam.jsonl (idx=715)


Embedding batches: 100%|██████████| 10/10 [00:01<00:00,  5.40it/s]
Processing files:  94%|█████████▍| 716/762 [32:21<01:29,  1.96s/it]

[→] Embedding file: Đảng Cộng sản Việt Nam.jsonl (idx=716)


Embedding batches: 100%|██████████| 27/27 [00:05<00:00,  5.33it/s]
Processing files:  94%|█████████▍| 717/762 [32:26<02:10,  2.90s/it]

[→] Embedding file: Đảng phái chính trị Việt Nam.jsonl (idx=717)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  7.87it/s]
Processing files:  94%|█████████▍| 718/762 [32:26<01:30,  2.07s/it]

[→] Embedding file: Đảng viên Đảng Cộng sản Việt Nam bị kỉ luật.jsonl (idx=718)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  5.14it/s]
Processing files:  94%|█████████▍| 719/762 [32:26<01:07,  1.57s/it]

[→] Embedding file: Đảng viên Đảng Cộng sản Việt Nam.jsonl (idx=719)


Embedding batches: 100%|██████████| 432/432 [01:14<00:00,  5.76it/s]
Processing files:  94%|█████████▍| 720/762 [33:42<16:31, 23.61s/it]

[→] Embedding file: Đảng ủy Công an Trung ương Việt Nam.jsonl (idx=720)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00, 10.26it/s]
Processing files:  95%|█████████▍| 721/762 [33:42<11:18, 16.56s/it]

[→] Embedding file: Đảng ủy Khối các cơ quan Trung ương Việt Nam.jsonl (idx=721)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  6.15it/s]
Processing files:  95%|█████████▍| 722/762 [33:42<07:49, 11.74s/it]

[→] Embedding file: Đảo Việt Nam.jsonl (idx=722)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  5.63it/s]
Processing files:  95%|█████████▍| 723/762 [33:43<05:28,  8.44s/it]

[→] Embedding file: Đế quốc Khmer.jsonl (idx=723)


Embedding batches: 100%|██████████| 7/7 [00:01<00:00,  5.71it/s]
Processing files:  95%|█████████▌| 724/762 [33:44<03:58,  6.28s/it]

[→] Embedding file: Địa chất Việt Nam.jsonl (idx=724)


Embedding batches: 100%|██████████| 9/9 [00:01<00:00,  5.06it/s]
Processing files:  95%|█████████▌| 725/762 [33:46<03:02,  4.93s/it]

[→] Embedding file: Địa danh cũ Thành phố Hồ Chí Minh.jsonl (idx=725)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  5.19it/s]
Processing files:  95%|█████████▌| 726/762 [33:47<02:12,  3.68s/it]

[→] Embedding file: Địa danh cũ Việt Nam.jsonl (idx=726)


Embedding batches: 100%|██████████| 16/16 [00:03<00:00,  5.15it/s]
Processing files:  95%|█████████▌| 727/762 [33:50<02:03,  3.52s/it]

[→] Embedding file: Địa danh lịch sử Việt Nam.jsonl (idx=727)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  8.77it/s]
Processing files:  96%|█████████▌| 728/762 [33:50<01:24,  2.50s/it]

[→] Embedding file: Địa danh trong Chiến tranh Việt Nam.jsonl (idx=728)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  6.54it/s]
Processing files:  96%|█████████▌| 729/762 [33:50<01:00,  1.84s/it]

[→] Embedding file: Địa danh tại Hà Nội.jsonl (idx=729)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  4.93it/s]
Processing files:  96%|█████████▌| 730/762 [33:50<00:43,  1.35s/it]

[→] Embedding file: Địa danh tại Thành phố Hồ Chí Minh.jsonl (idx=730)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  6.54it/s]
Processing files:  96%|█████████▌| 731/762 [33:51<00:30,  1.01it/s]

[→] Embedding file: Địa lý An Giang.jsonl (idx=731)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  7.14it/s]
Processing files:  96%|█████████▌| 732/762 [33:51<00:23,  1.28it/s]

[→] Embedding file: Địa lý Bà Rịa – Vũng Tàu.jsonl (idx=732)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00, 10.99it/s]


[→] Embedding file: Địa lý Cà Mau.jsonl (idx=733)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  5.96it/s]
Processing files:  96%|█████████▋| 734/762 [33:51<00:13,  2.07it/s]

[→] Embedding file: Địa lý Hà Nội.jsonl (idx=734)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  4.80it/s]
Processing files:  96%|█████████▋| 735/762 [33:51<00:11,  2.40it/s]

[→] Embedding file: Địa lý Hà Tĩnh.jsonl (idx=735)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  8.77it/s]
Processing files:  97%|█████████▋| 736/762 [33:51<00:08,  2.94it/s]

[→] Embedding file: Địa lý Hòa Bình.jsonl (idx=736)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  5.87it/s]
Processing files:  97%|█████████▋| 737/762 [33:52<00:08,  2.92it/s]

[→] Embedding file: Địa lý Khánh Hòa.jsonl (idx=737)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  6.88it/s]
Processing files:  97%|█████████▋| 738/762 [33:52<00:07,  3.02it/s]

[→] Embedding file: Địa lý Kiên Giang.jsonl (idx=738)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  6.44it/s]
Processing files:  97%|█████████▋| 739/762 [33:52<00:06,  3.53it/s]

[→] Embedding file: Địa lý Long An.jsonl (idx=739)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00, 10.45it/s]
Processing files:  97%|█████████▋| 740/762 [33:52<00:05,  4.34it/s]

[→] Embedding file: Địa lý Phú Yên.jsonl (idx=740)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  5.86it/s]
Processing files:  97%|█████████▋| 741/762 [33:53<00:04,  4.66it/s]

[→] Embedding file: Địa lý Quảng Ninh.jsonl (idx=741)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  6.17it/s]
Processing files:  97%|█████████▋| 742/762 [33:53<00:05,  3.99it/s]

[→] Embedding file: Địa lý Sóc Trăng.jsonl (idx=742)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  9.50it/s]
Processing files:  98%|█████████▊| 743/762 [33:53<00:03,  4.76it/s]

[→] Embedding file: Địa lý Thanh Hóa.jsonl (idx=743)


Embedding batches: 100%|██████████| 2/2 [00:00<00:00,  5.63it/s]
Processing files:  98%|█████████▊| 744/762 [33:53<00:04,  3.90it/s]

[→] Embedding file: Địa lý Thành phố Hồ Chí Minh.jsonl (idx=744)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  6.37it/s]
Processing files:  98%|█████████▊| 745/762 [33:54<00:03,  4.36it/s]

[→] Embedding file: Địa lý Tiền Giang.jsonl (idx=745)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  6.23it/s]
Processing files:  98%|█████████▊| 746/762 [33:54<00:03,  4.72it/s]

[→] Embedding file: Địa lý Việt Nam.jsonl (idx=746)


Embedding batches: 100%|██████████| 16/16 [00:03<00:00,  5.22it/s]
Processing files:  98%|█████████▊| 747/762 [33:57<00:16,  1.07s/it]

[→] Embedding file: Địa lý kinh tế Việt Nam.jsonl (idx=747)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  5.32it/s]
Processing files:  98%|█████████▊| 748/762 [33:57<00:12,  1.08it/s]

[→] Embedding file: Địa mạo Việt Nam.jsonl (idx=748)


Embedding batches: 100%|██████████| 5/5 [00:00<00:00,  5.20it/s]
Processing files:  98%|█████████▊| 749/762 [33:58<00:12,  1.07it/s]

[→] Embedding file: Đồng Tháp Mười.jsonl (idx=749)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00, 10.52it/s]
Processing files:  98%|█████████▊| 750/762 [33:58<00:08,  1.45it/s]

[→] Embedding file: Đội tuyển bóng đá quốc gia Việt Nam Cộng hòa.jsonl (idx=750)


Embedding batches: 100%|██████████| 3/3 [00:00<00:00,  5.22it/s]
Processing files:  99%|█████████▊| 751/762 [33:59<00:07,  1.53it/s]

[→] Embedding file: Động đất tại Việt Nam.jsonl (idx=751)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  6.06it/s]
Processing files:  99%|█████████▊| 752/762 [33:59<00:05,  1.96it/s]

[→] Embedding file: Ẩm thực Việt Nam.jsonl (idx=752)


Embedding batches: 100%|██████████| 85/85 [00:15<00:00,  5.34it/s]
Processing files:  99%|█████████▉| 753/762 [34:15<00:46,  5.14s/it]

[→] Embedding file: Ủy ban Dân tộc Việt Nam.jsonl (idx=753)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  9.06it/s]
Processing files:  99%|█████████▉| 754/762 [34:15<00:29,  3.63s/it]

[→] Embedding file: Ủy ban Kiểm tra Đảng ủy Công an Trung ương.jsonl (idx=754)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  6.67it/s]
Processing files:  99%|█████████▉| 755/762 [34:15<00:18,  2.59s/it]

[→] Embedding file: Ủy ban Quản lý vốn Nhà nước tại doanh nghiệp.jsonl (idx=755)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  5.72it/s]
Processing files:  99%|█████████▉| 756/762 [34:16<00:12,  2.03s/it]

[→] Embedding file: Ủy viên Ban Chấp hành Trung ương Đảng Cộng sản Việt Nam.jsonl (idx=756)


Embedding batches: 100%|██████████| 21/21 [00:03<00:00,  5.62it/s]
Processing files:  99%|█████████▉| 757/762 [34:20<00:12,  2.54s/it]

[→] Embedding file: Ủy viên Bộ Chính trị Ban Chấp hành Trung ương Đảng Cộng sản Việt Nam khóa XII.jsonl (idx=757)


Embedding batches: 100%|██████████| 11/11 [00:01<00:00,  5.54it/s]
Processing files:  99%|█████████▉| 758/762 [34:22<00:09,  2.38s/it]

[→] Embedding file: Ủy viên Bộ Chính trị Ban Chấp hành Trung ương Đảng Cộng sản Việt Nam khóa XIII.jsonl (idx=758)


Embedding batches: 100%|██████████| 6/6 [00:01<00:00,  5.67it/s]
Processing files: 100%|█████████▉| 759/762 [34:23<00:05,  1.98s/it]

[→] Embedding file: Ủy viên Dự khuyết Ban Chấp hành Trung ương Đảng Cộng sản Việt Nam.jsonl (idx=759)


Embedding batches: 100%|██████████| 4/4 [00:00<00:00,  5.60it/s]
Processing files: 100%|█████████▉| 760/762 [34:24<00:03,  1.61s/it]

[→] Embedding file: Ủy viên Trung ương Đảng Cộng sản Việt Nam.jsonl (idx=760)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00,  7.86it/s]
Processing files: 100%|█████████▉| 761/762 [34:24<00:01,  1.16s/it]

[→] Embedding file: Ủy viên Đảng ủy Công an Trung ương Việt Nam.jsonl (idx=761)


Embedding batches: 100%|██████████| 1/1 [00:00<00:00, 10.08it/s]
Processing files: 100%|██████████| 762/762 [34:24<00:00,  2.71s/it]


In [5]:
# --------------------
# 3. Gộp tất cả các embedding và metadata
# --------------------
print("[+] Merging all embeddings and metadata...")

all_embeddings = []
all_metadata = []

for fname in sorted(os.listdir(output_dir)):
    if fname.startswith("embeddings_part_") and fname.endswith(".npy"):
        emb = np.load(os.path.join(output_dir, fname))
        all_embeddings.append(emb)

    if fname.startswith("metadata_part_") and fname.endswith(".json"):
        with open(os.path.join(output_dir, fname), "r", encoding="utf-8") as f:
            metadata = json.load(f)
            all_metadata.extend(metadata)

all_embeddings = np.vstack(all_embeddings)

d = all_embeddings.shape[1]
index = faiss.IndexFlatIP(d)
index.add(all_embeddings)

faiss.write_index(index, "wiki_chunks.index")
with open("wiki_metadata.json", "w", encoding="utf-8") as f:
    json.dump(all_metadata, f, ensure_ascii=False, indent=2)

print(f"[✓] Done! Total vectors: {len(all_embeddings)}")

[+] Merging all embeddings and metadata...
[✓] Done! Total vectors: 93996
