In [32]:
from pathlib import Path
from typing import Optional, Iterator

from docling.datamodel.accelerator_options import AcceleratorOptions, AcceleratorDevice
from docling_core.transforms.chunker import BaseChunker, PageChunker
from docling_core.types import DoclingDocument
from langchain_docling.loader import BaseMetaExtractor, MetaExtractor
from langchain_core.documents import Document

from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import PdfPipelineOptions, TesseractCliOcrOptions

# Default file suffixes to collect: a dot followed by each docling InputFormat
# value (e.g. ".docx", ".html").
# NOTE(review): this assumes every InputFormat value is itself a real file
# extension; values that are format names rather than suffixes will simply
# never match a file - confirm against the docling version in use.
DEFAULT_EXTENSIONS = [f".{fmt.value}" for fmt in InputFormat]


def collect_documents_paths(
        directory_path: Optional[str] = None,
        recursive: bool = True,
        file_extensions: Optional[list[str]] = None
) -> list[Path]:
    """Return the paths under ``directory_path`` whose names match a wanted extension.

    Args:
        directory_path: Directory to scan. Required, despite the ``None`` default.
        recursive: When true, subdirectories are searched as well.
        file_extensions: Dotted suffixes (e.g. ``".html"``) to match. ``None``
            falls back to ``DEFAULT_EXTENSIONS``; an empty list matches nothing.

    Returns:
        The matching paths, as produced by ``_collect_files``.

    Raises:
        ValueError: If ``directory_path`` is missing/empty, or does not point
            to an existing directory.
    """
    if not directory_path:
        # Only directory_path is accepted by this function, so name only it.
        raise ValueError("directory_path must be provided")

    dir_path = Path(directory_path)
    # is_dir() is already False for a path that does not exist.
    if not dir_path.is_dir():
        raise ValueError(f"Invalid directory path: {directory_path}")

    if file_extensions is None:
        file_extensions = DEFAULT_EXTENSIONS

    return _collect_files(dir_path, file_extensions, recursive)


def chunk(document_list: Iterator[DoclingDocument],
          chunker: Optional[BaseChunker] = None,
          meta_extractor: Optional[BaseMetaExtractor] = None) \
        -> Iterator[Document]:
    """Lazily split docling documents into LangChain ``Document`` chunks.

    Args:
        document_list: Iterable of converted docling documents.
        chunker: Chunker instance used to split each document. ``None`` builds a
            ``PageChunker()``; a chunker *class* is also accepted and is
            instantiated with no arguments.
        meta_extractor: Builds the metadata for each chunk. ``None`` builds a
            fresh ``MetaExtractor()`` per call.

    Yields:
        One ``Document`` per chunk, with ``page_content`` taken from
        ``chunker.contextualize`` and metadata from
        ``meta_extractor.extract_chunk_meta``.
    """
    # The previous default was the PageChunker class itself, which cannot be
    # used as an instance; resolve both None and a bare class to an instance.
    if chunker is None:
        chunker = PageChunker()
    elif isinstance(chunker, type):
        chunker = chunker()

    # Built per call instead of once at definition time.
    if meta_extractor is None:
        meta_extractor = MetaExtractor()

    for doc in document_list:
        for _chunk in chunker.chunk(doc):
            yield Document(
                page_content=chunker.contextualize(chunk=_chunk),
                metadata=meta_extractor.extract_chunk_meta(
                    file_path=doc.name,  # todo change this later
                    chunk=_chunk,
                ),
            )

def convert_to_markdown(documents: list[Path]) -> Iterator[DoclingDocument]:
    """Lazily convert each path with docling and yield the converted documents.

    Despite the name, the yielded objects are ``DoclingDocument`` instances
    (``result.document``), not markdown strings.

    This is a generator: nothing runs, including the empty-list check, until
    the first item is requested.

    Args:
        documents: Paths of the files to convert.

    Yields:
        The converted document for every path that converted without errors.
        Paths that raise, or whose result reports errors, are skipped with a
        logged warning.

    Raises:
        ValueError: If ``documents`` is empty (raised on first iteration).
    """
    import logging  # local import keeps this block self-contained

    if not documents:
        raise ValueError("Documents list cannot be empty")

    logger = logging.getLogger(__name__)

    # Create document converter with format-specific options
    converter = DocumentConverter(
        format_options={
            InputFormat.PDF: PdfFormatOption(pipeline_options=_get_pdf_pipline())
        }
    )

    for doc in documents:
        try:
            result = converter.convert(doc)
        except Exception as exc:
            # Best-effort: one bad file must not abort the run, but say so.
            logger.warning("Skipping %s: conversion failed (%s)", doc, exc)
            continue

        if result.errors:
            logger.warning(
                "Skipping %s: conversion reported errors: %s", doc, result.errors
            )
            continue

        yield result.document


def _get_pdf_pipline() -> PdfPipelineOptions:
    """Build the ``PdfPipelineOptions`` used for PDF conversion.

    Returns:
        Options with OCR (Tesseract CLI options), table structure with cell
        matching, picture/table image generation, and a 4-thread accelerator
        configuration using ``AcceleratorDevice.AUTO``.
    """
    opts = PdfPipelineOptions()

    # Accelerator: four threads, device left to AcceleratorDevice.AUTO.
    opts.accelerator_options = AcceleratorOptions(
        device=AcceleratorDevice.AUTO,
        num_threads=4,
    )

    # OCR
    opts.ocr_options = TesseractCliOcrOptions()
    opts.do_ocr = True

    # Table structure
    opts.do_table_structure = True
    opts.table_structure_options.do_cell_matching = True

    # Image generation
    opts.generate_table_images = True
    opts.generate_picture_images = True

    return opts


def _collect_files(directory: Path, extensions: list[str], recursive: bool) -> list[Path]:
    files = []
    pattern_func = directory.rglob if recursive else directory.glob
    for ext in extensions:
        files.extend(pattern_func(f"*{ext}"))
    return files


In [57]:
from typing import Optional
from langchain_core.documents import Document
from langchain_core.vectorstores import VectorStoreRetriever
from langchain_milvus import Milvus

from langchain_core.embeddings import Embeddings


class VectorStore:
    """Small wrapper around a langchain-milvus ``Milvus`` vector store.

    The underlying store is created by ``create_from_documents``; every other
    operation raises ``RuntimeError`` until that has been called.
    """

    # Connection used when the caller does not supply one.
    DEFAULT_CONNECTION_ARGS = {"host": "localhost", "port": "19530"}

    def __init__(
            self,
            embedding: Embeddings,
            collection_name: str = "docling_demo",
            drop_old: bool = False,
            connection_args: Optional[dict] = None,
    ) -> None:
        """Record the configuration; no connection is opened here.

        Args:
            embedding: Embedding model handed to Milvus when the store is created.
            collection_name: Name of the Milvus collection.
            drop_old: Stored on the instance only. It is not applied anywhere:
                ``create_from_documents`` takes its own ``drop_old`` argument.
            connection_args: Milvus connection arguments. ``None`` uses
                ``DEFAULT_CONNECTION_ARGS`` (localhost:19530).
        """
        self.embedding = embedding
        self.collection_name: str = collection_name
        self.drop_old: bool = drop_old
        # Copy so later mutation by the caller (or of the class default) has no effect.
        self.connection_args: dict = dict(
            connection_args or self.DEFAULT_CONNECTION_ARGS
        )
        self.vectorstore: Optional[Milvus] = None

    def _require_store(self) -> Milvus:
        """Return the initialized store or raise ``RuntimeError`` if there is none."""
        if self.vectorstore is None:
            raise RuntimeError(
                "Vector store not initialized. Call create_from_documents() first."
            )
        return self.vectorstore

    def create_from_documents(
        self,
        documents: list[Document],
        drop_old: bool = True
    ) -> Milvus:
        """Create the Milvus store from ``documents`` and remember it on the instance.

        Args:
            documents: Documents to embed and insert; must be non-empty.
            drop_old: Forwarded to ``Milvus.from_documents``.

        Returns:
            The newly created ``Milvus`` store.

        Raises:
            ValueError: If ``documents`` is empty.
        """
        if not documents:
            raise ValueError("Documents list cannot be empty")

        self.vectorstore = Milvus.from_documents(
            documents=documents,
            embedding=self.embedding,
            collection_name=self.collection_name,
            connection_args=self.connection_args,
            index_params={
                "index_type": "FLAT",
                "metric_type": "IP"  # use "L2" for Euclidean
            },
            drop_old=drop_old,
        )
        return self.vectorstore

    def add_documents(self, documents: list[Document]) -> list[str]:
        """Add ``documents`` to the existing store and return what the store returns.

        Raises:
            RuntimeError: If the store has not been created yet.
            ValueError: If ``documents`` is empty.
        """
        store = self._require_store()

        if not documents:
            raise ValueError("Documents list cannot be empty")

        return store.add_documents(documents)

    def get_retriever(self, top_k: int = 5) -> VectorStoreRetriever:
        """Return a retriever over the store that fetches ``top_k`` results.

        Raises:
            RuntimeError: If the store has not been created yet.
        """
        return self._require_store().as_retriever(search_kwargs={"k": top_k})

    def similarity_search(
            self,
            query: str,
            k: int = 5
    ) -> list[Document]:
        """Run a similarity search for ``query`` and return up to ``k`` documents.

        Raises:
            RuntimeError: If the store has not been created yet.
        """
        return self._require_store().similarity_search(query, k=k)


In [3]:
# from typing import Optional, List
# import faiss
# import numpy as np
# from langchain.docstore.document import Document
# from langchain_community.vectorstores import FAISS
# from langchain.embeddings.base import Embeddings
# from langchain.vectorstores.base import VectorStoreRetriever
#
#
# class VectorStore:
#     def __init__(
#         self,
#         embedding: Embeddings,
#         collection_name: str = "faiss_demo",
#         drop_old: bool = False,
#     ) -> None:
#         self.embedding = embedding
#         self.collection_name = collection_name
#         self.vectorstore: Optional[FAISS] = None
#         self.drop_old = drop_old
#
#         if drop_old:
#             # Start fresh
#             self.vectorstore = None
#
#     def create_from_documents(self, documents: List[Document]) -> FAISS:
#         """
#         Create a FAISS index from a list of documents.
#         """
#         if not documents:
#             raise ValueError("Documents list cannot be empty")
#
#         # Build FAISS index
#         self.vectorstore = FAISS.from_documents(
#             documents=documents,
#             embedding=self.embedding
#         )
#
#         return self.vectorstore
#
#     def add_documents(self, documents: List[Document]) -> List[str]:
#         """
#         Add new documents to an existing FAISS index.
#         """
#         if not self.vectorstore:
#             raise RuntimeError(
#                 "Vector store not initialized. Call create_from_documents() first."
#             )
#
#         if not documents:
#             raise ValueError("Documents list cannot be empty")
#
#         return self.vectorstore.add_documents(documents)
#
#     def get_retriever(self, top_k: int = 5) -> VectorStoreRetriever:
#         """
#         Return a retriever object for semantic search.
#         """
#         if not self.vectorstore:
#             raise RuntimeError(
#                 "Vector store not initialized. Call create_from_documents() first."
#             )
#
#         return self.vectorstore.as_retriever(search_kwargs={"k": top_k})
#
#     def similarity_search(self, query: str, k: int = 5) -> List[Document]:
#         """
#         Perform similarity search on the FAISS index.
#         """
#         if not self.vectorstore:
#             raise RuntimeError(
#                 "Vector store not initialized. Call create_from_documents() first."
#             )
#
#         return self.vectorstore.similarity_search(query, k=k)
#
#     def save_local(self, path: str) -> None:
#         """
#         Save FAISS index to disk.
#         """
#         if not self.vectorstore:
#             raise RuntimeError("No FAISS index to save.")
#
#         self.vectorstore.save_local(path)
#
#     def load_local(self, path: str) -> None:
#         """
#         Load FAISS index from disk.
#         """
#         self.vectorstore = FAISS.load_local(
#             path=path,
#             embeddings=self.embedding,
#             allow_dangerous_deserialization=True,
#         )
#

2025-10-16 03:57:54,531 - INFO - Loading faiss with AVX2 support.
2025-10-16 03:57:54,606 - INFO - Successfully loaded faiss with AVX2 support.


In [52]:
from typing import Optional
from langchain_huggingface.embeddings import HuggingFaceEmbeddings


class EmbeddingModel:
    """Holds a ``HuggingFaceEmbeddings`` instance for a given model id."""

    # Model loaded when the caller does not name one.
    DEFAULT_MODEL_ID = "sentence-transformers/all-MiniLM-L6-v2"

    def __init__(self, model_id: Optional[str] = None) -> None:
        """Load the embedding model.

        Args:
            model_id: Model name passed to ``HuggingFaceEmbeddings``. ``None``
                (or an empty string) selects ``DEFAULT_MODEL_ID``.
        """
        # Previously the argument was ignored and the default was always used.
        self.model_id: str = model_id or self.DEFAULT_MODEL_ID
        self.embedding: HuggingFaceEmbeddings = HuggingFaceEmbeddings(
            model_name=self.model_id
        )

    def get_embedding(self) -> HuggingFaceEmbeddings:
        """Return the embedding object created in ``__init__``."""
        return self.embedding

In [58]:
from typing import Optional, Iterator

from colorama import Fore, Style, init
from docling_core.transforms.chunker.hybrid_chunker import HybridChunker
from langchain_huggingface import HuggingFaceEmbeddings

# Initialize colorama once for this notebook, with autoreset enabled.
# The print helpers below still append Style.RESET_ALL explicitly.
init(autoreset=True)


def _print_step(emoji: str, message: str, color: str = Fore.CYAN):
    """Print ``emoji`` and ``message`` wrapped in ``color`` and a trailing style reset."""
    line = f"{color}{emoji} {message}{Style.RESET_ALL}"
    print(line)


def _print_success(emoji: str, message: str):
    """Print a success line: same layout as ``_print_step``, in green."""
    _print_step(emoji, message, color=Fore.GREEN)


def _print_info(emoji: str, message: str):
    """Print an informational line: same layout as ``_print_step``, in yellow."""
    _print_step(emoji, message, color=Fore.YELLOW)


def ingest_pipeline(
        directory_path: Optional[str] = None,
        recursive: bool = True,
        file_extensions: Optional[list[str]] = None,
        batch_size: int = 100
) -> VectorStore:
    """
    Complete ingestion pipeline: collect, convert, chunk, embed, and store documents.

    Conversion and chunking are lazy generators, so the actual converting and
    chunking happen while batches are being written in step 5.

    Args:
        directory_path: Path to directory containing documents
        recursive: Whether to search subdirectories
        file_extensions: List of file extensions to process
        batch_size: Number of chunks to collect before adding to vector store

    Returns:
        VectorStore instance with embedded documents. If no chunk was produced
        the underlying store is never created and a warning line is printed.

    Raises:
        ValueError: From ``collect_documents_paths`` for a missing or invalid
            directory, or from ``convert_to_markdown`` (on first iteration)
            when no document paths were found.
    """
    print(f"\n{Fore.MAGENTA}{'='*70}{Style.RESET_ALL}")
    _print_step("🚀", "Starting Document Ingestion Pipeline", Fore.MAGENTA)
    print(f"{Fore.MAGENTA}{'='*70}{Style.RESET_ALL}\n")

    # Step 1: Collect document paths
    _print_step("📁", f"Collecting documents from: {directory_path}", Fore.CYAN)
    _print_info("  ℹ️", f"Recursive search: {recursive}")
    if file_extensions:
        _print_info("  ℹ️", f"File extensions: {', '.join(file_extensions)}")

    paths = collect_documents_paths(directory_path, recursive, file_extensions)
    _print_success("  ✅", f"Found {len(paths)} document(s)")

    # Step 2: Convert documents to markdown
    _print_step("📄", "Converting documents to markdown format...", Fore.CYAN)
    iter_document = convert_to_markdown(paths)

    # Step 3: Chunking
    _print_step("✂️", "Chunking documents with HybridChunker...", Fore.CYAN)
    _print_info("  ℹ️", f"Batch size: {batch_size} chunks")
    chunker = HybridChunker()
    chunk_iter = chunk(iter_document, chunker)

    # Step 4: Initialize embeddings and vector store
    _print_step("🧠", "Initializing embedding model...", Fore.CYAN)
    model_name = "all-MiniLM-L6-v2"
    device = "cpu"

    embedding = HuggingFaceEmbeddings(
        model_name=model_name,
        model_kwargs={'device': device},
        encode_kwargs={'normalize_embeddings': True}
    )

    _print_success("  ✅", "Embedding model loaded")

    _print_step("🗄️", "Creating vector store...", Fore.CYAN)
    vector_store = VectorStore(embedding)
    _print_success("  ✅", "Vector store initialized")

    def _store_batch(items: list) -> None:
        # The first batch creates the collection; later batches append to it.
        # Creating it from a single chunk and then adding that chunk again with
        # its batch would store the first chunk twice.
        if vector_store.vectorstore is None:
            vector_store.create_from_documents(items)
        else:
            vector_store.add_documents(items)

    # Step 5: Add chunks to vector store in batches
    _print_step("💾", "Adding chunks to vector store in batches...", Fore.CYAN)

    batch = []
    total_chunks = 0
    batch_count = 0

    for doc in chunk_iter:
        batch.append(doc)
        total_chunks += 1

        if len(batch) >= batch_size:
            batch_count += 1
            _print_info("  📦", f"Processing batch {batch_count} ({len(batch)} chunks)...")
            _store_batch(batch)
            _print_success("  ✅", f"Batch {batch_count} added successfully")
            batch = []

    # Add any remaining chunks
    if batch:
        batch_count += 1
        _print_info("  📦", f"Processing final batch ({len(batch)} chunks)...")
        _store_batch(batch)
        _print_success("  ✅", "Final batch added successfully")

    if total_chunks == 0:
        # Nothing was stored, so the returned VectorStore is still uninitialized.
        _print_info("  ⚠️", "No chunks were produced; the vector store was not created")

    # Summary
    print(f"\n{Fore.GREEN}{'='*70}{Style.RESET_ALL}")
    _print_success("🎉", "Ingestion Pipeline Completed Successfully!")
    print(f"{Fore.GREEN}{'='*70}{Style.RESET_ALL}")
    _print_info("📊", f"Total documents processed: {len(paths)}")
    _print_info("📊", f"Total chunks created: {total_chunks}")
    _print_info("📊", f"Total batches processed: {batch_count}")
    print()

    return vector_store




In [55]:
# Directory to ingest; a relative path, so it is resolved against the
# kernel's current working directory.
directory = "test"

In [59]:
# Run the full ingestion with default settings (recursive search, default
# extensions, batch size 100) and keep the resulting VectorStore.
store = ingest_pipeline(directory)


🚀 Starting Document Ingestion Pipeline

📁 Collecting documents from: test
  ℹ️ Recursive search: True
  ✅ Found 804 document(s)
📄 Converting documents to markdown format...
✂️ Chunking documents with HybridChunker...
  ℹ️ Batch size: 100 chunks
🧠 Initializing embedding model...


2025-10-16 04:48:56,620 - INFO - Load pretrained SentenceTransformer: all-MiniLM-L6-v2


  ✅ Embedding model loaded
🗄️ Creating vector store...
  ✅ Vector store initialized
💾 Adding chunks to vector store in batches...


2025-10-16 04:48:59,520 - INFO - detected formats: [<InputFormat.DOCX: 'docx'>]
2025-10-16 04:48:59,535 - INFO - Going to convert document batch...
2025-10-16 04:48:59,536 - INFO - Initializing pipeline for SimplePipeline with options hash 995a146ad601044538e6a923bea22f4e
2025-10-16 04:48:59,537 - INFO - Processing document Content_Assets_20240722IRF.docx
2025-10-16 04:48:59,549 - INFO - deleted item in tree at stack: (1,) => #/texts/0
2025-10-16 04:48:59,556 - INFO - deleted item in tree at stack: (1,) => #/texts/0
2025-10-16 04:48:59,561 - INFO - deleted item in tree at stack: (1,) => #/texts/0
2025-10-16 04:48:59,577 - INFO - deleted item in tree at stack: (6,) => #/texts/9
2025-10-16 04:48:59,585 - INFO - deleted item in tree at stack: (8,) => #/texts/13
2025-10-16 04:48:59,607 - INFO - deleted item in tree at stack: (10,) => #/texts/28
2025-10-16 04:48:59,610 - INFO - deleted item in tree at stack: (10,) => #/texts/28
2025-10-16 04:48:59,620 - INFO - deleted item in tree at stack:

  📦 Processing batch 1 (100 chunks)...
  ✅ Batch 1 added successfully


2025-10-16 04:49:06,886 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:49:06,904 - INFO - Going to convert document batch...
2025-10-16 04:49:06,905 - INFO - Processing document library.html
2025-10-16 04:49:06,928 - INFO - Finished converting document library.html in 0.04 sec.
2025-10-16 04:49:06,997 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:49:07,005 - INFO - Going to convert document batch...
2025-10-16 04:49:07,006 - INFO - Processing document business.html
2025-10-16 04:49:07,021 - INFO - Finished converting document business.html in 0.03 sec.
2025-10-16 04:49:07,117 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:49:07,132 - INFO - Going to convert document batch...
2025-10-16 04:49:07,133 - INFO - Processing document give-usek.html
2025-10-16 04:49:07,150 - INFO - Finished converting document give-usek.html in 0.04 sec.
2025-10-16 04:49:07,181 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-

  📦 Processing batch 2 (100 chunks)...
  ✅ Batch 2 added successfully


2025-10-16 04:49:11,378 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:49:11,393 - INFO - Going to convert document batch...
2025-10-16 04:49:11,394 - INFO - Processing document en_academics.html
2025-10-16 04:49:11,409 - INFO - Finished converting document en_academics.html in 0.03 sec.
2025-10-16 04:49:11,534 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:49:11,543 - INFO - Going to convert document batch...
2025-10-16 04:49:11,544 - INFO - Processing document fr_etudiants.html
2025-10-16 04:49:11,556 - INFO - Finished converting document fr_etudiants.html in 0.02 sec.
2025-10-16 04:49:11,632 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:49:11,645 - INFO - Going to convert document batch...
2025-10-16 04:49:11,645 - INFO - Processing document students_cao.html
2025-10-16 04:49:11,659 - INFO - Finished converting document students_cao.html in 0.03 sec.
2025-10-16 04:49:11,777 - INFO - detected formats: [<InputFormat

  📦 Processing batch 3 (100 chunks)...
  ✅ Batch 3 added successfully


2025-10-16 04:49:16,311 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:49:16,321 - INFO - Going to convert document batch...
2025-10-16 04:49:16,322 - INFO - Processing document en_about-usek.html
2025-10-16 04:49:16,334 - INFO - Finished converting document en_about-usek.html in 0.02 sec.
2025-10-16 04:49:16,420 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:49:16,427 - INFO - Going to convert document batch...
2025-10-16 04:49:16,427 - INFO - Processing document rcmme_contact.html
2025-10-16 04:49:16,435 - INFO - Finished converting document rcmme_contact.html in 0.02 sec.
2025-10-16 04:49:16,460 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:49:16,471 - INFO - Going to convert document batch...
2025-10-16 04:49:16,472 - INFO - Processing document usekpresident.html
2025-10-16 04:49:16,492 - INFO - Finished converting document usekpresident.html in 0.03 sec.
2025-10-16 04:49:16,620 - INFO - detected formats: [<Input

  📦 Processing batch 4 (100 chunks)...
  ✅ Batch 4 added successfully


2025-10-16 04:49:21,106 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:49:21,115 - INFO - Going to convert document batch...
2025-10-16 04:49:21,116 - INFO - Processing document doctoral-college.html
2025-10-16 04:49:21,127 - INFO - Finished converting document doctoral-college.html in 0.02 sec.
2025-10-16 04:49:21,202 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:49:21,207 - INFO - Going to convert document batch...
2025-10-16 04:49:21,208 - INFO - Processing document rcmme_activities.html
2025-10-16 04:49:21,214 - INFO - Finished converting document rcmme_activities.html in 0.01 sec.
2025-10-16 04:49:21,238 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:49:21,245 - INFO - Going to convert document batch...
2025-10-16 04:49:21,245 - INFO - Processing document rcmme_objectives.html
2025-10-16 04:49:21,253 - INFO - Finished converting document rcmme_objectives.html in 0.02 sec.
2025-10-16 04:49:21,288 - INFO - detecte

  📦 Processing batch 5 (100 chunks)...
  ✅ Batch 5 added successfully


2025-10-16 04:49:25,841 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:49:25,852 - INFO - Going to convert document batch...
2025-10-16 04:49:25,853 - INFO - Processing document law-journal_about.html
2025-10-16 04:49:25,866 - INFO - Finished converting document law-journal_about.html in 0.03 sec.
2025-10-16 04:49:25,960 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:49:25,972 - INFO - Going to convert document batch...
2025-10-16 04:49:25,973 - INFO - Processing document uclc_testimonials.html
2025-10-16 04:49:25,986 - INFO - Finished converting document uclc_testimonials.html in 0.03 sec.
2025-10-16 04:49:26,099 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:49:26,106 - INFO - Going to convert document batch...
2025-10-16 04:49:26,107 - INFO - Processing document Site_LandingGlobal.html
2025-10-16 04:49:26,123 - INFO - Finished converting document Site_LandingGlobal.html in 0.02 sec.
2025-10-16 04:49:26,194 - INFO -

  📦 Processing batch 6 (100 chunks)...
  ✅ Batch 6 added successfully


2025-10-16 04:49:31,059 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:49:31,071 - INFO - Going to convert document batch...
2025-10-16 04:49:31,072 - INFO - Processing document ubs_administration.html
2025-10-16 04:49:31,088 - INFO - Finished converting document ubs_administration.html in 0.03 sec.
2025-10-16 04:49:31,198 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:49:31,206 - INFO - Going to convert document batch...
2025-10-16 04:49:31,207 - INFO - Processing document academics_uclc_news.html
2025-10-16 04:49:31,219 - INFO - Finished converting document academics_uclc_news.html in 0.02 sec.
2025-10-16 04:49:31,300 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:49:31,309 - INFO - Going to convert document batch...
2025-10-16 04:49:31,310 - INFO - Processing document fr_a-propos_emplois.html
2025-10-16 04:49:31,325 - INFO - Finished converting document fr_a-propos_emplois.html in 0.03 sec.
2025-10-16 04:49:31,403 

  📦 Processing batch 7 (100 chunks)...
  ✅ Batch 7 added successfully


2025-10-16 04:49:35,854 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:49:35,871 - INFO - Going to convert document batch...
2025-10-16 04:49:35,872 - INFO - Processing document business_hecmontreal.html
2025-10-16 04:49:35,888 - INFO - Finished converting document business_hecmontreal.html in 0.04 sec.
2025-10-16 04:49:35,997 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:49:36,003 - INFO - Going to convert document batch...
2025-10-16 04:49:36,004 - INFO - Processing document give-usek_contact-us.html
2025-10-16 04:49:36,015 - INFO - Finished converting document give-usek_contact-us.html in 0.02 sec.
2025-10-16 04:49:36,027 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:49:36,037 - INFO - Going to convert document batch...
2025-10-16 04:49:36,039 - INFO - Processing document uclc_ongoing-courses.html
2025-10-16 04:49:36,057 - INFO - Finished converting document uclc_ongoing-courses.html in 0.03 sec.
2025-10-16 04:49

  📦 Processing batch 8 (100 chunks)...
  ✅ Batch 8 added successfully


2025-10-16 04:49:40,755 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:49:40,769 - INFO - Going to convert document batch...
2025-10-16 04:49:40,770 - INFO - Processing document en_international_news.html
2025-10-16 04:49:40,782 - INFO - Finished converting document en_international_news.html in 0.03 sec.
2025-10-16 04:49:40,884 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:49:40,898 - INFO - Going to convert document batch...
2025-10-16 04:49:40,899 - INFO - Processing document fr_faculte-de-gestion.html
2025-10-16 04:49:40,911 - INFO - Finished converting document fr_faculte-de-gestion.html in 0.03 sec.
2025-10-16 04:49:40,989 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:49:40,997 - INFO - Going to convert document batch...
2025-10-16 04:49:40,997 - INFO - Processing document news_ai-workshop-2019.html
2025-10-16 04:49:41,010 - INFO - Finished converting document news_ai-workshop-2019.html in 0.02 sec.
2025-10-16

  📦 Processing batch 9 (100 chunks)...
  ✅ Batch 9 added successfully


2025-10-16 04:49:45,507 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:49:45,525 - INFO - Going to convert document batch...
2025-10-16 04:49:45,526 - INFO - Processing document fr_a-propos_classement.html
2025-10-16 04:49:45,542 - INFO - Finished converting document fr_a-propos_classement.html in 0.04 sec.
2025-10-16 04:49:45,629 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:49:45,637 - INFO - Going to convert document batch...
2025-10-16 04:49:45,637 - INFO - Processing document fr_vie-academique_ltec.html
2025-10-16 04:49:45,649 - INFO - Finished converting document fr_vie-academique_ltec.html in 0.02 sec.
2025-10-16 04:49:45,732 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:49:45,743 - INFO - Going to convert document batch...
2025-10-16 04:49:45,744 - INFO - Processing document library_english_ask-us.html
2025-10-16 04:49:45,752 - INFO - deleted item in tree at stack: (9, 2, 2) => #/texts/38
2025-10-16 04:49:45

  📦 Processing batch 10 (100 chunks)...
  ✅ Batch 10 added successfully


2025-10-16 04:49:50,458 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:49:50,468 - INFO - Going to convert document batch...
2025-10-16 04:49:50,469 - INFO - Processing document fr_actualites-et-medias.html
2025-10-16 04:49:50,481 - INFO - Finished converting document fr_actualites-et-medias.html in 0.02 sec.
2025-10-16 04:49:50,568 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:49:50,576 - INFO - Going to convert document batch...
2025-10-16 04:49:50,577 - INFO - Processing document fr_etudiants_reglements.html
2025-10-16 04:49:50,592 - INFO - Finished converting document fr_etudiants_reglements.html in 0.03 sec.
2025-10-16 04:49:50,672 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:49:50,681 - INFO - Going to convert document batch...
2025-10-16 04:49:50,681 - INFO - Processing document fr_university-catalogue.html
2025-10-16 04:49:50,693 - INFO - Finished converting document fr_university-catalogue.html in 0.02 sec

  📦 Processing batch 11 (100 chunks)...
  ✅ Batch 11 added successfully


2025-10-16 04:49:55,125 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:49:55,137 - INFO - Going to convert document batch...
2025-10-16 04:49:55,138 - INFO - Processing document en_students_registration.html
2025-10-16 04:49:55,153 - INFO - Finished converting document en_students_registration.html in 0.03 sec.
2025-10-16 04:49:55,240 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:49:55,250 - INFO - Going to convert document batch...
2025-10-16 04:49:55,250 - INFO - Processing document faculty-of-law_colloquim.html
2025-10-16 04:49:55,264 - INFO - Finished converting document faculty-of-law_colloquim.html in 0.02 sec.
2025-10-16 04:49:55,374 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:49:55,392 - INFO - Going to convert document batch...
2025-10-16 04:49:55,392 - INFO - Processing document fr_a-propos_usek-en-bref.html
2025-10-16 04:49:55,407 - INFO - deleted item in tree at stack: (6, 1, 29) => #/texts/133
2025-10

  📦 Processing batch 12 (100 chunks)...
  ✅ Batch 12 added successfully


2025-10-16 04:50:00,799 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:50:00,812 - INFO - Going to convert document batch...
2025-10-16 04:50:00,812 - INFO - Processing document about-usek_one-step-ahead.html
2025-10-16 04:50:00,826 - INFO - Finished converting document about-usek_one-step-ahead.html in 0.03 sec.
2025-10-16 04:50:00,921 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:50:00,935 - INFO - Going to convert document batch...
2025-10-16 04:50:00,936 - INFO - Processing document academics_ltec_activities.html
2025-10-16 04:50:00,953 - INFO - Finished converting document academics_ltec_activities.html in 0.03 sec.
2025-10-16 04:50:01,060 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:50:01,068 - INFO - Going to convert document batch...
2025-10-16 04:50:01,069 - INFO - Processing document en_about-usek_usek-bylaws.html
2025-10-16 04:50:01,082 - INFO - Finished converting document en_about-usek_usek-bylaws.html

  📦 Processing batch 13 (100 chunks)...
  ✅ Batch 13 added successfully


2025-10-16 04:50:05,766 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:50:05,779 - INFO - Going to convert document batch...
2025-10-16 04:50:05,779 - INFO - Processing document library_english_fact-book.html
2025-10-16 04:50:05,791 - INFO - Finished converting document library_english_fact-book.html in 0.03 sec.
2025-10-16 04:50:05,843 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:50:05,851 - INFO - Going to convert document batch...
2025-10-16 04:50:05,852 - INFO - Processing document academics_freshman-program.html
2025-10-16 04:50:05,863 - INFO - Finished converting document academics_freshman-program.html in 0.02 sec.
2025-10-16 04:50:05,943 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:50:05,952 - INFO - Going to convert document batch...
2025-10-16 04:50:05,952 - INFO - Processing document academics_ltec_past-events.html
2025-10-16 04:50:05,968 - INFO - Finished converting document academics_ltec_past-events.

  📦 Processing batch 14 (100 chunks)...
  ✅ Batch 14 added successfully


2025-10-16 04:50:10,348 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:50:10,359 - INFO - Going to convert document batch...
2025-10-16 04:50:10,359 - INFO - Processing document international_ajyal-program.html
2025-10-16 04:50:10,380 - INFO - Finished converting document international_ajyal-program.html in 0.03 sec.
2025-10-16 04:50:10,555 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:50:10,563 - INFO - Going to convert document batch...
2025-10-16 04:50:10,564 - INFO - Processing document law-journal_editorial-board.html
2025-10-16 04:50:10,580 - INFO - Finished converting document law-journal_editorial-board.html in 0.03 sec.
2025-10-16 04:50:10,694 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:50:10,702 - INFO - Going to convert document batch...
2025-10-16 04:50:10,702 - INFO - Processing document library_english_fine-appeal.html
2025-10-16 04:50:10,712 - INFO - Finished converting document library_english_fine

  📦 Processing batch 15 (100 chunks)...
  ✅ Batch 15 added successfully


2025-10-16 04:50:15,274 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:50:15,283 - INFO - Going to convert document batch...
2025-10-16 04:50:15,284 - INFO - Processing document news_hike-in-their-footsteps.html
2025-10-16 04:50:15,295 - INFO - Finished converting document news_hike-in-their-footsteps.html in 0.02 sec.
2025-10-16 04:50:15,371 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:50:15,381 - INFO - Going to convert document batch...
2025-10-16 04:50:15,382 - INFO - Processing document rcmme_researchers_jean-akiki.html
2025-10-16 04:50:15,392 - INFO - Finished converting document rcmme_researchers_jean-akiki.html in 0.02 sec.
2025-10-16 04:50:15,453 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:50:15,462 - INFO - Going to convert document batch...
2025-10-16 04:50:15,463 - INFO - Processing document academics_ltec_administration.html
2025-10-16 04:50:15,481 - INFO - Finished converting document academics_ltec

  📦 Processing batch 16 (100 chunks)...
  ✅ Batch 16 added successfully


2025-10-16 04:50:20,113 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:50:20,132 - INFO - Going to convert document batch...
2025-10-16 04:50:20,133 - INFO - Processing document en_about-usek_university-facts.html
2025-10-16 04:50:20,160 - INFO - Finished converting document en_about-usek_university-facts.html in 0.05 sec.
2025-10-16 04:50:20,455 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:50:20,464 - INFO - Going to convert document batch...
2025-10-16 04:50:20,464 - INFO - Processing document en_about-usek_usek-at-a-glance.html
2025-10-16 04:50:20,478 - INFO - Finished converting document en_about-usek_usek-at-a-glance.html in 0.02 sec.
2025-10-16 04:50:20,569 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:50:20,954 - INFO - Going to convert document batch...
2025-10-16 04:50:20,955 - INFO - Processing document en_law_center-for-human-rights.html
2025-10-16 04:50:20,968 - INFO - Finished converting document en_la

  📦 Processing batch 17 (100 chunks)...
  ✅ Batch 17 added successfully


2025-10-16 04:50:25,885 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:50:25,896 - INFO - Going to convert document batch...
2025-10-16 04:50:25,896 - INFO - Processing document library_english_donors-gallery.html
2025-10-16 04:50:25,908 - INFO - Finished converting document library_english_donors-gallery.html in 0.02 sec.
2025-10-16 04:50:25,959 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:50:25,967 - INFO - Going to convert document batch...
2025-10-16 04:50:25,968 - INFO - Processing document news_academic-theology-seminar.html
2025-10-16 04:50:25,978 - INFO - Finished converting document news_academic-theology-seminar.html in 0.02 sec.
2025-10-16 04:50:26,058 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:50:26,067 - INFO - Going to convert document batch...
2025-10-16 04:50:26,068 - INFO - Processing document news_exhibition-peace-by-piece.html
2025-10-16 04:50:26,082 - INFO - Finished converting document news_

  📦 Processing batch 18 (100 chunks)...
  ✅ Batch 18 added successfully


2025-10-16 04:50:30,914 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:50:30,931 - INFO - Going to convert document batch...
2025-10-16 04:50:30,931 - INFO - Processing document en_about-usek_former-presidents.html
2025-10-16 04:50:30,948 - INFO - Finished converting document en_about-usek_former-presidents.html in 0.03 sec.
2025-10-16 04:50:31,064 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:50:31,075 - INFO - Going to convert document batch...
2025-10-16 04:50:31,075 - INFO - Processing document en_about-usek_fundamental-texts.html
2025-10-16 04:50:31,089 - INFO - Finished converting document en_about-usek_fundamental-texts.html in 0.03 sec.
2025-10-16 04:50:31,189 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:50:31,198 - INFO - Going to convert document batch...
2025-10-16 04:50:31,199 - INFO - Processing document en_news-and-media_announcements.html
2025-10-16 04:50:31,217 - INFO - Finished converting document 

  📦 Processing batch 19 (100 chunks)...
  ✅ Batch 19 added successfully


2025-10-16 04:50:35,861 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:50:35,874 - INFO - Going to convert document batch...
2025-10-16 04:50:35,874 - INFO - Processing document library_news_ifpo-training-2018.html
2025-10-16 04:50:35,883 - INFO - Finished converting document library_news_ifpo-training-2018.html in 0.02 sec.
2025-10-16 04:50:35,898 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:50:35,910 - INFO - Going to convert document batch...
2025-10-16 04:50:35,910 - INFO - Processing document speeches-father-talal-al-hachem.html
2025-10-16 04:50:35,935 - INFO - Finished converting document speeches-father-talal-al-hachem.html in 0.04 sec.
2025-10-16 04:50:36,100 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:50:36,109 - INFO - Going to convert document batch...
2025-10-16 04:50:36,110 - INFO - Processing document about-usek_provost_meet-the-team.html
2025-10-16 04:50:36,124 - INFO - Finished converting document

  📦 Processing batch 20 (100 chunks)...
  ✅ Batch 20 added successfully


2025-10-16 04:50:40,884 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:50:40,896 - INFO - Going to convert document batch...
2025-10-16 04:50:40,897 - INFO - Processing document en_about-usek_social-development.html
2025-10-16 04:50:40,918 - INFO - Finished converting document en_about-usek_social-development.html in 0.03 sec.
2025-10-16 04:50:41,056 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:50:41,073 - INFO - Going to convert document batch...
2025-10-16 04:50:41,074 - INFO - Processing document en_about-usek_strategic-planning.html
2025-10-16 04:50:41,089 - INFO - Finished converting document en_about-usek_strategic-planning.html in 0.03 sec.
2025-10-16 04:50:41,214 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:50:41,223 - INFO - Going to convert document batch...
2025-10-16 04:50:41,224 - INFO - Processing document en_board-of-trustees_elias-ayoub.html
2025-10-16 04:50:41,244 - INFO - Finished converting docu

  📦 Processing batch 21 (100 chunks)...
  ✅ Batch 21 added successfully


2025-10-16 04:50:46,031 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:50:46,042 - INFO - Going to convert document batch...
2025-10-16 04:50:46,043 - INFO - Processing document library_english_hidden-treasures.html
2025-10-16 04:50:46,053 - INFO - Finished converting document library_english_hidden-treasures.html in 0.02 sec.
2025-10-16 04:50:46,091 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:50:46,099 - INFO - Going to convert document batch...
2025-10-16 04:50:46,099 - INFO - Processing document news_pitch-perfect-ubs-hackathon.html
2025-10-16 04:50:46,112 - INFO - Finished converting document news_pitch-perfect-ubs-hackathon.html in 0.02 sec.
2025-10-16 04:50:46,192 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:50:46,199 - INFO - Going to convert document batch...
2025-10-16 04:50:46,200 - INFO - Processing document news_workshop-strategic-planning.html
2025-10-16 04:50:46,210 - INFO - Finished converting docu

  📦 Processing batch 22 (100 chunks)...
  ✅ Batch 22 added successfully


2025-10-16 04:50:51,112 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:50:51,125 - INFO - Going to convert document batch...
2025-10-16 04:50:51,126 - INFO - Processing document fr_etudiants_pourquoi-choisi-usek.html
2025-10-16 04:50:51,140 - INFO - Finished converting document fr_etudiants_pourquoi-choisi-usek.html in 0.03 sec.
2025-10-16 04:50:51,243 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:50:51,253 - INFO - Going to convert document batch...
2025-10-16 04:50:51,254 - INFO - Processing document fr_vie-academique_ltec_activities.html
2025-10-16 04:50:51,266 - INFO - Finished converting document fr_vie-academique_ltec_activities.html in 0.02 sec.
2025-10-16 04:50:51,340 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:50:51,351 - INFO - Going to convert document batch...
2025-10-16 04:50:51,351 - INFO - Processing document library_english_citation-software.html
2025-10-16 04:50:51,360 - INFO - Finished converting

  📦 Processing batch 23 (100 chunks)...
  ✅ Batch 23 added successfully


2025-10-16 04:50:56,233 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:50:56,243 - INFO - Going to convert document batch...
2025-10-16 04:50:56,244 - INFO - Processing document fr_actualites-et-medias_calendrier.html
2025-10-16 04:50:56,253 - INFO - deleted item in tree at stack: (6, 1, 4) => #/texts/97
2025-10-16 04:50:56,255 - INFO - deleted item in tree at stack: (6, 1, 4) => #/texts/97
2025-10-16 04:50:56,258 - INFO - deleted item in tree at stack: (6, 1, 4) => #/texts/97
2025-10-16 04:50:56,261 - INFO - deleted item in tree at stack: (6, 1, 3) => #/texts/97
2025-10-16 04:50:56,263 - INFO - deleted item in tree at stack: (6, 1, 3) => #/texts/97
2025-10-16 04:50:56,265 - INFO - deleted item in tree at stack: (6, 1, 3) => #/texts/97
2025-10-16 04:50:56,268 - INFO - deleted item in tree at stack: (6, 1, 3) => #/texts/97
2025-10-16 04:50:56,270 - INFO - deleted item in tree at stack: (6, 1, 3) => #/texts/97
2025-10-16 04:50:56,273 - INFO - deleted item in tree a

  📦 Processing batch 24 (100 chunks)...
  ✅ Batch 24 added successfully


2025-10-16 04:51:01,344 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:51:01,355 - INFO - Going to convert document batch...
2025-10-16 04:51:01,356 - INFO - Processing document library_english_special-collections.html
2025-10-16 04:51:01,365 - INFO - Finished converting document library_english_special-collections.html in 0.02 sec.
2025-10-16 04:51:01,383 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:51:01,390 - INFO - Going to convert document batch...
2025-10-16 04:51:01,390 - INFO - Processing document news_the-seventh-patriarchal-letter.html
2025-10-16 04:51:01,401 - INFO - Finished converting document news_the-seventh-patriarchal-letter.html in 0.02 sec.
2025-10-16 04:51:01,482 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:51:01,499 - INFO - Going to convert document batch...
2025-10-16 04:51:01,500 - INFO - Processing document president_advisors-to-the-president.html
2025-10-16 04:51:01,524 - INFO - Finished 

  📦 Processing batch 25 (100 chunks)...
  ✅ Batch 25 added successfully


2025-10-16 04:51:05,947 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:51:05,962 - INFO - Going to convert document batch...
2025-10-16 04:51:05,962 - INFO - Processing document en_administration_presidents-council.html
2025-10-16 04:51:05,979 - INFO - deleted item in tree at stack: (6, 1, 1) => #/texts/167
2025-10-16 04:51:05,985 - INFO - Finished converting document en_administration_presidents-council.html in 0.04 sec.
2025-10-16 04:51:06,146 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:51:06,159 - INFO - Going to convert document batch...
2025-10-16 04:51:06,160 - INFO - Processing document en_board-of-trustees_fr-hady-mahfouz.html
2025-10-16 04:51:06,174 - INFO - Finished converting document en_board-of-trustees_fr-hady-mahfouz.html in 0.03 sec.
2025-10-16 04:51:06,278 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:51:06,287 - INFO - Going to convert document batch...
2025-10-16 04:51:06,287 - INFO - Processing

  📦 Processing batch 26 (100 chunks)...
  ✅ Batch 26 added successfully


2025-10-16 04:51:10,676 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:51:10,692 - INFO - Going to convert document batch...
2025-10-16 04:51:10,692 - INFO - Processing document photo-gallery_christmas-concert-2019.html
2025-10-16 04:51:10,706 - INFO - Finished converting document photo-gallery_christmas-concert-2019.html in 0.03 sec.
2025-10-16 04:51:10,797 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:51:10,809 - INFO - Going to convert document batch...
2025-10-16 04:51:10,810 - INFO - Processing document uclc_partnerships-and-collaborations.html
2025-10-16 04:51:10,821 - INFO - Finished converting document uclc_partnerships-and-collaborations.html in 0.03 sec.
2025-10-16 04:51:10,934 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:51:10,946 - INFO - Going to convert document batch...
2025-10-16 04:51:10,947 - INFO - Processing document about-usek_governance_provost-council.html
2025-10-16 04:51:10,968 - INFO - Fin

  📦 Processing batch 27 (100 chunks)...
  ✅ Batch 27 added successfully


2025-10-16 04:51:15,247 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:51:15,264 - INFO - Going to convert document batch...
2025-10-16 04:51:15,264 - INFO - Processing document uclc_international-summer-school-2025.html
2025-10-16 04:51:15,277 - INFO - Finished converting document uclc_international-summer-school-2025.html in 0.03 sec.
2025-10-16 04:51:15,374 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:51:15,384 - INFO - Going to convert document batch...
2025-10-16 04:51:15,384 - INFO - Processing document academics_faculty-of-arts-and-sciences.html
2025-10-16 04:51:15,398 - INFO - Finished converting document academics_faculty-of-arts-and-sciences.html in 0.02 sec.
2025-10-16 04:51:15,488 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:51:15,496 - INFO - Going to convert document batch...
2025-10-16 04:51:15,497 - INFO - Processing document academics_freshman-program_credit-load.html
2025-10-16 04:51:15,510 - INF

  📦 Processing batch 28 (100 chunks)...
  ✅ Batch 28 added successfully


2025-10-16 04:51:19,954 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:51:19,962 - INFO - Going to convert document batch...
2025-10-16 04:51:19,963 - INFO - Processing document en_admission_for-undergraduate-studies.html
2025-10-16 04:51:19,975 - INFO - Finished converting document en_admission_for-undergraduate-studies.html in 0.02 sec.
2025-10-16 04:51:20,088 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:51:20,099 - INFO - Going to convert document batch...
2025-10-16 04:51:20,100 - INFO - Processing document en_board-of-trustees_joseph-saliba-phd.html
2025-10-16 04:51:20,119 - INFO - Finished converting document en_board-of-trustees_joseph-saliba-phd.html in 0.03 sec.
2025-10-16 04:51:20,234 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:51:20,242 - INFO - Going to convert document batch...
2025-10-16 04:51:20,243 - INFO - Processing document en_contact_regional-university-centers.html
2025-10-16 04:51:20,254 - I

  📦 Processing batch 29 (100 chunks)...
  ✅ Batch 29 added successfully


2025-10-16 04:51:24,437 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:51:24,446 - INFO - Going to convert document batch...
2025-10-16 04:51:24,447 - INFO - Processing document fr_vie-academique_ltec_e-taleb-project.html
2025-10-16 04:51:24,463 - INFO - Finished converting document fr_vie-academique_ltec_e-taleb-project.html in 0.03 sec.
2025-10-16 04:51:24,555 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:51:24,581 - INFO - Going to convert document batch...
2025-10-16 04:51:24,582 - INFO - Processing document library_english_exhibition-application.html
2025-10-16 04:51:24,593 - INFO - deleted item in tree at stack: (9, 2, 3) => #/texts/55
2025-10-16 04:51:24,595 - INFO - deleted item in tree at stack: (9, 2, 6) => #/texts/56
2025-10-16 04:51:24,597 - INFO - deleted item in tree at stack: (9, 2, 6) => #/texts/56
2025-10-16 04:51:24,599 - INFO - deleted item in tree at stack: (9, 2, 6) => #/texts/56
2025-10-16 04:51:24,601 - INFO - delete

  📦 Processing batch 30 (100 chunks)...
  ✅ Batch 30 added successfully


2025-10-16 04:51:30,652 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:51:30,667 - INFO - Going to convert document batch...
2025-10-16 04:51:30,668 - INFO - Processing document academics_pontifical-school-of-theology.html
2025-10-16 04:51:30,690 - INFO - Finished converting document academics_pontifical-school-of-theology.html in 0.04 sec.
2025-10-16 04:51:30,784 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:51:30,804 - INFO - Going to convert document batch...
2025-10-16 04:51:30,804 - INFO - Processing document en_about-usek_faculty-affairs_directory.html
2025-10-16 04:51:30,833 - INFO - Finished converting document en_about-usek_faculty-affairs_directory.html in 0.05 sec.
2025-10-16 04:51:30,982 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:51:30,990 - INFO - Going to convert document batch...
2025-10-16 04:51:30,990 - INFO - Processing document en_board-of-trustees_thomas-sabbagh-phd.html
2025-10-16 04:51:31,00

  📦 Processing batch 31 (100 chunks)...
  ✅ Batch 31 added successfully


2025-10-16 04:51:35,147 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:51:35,162 - INFO - Going to convert document batch...
2025-10-16 04:51:35,163 - INFO - Processing document library_english_course-reserves-request.html
2025-10-16 04:51:35,172 - INFO - Finished converting document library_english_course-reserves-request.html in 0.03 sec.
2025-10-16 04:51:35,199 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:51:35,208 - INFO - Going to convert document batch...
2025-10-16 04:51:35,209 - INFO - Processing document library_english_reserve-a-library-tour2.html
2025-10-16 04:51:35,220 - INFO - Finished converting document library_english_reserve-a-library-tour2.html in 0.02 sec.
2025-10-16 04:51:35,242 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:51:35,249 - INFO - Going to convert document batch...
2025-10-16 04:51:35,250 - INFO - Processing document news_the-usek-honors-gala-2025-new-york.html
2025-10-16 04:51:35,26

  📦 Processing batch 32 (100 chunks)...
  ✅ Batch 32 added successfully


2025-10-16 04:51:39,590 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:51:39,605 - INFO - Going to convert document batch...
2025-10-16 04:51:39,606 - INFO - Processing document en_faculty-of-letters_thematic-community.html
2025-10-16 04:51:39,618 - INFO - Finished converting document en_faculty-of-letters_thematic-community.html in 0.03 sec.
2025-10-16 04:51:39,701 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:51:39,709 - INFO - Going to convert document batch...
2025-10-16 04:51:39,710 - INFO - Processing document faculte-de-musique-et-des-arts-sceniques.html
2025-10-16 04:51:39,720 - INFO - Finished converting document faculte-de-musique-et-des-arts-sceniques.html in 0.02 sec.
2025-10-16 04:51:39,789 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:51:39,798 - INFO - Going to convert document batch...
2025-10-16 04:51:39,798 - INFO - Processing document news_law-in-the-movies-absence-of-malice.html
2025-10-16 04:51:

  📦 Processing batch 33 (100 chunks)...
  ✅ Batch 33 added successfully


2025-10-16 04:51:44,714 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:51:44,725 - INFO - Going to convert document batch...
2025-10-16 04:51:44,726 - INFO - Processing document en_students_usek-writing-center_workshops.html
2025-10-16 04:51:44,738 - INFO - Finished converting document en_students_usek-writing-center_workshops.html in 0.04 sec.
2025-10-16 04:51:44,838 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:51:44,852 - INFO - Going to convert document batch...
2025-10-16 04:51:44,853 - INFO - Processing document faculty-of-engineering_advisory-committee.html
2025-10-16 04:51:44,877 - INFO - Finished converting document faculty-of-engineering_advisory-committee.html in 0.04 sec.
2025-10-16 04:51:44,995 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:51:45,006 - INFO - Going to convert document batch...
2025-10-16 04:51:45,006 - INFO - Processing document fr_etudiants_bourses-et-aides-financieres.html
2025-10-16 0

  📦 Processing batch 34 (100 chunks)...
  ✅ Batch 34 added successfully


2025-10-16 04:51:49,735 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:51:49,746 - INFO - Going to convert document batch...
2025-10-16 04:51:49,746 - INFO - Processing document en_board-of-trustees_mr-marwan-kheireddine.html
2025-10-16 04:51:49,762 - INFO - Finished converting document en_board-of-trustees_mr-marwan-kheireddine.html in 0.03 sec.
2025-10-16 04:51:49,900 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:51:49,910 - INFO - Going to convert document batch...
2025-10-16 04:51:49,911 - INFO - Processing document en_faculty-of-music_fmus-academic-programs.html
2025-10-16 04:51:49,924 - INFO - Finished converting document en_faculty-of-music_fmus-academic-programs.html in 0.02 sec.
2025-10-16 04:51:50,012 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:51:50,025 - INFO - Going to convert document batch...
2025-10-16 04:51:50,025 - INFO - Processing document en_university-fees_undergraduate-studies-1.html
2025-10

  📦 Processing batch 35 (100 chunks)...
  ✅ Batch 35 added successfully


2025-10-16 04:51:55,399 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:51:55,413 - INFO - Going to convert document batch...
2025-10-16 04:51:55,413 - INFO - Processing document about-usek_provost_message-from-the-provost.html
2025-10-16 04:51:55,426 - INFO - Finished converting document about-usek_provost_message-from-the-provost.html in 0.03 sec.
2025-10-16 04:51:55,541 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:51:55,552 - INFO - Going to convert document batch...
2025-10-16 04:51:55,553 - INFO - Processing document academics_school-of-architecture-and-design.html
2025-10-16 04:51:55,565 - INFO - Finished converting document academics_school-of-architecture-and-design.html in 0.03 sec.
2025-10-16 04:51:55,647 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:51:55,656 - INFO - Going to convert document batch...
2025-10-16 04:51:55,656 - INFO - Processing document centre-de-formation-continue_uclc_activites.html
20

  📦 Processing batch 36 (100 chunks)...
  ✅ Batch 36 added successfully


2025-10-16 04:52:00,695 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:52:00,710 - INFO - Going to convert document batch...
2025-10-16 04:52:00,710 - INFO - Processing document fr_etudiants_inscription_formation-generale.html
2025-10-16 04:52:00,723 - INFO - Finished converting document fr_etudiants_inscription_formation-generale.html in 0.03 sec.
2025-10-16 04:52:00,802 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:52:00,809 - INFO - Going to convert document batch...
2025-10-16 04:52:00,810 - INFO - Processing document institut-superieur-des-sciences-infirmieres.html
2025-10-16 04:52:00,821 - INFO - Finished converting document institut-superieur-des-sciences-infirmieres.html in 0.02 sec.
2025-10-16 04:52:00,890 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:52:00,902 - INFO - Going to convert document batch...
2025-10-16 04:52:00,903 - INFO - Processing document international_global-engagement-certificate.html
20

  📦 Processing batch 37 (100 chunks)...
  ✅ Batch 37 added successfully


2025-10-16 04:52:05,490 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:52:05,505 - INFO - Going to convert document batch...
2025-10-16 04:52:05,506 - INFO - Processing document news_the-syriac-poetic-and-musical-heritage.html
2025-10-16 04:52:05,519 - INFO - Finished converting document news_the-syriac-poetic-and-musical-heritage.html in 0.03 sec.
2025-10-16 04:52:05,610 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:52:05,618 - INFO - Going to convert document batch...
2025-10-16 04:52:05,619 - INFO - Processing document vie-academique_ltec_diversity-and-inclusion.html
2025-10-16 04:52:05,631 - INFO - Finished converting document vie-academique_ltec_diversity-and-inclusion.html in 0.02 sec.
2025-10-16 04:52:05,708 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:52:05,716 - INFO - Going to convert document batch...
2025-10-16 04:52:05,717 - INFO - Processing document about-usek_board-of-trustees_sandra-affenito.html
2

  📦 Processing batch 38 (100 chunks)...
  ✅ Batch 38 added successfully


2025-10-16 04:52:10,383 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:52:10,400 - INFO - Going to convert document batch...
2025-10-16 04:52:10,400 - INFO - Processing document en_doctoral-college_notice-of-public-defense.html
2025-10-16 04:52:10,417 - INFO - Finished converting document en_doctoral-college_notice-of-public-defense.html in 0.03 sec.
2025-10-16 04:52:10,510 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:52:10,523 - INFO - Going to convert document batch...
2025-10-16 04:52:10,524 - INFO - Processing document en_international_news_call-for-mobility-usek.html
2025-10-16 04:52:10,534 - INFO - deleted item in tree at stack: (6, 1, 31) => #/texts/141
2025-10-16 04:52:10,537 - INFO - deleted item in tree at stack: (6, 1, 31) => #/texts/141
2025-10-16 04:52:10,539 - INFO - deleted item in tree at stack: (6, 1, 31) => #/texts/141
2025-10-16 04:52:10,541 - INFO - deleted item in tree at stack: (6, 1, 31) => #/texts/141
2025-10-16 04

  📦 Processing batch 39 (100 chunks)...
  ✅ Batch 39 added successfully


2025-10-16 04:52:15,736 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:52:15,754 - INFO - Going to convert document batch...
2025-10-16 04:52:15,755 - INFO - Processing document news_teaching-of-theology-in-the-middle-east.html
2025-10-16 04:52:15,771 - INFO - Finished converting document news_teaching-of-theology-in-the-middle-east.html in 0.07 sec.
2025-10-16 04:52:15,852 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:52:15,861 - INFO - Going to convert document batch...
2025-10-16 04:52:15,862 - INFO - Processing document photo-gallery_independence-day-ceremony-2019.html
2025-10-16 04:52:15,878 - INFO - Finished converting document photo-gallery_independence-day-ceremony-2019.html in 0.03 sec.
2025-10-16 04:52:15,960 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:52:15,968 - INFO - Going to convert document batch...
2025-10-16 04:52:15,968 - INFO - Processing document academics_uclc_photo_free-information-session.h

  📦 Processing batch 40 (100 chunks)...
  ✅ Batch 40 added successfully


2025-10-16 04:52:20,576 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:52:20,587 - INFO - Going to convert document batch...
2025-10-16 04:52:20,588 - INFO - Processing document fr_contact_administrative-and-technical-units.html
2025-10-16 04:52:20,609 - INFO - Finished converting document fr_contact_administrative-and-technical-units.html in 0.03 sec.
2025-10-16 04:52:20,717 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:52:20,726 - INFO - Going to convert document batch...
2025-10-16 04:52:20,727 - INFO - Processing document law-journal_publications_revue-juridique-2020.html
2025-10-16 04:52:20,746 - INFO - Finished converting document law-journal_publications_revue-juridique-2020.html in 0.03 sec.
2025-10-16 04:52:20,847 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:52:20,856 - INFO - Going to convert document batch...
2025-10-16 04:52:20,856 - INFO - Processing document news_meeting-with-the-atime-general-secreta

  📦 Processing batch 41 (100 chunks)...
  ✅ Batch 41 added successfully


2025-10-16 04:52:25,702 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:52:25,718 - INFO - Going to convert document batch...
2025-10-16 04:52:25,718 - INFO - Processing document en_board-of-trustees_william-bill-clements-phd.html
2025-10-16 04:52:25,733 - INFO - Finished converting document en_board-of-trustees_william-bill-clements-phd.html in 0.03 sec.
2025-10-16 04:52:25,826 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:52:25,835 - INFO - Going to convert document batch...
2025-10-16 04:52:25,836 - INFO - Processing document en_students_usek-writing-center_become-a-tutor.html
2025-10-16 04:52:25,849 - INFO - Finished converting document en_students_usek-writing-center_become-a-tutor.html in 0.02 sec.
2025-10-16 04:52:25,956 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:52:25,967 - INFO - Going to convert document batch...
2025-10-16 04:52:25,968 - INFO - Processing document fr_etudiants_bureaux-de-services-aux-et

  📦 Processing batch 42 (100 chunks)...
  ✅ Batch 42 added successfully


2025-10-16 04:52:30,724 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:52:30,739 - INFO - Going to convert document batch...
2025-10-16 04:52:30,740 - INFO - Processing document about-usek_crisis-management-committee_policies.html
2025-10-16 04:52:30,754 - INFO - Finished converting document about-usek_crisis-management-committee_policies.html in 0.04 sec.
2025-10-16 04:52:30,851 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:52:30,865 - INFO - Going to convert document batch...
2025-10-16 04:52:30,866 - INFO - Processing document academics_faculty-of-arts-and-sciences_overview.html
2025-10-16 04:52:30,884 - INFO - Finished converting document academics_faculty-of-arts-and-sciences_overview.html in 0.03 sec.
2025-10-16 04:52:31,012 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:52:31,021 - INFO - Going to convert document batch...
2025-10-16 04:52:31,022 - INFO - Processing document academics_journal-of-literature-and

  📦 Processing batch 43 (100 chunks)...
  ✅ Batch 43 added successfully


2025-10-16 04:52:35,598 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:52:35,612 - INFO - Going to convert document batch...
2025-10-16 04:52:35,613 - INFO - Processing document fr_actualites-et-medias_evenements-et-activites.html
2025-10-16 04:52:35,628 - INFO - Finished converting document fr_actualites-et-medias_evenements-et-activites.html in 0.03 sec.
2025-10-16 04:52:35,722 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:52:35,732 - INFO - Going to convert document batch...
2025-10-16 04:52:35,732 - INFO - Processing document library_news_opening-of-redha-tabets-exhibition.html
2025-10-16 04:52:35,742 - INFO - Finished converting document library_news_opening-of-redha-tabets-exhibition.html in 0.02 sec.
2025-10-16 04:52:35,758 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:52:35,769 - INFO - Going to convert document batch...
2025-10-16 04:52:35,770 - INFO - Processing document news_gilbert-rahbani-sings-mohammed

  📦 Processing batch 44 (100 chunks)...
  ✅ Batch 44 added successfully


2025-10-16 04:52:40,974 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:52:40,989 - INFO - Going to convert document batch...
2025-10-16 04:52:40,990 - INFO - Processing document law-journal_publications_revue-juridique-usek-23.html
2025-10-16 04:52:41,004 - INFO - Finished converting document law-journal_publications_revue-juridique-usek-23.html in 0.03 sec.
2025-10-16 04:52:41,112 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:52:41,120 - INFO - Going to convert document batch...
2025-10-16 04:52:41,121 - INFO - Processing document library_english_special-collections_finding-aids.html
2025-10-16 04:52:41,130 - INFO - Finished converting document library_english_special-collections_finding-aids.html in 0.02 sec.
2025-10-16 04:52:41,170 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:52:41,178 - INFO - Going to convert document batch...
2025-10-16 04:52:41,178 - INFO - Processing document photo-gallery_official-visit-to

  📦 Processing batch 45 (100 chunks)...
  ✅ Batch 45 added successfully


2025-10-16 04:52:45,937 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:52:45,953 - INFO - Going to convert document batch...
2025-10-16 04:52:45,953 - INFO - Processing document school-of-medicine-and-medical-sciences_overview.html
2025-10-16 04:52:45,969 - INFO - Finished converting document school-of-medicine-and-medical-sciences_overview.html in 0.03 sec.
2025-10-16 04:52:46,089 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:52:46,097 - INFO - Going to convert document batch...
2025-10-16 04:52:46,097 - INFO - Processing document en_about-usek_faculty-affairs_directory_badih-baz.html
2025-10-16 04:52:46,110 - INFO - Finished converting document en_about-usek_faculty-affairs_directory_badih-baz.html in 0.02 sec.
2025-10-16 04:52:46,200 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:52:46,207 - INFO - Going to convert document batch...
2025-10-16 04:52:46,208 - INFO - Processing document en_academics_higher-institute

  📦 Processing batch 46 (100 chunks)...
  ✅ Batch 46 added successfully


2025-10-16 04:52:50,676 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:52:50,689 - INFO - Going to convert document batch...
2025-10-16 04:52:50,690 - INFO - Processing document rcmme_photos_coexistence-in-lebanon-and-in-canada.html
2025-10-16 04:52:50,698 - INFO - Finished converting document rcmme_photos_coexistence-in-lebanon-and-in-canada.html in 0.02 sec.
2025-10-16 04:52:50,725 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:52:50,737 - INFO - Going to convert document batch...
2025-10-16 04:52:50,737 - INFO - Processing document school-of-medicine-and-medical-sciences_admission.html
2025-10-16 04:52:50,750 - INFO - deleted item in tree at stack: (6, 5, 23) => #/texts/135
2025-10-16 04:52:50,753 - INFO - deleted item in tree at stack: (6, 5, 23) => #/texts/135
2025-10-16 04:52:50,756 - INFO - deleted item in tree at stack: (6, 5, 23) => #/texts/135
2025-10-16 04:52:50,758 - INFO - deleted item in tree at stack: (6, 5, 23) => #/texts/13

  📦 Processing batch 47 (100 chunks)...
  ✅ Batch 47 added successfully


2025-10-16 04:52:55,331 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:52:55,346 - INFO - Going to convert document batch...
2025-10-16 04:52:55,347 - INFO - Processing document en_about-usek_faculty-affairs_directory_ali-rachini.html
2025-10-16 04:52:55,364 - INFO - Finished converting document en_about-usek_faculty-affairs_directory_ali-rachini.html in 0.03 sec.
2025-10-16 04:52:55,449 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:52:55,458 - INFO - Going to convert document batch...
2025-10-16 04:52:55,458 - INFO - Processing document en_about-usek_faculty-affairs_directory_elias-farah.html
2025-10-16 04:52:55,480 - INFO - Finished converting document en_about-usek_faculty-affairs_directory_elias-farah.html in 0.03 sec.
2025-10-16 04:52:55,571 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:52:55,580 - INFO - Going to convert document batch...
2025-10-16 04:52:55,581 - INFO - Processing document en_about-usek_facul

  📦 Processing batch 48 (100 chunks)...
  ✅ Batch 48 added successfully


2025-10-16 04:53:00,071 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:53:00,088 - INFO - Going to convert document batch...
2025-10-16 04:53:00,089 - INFO - Processing document library_english_user-policies-and-guidelines_illdds.html
2025-10-16 04:53:00,098 - INFO - deleted item in tree at stack: (9, 2, 2, 1) => #/texts/61
2025-10-16 04:53:00,099 - INFO - deleted item in tree at stack: (9, 2, 2, 1) => #/texts/61
2025-10-16 04:53:00,101 - INFO - deleted item in tree at stack: (9, 2, 2, 1) => #/texts/61
2025-10-16 04:53:00,102 - INFO - deleted item in tree at stack: (9, 2, 2, 1) => #/texts/61
2025-10-16 04:53:00,104 - INFO - deleted item in tree at stack: (9, 2, 2, 1) => #/texts/61
2025-10-16 04:53:00,106 - INFO - deleted item in tree at stack: (9, 2, 2, 1) => #/texts/61
2025-10-16 04:53:00,107 - INFO - deleted item in tree at stack: (9, 2, 2, 1) => #/texts/61
2025-10-16 04:53:00,109 - INFO - deleted item in tree at stack: (9, 2, 2, 1) => #/texts/61
2025-10-16 04:

  📦 Processing batch 49 (100 chunks)...
  ✅ Batch 49 added successfully


2025-10-16 04:53:04,750 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:53:04,765 - INFO - Going to convert document batch...
2025-10-16 04:53:04,766 - INFO - Processing document en_about-usek_faculty-affairs_directory_elie-barakat.html
2025-10-16 04:53:04,779 - INFO - Finished converting document en_about-usek_faculty-affairs_directory_elie-barakat.html in 0.03 sec.
2025-10-16 04:53:04,881 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:53:04,892 - INFO - Going to convert document batch...
2025-10-16 04:53:04,892 - INFO - Processing document faculte-des-arts-humanites-et-sciences_accreditation.html
2025-10-16 04:53:04,910 - INFO - Finished converting document faculte-des-arts-humanites-et-sciences_accreditation.html in 0.03 sec.
2025-10-16 04:53:05,011 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:53:05,021 - INFO - Going to convert document batch...
2025-10-16 04:53:05,021 - INFO - Processing document news_roundtable

  📦 Processing batch 50 (100 chunks)...
  ✅ Batch 50 added successfully


2025-10-16 04:53:09,440 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:53:09,455 - INFO - Going to convert document batch...
2025-10-16 04:53:09,456 - INFO - Processing document en_about-usek_faculty-affairs_directory_chafic-saliba.html
2025-10-16 04:53:09,471 - INFO - Finished converting document en_about-usek_faculty-affairs_directory_chafic-saliba.html in 0.03 sec.
2025-10-16 04:53:09,570 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:53:09,579 - INFO - Going to convert document batch...
2025-10-16 04:53:09,580 - INFO - Processing document en_about-usek_faculty-affairs_directory_charbel-fares.html
2025-10-16 04:53:09,600 - INFO - Finished converting document en_about-usek_faculty-affairs_directory_charbel-fares.html in 0.03 sec.
2025-10-16 04:53:09,692 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:53:09,700 - INFO - Going to convert document batch...
2025-10-16 04:53:09,701 - INFO - Processing document en_about-us

  📦 Processing batch 51 (100 chunks)...
  ✅ Batch 51 added successfully


2025-10-16 04:53:14,060 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:53:14,072 - INFO - Going to convert document batch...
2025-10-16 04:53:14,073 - INFO - Processing document en_students_usek-writing-center_book-your-appointment.html
2025-10-16 04:53:14,085 - INFO - Finished converting document en_students_usek-writing-center_book-your-appointment.html in 0.03 sec.
2025-10-16 04:53:14,174 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:53:14,182 - INFO - Going to convert document batch...
2025-10-16 04:53:14,182 - INFO - Processing document library_english_request-a-library-instruction-session.html
2025-10-16 04:53:14,192 - INFO - Finished converting document library_english_request-a-library-instruction-session.html in 0.02 sec.
2025-10-16 04:53:14,216 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:53:14,223 - INFO - Going to convert document batch...
2025-10-16 04:53:14,224 - INFO - Processing document news_lectur

  📦 Processing batch 52 (100 chunks)...
  ✅ Batch 52 added successfully


2025-10-16 04:53:18,597 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:53:18,607 - INFO - Going to convert document batch...
2025-10-16 04:53:18,608 - INFO - Processing document en_admission_for-undergraduate-studies_admission-types.html
2025-10-16 04:53:18,623 - INFO - Finished converting document en_admission_for-undergraduate-studies_admission-types.html in 0.03 sec.
2025-10-16 04:53:18,727 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:53:18,735 - INFO - Going to convert document batch...
2025-10-16 04:53:18,736 - INFO - Processing document en_research_latin-american-studies-and-cultures-center.html
2025-10-16 04:53:18,751 - INFO - Finished converting document en_research_latin-american-studies-and-cultures-center.html in 0.02 sec.
2025-10-16 04:53:18,839 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:53:18,853 - INFO - Going to convert document batch...
2025-10-16 04:53:18,854 - INFO - Processing document faculte

  📦 Processing batch 53 (100 chunks)...
  ✅ Batch 53 added successfully


2025-10-16 04:53:23,191 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:53:23,202 - INFO - Going to convert document batch...
2025-10-16 04:53:23,203 - INFO - Processing document en_students_usek-writing-center_group-writing-workshops.html
2025-10-16 04:53:23,219 - INFO - Finished converting document en_students_usek-writing-center_group-writing-workshops.html in 0.03 sec.
2025-10-16 04:53:23,308 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:53:23,322 - INFO - Going to convert document batch...
2025-10-16 04:53:23,323 - INFO - Processing document about-usek_crisis-management-committee_cmc-subcommittees.html
2025-10-16 04:53:23,339 - INFO - Finished converting document about-usek_crisis-management-committee_cmc-subcommittees.html in 0.03 sec.
2025-10-16 04:53:23,437 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:53:23,446 - INFO - Going to convert document batch...
2025-10-16 04:53:23,447 - INFO - Processing document a

  📦 Processing batch 54 (100 chunks)...
  ✅ Batch 54 added successfully


2025-10-16 04:53:27,861 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:53:27,876 - INFO - Going to convert document batch...
2025-10-16 04:53:27,876 - INFO - Processing document en_about-usek_faculty-affairs_directory_edmond-bou-dagher.html
2025-10-16 04:53:27,889 - INFO - Finished converting document en_about-usek_faculty-affairs_directory_edmond-bou-dagher.html in 0.03 sec.
2025-10-16 04:53:27,982 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:53:27,990 - INFO - Going to convert document batch...
2025-10-16 04:53:27,990 - INFO - Processing document en_about-usek_faculty-affairs_directory_father-jean-akiki.html
2025-10-16 04:53:28,002 - INFO - Finished converting document en_about-usek_faculty-affairs_directory_father-jean-akiki.html in 0.02 sec.
2025-10-16 04:53:28,096 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:53:28,106 - INFO - Going to convert document batch...
2025-10-16 04:53:28,107 - INFO - Processing docu

  📦 Processing batch 55 (100 chunks)...
  ✅ Batch 55 added successfully


2025-10-16 04:53:32,833 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:53:32,842 - INFO - Going to convert document batch...
2025-10-16 04:53:32,842 - INFO - Processing document pontifical-school-of-theology_agreements-and-partnerships.html
2025-10-16 04:53:32,855 - INFO - Finished converting document pontifical-school-of-theology_agreements-and-partnerships.html in 0.03 sec.
2025-10-16 04:53:32,956 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:53:32,964 - INFO - Going to convert document batch...
2025-10-16 04:53:32,965 - INFO - Processing document school-of-engineering_advisory-committee_maroun-el-khoury.html
2025-10-16 04:53:32,977 - INFO - Finished converting document school-of-engineering_advisory-committee_maroun-el-khoury.html in 0.02 sec.
2025-10-16 04:53:33,076 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:53:33,084 - INFO - Going to convert document batch...
2025-10-16 04:53:33,084 - INFO - Processing docu

  📦 Processing batch 56 (100 chunks)...
  ✅ Batch 56 added successfully


2025-10-16 04:53:37,605 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:53:37,614 - INFO - Going to convert document batch...
2025-10-16 04:53:37,614 - INFO - Processing document news_roundtable-discussion-solidarity-through-volunteering.html
2025-10-16 04:53:37,626 - INFO - Finished converting document news_roundtable-discussion-solidarity-through-volunteering.html in 0.03 sec.
2025-10-16 04:53:37,710 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:53:37,719 - INFO - Going to convert document batch...
2025-10-16 04:53:37,720 - INFO - Processing document photo-gallery_commencement-ceremony-bachir-gemayel-academy.html
2025-10-16 04:53:37,732 - INFO - Finished converting document photo-gallery_commencement-ceremony-bachir-gemayel-academy.html in 0.02 sec.
2025-10-16 04:53:37,811 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:53:37,819 - INFO - Going to convert document batch...
2025-10-16 04:53:37,820 - INFO - Processing 

  📦 Processing batch 57 (100 chunks)...
  ✅ Batch 57 added successfully


2025-10-16 04:53:42,607 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:53:42,617 - INFO - Going to convert document batch...
2025-10-16 04:53:42,617 - INFO - Processing document news_3rd-congress-of-argentinian-writers-of-lebanese-origin.html
2025-10-16 04:53:42,638 - INFO - Finished converting document news_3rd-congress-of-argentinian-writers-of-lebanese-origin.html in 0.03 sec.
2025-10-16 04:53:42,723 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:53:42,733 - INFO - Going to convert document batch...
2025-10-16 04:53:42,733 - INFO - Processing document academics_faculty-of-arts-and-sciences_academic-publications.html
2025-10-16 04:53:42,746 - INFO - Finished converting document academics_faculty-of-arts-and-sciences_academic-publications.html in 0.02 sec.
2025-10-16 04:53:42,841 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:53:42,849 - INFO - Going to convert document batch...
2025-10-16 04:53:42,850 - INFO - Proce

  📦 Processing batch 58 (100 chunks)...
  ✅ Batch 58 added successfully


2025-10-16 04:53:47,699 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:53:47,715 - INFO - Going to convert document batch...
2025-10-16 04:53:47,715 - INFO - Processing document fmus_department-of-performing-arts_master-in-performing-arts.html
2025-10-16 04:53:47,734 - INFO - Finished converting document fmus_department-of-performing-arts_master-in-performing-arts.html in 0.04 sec.
2025-10-16 04:53:47,884 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:53:47,899 - INFO - Going to convert document batch...
2025-10-16 04:53:47,900 - INFO - Processing document library_english_news_exhibition-lebanon-a-caricature-history.html
2025-10-16 04:53:47,908 - INFO - Finished converting document library_english_news_exhibition-lebanon-a-caricature-history.html in 0.03 sec.
2025-10-16 04:53:47,922 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:53:47,931 - INFO - Going to convert document batch...
2025-10-16 04:53:47,932 - INFO - Pro

  📦 Processing batch 59 (100 chunks)...
  ✅ Batch 59 added successfully


2025-10-16 04:53:52,732 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:53:52,744 - INFO - Going to convert document batch...
2025-10-16 04:53:52,745 - INFO - Processing document news_advanced-crisis-and-hostage-incident-management-training.html
2025-10-16 04:53:52,765 - INFO - Finished converting document news_advanced-crisis-and-hostage-incident-management-training.html in 0.03 sec.
2025-10-16 04:53:52,874 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:53:52,882 - INFO - Going to convert document batch...
2025-10-16 04:53:52,883 - INFO - Processing document news_the-11th-bible-days-at-the-pontifical-school-of-theology.html
2025-10-16 04:53:52,897 - INFO - Finished converting document news_the-11th-bible-days-at-the-pontifical-school-of-theology.html in 0.02 sec.
2025-10-16 04:53:52,988 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:53:53,014 - INFO - Going to convert document batch...
2025-10-16 04:53:53,015 - INFO -

  📦 Processing batch 60 (100 chunks)...
  ✅ Batch 60 added successfully


2025-10-16 04:53:57,634 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:53:57,651 - INFO - Going to convert document batch...
2025-10-16 04:53:57,651 - INFO - Processing document law-journal_publications_revue-juridique-usek-special-issue-n25.html
2025-10-16 04:53:57,688 - INFO - Finished converting document law-journal_publications_revue-juridique-usek-special-issue-n25.html in 0.05 sec.
2025-10-16 04:53:57,821 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:53:57,834 - INFO - Going to convert document batch...
2025-10-16 04:53:57,835 - INFO - Processing document news_carlos-fuentes-a-cultural-bridge-between-france-and-mexico.html
2025-10-16 04:53:57,847 - INFO - Finished converting document news_carlos-fuentes-a-cultural-bridge-between-france-and-mexico.html in 0.03 sec.
2025-10-16 04:53:57,945 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:53:57,956 - INFO - Going to convert document batch...
2025-10-16 04:53:57,957 

  📦 Processing batch 61 (100 chunks)...
  ✅ Batch 61 added successfully


2025-10-16 04:54:02,462 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:54:02,472 - INFO - Going to convert document batch...
2025-10-16 04:54:02,473 - INFO - Processing document news_ai-powered-business-education-an-academic-industry-dialogue.html
2025-10-16 04:54:02,482 - INFO - deleted item in tree at stack: (6, 1, 9) => #/texts/108
2025-10-16 04:54:02,486 - INFO - deleted item in tree at stack: (6, 1, 9) => #/texts/108
2025-10-16 04:54:02,495 - INFO - Finished converting document news_ai-powered-business-education-an-academic-industry-dialogue.html in 0.03 sec.
2025-10-16 04:54:02,611 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:54:02,620 - INFO - Going to convert document batch...
2025-10-16 04:54:02,621 - INFO - Processing document news_conference-legal-structuring-for-a-better-mental-well-being.html
2025-10-16 04:54:02,633 - INFO - Finished converting document news_conference-legal-structuring-for-a-better-mental-well-being.html in 

  📦 Processing batch 62 (100 chunks)...
  ✅ Batch 62 added successfully


2025-10-16 04:54:07,314 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:54:07,323 - INFO - Going to convert document batch...
2025-10-16 04:54:07,323 - INFO - Processing document a-propos_personnel-administratif-et-technique_mission-et-activites.html
2025-10-16 04:54:07,335 - INFO - Finished converting document a-propos_personnel-administratif-et-technique_mission-et-activites.html in 0.02 sec.
2025-10-16 04:54:07,415 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:54:07,424 - INFO - Going to convert document batch...
2025-10-16 04:54:07,424 - INFO - Processing document academics_faculty-of-arts-and-sciences_agreements-and-partnerships.html
2025-10-16 04:54:07,438 - INFO - Finished converting document academics_faculty-of-arts-and-sciences_agreements-and-partnerships.html in 0.03 sec.
2025-10-16 04:54:07,548 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:54:07,557 - INFO - Going to convert document batch...
2025-10-16 0

  📦 Processing batch 63 (100 chunks)...
  ✅ Batch 63 added successfully


2025-10-16 04:54:12,215 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:54:12,226 - INFO - Going to convert document batch...
2025-10-16 04:54:12,227 - INFO - Processing document faculty-of-arts-and-sciences_academic-programs_department-of-biology.html
2025-10-16 04:54:12,239 - INFO - Finished converting document faculty-of-arts-and-sciences_academic-programs_department-of-biology.html in 0.02 sec.
2025-10-16 04:54:12,323 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:54:12,331 - INFO - Going to convert document batch...
2025-10-16 04:54:12,331 - INFO - Processing document news_inauguration-day-presidential-investiture-of-fr-joseph-moukarzel.html
2025-10-16 04:54:12,341 - INFO - Finished converting document news_inauguration-day-presidential-investiture-of-fr-joseph-moukarzel.html in 0.02 sec.
2025-10-16 04:54:12,417 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:54:12,426 - INFO - Going to convert document batch...
20

  📦 Processing batch 64 (100 chunks)...
  ✅ Batch 64 added successfully


2025-10-16 04:54:16,662 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:54:16,676 - INFO - Going to convert document batch...
2025-10-16 04:54:16,677 - INFO - Processing document en_department-of-doctoral-studies_diploma-of-specialized-studies-urology.html
2025-10-16 04:54:16,697 - INFO - Finished converting document en_department-of-doctoral-studies_diploma-of-specialized-studies-urology.html in 0.04 sec.
2025-10-16 04:54:16,840 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:54:16,850 - INFO - Going to convert document batch...
2025-10-16 04:54:16,851 - INFO - Processing document faculte-des-arts-humanites-et-sciences_literature-and-linguistics-journal.html
2025-10-16 04:54:16,865 - INFO - Finished converting document faculte-des-arts-humanites-et-sciences_literature-and-linguistics-journal.html in 0.03 sec.
2025-10-16 04:54:16,974 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:54:16,986 - INFO - Going to convert docu

  📦 Processing batch 65 (100 chunks)...
  ✅ Batch 65 added successfully


2025-10-16 04:54:21,470 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:54:21,477 - INFO - Going to convert document batch...
2025-10-16 04:54:21,477 - INFO - Processing document academics_faculty-of-arts-and-sciences_academic-program_institute-of-history.html
2025-10-16 04:54:21,485 - INFO - Finished converting document academics_faculty-of-arts-and-sciences_academic-program_institute-of-history.html in 0.02 sec.
2025-10-16 04:54:21,505 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:54:21,516 - INFO - Going to convert document batch...
2025-10-16 04:54:21,517 - INFO - Processing document en_faculty-of-medicine-and-medical-sciences_fmsm-agreements-and-partnerships.html
2025-10-16 04:54:21,531 - INFO - Finished converting document en_faculty-of-medicine-and-medical-sciences_fmsm-agreements-and-partnerships.html in 0.03 sec.
2025-10-16 04:54:21,641 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:54:21,654 - INFO - Going t

  📦 Processing batch 66 (100 chunks)...
  ✅ Batch 66 added successfully


2025-10-16 04:54:26,578 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:54:26,594 - INFO - Going to convert document batch...
2025-10-16 04:54:26,595 - INFO - Processing document academics_faculty-of-arts-and-sciences_department-of-education_teaching-diploma.html
2025-10-16 04:54:26,623 - INFO - Finished converting document academics_faculty-of-arts-and-sciences_department-of-education_teaching-diploma.html in 0.05 sec.
2025-10-16 04:54:26,857 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:54:26,867 - INFO - Going to convert document batch...
2025-10-16 04:54:26,868 - INFO - Processing document faculty-of-arts-and-sciences_department-of-biology_master-of-science-in-biology.html
2025-10-16 04:54:26,892 - INFO - Finished converting document faculty-of-arts-and-sciences_department-of-biology_master-of-science-in-biology.html in 0.04 sec.
2025-10-16 04:54:27,012 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:54:27,020 - IN

  📦 Processing batch 67 (100 chunks)...
  ✅ Batch 67 added successfully


2025-10-16 04:54:31,634 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:54:31,648 - INFO - Going to convert document batch...
2025-10-16 04:54:31,648 - INFO - Processing document academics_faculty-of-arts-and-sciences_academic-programs_department-of-sacred-art.html
2025-10-16 04:54:31,663 - INFO - Finished converting document academics_faculty-of-arts-and-sciences_academic-programs_department-of-sacred-art.html in 0.03 sec.
2025-10-16 04:54:31,753 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:54:31,761 - INFO - Going to convert document batch...
2025-10-16 04:54:31,762 - INFO - Processing document faculty-of-arts-and-sciences_department-of-social-sciences_phd-in-social-sciences.html
2025-10-16 04:54:31,776 - INFO - Finished converting document faculty-of-arts-and-sciences_department-of-social-sciences_phd-in-social-sciences.html in 0.02 sec.
2025-10-16 04:54:31,875 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:54:31,

  📦 Processing batch 68 (100 chunks)...
  ✅ Batch 68 added successfully


2025-10-16 04:54:36,300 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:54:36,308 - INFO - Going to convert document batch...
2025-10-16 04:54:36,309 - INFO - Processing document en_fmus-academic-programs_department-of-higher-and-specialized-musical-education-2.html
2025-10-16 04:54:36,321 - INFO - Finished converting document en_fmus-academic-programs_department-of-higher-and-specialized-musical-education-2.html in 0.02 sec.
2025-10-16 04:54:36,407 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:54:36,417 - INFO - Going to convert document batch...
2025-10-16 04:54:36,417 - INFO - Processing document faculty-of-arts-and-sciences_department-of-psychology_master-of-arts-in-psychology.html
2025-10-16 04:54:36,434 - INFO - Finished converting document faculty-of-arts-and-sciences_department-of-psychology_master-of-arts-in-psychology.html in 0.03 sec.
2025-10-16 04:54:36,563 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:54

  📦 Processing batch 69 (100 chunks)...
  ✅ Batch 69 added successfully


2025-10-16 04:54:41,294 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:54:41,313 - INFO - Going to convert document batch...
2025-10-16 04:54:41,314 - INFO - Processing document library_english_library-filming-photography-recording-permission-for-non-usek-users.html
2025-10-16 04:54:41,329 - INFO - Finished converting document library_english_library-filming-photography-recording-permission-for-non-usek-users.html in 0.04 sec.
2025-10-16 04:54:41,353 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:54:41,363 - INFO - Going to convert document batch...
2025-10-16 04:54:41,364 - INFO - Processing document en_department-of-doctoral-studies_diploma-of-specialized-studies-gastro-enterology-2.html
2025-10-16 04:54:41,378 - INFO - Finished converting document en_department-of-doctoral-studies_diploma-of-specialized-studies-gastro-enterology-2.html in 0.03 sec.
2025-10-16 04:54:41,505 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16

  📦 Processing batch 70 (100 chunks)...
  ✅ Batch 70 added successfully


2025-10-16 04:54:46,428 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:54:46,438 - INFO - Going to convert document batch...
2025-10-16 04:54:46,439 - INFO - Processing document news_brazilian-writers-abroad-and-the-literary-expressions-of-the-brazilian-diaspora.html
2025-10-16 04:54:46,455 - INFO - Finished converting document news_brazilian-writers-abroad-and-the-literary-expressions-of-the-brazilian-diaspora.html in 0.03 sec.
2025-10-16 04:54:46,558 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:54:46,566 - INFO - Going to convert document batch...
2025-10-16 04:54:46,567 - INFO - Processing document news_conference-exhibition-diplomacy-across-oceans-80-years-of-lebanon-latin-america.html
2025-10-16 04:54:46,578 - INFO - Finished converting document news_conference-exhibition-diplomacy-across-oceans-80-years-of-lebanon-latin-america.html in 0.02 sec.
2025-10-16 04:54:46,661 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-

  📦 Processing batch 71 (100 chunks)...
  ✅ Batch 71 added successfully


2025-10-16 04:54:51,429 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:54:51,438 - INFO - Going to convert document batch...
2025-10-16 04:54:51,439 - INFO - Processing document academics_ltec_academic-programs_post-graduate-ma_online-course-about-online-course-design.html
2025-10-16 04:54:51,457 - INFO - Finished converting document academics_ltec_academic-programs_post-graduate-ma_online-course-about-online-course-design.html in 0.03 sec.
2025-10-16 04:54:51,595 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:54:51,612 - INFO - Going to convert document batch...
2025-10-16 04:54:51,617 - INFO - Processing document academics_ltec_academic-programs_post-graduate-ma_post-graduate-intensive-track-certificate.html
2025-10-16 04:54:51,634 - INFO - Finished converting document academics_ltec_academic-programs_post-graduate-ma_post-graduate-intensive-track-certificate.html in 0.04 sec.
2025-10-16 04:54:51,737 - INFO - detected formats: [<InputForm

  📦 Processing batch 72 (100 chunks)...
  ✅ Batch 72 added successfully


2025-10-16 04:54:56,300 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:54:56,313 - INFO - Going to convert document batch...
2025-10-16 04:54:56,314 - INFO - Processing document faculty-of-arts-and-sciences_department-of-social-sciences_master-of-arts-in-social-sciences.html
2025-10-16 04:54:56,331 - INFO - Finished converting document faculty-of-arts-and-sciences_department-of-social-sciences_master-of-arts-in-social-sciences.html in 0.03 sec.
2025-10-16 04:54:56,454 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:54:56,464 - INFO - Going to convert document batch...
2025-10-16 04:54:56,465 - INFO - Processing document schoold-of-enginnering_department-of-agri-food-sciences_master-of-science-in-food-engineering.html
2025-10-16 04:54:56,486 - INFO - Finished converting document schoold-of-enginnering_department-of-agri-food-sciences_master-of-science-in-food-engineering.html in 0.03 sec.


  📦 Processing batch 73 (100 chunks)...
  ✅ Batch 73 added successfully


2025-10-16 04:55:01,173 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:55:01,189 - INFO - Going to convert document batch...
2025-10-16 04:55:01,190 - INFO - Processing document en_faculty-of-medicine-and-medical-sciences_academic-programs_department-of-hospital-university.html
2025-10-16 04:55:01,203 - INFO - Finished converting document en_faculty-of-medicine-and-medical-sciences_academic-programs_department-of-hospital-university.html in 0.03 sec.
2025-10-16 04:55:01,298 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:55:01,308 - INFO - Going to convert document batch...
2025-10-16 04:55:01,309 - INFO - Processing document academics_faculty-of-arts-and-sciences_academic-programs_department-of-languages-and-literatures.html
2025-10-16 04:55:01,321 - INFO - Finished converting document academics_faculty-of-arts-and-sciences_academic-programs_department-of-languages-and-literatures.html in 0.02 sec.
2025-10-16 04:55:01,415 - INFO - detected 

  📦 Processing batch 74 (100 chunks)...
  ✅ Batch 74 added successfully


2025-10-16 04:55:06,285 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:55:06,298 - INFO - Going to convert document batch...
2025-10-16 04:55:06,299 - INFO - Processing document academics_faculty-of-arts-and-sciences_academic-programs_minors_minor-in-web-and-mobile-programming.html
2025-10-16 04:55:06,316 - INFO - Finished converting document academics_faculty-of-arts-and-sciences_academic-programs_minors_minor-in-web-and-mobile-programming.html in 0.03 sec.
2025-10-16 04:55:06,452 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:55:06,464 - INFO - Going to convert document batch...
2025-10-16 04:55:06,464 - INFO - Processing document department-of-doctoral-studies_diploma-of-specialized-studies-internal-medicine-clinical-immunology.html
2025-10-16 04:55:06,480 - INFO - Finished converting document department-of-doctoral-studies_diploma-of-specialized-studies-internal-medicine-clinical-immunology.html in 0.03 sec.
2025-10-16 04:55:06,635 - IN

  📦 Processing batch 75 (100 chunks)...
  ✅ Batch 75 added successfully


2025-10-16 04:55:11,142 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:55:11,158 - INFO - Going to convert document batch...
2025-10-16 04:55:11,159 - INFO - Processing document academics_faculty-of-arts-and-sciences_academic-programs_department-of-religious-and-oriental-sciences.html
2025-10-16 04:55:11,175 - INFO - Finished converting document academics_faculty-of-arts-and-sciences_academic-programs_department-of-religious-and-oriental-sciences.html in 0.03 sec.
2025-10-16 04:55:11,265 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:55:11,274 - INFO - Going to convert document batch...
2025-10-16 04:55:11,276 - INFO - Processing document academics_faculty-of-arts-and-sciences_institute-of-history_bachelor-of-arts-in-history_phd-in-history.html
2025-10-16 04:55:11,289 - INFO - Finished converting document academics_faculty-of-arts-and-sciences_institute-of-history_bachelor-of-arts-in-history_phd-in-history.html in 0.03 sec.
2025-10-16 04:55

  📦 Processing batch 76 (100 chunks)...
  ✅ Batch 76 added successfully


2025-10-16 04:55:15,936 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:55:15,952 - INFO - Going to convert document batch...
2025-10-16 04:55:15,953 - INFO - Processing document faculty-of-arts-and-sciences_chemistry-and-biochemistry-department_bachelor-of-science-in-biochemistry.html
2025-10-16 04:55:15,974 - INFO - Finished converting document faculty-of-arts-and-sciences_chemistry-and-biochemistry-department_bachelor-of-science-in-biochemistry.html in 0.04 sec.
2025-10-16 04:55:16,132 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:55:16,149 - INFO - Going to convert document batch...
2025-10-16 04:55:16,149 - INFO - Processing document faculty-of-arts-and-sciences_department-of-biology_master-of-science-in-neuroscience-and-biotechnology.html
2025-10-16 04:55:16,166 - INFO - Finished converting document faculty-of-arts-and-sciences_department-of-biology_master-of-science-in-neuroscience-and-biotechnology.html in 0.03 sec.
2025-10-16 04:55

  📦 Processing batch 77 (100 chunks)...
  ✅ Batch 77 added successfully


2025-10-16 04:55:21,539 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:55:21,552 - INFO - Going to convert document batch...
2025-10-16 04:55:21,553 - INFO - Processing document academics_faculty-of-arts-and-sciences_department-of-languages-and-literatures_diploma-in-interpretation.html
2025-10-16 04:55:21,571 - INFO - Finished converting document academics_faculty-of-arts-and-sciences_department-of-languages-and-literatures_diploma-in-interpretation.html in 0.03 sec.
2025-10-16 04:55:21,718 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:55:21,728 - INFO - Going to convert document batch...
2025-10-16 04:55:21,729 - INFO - Processing document academics_faculty-of-arts-and-sciences_academic-programs_minors_minor-in-modern-languages-and-translation.html
2025-10-16 04:55:21,744 - INFO - Finished converting document academics_faculty-of-arts-and-sciences_academic-programs_minors_minor-in-modern-languages-and-translation.html in 0.03 sec.
2025-1

  📦 Processing batch 78 (100 chunks)...
  ✅ Batch 78 added successfully


2025-10-16 04:55:26,466 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:55:26,479 - INFO - Going to convert document batch...
2025-10-16 04:55:26,480 - INFO - Processing document academics_ltec_academic-programs_post-graduate-ma_post-graduate-online-certificate-in-teaching-and-learning.html
2025-10-16 04:55:26,493 - INFO - Finished converting document academics_ltec_academic-programs_post-graduate-ma_post-graduate-online-certificate-in-teaching-and-learning.html in 0.03 sec.
2025-10-16 04:55:26,581 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:55:26,601 - INFO - Going to convert document batch...
2025-10-16 04:55:26,601 - INFO - Processing document en_department-of-telecommunications-engineering_bachelor-of-engineering-in-telecommunications-engineering-2.html
2025-10-16 04:55:26,624 - INFO - Finished converting document en_department-of-telecommunications-engineering_bachelor-of-engineering-in-telecommunications-engineering-2.html in 0.05 s

  📦 Processing batch 79 (100 chunks)...
  ✅ Batch 79 added successfully


2025-10-16 04:55:31,704 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:55:31,720 - INFO - Going to convert document batch...
2025-10-16 04:55:31,721 - INFO - Processing document academics_faculty-of-arts-and-sciences_department-of-education_bachelor-of-arts-in-education-basic-education.html
2025-10-16 04:55:31,740 - INFO - Finished converting document academics_faculty-of-arts-and-sciences_department-of-education_bachelor-of-arts-in-education-basic-education.html in 0.04 sec.
2025-10-16 04:55:31,929 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:55:31,939 - INFO - Going to convert document batch...
2025-10-16 04:55:31,940 - INFO - Processing document academics_faculty-of-arts-and-sciences_department-of-languages-and-literatures_master-of-arts-in-translation.html
2025-10-16 04:55:31,956 - INFO - Finished converting document academics_faculty-of-arts-and-sciences_department-of-languages-and-literatures_master-of-arts-in-translation.html in 0.

  📦 Processing batch 80 (100 chunks)...
  ✅ Batch 80 added successfully


2025-10-16 04:55:36,770 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:55:36,779 - INFO - Going to convert document batch...
2025-10-16 04:55:36,779 - INFO - Processing document academics_uclc_news_navigating-crisis-in-higher-education-strategies-for-effective-leadership-and-resilience.html
2025-10-16 04:55:36,791 - INFO - Finished converting document academics_uclc_news_navigating-crisis-in-higher-education-strategies-for-effective-leadership-and-resilience.html in 0.02 sec.
2025-10-16 04:55:36,881 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:55:36,892 - INFO - Going to convert document batch...
2025-10-16 04:55:36,893 - INFO - Processing document academics_faculty-of-arts-and-sciences_department-of-languages-and-literatures_master-of-arts-in-arabic-language.html
2025-10-16 04:55:36,907 - INFO - Finished converting document academics_faculty-of-arts-and-sciences_department-of-languages-and-literatures_master-of-arts-in-arabic-language.ht

  📦 Processing batch 81 (100 chunks)...
  ✅ Batch 81 added successfully


2025-10-16 04:55:41,589 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:55:41,608 - INFO - Going to convert document batch...
2025-10-16 04:55:41,609 - INFO - Processing document faculty-of-arts-and-sciences_chemistry-and-biochemistry-department_master-of-science-in-environmental-technologies.html
2025-10-16 04:55:41,637 - INFO - Finished converting document faculty-of-arts-and-sciences_chemistry-and-biochemistry-department_master-of-science-in-environmental-technologies.html in 0.05 sec.
2025-10-16 04:55:41,761 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:55:41,773 - INFO - Going to convert document batch...
2025-10-16 04:55:41,774 - INFO - Processing document academics_faculty-of-arts-and-sciences_department-of-languages-and-literatures_bachelor-of-arts-in-languages-arabic.html
2025-10-16 04:55:41,799 - INFO - Finished converting document academics_faculty-of-arts-and-sciences_department-of-languages-and-literatures_bachelor-of-arts-in-l

  📦 Processing batch 82 (100 chunks)...
  ✅ Batch 82 added successfully


2025-10-16 04:55:46,690 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:55:46,709 - INFO - Going to convert document batch...
2025-10-16 04:55:46,710 - INFO - Processing document academics_faculty-of-arts-and-sciences_department-of-languages-and-literatures_phd-in-english-language-and-literature.html
2025-10-16 04:55:46,736 - INFO - Finished converting document academics_faculty-of-arts-and-sciences_department-of-languages-and-literatures_phd-in-english-language-and-literature.html in 0.05 sec.
2025-10-16 04:55:46,878 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:55:46,890 - INFO - Going to convert document batch...
2025-10-16 04:55:46,891 - INFO - Processing document en_fi-academic-programs_department-of-telecommunications-engineering-2_master-of-science-in-communication-engineering.html
2025-10-16 04:55:46,910 - INFO - Finished converting document en_fi-academic-programs_department-of-telecommunications-engineering-2_master-of-science-in-

  📦 Processing batch 83 (100 chunks)...
  ✅ Batch 83 added successfully


2025-10-16 04:55:51,800 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:55:51,814 - INFO - Going to convert document batch...
2025-10-16 04:55:51,814 - INFO - Processing document academics_faculty-of-arts-and-sciences_department-of-languages-and-literatures_phd-in-language-sciences-and-traductology.html
2025-10-16 04:55:51,828 - INFO - Finished converting document academics_faculty-of-arts-and-sciences_department-of-languages-and-literatures_phd-in-language-sciences-and-traductology.html in 0.04 sec.
2025-10-16 04:55:51,972 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:55:51,985 - INFO - Going to convert document batch...
2025-10-16 04:55:51,986 - INFO - Processing document academics_faculty-of-arts-and-sciences_institute-of-history_bachelor-of-arts-in-history_phd-in-archeology-and-art-history.html
2025-10-16 04:55:52,013 - INFO - Finished converting document academics_faculty-of-arts-and-sciences_institute-of-history_bachelor-of-arts-in-hi

  📦 Processing batch 84 (100 chunks)...
  ✅ Batch 84 added successfully


2025-10-16 04:55:56,410 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:55:56,422 - INFO - Going to convert document batch...
2025-10-16 04:55:56,423 - INFO - Processing document academics_faculty-of-arts-and-sciences_department-of-languages-and-literatures_master-of-arts-in-french-language-and-literature.html
2025-10-16 04:55:56,441 - INFO - Finished converting document academics_faculty-of-arts-and-sciences_department-of-languages-and-literatures_master-of-arts-in-french-language-and-literature.html in 0.03 sec.
2025-10-16 04:55:56,582 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:55:56,600 - INFO - Going to convert document batch...
2025-10-16 04:55:56,601 - INFO - Processing document academics_faculty-of-arts-and-sciences_department-of-languages-and-literatures_bachelor-of-arts-in-modern-languages-and-translation.html
2025-10-16 04:55:56,628 - INFO - Finished converting document academics_faculty-of-arts-and-sciences_department-of-langu

  📦 Processing batch 85 (100 chunks)...
  ✅ Batch 85 added successfully


2025-10-16 04:56:01,360 - INFO - detected formats: [<InputFormat.HTML: 'html'>]
2025-10-16 04:56:01,374 - INFO - Going to convert document batch...
2025-10-16 04:56:01,374 - INFO - Processing document faculty-of-medicine-and-medical-sciences_academic-programs_department-of-hospital-university_diploma-of-specialized-studies-pulmonary-diseases-and-intensive-care.html
2025-10-16 04:56:01,394 - INFO - Finished converting document faculty-of-medicine-and-medical-sciences_academic-programs_department-of-hospital-university_diploma-of-specialized-studies-pulmonary-diseases-and-intensive-care.html in 0.03 sec.
2025-10-16 04:56:01,551 - INFO - detected formats: [<InputFormat.PDF: 'pdf'>]
2025-10-16 04:56:01,557 - INFO - Going to convert document batch...
2025-10-16 04:56:01,558 - INFO - Initializing pipeline for StandardPdfPipeline with options hash 179b71bca3dd49ae1a11df32498db557
2025-10-16 04:56:01,590 - INFO - command: tesseract --list-langs
2025-10-16 04:56:01,606 - INFO - Accelerator devi

  📦 Processing batch 86 (100 chunks)...
  ✅ Batch 86 added successfully
  📦 Processing batch 87 (100 chunks)...
  ✅ Batch 87 added successfully


2025-10-16 04:59:26,932 - INFO - detected formats: [<InputFormat.PDF: 'pdf'>]
2025-10-16 04:59:26,942 - INFO - Going to convert document batch...
2025-10-16 04:59:26,943 - INFO - Processing document Content_Assets_20190911VTR.pdf
2025-10-16 05:00:33,992 - INFO - command: tesseract --psm 0 -l osd /tmp/tmpp9fg3er4.png stdout
2025-10-16 05:00:34,654 - INFO - command: tesseract -l fra+deu+spa+eng /tmp/tmpp9fg3er4.png stdout tsv
2025-10-16 05:00:41,678 - INFO - command: tesseract --psm 0 -l osd /tmp/tmpjwsypkoa.png stdout
2025-10-16 05:00:42,306 - INFO - command: tesseract -l fra+deu+spa+eng /tmp/tmpjwsypkoa.png stdout tsv
2025-10-16 05:00:45,175 - INFO - command: tesseract --psm 0 -l osd /tmp/tmpsw03qshr.png stdout
2025-10-16 05:00:45,876 - INFO - command: tesseract -l fra+deu+spa+eng /tmp/tmpsw03qshr.png stdout tsv
2025-10-16 05:00:49,384 - INFO - command: tesseract --psm 0 -l osd /tmp/tmpw1ixslhd.png stdout
2025-10-16 05:00:50,090 - INFO - command: tesseract -l fra+deu+spa+eng /tmp/tmpw1

  📦 Processing batch 88 (100 chunks)...
  ✅ Batch 88 added successfully


2025-10-16 05:01:05,673 - INFO - detected formats: [<InputFormat.PDF: 'pdf'>]
2025-10-16 05:01:05,675 - INFO - Going to convert document batch...
2025-10-16 05:01:05,676 - INFO - Processing document Content_Files_JeanAkiki_01.pdf
2025-10-16 05:01:23,075 - INFO - Finished converting document Content_Files_JeanAkiki_01.pdf in 17.40 sec.


  📦 Processing batch 89 (100 chunks)...
  ✅ Batch 89 added successfully


2025-10-16 05:01:27,492 - INFO - detected formats: [<InputFormat.PDF: 'pdf'>]
2025-10-16 05:01:27,494 - INFO - Going to convert document batch...
2025-10-16 05:01:27,495 - INFO - Processing document Content_Files_JeanAkiki_02.pdf
2025-10-16 05:01:43,071 - INFO - Finished converting document Content_Files_JeanAkiki_02.pdf in 15.58 sec.


  📦 Processing batch 90 (100 chunks)...
  ✅ Batch 90 added successfully


2025-10-16 05:01:47,574 - INFO - detected formats: [<InputFormat.PDF: 'pdf'>]
2025-10-16 05:01:47,577 - INFO - Going to convert document batch...
2025-10-16 05:01:47,578 - INFO - Processing document Content_Files_JeanAkiki_03.pdf
2025-10-16 05:01:56,761 - INFO - Finished converting document Content_Files_JeanAkiki_03.pdf in 9.19 sec.
2025-10-16 05:01:57,100 - INFO - detected formats: [<InputFormat.PDF: 'pdf'>]
2025-10-16 05:01:57,102 - INFO - Going to convert document batch...
2025-10-16 05:01:57,103 - INFO - Processing document Content_Files_JeanAkiki_04.pdf
2025-10-16 05:02:12,709 - INFO - Finished converting document Content_Files_JeanAkiki_04.pdf in 15.61 sec.


  📦 Processing batch 91 (100 chunks)...
  ✅ Batch 91 added successfully


2025-10-16 05:02:17,682 - INFO - detected formats: [<InputFormat.PDF: 'pdf'>]
2025-10-16 05:02:17,685 - INFO - Going to convert document batch...
2025-10-16 05:02:17,686 - INFO - Processing document Content_Files_JeanAkiki_05.pdf
2025-10-16 05:02:40,025 - INFO - Finished converting document Content_Files_JeanAkiki_05.pdf in 22.34 sec.


  📦 Processing batch 92 (100 chunks)...
  ✅ Batch 92 added successfully


2025-10-16 05:02:44,980 - INFO - detected formats: [<InputFormat.PDF: 'pdf'>]
2025-10-16 05:02:44,984 - INFO - Going to convert document batch...
2025-10-16 05:02:44,985 - INFO - Processing document Content_Files_JeanAkiki_06.pdf
2025-10-16 05:02:56,537 - INFO - Finished converting document Content_Files_JeanAkiki_06.pdf in 11.56 sec.
2025-10-16 05:02:56,779 - INFO - detected formats: [<InputFormat.PDF: 'pdf'>]
2025-10-16 05:02:56,781 - INFO - Going to convert document batch...
2025-10-16 05:02:56,782 - INFO - Processing document Content_Files_JeanAkiki_08.pdf
2025-10-16 05:03:02,981 - INFO - Finished converting document Content_Files_JeanAkiki_08.pdf in 6.21 sec.


  📦 Processing batch 93 (100 chunks)...


2025-10-16 05:06:39,517 [ERROR][handler]: RPC error: [insert_rows], <MilvusException: (code=<bound method _InactiveRpcError.code of <_InactiveRpcError of RPC that terminated with:
	status = StatusCode.UNAVAILABLE
	details = "failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:19530: Failed to connect to remote host: connect: Connection refused (111)"
	debug_error_string = "UNKNOWN:Error received from peer  {grpc_status:14, grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:19530: Failed to connect to remote host: connect: Connection refused (111)"}"
>>, message=[insert_rows] Retry run out of 75 retry times, message=failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:19530: Failed to connect to remote host: connect: Connection refused (111))>, <Time:{'RPC start': '2025-10-16 05:03:07.823892', 'RPC error': '2025-10-16 05:06:39.517450'}> (decorators.py:140)
2025-10-16 05:06:39,520 - ERROR - Failed to insert batch s

MilvusException: <MilvusException: (code=<bound method _InactiveRpcError.code of <_InactiveRpcError of RPC that terminated with:
	status = StatusCode.UNAVAILABLE
	details = "failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:19530: Failed to connect to remote host: connect: Connection refused (111)"
	debug_error_string = "UNKNOWN:Error received from peer  {grpc_status:14, grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:19530: Failed to connect to remote host: connect: Connection refused (111)"}"
>>, message=[insert_rows] Retry run out of 75 retry times, message=failed to connect to all addresses; last error: UNKNOWN: ipv6:%5B::1%5D:19530: Failed to connect to remote host: connect: Connection refused (111))>