In [1]:
from IPython.display import IFrame
from docling.document_converter import DocumentConverter
import boto3
import os
from sdg_hub.core.flow import FlowRegistry
from sdg_hub.core.blocks import BlockRegistry
import pypdfium2 as pdfium
from langchain_openai import ChatOpenAI
from langchain_community.vectorstores import LanceDB
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.document_loaders import TextLoader
from langchain_community.graph_vectorstores import GraphVectorStoreRetriever
from langchain_core.documents import Document
from lancedb.rerankers import LinearCombinationReranker
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter, MarkdownHeaderTextSplitter
from langchain.docstore.document import Document
from langchain_core.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
import lancedb
from huggingface_hub import snapshot_download
from langchain_community.embeddings import HuggingFaceBgeEmbeddings, SentenceTransformerEmbeddings
from transformers import AutoTokenizer
from enum import Enum
import traceback
import re

In [2]:
endpoint_url = os.getenv('AWS_S3_ENDPOINT')
access_key_id = os.getenv('AWS_ACCESS_KEY_ID')
secret_access_key = os.getenv('AWS_SECRET_ACCESS_KEY')
config = boto3.session.Config(signature_version='s3v4')
bucket = os.getenv("AWS_S3_BUCKET")
source_path = 'pdf/'
target_path = 'pdf'
target_path_chapters = 'pdf_chunked'
target_path_markdown = 'markdown'
CODE_LANGUAGE='ColdFusion'

embedding_model = SentenceTransformerEmbeddings(
    model_name="BAAI/bge-small-en-v1.5", 
    model_kwargs={"trust_remote_code":True
})

llm = ChatOpenAI(
    model="openai/gpt-oss-20b", # os.getenv('QWEN25CODER_MODEL_ID'),
    api_key=os.getenv('OPENROUTER_TOKEN'),
    base_url=os.getenv('OPENROUTER_API_BASE'),
    temperature=0.1,
)

vectorstore_connection = lancedb.connect(f"s3://data/lancedb-rag",
    storage_options={
        "endpoint_url": endpoint_url,
        "aws_access_key_id": access_key_id,
        "aws_secret_access_key": secret_access_key,
        "s3_force_path_style": "true",
        "allow_http": "true",
    }
)

vectorstore = LanceDB(
    mode="append",
    embedding=embedding_model,
    connection=vectorstore_connection,
)

minio = boto3.client(
    's3',
    endpoint_url=endpoint_url,
    aws_access_key_id=access_key_id,
    aws_secret_access_key=secret_access_key,
    config=boto3.session.Config(signature_version='s3v4')
)

  embedding_model = HuggingFaceBgeEmbeddings(
2025-10-23 08:39:29,777 - INFO - Use pytorch device_name: cuda:0
2025-10-23 08:39:29,778 - INFO - Load pretrained SentenceTransformer: BAAI/bge-small-en-v1.5


In [3]:
try:
    os.makedirs(target_path, exist_ok=True)
    os.makedirs(target_path_chapters, exist_ok=True)
    files = minio.list_objects_v2(Bucket=bucket, Prefix=source_path)
    if 'Contents' in files:
        for obj in files['Contents']:
            file = obj['Key']
            minio.download_file(bucket, file, f"{target_path}/{file.split('/')[-1]}")
            print(f"File '{source_path}' downloaded successfully to {target_path}/{file.split('/')[-1]}")
except Exception as e:
    print(f"Error downloading file: {e}")

File 'pdf/' downloaded successfully to pdf/Developing_Apps_coldfusion.pdf


In [4]:
def get_chapter_ranges(sourcefilename, do_print=True):
    """
    Returns a list of (beginPage, endPage) ranges for chunks that represent chapters in the given pdf.
    """
    print("Getting chapter ranges...\n")
    
    pdf = pdfium.PdfDocument(sourcefilename)
    ranges = []
    begin, end = None, None
    
    for item in pdf.get_toc():
        state = "*" if item.n_kids == 0 else "-" if item.is_closed else "+"
        target = "?" if item.page_index is None else item.page_index+1
        boundary = None
        
        if item.page_index and ((item.n_kids == 0 and item.level < 2) or item.level == 2):
            if begin is not None:
                end = item.page_index - 1
                boundary = [begin, max(begin, end)]
                ranges.append(boundary)
            begin = item.page_index
            
        if do_print:
            if boundary:
                print("    " * 2 +  f"(Pages {(boundary[0]+1)} - {(boundary[1]+1)})" + "\n")
            print(("    " * item.level) + f"[{state}] {item.title} -> {target}  # {item.view_mode} {item.view_pos}")
    return ranges

In [5]:
def split_chapters(sourcefilename, targetfilename, pagerange):
    """
    Splits the pdf into chapters using the provided page ranges.
    Returns the name of the new pdf chunk.
    """
    try:
        source_pdf = pdfium.PdfDocument(sourcefilename)
        new_pdf = pdfium.PdfDocument.new()
    
        print(f"Saving chapter...{targetfilename}, Pages {pagerange[0]} to {pagerange[1]}")
        new_page_index = new_pdf.import_pages(source_pdf, pages=list(range(pagerange[0], pagerange[1]+1)))
        new_pdf.save(targetfilename)
        
        source_pdf.close()
        new_pdf.close()
        
    except Exception as e:
        print(f"Error saving {targetfilename}: {e}")

In [6]:
def convert_to_markdown(pdffile, markdownfile):
    """
    Converts the pdf into a markdown file.
    """
    try:
        print(f"Converting {pdffile} to markdown...")
        
        converter = DocumentConverter()
        
        result = converter.convert(pdffile)
        
        markdown_output = result.document.export_to_markdown()

        with open(markdownfile, "w") as file:
            file.write(markdown_output)

        print(f"{markdownfile} generated.")
        
    except Exception as e:
        print(f"Error saving {markdownfile}: {e}")
    

In [7]:
class CodeType(Enum):
    """
    Types of entity relationships
    """
    MARKDOWN_SECTION = "markdown-section"
    CODE_TO_MARKDOWN = "markdown"
    CODE_TO_TOPICS = "topics"
    CODE_TO_KEYWORDS = "keywords"
    CODE_TO_TAGS = "tags"
    CODE_TO_FUNCTIONS = "functions"
    CODE_TO_STRUCTURE = "structure"
    CODE_TO_SUMMARY = "summary"

In [8]:
def strip_code_section(content):
    """
    Strips out code sections of file.
    """
    code_sections = re.findall(r'```([^`]+)```', content, re.DOTALL)
    return code_sections

def llm_tool(code_sections, code_language, code_task, code_instructions):
    system_template = """
    You are an expert software engineer with extensive experience in {code_language}.
    Your task is to {code_task}.
    **Instructions:**
    {code_instructions}
    **Code to analyze:**
    """
    system_message_prompt = SystemMessagePromptTemplate.from_template(system_template)
    prompt = ChatPromptTemplate.from_messages(
        [
            system_message_prompt,
            HumanMessagePromptTemplate.from_template("{input}"),
        ]
    )
    inputs = [{"input": section, 
               "code_language": code_language,
               "code_task": code_task,
               "code_instructions": code_instructions} 
              for section in code_sections]
    chain = prompt | llm
    responses = chain.batch(inputs)
    return responses
"""
1. Markdown sections
"""
def build_markdown_section(file):
    """
    Generates markdown section chunks from the file and stores them in the vector database.
    """
    try:
        print(f"Saving markdown {file}...")
        filecontent = None
        with open(file, mode="r") as f: 
            filecontent = f.read()
        headers_to_split = [("#", "Header 1"), ("##", "Header 2"),("###", "Header 3")]
        text_splitter = MarkdownHeaderTextSplitter(headers_to_split, strip_headers=False)
        splits = text_splitter.split_text(filecontent)
        
        # reranker = LinearCombinationReranker()
        if strip_code_section(filecontent):
            docs = [Document(page_content=s.page_content, 
                             metadata= {"source": file, "code_type": CodeType.MARKDOWN_SECTION.value} | s.metadata )
                    for s in splits]
            print(f"Saving {len(docs)} docs...")
            vectorstore.add_documents(documents=docs)
            print(f"{file} markdown saved to db, starting code-to-text mappings...")
    
            saved_docs = []
            for doc in docs:
                sections = strip_code_section(doc.page_content)

                if sections:
                    # Generate code-to-markdown chunks
                    saved_docs += build_code_to_markdown(sections, doc) or []
            
                    # Generate code-to-concept chunks
                    saved_docs += build_code_to_topics(sections, doc) or []
            
                    # Generate code-to-tags chunks
                    saved_docs += build_code_to_tags(sections, doc) or []
            
                    # Generate code-to-functions chunks
                    saved_docs += build_code_to_functions(sections, doc) or []
            
                    # Generate code-to-structure chunks
                    saved_docs += build_code_to_structure(sections, doc) or []
            
                    # Generate code-to-summary chunks
                    saved_docs += build_code_to_summary(sections, doc) or []
    
                    print(f"Number of code-to-text mappings generated: {len(saved_docs)}....")
                    vectorstore.add_documents(documents=saved_docs)
                    print("Code-to-text mappings saved.")
            print(f"Code to text mappings completed for {file}.")
    except Exception as e:
        print(f"Error saving {file}: {e}")
        traceback.print_exc() 

"""
2. Code-to-markdown sections
"""
def build_code_to_markdown(sections, doc):
    """
    Generates code-to-markdown mappings from the list of code sections using a parent-child hierarchy 
    and stores them in the vector database.
    """
    content=[f"***Code***:\n```{section}```\n\n***Markdown***:\n{doc.page_content}" for section in sections]
    docs = [Document(page_content=c, metadata=({"code_type": CodeType.CODE_TO_MARKDOWN.value} | doc.metadata)) for c in content]
    return docs

"""
3. Code-to-topics
"""
def build_code_to_topics(sections, doc):
    """
    Generates code-to-topic mappings from the list of code sections using a parent-child hierarchy 
    and stores them in the vector database.
    """
    try:
        code_task = f"""
        analyze this code and generate an outline of general {CODE_LANGUAGE} topics that are connected to the code
        """
        code_instructions = """
            1.  **Provide a list of the topics that you find.**
            2.  **Format your response clearly and concisely** using a numbered list.
        """
        responses = llm_tool(sections, CODE_LANGUAGE, code_task, code_instructions)
        docs = [Document(page_content=f"***Code***:\n```{sections[i]}```\n\n***Topics***:\n{response.content}", 
                         metadata=(doc.metadata | {"code_type": CodeType.CODE_TO_TOPICS.value})) for i, response in enumerate(responses)]
        return docs
    except Exception as e:
        print(f"Error in CODE_TO_TOPICS: {e}")
        traceback.print_exc() 
        

"""
4. Code-to-tags
"""
def build_code_to_tags(sections, doc):
    """
    Generates code-to-Coldfusion tag mappings from the list of code sections using a parent-child hierarchy 
    and stores them in the vector database.
    """
    try:
        code_task = f"""
        analyze this code and generate an outline of components that you can find in the code
        """
        code_instructions = """
            1.  **Analyze the code for:** Various ColdFusion tags, HTML / CSS elements, and other similar code elements.
            2.  **Provide a detailed explanation of your findings.**
            3.  **Format your response clearly and concisely** using a numbered list.
        """
        responses = llm_tool(sections, CODE_LANGUAGE, code_task, code_instructions)
        docs = [Document(page_content=f"***Code***:\n```{sections[i]}```\n\n***Components***:\n{response.content}", 
                         metadata=( doc.metadata | {"code_type": CodeType.CODE_TO_TAGS.value})) for i, response in enumerate(responses)]
        return docs
    except Exception as e:
        print(f"Error in code_to_tags: {e}")
        traceback.print_exc() 

"""
5. Code-to-functions
"""
def build_code_to_functions(sections, doc):
    """
    Generates code-to-function mappings from the list of code sections using a parent-child hierarchy 
    and stores them in the vector database.
    """
    try:
        code_task = f"""
        analyze this code and generate an outline of functions that you can find in the code
        """
        code_instructions = """
            1.  **Analyze the code for:** Any functions that you can locate in the code.
            2.  **Provide a detailed explanation of your findings.**
            3.  **Format your response clearly and concisely** using a numbered list.
        """
        responses = llm_tool(sections, CODE_LANGUAGE, code_task, code_instructions)
        docs = [Document(page_content=f"***Code***:\n```{sections[i]}```\n\n***Functions***:\n{response.content}", 
                         metadata=( doc.metadata | {"code_type": CodeType.CODE_TO_FUNCTIONS.value})) for i, response in enumerate(responses)]
        return docs
    except Exception as e:
        print(f"Error in code_to_functions: {e}")
        traceback.print_exc() 

"""
6. Code-to-structure
"""
def build_code_to_structure(sections, doc):
    """
    Generates code-to-structure mappings from the list of code sections using a parent-child hierarchy 
    and stores them in the vector database.
    """
    try:
        code_task = f"""
        describe the general structure of this code
        """
        code_instructions = """
            1.  **Provide a detailed explanation of your findings.**
            2.  **Format your response clearly and concisely** using bullet points.
        """
        responses = llm_tool(sections, CODE_LANGUAGE, code_task, code_instructions)
        docs = [Document(page_content=f"***Code***:\n```{sections[i]}```\n\n***Structure***:\n{response.content}", 
                         metadata=( doc.metadata | {"code_type": CodeType.CODE_TO_STRUCTURE.value})) for i, response in enumerate(responses)]
        return docs
    except Exception as e:
        print(f"Error in code_to_structure: {e}")
        traceback.print_exc() 
        
"""
7. Code-to-summary
"""
def build_code_to_summary(sections, doc):
    """
    Generates code-to-summary mappings from the list of code sections using a parent-child hierarchy 
    and stores them in the vector database.
    """
    try:
        code_task = f"""
        provide a summary of this code
        """
        code_instructions = """
            1.  **Provide a concise summary, including the potential business purpose and use cases for the code.**
            2.  **Format your response clearly and concisely** using bullet points.
        """
        responses = llm_tool(sections, CODE_LANGUAGE, code_task, code_instructions)
        docs = [Document(page_content=f"***Code***:\n```{sections[i]}```\n\n***Summary***:\n{response.content}", 
                         metadata=(doc.metadata | {"code_type": CodeType.CODE_TO_SUMMARY.value})) for i, response in enumerate(responses)]
        return docs
    except Exception as e:
        print(f"Error in code_to_summary: {e}")
        traceback.print_exc() 
        

In [9]:
files = [f for f in os.listdir(target_path) if ".pdf" in f]
for file in files:
    ranges = get_chapter_ranges(f"{target_path}/{file}", do_print=False)
    for idx, _range in enumerate(ranges):
        pdf = f"{target_path_chapters}/{idx}_{file}"
        md = f"{target_path_markdown}/{idx}_{file.replace('.pdf', '.md')}"
        split_chapters(f"{target_path}/{file}", pdf, _range)
        # convert_to_markdown(pdf, md)
        build_markdown_section(md)

Getting chapter ranges...

Saving chapter...pdf_chunked/0_Developing_Apps_coldfusion.pdf, Pages 2 to 20
Saving markdown markdown/0_Developing_Apps_coldfusion.md...
Saving chapter...pdf_chunked/1_Developing_Apps_coldfusion.pdf, Pages 21 to 22
Saving markdown markdown/1_Developing_Apps_coldfusion.md...
Saving chapter...pdf_chunked/2_Developing_Apps_coldfusion.pdf, Pages 23 to 23
Saving markdown markdown/2_Developing_Apps_coldfusion.md...
Saving chapter...pdf_chunked/3_Developing_Apps_coldfusion.pdf, Pages 24 to 24
Saving markdown markdown/3_Developing_Apps_coldfusion.md...
Saving chapter...pdf_chunked/4_Developing_Apps_coldfusion.pdf, Pages 25 to 25
Saving markdown markdown/4_Developing_Apps_coldfusion.md...
Saving chapter...pdf_chunked/5_Developing_Apps_coldfusion.pdf, Pages 25 to 25
Saving markdown markdown/5_Developing_Apps_coldfusion.md...
Saving chapter...pdf_chunked/6_Developing_Apps_coldfusion.pdf, Pages 26 to 39
Saving markdown markdown/6_Developing_Apps_coldfusion.md...
Saving 2

Traceback (most recent call last):
  File "/tmp/ipykernel_31828/3555746827.py", line 190, in build_code_to_structure
    responses = llm_tool(sections, CODE_LANGUAGE, code_task, code_instructions)
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipykernel_31828/3555746827.py", line 29, in llm_tool
    responses = chain.batch(inputs)
                ^^^^^^^^^^^^^^^^^^^
  File "/opt/app-root/lib64/python3.11/site-packages/langchain_core/runnables/base.py", line 3196, in batch
    inputs = step.batch(
             ^^^^^^^^^^^
  File "/opt/app-root/lib64/python3.11/site-packages/langchain_core/runnables/base.py", line 797, in batch
    return cast("list[Output]", list(executor.map(invoke, inputs, configs)))
                                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/lib64/python3.11/concurrent/futures/_base.py", line 619, in result_iterator
    yield _result_or_cancel(fs.pop())
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  

Number of code-to-text mappings generated: 18....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 24....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 42....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 54....
Code-to-text mappings saved.
Code to text mappings completed for markdown/12_Developing_Apps_coldfusion.md.
Saving chapter...pdf_chunked/13_Developing_Apps_coldfusion.pdf, Pages 182 to 191
Saving markdown markdown/13_Developing_Apps_coldfusion.md...
Saving 25 docs...
markdown/13_Developing_Apps_coldfusion.md markdown saved to db, starting code-to-text mappings...
Number of code-to-text mappings generated: 6....
Code-to-text mappings saved.
Code to text mappings completed for markdown/13_Developing_Apps_coldfusion.md.
Saving chapter...pdf_chunked/14_Developing_Apps_coldfusion.pdf, Pages 192 to 221
Saving markdown markdown/14_Developing_Apps_coldfusion.md...
Saving 57 docs...
markdown/14_Developing_Apps_co

Traceback (most recent call last):
  File "/tmp/ipykernel_31828/3555746827.py", line 141, in build_code_to_tags
    responses = llm_tool(sections, CODE_LANGUAGE, code_task, code_instructions)
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipykernel_31828/3555746827.py", line 29, in llm_tool
    responses = chain.batch(inputs)
                ^^^^^^^^^^^^^^^^^^^
  File "/opt/app-root/lib64/python3.11/site-packages/langchain_core/runnables/base.py", line 3196, in batch
    inputs = step.batch(
             ^^^^^^^^^^^
  File "/opt/app-root/lib64/python3.11/site-packages/langchain_core/runnables/base.py", line 794, in batch
    return cast("list[Output]", [invoke(inputs[0], configs[0])])
                                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/app-root/lib64/python3.11/site-packages/langchain_core/runnables/base.py", line 790, in invoke
    return self.invoke(input_, config, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Number of code-to-text mappings generated: 5....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 11....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 17....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 35....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 41....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 47....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 59....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 65....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 71....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 83....
Code-to-text mappings saved.
Code to text mappings completed for markdown/15_Developing_Apps_coldfusion.md.
Saving chapter...pdf_chunked/16_Developing_Apps_coldfusion.pdf, Pages 242 to 267
Saving markdown markdown/16_Developing_Apps_coldfus

Traceback (most recent call last):
  File "/tmp/ipykernel_31828/3555746827.py", line 115, in build_code_to_topics
    responses = llm_tool(sections, CODE_LANGUAGE, code_task, code_instructions)
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipykernel_31828/3555746827.py", line 29, in llm_tool
    responses = chain.batch(inputs)
                ^^^^^^^^^^^^^^^^^^^
  File "/opt/app-root/lib64/python3.11/site-packages/langchain_core/runnables/base.py", line 3196, in batch
    inputs = step.batch(
             ^^^^^^^^^^^
  File "/opt/app-root/lib64/python3.11/site-packages/langchain_core/runnables/base.py", line 797, in batch
    return cast("list[Output]", list(executor.map(invoke, inputs, configs)))
                                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/lib64/python3.11/concurrent/futures/_base.py", line 619, in result_iterator
    yield _result_or_cancel(fs.pop())
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  Fil

Number of code-to-text mappings generated: 69....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 75....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 87....
Code-to-text mappings saved.
Code to text mappings completed for markdown/19_Developing_Apps_coldfusion.md.
Saving chapter...pdf_chunked/20_Developing_Apps_coldfusion.pdf, Pages 340 to 371
Saving markdown markdown/20_Developing_Apps_coldfusion.md...
Saving 69 docs...
markdown/20_Developing_Apps_coldfusion.md markdown saved to db, starting code-to-text mappings...
Number of code-to-text mappings generated: 6....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 12....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 18....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 24....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 30....
Code-to-text mappings saved.
Number of code-to-text mappings 

Traceback (most recent call last):
  File "/tmp/ipykernel_31828/3555746827.py", line 141, in build_code_to_tags
    responses = llm_tool(sections, CODE_LANGUAGE, code_task, code_instructions)
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipykernel_31828/3555746827.py", line 29, in llm_tool
    responses = chain.batch(inputs)
                ^^^^^^^^^^^^^^^^^^^
  File "/opt/app-root/lib64/python3.11/site-packages/langchain_core/runnables/base.py", line 3196, in batch
    inputs = step.batch(
             ^^^^^^^^^^^
  File "/opt/app-root/lib64/python3.11/site-packages/langchain_core/runnables/base.py", line 797, in batch
    return cast("list[Output]", list(executor.map(invoke, inputs, configs)))
                                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/lib64/python3.11/concurrent/futures/_base.py", line 619, in result_iterator
    yield _result_or_cancel(fs.pop())
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File 

Number of code-to-text mappings generated: 66....
Code-to-text mappings saved.
Code to text mappings completed for markdown/20_Developing_Apps_coldfusion.md.
Saving chapter...pdf_chunked/21_Developing_Apps_coldfusion.pdf, Pages 372 to 397
Saving markdown markdown/21_Developing_Apps_coldfusion.md...
Saving 35 docs...
markdown/21_Developing_Apps_coldfusion.md markdown saved to db, starting code-to-text mappings...
Number of code-to-text mappings generated: 6....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 12....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 30....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 42....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 48....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 60....
Code-to-text mappings saved.
Error in code_to_functions: Expecting value: line 225 column 1 (char 1232)


Traceback (most recent call last):
  File "/tmp/ipykernel_31828/3555746827.py", line 166, in build_code_to_functions
    responses = llm_tool(sections, CODE_LANGUAGE, code_task, code_instructions)
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipykernel_31828/3555746827.py", line 29, in llm_tool
    responses = chain.batch(inputs)
                ^^^^^^^^^^^^^^^^^^^
  File "/opt/app-root/lib64/python3.11/site-packages/langchain_core/runnables/base.py", line 3196, in batch
    inputs = step.batch(
             ^^^^^^^^^^^
  File "/opt/app-root/lib64/python3.11/site-packages/langchain_core/runnables/base.py", line 797, in batch
    return cast("list[Output]", list(executor.map(invoke, inputs, configs)))
                                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/lib64/python3.11/concurrent/futures/_base.py", line 619, in result_iterator
    yield _result_or_cancel(fs.pop())
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  

Number of code-to-text mappings generated: 75....
Code-to-text mappings saved.
Code to text mappings completed for markdown/21_Developing_Apps_coldfusion.md.
Saving chapter...pdf_chunked/22_Developing_Apps_coldfusion.pdf, Pages 398 to 413
Saving markdown markdown/22_Developing_Apps_coldfusion.md...
Saving 29 docs...
markdown/22_Developing_Apps_coldfusion.md markdown saved to db, starting code-to-text mappings...
Number of code-to-text mappings generated: 12....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 18....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 24....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 30....
Code-to-text mappings saved.
Code to text mappings completed for markdown/22_Developing_Apps_coldfusion.md.
Saving chapter...pdf_chunked/23_Developing_Apps_coldfusion.pdf, Pages 414 to 439
Saving markdown markdown/23_Developing_Apps_coldfusion.md...
Saving 44 docs...
markdown/23_Developing_Apps_c

Traceback (most recent call last):
  File "/tmp/ipykernel_31828/3555746827.py", line 115, in build_code_to_topics
    responses = llm_tool(sections, CODE_LANGUAGE, code_task, code_instructions)
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipykernel_31828/3555746827.py", line 29, in llm_tool
    responses = chain.batch(inputs)
                ^^^^^^^^^^^^^^^^^^^
  File "/opt/app-root/lib64/python3.11/site-packages/langchain_core/runnables/base.py", line 3196, in batch
    inputs = step.batch(
             ^^^^^^^^^^^
  File "/opt/app-root/lib64/python3.11/site-packages/langchain_core/runnables/base.py", line 797, in batch
    return cast("list[Output]", list(executor.map(invoke, inputs, configs)))
                                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/lib64/python3.11/concurrent/futures/_base.py", line 619, in result_iterator
    yield _result_or_cancel(fs.pop())
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  Fil

Number of code-to-text mappings generated: 46....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 58....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 64....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 70....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 76....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 88....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 94....
Code-to-text mappings saved.
Code to text mappings completed for markdown/28_Developing_Apps_coldfusion.md.
Saving chapter...pdf_chunked/29_Developing_Apps_coldfusion.pdf, Pages 546 to 577
Saving markdown markdown/29_Developing_Apps_coldfusion.md...
Saving 45 docs...
markdown/29_Developing_Apps_coldfusion.md markdown saved to db, starting code-to-text mappings...
Number of code-to-text mappings generated: 6....
Code-to-text mappings saved.
Number of code-to-text mappings 

Traceback (most recent call last):
  File "/tmp/ipykernel_31828/3555746827.py", line 115, in build_code_to_topics
    responses = llm_tool(sections, CODE_LANGUAGE, code_task, code_instructions)
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipykernel_31828/3555746827.py", line 29, in llm_tool
    responses = chain.batch(inputs)
                ^^^^^^^^^^^^^^^^^^^
  File "/opt/app-root/lib64/python3.11/site-packages/langchain_core/runnables/base.py", line 3196, in batch
    inputs = step.batch(
             ^^^^^^^^^^^
  File "/opt/app-root/lib64/python3.11/site-packages/langchain_core/runnables/base.py", line 797, in batch
    return cast("list[Output]", list(executor.map(invoke, inputs, configs)))
                                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/lib64/python3.11/concurrent/futures/_base.py", line 619, in result_iterator
    yield _result_or_cancel(fs.pop())
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  Fil

Number of code-to-text mappings generated: 176....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 188....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 200....
Code-to-text mappings saved.
Code to text mappings completed for markdown/31_Developing_Apps_coldfusion.md.
Saving chapter...pdf_chunked/32_Developing_Apps_coldfusion.pdf, Pages 632 to 669
Saving markdown markdown/32_Developing_Apps_coldfusion.md...
Saving 70 docs...
markdown/32_Developing_Apps_coldfusion.md markdown saved to db, starting code-to-text mappings...
Number of code-to-text mappings generated: 6....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 12....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 18....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 24....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 30....
Code-to-text mappings saved.
Number of code-to-text mappin

Traceback (most recent call last):
  File "/tmp/ipykernel_31828/3555746827.py", line 190, in build_code_to_structure
    responses = llm_tool(sections, CODE_LANGUAGE, code_task, code_instructions)
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipykernel_31828/3555746827.py", line 29, in llm_tool
    responses = chain.batch(inputs)
                ^^^^^^^^^^^^^^^^^^^
  File "/opt/app-root/lib64/python3.11/site-packages/langchain_core/runnables/base.py", line 3196, in batch
    inputs = step.batch(
             ^^^^^^^^^^^
  File "/opt/app-root/lib64/python3.11/site-packages/langchain_core/runnables/base.py", line 797, in batch
    return cast("list[Output]", list(executor.map(invoke, inputs, configs)))
                                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/lib64/python3.11/concurrent/futures/_base.py", line 619, in result_iterator
    yield _result_or_cancel(fs.pop())
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  

Number of code-to-text mappings generated: 34....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 58....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 76....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 82....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 160....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 184....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 190....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 196....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 202....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 214....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 244....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 262....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 2

Traceback (most recent call last):
  File "/tmp/ipykernel_31828/3555746827.py", line 190, in build_code_to_structure
    responses = llm_tool(sections, CODE_LANGUAGE, code_task, code_instructions)
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipykernel_31828/3555746827.py", line 29, in llm_tool
    responses = chain.batch(inputs)
                ^^^^^^^^^^^^^^^^^^^
  File "/opt/app-root/lib64/python3.11/site-packages/langchain_core/runnables/base.py", line 3196, in batch
    inputs = step.batch(
             ^^^^^^^^^^^
  File "/opt/app-root/lib64/python3.11/site-packages/langchain_core/runnables/base.py", line 794, in batch
    return cast("list[Output]", [invoke(inputs[0], configs[0])])
                                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/app-root/lib64/python3.11/site-packages/langchain_core/runnables/base.py", line 790, in invoke
    return self.invoke(input_, config, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^

Number of code-to-text mappings generated: 273....
Code-to-text mappings saved.
Code to text mappings completed for markdown/36_Developing_Apps_coldfusion.md.
Saving chapter...pdf_chunked/37_Developing_Apps_coldfusion.pdf, Pages 784 to 809
Saving markdown markdown/37_Developing_Apps_coldfusion.md...
Saving 60 docs...
markdown/37_Developing_Apps_coldfusion.md markdown saved to db, starting code-to-text mappings...
Number of code-to-text mappings generated: 6....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 12....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 18....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 24....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 30....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 36....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 48....
Code-to-text mappings saved.
Number of code-to-text mappings

Traceback (most recent call last):
  File "/tmp/ipykernel_31828/3555746827.py", line 115, in build_code_to_topics
    responses = llm_tool(sections, CODE_LANGUAGE, code_task, code_instructions)
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/ipykernel_31828/3555746827.py", line 29, in llm_tool
    responses = chain.batch(inputs)
                ^^^^^^^^^^^^^^^^^^^
  File "/opt/app-root/lib64/python3.11/site-packages/langchain_core/runnables/base.py", line 3196, in batch
    inputs = step.batch(
             ^^^^^^^^^^^
  File "/opt/app-root/lib64/python3.11/site-packages/langchain_core/runnables/base.py", line 797, in batch
    return cast("list[Output]", list(executor.map(invoke, inputs, configs)))
                                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/lib64/python3.11/concurrent/futures/_base.py", line 619, in result_iterator
    yield _result_or_cancel(fs.pop())
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  Fil

Number of code-to-text mappings generated: 58....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 64....
Code-to-text mappings saved.
Number of code-to-text mappings generated: 70....
Code-to-text mappings saved.
Code to text mappings completed for markdown/41_Developing_Apps_coldfusion.md.
