In [None]:
from sentence_transformers import SentenceTransformer
from sentence_transformers.util import cos_sim
import numpy as np
import json
import faiss
from pathlib import Path
from tqdm import tqdm
from openai import OpenAI

## Chunking, Generating Embeddings, Vectorization

In [21]:
# Candidate embedding checkpoints; the first entry is the one actually loaded.
emb_models = [
    "nomic-ai/nomic-embed-text-v1",
    "jinaai/jina-embeddings-v2-small-en",
]

# trust_remote_code=True allows the checkpoint's own modeling code to be
# downloaded and executed, so only use it with repositories you trust.
# NOTE(review): the nomic model card recommends task prefixes such as
# "search_document: " / "search_query: " -- confirm whether they are needed here.
model = SentenceTransformer(emb_models[0], trust_remote_code=True)

  state_dict = loader(resolved_archive_file)
<All keys matched successfully>


In [22]:
# Sanity-check file only; it is not part of the chunks indexed below.
markdown_path = Path("./postgre_chunks/accumulate.md")
markdown_text = markdown_path.read_text(encoding="utf-8")

# Corpus that actually gets embedded: every *.txt file in the chunk directory.
chunks_path = Path("./postgre_chunks")
chunk_texts = []

# Path.glob() yields entries in arbitrary, filesystem-dependent order, so the
# paths are sorted to give every chunk the same id on every run and machine.
for chunk_id, file_path in enumerate(sorted(chunks_path.glob("*.txt"))):
    text = file_path.read_text(encoding="utf-8")
    chunk_texts.append({
        "id": chunk_id,
        "type": "code",
        "content": text.strip(),
    })

# Preserved for any cell that still reads the running counter.
id_count = len(chunk_texts)

print(f"Loaded {len(chunk_texts)} chunks")


Loaded 4 chunks


In [23]:

def embed_texts(texts: list[str]) -> np.ndarray:
    """Encode ``texts`` with the notebook-level ``model``.

    Embeddings are requested as a NumPy array and normalized to unit length,
    so the inner product of two rows equals their cosine similarity.

    Args:
        texts: Strings to embed.

    Returns:
        Whatever ``model.encode`` produces for ``texts`` with
        ``convert_to_numpy=True`` and ``normalize_embeddings=True``.
    """
    return model.encode(
        texts,
        convert_to_numpy=True,
        normalize_embeddings=True,
    )

# Pull the raw text out of every chunk record, keeping chunk order so that
# position i in all_texts corresponds to chunk_texts[i].
all_texts = [record["content"] for record in chunk_texts]

# Embed the whole corpus in one call.
all_embeddings = embed_texts(all_texts)

In [13]:
# FAISS works on a float32 matrix with one embedding per row.
vectors_np = np.array(all_embeddings, dtype="float32")
dimension = vectors_np.shape[1]

# Exact inner-product index. embed_texts() requests normalized embeddings,
# so inner product here is equivalent to cosine similarity.
index = faiss.IndexFlatIP(dimension)
index.add(vectors_np)

# Map each index row position to its chunk record. Keys are strings because
# json.dump() turns integer keys into strings anyway and retrieve_docs() looks
# chunks up by str(idx); this keeps the in-memory map identical to the one
# reloaded from disk.
metadata_map = {str(position): chunk for position, chunk in enumerate(chunk_texts)}

# Persist both artifacts so retrieval can run without re-embedding.
faiss.write_index(index, "rag_index.faiss")
with open("metadata_map.json", "w", encoding="utf-8") as f:
    json.dump(metadata_map, f)

print("embeddings stored in faiss, metadata saved.")

embeddings stored in faiss, metadata saved.


## Retrieval

In [26]:
# Restore the FAISS index written by the indexing step.
index = faiss.read_index("rag_index.faiss")

# Restore the chunk metadata. JSON object keys are always strings, so the
# chunks are addressed as metadata_map["0"], metadata_map["1"], ...
metadata_map = json.loads(Path("metadata_map.json").read_text())

def retrieve_docs(query, k, metadata_map):
    """Return the contents of the chunks most similar to ``query``.

    The query is embedded with the notebook-level ``model`` and searched
    against the notebook-level FAISS ``index``. Every hit is also printed.

    Args:
        query: Question text to search for.
        k: Maximum number of chunks to retrieve.
        metadata_map: Mapping from index row position to a chunk record
            (a dict with ``"type"`` and ``"content"`` entries). Keys may be
            strings (as loaded from JSON) or integers.

    Returns:
        list[str]: ``content`` of each retrieved chunk, in the order FAISS
        returned them. Holds fewer than ``k`` items when the index contains
        fewer than ``k`` vectors.
    """
    # Embed the query the same way the corpus was embedded (normalized, float32).
    query_vec = model.encode([query], normalize_embeddings=True).astype("float32")

    # Top-k search; row 0 of the id matrix belongs to our single query.
    _scores, neighbor_ids = index.search(query_vec, k=k)

    retrieved_chunks = []
    for idx in neighbor_ids[0]:
        # FAISS pads the result with -1 when fewer than k vectors are available.
        if idx < 0:
            continue

        # Prefer the string key used by the JSON-loaded map, but also accept
        # an integer-keyed map that was never round-tripped through JSON.
        key = str(idx)
        chunk = metadata_map[key] if key in metadata_map else metadata_map[int(idx)]

        retrieved_chunks.append(chunk['content'])
        print(f"\n[{chunk['type']}] → {chunk['content']}")

    return retrieved_chunks


In [27]:
# Smoke-test retrieval: fetch the two closest chunks for a sample question.
retrieve_docs(query="How is card name generated?", k=2, metadata_map=metadata_map)


[code] → Function getCardName Returns Char ():

   If Available credit-card Then
      Return '<INPUT TYPE="TEXT" CLASS="txtField" NAME="name" SIZE="20" MAXLENGTH="30" VALUE="' + credit-card.card-holder + '" tabindex="5">'. /* 030000 */
   Else
      Return '<INPUT TYPE="TEXT" CLASS="txtField" NAME="name" SIZE="20" MAXLENGTH="30" VALUE="' + v-addr-name + '" tabindex="5">'. /* 030000 */

End Function. /* getCardName() */

[code] → Function getBankName Returns Char ():

   Return '<INPUT TYPE="TEXT" CLASS="txtField" NAME="bankname" SIZE="20" MAXLENGTH="35" VALUE="' + v-bank-name + '" ' + v-disabled + ' tabindex="5">'. /* 010001 */

End Function. /* getCardExpiryYear() */


['Function getCardName Returns Char ():\n\n   If Available credit-card Then\n      Return \'<INPUT TYPE="TEXT" CLASS="txtField" NAME="name" SIZE="20" MAXLENGTH="30" VALUE="\' + credit-card.card-holder + \'" tabindex="5">\'. /* 030000 */\n   Else\n      Return \'<INPUT TYPE="TEXT" CLASS="txtField" NAME="name" SIZE="20" MAXLENGTH="30" VALUE="\' + v-addr-name + \'" tabindex="5">\'. /* 030000 */\n\nEnd Function. /* getCardName() */',
 'Function getBankName Returns Char ():\n\n   Return \'<INPUT TYPE="TEXT" CLASS="txtField" NAME="bankname" SIZE="20" MAXLENGTH="35" VALUE="\' + v-bank-name + \'" \' + v-disabled + \' tabindex="5">\'. /* 010001 */\n\nEnd Function. /* getCardExpiryYear() */']

In [None]:
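# NOTE: define the OpenAI `client` here before running the cells below, e.g. `client = OpenAI()`, which reads the API key from the OPENAI_API_KEY environment variable. Do not hard-code the key in the notebook.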

def query_llm_with_ret_context(client, query, model, k):
    """Answer ``query`` with a chat model, using retrieved chunks as context.

    Retrieves ``k`` chunks for ``query`` via ``retrieve_docs`` (which reads the
    notebook-level ``metadata_map``), prints the query and the chunks, then
    sends a single user message containing both to the chat completions API.

    Args:
        client: Client object exposing ``chat.completions.create``.
        query: The user's question; used for both retrieval and the prompt.
        model: Name of the chat model to call. Inside this function it shadows
            the notebook-level embedding ``model``; retrieval is unaffected
            because ``retrieve_docs`` resolves its own global.
        k: Number of chunks to retrieve as context.

    Returns:
        The ``message.content`` of the first completion choice.
    """
    # Use the caller's question; it must not be replaced by a fixed string,
    # otherwise every call would answer the same question.
    retrieved_chunks = retrieve_docs(query, k, metadata_map)

    print(f"Query:\n\n{query}\n\nRetrieved Chunks:\n\n{retrieved_chunks}\n\nModel Response:")

    # Separate chunks with a visible rule so the model can tell them apart.
    context = "\n\n---\n\n".join(retrieved_chunks)
    prompt = f"""You are a Progress 4GL expert. Use only the following documentation to answer the user's question.

    Context:
    {context}

    ---

    Question: {query}

    Answer:"""

    # Single-turn request: the whole prompt goes in one user message.
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "user", "content": prompt}
        ]
    )
    return response.choices[0].message.content



In [32]:
# End-to-end check: retrieve two chunks and print the chat model's answer.
print(
    query_llm_with_ret_context(
        client=client,
        query="How is card name generated?",
        model="gpt-3.5-turbo",
        k=2,
    )
)


[code] → Function getCardName Returns Char ():

   If Available credit-card Then
      Return '<INPUT TYPE="TEXT" CLASS="txtField" NAME="name" SIZE="20" MAXLENGTH="30" VALUE="' + credit-card.card-holder + '" tabindex="5">'. /* 030000 */
   Else
      Return '<INPUT TYPE="TEXT" CLASS="txtField" NAME="name" SIZE="20" MAXLENGTH="30" VALUE="' + v-addr-name + '" tabindex="5">'. /* 030000 */

End Function. /* getCardName() */

[code] → Function getBankName Returns Char ():

   Return '<INPUT TYPE="TEXT" CLASS="txtField" NAME="bankname" SIZE="20" MAXLENGTH="35" VALUE="' + v-bank-name + '" ' + v-disabled + ' tabindex="5">'. /* 010001 */

End Function. /* getCardExpiryYear() */
Query:

How is card name generated?

Retrieved Chunks:

['Function getCardName Returns Char ():\n\n   If Available credit-card Then\n      Return \'<INPUT TYPE="TEXT" CLASS="txtField" NAME="name" SIZE="20" MAXLENGTH="30" VALUE="\' + credit-card.card-holder + \'" tabindex="5">\'. /* 030000 */\n   Else\n      Return \'<IN