In [22]:
import os 

rag_dir = "./Agent/RAG"

if not os.path.exists(rag_dir):
    print(f"Directory {rag_dir} does not exist.")
else:
    print(f"Directory {rag_dir} exists.")

    try:
        files = os.listdir(rag_dir)
        if not files:
            print(f"No files found in directory {rag_dir}")
            vector_store_found = False

        for file in files:
            if "vector_store" in file:
                print(f"Vector store found: {file}")
                vector_store_found = True 
            else:
                print("No vector store found")
                vector_store_found = False

    except PermissionError:
        print(f"Permission denied when accessing directory {rag_dir}")
    except Exception as e:
        print(f"Error accessing directory {rag_dir}: {e}")

Directory ./Agent/RAG exists.
Vector store found: vector_store


In [23]:
import faiss 
import numpy as np
from langchain.vectorstores import FAISS 
from langchain_openai import OpenAIEmbeddings
from openai import OpenAI
from dotenv import load_dotenv
import json
from langchain_community.docstore.in_memory import InMemoryDocstore


load_dotenv(r'C:\Users\axel.grille\Documents\rules-engine-agent\Agent\.env')
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")


client = OpenAI()
embeddings = OpenAIEmbeddings()

embeddings_dim = len(embeddings.embed_query("Hello world")) 
index = faiss.IndexFlatL2(embeddings_dim)

vector_store = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=InMemoryDocstore({}),
    index_to_docstore_id={}
                    )

In [24]:
#Add items to vector store 

from uuid import uuid4 
from langchain_core.documents import Document 

documents = [] 

with open('Agent/metadata.jsonl', 'r') as f:
    json_list = list(f)

json_QA = [json.loads(json_str) for json_str in json_list]

for item in json_QA:
    content = f"Question: {item['Question']}\n\nFinal answer: {item['Final answer']}" 
    doc = Document(
        page_content=content,
        metadata={
            "source": item["task_id"],
            "level": item["Level"]
        }
    )
    documents.append(doc)

uuids = [str(uuid4()) for _ in range(len(documents))]
vector_store.add_documents(documents=documents, ids=uuids)

print(f"Added {len(documents)} documents to the vector store.")

Added 165 documents to the vector store.


In [25]:
vector_store.save_local("Agent/RAG/vector_store")

DICO REQUEST

In [29]:
import requests
import json

def efficy_api_call():
    session = requests.Session()
    
    try:
        # Login
        login_response = session.post(
            "https://sandbox-5.efficytest.cloud/crm/logon",
            headers={
                'X-Efficy-Customer': 'SANDBOX05',
                'X-Requested-By': 'User',
                'X-Requested-With': 'XMLHttpRequest',
                'Content-Type': 'application/x-www-form-urlencoded'
            },
            data='user=paul&password=Eff1cyDemo!'
        )
        
        if login_response.status_code == 200:
            print("Login successful")
            
            # DICO request
            dico_response = session.get(
                "https://sandbox-5.efficytest.cloud/crm/system/dico",
                headers={
                    'X-Requested-By': 'User',
                    'X-Requested-With': 'XMLHttpRequest'
                }
            )
            
            if dico_response.status_code == 200:
                print("DICO data retrieved")
                return dico_response.json()  # Return parsed JSON
            else:
                print(f"DICO request failed: {dico_response.status_code}")
                
        else:
            print(f"Login failed: {login_response.status_code}")
            
    except requests.exceptions.RequestException as e:
        print(f"Request error: {e}")
    except json.JSONDecodeError as e:
        print(f"JSON parsing error: {e}")
    
    return None

# Execute the API call
dico_data = efficy_api_call()
if dico_data:
    print("DICO Data:")
    print(json.dumps(dico_data, indent=2))

Login successful
DICO data retrieved
DICO Data:
{
  "data": {
    "fields": {
      "0001000U000003fh": {
        "sfldKey": "0001000U000003fh",
        "sfldKTable": 10,
        "sfldName": "stblKey",
        "sfldQuery": true,
        "sfldGrid": true,
        "sfldDataType": "Key",
        "sfldDefaultValue": "KEYGEN",
        "sfldMassAction": true,
        "sfldType": "FOB",
        "sfldUsage": "K",
        "sfldAvailableInSegments": true
      },
      "0001000U000003fj": {
        "sfldKey": "0001000U000003fj",
        "sfldKTable": 10,
        "sfldName": "stblKTable",
        "sfldQuery": true,
        "sfldGrid": true,
        "sfldDataType": "Integer",
        "sfldMassAction": true,
        "sfldType": "INTEGER",
        "sfldAvailableInSegments": true,
        "isInlineEditAllowed": true
      },
      "0001000U000003fl": {
        "sfldKey": "0001000U000003fl",
        "sfldKTable": 10,
        "sfldName": "stblName",
        "sfldQuery": true,
        "sfldGrid": true,


In [30]:
cleaned_dico = {k:v for k, v in dico_data['data']['tables'].items() if v['stblKind'] in ['E', 'R']}

In [31]:
len(cleaned_dico)

286

In [32]:
from langchain_core.documents import Document

documents = []
 
for elem in cleaned_dico.values():
    content = json.dumps(elem)
    doc = Document(
        page_content=content, 
        metadata={"name": elem['stblName'], "type": ("Relation" if elem['stblKind'] == 'R' else "Table")}
    )
    documents.append(doc)


In [33]:
documents

[Document(metadata={'name': 'SecureToken', 'type': 'Table'}, page_content='{"stblKind": "E", "stblName": "SecureToken", "stblKTable": 10120, "stblCode": "stkn", "stblCrmModule": "administration", "stblSecured": "0"}'),
 Document(metadata={'name': 'TracerEvent', 'type': 'Table'}, page_content='{"stblKind": "E", "stblName": "TracerEvent", "stblKTable": 23000, "stblCode": "trev", "stblCrmModule": "administration", "stblSecured": "0"}'),
 Document(metadata={'name': 'TracerJob', 'type': 'Table'}, page_content='{"stblKind": "E", "stblName": "TracerJob", "stblKTable": 23500, "stblCode": "trjb", "stblCrmModule": "administration", "stblSecured": "0"}'),
 Document(metadata={'name': 'TracerRule', 'type': 'Table'}, page_content='{"stblKind": "E", "stblName": "TracerRule", "stblKTable": 23600, "stblCode": "trru", "stblCrmModule": "administration", "stblSecured": "0"}'),
 Document(metadata={'name': 'LogEvent', 'type': 'Table'}, page_content='{"stblKind": "E", "stblName": "LogEvent", "stblKTable": 24

In [34]:
filtered_items = [item for item in cleaned_dico.values() if item['stblKind'] == 'R']
filtered_items[0:5]

[{'stblKind': 'R',
  'stblName': 'Pipl_Pipa',
  'stblKTable': 35034,
  'stblCode': 'piplpipa',
  'stblCrmModule': 'relations',
  'stblSecured': '0',
  'stblFrontDeleteActive': True,
  'linkedBeans': {'EDIT': [{'detailEntity': 'Pipl_Pipa',
     'relationTableName': 'Pipl_Pipa',
     'bean': 'piplpipa',
     'relation': 'piplpipa'}],
   'CONSULT': [{'detailEntity': 'Pipl_Pipa',
     'relationTableName': 'Pipl_Pipa',
     'bean': 'piplpipa',
     'relation': 'piplpipa'}]}},
 {'stblKind': 'R',
  'stblName': 'Dupe_Dupa',
  'stblKTable': 41541,
  'stblCode': 'dupedupa',
  'stblCrmModule': 'relations',
  'stblSecured': '0',
  'stblFrontDeleteActive': True},
 {'stblKind': 'R',
  'stblName': 'Layo_User',
  'stblKTable': 59010,
  'stblCode': 'layouser',
  'stblCrmModule': 'relations',
  'stblSecured': '0',
  'stblFrontDeleteActive': True},
 {'stblKind': 'R',
  'stblName': 'Rqac_Rqat',
  'stblKTable': 62061,
  'stblCode': 'rqacrqat',
  'stblCrmModule': 'relations',
  'stblSecured': '0',
  'stblFr

In [35]:
from qdrant_client import QdrantClient, models
from qdrant_client.models import PointStruct 
import uuid

qdrant_client = QdrantClient(
    url="https://456cac0f-558d-40b4-ab89-e103423d7d7e.eu-central-1-0.aws.cloud.qdrant.io:6333", 
    api_key="eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3MiOiJtIiwiZXhwIjoxODE3MzkwMzg0fQ.Hi3c7w5CVjn_Xdpvh4Z9HEEYPbQXXPznxuXKyEEVTLg",
)

print(qdrant_client.get_collections())

collections=[]


In [36]:

encoder = OpenAIEmbeddings()

if "maxo_vector_store" not in [col.name for col in qdrant_client.get_collections().collections]:
    qdrant_client.create_collection(
        collection_name="maxo_vector_store",
        vectors_config=models.VectorParams(
            size = len(encoder.embed_query("Hello world")),
            distance=models.Distance.COSINE,
        ),
    )
    print(f"Collection {qdrant_client.get_collection} created. ")
else: 
    print(f"maxo_vector_store already exists.")


Collection <bound method QdrantClient.get_collection of <qdrant_client.qdrant_client.QdrantClient object at 0x000001DB5674F5B0>> created. 


In [None]:
# Clear vector store

# qdrant_client.delete_collection(collection_name="maxo_vector_store")

In [38]:
documents[0]

Document(metadata={'name': 'SecureToken', 'type': 'Table'}, page_content='{"stblKind": "E", "stblName": "SecureToken", "stblKTable": 10120, "stblCode": "stkn", "stblCrmModule": "administration", "stblSecured": "0"}')

In [39]:
points = []

for doc in documents: 
    embedding = encoder.embed_query(doc.page_content)
    point = PointStruct(
        id=str(uuid.uuid4()),
        vector=embedding, 
        payload={
            "content": doc.page_content,
            "metadata": doc.metadata
        }
    )
    points.append(point)


try:
    qdrant_client.upsert(
        collection_name="maxo_vector_store", 
        points=points,
    )
    print(f"Upserted {len(points)} points.")
except Exception as e:
    print(f"Error during upsert: {e}")

Upserted 286 points.


In [45]:
question = "What is the relationship between Contact and Company?"

question_embedding = encoder.embed_query(question)

search_result = qdrant_client.query_points(
    collection_name="maxo_vector_store",
    query=question_embedding,
)

In [46]:
results = []
for point in search_result.points:
    score = point.score
    payload = point.payload['content']

    results.append((score, payload))

print(results)

[(0.7774315, '{"stblKind": "E", "stblName": "Contact", "stblKTable": 101000, "stblCode": "cont", "stblCrmModule": "core_crm", "stblIndexSearch": true, "stblIndexActive": true, "stblNotify": true, "stblFavorite": true, "stblSecured": "1", "stblFrontIcon": "user-vneck", "stblFrontSidePanelActive": true, "stblFrontHomePageActive": true, "stblFrontObjectPageActive": true, "stblFrontDeleteActive": true, "stblImportEnabled": true, "linkedBeans": {"EDIT": [{"detailEntity": "User", "relationTableName": "Cont_User", "bean": "user", "relation": "contuser"}, {"detailEntity": "Comp", "relationTableName": "Cont_Comp", "bean": "comp", "relation": "contcomp"}, {"detailEntity": "Cont", "relationTableName": "Cont_Cont", "bean": "cont", "relation": "contcont"}, {"detailEntity": "Address", "relationTableName": "Cont_Addr", "bean": "addr", "relation": "contaddr"}, {"detailEntity": "ExtranetApp", "relationTableName": "Extr_Cont", "bean": "extr", "relation": "extrcont"}, {"detailEntity": "Cnst", "relationTa

In [47]:
def extract_best_result(query_response):
    """Extract the best (highest scoring) result from Qdrant response"""
    if not query_response.points:
        return None
    
    # Get the first point (highest score)
    best_point = query_response.points[0]
    
    return {
        'id': best_point.id,
        'score': best_point.score,
        'content': best_point.payload['content'],
        'metadata': best_point.payload['metadata'],
        'parsed_content': json.loads(best_point.payload['content'])  
    }

def extract_all_results(query_response, min_score=0.7):
    """Extract all results above a certain score threshold"""
    results = []
    
    for point in query_response.points:
        if point.score >= min_score:
            results.append({
                'id': point.id,
                'score': point.score,
                'name': point.payload['metadata']['name'],
                'type': point.payload['metadata']['type'],
                'parsed_content': json.loads(point.payload['content'])
            })
    
    return results



best_result = extract_best_result(search_result)
print("Best Result:")
print(f"Name: {best_result['metadata']['name']}")
print(f"Type: {best_result['metadata']['type']}")
print(f"Score: {best_result['score']:.4f}")
print(f"Table Code: {best_result['parsed_content']['stblCode']}")

print("\nAll Results:")
all_results = extract_all_results(search_result, min_score=0.75)
for result in all_results:
    print(f"- {result['name']} ({result['type']}) - Score: {result['score']:.4f}")

Best Result:
Name: Contact
Type: Table
Score: 0.7774
Table Code: cont

All Results:
- Contact (Table) - Score: 0.7774
- Cont_Cont (Relation) - Score: 0.7645
- Prof_Cont (Relation) - Score: 0.7629
- Cont_Comp (Relation) - Score: 0.7625
- Proj_Cont (Relation) - Score: 0.7605
- Prod_Cont (Relation) - Score: 0.7600
- Daqu_Cont (Relation) - Score: 0.7574
- Docu_Cont (Relation) - Score: 0.7566
- Kbas_Cont (Relation) - Score: 0.7554
- Cont_Addr (Relation) - Score: 0.7544


In [None]:
best_result = extract_best_result(search_result)
print("BEST MATCH:")
print(f"Name: {best_result['metadata']['name']}")
print(f"Type: {best_result['metadata']['type']}")
print(f"Score: {best_result['score']:.4f}")
print(f"Table Code: {best_result['parsed_content']['stblCode']}")
print(f"Module: {best_result['parsed_content'].get('stblCrmModule', 'N/A')}")

# Show the raw content for debugging
print("\nRaw Content:")
print(json.dumps(best_result['parsed_content'], indent=2))

# Get all good results
print("\nALL RELEVANT RESULTS (Score > 0.75):")
good_results = extract_all_results(search_result, min_score=0.75)
for i, result in enumerate(good_results, 1):
    print(f"{i}. {result['name']} ({result['type']}) - Score: {result['score']:.4f}")
    if 'stblCode' in result['parsed_content']:
        print(f"   Code: {result['parsed_content']['stblCode']}")
    print()

BEST MATCH:
Name: Contact
Type: Table
Score: 0.7774
Table Code: cont
Module: core_crm

📄 Raw Content:
{
  "stblKind": "E",
  "stblName": "Contact",
  "stblKTable": 101000,
  "stblCode": "cont",
  "stblCrmModule": "core_crm",
  "stblIndexSearch": true,
  "stblIndexActive": true,
  "stblNotify": true,
  "stblFavorite": true,
  "stblSecured": "1",
  "stblFrontIcon": "user-vneck",
  "stblFrontSidePanelActive": true,
  "stblFrontHomePageActive": true,
  "stblFrontObjectPageActive": true,
  "stblFrontDeleteActive": true,
  "stblImportEnabled": true,
  "linkedBeans": {
    "EDIT": [
      {
        "detailEntity": "User",
        "relationTableName": "Cont_User",
        "bean": "user",
        "relation": "contuser"
      },
      {
        "detailEntity": "Comp",
        "relationTableName": "Cont_Comp",
        "bean": "comp",
        "relation": "contcomp"
      },
      {
        "detailEntity": "Cont",
        "relationTableName": "Cont_Cont",
        "bean": "cont",
        "relation":

In [44]:
all_results

[{'id': '73217f1c-00ea-4fa1-80cb-c29b884f3168',
  'score': 0.7774315,
  'name': 'Contact',
  'type': 'Table',
  'parsed_content': {'stblKind': 'E',
   'stblName': 'Contact',
   'stblKTable': 101000,
   'stblCode': 'cont',
   'stblCrmModule': 'core_crm',
   'stblIndexSearch': True,
   'stblIndexActive': True,
   'stblNotify': True,
   'stblFavorite': True,
   'stblSecured': '1',
   'stblFrontIcon': 'user-vneck',
   'stblFrontSidePanelActive': True,
   'stblFrontHomePageActive': True,
   'stblFrontObjectPageActive': True,
   'stblFrontDeleteActive': True,
   'stblImportEnabled': True,
   'linkedBeans': {'EDIT': [{'detailEntity': 'User',
      'relationTableName': 'Cont_User',
      'bean': 'user',
      'relation': 'contuser'},
     {'detailEntity': 'Comp',
      'relationTableName': 'Cont_Comp',
      'bean': 'comp',
      'relation': 'contcomp'},
     {'detailEntity': 'Cont',
      'relationTableName': 'Cont_Cont',
      'bean': 'cont',
      'relation': 'contcont'},
     {'detailEntit

In [53]:
chat = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {"role": "system", "content": "You are an assistant that helps with database schema analysis. You must find the best matching table or relation for a given question."
        "The goal of this model is to help users mapping the unstructured input data to the existing relational database schema."},
        {"role": "user", "content": f"What is the best matching table or relation for the following question: {question}." 
         f"Here are the best matching tables or relations found in the database schema: {json.dumps(all_results, indent=2)}."
         "Provide only the name of the best matching table or relation and a brief explanation of why it is the best match."}
    ],

)

print(chat.choices[0].message.content)

The best matching table or relation is "Cont_Comp". This relation directly describes the connection between the "Contact" and "Company" entities, as implied by its name and the parsed content indicating an existing relationship table specifically combining these two entities.


<h2> Fields mapping in dico response

In [86]:
import pandas as pd

fields_dict = dico_data['data']['fields']
fields_df = pd.DataFrame.from_dict(fields_dict.values(), orient='columns')

tables_dict = dico_data['data']['tables']
tables_df = pd.DataFrame.from_dict(tables_dict.values(), orient='columns')
tables_df = tables_df[tables_df['stblKTable'].notna()]

In [87]:
tables_df

Unnamed: 0,stblKind,stblName,stblCode,stblSecured,stblFrontIcon,stblFrontSidePanelActive,stblFrontHomePageActive,stblFrontObjectPageActive,stblFrontMappedTableCode,stblFrontDeleteActive,stblKTable,stblCrmModule,stblPrivilege,stblIndexSearch,stblIndexActive,stblFavorite,linkedBeans,stblNotify,stblImportEnabled,categoryParentKTable
43,S,SysTable,stbl,0,table,True,True,True,,,10.0,administration,8.0,,,,,,,
44,S,SysCategory,sctg,0,,,,,,,20.0,administration,8.0,,,,,,,
45,S,SysField,sfld,0,field,True,True,,,,30.0,administration,8.0,,,,,,,
46,S,SysRelation,srel,0,,,True,True,,,40.0,administration,8.0,,,,,,,
47,S,SysIndex,sidx,0,,,,,,,50.0,administration,8.0,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
404,E,PopulationDependency,pode,2,,,,,,,434000.0,marketing,,,,,,,,
405,E,AIPrompt,aipr,0,sparkles,True,True,,,,435000.0,,,,,,,,,
406,E,ExternalLink,elik,0,,,,,,,604170.0,administration,,,,,,,,
407,R,Fldr_Rule,fldrrule,0,,,,,,True,604180.0,relations,,,,,,,,


In [99]:
tables_df[(tables_df['stblKind'] == 'R') & (tables_df['linkedBeans'].notna())].head(4)

Unnamed: 0,stblKind,stblName,stblCode,stblSecured,stblFrontIcon,stblFrontSidePanelActive,stblFrontHomePageActive,stblFrontObjectPageActive,stblFrontMappedTableCode,stblFrontDeleteActive,stblKTable,stblCrmModule,stblPrivilege,stblIndexSearch,stblIndexActive,stblFavorite,linkedBeans,stblNotify,stblImportEnabled,categoryParentKTable
103,R,Pipl_Pipa,piplpipa,0,,,,,,True,35034.0,relations,,,,,"{'EDIT': [{'detailEntity': 'Pipl_Pipa', 'relat...",,,
153,R,Comp_Addr,compaddr,0,,,,,,True,100072.0,relations,,,,,"{'EDIT': [{'detailEntity': 'Comp_Addr', 'relat...",,,
156,R,Comp_Bank,compbank,0,,,,,,True,100086.0,relations,,,,,"{'EDIT': [{'detailEntity': 'Comp_Bank', 'relat...",,,
157,R,Comp_Pymt,comppymt,0,,,,,,True,100087.0,relations,,,,,"{'EDIT': [{'detailEntity': 'Comp_Pymt', 'relat...",,,


In [108]:
tables_df[tables_df['stblKTable'] == 35034.0]['linkedBeans'].to_dict()

{103: {'EDIT': [{'detailEntity': 'Pipl_Pipa',
    'relationTableName': 'Pipl_Pipa',
    'bean': 'piplpipa',
    'relation': 'piplpipa'}],
  'CONSULT': [{'detailEntity': 'Pipl_Pipa',
    'relationTableName': 'Pipl_Pipa',
    'bean': 'piplpipa',
    'relation': 'piplpipa'}]}}

In [83]:
fields_df

Unnamed: 0,sfldKey,sfldKTable,sfldName,sfldQuery,sfldGrid,sfldDataType,sfldDefaultValue,sfldMassAction,sfldType,sfldUsage,...,isInlineEditAllowed,sfldDataLength,sfldAllowNull,sfldFobKTable,sfldRefrTable,sfldRefrField,sfldMultivalue,sfldIsCustomCreated,sfldLabelPos,sfldTranslate
0,0001000U000003fh,10,stblKey,True,True,Key,KEYGEN,True,FOB,K,...,,,,,,,,,,
1,0001000U000003fj,10,stblKTable,True,True,Integer,,True,INTEGER,,...,True,,,,,,,,,
2,0001000U000003fl,10,stblName,True,True,Alphanumeric,,True,ALPHANUMERIC,,...,True,30.0,,,,,,,,
3,0001000U000003fn,10,stblSysCreatedDate,True,True,Date,DATEGMT,True,DATE,,...,True,,,,,,,,,
4,0001000U000003fp,10,stblSysUpdatedDate,True,True,Date,DATEGMT,True,DATE,,...,True,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4213,0001000U000003nj,604180,fldrruleSysUpdatedDate,True,True,DateTime,DATEGMT,True,DATE,,...,True,,True,,,,,True,,
4214,0001000U000003nl,604180,fldrruleMain,True,True,Logical,0,True,BOOLEAN,,...,True,,True,,,,,True,,
4215,0001000U000003nn,604180,fldrruleRuleKey,True,True,Key,KEYGEN,True,FOB,K,...,,,,,,,,True,,
4216,0001000U000003np,604180,fldrruleFolderKey,True,True,Key,KEYGEN,True,FOB,K,...,,,,,,,,True,,


In [79]:
compcomp_vue = fields_df[fields_df['sfldKTable'] == 100100]
columns_list = compcomp_vue.columns.tolist()
columns_list

['sfldKey',
 'sfldKTable',
 'sfldName',
 'sfldQuery',
 'sfldGrid',
 'sfldDataType',
 'sfldDefaultValue',
 'sfldMassAction',
 'sfldType',
 'sfldUsage',
 'sfldAvailableInSegments',
 'isInlineEditAllowed',
 'sfldDataLength',
 'sfldAllowNull',
 'sfldFobKTable',
 'sfldRefrTable',
 'sfldRefrField',
 'sfldMultivalue',
 'sfldIsCustomCreated',
 'sfldLabelPos',
 'sfldTranslate']

In [None]:
common_cols = [col for col in (tables_df.columns.tolist()) and (fields_df.columns.tolist())]

['sfldKey',
 'sfldKTable',
 'sfldName',
 'sfldQuery',
 'sfldGrid',
 'sfldDataType',
 'sfldDefaultValue',
 'sfldMassAction',
 'sfldType',
 'sfldUsage',
 'sfldAvailableInSegments',
 'isInlineEditAllowed',
 'sfldDataLength',
 'sfldAllowNull',
 'sfldFobKTable',
 'sfldRefrTable',
 'sfldRefrField',
 'sfldMultivalue',
 'sfldIsCustomCreated',
 'sfldLabelPos',
 'sfldTranslate']

In [120]:
from Agent.chunk_generator import generate_table_ingestion_chunks

table_chunks = generate_table_ingestion_chunks(dico_data, tables_df, fields_df) 

len(table_chunks)

286

In [127]:
table_chunks[200].metadata

{'chunk_type': 'table_ingestion_profile',
 'primary_table': 'Repo_Subs',
 'table_code': 'reposubs',
 'table_kind': 'Relation',
 'field_count': 8}

In [122]:

table_chunks[0].page_content[:500]

'=== TABLE INGESTION PROFILE: SecureToken (stkn) ===\nType: Entity Table\nPurpose: <ADD BUSINESS PURPOSE HERE IF KNOWN>\n\n# SCHEMA & CONSTRAINTS\nTotal fields: 8\nRequired fields: (none detected)\n\n# FIELD DEFINITIONS\n- stknKey (None): FOB | NULLABLE\n- stknControlString (None): MEMO | NULLABLE\n- stknTokenUsage (None): MEMO | NULLABLE\n- stknSysCreatedUserKey (None): FOB | NULLABLE\n- stknSysUpdatedUserKey (None): FOB | NULLABLE\n- stknSysCreatedDate (None): DATE | NULLABLE\n- stknSysUpdatedDate (None): DAT'