constructive-io · pyramation · Mar 30, 2026 · Mar 30, 2026 · Mar 30, 2026
diff --git a/graphile/node-type-registry/src/blueprint-types.generated.ts b/graphile/node-type-registry/src/blueprint-types.generated.ts
@@ -88,6 +88,18 @@ export interface DataEmbeddingParams {
   job_task_name?: string;
   /* Strategy for tracking embedding staleness. column: embedding_stale boolean. null: set embedding to NULL. hash: md5 hash of source fields. */
   stale_strategy?: "column" | "null" | "hash";
+  /* Chunking configuration for long-text embedding. Creates an embedding_chunks record that drives automatic text splitting and per-chunk embedding. Omit to skip chunking. */
+  chunks?: {
+    /* Name of the text content column in the chunks table */content_field_name?: string;
+    /* Maximum number of characters per chunk */chunk_size?: number;
+    /* Number of overlapping characters between consecutive chunks */chunk_overlap?: number;
+    /* Strategy for splitting text into chunks */chunk_strategy?: "fixed" | "sentence" | "paragraph" | "semantic";
+    /* Metadata fields from parent to copy into chunks */metadata_fields?: {
+      [key: string]: unknown;
+    };
+    /* Whether to auto-enqueue a chunking job on insert/update */enqueue_chunking_job?: boolean;
+    /* Task identifier for the chunking job queue */chunking_task_name?: string;
+  };
 }
 /** Adds a tsvector column with GIN index and automatic trigger population from source fields. Enables PostgreSQL full-text search with configurable weights and language support. Leverages the existing metaschema full_text_search infrastructure. */
 export interface DataFullTextSearchParams {
@@ -143,6 +155,17 @@ export interface DataSearchParams {
     metric?: "cosine" | "l2" | "ip";
     source_fields?: string[];
     search_score_weight?: number;
+    /* Chunking configuration for long-text embedding. Creates an embedding_chunks record that drives automatic text splitting and per-chunk embedding. Omit to skip chunking. */chunks?: {
+      /* Name of the text content column in the chunks table */content_field_name?: string;
+      /* Maximum number of characters per chunk */chunk_size?: number;
+      /* Number of overlapping characters between consecutive chunks */chunk_overlap?: number;
+      /* Strategy for splitting text into chunks */chunk_strategy?: "fixed" | "sentence" | "paragraph" | "semantic";
+      /* Metadata fields from parent to copy into chunks */metadata_fields?: {
+        [key: string]: unknown;
+      };
+      /* Whether to auto-enqueue a chunking job on insert/update */enqueue_chunking_job?: boolean;
+      /* Task identifier for the chunking job queue */chunking_task_name?: string;
+    };
   };
   /* Field names to tag with @trgmSearch for fuzzy/typo-tolerant matching */
   trgm_fields?: string[];

diff --git a/graphile/node-type-registry/src/data/data-embedding.ts b/graphile/node-type-registry/src/data/data-embedding.ts
@@ -74,6 +74,52 @@ export const DataEmbedding: NodeTypeDefinition = {
         ],
         "description": "Strategy for tracking embedding staleness. column: embedding_stale boolean. null: set embedding to NULL. hash: md5 hash of source fields.",
         "default": "column"
+      },
+      "chunks": {
+        "type": "object",
+        "description": "Chunking configuration for long-text embedding. Creates an embedding_chunks record that drives automatic text splitting and per-chunk embedding. Omit to skip chunking.",
+        "properties": {
+          "content_field_name": {
+            "type": "string",
+            "description": "Name of the text content column in the chunks table",
+            "default": "content"
+          },
+          "chunk_size": {
+            "type": "integer",
+            "description": "Maximum number of characters per chunk",
+            "default": 1000
+          },
+          "chunk_overlap": {
+            "type": "integer",
+            "description": "Number of overlapping characters between consecutive chunks",
+            "default": 200
+          },
+          "chunk_strategy": {
+            "type": "string",
+            "enum": [
+              "fixed",
+              "sentence",
+              "paragraph",
+              "semantic"
+            ],
+            "description": "Strategy for splitting text into chunks",
+            "default": "fixed"
+          },
+          "metadata_fields": {
+            "type": "object",
+            "description": "Metadata fields from parent to copy into chunks"
+          },
+          "enqueue_chunking_job": {
+            "type": "boolean",
+            "description": "Whether to auto-enqueue a chunking job on insert/update",
+            "default": true
+          },
+          "chunking_task_name": {
+            "type": "string",
+            "description": "Task identifier for the chunking job queue",
+            "default": "generate_chunks"
+          }
+        }
       }
     }
   },

diff --git a/graphile/node-type-registry/src/data/data-search.ts b/graphile/node-type-registry/src/data/data-search.ts
@@ -108,6 +108,52 @@ export const DataSearch: NodeTypeDefinition = {
           "search_score_weight": {
             "type": "number",
             "default": 1
+          },
+          "chunks": {
+            "type": "object",
+            "description": "Chunking configuration for long-text embedding. Creates an embedding_chunks record that drives automatic text splitting and per-chunk embedding. Omit to skip chunking.",
+            "properties": {
+              "content_field_name": {
+                "type": "string",
+                "description": "Name of the text content column in the chunks table",
+                "default": "content"
+              },
+              "chunk_size": {
+                "type": "integer",
+                "description": "Maximum number of characters per chunk",
+                "default": 1000
+              },
+              "chunk_overlap": {
+                "type": "integer",
+                "description": "Number of overlapping characters between consecutive chunks",
+                "default": 200
+              },
+              "chunk_strategy": {
+                "type": "string",
+                "enum": [
+                  "fixed",
+                  "sentence",
+                  "paragraph",
+                  "semantic"
+                ],
+                "description": "Strategy for splitting text into chunks",
+                "default": "fixed"
+              },
+              "metadata_fields": {
+                "type": "object",
+                "description": "Metadata fields from parent to copy into chunks"
+              },
+              "enqueue_chunking_job": {
+                "type": "boolean",
+                "description": "Whether to auto-enqueue a chunking job on insert/update",
+                "default": true
+              },
+              "chunking_task_name": {
+                "type": "string",
+                "description": "Task identifier for the chunking job queue",
+                "default": "generate_chunks"
+              }
+            }
           }
         }
       },

diff --git a/graphql/query/src/introspect/infer-tables.ts b/graphql/query/src/introspect/infer-tables.ts
@@ -540,14 +540,11 @@ function inferHasManyOrManyToMany(
   const isManyToMany = field.name.includes('By') && field.name.includes('And');
 
   if (isManyToMany) {
-    // For ManyToMany, extract the actual entity name from the field name prefix
-    // Field name pattern: {relatedEntities}By{JunctionTable}{Keys}
-    // e.g., "usersByMembershipActorIdAndEntityId" → "users" → "User"
-    // e.g., "productsByOrderItemOrderIdAndProductId" → "products" → "Product"
-    const prefixMatch = field.name.match(/^([a-z]+)By/i);
-    const actualEntityName = prefixMatch
-      ? singularize(ucFirst(prefixMatch[1]))
-      : relatedEntityName;
+    // Use the entity name already resolved from the connection type mapping.
+    // This is more reliable than re-singularizing from the field name prefix,
+    // which can produce incorrect inflections (e.g. "codebases" → "Codebasis"
+    // instead of the correct "Codebase").
+    const actualEntityName = relatedEntityName;
 
     // Try to extract junction table from field name
     // Pattern: {relatedEntities}By{JunctionTable}{Keys}