commontoolsinc · bfollington · Mar 4, 2025 · Mar 4, 2025 · Mar 4, 2025
diff --git a/...kages/toolshed/routes/ai/spell/fulfill.ts → ...lshed/routes/ai/spell/handlers/fulfill.ts b/...kages/toolshed/routes/ai/spell/fulfill.ts → ...lshed/routes/ai/spell/handlers/fulfill.ts
@@ -1,19 +1,85 @@
-import { getAllBlobs, getBlob } from "@/routes/ai/spell/behavior/effects.ts";
-import { generateText } from "@/lib/llm.ts";
-import { performSearch } from "@/routes/ai/spell/behavior/search.ts";
-import { checkSchemaMatch } from "@/lib/schema-match.ts";
+import * as HttpStatusCodes from "stoker/http-status-codes";
+import { z } from "zod";
+import { getAllBlobs } from "@/routes/ai/spell/behavior/effects.ts";
+
+import type { AppRouteHandler } from "@/lib/types.ts";
+import type { FulfillSchemaRoute } from "@/routes/ai/spell/spell.routes.ts";
+import { Spell } from "@/routes/ai/spell/spell.ts";
+import { performSearch } from "../behavior/search.ts";
 import { Logger } from "@/lib/prefixed-logger.ts";
-import {
-  ProcessSchemaRequest,
-  ProcessSchemaResponse,
-} from "@/routes/ai/spell/spell.handlers.ts";
+import { candidates } from "@/routes/ai/spell/caster.ts";
+import { CasterSchemaRoute } from "@/routes/ai/spell/spell.routes.ts";
+import { processSpellSearch } from "@/routes/ai/spell/behavior/spell-search.ts";
+import { captureException } from "@sentry/deno";
+import { areSchemaCompatible } from "@/routes/ai/spell/schema-compatibility.ts";
+
+import { generateText } from "@/lib/llm.ts";
 import {
   decomposeSchema,
   findExactMatches,
   findFragmentMatches,
   reassembleFragments,
   SchemaFragment,
 } from "@/routes/ai/spell/schema.ts";
+import { extractJSON } from "@/routes/ai/spell/json.ts";
+
+/**
+ * Zod schema for the fulfill request body (the `fulfill` handler in this file
+ * types its JSON body with the type inferred from this schema).
+ *
+ * Fields:
+ * - `schema`: a record whose values may be a string, number, boolean, array,
+ *   or nested record; the OpenAPI example shows a map of field name to a
+ *   `{ type: ... }` descriptor.
+ * - `tags`: optional list of strings.
+ * - `many`: optional boolean.
+ * - `prompt`: optional string.
+ * - `options`: optional object whose members (`format`, `validate`,
+ *   `maxExamples`, `exact`) are all optional.
+ */
+export const FulfillSchemaRequestSchema = z.object({
+  schema: z.record(
+    z
+      .string()
+      .or(
+        z.number().or(z.boolean().or(z.array(z.any()).or(z.record(z.any())))),
+      ),
+  ).openapi({
+    example: {
+      title: { type: "string" },
+      url: { type: "string" },
+    },
+  }),
+  tags: z.array(z.string()).optional(),
+  many: z.boolean().optional(),
+  prompt: z.string().optional(),
+  options: z
+    .object({
+      format: z.enum(["json", "yaml"]).optional(),
+      validate: z.boolean().optional(),
+      // NOTE(review): with `.optional()` applied after `.default(5)`, a missing
+      // value presumably passes through as `undefined` instead of 5 — confirm
+      // against Zod's documented behavior before relying on the default.
+      maxExamples: z.number().default(5).optional(),
+      exact: z.boolean().optional(),
+    })
+    .optional(),
+});
+
+/**
+ * Zod schema for the fulfill response body.
+ *
+ * - `result`: either a single record or an array of records.
+ * - `metadata.processingTime`: a number (units are not visible in this
+ *   schema — presumably milliseconds; confirm against the producer).
+ * - `metadata.schemaFormat`: a string.
+ * - `metadata.fragments`: one entry per fragment, each carrying its `path`
+ *   (string segments), its `schema` record, and the `matches` found for it
+ *   (`key`, `data` record, numeric `similarity`).
+ * - `metadata.reassembledExample`: a record.
+ * - `metadata.tagMatchInfo`: `usedTags` plus a `matchRanks` list of
+ *   `{ path, matches }` entries; these members are left untyped (`z.any()`).
+ */
+export const FulfillSchemaResponseSchema = z.object({
+  result: z.union([z.record(z.any()), z.array(z.record(z.any()))]),
+  metadata: z.object({
+    processingTime: z.number(),
+    schemaFormat: z.string(),
+    fragments: z.array(
+      z.object({
+        matches: z.array(
+          z.object({
+            key: z.string(),
+            data: z.record(z.any()),
+            similarity: z.number(),
+          }),
+        ),
+        path: z.array(z.string()),
+        schema: z.record(z.any()),
+      }),
+    ),
+    reassembledExample: z.record(z.any()),
+    tagMatchInfo: z.object({
+      usedTags: z.any(),
+      matchRanks: z.array(z.object({
+        path: z.any(),
+        matches: z.any(),
+      })),
+    }),
+  }),
+});
+
+/** Request body type inferred from `FulfillSchemaRequestSchema`. */
+export type FulfillSchemaRequest = z.infer<typeof FulfillSchemaRequestSchema>;
+/** Response body type inferred from `FulfillSchemaResponseSchema`. */
+export type FulfillSchemaResponse = z.infer<typeof FulfillSchemaResponseSchema>;
 
 function calculateTagRank(
   data: Record<string, unknown>,
@@ -34,10 +100,10 @@ function calculateTagRank(
 }
 
 export async function processSchema(
-  body: ProcessSchemaRequest,
+  body: FulfillSchemaRequest,
   logger: Logger,
   startTime: number,
-): Promise<ProcessSchemaResponse> {
+): Promise<FulfillSchemaResponse> {
   const tags = body.tags || [];
   logger.info(
     { schema: body.schema, many: body.many, options: body.options, tags },
@@ -162,28 +228,6 @@ export async function processSchema(
   );
 
   let result: Record<string, unknown> | Array<Record<string, unknown>>;
-  function extractJSON(
-    text: string,
-  ): Record<string, unknown> | Array<Record<string, unknown>> {
-    try {
-      // Try to extract from markdown code block first
-      const markdownMatch = text.match(/```(?:json)?\s*([\s\S]*?)\s*```/);
-      if (markdownMatch) {
-        return JSON.parse(markdownMatch[1].trim());
-      }
-
-      // If not in markdown, try to find JSON-like content
-      const jsonMatch = text.match(/\{[\s\S]*\}|\[[\s\S]*\]/);
-      if (jsonMatch) {
-        return JSON.parse(jsonMatch[0].trim());
-      }
-
-      // If no special formatting, try parsing the original text
-      return JSON.parse(text.trim());
-    } catch (error) {
-      return {};
-    }
-  }
 
   try {
     logger.debug("Parsing LLM response");
@@ -318,3 +362,26 @@ Respond with ${
     many ? "an array of valid JSON objects" : "a single valid JSON object"
   }.`;
 }
+
+/**
+ * Route handler for the fulfill endpoint.
+ *
+ * Reads the JSON request body, hands it to `processSchema` together with the
+ * request logger and a start timestamp, and replies with the result (200).
+ * Any error thrown by `processSchema` is logged, reported via
+ * `captureException`, and answered with a generic 500 payload.
+ */
+export const fulfill: AppRouteHandler<FulfillSchemaRoute> = async (ctx) => {
+  const log: Logger = ctx.get("logger");
+  // The body is only type-asserted here, not parsed through the Zod schema.
+  const payload = (await ctx.req.json()) as FulfillSchemaRequest;
+  const requestStart = performance.now();
+
+  try {
+    const outcome = await processSchema(payload, log, requestStart);
+    const { processingTime } = outcome.metadata;
+
+    log.info({ processingTime }, "Request completed");
+    return ctx.json(outcome, HttpStatusCodes.OK);
+  } catch (error) {
+    log.error({ error }, "Error processing schema");
+    captureException(error);
+
+    const failure = { error: "Failed to process schema" };
+    return ctx.json(failure, HttpStatusCodes.INTERNAL_SERVER_ERROR);
+  }
+};
diff --git a/typescript/packages/toolshed/routes/ai/spell/handlers/imagine.ts b/typescript/packages/toolshed/routes/ai/spell/handlers/imagine.ts
@@ -0,0 +1,176 @@
+import { generateText } from "@/lib/llm.ts";
+import { Logger } from "@/lib/prefixed-logger.ts";
+import { extractJSON } from "@/routes/ai/spell/json.ts";
+import * as HttpStatusCodes from "stoker/http-status-codes";
+import { z } from "zod";
+import { captureException } from "@sentry/deno";
+import type { AppRouteHandler } from "@/lib/types.ts";
+import type { ImagineDataRoute } from "@/routes/ai/spell/spell.routes.ts";
+
+/**
+ * Zod schema for the imagine request body.
+ *
+ * - `schema`: the JSON schema format the generated data should conform to; a
+ *   record whose values may be a string, number, boolean, array, or nested
+ *   record.
+ * - `model`: name of the LLM to use; the schema default is
+ *   "claude-3-7-sonnet" (applied only when a body is parsed through this
+ *   schema).
+ * - `prompt`: optional free-text guidance for the generation.
+ * - `options.many`: whether to generate multiple results (schema default
+ *   `false`); the `options` object itself is optional.
+ */
+export const ImagineDataRequestSchema = z.object({
+  schema: z.record(
+    z
+      .string()
+      .or(
+        z.number().or(z.boolean().or(z.array(z.any()).or(z.record(z.any())))),
+      ),
+  )
+    .describe("JSON schema format to conform to")
+    .openapi({
+      example: {
+        title: { type: "string" },
+        url: { type: "string" },
+      },
+    }),
+  model: z.string().default("claude-3-7-sonnet").describe(
+    "The LLM to use for data generation",
+  ).openapi({ example: "claude-3-7-sonnet" }),
+  prompt: z.string().optional().describe(
+    "Guide data generation with a prompt",
+  ).openapi({ example: "Make it about cats" }),
+  options: z
+    .object({
+      many: z.boolean().default(false).describe(
+        "Whether to generate multiple results",
+      ),
+    })
+    .optional(),
+});
+
+/**
+ * Zod schema for the imagine response body: `result` is either a single
+ * record or an array of records, and `metadata.processingTime` is a number
+ * (`processSchema` in this file fills it with rounded elapsed milliseconds).
+ */
+export const ImagineDataResponseSchema = z.object({
+  result: z.union([z.record(z.any()), z.array(z.record(z.any()))]),
+  metadata: z.object({
+    processingTime: z.number(),
+  }),
+});
+
+/** Request body type inferred from `ImagineDataRequestSchema`. */
+export type ImagineDataRequest = z.infer<typeof ImagineDataRequestSchema>;
+/** Response body type inferred from `ImagineDataResponseSchema`. */
+export type ImagineDataResponse = z.infer<typeof ImagineDataResponseSchema>;
+
+/**
+ * Asks the LLM to invent data matching `body.schema` and returns the parsed
+ * result together with timing metadata.
+ *
+ * @param body - Request payload: the target schema, the model to use, an
+ *   optional guiding prompt, and `options.many` to request an array.
+ * @param logger - Logger used for progress and timing messages.
+ * @param startTime - `performance.now()` reading taken when the request
+ *   began; used to compute `metadata.processingTime`.
+ * @returns The JSON extracted from the LLM response (wrapped in an array when
+ *   `many` is set and a single object came back) plus the rounded elapsed
+ *   time in milliseconds.
+ * @throws Error("Failed to parse LLM response as JSON") when extracting JSON
+ *   from the response throws. Failures from `generateText` propagate as-is.
+ */
+export async function processSchema(
+  body: ImagineDataRequest,
+  logger: Logger,
+  startTime: number,
+): Promise<ImagineDataResponse> {
+  logger.info(
+    { schema: body.schema, options: body.options },
+    "Starting schema processing request",
+  );
+
+  // `options` is optional, so resolve the flag once and reuse it below.
+  const many = body.options?.many ?? false;
+
+  // NOTE(review): this message mentions "reassembled examples", but no
+  // examples are gathered in this function — the prompt is built from the
+  // schema and the optional user prompt only.
+  logger.debug("Constructing prompt with reassembled examples");
+  const prompt = constructSchemaPrompt(body.schema, body.prompt, many);
+
+  logger.info({ prompt }, "Sending request to LLM");
+  const llmStartTime = performance.now();
+  const llmResponse = await generateText({
+    // Honor the model requested by the caller. The fallback mirrors the
+    // schema default, because the handler in this file passes the raw JSON
+    // body without parsing it through Zod, so `model` may be absent at runtime.
+    model: body.model ?? "claude-3-7-sonnet",
+    system: many
+      ? "Generate realistic example data that fits the provided schema. Return valid JSON array with multiple objects. Each object must match the schema exactly and respect all descriptions and constraints."
+      : "Generate realistic example data that fits the provided schema. Return a valid JSON object that matches the schema exactly and respects all descriptions and constraints.",
+    stream: false,
+    messages: [{ role: "user", content: prompt }],
+  });
+  logger.info(
+    { llmTime: Math.round(performance.now() - llmStartTime) },
+    "Received LLM response",
+  );
+
+  let result: Record<string, unknown> | Array<Record<string, unknown>>;
+
+  try {
+    logger.debug("Parsing LLM response");
+    result = extractJSON(llmResponse);
+    logger.debug({ extractedJSON: result }, "Extracted JSON from response");
+
+    // Callers asking for many results always get an array back.
+    if (many && !Array.isArray(result)) {
+      logger.debug("Converting single object to array for many=true");
+      result = [result];
+    }
+    logger.info(
+      {
+        resultType: many ? "array" : "object",
+        resultSize: many && Array.isArray(result) ? result.length : 1,
+      },
+      "Successfully parsed LLM response",
+    );
+  } catch (error) {
+    logger.error(
+      { error, response: llmResponse },
+      "Failed to parse LLM response",
+    );
+    throw new Error("Failed to parse LLM response as JSON");
+  }
+
+  const totalTime = Math.round(performance.now() - startTime);
+  logger.info(
+    { totalTime },
+    "Completed schema processing request",
+  );
+
+  return {
+    result,
+    metadata: {
+      processingTime: totalTime,
+    },
+  };
+}
+
+/**
+ * Builds the user-message prompt sent to the LLM for data generation.
+ *
+ * @param schema - Target schema; embedded as 2-space pretty-printed JSON.
+ * @param userPrompt - Optional extra guidance, emitted under an
+ *   "ADDITIONAL REQUIREMENTS" heading when non-empty.
+ * @param many - When truthy, the wording asks for an array of objects
+ *   instead of a single object.
+ * @returns The assembled prompt text.
+ */
+function constructSchemaPrompt(
+  schema: Record<string, unknown>,
+  userPrompt?: string,
+  many?: boolean,
+): string {
+  const schemaStr = JSON.stringify(schema, null, 2);
+
+  const task = many
+    ? "Generate multiple objects that fit the requested schema based on the references provided."
+    : "Fit data into the requested schema based on the references provided.";
+  const firstInstruction = many
+    ? "Generate an array of objects that strictly follow the schema structure"
+    : "Generate an object that strictly follows the schema structure";
+  const jsonKind = many ? "array" : "object";
+  const responseShape = many
+    ? "an array of valid JSON objects"
+    : "a single valid JSON object";
+  const extraRequirements = userPrompt
+    ? `# ADDITIONAL REQUIREMENTS\n${userPrompt}\n\n`
+    : "";
+
+  const sections = [
+    "# TASK",
+    // The task line keeps its two-space indent so the prompt text is unchanged.
+    `  ${task}`,
+    "",
+    "# SCHEMA",
+    schemaStr,
+    "",
+    "# INSTRUCTIONS",
+    `1. ${firstInstruction}`,
+    `2. Return ONLY valid JSON ${jsonKind} matching the schema`,
+    "",
+    extraRequirements,
+    "",
+    "# RESPONSE FORMAT",
+    `Respond with ${responseShape}.`,
+  ];
+
+  return sections.join("\n");
+}
+
+/**
+ * Route handler for the imagine endpoint.
+ *
+ * Reads the JSON request body, passes it to `processSchema` along with the
+ * request logger and a start timestamp, and replies with the generated data
+ * (200). Any error thrown while processing is logged, reported via
+ * `captureException`, and answered with a generic 500 payload.
+ */
+export const imagine: AppRouteHandler<ImagineDataRoute> = async (ctx) => {
+  const log: Logger = ctx.get("logger");
+  // The body is only type-asserted here, not parsed through the Zod schema.
+  const payload = (await ctx.req.json()) as ImagineDataRequest;
+  const requestStart = performance.now();
+
+  try {
+    const generated = await processSchema(payload, log, requestStart);
+    const { processingTime } = generated.metadata;
+
+    log.info({ processingTime }, "Request completed");
+    return ctx.json(generated, HttpStatusCodes.OK);
+  } catch (error) {
+    log.error({ error }, "Error processing schema");
+    captureException(error);
+
+    const failure = { error: "Failed to process schema" };
+    return ctx.json(failure, HttpStatusCodes.INTERNAL_SERVER_ERROR);
+  }
+};