In [26]:
import { create, insertMultiple, insert, search } from 'npm:@orama/orama'
import { restoreFromFile, persistToFile } from 'npm:@orama/plugin-data-persistence/server'
import { pipeline, env } from "https://cdn.jsdelivr.net/npm/@xenova/transformers";
// Restrict the ONNX WASM backend to one thread.
env.backends.onnx.wasm.numThreads = 1;

// Options passed to the embedder: mean pooling with normalisation enabled.
const embedderOptions = {
  pooling: "mean",
  normalize: true,
};

// Feature-extraction pipeline backed by the all-MiniLM-L6-v2 model.
const embedder = await pipeline(
  "feature-extraction",
  "Xenova/all-MiniLM-L6-v2",
);

[33m1[39m

In [2]:
// Read the cached issues JSON file synchronously and parse it.
// The later loop iterates it, so it is presumably an array of issue objects — confirm.
const issues = JSON.parse(Deno.readTextFileSync("../api/issues-cache.json"))

In [15]:
// Create the Orama database that stores one document per issue.
// `title`, `body`, `url` and `id` are string fields; `vector` is a
// 384-element vector field that holds the issue's embedding.
const vectorDB = await create({
    schema: {
        title: "string",
        body: "string",
        url: "string",
        id: "string",
        // Vector searches must reference this field by the name `vector`.
        vector: "vector[384]",
    }
})

In [10]:
// Embed every cached issue and collect the raw embedder output alongside the
// issue fields that are stored in the database later on.
const vectors = [];

for (const issue of issues) {
    // The body may be missing (later cells default it to ""), so fall back to
    // an empty string instead of embedding the literal text "null"/"undefined".
    const text = `${issue.title}: ${issue.body ?? ""}`;
    // Reuse the shared options so documents and queries are embedded identically.
    const em = await embedder(text, embedderOptions);
    vectors.push({
        id: issue.id,
        vector: em,
        url: issue.html_url,
        title: issue.title,
        body: issue.body,
    });
}

[33m2447[39m

In [22]:
// Build insertable documents whose embedding is a flat plain array
// (the embedder output's `tolist()` result, flattened one level).
// The id is stringified, and a missing body or url becomes "".
const vecsToList = vectors.map(({ vector, id, title, body, url }) => ({
    vector: vector.tolist().flat(),
    id: id.toString(),
    title,
    body: body ?? "",
    url: url ?? "",
}));

In [19]:
// Alternative document list that keeps the embedder output's raw `data`
// buffer as the embedding instead of converting it to a plain array.
const vecsFloats = vectors.map((entry) => {
    const { vector, id, title, body, url } = entry;
    return {
        vector: vector.data,
        id: id.toString(),
        title,
        body: body ?? "",
        url: url ?? "",
    };
});

In [24]:
// Bulk-insert the plain-array documents into the database.
// NOTE(review): the recorded output of this cell is a duplicate-id error —
// presumably the cell was re-run against an already populated database, or
// the issues cache contains repeated ids; confirm before relying on it.
await insertMultiple(vectorDB, vecsToList)

Error: A document with id "2318612341" already exists.

In [28]:
// Write the database to ../api/issues-embeddings.msp using the "binary" format.
await persistToFile(vectorDB,"binary", "../api/issues-embeddings.msp")

[32m"../api/issues-embeddings.msp"[39m

In [13]:
// Fetch the logs from the local server and parse the JSON response.
const logs = await (await fetch("http://localhost:8788/v0/logs")).json();

// Use the nested message text of the third log entry as the sample query.
const { message: jsxSampleError } = logs.logs[2].message;
jsxSampleError

[32m"process is not defined"[39m

In [14]:
// Embed the sample error text with the shared options and keep only the
// `data` property of the embedder output.
const { data: jsxSampleErrorEmbedding } = await embedder(
  jsxSampleError,
  embedderOptions,
);

In [None]:
// Display the database object to inspect its internal state.
vectorDB

{
  data: {
    index: {
      sharedInternalDocumentStore: {
        idToInternalId: Map(2447) {
          [32m"77922575-1"[39m => [33m1[39m,
          [32m"77922575-2"[39m => [33m2[39m,
          [32m"77922575-3"[39m => [33m3[39m,
          [32m"77922575-4"[39m => [33m4[39m,
          [32m"77922575-5"[39m => [33m5[39m,
          [32m"77922575-6"[39m => [33m6[39m,
          [32m"77922575-7"[39m => [33m7[39m,
          [32m"77922575-8"[39m => [33m8[39m,
          [32m"77922575-9"[39m => [33m9[39m,
          [32m"77922575-10"[39m => [33m10[39m,
          [32m"77922575-11"[39m => [33m11[39m,
          [32m"77922575-12"[39m => [33m12[39m,
          [32m"77922575-13"[39m => [33m13[39m,
          [32m"77922575-14"[39m => [33m14[39m,
          [32m"77922575-15"[39m => [33m15[39m,
          [32m"77922575-16"[39m => [33m16[39m,
          [32m"77922575-17"[39m => [33m17[39m,
          [32m"77922575-18"[39m => [33m18[39m,
  

In [26]:
// Vector search: find the stored issues whose embeddings are most similar to
// the embedding of the sample error message.
const results = await search(vectorDB, {
  mode: 'vector',
  vector: {
    value: jsxSampleErrorEmbedding,
    // Must name the vector field declared in the schema passed to `create`,
    // which is `vector` (the inserted documents use the same key).
    property: 'vector',
  },
  similarity: 0.3,      // Minimum similarity. Defaults to `0.8`
  limit: 3,             // Defaults to `10`
  offset: 0,             // Defaults to `0`
})

In [27]:
// Display the search results (hit count and matching documents).
results

{
  count: [33m24[39m,
  hits: [
    {
      id: [32m"77922575-1763"[39m,
      score: [33m0.48561719339137566[39m,
      document: {
        embedding: [1mnull[22m,
        title: [32m"`c.runtime === 'node'` does not work well"[39m,
        url: [32m"https://github.com/honojs/hono/issues/685"[39m,
        body: [32m"`c.runtime === 'node'` does not work well. It is because the `global.process.title` is such as`/User"[39m... 97 more characters
      }
    },
    {
      id: [32m"77922575-456"[39m,
      score: [33m0.376411462347048[39m,
      document: {
        embedding: [1mnull[22m,
        title: [32m"Can't Run Cloudflare Pages Template On Fresh Initialization on Windows"[39m,
        url: [32m"https://github.com/honojs/hono/issues/2182"[39m,
        body: [32m"### What version of Hono are you using?\r\n"[39m +
          [32m"\r\n"[39m +
          [32m"4.0\r\n"[39m +
          [32m"\r\n"[39m +
          [32m"### What runtime/platform is your app runn