In [1]:
import { pipeline, env } from "https://cdn.jsdelivr.net/npm/@xenova/transformers";
import { QdrantClient } from "npm:@qdrant/qdrant-js";

// Limit the ONNX WASM backend used by transformers.js to a single thread.
env.backends.onnx.wasm.numThreads = 1;

[33m1[39m

In [2]:
// Feature-extraction pipeline backed by the Xenova/all-MiniLM-L6-v2 model.
const embedder = await pipeline(
  "feature-extraction",
  "Xenova/all-MiniLM-L6-v2",
);

In [68]:
// Options handed to the embedder: mean pooling with normalization enabled.
const embedderOptions = {
  pooling: "mean",
  normalize: true,
};

In [5]:
// Qdrant client configured for localhost on port 6333.
const client = new QdrantClient({
  host: "localhost",
  port: 6333,
});

In [6]:
// Read the cached issues file synchronously and parse it as JSON.
const issues = JSON.parse(
  Deno.readTextFileSync("api/issues-cache.json"),
);

In [14]:
// Accumulates one { id, vector, payload } entry per embedded issue.
const vectors = [];

In [16]:
// Embed every cached issue ("<title>: <body>") and append one entry per issue
// to `vectors`, keeping the issue's url/title/body as payload.
for (const issue of issues) {
  const { id, title, body, html_url: url } = issue;

  // GitHub reports `body: null` for issues without a description. Fall back to
  // an empty string so the literal text "null" is never fed to the embedder.
  const text = `${title}: ${body ?? ""}`;

  // Use the shared options so indexed vectors and query vectors are produced
  // with the same pooling/normalization settings.
  const em = await embedder(text, embedderOptions);

  vectors.push({
    id,
    // tolist() is nested; the first row is the embedding of the single input.
    vector: em.tolist()[0],
    payload: {
      // The payload keeps the original body untouched (it may be null).
      issue: { url, title, body },
    },
  });
}

[33m2447[39m

In [67]:
// Fetch the logs from the local service and keep the stack trace of one entry
// as the sample error text to search with.
const logsResponse = await fetch("http://localhost:8788/v0/logs");
if (!logsResponse.ok) {
  // Fail loudly on HTTP errors instead of trying to parse an error body as logs.
  throw new Error(
    `Failed to fetch logs: ${logsResponse.status} ${logsResponse.statusText}`,
  );
}
const logs = await logsResponse.json();
// The second log entry (index 1) is the one used as the sample error.
const jsxSampleError = logs.logs[1].message.stack;

In [74]:
// Display the length of the first stored vector (the Orama schema further down declares vector[384]).
vectors[0].vector.length

[33m384[39m

In [86]:
// Embed the sample error stack with the shared options and flatten the nested
// list into a single array of numbers.
const jsxSampleErrorEmbedding = (
  await embedder(jsxSampleError, embedderOptions)
)
  .tolist()
  .flat();

# Now we're trying it out with Orama in-memory/disk persistence

In [59]:
import { create, insertMultiple, search } from 'npm:@orama/orama'

In [75]:
// Orama database with one text field and one 384-dimensional vector field.
const db = await create({
  schema: {
    body: "string", // issue title and body joined into a single text
    embedding: "vector[384]", // 384 is the length of the stored embeddings
  },
});

In [83]:
// Turn each stored entry into an Orama document: the issue text plus its embedding.
const dataset = vectors.map(({ vector, payload }) => {
  const { title, body } = payload.issue;
  return {
    // Issues without a description carry `body: null`; use an empty string
    // rather than concatenating the literal text "null".
    body: `${title}: ${body ?? ""}`,
    embedding: vector,
  };
});

In [85]:
// Bulk-insert the prepared documents into the Orama database; the cell output lists the returned ids.
await insertMultiple(db, dataset)

[
  "95064623-1",  "95064623-2",  "95064623-3",  "95064623-4",  "95064623-5",
  "95064623-6",  "95064623-7",  "95064623-8",  "95064623-9",  "95064623-10",
  "95064623-11", "95064623-12", "95064623-13", "95064623-14", "95064623-15",
  "95064623-16", "95064623-17", "95064623-18", "95064623-19", "95064623-20",
  "95064623-21", "95064623-22", "95064623-23", "95064623-24", "95064623-25",
  "95064623-26", "95064623-27", "95064623-28", "95064623-29", "95064623-30",
  "95064623-31", "95064623-32", "95064623-33", "95064623-34", "95064623-35",
  "95064623-36", "95064623-37", "95064623-38", "95064623-39", "95

In [89]:
// Vector search against the `embedding` property, using the sample-error
// embedding as the query vector.
const results = await search(db, {
  mode: "vector",
  vector: {
    property: "embedding",
    value: jsxSampleErrorEmbedding,
  },
  similarity: 0.6, // minimum similarity; the default is 0.8
  limit: 33, // the default is 10
  offset: 0, // the default is 0
  includeVectors: true, // the default is false
});

In [90]:
// Display the search response (its `count` and `hits` are printed as the cell output).
results

{
  count: [33m3[39m,
  hits: [
    {
      id: [32m"95064623-445"[39m,
      score: [33m0.6830262192378667[39m,
      document: {
        body: [32m"html renderer `isEscaped` TypeError: ### What version of Hono are you using?\n"[39m +
          [32m"\n"[39m +
          [32m"4.0.1\n"[39m +
          [32m"\n"[39m +
          [32m"### What runtim"[39m... 1831 more characters,
        embedding: [
          [33m-0.02282983809709549[39m,
          [33m-0.02159351482987404[39m,
          [33m0.0423736609518528[39m,
          [33m-0.03101913444697857[39m,
          [33m-0.04926181584596634[39m,
          [33m-0.05285801738500595[39m,
          [33m-0.03062395565211773[39m,
          [33m0.03202451020479202[39m,
          [33m-0.0281803198158741[39m,
          [33m0.011611485853791237[39m,
          [33m-0.00021630954870488495[39m,
          [33m-0.060184579342603683[39m,
          [33m-0.06411124765872955[39m,
          [33m0.00017331457638647407[3