Skip to content

Commit

Permalink
cleaned up pinecone integration
Browse files Browse the repository at this point in the history
  • Loading branch information
mbilokonsky authored and kodiakhq[bot] committed Mar 17, 2024
1 parent 8ca62e0 commit 1d4d68f
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 8 deletions.
19 changes: 18 additions & 1 deletion apps/course-builder-web/src/utils/pinecone.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,24 @@
import { Pinecone } from '@pinecone-database/pinecone'
import { CreateIndexOptions, Index, Pinecone } from '@pinecone-database/pinecone'
import { PineconeConflictError } from '@pinecone-database/pinecone/dist/errors'

const pc = new Pinecone()

/**
 * Returns a handle to the Pinecone index named `opts.name`, creating the index first if needed.
 *
 * Creation is requested with `suppressConflicts` enabled, so the Pinecone client itself ignores
 * the "index already exists" conflict. Any other failure (for example bad credentials, an
 * invalid spec, or a network error) propagates to the caller instead of being logged and
 * silently ignored, which previously deferred the failure to the first query against the index.
 *
 * @param opts - Options forwarded to `pc.createIndex`; `opts.name` is also used for the lookup.
 * @returns A handle to the index named `opts.name`.
 * @throws Whatever `pc.createIndex` rejects with for reasons other than a name conflict.
 */
export async function get_or_create_index(opts: CreateIndexOptions): Promise<Index> {
  // Always suppress the conflict, even if the caller passed suppressConflicts: false,
  // because "already exists" is the expected outcome on every run after the first.
  await pc.createIndex({ ...opts, suppressConflicts: true })

  return pc.index(opts.name)
}

/**
 * Looks up an index by name on the shared Pinecone client.
 *
 * @param name - Name of the index to look up.
 * @returns The result of `pc.index(name)`, delivered through the async function's promise.
 */
export async function get_index(name: string) {
  const handle = await pc.index(name)
  return handle
}
24 changes: 17 additions & 7 deletions apps/course-builder-web/src/utils/vector-utils/concepts.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { PineconeRecord } from '@pinecone-database/pinecone'
import { Index, Pinecone, PineconeRecord } from '@pinecone-database/pinecone'

import { get_embedding } from '../openai'
import { get_index } from '../pinecone'
import { get_or_create_index } from '../pinecone'

export type Concent = {
id: string
Expand All @@ -12,21 +12,32 @@ export type Concent = {
}
}

// Module-level handle to the 'concepts' index. It is obtained with a top-level await when this
// module is evaluated and is shared by the functions in this file that query or fetch concepts.
const index: Index = await get_or_create_index({
name: 'concepts',
// NOTE(review): presumably matches the length of the vectors returned by get_embedding — confirm.
dimension: 1536,
// Queries against this index are scored with the cosine metric.
metric: 'cosine',
spec: {
// Serverless deployment target used if the index has to be created.
serverless: {
cloud: 'aws',
region: 'us-west-2',
},
},
})

/**
 * Finds stored concepts whose embeddings are close to the embedding of `text`.
 *
 * The 'concepts' index is queried for the 5 nearest records (with metadata), and only matches
 * whose score is at least `min_similarity` are kept. The index is created with the cosine
 * metric, where a higher score means a closer match, so the default of 0.8 corresponds to
 * "within 0.2" of a perfect match. The previous `score < 0.2` check kept only the least
 * similar matches. Tune `min_similarity` so the cutoff is broad enough to capture synonyms but
 * narrow enough to filter out unrelated concepts.
 *
 * @param text - Term to look up related concepts for.
 * @param min_similarity - Minimum score a match must reach to be returned. Defaults to 0.8.
 * @returns The matching records, or an empty array when no embedding could be produced.
 */
export async function get_related_concepts(text: string, min_similarity = 0.8) {
  const embedding = (await get_embedding(text)).embedding
  if (!embedding) return []

  const response = await index.query({ vector: embedding, topK: 5, includeMetadata: true })

  // Compare against undefined explicitly so a legitimate score of 0 is not treated as missing.
  return response.matches.filter((match) => match.score !== undefined && match.score >= min_similarity)
}

export async function add_concept(text: string) {
const embedding = (await get_embedding(text)).embedding

if (!embedding) throw new Error('Unable to create embedding for concept: ' + text)

const index = await get_index('concepts')
const new_record: PineconeRecord = {
id: text,
values: embedding,
Expand All @@ -39,7 +50,6 @@ export async function add_concept(text: string) {
}

export async function add_alias_to_concept(concept_id: string, alias: string) {
const index = await get_index('concepts')
const fetch_results = await index.fetch([concept_id])

const concept = fetch_results.records[concept_id]
Expand Down

0 comments on commit 1d4d68f

Please sign in to comment.