Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
334a6fd
feat: added posthog likn
Jul 25, 2025
bbaff96
Merge remote-tracking branch 'origin/main' into generators-yml-links
Jul 25, 2025
127bcfd
feat: added posthog likn
Jul 25, 2025
1164aa4
feat: updating readme
Jul 26, 2025
4641ec8
Merge remote-tracking branch 'origin/main' into generators-yml-links
Jul 28, 2025
9917caa
fix: merged with main
Jul 28, 2025
917aa50
fix: adding changelog button
Jul 28, 2025
f2a87b0
fix: adding changelog button
Jul 28, 2025
47c6bbd
fix: added changelog icon
Jul 28, 2025
0cd1fa7
fix: added changelog icon
Jul 28, 2025
e48260b
fix: added changelog icon
Jul 28, 2025
0b3a249
fix: added changelog icon
Jul 28, 2025
d1b0f8f
fix: remvoed ts svgs
Jul 28, 2025
b0bf978
feat: added scribe issue templte
Jul 28, 2025
48c8434
feat: added scribe issue templte
Jul 28, 2025
2218e7f
feat: added all scribe scripts
Jul 28, 2025
3c0e949
feat: added turbopuffer support
Jul 29, 2025
a9e7c03
feat: added query to turbopuffer and slack normalization
Jul 29, 2025
de5a071
feat: added query to turbopuffer and slack normalization
Jul 29, 2025
c5e4526
feat: adding console logs
Jul 29, 2025
4fa8983
feat: updated fern scribe script
Jul 29, 2025
fac278e
feat: merged
Jul 29, 2025
a612432
feat: merged
Jul 29, 2025
acd7ba4
feat: merged
Jul 29, 2025
b95deff
feat: merged
Jul 29, 2025
6292d08
Merge remote-tracking branch 'origin/main' into scribe-issue-turbopuffer
Jul 29, 2025
53ecf0a
feat: added turbopuffer sdk
Jul 29, 2025
79da328
Merge remote-tracking branch 'origin/main' into scribe-issue-turbopuffer
Jul 29, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
133 changes: 96 additions & 37 deletions .github/scripts/fern-scribe.js
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
const { Octokit } = require('@octokit/rest');
const Turbopuffer = require('@turbopuffer/turbopuffer').default;
const fs = require('fs').promises;
const path = require('path');

class FernScribe {
constructor() {
this.octokit = new Octokit({ auth: process.env.GITHUB_TOKEN });
this.turbopufferEndpoint = process.env.TURBOPUFFER_ENDPOINT;
this.turbopufferApiKey = process.env.TURBOPUFFER_API_KEY;
this.turbopuffer = new Turbopuffer({
apiKey: process.env.TURBOPUFFER_API_KEY,
region: "gcp-us-east4",
});
this.anthropicApiKey = process.env.ANTHROPIC_API_KEY;
this.slackToken = process.env.SLACK_USER_TOKEN;

Expand Down Expand Up @@ -303,6 +306,39 @@ class FernScribe {
}
}

reciprocalRankFusion(semanticResults, bm25Results) {
const k = 60; // RRF constant
const combinedScores = new Map();

// Add semantic results with RRF scoring
semanticResults.forEach((result, index) => {
const score = 1 / (k + index + 1);
const id = result.id;
if (id) {
combinedScores.set(id, { result, score });
}
});

// Add BM25 results with RRF scoring
bm25Results.forEach((result, index) => {
const score = 1 / (k + index + 1);
const id = result.id;
if (id) {
const existing = combinedScores.get(id);
if (existing) {
existing.score += score;
} else {
combinedScores.set(id, { result, score });
}
}
});

// Sort by combined score and return results
return Array.from(combinedScores.values())
.sort((a, b) => b.score - a.score)
.map(item => item.result);
}

async queryTurbopuffer(query, opts = {}) {
if (!query || query.trimStart().length === 0) {
console.log('🔧 Empty query provided to Turbopuffer');
Expand All @@ -312,46 +348,69 @@ class FernScribe {
try {
console.log('🔧 Querying Turbopuffer with options:', JSON.stringify(opts, null, 2));

const {
namespace,
topK = 10,
mode = "hybrid",
documentIdsToIgnore = [],
urlsToIgnore = []
} = opts;

const ns = this.turbopuffer.namespace(namespace);

// Create embedding for the query
const embeddingResponse = await this.createEmbedding(query);
if (!embeddingResponse) {
const vector = await this.createEmbedding(query);
if (!vector) {
console.error('🔧 Failed to create embedding for query');
return [];
}

const requestBody = {
query_embedding: embeddingResponse,
top_k: opts.topK || 10,
namespace: opts.namespace,
...(opts.documentIdsToIgnore && { document_ids_to_ignore: opts.documentIdsToIgnore }),
...(opts.urlsToIgnore && { urls_to_ignore: opts.urlsToIgnore })
};

console.log('🔧 Turbopuffer request body (without embedding):', {
...requestBody,
query_embedding: `[${embeddingResponse.length} dimensions]`
});

const response = await fetch(this.turbopufferEndpoint, {
method: 'POST',
headers: {
'Authorization': `Bearer ${this.turbopufferApiKey}`,
'Content-Type': 'application/json'
},
body: JSON.stringify(requestBody)
});

if (!response.ok) {
const errorText = await response.text();
console.error('🔧 Turbopuffer API error details:', errorText);
throw new Error(`Turbopuffer API error: ${response.status}`);
}

const data = await response.json();
console.log('🔧 Turbopuffer response structure:', Object.keys(data));
console.log('🔧 Turbopuffer results count:', data.results?.length || 0);
// Build filters
const documentIdFilters = documentIdsToIgnore.map((id) => ["id", "NotEq", id]);
const urlFilters = urlsToIgnore.map((url) => ["url", "NotEq", url]);

const allFilters = [...documentIdFilters, ...urlFilters];
const queryFilters = allFilters.length > 0
? (allFilters.length === 1 ? allFilters[0] : ["And", allFilters])
: undefined;

console.log('🔧 Turbopuffer query filters:', queryFilters);

// Semantic search (vector similarity)
const semanticResponse = mode !== "bm25" ? await ns.query({
rank_by: ["vector", "ANN", vector],
top_k: topK,
include_attributes: true,
filters: queryFilters,
}) : { rows: [] };

// BM25 search (keyword matching) - search across multiple text fields
const bm25Response = mode !== "semantic" && query.length < 1024 ? await ns.query({
rank_by: [
"Sum",
[
["chunk", "BM25", query],
["title", "BM25", query],
["keywords", "BM25", query],
],
],
top_k: topK,
include_attributes: true,
filters: queryFilters,
}) : { rows: [] };

const semanticResults = semanticResponse.rows || [];
const bm25Results = bm25Response.rows || [];

console.log('🔧 Semantic results count:', semanticResults.length);
console.log('🔧 BM25 results count:', bm25Results.length);

// Combine results using reciprocal rank fusion
const fusedResults = this.reciprocalRankFusion(semanticResults, bm25Results);

console.log('🔧 Fused results count:', fusedResults.length);

return data.results || [];
return fusedResults;
} catch (error) {
console.error('🔧 Turbopuffer query failed:', error);
return [];
Expand All @@ -370,7 +429,7 @@ class FernScribe {
'Content-Type': 'application/json'
},
body: JSON.stringify({
model: 'text-embedding-3-small',
model: 'text-embedding-3-large',
input: text
})
});
Expand Down
3 changes: 2 additions & 1 deletion .github/scripts/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@
"main": "fern-scribe.js",
"dependencies": {
"@octokit/rest": "^20.0.2",
"@turbopuffer/turbopuffer": "^0.10.14",
"node-fetch": "^3.3.2"
},
"engines": {
"node": ">=18"
}
}
}