-
Notifications
You must be signed in to change notification settings - Fork 3
/
ingest.ts
39 lines (33 loc) · 1.06 KB
/
ingest.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import 'dotenv/config';
import { readFile } from 'node:fs/promises';
import { chunkDatastore, type Chunk } from './tools.js';
/**
 * Ingests the example dataset: loads every chunk via `loadChunks`, wraps each
 * one as an `{ id, metadata }` document, and upserts the whole batch into
 * `chunkDatastore` in a single call. Logs the chunk count before the upsert
 * and the elapsed wall-clock time (in milliseconds) after it.
 */
async function main() {
  const loaded = await loadChunks();
  const total = loaded.length;

  // The chunk id doubles as the document id; the full chunk is kept as metadata.
  const records = loaded.map((entry) => {
    return { id: entry.chunkId, metadata: entry };
  });

  console.log(`Upserting ${total} chunks...`);

  const startedAt = Date.now();
  await chunkDatastore.upsert(records);
  const elapsedMs = Date.now() - startedAt;

  console.log(`Upserted ${total} chunks in ${elapsedMs}ms`);
}
// From: https://huggingface.co/datasets/dexaai/huberman_on_exercise
/**
 * Reads the exported dataset JSON and converts each row into a `Chunk`.
 *
 * The file is expected to look like
 * `{ rows: [{ row: { id, document, metadata: { chunkTitle, episodeTitle, imgUrl, published, url } } }] }`.
 *
 * @param dataPath - Path of the JSON file to read. Defaults to the example
 *   dataset location, resolved relative to the current working directory.
 * @returns One `Chunk` per entry in `rows`, in file order.
 * @throws Error if the top-level `rows` value is not an array, or if an entry
 *   lacks `row` or `row.metadata`. File-read and JSON syntax errors propagate
 *   unchanged from `readFile` / `JSON.parse`.
 */
async function loadChunks(
  dataPath = './examples/chatbot/data.json'
): Promise<Chunk[]> {
  // Treat the parsed payload as unknown until its envelope has been checked.
  const parsed: unknown = JSON.parse(await readFile(dataPath, 'utf8'));
  const rows = (parsed as { rows?: unknown } | null)?.rows;
  if (!Array.isArray(rows)) {
    throw new Error(`Expected "rows" to be an array in ${dataPath}`);
  }

  return rows.map((entry, index) => {
    const row = entry?.row;
    // Fail with the offending index instead of an opaque "cannot read
    // properties of undefined" when the dataset shape is off.
    if (row == null || row.metadata == null) {
      throw new Error(
        `Row ${index} in ${dataPath} is missing "row" or "row.metadata"`
      );
    }
    const meta = row.metadata;
    return {
      chunkId: row.id,
      chunkTitle: meta.chunkTitle,
      episodeTitle: meta.episodeTitle,
      imgUrl: meta.imgUrl,
      published: meta.published,
      url: meta.url,
      transcript: row.document,
    };
  });
}
// Script entry point. Handle rejection explicitly rather than leaving a
// floating promise: log the failure with context and exit non-zero.
main().catch((error: unknown) => {
  console.error('Chunk ingest failed:', error);
  process.exitCode = 1;
});