Skip to content

Commit

Permalink
tools, refactor: optimize document indexing and disable useless fragm…
Browse files Browse the repository at this point in the history
…ents.
  • Loading branch information
xicilion committed Apr 2, 2023
1 parent a80336f commit 8b4869f
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 34 deletions.
50 changes: 29 additions & 21 deletions tools/ask.js
Expand Up @@ -5,45 +5,53 @@ const ChatGTP = require('./util/chatgpt');
// NOTE(review): this hunk is a rendered diff whose +/- markers were lost, so
// removed and added lines are interleaved and the text is not runnable as shown.
// Chat client configured from the OPENAI_API_KEY environment variable.
const chatgpt = new ChatGTP(process.env.OPENAI_API_KEY);

// Connection to the sqlite file queried by the vector search below.
const conn = db.open('sqlite:./temp/docs.db');
// Two wordings of the system prompt follow; presumably the first is the removed
// line and the second is its replacement - TODO confirm against the repository.
const prompt = `You are a fibjs development assistant, please answer the question strictly based on the following information in the most detail , do not refer to the nodejs documentation and source code`;
const prompt = `You are a fibjs development assistant, please answer the questions and explain in detail strictly based on the following information, and be sure not to refer to the nodejs documentation and source code`;
// Message text listing the module names reported by util.buildInfo().
const modules = `fibjs has the following modules built in: ${util.buildInfo().modules.join(',')}`;

// Interactive loop: read a question, request its embedding, look up the closest
// stored documents and send them with the question to the chat model.
while (true) {
var question = console.readLine("Ask a question: ");
var ask_embedding = chatgpt.get_embedding(question);

var content = [];
var content_tokens = 0;
// Joins doc_index and docs by rowid, filters with vec_search on the question
// embedding and orders rows by distance. The trailing :50 in the search
// argument is presumably a result limit - TODO confirm.
var contents = conn.execute(`SELECT docs.id, docs.text, docs.total_tokens, distance FROM doc_index, docs WHERE vec_search(doc_index.vec, "${JSON.stringify(ask_embedding.data[0].embedding)}:50") AND docs.rowid = doc_index.rowid ORDER BY distance`);
// NOTE(review): indexing element 0 throws when the query returns no rows.
console.log('top distance:', contents[0].distance);

// Same query without docs.id, bound to res; presumably the removed version,
// together with the loop below that collects text into the content array.
var res = conn.execute(`SELECT docs.text, docs.total_tokens, distance FROM doc_index, docs WHERE vec_search(doc_index.vec, "${JSON.stringify(ask_embedding.data[0].embedding)}:50") AND docs.rowid = doc_index.rowid ORDER BY distance`);
console.log('top distance:', res[0].distance);
for (var i = 0; i < res.length; i++) {
if (content_tokens < 2000) {
content.push(res[i].text);
content_tokens += res[i].total_tokens;
}
// Token budget pass over contents: rows are counted while the running total is
// below 2000, and the loop stops at the first row seen after that.
var content_tokens = 0;
for (var i = 0; i < contents.length; i++) {
if (content_tokens < 2000)
content_tokens += contents[i].total_tokens;
else
break;
}

var answer = chatgpt.chat([
// After the budget loop, i is the number of leading rows that are kept.
contents = contents.slice(0, i);

// Fixed leading messages: the prompt, then the module list.
var messages = [
{
role: 'system',
content: prompt
},
{
// Two role lines follow for the modules message; presumably the first is the
// removed value and the second the added one.
role: 'assistant',
role: 'system',
content: modules
},
{
role: 'assistant',
content: content.join('\n')
},
{
role: 'user',
content: question
}
]);
];

// Each kept document is appended as its own system message.
contents.forEach((content) => {
// console.notice(`id: ${content.id} distance: ${content.distance}`);
// console.log('content:', content.text);
messages.push({
role: 'system',
content: content.text
});
});

// The user question is pushed last, after all document messages.
messages.push({
role: 'user',
content: question
});

var answer = chatgpt.chat(messages);

// Separator line, then the model answer.
console.warn("=================================================================")
console.log('answer:', answer);
}
28 changes: 15 additions & 13 deletions tools/gen_index.js
Expand Up @@ -29,22 +29,24 @@ res.forEach(element => {
// NOTE(review): rendered diff without +/- markers; the old and new bodies of the
// callback are interleaved, and the enclosing res.forEach starts outside this hunk.
// Ids already handled, so a repeated node is processed only once.
var ids = new Set();

nodes.forEach(function (node) {
var id = hash.md5(node).digest('hex');
// Nodes whose length is 32 or less are skipped entirely.
if (node.length > 32) {
// The md5 hex digest of the node is used as the document id.
var id = hash.md5(node).digest('hex');

if (!ids.has(id)) {
ids.add(id);
if (!ids.has(id)) {
ids.add(id);

// docs presumably maps ids of rows already in the table to their deleted
// flag - TODO confirm against the code above this hunk.
if (docs[id] === undefined) {
console.log('inserting', id);
var res = chatgpt.get_embedding(node);
conn.execute('INSERT INTO docs (id, text, embedding, total_tokens) VALUES (?, ?, ?, ?)', id, node,
JSON.stringify(res.data[0].embedding), res.usage.total_tokens);
} else if (docs[id] === 1) {
console.log('updating', id);
conn.execute('UPDATE docs SET deleted = 0 WHERE id = ?', id);
}
// Unknown id: request an embedding and insert a new row with its token count.
// Id whose docs entry equals 1: clear the deleted column instead of re-embedding.
if (docs[id] === undefined) {
console.log('inserting', id);
var res = chatgpt.get_embedding(node);
conn.execute('INSERT INTO docs (id, text, embedding, total_tokens) VALUES (?, ?, ?, ?)', id, node,
JSON.stringify(res.data[0].embedding), res.usage.total_tokens);
} else if (docs[id] === 1) {
console.log('updating', id);
conn.execute('UPDATE docs SET deleted = 0 WHERE id = ?', id);
}

// The id is removed from docs once handled.
delete docs[id];
delete docs[id];
}
}
});

Expand Down

0 comments on commit 8b4869f

Please sign in to comment.