forked from mhujer/ankiai
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Further fixes and rewrites to make it more reliable
- Loading branch information
Showing
10 changed files
with
235 additions
and
298 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,118 +1,170 @@ | ||
// import { NoteForProcessing, fetchNotesFromAnki, updateNote } from './anki'; | ||
import * as dotenv from 'dotenv'; | ||
import { fetchExamples } from './openai/get-sentences-from-chatgpt'; | ||
// import sleep from './utils/sleep'; | ||
// import { removeSentencesWithoutCharacter } from './utils/remove-sentences-without-character'; | ||
require('axios-debug-log/enable'); | ||
|
||
// const MAX_NOTES_PROCESSED_AT_ONCE = 10; | ||
// const MAX_NOTES_PROCESSED_AT_ONE_RUN = 250; | ||
|
||
// let notesProcessedCount = 0; | ||
|
||
// const processNote = async (ankiLanguage: string, noteForProcessing: NoteForProcessing) => { | ||
// const examplesFromChatGPT = await fetchExamples(ankiLanguage, noteForProcessing); | ||
// // console.log(examplesFromChatGPT) | ||
|
||
// for (const vocabularyExample of examplesFromChatGPT) { | ||
// const noteId = vocabularyExample.id; | ||
// const examples = vocabularyExample.exampleSentences; | ||
|
||
// // Remove sentences that do not contain the target word | ||
// const filteredExamples = removeSentencesWithoutCharacter(examples, noteForProcessing.text) | ||
// if (examples.length !== filteredExamples.length) { | ||
// for (const example of examples) { | ||
// if (!example.includes(noteForProcessing.text)) { | ||
// console.error(`\n\n\n${noteForProcessing.text} is not present in ${example}\n\n\n`) | ||
// } | ||
// } | ||
// } | ||
|
||
// // console.log(`Updating Anki note ${noteForProcessing.text} (${noteId}) with:\n${examples.join('\n')}\n`); | ||
|
||
// const fieldText = filteredExamples | ||
// .map((example) => `<div class="char_example">${example}</div>`) | ||
// .join(''); | ||
|
||
// const noteForAnki = { | ||
// id: +noteId, | ||
// fields: { | ||
// Examples: fieldText, | ||
// }, | ||
// }; | ||
|
||
// await updateNote(noteForAnki); | ||
// await sleep(100); | ||
// } | ||
// } | ||
import { fetchExample } from './openai/get-sentences-from-chatgpt'; | ||
import { logger } from './utils/logger'; | ||
import { NoteForProcessing, fetchNotesFromAnki, updateNote } from './anki'; | ||
import sleep from './utils/sleep'; | ||
import { removeSentencesWithoutCharacter } from './utils/remove-sentences-without-character'; | ||
import type { VocabularyExamples } from './openai/typechat-response-schema'; | ||
|
||
// Batch size: the main loop takes at most this many notes per iteration and processes them concurrently. | ||
const MAX_NOTES_PROCESSED_AT_ONCE = 5; | ||
// Per-run cap: the main loop stops once notesProcessedCount reaches this value. | ||
const MAX_NOTES_PROCESSED_AT_ONE_RUN = 400; | ||
|
||
// Running total of notes handed to processNote during this run (incremented per batch). | ||
let notesProcessedCount = 0; | ||
|
||
// Sometimes ChatGPT fails repeatedly with a word, these have to be added manually | ||
// Words listed here are matched against a note's `text`: such notes are filtered out of | ||
// each fetched batch and processNote returns early for them. | ||
const skipList: string[] = [ | ||
// Not yet manually recorded | ||
// "园地", | ||
// "无论如何", | ||
// "豪华", | ||
// "人体", | ||
// "堕落", | ||
// "刹车", | ||
// "巡逻", | ||
// "煎", | ||
// "召集", | ||
// "纯洁", | ||
// "休想", | ||
]; | ||
|
||
/**
 * Upper-cases the first character of a string and leaves the rest untouched.
 *
 * Works on the first Unicode code point rather than the first UTF-16 code unit,
 * so a leading character outside the Basic Multilingual Plane is capitalised too.
 *
 * @param str - Text to capitalise; may be empty.
 * @returns `str` with its first code point upper-cased, or `str` unchanged when it is empty.
 */
function capitaliseFirstLetter(str: string): string {
    const firstCodePoint = str.codePointAt(0);
    if (firstCodePoint === undefined) {
        // Empty string: nothing to capitalise.
        return str;
    }

    // `first.length` is 2 for a surrogate pair, so the slice skips the whole character.
    const first = String.fromCodePoint(firstCodePoint);
    return first.toUpperCase() + str.slice(first.length);
}
|
||
/**
 * Renders example sentences as the HTML fragments written to a note's `Examples` field.
 *
 * Each entry of `examples` is one part-of-speech group. Its sentences are filtered with
 * `removeSentencesWithoutCharacter` against `note.text`; when that drops anything, the
 * offending sentences and a per-group summary are reported on stderr. Groups left with
 * no sentences produce no output at all (not even a heading).
 *
 * @param examples - Example groups for the note, one per part of speech.
 * @param note - The note being processed; `note.text` is the target word.
 * @returns HTML `<div>` strings in display order: a heading followed by its sentences, per group.
 */
const formatExamplesForAnki = (examples: VocabularyExamples[], note: NoteForProcessing): string[] => {
    const ankiExamples: string[] = [];

    for (const example of examples) {
        // Remove sentences that do not contain the target word
        const filteredExampleSentences = removeSentencesWithoutCharacter(example.exampleSentences, note.text);

        if (example.exampleSentences.length !== filteredExampleSentences.length) {
            for (const exampleSentence of example.exampleSentences) {
                // Check the sentence itself (substring match), not the list of sentences.
                if (!exampleSentence.includes(note.text)) {
                    console.error(`${note.text} is not present in ${exampleSentence}.`);
                }
            }
            // One summary per group rather than one per sentence.
            console.error(
                `Adding ${filteredExampleSentences.length} examples to card, filtered from ${example.exampleSentences.length}.`,
            );
        }

        // Only add the part of speech section if there are examples
        if (filteredExampleSentences.length > 0) {
            ankiExamples.push(
                `<div class="char_examples-parts-of-speech">${capitaliseFirstLetter(example.partsOfSpeech)}</div>`,
            );
            for (const exampleSentence of filteredExampleSentences) {
                ankiExamples.push(`<div class="char_example">${exampleSentence}</div>`);
            }
        }
    }

    return ankiExamples;
};
|
||
/**
 * Fetches example sentences for a single note and writes them into its `Examples` field.
 *
 * Notes whose text appears in `skipList` are logged and left untouched. Otherwise the
 * examples are fetched via `fetchExample`, rendered with `formatExamplesForAnki`,
 * concatenated, and sent to `updateNote`, followed by a short `sleep(100)` pause.
 *
 * @param ankiLanguage - Language passed through to `fetchExample`.
 * @param noteForProcessing - The note to enrich.
 * @returns Nothing for skipped notes; otherwise whatever `sleep` resolves to.
 */
const processNote = async (ankiLanguage: string, noteForProcessing: NoteForProcessing) => {
    const { text, noteId } = noteForProcessing;

    if (skipList.includes(text)) {
        logger.info(`${text} is in the skip list, skipping`);
        return;
    }

    const generatedExamples = await fetchExample(ankiLanguage, noteForProcessing);
    const renderedFragments = formatExamplesForAnki(generatedExamples, noteForProcessing);

    const updatePayload = {
        id: +noteId,
        fields: {
            Examples: renderedFragments.join(''),
        },
    };

    // logger.verbose(updatePayload);

    await updateNote(updatePayload);
    return await sleep(100);
};
|
||
/**
 * Returns the items of `sourceList` whose `text` is not listed in `elementsToRemove`.
 *
 * Neither input is mutated; a new array is always returned, preserving the original order.
 *
 * @typeParam T - Any item type exposing a string `text` property (e.g. `NoteForProcessing`).
 * @param sourceList - Items to filter.
 * @param elementsToRemove - `text` values to exclude.
 * @returns A new array without the excluded items.
 */
function removeElements<T extends { text: string }>(
    sourceList: readonly T[],
    elementsToRemove: readonly string[],
): T[] {
    // Create a set from elementsToRemove for faster lookup
    const elementsToRemoveSet = new Set(elementsToRemove);

    // Keep only the items whose text is not in the set
    return sourceList.filter((item) => !elementsToRemoveSet.has(item.text));
}
|
||
// const testApiWithoutUpdating = async () => { | ||
// logger.info( | ||
// await Promise.all([ | ||
// fetchExample('Mandarin', { | ||
// noteId: 1, | ||
// text: '阳性', | ||
// definitions: 'none', | ||
// }), | ||
// // fetchExample('Mandarin', { | ||
// // noteId: 2, | ||
// // text: '不', | ||
// // definitions: 'none', | ||
// // }), | ||
// // fetchExample('Mandarin', { | ||
// // noteId: 3, | ||
// // text: '就', | ||
// // definitions: 'none', | ||
// // }), | ||
// // fetchExample('Mandarin', { | ||
// // noteId: 4, | ||
// // text: '下', | ||
// // definitions: 'none', | ||
// // }), | ||
// // fetchExample('Mandarin', { | ||
// // noteId: 5, | ||
// // text: '又', | ||
// // definitions: 'none', | ||
// // }), | ||
// // fetchExample('Mandarin', { | ||
// // noteId: 6, | ||
// // text: '喂', | ||
// // definitions: 'none', | ||
// // }), | ||
// ]), | ||
// ); | ||
// return; | ||
// }; | ||
|
||
/**
 * Script entry point.
 *
 * Loads `.env.local`, requires `ANKI_DECK` and `ANKI_LANGUAGE` to be set, then repeatedly
 * fetches eligible notes from the deck (minus anything in `skipList`) and processes them
 * in concurrent batches of `MAX_NOTES_PROCESSED_AT_ONCE`. The loop ends when no notes
 * remain or once `MAX_NOTES_PROCESSED_AT_ONE_RUN` notes have been handled.
 *
 * @throws Error when `ANKI_DECK` or `ANKI_LANGUAGE` is missing from the environment.
 */
void (async function () {
    dotenv.config({ path: '.env.local' });

    // To exercise the API without touching Anki, re-enable testApiWithoutUpdating and return it here.
    // return testApiWithoutUpdating();

    const ankiDeck = process.env['ANKI_DECK'];
    if (ankiDeck === undefined) {
        throw new Error('ANKI_DECK is not configured in env');
    }

    const ankiLanguage = process.env['ANKI_LANGUAGE'];
    if (ankiLanguage === undefined) {
        throw new Error('ANKI_LANGUAGE is not configured in env');
    }

    logger.info(`Processing cards from Anki deck "${ankiDeck}" in language "${ankiLanguage}"...`);

    // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition,no-constant-condition
    while (true) {
        // Re-query on every iteration and drop skip-listed notes.
        const notes = removeElements(await fetchNotesFromAnki(ankiDeck), skipList);
        logger.info(`Found ${notes.length} notes eligible for fetching`);
        if (notes.length === 0) {
            break;
        }

        const notesForProcessing = notes.slice(0, MAX_NOTES_PROCESSED_AT_ONCE);
        logger.info(`Processing batch of ${notesForProcessing.length} notes.`);

        // processNote expects the language (not the deck name) as its first argument.
        await Promise.all(notesForProcessing.map((note) => processNote(ankiLanguage, note)));

        notesProcessedCount += notesForProcessing.length;
        if (notesProcessedCount >= MAX_NOTES_PROCESSED_AT_ONE_RUN) {
            logger.info(`Processed ${notesProcessedCount} notes, quiting.`);
            // Dump the texts from the last fetch so the remaining work is visible.
            console.dir(notes.map((note) => note.text).join(', '));
            break;
        }
    }
})();
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.