forked from mhujer/ankiai
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Updated to provide better examples for words able to be used as multi…
…ple different parts of speech.
- Loading branch information
Showing
6 changed files
with
126 additions
and
27 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,29 @@ | ||
import type { NoteForProcessing } from '../anki'; | ||
import type { PartsOfSpeech } from './typechat-response-parts-of-speech-schema'; | ||
|
||
export const generatePrompt = (vocabulary: NoteForProcessing[]): string => { | ||
return vocabulary.map((note) => `${note.noteId}: ${note.text}`).join('\n'); | ||
/**
 * Renders the vocabulary as one `<noteId>: <text>` line per note.
 *
 * @param vocabulary - Notes to list in the prompt.
 * @returns The lines joined with `\n`; an empty string for an empty list.
 */
export const generateBasicVocabularyPromptSegment = (vocabulary: NoteForProcessing[]): string => {
    const lines = vocabulary.map(({ noteId, text }) => `${noteId}: ${text}`);

    return lines.join('\n');
};
|
||
/**
 * Finds the parts-of-speech string recorded for a note.
 *
 * @param noteId - ID of the note to look up.
 * @param partsOfSpeech - Entries to search; the first entry whose `id` equals `noteId` is used.
 * @returns The matching entry's `partsOfSpeech` string.
 * @throws Error when no entry carries the given ID.
 */
const partsOfSpeechLookup = (noteId: number, partsOfSpeech: PartsOfSpeech[]): string => {
    // The callback parameter has its own name so it does not shadow the array being searched.
    const match = partsOfSpeech.find((entry) => entry.id === noteId);

    if (match === undefined) {
        throw new Error(`Part of speech not found for ${noteId}`);
    }

    return match.partsOfSpeech;
};
|
||
/**
 * Renders the vocabulary as one `<noteId>: <text> (<parts of speech>)` line per note.
 *
 * @param vocabulary - Notes to list in the prompt.
 * @param partsOfSpeech - Parts-of-speech entries, matched to notes by ID.
 * @returns The lines joined with `\n`.
 * @throws Error (from `partsOfSpeechLookup`) when a note has no matching entry.
 */
export const generatePartsOfSpeechVocabularyPromptSegment = (
    vocabulary: NoteForProcessing[],
    partsOfSpeech: PartsOfSpeech[]
): string => {
    const lines = vocabulary.map((note) => {
        const { noteId, text } = note;
        const usages = partsOfSpeechLookup(noteId, partsOfSpeech);

        return `${noteId}: ${text} (${usages})`;
    });

    return lines.join('\n');
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,41 +1,78 @@ | ||
/* eslint-disable @typescript-eslint/no-unused-vars */ | ||
import type { NoteForProcessing } from '../anki'; | ||
import { generatePrompt } from './generate-prompt'; | ||
import { generateBasicVocabularyPromptSegment, generatePartsOfSpeechVocabularyPromptSegment } from './generate-prompt'; | ||
import { createJsonTranslator, createLanguageModel } from 'typechat'; | ||
import { createTypeScriptJsonValidator } from "typechat/ts"; | ||
import fs from 'fs'; | ||
import path from 'path'; | ||
import type { VocabularyExamples, AllVocabularyExamples } from './typechat-response-schema'; | ||
import type { AllVocabularyExamples, VocabularyExamples } from './typechat-response-schema'; | ||
import type { AllPartsOfSpeech } from './typechat-response-parts-of-speech-schema'; | ||
|
||
/**
 * Generates example sentences for the given vocabulary in two model requests:
 * the first asks for the possible parts of speech of every word, the second asks
 * for one example sentence per part of speech.
 *
 * NOTE(review): the parts-of-speech prompt hard-codes "Mandarin" and does not use
 * `language`; only the example-sentences prompt interpolates it. Confirm this is intended.
 * NOTE(review): the second prompt opens with "Now that you have defined…" but is sent
 * through a separately created translator; whether any context is shared between the two
 * requests is not visible here.
 *
 * @param language - Language name interpolated into the example-sentences prompt.
 * @param vocabulary - Notes to generate examples for; must not be empty.
 * @returns The `items` of the validated example-sentences response.
 * @throws Error when `vocabulary` is empty, when either request is unsuccessful, or when
 *   building the second prompt fails because a note has no parts-of-speech entry.
 */
export const fetchExamples = async (language: string, vocabulary: NoteForProcessing[]): Promise<VocabularyExamples[]> => {
    if (vocabulary.length === 0) {
        throw new Error('No vocabulary passed!');
    }

    // Step 1: ask which parts of speech each word can serve as.
    const partsOfSpeechModel = createLanguageModel(process.env);
    // @todo path is from `build/openai/`
    const partsOfSpeechSchema = fs.readFileSync(path.join(__dirname, '../../src/openai/typechat-response-parts-of-speech-schema.ts'), 'utf8');
    // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment, @typescript-eslint/no-unsafe-call
    const partsOfSpeechValidator = createTypeScriptJsonValidator<AllPartsOfSpeech>(partsOfSpeechSchema, "AllPartsOfSpeech");
    // eslint-disable-next-line @typescript-eslint/no-unsafe-argument
    const partsOfSpeechTranslator = createJsonTranslator(partsOfSpeechModel, partsOfSpeechValidator);

    const vocabularyPrompt = generateBasicVocabularyPromptSegment(vocabulary);

    const partsOfSpeechPrompt = `You are a helpful vocabulary learning assistant who helps users generate example sentences in Mandarin for language learning. You understand that in Mandarin, words can serve different parts of speech depending on context, for example: nouns, pronouns, verbs, adjectives, adverbs, number words, measure words, prepositions, conjunctions, interjections, onomatopoeias, particles, function words, verbal measure words, auxiliary verbs.
I will provide each word prefixed by ID. For each word, please find the possible usages for the words. Return the parts of speech as a single comma separated string.
Here is the list of words prefixed by ID:
${vocabularyPrompt}
For each word in the provided list, define all possible parts of speech based on context. If a word can have multiple parts of speech, list all possible parts of speech for that word explicitly.`;

    console.log(partsOfSpeechPrompt);

    const response = await partsOfSpeechTranslator.translate(partsOfSpeechPrompt);
    if (!response.success) {
        console.dir({ response });
        throw new Error('Error fetching data from chatGPT: ' + response.message);
    }
    const partsOfSpeech = response.data.partsOfSpeech;

    console.dir(partsOfSpeech);

    // Step 2: ask for one example sentence per part of speech found in step 1.
    const exampleSentencesModel = createLanguageModel(process.env);
    const exampleSentencesSchema = fs.readFileSync(path.join(__dirname, '../../src/openai/typechat-response-schema.ts'), 'utf8');
    // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment, @typescript-eslint/no-unsafe-call
    const exampleSentencesValidator = createTypeScriptJsonValidator<AllVocabularyExamples>(exampleSentencesSchema, "AllVocabularyExamples");
    // eslint-disable-next-line @typescript-eslint/no-unsafe-argument
    const exampleSentencesTranslator = createJsonTranslator(exampleSentencesModel, exampleSentencesValidator);

    const exampleSentencesPrompt = `Now that you have defined the possible parts of speech for each word, please generate one example sentence per part of speech for each word. The sentences should be medium or longer length and complexity of HSK5 or higher. Each possible part of speech must have at least one sentence generated for it.
Here is the list of words prefixed by ID:
${generatePartsOfSpeechVocabularyPromptSegment(vocabulary, partsOfSpeech)}
For each word in the provided list, generate example sentences for each defined part of speech. Wrap the ${language} sentences in <div class="char_example"></div>. `;

    /**
     * Prompt text that is currently NOT sent (kept for reference):
     * Also generate the English translation, wrapped in <div class="char_example_English"></div>. At the end of each English translation include the part of speech in brackets.
     * Return the examples and translations as a flat list of strings, with the example and translation in pairs.
     */

    console.log(exampleSentencesPrompt);

    const exampleSentencesResponse = await exampleSentencesTranslator.translate(exampleSentencesPrompt);
    if (!exampleSentencesResponse.success) {
        // Log the response that actually failed; `response` above is the earlier,
        // successful parts-of-speech result and says nothing about this error.
        console.dir({ exampleSentencesResponse });
        throw new Error('Error fetching data from chatGPT: ' + exampleSentencesResponse.message);
    }

    return exampleSentencesResponse.data.items;
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
// The complete answer: one entry for every word ID that was provided.
export interface AllPartsOfSpeech {
    partsOfSpeech: PartsOfSpeech[];
}
|
||
// The possible parts of speech for a single word.
export interface PartsOfSpeech {
    // The ID the word was prefixed with in the provided list, unchanged.
    id: number;
    // Every part of speech the word can serve as, as a single comma separated string.
    partsOfSpeech: string;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters