Skip to content

Commit

Permalink
feat(toolbox): 语素分析, close #240
Browse files Browse the repository at this point in the history
  • Loading branch information
mark9804 committed Jun 22, 2024
1 parent 53f5d22 commit 7d9daa2
Show file tree
Hide file tree
Showing 14 changed files with 949 additions and 361 deletions.
18 changes: 18 additions & 0 deletions apps/ba-online-toolbox/mock/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import { MockMethod } from "vite-plugin-mock";
/**
 * Mock route table, typed as vite-plugin-mock's `MockMethod[]`.
 *
 * Declares a single `POST /v1/messages` route. Its response carries the
 * `assistant` role and one `text` content item whose `text` is a JSON-encoded
 * array of token objects (`word`, `furigana`, `basic_form`, `word_type`,
 * `word_sub_type`, `conjungation_type`, `conjungation_form`). Line breaks in
 * the analysed sentence are represented as tokens whose `word` is "\n".
 *
 * NOTE(review): the shape presumably mirrors the real LLM endpoint used for
 * semantic parsing — confirm against the parser that consumes it.
 */
export default [
{
url: "/v1/messages",
method: "post",
// Always returns the same canned analysis, regardless of the request body.
response: () => {
return {
role: "assistant",
content: [
{
type: "text",
text: '[{"word":"青空","furigana":"あおぞら","basic_form":"青空","word_type":"名詞","word_sub_type":"普通名詞","conjungation_type":"*","conjungation_form":"*"},{"word":"に","furigana":"に","basic_form":"に","word_type":"助詞","word_sub_type":"格助詞","conjungation_type":"*","conjungation_form":"*"},{"word":"\\n","furigana":"","basic_form":"","word_type":"未定義語","word_sub_type":"その他","conjungation_type":"*","conjungation_form":"*"},{"word":"たくさん","furigana":"たくさん","basic_form":"たくさん","word_type":"副詞","word_sub_type":"一般","conjungation_type":"*","conjungation_form":"*"},{"word":"の","furigana":"の","basic_form":"の","word_type":"助詞","word_sub_type":"連体化","conjungation_type":"*","conjungation_form":"*"},{"word":"気球","furigana":"ききゅう","basic_form":"気球","word_type":"名詞","word_sub_type":"普通名詞","conjungation_type":"*","conjungation_form":"*"},{"word":"が","furigana":"が","basic_form":"が","word_type":"助詞","word_sub_type":"格助詞","conjungation_type":"*","conjungation_form":"*"},{"word":"\\n","furigana":"","basic_form":"","word_type":"未定義語","word_sub_type":"その他","conjungation_type":"*","conjungation_form":"*"},{"word":"浮か","furigana":"うか","basic_form":"浮く","word_type":"動詞","word_sub_type":"一般","conjungation_type":"五段・カ行イ音便","conjungation_form":"連用形"},{"word":"んで","furigana":"んで","basic_form":"ぬ","word_type":"助動詞","word_sub_type":"*","conjungation_type":"助動詞-ぬ","conjungation_form":"連用形"},{"word":"い","furigana":"い","basic_form":"いる","word_type":"動詞","word_sub_type":"非自立可能","conjungation_type":"上一段-ア行","conjungation_form":"連用形"},{"word":"た","furigana":"た","basic_form":"た","word_type":"助動詞","word_sub_type":"*","conjungation_type":"助動詞-タ","conjungation_form":"基本形"}]',
},
],
};
},
},
] as MockMethod[];
10 changes: 8 additions & 2 deletions apps/ba-online-toolbox/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,18 @@
"js-yaml": "^4.1.0",
"pinia": "^2.1.7",
"pinia-plugin-persistedstate": "^3.2.1",
"vue": "^3.3.7",
"vue": "^3.4.29",
"vue-router": "~4.3.0",
"mitt": "^3.0.0",
"cross-env": "^7.0.3",
"unocss": "^0.59.0-beta.1",
"@arco-design/web-vue": "~2.55.2"
"@arco-design/web-vue": "~2.55.2",
"xxhashjs": "^0.2.2",
"@types/xxhashjs": "^0.2.2",
"uuid": "~9.0.0",
"radash": "~12.1.0",
"mockjs": "~1.1.0",
"vite-plugin-mock": "~3.0.2"
},
"devDependencies": {
"taze": "^0.13.8",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
<script setup lang="ts">
import { AnthropicStatusCode } from "../types/Semantic";
import { useTranslationStore } from "../store/translationStore";
import { computed, watch } from "vue";
import { useClipboard } from "@vueuse/core";
import { parseSemantics } from "../../public/helper/AnthropicSemanticParser";
import { ElMessage } from "element-plus";
import SemanticUnit from "./SemanticUnit.vue";
// Component inputs:
// - text: the source text to display; undefined when nothing is available.
//   It is also the lookup key for cached semantic units.
// - preferSemantic: when true, the text is parsed into semantic units and the
//   token view is rendered once a cached result exists.
// - selectLine: index of the selected line; -1 shows the "请选择一行" placeholder.
const props = defineProps<{
text: string | undefined;
preferSemantic: boolean;
selectLine: number;
}>();
// Store that caches parsed semantic units (despite the `use` prefix this is
// the store instance, not a composable).
const useTranslationCache = useTranslationStore();
/**
 * Looks up the cached semantic units for the current `text` prop.
 *
 * @returns whatever the translation store's `getSemantic` yields for the
 *   current text (a falsy value when nothing usable is cached).
 */
function searchSemanticDict() {
  const { text } = props;
  return useTranslationCache.getSemantic(text);
}
/**
 * Ensures semantic units for the current `text` prop are available in the
 * translation store, requesting a parse when nothing is cached.
 *
 * Does nothing when there is no text or semantic view is not preferred.
 * Failures (no result, abnormal status, rejected request) are reported to the
 * user through `ElMessage` instead of being thrown, because the only visible
 * caller fires this function without awaiting it.
 *
 * @returns the cached semantic units when they already exist, otherwise
 *   `undefined` (freshly parsed units are written to the store, not returned).
 */
async function getSemantics() {
  // Snapshot the text up front: `props.text` can change while the request is
  // in flight, and the result must be cached under the text that was parsed.
  const requestedText = props.text;
  if (!requestedText || !props.preferSemantic) {
    return;
  }

  const cached = useTranslationCache.getSemantic(requestedText);
  if (cached) {
    return cached;
  }

  try {
    const res = await parseSemantics(requestedText);
    if (!res) {
      ElMessage({
        message: "语义解析无结果,请告诉开发帕鲁哪句出了问题",
        type: "error",
      });
      return;
    }
    if (res.status !== AnthropicStatusCode.NORMAL) {
      ElMessage({
        message: res.message,
        type: "error",
      });
    }
    // Only cache when the parser actually produced a token list; an abnormal
    // status may come without one.
    if (Array.isArray(res.tokens)) {
      // NOTE(review): the declared type of `res.tokens` is not visible here;
      // the suppression is kept from the original in case it still mismatches.
      // @ts-ignore
      useTranslationCache.setSemantic(requestedText, res.tokens);
    }
  } catch (err: unknown) {
    // Surface request failures instead of leaving an unhandled rejection.
    ElMessage({
      message: err instanceof Error ? err.message : String(err),
      type: "error",
    });
  }
}
// Re-run semantic parsing whenever the text or the semantic-view preference
// changes. The watcher is not immediate, so it does not fire on mount.
watch([() => props.text, () => props.preferSemantic], () => {
  if (!props.preferSemantic) {
    return;
  }
  getSemantics();
});
/**
 * Cached semantic units for the current text, grouped into display lines.
 *
 * Units whose `word` is "\n" act as separators and are dropped; every other
 * unit is appended to the current line. A leading, trailing or doubled
 * separator therefore yields an empty line, and text without separators
 * yields a single line. Evaluates to `undefined` when nothing is cached.
 */
const semantics = computed(() => {
  const semanticUnits = useTranslationCache.getSemantic(props.text);
  if (!semanticUnits) {
    return;
  }

  // Single pass: start with one open line and open a new one per separator.
  let currentLine: typeof semanticUnits = [];
  const lines = [currentLine];
  for (const unit of semanticUnits) {
    if (unit.word === "\n") {
      currentLine = [];
      lines.push(currentLine);
    } else {
      currentLine.push(unit);
    }
  }
  return lines;
});
// Clipboard helper from @vueuse/core, created with the `legacy` option enabled.
// NOTE(review): `copy` is not referenced anywhere in this component's visible
// script or template — confirm whether it is still needed.
const { copy } = useClipboard({ legacy: true });
/**
 * HTML string shown when the semantic (token) view is not in use.
 *
 * - No line selected (`selectLine === -1`): a "please select a line" hint.
 * - No text for the line: a "no translation for this language" hint.
 * - Otherwise: the text with every newline turned into `<br>`.
 *
 * NOTE(review): this value is bound with `v-html` in the template, so `text`
 * is interpreted as markup. Confirm the text source is trusted, or escape it
 * before inserting the `<br>` tags.
 */
const html = computed(() => {
  if (props.selectLine === -1) {
    return "请选择一行";
  }
  if (!props.text) {
    return "该语言暂无翻译";
  }
  // `replace` with a string pattern only touches the first match; multi-line
  // text needs every newline converted.
  return props.text.replaceAll("\n", "<br>");
});
</script>

<template>
  <div
    class="p-4 pt-2 pb-2 rounded-sm border-solid border-1 border-[var(--color-arona-blue)] overflow-y-scroll select-auto"
  >
    <!--
      Plain-text fallback: used when semantic view is off or no parsed result
      is cached yet. Otherwise one flex row is rendered per line of tokens.
    -->
    <div
      v-if="!props.preferSemantic || !searchSemanticDict()"
      class="select-text"
      v-html="html"
    />
    <div v-else class="flex flex-col gap-1">
      <div
        v-for="(semanticLine, lineIndex) in semantics"
        :key="lineIndex"
        class="flex"
      >
        <semantic-unit
          v-for="(semantic, index) in semanticLine"
          :key="index"
          :semantic="semantic"
        />
      </div>
    </div>
  </div>
</template>

<style scoped lang="scss"></style>
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
<script setup lang="ts">
import { SemanticUnit } from "../types/Semantic";
import { computed } from "vue";
// The single token this component renders.
const props = defineProps<{ semantic: SemanticUnit }>();

/** True when the reading differs from the surface form, so it is worth showing. */
const shouldShowKana = computed(
  () => props.semantic.word !== props.semantic.furigana
);
</script>

<template>
<div class="flex flex-col justify-end items-center">
<!-- Reading shown above the word, only when it differs from the word itself. -->
<span class="text-[8px]" v-if="shouldShowKana">{{ semantic.furigana }}</span>
<!-- The word as a tag; hovering shows its grammatical details in a tooltip. -->
<n-tooltip placement="bottom">
<template #trigger>
<n-tag size="medium">{{ semantic.word }}</n-tag>
</template>
<div>基本形表記: {{ semantic.basic_form }}</div>
<div>品詞: {{ semantic.word_type }}</div>
<div>品詞細分類: {{ semantic.word_sub_type }}</div>
<div>活用型: {{ semantic.conjungation_type }}</div>
<div>活用形: {{ semantic.conjungation_form }}</div>
</n-tooltip>
</div>
</template>

<style scoped lang="scss">
</style>
Original file line number Diff line number Diff line change
Expand Up @@ -10,38 +10,32 @@
:key="lang"
:value="lang"
@click="config.setLanguage(lang as Language)"
>{{ langHash[lang as Language] }}</n-radio
>{{
langHash[lang as "TextJp" | "TextEn" | "TextTw" | "ScriptKr"]
}}</n-radio
>
</n-radio-group>
</span>
<n-switch
@update:value="handleShowAllLanguageChange"
:value="config.getShowAllLanguage"
>
<template #checked> 所有语言 </template>
<template #unchecked> 当前语言 </template>
</n-switch>
</div>
<div class="referLang">
<n-input
type="textarea"
class="h-[80px] w-full"
:value="
config.getSelectLine !== -1
? mainStore.getScenario.content[config.getSelectLine][
config.getLanguage
]?.replaceAll('#n', '\n')
: '请选择一行'
"
:placeholder="
config.getSelectLine !== -1
? mainStore.getScenario.content[config.getSelectLine][
config.getLanguage
] || '该语言暂无翻译'
: ''
"
></n-input>
<n-space>
<n-checkbox
:checked="config.getSemanticPreference"
@update:checked="updateParseSemanticPref"
size="large"
label="解析语素"
/>
<n-checkbox
:checked="config.getShowAllLanguage"
@update:checked="handleShowAllLanguageChange"
size="large"
label="显示所有语言"
/>
</n-space>
</div>
<original-text-disp
:text="mainStore.getScenario.content[config.getSelectLine]?.TextJp"
:prefer-semantic="config.getSemanticPreference"
:select-line="config.getSelectLine"
/>
<div class="flex content-between gap-4">
<n-space vertical>
<n-space>
Expand Down Expand Up @@ -203,7 +197,7 @@ import {
getClaudeTranslation,
} from "../../public/helper/AnthropicTranslationService";
import { transformStudentName } from "../../public/helper/transformStudentName";
import { el } from "date-fns/locale";
import OriginalTextDisp from "./OriginalTextDisp.vue";
const config = useGlobalConfig();
const mainStore = useScenarioStore();
Expand All @@ -226,6 +220,10 @@ const langSelect = [
{ label: "泰语", key: "TextTh" },
];
/**
 * Checkbox handler: stores the "parse semantics" preference in the global
 * config store.
 *
 * @param value the new checked state of the checkbox
 */
function updateParseSemanticPref(value: boolean): void {
  config.setSemanticPreference(value);
}
const currentText = computed(() => {
return mainStore.getScenario.content[config.getSelectLine]?.[
config.getLanguage
Expand All @@ -235,24 +233,7 @@ const currentText = computed(() => {
/**
 * Requests a machine translation for the currently selected line.
 *
 * @param force when false (the default), nothing happens if
 *   `config.getTmpMachineTranslate` already returns a truthy value for the
 *   current text; when true that check is skipped.
 *
 * No request is made while no line is selected (`getSelectLine === -1`).
 */
const translateHandle = (force = false) => {
  if (!force && config.getTmpMachineTranslate(currentText.value)) return;
  if (config.getSelectLine === -1) return;
  // NOTE(review): the meaning of the `0` argument is defined by
  // handleLLMTranslateRequest, which is outside this view.
  handleLLMTranslateRequest(0);
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ export const useGlobalConfig = defineStore({
selectTag: "[wa:]",
previewMode: false,
students: [] as Student[],
preferSemanticsView: true,
}),
getters: {
isProofread: state => state.proofread,
Expand Down Expand Up @@ -44,6 +45,7 @@ export const useGlobalConfig = defineStore({
getShowAllLanguage: state => state.showAllLanguage,
getPreviewMode: state => state.previewMode,
getStudentList: state => state.students,
getSemanticPreference: state => state.preferSemanticsView ?? false,
},
actions: {
startProofread() {
Expand Down Expand Up @@ -106,5 +108,8 @@ export const useGlobalConfig = defineStore({
setStudents(students: Student[]) {
this.students = students;
},
setSemanticPreference(preference: boolean) {
this.preferSemanticsView = preference;
},
},
});
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import { defineStore } from "pinia";
import { SemanticUnit } from "../types/Semantic";
import xxhash from "xxhashjs"; //xxhash.h32(string, seed).toNumber()

/**
 * Cache of per-text results, keyed by the numeric hash that `calcHash`
 * produces for the source text.
 */
export interface Translation {
[key: number]: {
// Machine translation of the source text.
machineTranslation: string;
// Semantic units parsed from the source text.
semantic: SemanticUnit[];
};
}

/**
 * Derives the numeric cache key for a piece of text: its 32-bit xxHash
 * (seed 0) converted to a number.
 */
function calcHash(text: string) {
  const SEED = 0;
  const digest = xxhash.h32(text, SEED);
  return digest.toNumber();
}

/**
 * Store caching machine translations and parsed semantic units per source
 * text. Entries are keyed by `calcHash(text)`.
 *
 * Setters that update a single field always write a complete record (the
 * other field falls back to its empty value), so stored entries match the
 * `Translation` type even when the entry did not exist before.
 */
export const useTranslationStore = defineStore({
  id: "translationStore",
  // NOTE(review): `persist` is presumably the pinia-plugin-persistedstate
  // option; the suppression is kept because its typing is not visible here.
  // @ts-ignore
  persist: true,
  state: () => ({
    translations: {} as Translation,
  }),
  getters: {
    /** Returns the whole cached record for `text`, or `undefined`. */
    getFullTranslation: state => (text: string) => {
      return state.translations[calcHash(text)];
    },
    /** Returns the cached machine translation for `text`, or "" when absent. */
    getMachineTranslation: state => (text: string) => {
      return state.translations[calcHash(text)]?.machineTranslation ?? "";
    },
    /**
     * Returns the cached semantic units for `text`, or `undefined` when the
     * text is empty/undefined or no non-empty unit list is cached.
     */
    getSemantic: state => (text: string | undefined) => {
      if (!text) return;
      const result = state.translations[calcHash(text)]?.semantic;
      return Array.isArray(result) && result.length > 0 ? result : undefined;
    },
  },
  actions: {
    /** Replaces the whole record for `text`. */
    setTranslation(
      text: string,
      machineTranslation: string,
      semantic: SemanticUnit[]
    ) {
      this.translations[calcHash(text)] = {
        machineTranslation,
        semantic,
      };
    },
    /** Updates the machine translation for `text`, keeping cached semantics. */
    setMachineTranslation(text: string, machineTranslation: string) {
      const hash = calcHash(text);
      this.translations[hash] = {
        // Fall back to an empty list so a new entry is still a full record.
        semantic: this.translations[hash]?.semantic ?? [],
        machineTranslation,
      };
    },
    /**
     * Updates the semantic units for `text`, keeping any cached machine
     * translation. Ignored when `text` is empty or undefined.
     */
    setSemantic(text: string | undefined, semantic: SemanticUnit[]) {
      if (!text) return;
      const hash = calcHash(text);
      this.translations[hash] = {
        // Fall back to "" so a new entry is still a full record.
        machineTranslation: this.translations[hash]?.machineTranslation ?? "",
        semantic,
      };
    },
  },
});
Loading

0 comments on commit 7d9daa2

Please sign in to comment.