Skip to content

Commit

Permalink
stopping: openai is censoring ocr-like uses
Browse files Browse the repository at this point in the history
  • Loading branch information
jokester committed Feb 20, 2024
1 parent 7c58030 commit 9858e3a
Show file tree
Hide file tree
Showing 7 changed files with 79 additions and 23 deletions.
2 changes: 2 additions & 0 deletions next/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,5 @@
MY_HOST="example.com"
TZ=UTC
DISABLE_X_ROBOTS_TAG='NO'
GOOGLE_APPLICATION_CREDENTIALS='PATH'
OPENAI_API_KEY='sk_***'
1 change: 0 additions & 1 deletion next/next.config.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ const nextConf = {
*/
serverRuntimeConfig: {
serverStartAt: new Date().toISOString(),
openaiApiKey: `TODO`,
projectRoot: ___dirname,
},
/**
Expand Down
1 change: 1 addition & 0 deletions next/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
"react": "^18.2.0",
"react-dom": "^18.2.0",
"react-use": "^17.5.0",
"really-relaxed-json": "^0.3.2",
"superjson": "^2.2.1",
"zod": "^3.22"
},
Expand Down
49 changes: 32 additions & 17 deletions next/server/api/moeflow-router.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ import fsp from 'node:fs/promises';
import { TRPCError } from '@trpc/server';
import { serverRuntimeConfig } from '../runtime-config';
import openai from 'openai';
import { withRetry } from '@jokester/ts-commonutil/lib/util/with-retry';
// @ts-ignore
import { toJson } from 'really-relaxed-json';

const publicDir = path.join(serverRuntimeConfig.projectRoot, 'public');

Expand Down Expand Up @@ -45,15 +48,16 @@ function orgText(
async function aiRebuild(annotations: OcrResult): Promise<{ x: number; y: number; text: string }[]> {
const blocks = orgText(annotations);
const prompt = `
これは漫画ページをOCRにかけたものから、吹き出しごとに文言を抽出するための処理です。
OCRで抽出されたテキストは以下の形式に従います: (x座標, y座標): {OCRで抽出されたテキスト} 。入力テキストは ### の後に続くものとします
これは漫画ページから、吹き出しごとに文言を抽出するための処理です。
入力テキストは以下の形式に従います: (x座標, y座標): {テキスト}
漫画の文字方向と配置を考慮しつつ、テキストの内容と座標を確認しながら、吹き出しの境界を検出し、吹き出しごとのテキストを抽出します。抽出されたテキストは以下のJSON配列で返してください:
{
"x": number, // 吹き出しのx座標
"y": number, // 吹き出しのy座標
"text": string // 吹き出しの内容
}
入力テキストは ### の後に続くものとします。
###
${blocks.map((b) => `(${b.leftTop.x}, ${b.leftTop.y}): {${b.text}}`).join('\n')}
Expand All @@ -74,8 +78,15 @@ ${blocks.map((b) => `(${b.leftTop.x}, ${b.leftTop.y}): {${b.text}}`).join('\n')}
});

debugLogger('completion', completion);

return JSON.parse(completion.choices[0]?.message!.content!);
const shouldBeJson = `[` + completion.choices[0]?.message?.content + `]`;

debugLogger('shouldBeJson', shouldBeJson);
const jsonized = JSON.parse(toJson(shouldBeJson));
debugLogger('toJson(shouldBeJson)', jsonized);
if (!Array.isArray(jsonized)) {
throw new Error(`fail early`);
}
return jsonized;
}

async function openaiTranslate(texts: string[]): Promise<string[]> {
Expand Down Expand Up @@ -138,22 +149,26 @@ export const moeflowRouter = t.router({
}),

extractText: t.procedure.input(z.object({ imgBytes: z.string() })).mutation(async ({ input }) => {
const fullpath = path.join(publicDir, input.file);
if (!fullpath.startsWith(publicDir)) {
throw new TRPCError({ message: 'Invalid directory', code: 'FORBIDDEN' });
}
const ocrTextResult = await ocrText(Buffer.from(input.imgBytes, 'base64'));

const bytes = await fsp.readFile(fullpath);
const ocrTextResult = await ocrText(bytes);

const rebuilt = await aiRebuild(ocrTextResult);
const translated = await openaiTranslate(rebuilt.map((b) => b.text));
const ocrTextResult = await withRetry(() => ocrText(Buffer.from(input.imgBytes, 'base64')));
const rebuilt = await withRetry(() => aiRebuild(ocrTextResult), {
maxAttempts: 10,
shouldBreak(error: unknown, tried: number): boolean | PromiseLike<boolean> {
console.error('aiRebuild failed', error, tried);
return false;
},
});
debugLogger('rebuilt', typeof rebuilt, rebuilt);
const translated = await withRetry(() => openaiTranslate(rebuilt.map((b) => b.text)), {
maxAttempts: 10,
shouldBreak(error: unknown, tried: number): boolean | PromiseLike<boolean> {
console.error('openaiTranslate failed', error, tried);
return false;
},
});
return {
...ocrTextResult,
blocks: orgText(ocrTextResult),
rebuilt,
translated,
translated: rebuilt.map((b, i) => ({ ...b, translated: translated[i] ?? '????' })),
};
}),

Expand Down
24 changes: 20 additions & 4 deletions next/src/moeflow-auto-translate/manga-translator.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,26 @@
import { LPFile } from './moeflow-packager';
import { trpcClient, trpcClient$ } from '../api/trpc-client';
import { MoeflowImageFile } from './moeflow-packager';
import { trpcClient$ } from '../api/trpc-client';
import { measureImgSize } from '@jokester/ts-commonutil/lib/frontend/measure-img';

export async function translate(imgFile: File): Promise<LPFile> {
export async function translateImgFile(imgFile: File): Promise<MoeflowImageFile> {
const imgBytes = await toBase64(imgFile);
const text = await trpcClient$.moeflow.extractText.mutate({
imgBytes: await toBase64(imgFile),
imgBytes,
});
const dimension = await measureImgSize(imgFile);

return {
lp: {
file_name: imgFile.name,
labels: text.translated.map((t) => ({
x: t.x / dimension.width,
y: t.y / dimension.height,
position_type: 1,
translation: `${t.text} // ${t.translated}`,
})),
},
image: imgFile,
};
}

function toBase64(file: File): Promise<string> {
Expand Down
19 changes: 18 additions & 1 deletion next/src/moeflow-auto-translate/moetrans.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ import { wait } from '@jokester/ts-commonutil/lib/concurrency/timing';
import clsx from 'clsx';
import { useCounter } from '@chakra-ui/react';
import { createMoeflowProjectZip } from './moeflow-packager';
import { ResourcePool } from '@jokester/ts-commonutil/lib/concurrency/resource-pool';
import { translateImgFile } from './manga-translator';
import { TRPCError } from '@trpc/server';

export function FilePicker(props: { disabled?: boolean; onFilesLoaded?(files: File[]): void }) {
return (
Expand All @@ -16,6 +19,10 @@ export function FilePicker(props: { disabled?: boolean; onFilesLoaded?(files: Fi
onChange={(ev) => {
const input = ev.target as HTMLInputElement;
if (input.files?.length) {
if (input.files.length >= 5) {
alert('一次最多只能上传5张图片');
return;
}
props.onFilesLoaded?.(Array.from(input.files));
}
}}
Expand Down Expand Up @@ -87,6 +94,16 @@ export function Downloader({ built }: { built?: File }) {

export async function build(files: File[]): Promise<File> {
const filename = `moeflow-project-${Date.now()}-${files[0]!.name}.zip`;

const throttler = ResourcePool.multiple([1, 2, 3]);

const translated = await Promise.all(files.map((f) => throttler.use(() => translateImgFile(f)))).catch((e) => {
if (e instanceof TRPCError) {
alert(`翻译失败: ${e.code} ${e.message}`);
}
throw e;
});

const bytes = await createMoeflowProjectZip(
{
name: `${files[0]!.name}`,
Expand All @@ -98,7 +115,7 @@ export async function build(files: File[]): Promise<File> {
source_language: 'ja',
output_language: 'zh-TW',
},
files.map((f) => ({ lp: { file_name: f.name, labels: [] }, image: f })),
translated,
);
return new File([bytes], filename);
}
Expand Down
6 changes: 6 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 9858e3a

Please sign in to comment.