Skip to content

Commit

Permalink
Merge pull request #321 from cofacts/ocr-confidence
Browse files Browse the repository at this point in the history
Apply OCR confidence threshold
  • Loading branch information
MrOrz committed Oct 18, 2023
2 parents 0263d29 + feef609 commit e6bde99
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 2 deletions.
3 changes: 2 additions & 1 deletion src/graphql/__tests__/util.js
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,8 @@ if (process.env.GCS_BUCKET_NAME) {
與自律神經。
藉著汗,氣化熱消耗熱量,能夠提升代謝力,
不但減少體脂肪,還有助於消除肥胖。
可以先從關掉冷氣做起",
可以先從關掉冷氣做起
",
"type": "TRANSCRIPT",
"userId": "user-id",
}
Expand Down
57 changes: 56 additions & 1 deletion src/graphql/util.js
Original file line number Diff line number Diff line change
Expand Up @@ -690,6 +690,46 @@ export function createAIResponse({ user, ...loadingResponseBody }) {
}

const imageAnnotator = new ImageAnnotatorClient();
// Paragraphs whose OCR confidence is below this value are dropped from the extracted text.
const OCR_CONFIDENCE_THRESHOLD = 0.75;

/**
 * Rebuilds plain text from a fullTextAnnotation, keeping only the paragraphs whose
 * confidence is at least OCR_CONFIDENCE_THRESHOLD.
 *
 * Every page of the annotation is visited in order. A missing annotation, or a missing
 * or empty `pages` / `blocks` / `paragraphs` / `words` / `symbols` list, contributes
 * nothing instead of throwing.
 *
 * @param {ITextAnnotation} fullTextAnnotation - The fullTextAnnotation returned by client.documentTextDetection
 * @returns {string} The extracted text that is comprised of paragraphs passing OCR_CONFIDENCE_THRESHOLD
 */
function extractTextFromFullTextAnnotation(fullTextAnnotation) {
  // Word break type described in
  // http://googleapis.github.io/googleapis/java/grpc-google-cloud-vision-v1/0.1.5/apidocs/com/google/cloud/vision/v1/TextAnnotation.DetectedBreak.BreakType.html#UNKNOWN
  const lineBreakTypes = ['EOL_SURE_SPACE', 'HYPHEN', 'LINE_BREAK'];

  // Renders one symbol together with the break (if any) detected next to it.
  const renderSymbol = ({ text, property }) => {
    const detectedBreak = property?.detectedBreak;
    if (!detectedBreak) return text;

    // Line-ending break types become a newline; every other break type becomes a space.
    const breakStr = lineBreakTypes.includes(detectedBreak.type) ? '\n' : ' ';

    // isPrefix tells whether the break goes before or after the symbol.
    return detectedBreak.isPrefix
      ? `${breakStr}${text}`
      : `${text}${breakStr}`;
  };

  // Hierarchy described in https://cloud.google.com/vision/docs/fulltext-annotations#annotating_an_image_using_document_text_ocr
  // page > block > paragraph > word > symbol
  //
  return (fullTextAnnotation?.pages ?? [])
    .flatMap((page) => page?.blocks ?? [])
    .flatMap((block) => block?.paragraphs ?? [])
    .filter(({ confidence }) => confidence >= OCR_CONFIDENCE_THRESHOLD)
    .flatMap((paragraph) => paragraph.words ?? [])
    .flatMap((word) => word.symbols ?? [])
    .map(renderSymbol)
    .join('');
}

/**
* @param {object} queryInfo - contains type and media entry ID of contents after fileUrl
Expand All @@ -713,10 +753,25 @@ export async function createTranscript(queryInfo, fileUrl, user) {
] = await imageAnnotator.documentTextDetection(fileUrl);

console.log('[createTranscript]', queryInfo.id, fullTextAnnotation);

// This should not happen, but just in case
//
if (
!fullTextAnnotation ||
!fullTextAnnotation.pages ||
fullTextAnnotation.pages.length === 0
) {
return update({
status: 'SUCCESS',
// No text detected
text: '',
});
}

return update({
status: 'SUCCESS',
// Write '' if no text detected
text: fullTextAnnotation?.text ?? '',
text: extractTextFromFullTextAnnotation(fullTextAnnotation),
});
}

Expand Down

0 comments on commit e6bde99

Please sign in to comment.