From ecc6a5194a69318a64277383ee836a2c95dbffdd Mon Sep 17 00:00:00 2001 From: murezzda <47388020+murezzda@users.noreply.github.com> Date: Fri, 5 Apr 2019 08:50:32 +0200 Subject: [PATCH 01/15] Added timestamp update via diff tool --- .../components/timed-text-editor/index.js | 13 +- packages/stt-adapters/index.js | 1 + .../TimedTextEditor/UpdateTimestamps.js | 154 ++++++++++++++++++ 3 files changed, 167 insertions(+), 1 deletion(-) create mode 100644 src/lib/TranscriptEditor/TimedTextEditor/UpdateTimestamps.js diff --git a/packages/components/timed-text-editor/index.js b/packages/components/timed-text-editor/index.js index 48967ba2..f12ce092 100644 --- a/packages/components/timed-text-editor/index.js +++ b/packages/components/timed-text-editor/index.js @@ -20,6 +20,7 @@ import sttJsonAdapter from '../../stt-adapters'; // TODO: connect to local packages version import exportAdapter from '../../export-adapters'; // import exportAdapter from '../../Util/export-adapters/index.js'; +import updateTimestamps from './UpdateTimestamps'; import style from './index.module.css'; class TimedTextEditor extends React.Component { @@ -28,6 +29,7 @@ class TimedTextEditor extends React.Component { this.state = { editorState: EditorState.createEmpty(), + originalState: null, transcriptData: this.props.transcriptData, isEditable: this.props.isEditable, sttJsonType: this.props.sttJsonType, @@ -116,13 +118,22 @@ class TimedTextEditor extends React.Component { } } + updateTimestampsForEditorState() { + // Update timestamps according to the original state. + const currentContent = convertToRaw(this.state.editorState.getCurrentContent()); + const updatedContentRaw = updateTimestamps(currentContent, this.state.originalState); + const updatedContent = convertFromRaw(updatedContentRaw); + + this.setEditorNewContentState(updatedContent); + } + loadData() { if (this.props.transcriptData !== null) { const blocks = sttJsonAdapter(this.props.transcriptData, this.props.sttJsonType); + this.setState({ originalState: convertToRaw(convertFromRaw(blocks)) }); this.setEditorContentState(blocks); } } - getEditorContent(exportFormat) { const format = exportFormat || 'draftjs'; diff --git a/packages/stt-adapters/index.js b/packages/stt-adapters/index.js index 129c3878..703a7f74 100644 --- a/packages/stt-adapters/index.js +++ b/packages/stt-adapters/index.js @@ -48,3 +48,4 @@ const sttJsonAdapter = (transcriptData, sttJsonType) => { }; export default sttJsonAdapter; +export { createEntityMap }; \ No newline at end of file diff --git a/src/lib/TranscriptEditor/TimedTextEditor/UpdateTimestamps.js b/src/lib/TranscriptEditor/TimedTextEditor/UpdateTimestamps.js new file mode 100644 index 00000000..072d658e --- /dev/null +++ b/src/lib/TranscriptEditor/TimedTextEditor/UpdateTimestamps.js @@ -0,0 +1,154 @@ +import generateEntitiesRanges from '../../Util/adapters/generate-entities-ranges/index.js'; +import { createEntityMap } from '../../Util/adapters/index.js'; +import DiffMatchPatch from 'diff-match-patch'; + +const createEntity = (start, end, confidence, word, wordIdx) => { + return ({ + start: start, + end: end, + confidence: confidence, + word: word.toLowerCase().replace(/[.?!]/g, ''), + punct: word, + index: wordIdx, + }); +}; + +const realignTimestamps = (differences, currentContent, referenceContent) => { + + var diffIdx = 0; + var entityIdx = 0; + + const entities = referenceContent.entityMap; + const results = []; + + for (var blockIdx in currentContent.blocks) { + const block = currentContent.blocks[blockIdx]; + const words = block.text.match(/\S+/g) || []; + + var substitutionStart = null; + var wordMeta = null; + var entity = null; + + if (words.length > 0) { + var wordMetaArray = []; + var wordIdx = 0; + + while (wordIdx < words.length) { + const word = words[wordIdx]; + const diffType = differences[diffIdx]; + diffIdx++; + if (diffType === 'm' || diffType === 's') { + entity = entities[entityIdx].data; + wordMeta = createEntity(entity.start, entity.end, entity.confidence, word, wordIdx); + wordIdx++; + entityIdx++; + wordMetaArray.push(wordMeta); + } else if (diffType === 'ss') { + substitutionStart = entities[entityIdx].data.start; + entityIdx++; + } else if (diffType === 'se') { + wordMeta = createEntity(substitutionStart, entities[entityIdx].data.end, 0.0, word, wordIdx); + wordIdx++; + entityIdx++; + wordMetaArray.push(wordMeta); + } else if (diffType === 'd' || diffType == 'si') { + entityIdx++; + } else if (diffType === 'i') { + entity = entities[entityIdx].data; + wordMeta = createEntity(entity.start, entity.end, entity.confidence, word, wordIdx); + wordIdx++; + wordMetaArray.push(wordMeta); + } else { + console.log('Found illegal symbol ' + diffType); + wordIdx++; + } + } + + const updatedBlock = { + text: wordMetaArray.map((entry) => entry.punct).join(' '), + type: 'paragraph', + data: { + speaker: 'speaker', + words: wordMetaArray, + start: wordMetaArray[0].start + }, + entityRanges: generateEntitiesRanges(wordMetaArray, 'punct'), + }; + + results.push(updatedBlock); + } + } + + const updatedContent = { blocks: results, entityMap: createEntityMap(results) }; + + return updatedContent; +}; + + + +// https://github.com/google/diff-match-patch/wiki/Line-or-Word-Diffs +const diffLineMode = (text1, text2) => { + var dmp = new DiffMatchPatch(); + var a = dmp.diff_linesToChars_(text1, text2); + var lineText1 = a.chars1; + var lineText2 = a.chars2; + var lineArray = a.lineArray; + var diffs = dmp.diff_main(lineText1, lineText2, false); + dmp.diff_charsToLines_(diffs, lineArray); + + return diffs; +}; + +const diff = (text1, text2) => { + + var diffArray = []; + var arrayIdx = 0; + + const lineModeDiff = diffLineMode(text2.join('\n') + '\n', text1.join('\n') + '\n'); + while (arrayIdx < lineModeDiff.length) { + const diffEntry = lineModeDiff[arrayIdx]; + const numberOfWords = (diffEntry[1].match(/\n/g) || []).length; + var symbol = '-'; + if (diffEntry[0] === 0) { + symbol = 'm'; + } else if (diffEntry[0] === 1) { + symbol = 'i'; + } else if (diffEntry[0] === -1) { + if (arrayIdx < lineModeDiff.length - 1 && lineModeDiff[arrayIdx + 1][0] === 1) { + symbol = 'ss'; + arrayIdx++; // Increase an additional time to skip insert/delete syntax for substitution + } else { + symbol = 'd'; + } + } + for (var i = 0; i < numberOfWords; i++) { + if (symbol === 'ss' && numberOfWords === 1) { + symbol = 's'; + } + diffArray.push(symbol); + if (symbol === 'ss') { + symbol = 'si'; + } + if (symbol === 'si' && i === (numberOfWords - 2)) { + symbol = 'se'; + } + }; + arrayIdx++; + } + + return diffArray; +}; + + + +const updateTimestamps = (currentContent, originalContent) => { + const currentText = convertContentToText(currentContent); + const originalText = convertContentToText(originalContent); + + const diffArray = diff(currentText, originalText); + const updatedContent = realignTimestamps(diffArray, currentContent, originalContent); + + return (updatedContent); +}; + +export default updateTimestamps; \ No newline at end of file From 8ce71247e830f0dff53e41740b9b96953ab31e3b Mon Sep 17 00:00:00 2001 From: murezzda <47388020+murezzda@users.noreply.github.com> Date: Fri, 5 Apr 2019 09:25:59 +0200 Subject: [PATCH 02/15] Added missing function --- .../TimedTextEditor/UpdateTimestamps.js | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/lib/TranscriptEditor/TimedTextEditor/UpdateTimestamps.js b/src/lib/TranscriptEditor/TimedTextEditor/UpdateTimestamps.js index 072d658e..9f911eae 100644 --- a/src/lib/TranscriptEditor/TimedTextEditor/UpdateTimestamps.js +++ b/src/lib/TranscriptEditor/TimedTextEditor/UpdateTimestamps.js @@ -2,6 +2,18 @@ import generateEntitiesRanges from '../../Util/adapters/generate-entities-ranges import { createEntityMap } from '../../Util/adapters/index.js'; import DiffMatchPatch from 'diff-match-patch'; +const convertContentToText = (content) => { + var text = []; + + for (var blockIdx in content.blocks) { + const block = content.blocks[blockIdx]; + const blockArray = block.text.match(/\S+/g) || []; + text = text.concat(blockArray); + } + + return (text); +}; + const createEntity = (start, end, confidence, word, wordIdx) => { return ({ start: start, @@ -51,7 +63,7 @@ const realignTimestamps = (differences, currentContent, referenceContent) => { wordIdx++; entityIdx++; wordMetaArray.push(wordMeta); - } else if (diffType === 'd' || diffType == 'si') { + } else if (diffType === 'd' || diffType === 'si') { entityIdx++; } else if (diffType === 'i') { entity = entities[entityIdx].data; @@ -84,8 +96,6 @@ const realignTimestamps = (differences, currentContent, referenceContent) => { return updatedContent; }; - - // https://github.com/google/diff-match-patch/wiki/Line-or-Word-Diffs const diffLineMode = (text1, text2) => { var dmp = new DiffMatchPatch(); @@ -139,8 +149,6 @@ const diff = (text1, text2) => { return diffArray; }; - - const updateTimestamps = (currentContent, originalContent) => { const currentText = convertContentToText(currentContent); const originalText = convertContentToText(originalContent); From 08723085e417e8c73e94cf06c515a89360cbf5bb Mon Sep 17 00:00:00 2001 From: murezzda <47388020+murezzda@users.noreply.github.com> Date: Mon, 8 Apr 2019 10:33:19 +0200 Subject: [PATCH 03/15] Commited intermediate state --- .../components/timed-text-editor/index.js | 20 +++++++ .../TimedTextEditor/UpdateTimestamps.js | 52 ++++++++++++------- 2 files changed, 54 insertions(+), 18 deletions(-) diff --git a/packages/components/timed-text-editor/index.js b/packages/components/timed-text-editor/index.js index f12ce092..8530585c 100644 --- a/packages/components/timed-text-editor/index.js +++ b/packages/components/timed-text-editor/index.js @@ -114,6 +114,13 @@ class TimedTextEditor extends React.Component { this.saveTimer = setTimeout(() => { this.localSave(this.props.mediaUrl); }, 1000); + + if (this.timestampTimer !== undefined) { + clearTimeout(this.timestampTimer); + } + this.timestampTimer = setTimeout(() => { + this.updateTimestampsForEditorState(); + }, 5000); }); } } @@ -266,8 +273,21 @@ class TimedTextEditor extends React.Component { * Update Editor content state */ setEditorNewContentState = (newContentState) => { + var start = new Date().getTime(); + const newEditorState = EditorState.push(this.state.editorState, newContentState); + + var end = new Date().getTime(); + var time = end - start; + console.log('Execution time for Editor State push: ' + time); + + start = new Date().getTime(); + this.setState({ editorState: newEditorState }); + + end = new Date().getTime(); + time = end - start; + console.log('Execution time for Set State: ' + time); } /** diff --git a/src/lib/TranscriptEditor/TimedTextEditor/UpdateTimestamps.js b/src/lib/TranscriptEditor/TimedTextEditor/UpdateTimestamps.js index 9f911eae..15f5a1b6 100644 --- a/src/lib/TranscriptEditor/TimedTextEditor/UpdateTimestamps.js +++ b/src/lib/TranscriptEditor/TimedTextEditor/UpdateTimestamps.js @@ -80,7 +80,7 @@ const realignTimestamps = (differences, currentContent, referenceContent) => { text: wordMetaArray.map((entry) => entry.punct).join(' '), type: 'paragraph', data: { - speaker: 'speaker', + speaker: block.data.speaker, words: wordMetaArray, start: wordMetaArray[0].start }, @@ -118,41 +118,57 @@ const diff = (text1, text2) => { while (arrayIdx < lineModeDiff.length) { const diffEntry = lineModeDiff[arrayIdx]; const numberOfWords = (diffEntry[1].match(/\n/g) || []).length; - var symbol = '-'; if (diffEntry[0] === 0) { - symbol = 'm'; + for (var i = 0; i < numberOfWords; i++) { diffArray.push('m'); } } else if (diffEntry[0] === 1) { - symbol = 'i'; + for (var i = 0; i < numberOfWords; i++) { diffArray.push('i'); } } else if (diffEntry[0] === -1) { if (arrayIdx < lineModeDiff.length - 1 && lineModeDiff[arrayIdx + 1][0] === 1) { - symbol = 'ss'; + // The matching number of words is substituted + const numberOfWordsSub = (lineModeDiff[arrayIdx + 1][1].match(/\n/g) || []).length; + + for (var subItr = 0; subItr < Math.min(numberOfWords, numberOfWordsSub); subItr++) { diffArray.push('s'); } + for (var delItr = 0; delItr < numberOfWords - numberOfWordsSub; delItr++) { diffArray.push('d'); } + for (var insItr = 0; insItr < numberOfWordsSub - numberOfWords; insItr++) { diffArray.push('i'); } + + // if (numberOfWordsSub < numberOfWords) { + // diffArray.push('ss'); + // for (var i = 1; i < (numberOfWords - 1); i++) { diffArray.push('si'); } + // diffArray.push('se'); + // for (var i = 0; i < (numberOfWords - numberOfWordsSub); i++) { diffArray.push('i'); } + // } else { + // } + + // diffArray.push('ss'); + // for (var i = 1; i < (numberOfWords - 1); i++) { diffArray.push('si'); } + // diffArray.push('se'); + // if (numberOfWords > numberOfWordsSub) { + // // If there are more words in the original substring, delete the rest. + // for (var i = 0; i < (numberOfWords - numberOfWordsSub); i++) { diffArray.push('i'); } + // } arrayIdx++; // Increase an additional time to skip insert/delete syntax for substitution } else { - symbol = 'd'; + for (var i = 0; i < numberOfWords; i++) { diffArray.push('diff'); } } } - for (var i = 0; i < numberOfWords; i++) { - if (symbol === 'ss' && numberOfWords === 1) { - symbol = 's'; - } - diffArray.push(symbol); - if (symbol === 'ss') { - symbol = 'si'; - } - if (symbol === 'si' && i === (numberOfWords - 2)) { - symbol = 'se'; - } - }; arrayIdx++; } return diffArray; }; +const diffAndRealign = (currentText, originalText) => { + const lineModeDiff = diffLineMode(originalText.join('\n') + '\n', currentText.join('\n') + '\n'); + + return lineModeDiff; +}; + const updateTimestamps = (currentContent, originalContent) => { const currentText = convertContentToText(currentContent); const originalText = convertContentToText(originalContent); + // const updatedContent = diffAndRealign(currentText, originalText) + const diffArray = diff(currentText, originalText); const updatedContent = realignTimestamps(diffArray, currentContent, originalContent); From 2d1619baee15544fb6d138beaf4c58b26183ad03 Mon Sep 17 00:00:00 2001 From: murezzda <47388020+murezzda@users.noreply.github.com> Date: Mon, 8 Apr 2019 12:26:45 +0200 Subject: [PATCH 04/15] Rewrote timestamp alignment and differ to be integrated in each other instead of doing a 2-step process. --- .../TimedTextEditor/UpdateTimestamps.js | 196 +++++++----------- 1 file changed, 72 insertions(+), 124 deletions(-) diff --git a/src/lib/TranscriptEditor/TimedTextEditor/UpdateTimestamps.js b/src/lib/TranscriptEditor/TimedTextEditor/UpdateTimestamps.js index 15f5a1b6..7f41beef 100644 --- a/src/lib/TranscriptEditor/TimedTextEditor/UpdateTimestamps.js +++ b/src/lib/TranscriptEditor/TimedTextEditor/UpdateTimestamps.js @@ -25,77 +25,6 @@ const createEntity = (start, end, confidence, word, wordIdx) => { }); }; -const realignTimestamps = (differences, currentContent, referenceContent) => { - - var diffIdx = 0; - var entityIdx = 0; - - const entities = referenceContent.entityMap; - const results = []; - - for (var blockIdx in currentContent.blocks) { - const block = currentContent.blocks[blockIdx]; - const words = block.text.match(/\S+/g) || []; - - var substitutionStart = null; - var wordMeta = null; - var entity = null; - - if (words.length > 0) { - var wordMetaArray = []; - var wordIdx = 0; - - while (wordIdx < words.length) { - const word = words[wordIdx]; - const diffType = differences[diffIdx]; - diffIdx++; - if (diffType === 'm' || diffType === 's') { - entity = entities[entityIdx].data; - wordMeta = createEntity(entity.start, entity.end, entity.confidence, word, wordIdx); - wordIdx++; - entityIdx++; - wordMetaArray.push(wordMeta); - } else if (diffType === 'ss') { - substitutionStart = entities[entityIdx].data.start; - entityIdx++; - } else if (diffType === 'se') { - wordMeta = createEntity(substitutionStart, entities[entityIdx].data.end, 0.0, word, wordIdx); - wordIdx++; - entityIdx++; - wordMetaArray.push(wordMeta); - } else if (diffType === 'd' || diffType === 'si') { - entityIdx++; - } else if (diffType === 'i') { - entity = entities[entityIdx].data; - wordMeta = createEntity(entity.start, entity.end, entity.confidence, word, wordIdx); - wordIdx++; - wordMetaArray.push(wordMeta); - } else { - console.log('Found illegal symbol ' + diffType); - wordIdx++; - } - } - - const updatedBlock = { - text: wordMetaArray.map((entry) => entry.punct).join(' '), - type: 'paragraph', - data: { - speaker: block.data.speaker, - words: wordMetaArray, - start: wordMetaArray[0].start - }, - entityRanges: generateEntitiesRanges(wordMetaArray, 'punct'), - }; - - results.push(updatedBlock); - } - } - - const updatedContent = { blocks: results, entityMap: createEntityMap(results) }; - - return updatedContent; -}; - // https://github.com/google/diff-match-patch/wiki/Line-or-Word-Diffs const diffLineMode = (text1, text2) => { var dmp = new DiffMatchPatch(); @@ -109,70 +38,89 @@ const diffLineMode = (text1, text2) => { return diffs; }; -const diff = (text1, text2) => { +const updateTimestamps = (currentContent, originalContent) => { + const currentText = convertContentToText(currentContent); + const originalText = convertContentToText(originalContent); + + const lineModeDiff = diffLineMode(originalText.join('\n') + '\n', currentText.join('\n') + '\n'); + const entities = originalContent.entityMap; + + var currentTextIdx = 0; + var entityIdx = 0; + var diffIdx = 0; - var diffArray = []; - var arrayIdx = 0; + var newEntities = []; - const lineModeDiff = diffLineMode(text2.join('\n') + '\n', text1.join('\n') + '\n'); - while (arrayIdx < lineModeDiff.length) { - const diffEntry = lineModeDiff[arrayIdx]; + while (diffIdx < lineModeDiff.length) { + const diffEntry = lineModeDiff[diffIdx]; + const nextDiffEntry = lineModeDiff[diffIdx + 1] || -1; + const diffType = diffEntry[0]; const numberOfWords = (diffEntry[1].match(/\n/g) || []).length; - if (diffEntry[0] === 0) { - for (var i = 0; i < numberOfWords; i++) { diffArray.push('m'); } - } else if (diffEntry[0] === 1) { - for (var i = 0; i < numberOfWords; i++) { diffArray.push('i'); } - } else if (diffEntry[0] === -1) { - if (arrayIdx < lineModeDiff.length - 1 && lineModeDiff[arrayIdx + 1][0] === 1) { - // The matching number of words is substituted - const numberOfWordsSub = (lineModeDiff[arrayIdx + 1][1].match(/\n/g) || []).length; - - for (var subItr = 0; subItr < Math.min(numberOfWords, numberOfWordsSub); subItr++) { diffArray.push('s'); } - for (var delItr = 0; delItr < numberOfWords - numberOfWordsSub; delItr++) { diffArray.push('d'); } - for (var insItr = 0; insItr < numberOfWordsSub - numberOfWords; insItr++) { diffArray.push('i'); } - - // if (numberOfWordsSub < numberOfWords) { - // diffArray.push('ss'); - // for (var i = 1; i < (numberOfWords - 1); i++) { diffArray.push('si'); } - // diffArray.push('se'); - // for (var i = 0; i < (numberOfWords - numberOfWordsSub); i++) { diffArray.push('i'); } - // } else { - // } - - // diffArray.push('ss'); - // for (var i = 1; i < (numberOfWords - 1); i++) { diffArray.push('si'); } - // diffArray.push('se'); - // if (numberOfWords > numberOfWordsSub) { - // // If there are more words in the original substring, delete the rest. - // for (var i = 0; i < (numberOfWords - numberOfWordsSub); i++) { diffArray.push('i'); } - // } - arrayIdx++; // Increase an additional time to skip insert/delete syntax for substitution - } else { - for (var i = 0; i < numberOfWords; i++) { diffArray.push('diff'); } + + if (diffType === 0) { + for (var wordItr = 0; wordItr < numberOfWords; wordItr++) { + const word = currentText[currentTextIdx++]; + const entity = entities[entityIdx++].data; + + const newEntity = createEntity(entity.start, entity.end, 0.0, word, -1); + newEntities.push(newEntity); } - } - arrayIdx++; - } + } else if (diffType === -1) { + if (nextDiffEntry !== -1 && nextDiffEntry[0] === 1) { + const entityStart = entities[entityIdx].data.start; + const entityEnd = entities[entityIdx + numberOfWords - 1].data.end; - return diffArray; -}; + const numberOfReplacements = (nextDiffEntry[1].match(/\n/g) || []).length; -const diffAndRealign = (currentText, originalText) => { - const lineModeDiff = diffLineMode(originalText.join('\n') + '\n', currentText.join('\n') + '\n'); + for (var wordItr = 0; wordItr < numberOfReplacements; wordItr++) { + const word = currentText[currentTextIdx++]; - return lineModeDiff; -}; + const newEntity = createEntity(entityStart, entityEnd, 0.0, word, -1); + newEntities.push(newEntity); + } + entityIdx += numberOfWords; + diffIdx++; + } else { + entityIdx += numberOfWords; + } + } else if (diffType === 1) { + for (var wordItr = 0; wordItr < numberOfWords; wordItr++) { + const word = currentText[currentTextIdx++]; + const entity = entities[entityIdx].data; -const updateTimestamps = (currentContent, originalContent) => { - const currentText = convertContentToText(currentContent); - const originalText = convertContentToText(originalContent); + const newEntity = createEntity(entity.start, entity.end, 0.0, word, -1); + newEntities.push(newEntity); + } + } + diffIdx ++; + } + + var updatedBlockArray = []; + var totalWords = 0; - // const updatedContent = diffAndRealign(currentText, originalText) + for (var blockIdx in currentContent.blocks) { + const block = currentContent.blocks[blockIdx]; + const wordsInBlock = (block.text.match(/\S+/g) || []).length; + const blockEntites = newEntities.slice(totalWords, totalWords + wordsInBlock); + + const updatedBlock = { + text: blockEntites.map((entry) => entry.punct).join(' '), + type: 'paragraph', + data: { + speaker: block.data.speaker, + words: blockEntites, + start: blockEntites[0].start + }, + entityRanges: generateEntitiesRanges(blockEntites, 'punct'), + }; + + updatedBlockArray.push(updatedBlock); + totalWords += wordsInBlock; + } - const diffArray = diff(currentText, originalText); - const updatedContent = realignTimestamps(diffArray, currentContent, originalContent); + const updatedContent = { blocks: updatedBlockArray, entityMap: createEntityMap(updatedBlockArray) }; - return (updatedContent); + return updatedContent; }; export default updateTimestamps; \ No newline at end of file From 5d921174e70fff3c19c09024422036f3d919e8ee Mon Sep 17 00:00:00 2001 From: murezzda <47388020+murezzda@users.noreply.github.com> Date: Wed, 10 Apr 2019 09:13:46 +0200 Subject: [PATCH 05/15] Update Timestamps now works correctly. --- .../TimedTextEditor/UpdateTimestamps.js | 36 ++++++++++++++----- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/src/lib/TranscriptEditor/TimedTextEditor/UpdateTimestamps.js b/src/lib/TranscriptEditor/TimedTextEditor/UpdateTimestamps.js index 7f41beef..ca0cf35c 100644 --- a/src/lib/TranscriptEditor/TimedTextEditor/UpdateTimestamps.js +++ b/src/lib/TranscriptEditor/TimedTextEditor/UpdateTimestamps.js @@ -58,6 +58,7 @@ const updateTimestamps = (currentContent, originalContent) => { const numberOfWords = (diffEntry[1].match(/\n/g) || []).length; if (diffType === 0) { + // Matched words. for (var wordItr = 0; wordItr < numberOfWords; wordItr++) { const word = currentText[currentTextIdx++]; const entity = entities[entityIdx++].data; @@ -66,24 +67,40 @@ const updateTimestamps = (currentContent, originalContent) => { newEntities.push(newEntity); } } else if (diffType === -1) { + // Deletion if (nextDiffEntry !== -1 && nextDiffEntry[0] === 1) { - const entityStart = entities[entityIdx].data.start; - const entityEnd = entities[entityIdx + numberOfWords - 1].data.end; - + // If next entry is a insert, the operation is a replacement. const numberOfReplacements = (nextDiffEntry[1].match(/\n/g) || []).length; - for (var wordItr = 0; wordItr < numberOfReplacements; wordItr++) { - const word = currentText[currentTextIdx++]; - - const newEntity = createEntity(entityStart, entityEnd, 0.0, word, -1); - newEntities.push(newEntity); + if (numberOfReplacements === numberOfWords) { + // If the number of replacement words is equal to the number of original words + // it is easily possible to match them correctly. + for (var wordItr = 0; wordItr < numberOfWords; wordItr++) { + const word = currentText[currentTextIdx++]; + const entity = entities[entityIdx++].data; + + const newEntity = createEntity(entity.start, entity.end, 0.0, word, -1); + newEntities.push(newEntity); + } + } else { + // Otherwise, we give the whole segment the same timestamp. + const entityStart = entities[entityIdx].data.start; + const entityEnd = entities[entityIdx + numberOfWords - 1].data.end; + + for (var wordItr = 0; wordItr < numberOfReplacements; wordItr++) { + const word = currentText[currentTextIdx++]; + const newEntity = createEntity(entityStart, entityEnd, 0.0, word, -1); + newEntities.push(newEntity); + } + entityIdx += numberOfWords; } - entityIdx += numberOfWords; diffIdx++; } else { + // Deletions ignore the corresponding entity. entityIdx += numberOfWords; } } else if (diffType === 1) { + // Insertions get the same timestamp as the previous entity for (var wordItr = 0; wordItr < numberOfWords; wordItr++) { const word = currentText[currentTextIdx++]; const entity = entities[entityIdx].data; @@ -95,6 +112,7 @@ const updateTimestamps = (currentContent, originalContent) => { diffIdx ++; } + // Update entites to block structure. var updatedBlockArray = []; var totalWords = 0; From 02c82bc4357fcf0978db56814819fc94e8298868 Mon Sep 17 00:00:00 2001 From: murezzda <47388020+murezzda@users.noreply.github.com> Date: Fri, 12 Apr 2019 08:39:51 +0200 Subject: [PATCH 06/15] Fixed errors from rebase, removed debug code --- package-lock.json | 122 ++++++++---------- package.json | 1 + .../timed-text-editor}/UpdateTimestamps.js | 4 +- .../components/timed-text-editor/index.js | 14 +- 4 files changed, 58 insertions(+), 83 deletions(-) rename {src/lib/TranscriptEditor/TimedTextEditor => packages/components/timed-text-editor}/UpdateTimestamps.js (96%) diff --git a/package-lock.json b/package-lock.json index 10267734..f36ae579 100644 --- a/package-lock.json +++ b/package-lock.json @@ -2614,7 +2614,7 @@ }, "array-equal": { "version": "1.0.0", - "resolved": "https://registry.npmjs.org/array-equal/-/array-equal-1.0.0.tgz", + "resolved": "http://registry.npmjs.org/array-equal/-/array-equal-1.0.0.tgz", "integrity": "sha1-jCpe8kcv2ep0KwTHenUJO6J1fJM=", "dev": true }, @@ -2749,7 +2749,7 @@ }, "util": { "version": "0.10.3", - "resolved": "https://registry.npmjs.org/util/-/util-0.10.3.tgz", + "resolved": "http://registry.npmjs.org/util/-/util-0.10.3.tgz", "integrity": "sha1-evsa/lCAUkZInj23/g7TeTNqwPk=", "dev": true, "requires": { @@ -3139,7 +3139,7 @@ }, "babel-plugin-syntax-object-rest-spread": { "version": "6.13.0", - "resolved": "https://registry.npmjs.org/babel-plugin-syntax-object-rest-spread/-/babel-plugin-syntax-object-rest-spread-6.13.0.tgz", + "resolved": "http://registry.npmjs.org/babel-plugin-syntax-object-rest-spread/-/babel-plugin-syntax-object-rest-spread-6.13.0.tgz", "integrity": "sha1-/WU28rzhODb/o6VFjEkDpZe7O/U=", "dev": true }, @@ -3771,7 +3771,7 @@ }, "browserify-aes": { "version": "1.2.0", - "resolved": "https://registry.npmjs.org/browserify-aes/-/browserify-aes-1.2.0.tgz", + "resolved": "http://registry.npmjs.org/browserify-aes/-/browserify-aes-1.2.0.tgz", "integrity": "sha512-+7CHXqGuspUn/Sl5aO7Ea0xWGAtETPXNSAjHo48JfLdPWcMng33Xe4znFvQweqc/uzk5zSOI3H52CYnjCfb5hA==", "dev": true, "requires": { @@ -3808,7 +3808,7 @@ }, "browserify-rsa": { "version": "4.0.1", - "resolved": "https://registry.npmjs.org/browserify-rsa/-/browserify-rsa-4.0.1.tgz", + "resolved": "http://registry.npmjs.org/browserify-rsa/-/browserify-rsa-4.0.1.tgz", "integrity": "sha1-IeCr+vbyApzy+vsTNWenAdQTVSQ=", "dev": true, "requires": { @@ -3862,7 +3862,7 @@ }, "buffer": { "version": "4.9.1", - "resolved": "https://registry.npmjs.org/buffer/-/buffer-4.9.1.tgz", + "resolved": "http://registry.npmjs.org/buffer/-/buffer-4.9.1.tgz", "integrity": "sha1-bRu2AbB6TvztlwlBMgkwJ8lbwpg=", "dev": true, "requires": { @@ -4380,7 +4380,7 @@ }, "clone-deep": { "version": "0.2.4", - "resolved": "https://registry.npmjs.org/clone-deep/-/clone-deep-0.2.4.tgz", + "resolved": "http://registry.npmjs.org/clone-deep/-/clone-deep-0.2.4.tgz", "integrity": "sha1-TnPdCen7lxzDhnDF3O2cGJZIHMY=", "dev": true, "requires": { @@ -4780,7 +4780,7 @@ }, "create-hash": { "version": "1.2.0", - "resolved": "https://registry.npmjs.org/create-hash/-/create-hash-1.2.0.tgz", + "resolved": "http://registry.npmjs.org/create-hash/-/create-hash-1.2.0.tgz", "integrity": "sha512-z00bCGNHDG8mHAkP7CtT1qVu+bFQUPjYq/4Iv3C3kWjTFV10zIjfSoeqXo9Asws8gwSHDGj/hl2u4OGIjapeCg==", "dev": true, "requires": { @@ -4793,7 +4793,7 @@ }, "create-hmac": { "version": "1.1.7", - "resolved": "https://registry.npmjs.org/create-hmac/-/create-hmac-1.1.7.tgz", + "resolved": "http://registry.npmjs.org/create-hmac/-/create-hmac-1.1.7.tgz", "integrity": "sha512-MJG9liiZ+ogc4TzUwuvbER1JRdgvUFSB5+VR/g5h82fGaIRWMWddtKBHi7/sVhfjQZ6SehlyhvQYrcYkaUIpLg==", "dev": true, "requires": { @@ -4877,7 +4877,7 @@ }, "css-select": { "version": "1.2.0", - "resolved": "https://registry.npmjs.org/css-select/-/css-select-1.2.0.tgz", + "resolved": "http://registry.npmjs.org/css-select/-/css-select-1.2.0.tgz", "integrity": "sha1-KzoRBTnFNV8c2NMUYj6HCxIeyFg=", "dev": true, "requires": { @@ -5198,6 +5198,11 @@ "debug": "^2.6.0" } }, + "diff-match-patch": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/diff-match-patch/-/diff-match-patch-1.0.4.tgz", + "integrity": "sha512-Uv3SW8bmH9nAtHKaKSanOQmj2DnlH65fUpcrMdfdaOxUG02QQ4YGZ8AE7kKOMisF7UqvOlGKVYWRvezdncW9lg==" + }, "diff-sequences": { "version": "24.3.0", "resolved": "https://registry.npmjs.org/diff-sequences/-/diff-sequences-24.3.0.tgz", @@ -5206,7 +5211,7 @@ }, "diffie-hellman": { "version": "5.0.3", - "resolved": "https://registry.npmjs.org/diffie-hellman/-/diffie-hellman-5.0.3.tgz", + "resolved": "http://registry.npmjs.org/diffie-hellman/-/diffie-hellman-5.0.3.tgz", "integrity": "sha512-kqag/Nl+f3GwyK25fhUMYj81BUOrZ9IuJsjIcDE5icNM9FJHAVm3VcUDxdLPoQtTuUylWm6ZIknYJwwaPxsUzg==", "dev": true, "requires": { @@ -5377,7 +5382,7 @@ }, "duplexer": { "version": "0.1.1", - "resolved": "https://registry.npmjs.org/duplexer/-/duplexer-0.1.1.tgz", + "resolved": "http://registry.npmjs.org/duplexer/-/duplexer-0.1.1.tgz", "integrity": "sha1-rOb/gIwc5mtX0ev5eXessCM0z8E=", "dev": true }, @@ -6018,7 +6023,7 @@ "dependencies": { "doctrine": { "version": "1.5.0", - "resolved": "https://registry.npmjs.org/doctrine/-/doctrine-1.5.0.tgz", + "resolved": "http://registry.npmjs.org/doctrine/-/doctrine-1.5.0.tgz", "integrity": "sha1-N53Ocw9hZvds76TmcHoVmwLFpvo=", "dev": true, "requires": { @@ -6649,7 +6654,7 @@ "dependencies": { "core-js": { "version": "1.2.7", - "resolved": "https://registry.npmjs.org/core-js/-/core-js-1.2.7.tgz", + "resolved": "http://registry.npmjs.org/core-js/-/core-js-1.2.7.tgz", "integrity": "sha1-ZSKUwUZR2yj6k70tX/KYOk8IxjY=" } } @@ -6789,7 +6794,7 @@ }, "finalhandler": { "version": "1.1.1", - "resolved": "https://registry.npmjs.org/finalhandler/-/finalhandler-1.1.1.tgz", + "resolved": "http://registry.npmjs.org/finalhandler/-/finalhandler-1.1.1.tgz", "integrity": "sha512-Y1GUDo39ez4aHAw7MysnUD5JzYX+WaIj8I57kO3aEPT1fFRL4sr7mjei97FgnwhAyyzRYmQZaTHb2+9uZ1dPtg==", "dev": true, "requires": { @@ -7020,8 +7025,7 @@ "ansi-regex": { "version": "2.1.1", "bundled": true, - "dev": true, - "optional": true + "dev": true }, "aproba": { "version": "1.2.0", @@ -7042,14 +7046,12 @@ "balanced-match": { "version": "1.0.0", "bundled": true, - "dev": true, - "optional": true + "dev": true }, "brace-expansion": { "version": "1.1.11", "bundled": true, "dev": true, - "optional": true, "requires": { "balanced-match": "^1.0.0", "concat-map": "0.0.1" @@ -7064,20 +7066,17 @@ "code-point-at": { "version": "1.1.0", "bundled": true, - "dev": true, - "optional": true + "dev": true }, "concat-map": { "version": "0.0.1", "bundled": true, - "dev": true, - "optional": true + "dev": true }, "console-control-strings": { "version": "1.1.0", "bundled": true, - "dev": true, - "optional": true + "dev": true }, "core-util-is": { "version": "1.0.2", @@ -7194,8 +7193,7 @@ "inherits": { "version": "2.0.3", "bundled": true, - "dev": true, - "optional": true + "dev": true }, "ini": { "version": "1.3.5", @@ -7207,7 +7205,6 @@ "version": "1.0.0", "bundled": true, "dev": true, - "optional": true, "requires": { "number-is-nan": "^1.0.0" } @@ -7222,7 +7219,6 @@ "version": "3.0.4", "bundled": true, "dev": true, - "optional": true, "requires": { "brace-expansion": "^1.1.7" } @@ -7230,14 +7226,12 @@ "minimist": { "version": "0.0.8", "bundled": true, - "dev": true, - "optional": true + "dev": true }, "minipass": { "version": "2.3.5", "bundled": true, "dev": true, - "optional": true, "requires": { "safe-buffer": "^5.1.2", "yallist": "^3.0.0" @@ -7256,7 +7250,6 @@ "version": "0.5.1", "bundled": true, "dev": true, - "optional": true, "requires": { "minimist": "0.0.8" } @@ -7337,8 +7330,7 @@ "number-is-nan": { "version": "1.0.1", "bundled": true, - "dev": true, - "optional": true + "dev": true }, "object-assign": { "version": "4.1.1", @@ -7350,7 +7342,6 @@ "version": "1.4.0", "bundled": true, "dev": true, - "optional": true, "requires": { "wrappy": "1" } @@ -7436,8 +7427,7 @@ "safe-buffer": { "version": "5.1.2", "bundled": true, - "dev": true, - "optional": true + "dev": true }, "safer-buffer": { "version": "2.1.2", @@ -7473,7 +7463,6 @@ "version": "1.0.2", "bundled": true, "dev": true, - "optional": true, "requires": { "code-point-at": "^1.0.0", "is-fullwidth-code-point": "^1.0.0", @@ -7493,7 +7482,6 @@ "version": "3.0.1", "bundled": true, "dev": true, - "optional": true, "requires": { "ansi-regex": "^2.0.0" } @@ -7537,14 +7525,12 @@ "wrappy": { "version": "1.0.2", "bundled": true, - "dev": true, - "optional": true + "dev": true }, "yallist": { "version": "3.0.3", "bundled": true, - "dev": true, - "optional": true + "dev": true } } }, @@ -8253,7 +8239,7 @@ }, "http-errors": { "version": "1.6.3", - "resolved": "https://registry.npmjs.org/http-errors/-/http-errors-1.6.3.tgz", + "resolved": "http://registry.npmjs.org/http-errors/-/http-errors-1.6.3.tgz", "integrity": "sha1-i1VoC7S+KDoLW/TqLjhYC+HZMg0=", "dev": true, "requires": { @@ -8288,7 +8274,7 @@ }, "humanize-url": { "version": "1.0.1", - "resolved": "https://registry.npmjs.org/humanize-url/-/humanize-url-1.0.1.tgz", + "resolved": "http://registry.npmjs.org/humanize-url/-/humanize-url-1.0.1.tgz", "integrity": "sha1-9KuZ4NKIF0yk4eUEB8VfuuRk7/8=", "dev": true, "requires": { @@ -8424,7 +8410,7 @@ }, "immutable": { "version": "3.7.6", - "resolved": "https://registry.npmjs.org/immutable/-/immutable-3.7.6.tgz", + "resolved": "http://registry.npmjs.org/immutable/-/immutable-3.7.6.tgz", "integrity": "sha1-E7TTyxK++hVIKib+Gy665kAHHks=" }, "import-cwd": { @@ -8895,7 +8881,7 @@ }, "is-obj": { "version": "1.0.1", - "resolved": "https://registry.npmjs.org/is-obj/-/is-obj-1.0.1.tgz", + "resolved": "http://registry.npmjs.org/is-obj/-/is-obj-1.0.1.tgz", "integrity": "sha1-PkcprB9f3gJc19g6iW2rn09n2w8=", "dev": true }, @@ -9856,7 +9842,7 @@ "dependencies": { "minimist": { "version": "1.2.0", - "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.0.tgz", + "resolved": "http://registry.npmjs.org/minimist/-/minimist-1.2.0.tgz", "integrity": "sha1-o1AIsg9BOD7sH7kU9M1d95omQoQ=", "dev": true } @@ -10029,7 +10015,7 @@ "dependencies": { "json5": { "version": "1.0.1", - "resolved": "https://registry.npmjs.org/json5/-/json5-1.0.1.tgz", + "resolved": "http://registry.npmjs.org/json5/-/json5-1.0.1.tgz", "integrity": "sha512-aKS4WQjPenRxiQsC93MNfjx+nbF4PAdYzmd/1JIj8HYzqfbu86beTuNgXDzPknWk0n0uARlyewZo4s++ES36Ow==", "dev": true, "requires": { @@ -10038,7 +10024,7 @@ }, "minimist": { "version": "1.2.0", - "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.0.tgz", + "resolved": "http://registry.npmjs.org/minimist/-/minimist-1.2.0.tgz", "integrity": "sha1-o1AIsg9BOD7sH7kU9M1d95omQoQ=", "dev": true } @@ -10337,7 +10323,7 @@ }, "media-typer": { "version": "0.3.0", - "resolved": "https://registry.npmjs.org/media-typer/-/media-typer-0.3.0.tgz", + "resolved": "http://registry.npmjs.org/media-typer/-/media-typer-0.3.0.tgz", "integrity": "sha1-hxDXrwqmJvj/+hzgAWhUUmMlV0g=", "dev": true }, @@ -10648,7 +10634,7 @@ }, "minimist": { "version": "0.0.8", - "resolved": "https://registry.npmjs.org/minimist/-/minimist-0.0.8.tgz", + "resolved": "http://registry.npmjs.org/minimist/-/minimist-0.0.8.tgz", "integrity": "sha1-hX/Kv8M5fSYluCKCYuhqp6ARsF0=", "dev": true }, @@ -10721,7 +10707,7 @@ }, "mkdirp": { "version": "0.5.1", - "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-0.5.1.tgz", + "resolved": "http://registry.npmjs.org/mkdirp/-/mkdirp-0.5.1.tgz", "integrity": "sha1-MAV0OOrGz3+MR2fzhkjWaX11yQM=", "dev": true, "requires": { @@ -11385,7 +11371,7 @@ }, "os-homedir": { "version": "1.0.2", - "resolved": "https://registry.npmjs.org/os-homedir/-/os-homedir-1.0.2.tgz", + "resolved": "http://registry.npmjs.org/os-homedir/-/os-homedir-1.0.2.tgz", "integrity": "sha1-/7xJiDNuDoM94MFox+8VISGqf7M=", "dev": true }, @@ -11441,7 +11427,7 @@ }, "os-tmpdir": { "version": "1.0.2", - "resolved": "https://registry.npmjs.org/os-tmpdir/-/os-tmpdir-1.0.2.tgz", + "resolved": "http://registry.npmjs.org/os-tmpdir/-/os-tmpdir-1.0.2.tgz", "integrity": "sha1-u+Z0BseaqFxc/sdm/lc0VV36EnQ=", "dev": true }, @@ -11689,7 +11675,7 @@ }, "path-is-absolute": { "version": "1.0.1", - "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", + "resolved": "http://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", "integrity": "sha1-F0uSaHNVNP+8es5r9TpanhtcX18=", "dev": true }, @@ -13296,7 +13282,7 @@ }, "readable-stream": { "version": "2.3.6", - "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.6.tgz", + "resolved": "http://registry.npmjs.org/readable-stream/-/readable-stream-2.3.6.tgz", "integrity": "sha512-tQtKA9WIAhBF3+VLAseyMqZeBjW0AHJoxOtYqSUZNJxauErmLbVm2FW1y+J/YA9dUrAC39ITejlZWhVIwawkKw==", "dev": true, "requires": { @@ -13536,7 +13522,7 @@ "dependencies": { "jsesc": { "version": "0.5.0", - "resolved": "https://registry.npmjs.org/jsesc/-/jsesc-0.5.0.tgz", + "resolved": "http://registry.npmjs.org/jsesc/-/jsesc-0.5.0.tgz", "integrity": "sha1-597mbjXW/Bb3EP6R1c9p9w8IkR0=", "dev": true } @@ -13967,7 +13953,7 @@ }, "safe-regex": { "version": "1.1.0", - "resolved": "https://registry.npmjs.org/safe-regex/-/safe-regex-1.1.0.tgz", + "resolved": "http://registry.npmjs.org/safe-regex/-/safe-regex-1.1.0.tgz", "integrity": "sha1-QKNmnzsHfR6UPURinhV91IAjvy4=", "dev": true, "requires": { @@ -14035,7 +14021,7 @@ }, "minimist": { "version": "1.2.0", - "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.0.tgz", + "resolved": "http://registry.npmjs.org/minimist/-/minimist-1.2.0.tgz", "integrity": "sha1-o1AIsg9BOD7sH7kU9M1d95omQoQ=", "dev": true } @@ -14472,7 +14458,7 @@ }, "sha.js": { "version": "2.4.11", - "resolved": "https://registry.npmjs.org/sha.js/-/sha.js-2.4.11.tgz", + "resolved": "http://registry.npmjs.org/sha.js/-/sha.js-2.4.11.tgz", "integrity": "sha512-QMEp5B7cftE7APOjk5Y6xgrbWu+WkLVQwk8JNjZ8nKRciZaByEW6MubieAiToS7+dwvrjGhH8jRXz3MVd0AYqQ==", "dev": true, "requires": { @@ -14494,7 +14480,7 @@ "dependencies": { "kind-of": { "version": "2.0.1", - "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-2.0.1.tgz", + "resolved": "http://registry.npmjs.org/kind-of/-/kind-of-2.0.1.tgz", "integrity": "sha1-AY7HpM5+OobLkUG+UZ0kyPqpgbU=", "dev": true, "requires": { @@ -15120,7 +15106,7 @@ }, "strip-eof": { "version": "1.0.0", - "resolved": "https://registry.npmjs.org/strip-eof/-/strip-eof-1.0.0.tgz", + "resolved": "http://registry.npmjs.org/strip-eof/-/strip-eof-1.0.0.tgz", "integrity": "sha1-u0P/VZim6wXYm1n80SnJgzE2Br8=", "dev": true }, @@ -15828,7 +15814,7 @@ }, "table": { "version": "4.0.3", - "resolved": "https://registry.npmjs.org/table/-/table-4.0.3.tgz", + "resolved": "http://registry.npmjs.org/table/-/table-4.0.3.tgz", "integrity": "sha512-S7rnFITmBH1EnyKcvxBh1LjYeQMmnZtCXSEbHcH6S0NoKit24ZuFO/T1vDcLdYsLQkM188PVVhQmzKIuThNkKg==", "dev": true, "requires": { @@ -16031,7 +16017,7 @@ }, "through": { "version": "2.3.8", - "resolved": "https://registry.npmjs.org/through/-/through-2.3.8.tgz", + "resolved": "http://registry.npmjs.org/through/-/through-2.3.8.tgz", "integrity": "sha1-DdTJ/6q8NXlgsbckEV1+Doai4fU=", "dev": true }, @@ -17262,7 +17248,7 @@ }, "wrap-ansi": { "version": "2.1.0", - "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-2.1.0.tgz", + "resolved": "http://registry.npmjs.org/wrap-ansi/-/wrap-ansi-2.1.0.tgz", "integrity": "sha1-2Pw9KE3QV5T+hJc8rs3Rz4JP3YU=", "dev": true, "requires": { diff --git a/package.json b/package.json index 70fe79b7..6852c6e8 100644 --- a/package.json +++ b/package.json @@ -46,6 +46,7 @@ "@fortawesome/free-solid-svg-icons": "^5.6.3", "@fortawesome/react-fontawesome": "^0.1.3", "babel-polyfill": "^6.26.0", + "diff-match-patch": "^1.0.4", "draft-js": "^0.10.5", "mousetrap": "1.5.2", "prop-types": "^15.6.2", diff --git a/src/lib/TranscriptEditor/TimedTextEditor/UpdateTimestamps.js b/packages/components/timed-text-editor/UpdateTimestamps.js similarity index 96% rename from src/lib/TranscriptEditor/TimedTextEditor/UpdateTimestamps.js rename to packages/components/timed-text-editor/UpdateTimestamps.js index ca0cf35c..27b1c142 100644 --- a/src/lib/TranscriptEditor/TimedTextEditor/UpdateTimestamps.js +++ b/packages/components/timed-text-editor/UpdateTimestamps.js @@ -1,5 +1,5 @@ -import generateEntitiesRanges from '../../Util/adapters/generate-entities-ranges/index.js'; -import { createEntityMap } from '../../Util/adapters/index.js'; +import generateEntitiesRanges from '../../stt-adapters/generate-entities-ranges/index.js'; +import { createEntityMap } from '../../stt-adapters/index.js'; import DiffMatchPatch from 'diff-match-patch'; const convertContentToText = (content) => { diff --git a/packages/components/timed-text-editor/index.js b/packages/components/timed-text-editor/index.js index 8530585c..63e928c3 100644 --- a/packages/components/timed-text-editor/index.js +++ b/packages/components/timed-text-editor/index.js @@ -141,6 +141,7 @@ class TimedTextEditor extends React.Component { this.setEditorContentState(blocks); } } + getEditorContent(exportFormat) { const format = exportFormat || 'draftjs'; @@ -273,21 +274,8 @@ class TimedTextEditor extends React.Component { * Update Editor content state */ setEditorNewContentState = (newContentState) => { - var start = new Date().getTime(); - const newEditorState = EditorState.push(this.state.editorState, newContentState); - - var end = new Date().getTime(); - var time = end - start; - console.log('Execution time for Editor State push: ' + time); - - start = new Date().getTime(); - this.setState({ editorState: newEditorState }); - - end = new Date().getTime(); - time = end - start; - console.log('Execution time for Set State: ' + time); } /** From d0797d82358e978ddd8e95271723a53c6dfc9222 Mon Sep 17 00:00:00 2001 From: murezzda <47388020+murezzda@users.noreply.github.com> Date: Mon, 15 Apr 2019 11:01:37 +0200 Subject: [PATCH 07/15] Moved UpdateTimestamp into its own folder. --- .../{UpdateTimestamps.js => UpdateTimestamps/index.js} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename packages/components/timed-text-editor/{UpdateTimestamps.js => UpdateTimestamps/index.js} (100%) diff --git a/packages/components/timed-text-editor/UpdateTimestamps.js b/packages/components/timed-text-editor/UpdateTimestamps/index.js similarity index 100% rename from packages/components/timed-text-editor/UpdateTimestamps.js rename to packages/components/timed-text-editor/UpdateTimestamps/index.js From 8babce7573ba0abebe22101267a36e1e8ecc332a Mon Sep 17 00:00:00 2001 From: murezzda <47388020+murezzda@users.noreply.github.com> Date: Mon, 15 Apr 2019 14:29:33 +0200 Subject: [PATCH 08/15] added updateTimestampsSSTAlign which updates the timestamps with the sst-align code --- package-lock.json | 23 +++ package.json | 3 + .../UpdateTimestamps/index.js | 77 ++++--- .../UpdateTimestamps/stt-align-node.js | 191 ++++++++++++++++++ .../components/timed-text-editor/index.js | 4 +- 5 files changed, 273 insertions(+), 25 deletions(-) create mode 100644 packages/components/timed-text-editor/UpdateTimestamps/stt-align-node.js diff --git a/package-lock.json b/package-lock.json index f36ae579..f21799c9 100644 --- a/package-lock.json +++ b/package-lock.json @@ -5220,6 +5220,14 @@ "randombytes": "^2.0.0" } }, + "difflib": { + "version": "0.2.4", + "resolved": "https://registry.npmjs.org/difflib/-/difflib-0.2.4.tgz", + "integrity": "sha1-teMDYabbAjF21WKJLbhZQKcY9H4=", + "requires": { + "heap": ">= 0.2.0" + } + }, "dir-glob": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/dir-glob/-/dir-glob-2.0.0.tgz", @@ -6263,6 +6271,11 @@ "original": "^1.0.0" } }, + "everpolate": { + "version": "0.0.3", + "resolved": "https://registry.npmjs.org/everpolate/-/everpolate-0.0.3.tgz", + "integrity": "sha1-OxsxhGVJRxKqHrEGFERo6kF9ASk=" + }, "evp_bytestokey": { "version": "1.0.3", "resolved": "https://registry.npmjs.org/evp_bytestokey/-/evp_bytestokey-1.0.3.tgz", @@ -8080,6 +8093,11 @@ "integrity": "sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw==", "dev": true }, + "heap": { + "version": "0.2.6", + "resolved": "https://registry.npmjs.org/heap/-/heap-0.2.6.tgz", + "integrity": "sha1-CH4fELBGky/IWU3Z5tN4r8nR5aw=" + }, "highlight.js": { "version": "9.12.0", "resolved": "https://registry.npmjs.org/highlight.js/-/highlight.js-9.12.0.tgz", @@ -11124,6 +11142,11 @@ "integrity": "sha1-CXtgK1NCKlIsGvuHkDGDNpQaAR0=", "dev": true }, + "number-to-words": { + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/number-to-words/-/number-to-words-1.2.4.tgz", + "integrity": "sha512-/fYevVkXRcyBiZDg6yzZbm0RuaD6i0qRfn8yr+6D0KgBMOndFPxuW10qCHpzs50nN8qKuv78k8MuotZhcVX6Pw==" + }, "nwsapi": { "version": "2.1.3", "resolved": "https://registry.npmjs.org/nwsapi/-/nwsapi-2.1.3.tgz", diff --git a/package.json b/package.json index 6852c6e8..bc73f4a3 100644 --- a/package.json +++ b/package.json @@ -47,8 +47,11 @@ "@fortawesome/react-fontawesome": "^0.1.3", "babel-polyfill": "^6.26.0", "diff-match-patch": "^1.0.4", + "difflib": "^0.2.4", "draft-js": "^0.10.5", + "everpolate": "0.0.3", "mousetrap": "1.5.2", + "number-to-words": "^1.2.4", "prop-types": "^15.6.2", "react-keyboard-shortcuts": "^1.1.3", "react-simple-tooltip": "^2.3.3" diff --git a/packages/components/timed-text-editor/UpdateTimestamps/index.js b/packages/components/timed-text-editor/UpdateTimestamps/index.js index 27b1c142..f6c48f02 100644 --- a/packages/components/timed-text-editor/UpdateTimestamps/index.js +++ b/packages/components/timed-text-editor/UpdateTimestamps/index.js @@ -1,6 +1,7 @@ -import generateEntitiesRanges from '../../stt-adapters/generate-entities-ranges/index.js'; -import { createEntityMap } from '../../stt-adapters/index.js'; +import generateEntitiesRanges from '../../../stt-adapters/generate-entities-ranges/index.js'; +import { createEntityMap } from '../../../stt-adapters/index.js'; import DiffMatchPatch from 'diff-match-patch'; +import alignJSONText from './stt-align-node.js'; const convertContentToText = (content) => { var text = []; @@ -25,6 +26,34 @@ const createEntity = (start, end, confidence, word, wordIdx) => { }); }; +const createContentFromEntityList = (currentContent, newEntities) => { + // Update entites to block structure. + var updatedBlockArray = []; + var totalWords = 0; + + for (var blockIdx in currentContent.blocks) { + const block = currentContent.blocks[blockIdx]; + const wordsInBlock = (block.text.match(/\S+/g) || []).length; + const blockEntites = newEntities.slice(totalWords, totalWords + wordsInBlock); + + const updatedBlock = { + text: blockEntites.map((entry) => entry.punct).join(' '), + type: 'paragraph', + data: { + speaker: block.data.speaker, + words: blockEntites, + start: blockEntites[0].start + }, + entityRanges: generateEntitiesRanges(blockEntites, 'punct'), + }; + + updatedBlockArray.push(updatedBlock); + totalWords += wordsInBlock; + } + + return { blocks: updatedBlockArray, entityMap: createEntityMap(updatedBlockArray) }; +}; + // https://github.com/google/diff-match-patch/wiki/Line-or-Word-Diffs const diffLineMode = (text1, text2) => { var dmp = new DiffMatchPatch(); @@ -38,6 +67,7 @@ const diffLineMode = (text1, text2) => { return diffs; }; +// Update timestamps usign diff-match-patch. const updateTimestamps = (currentContent, originalContent) => { const currentText = convertContentToText(currentContent); const originalText = convertContentToText(originalContent); @@ -112,33 +142,34 @@ const updateTimestamps = (currentContent, originalContent) => { diffIdx ++; } - // Update entites to block structure. - var updatedBlockArray = []; - var totalWords = 0; + const updatedContent = createContentFromEntityList(currentContent, newEntities); - for (var blockIdx in currentContent.blocks) { - const block = currentContent.blocks[blockIdx]; - const wordsInBlock = (block.text.match(/\S+/g) || []).length; - const blockEntites = newEntities.slice(totalWords, totalWords + wordsInBlock); + return updatedContent; +}; - const updatedBlock = { - text: blockEntites.map((entry) => entry.punct).join(' '), - type: 'paragraph', - data: { - speaker: block.data.speaker, - words: blockEntites, - start: blockEntites[0].start - }, - entityRanges: generateEntitiesRanges(blockEntites, 'punct'), - }; +// Update timestamps usign stt-align (bbc). +const updateTimestampsSSTAlign = (currentContent, originalContent) => { + const currentText = convertContentToText(currentContent); - updatedBlockArray.push(updatedBlock); - totalWords += wordsInBlock; + const entityMap = originalContent.entityMap; + + const entities = []; + + for (var entityIdx in entityMap) { + entities.push({ + start: parseFloat(entityMap[entityIdx].data.start), + end: parseFloat(entityMap[entityIdx].data.end), + word: entityMap[entityIdx].data.text.toLowerCase().replace(/[.?!]/g, ''), + }); } - const updatedContent = { blocks: updatedBlockArray, entityMap: createEntityMap(updatedBlockArray) }; + const result = alignJSONText( { words: entities }, currentText.join(' ')); + const newEntities = result.words.map((entry) => { + return createEntity(entry.start, entry.end, 0.0, entry.word, -1); + }); + const updatedContent = createContentFromEntityList(currentContent, newEntities); return updatedContent; }; -export default updateTimestamps; \ No newline at end of file +export { updateTimestamps, updateTimestampsSSTAlign }; diff --git a/packages/components/timed-text-editor/UpdateTimestamps/stt-align-node.js b/packages/components/timed-text-editor/UpdateTimestamps/stt-align-node.js new file mode 100644 index 00000000..4832da2a --- /dev/null +++ b/packages/components/timed-text-editor/UpdateTimestamps/stt-align-node.js @@ -0,0 +1,191 @@ +// code obtained from https://github.com/bbc/stt-align-node + +import converterNumbersToWords from 'number-to-words'; +import difflib from 'difflib'; +import everpolate from 'everpolate'; + +/** + * https://stackoverflow.com/questions/175739/built-in-way-in-javascript-to-check-if-a-string-is-a-valid-number + * @param {*} num + * @return {boolean} - if it's a number true, if it's not false. + */ +function isANumber(num) { + return !isNaN(num); +} + +/** + * removes capitalization, punctuation and converts numbers to letters + * @param {string} wordText - word text + * @return {string} + * handles edge case if word is undefined, and returns undefined in that instance + */ +function normaliseWord(wordText) { + if (wordText !== undefined) { + const wordTextResult = wordText.toLowerCase().trim().replace(/[^a-z|0-9|.]+/g, ''); + if (isANumber(wordTextResult)) { + console.log(wordTextResult); + + return converterNumbersToWords.toWords(wordTextResult); + } + + return wordTextResult; + } else { + return wordText; + } +} + +// using neighboring words to set missing start and end time when present +function interpolationOptimization(wordsList) { + return wordsList.map((word, index) => { + let wordTmp = word; + // setting the start time of each unmatched word to the previous word’s end time - when present + // does not first element in list edge case + + if (('start' in word) && (index !== 0)) { + const previousWord = wordsList[index - 1]; + if ('end' in previousWord) { + wordTmp = { + start: previousWord.end, + end: word.end, + word: word.word + }; + } + } + // TODO: handle first item ? + // setting the end time of each unmatched word to the next word’s start time - when present + // does handle last element in list edge case + if (('end' in word) && (index !== (wordsList.length - 1))) { + const nextWord = wordsList[index + 1]; + if ('start' in nextWord) { + wordTmp = { + end: nextWord.start, + start: word.start, + word: word.word + }; + } + } + + // TODO: handle last item ? + return wordTmp; + }); +// return words; +} + +function interpolate(wordsList) { + let words = interpolationOptimization(wordsList); + const indicies = [ ...Array(words.length).keys() ]; + const indiciesWithStart = []; + const indiciesWithEnd = []; + const startTimes = []; + const endTimes = []; + // interpolate times for start + for (let i = 0; i < words.length; i++) { + if ('start' in words[i]) { + indiciesWithStart.push(i); + startTimes.push(words[i].start); + } + } + // interpolate times for end + for (let i = 0; i < words.length; i++) { + if ('end' in words[i]) { + indiciesWithEnd.push(i); + endTimes.push(words[i].end); + } + } + // http://borischumichev.github.io/everpolate/#linear + const outStartTimes = everpolate.linear(indicies, indiciesWithStart, startTimes); + const outEndTimes = everpolate.linear(indicies, indiciesWithEnd, endTimes); + words = words.map((word, index) => { + if (!('start' in word)) { + word.start = outStartTimes[index]; + } + if (!('end' in word)) { + word.end = outEndTimes[index]; + } + + return word; + }); + + return words; +} + +/** + * + * @param {array} sttData - array of STT words + * @param {array} transcriptWords - array of base text accurate words + */ +function alignWords(sttWords, transcriptWords) { + // # extract list of words + // sttWords=[words.get('word') for words in sttData] + + // # convert words to lowercase and remove numbers and special characters + // sttWordsStripped = [re.sub('[^a-z]', '', word.lower()) for word in sttWords] + const sttWordsStripped = sttWords.map((word) => { + return normaliseWord(word.word); + }); + + // transcriptWordsStripped = [re.sub('[^a-z]', '', word.lower()) for word in transcriptWords] + const transcriptWordsStripped = transcriptWords.map((word) => { + return normaliseWord(word); + }); + // # create empty list to receive data + // transcriptData = [{} for _ in range(len(transcriptWords))] + const transcriptData = []; + // empty objects as place holder + transcriptWords.forEach(() => { + transcriptData.push({}); + }); + // # populate transcriptData with matching words + // matcher = difflib.SequenceMatcher(None, sttWordsStripped, transcriptWordsStripped) + // // if they are same length, just interpolate words ? + // const matcher = diffWordMode(transcriptWordsStripped, sttWordsStripped); + // http://qiao.github.io/difflib.js/ + const matcher = new difflib.SequenceMatcher(null, sttWordsStripped, transcriptWordsStripped); + const opCodes = matcher.getOpcodes(); + + opCodes.forEach((opCode) => { + const matchType = opCode[0]; + const sttStartIndex = opCode[1]; + const sttEndIndex = opCode[2]; + const baseTextStartIndex = opCode[3]; + + if (matchType === 'equal' ) { + // slice does not not include the end - hence +1 + const sttDataSegment = sttWords.slice(sttStartIndex, sttEndIndex); + transcriptData.splice(baseTextStartIndex, sttDataSegment.length, ...sttDataSegment); + } + + transcriptData.forEach((wordObject, index) => { + wordObject.word = transcriptWords[index]; + }); + // # replace words with originals + }); + + // # fill in missing timestamps + return interpolate(transcriptData); +} + +function normaliseReferenceText(refText) { + // remove new lines + return refText.trim().replace(/\n\n/g, '').replace(/\n/g, ' '); +} + +/** + * + * @param {json} sttWords - stt transcript json + * @param {array} sttWords.words + * @param {float} sttWords.words[0].start + * @param {float} sttWords.words[0].end + * @param {float} sttWords.words[0].word + * @param {string} transcriptText - plain text corrected transcript, base text + */ +function alignJSONText(sttData, transcriptText) { + const sttWords = sttData.words; + const transcriptTextWithoutLineBreaks = normaliseReferenceText(transcriptText); + const transcriptTextArray = transcriptTextWithoutLineBreaks.split(' '); + const aligned = alignWords(sttWords, transcriptTextArray); + + return { 'text': transcriptText, 'words': aligned }; +} + +export default alignJSONText; \ No newline at end of file diff --git a/packages/components/timed-text-editor/index.js b/packages/components/timed-text-editor/index.js index 63e928c3..d532ffbd 100644 --- a/packages/components/timed-text-editor/index.js +++ b/packages/components/timed-text-editor/index.js @@ -20,7 +20,7 @@ import sttJsonAdapter from '../../stt-adapters'; // TODO: connect to local packages version import exportAdapter from '../../export-adapters'; // import exportAdapter from '../../Util/export-adapters/index.js'; -import updateTimestamps from './UpdateTimestamps'; +import { updateTimestamps, updateTimestampsSSTAlign } from './UpdateTimestamps/index.js'; import style from './index.module.css'; class TimedTextEditor extends React.Component { @@ -128,7 +128,7 @@ class TimedTextEditor extends React.Component { updateTimestampsForEditorState() { // Update timestamps according to the original state. const currentContent = convertToRaw(this.state.editorState.getCurrentContent()); - const updatedContentRaw = updateTimestamps(currentContent, this.state.originalState); + const updatedContentRaw = updateTimestampsSSTAlign(currentContent, this.state.originalState); const updatedContent = convertFromRaw(updatedContentRaw); this.setEditorNewContentState(updatedContent); From bf11847b85d601036bc4cfe8efef55515c039cc6 Mon Sep 17 00:00:00 2001 From: murezzda <47388020+murezzda@users.noreply.github.com> Date: Mon, 29 Apr 2019 16:40:40 +0200 Subject: [PATCH 09/15] Added documentation --- .../timed-text-editor/UpdateTimestamps/index.js | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/packages/components/timed-text-editor/UpdateTimestamps/index.js b/packages/components/timed-text-editor/UpdateTimestamps/index.js index f6c48f02..b7adff1b 100644 --- a/packages/components/timed-text-editor/UpdateTimestamps/index.js +++ b/packages/components/timed-text-editor/UpdateTimestamps/index.js @@ -83,7 +83,6 @@ const updateTimestamps = (currentContent, originalContent) => { while (diffIdx < lineModeDiff.length) { const diffEntry = lineModeDiff[diffIdx]; - const nextDiffEntry = lineModeDiff[diffIdx + 1] || -1; const diffType = diffEntry[0]; const numberOfWords = (diffEntry[1].match(/\n/g) || []).length; @@ -97,14 +96,20 @@ const updateTimestamps = (currentContent, originalContent) => { newEntities.push(newEntity); } } else if (diffType === -1) { + // Get the following entry too, as it is needed to check if a replacement was done + // If at the last entry of the lineDiffModeArray, set the nextDiffEntry to -1 + const nextDiffEntry = lineModeDiff[diffIdx + 1] || -1; // Deletion if (nextDiffEntry !== -1 && nextDiffEntry[0] === 1) { // If next entry is a insert, the operation is a replacement. const numberOfReplacements = (nextDiffEntry[1].match(/\n/g) || []).length; - if (numberOfReplacements === numberOfWords) { // If the number of replacement words is equal to the number of original words // it is easily possible to match them correctly. + // + // This is exactly the same code as for matched words, but due to the amount of + // side effects (e.g. currentTextIdx and entityIdx increments) the code is copied + // instead of refactored into its own function. for (var wordItr = 0; wordItr < numberOfWords; wordItr++) { const word = currentText[currentTextIdx++]; const entity = entities[entityIdx++].data; From 09f7c16a238a0310fb4ee4110ca0fa29055a627b Mon Sep 17 00:00:00 2001 From: murezzda <47388020+murezzda@users.noreply.github.com> Date: Mon, 29 Apr 2019 16:42:55 +0200 Subject: [PATCH 10/15] Merged timer for updating the timestamps and local save. --- packages/components/timed-text-editor/index.js | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/packages/components/timed-text-editor/index.js b/packages/components/timed-text-editor/index.js index d532ffbd..4e1aed65 100644 --- a/packages/components/timed-text-editor/index.js +++ b/packages/components/timed-text-editor/index.js @@ -112,15 +112,9 @@ class TimedTextEditor extends React.Component { clearTimeout(this.saveTimer); } this.saveTimer = setTimeout(() => { + this.updateTimestampsForEditorState(); this.localSave(this.props.mediaUrl); }, 1000); - - if (this.timestampTimer !== undefined) { - clearTimeout(this.timestampTimer); - } - this.timestampTimer = setTimeout(() => { - this.updateTimestampsForEditorState(); - }, 5000); }); } } From 79dc2b9f635de194a8c7dfc1311ce2dd83dbaafd Mon Sep 17 00:00:00 2001 From: murezzda <47388020+murezzda@users.noreply.github.com> Date: Wed, 5 Jun 2019 14:25:42 +0200 Subject: [PATCH 11/15] Selection state is now kept across updates to timestamps --- .../components/timed-text-editor/index.js | 30 +++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/packages/components/timed-text-editor/index.js b/packages/components/timed-text-editor/index.js index 4e1aed65..cc9dfbe0 100644 --- a/packages/components/timed-text-editor/index.js +++ b/packages/components/timed-text-editor/index.js @@ -114,7 +114,7 @@ class TimedTextEditor extends React.Component { this.saveTimer = setTimeout(() => { this.updateTimestampsForEditorState(); this.localSave(this.props.mediaUrl); - }, 1000); + }, 5000); }); } } @@ -125,7 +125,33 @@ class TimedTextEditor extends React.Component { const updatedContentRaw = updateTimestampsSSTAlign(currentContent, this.state.originalState); const updatedContent = convertFromRaw(updatedContentRaw); - this.setEditorNewContentState(updatedContent); + // Update editor state + const newEditorState = EditorState.push(this.state.editorState, updatedContent); + + // Re-convert updated content to raw to gain access to block keys + const updatedContentBlocks = convertToRaw(updatedContent); + + // Build block map, which maps the block keys of the previous content to the block keys of the + // updated content. + var blockMap = {}; + for (var blockIdx = 0; blockIdx < currentContent.blocks.length; blockIdx++) { + blockMap[currentContent.blocks[blockIdx].key] = updatedContentBlocks.blocks[blockIdx].key; + } + + // Get current selection state and update block keys + const selectionState = this.state.editorState.getSelection(); + + const selection = selectionState.merge({ + anchorOffset: selectionState.getAnchorOffset(), + anchorKey: blockMap[selectionState.getAnchorKey()], + focusOffset: selectionState.getFocusOffset(), + focusKey: blockMap[selectionState.getFocusKey()], + }); + + // Set the updated selection state on the new editor state + const newEditorStateSelected = EditorState.forceSelection(newEditorState, selection); + + this.setState({ editorState: newEditorStateSelected }); } loadData() { From 91cccd17f44da6b2c39f99bc0f857959a353e73f Mon Sep 17 00:00:00 2001 From: murezzda <47388020+murezzda@users.noreply.github.com> Date: Wed, 5 Jun 2019 15:03:05 +0200 Subject: [PATCH 12/15] Fixed bug where words with punctuation always are considered as new words. Timestamp update function now also uses the alignWords function directly instead of alignJSONText, removing some overhead. --- .../timed-text-editor/UpdateTimestamps/index.js | 9 +++++---- .../timed-text-editor/UpdateTimestamps/stt-align-node.js | 4 ++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/packages/components/timed-text-editor/UpdateTimestamps/index.js b/packages/components/timed-text-editor/UpdateTimestamps/index.js index b7adff1b..477c2007 100644 --- a/packages/components/timed-text-editor/UpdateTimestamps/index.js +++ b/packages/components/timed-text-editor/UpdateTimestamps/index.js @@ -1,7 +1,7 @@ import generateEntitiesRanges from '../../../stt-adapters/generate-entities-ranges/index.js'; import { createEntityMap } from '../../../stt-adapters/index.js'; import DiffMatchPatch from 'diff-match-patch'; -import alignJSONText from './stt-align-node.js'; +import alignWords from './stt-align-node.js'; const convertContentToText = (content) => { var text = []; @@ -164,12 +164,13 @@ const updateTimestampsSSTAlign = (currentContent, originalContent) => { entities.push({ start: parseFloat(entityMap[entityIdx].data.start), end: parseFloat(entityMap[entityIdx].data.end), - word: entityMap[entityIdx].data.text.toLowerCase().replace(/[.?!]/g, ''), + word: entityMap[entityIdx].data.text, }); } - const result = alignJSONText( { words: entities }, currentText.join(' ')); - const newEntities = result.words.map((entry) => { + const result = alignWords( entities, currentText); + + const newEntities = result.map((entry) => { return createEntity(entry.start, entry.end, 0.0, entry.word, -1); }); const updatedContent = createContentFromEntityList(currentContent, newEntities); diff --git a/packages/components/timed-text-editor/UpdateTimestamps/stt-align-node.js b/packages/components/timed-text-editor/UpdateTimestamps/stt-align-node.js index 4832da2a..8af57a5b 100644 --- a/packages/components/timed-text-editor/UpdateTimestamps/stt-align-node.js +++ b/packages/components/timed-text-editor/UpdateTimestamps/stt-align-node.js @@ -23,7 +23,7 @@ function normaliseWord(wordText) { if (wordText !== undefined) { const wordTextResult = wordText.toLowerCase().trim().replace(/[^a-z|0-9|.]+/g, ''); if (isANumber(wordTextResult)) { - console.log(wordTextResult); + // console.log(wordTextResult); return converterNumbersToWords.toWords(wordTextResult); } @@ -188,4 +188,4 @@ function alignJSONText(sttData, transcriptText) { return { 'text': transcriptText, 'words': aligned }; } -export default alignJSONText; \ No newline at end of file +export default alignWords; \ No newline at end of file From d4e55611510742c277ee9e8b2b87940f0c3f4df7 Mon Sep 17 00:00:00 2001 From: murezzda <47388020+murezzda@users.noreply.github.com> Date: Wed, 5 Jun 2019 15:58:21 +0200 Subject: [PATCH 13/15] Fixed small bug which raised an error if an empty block was present during timestamp update --- packages/components/timed-text-editor/UpdateTimestamps/index.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/components/timed-text-editor/UpdateTimestamps/index.js b/packages/components/timed-text-editor/UpdateTimestamps/index.js index 477c2007..d7104e91 100644 --- a/packages/components/timed-text-editor/UpdateTimestamps/index.js +++ b/packages/components/timed-text-editor/UpdateTimestamps/index.js @@ -42,7 +42,7 @@ const createContentFromEntityList = (currentContent, newEntities) => { data: { speaker: block.data.speaker, words: blockEntites, - start: blockEntites[0].start + start: block.data.start }, entityRanges: generateEntitiesRanges(blockEntites, 'punct'), }; From 968e5707bff567bc435ef9ee6f5ea998e4e0e675 Mon Sep 17 00:00:00 2001 From: murezzda <47388020+murezzda@users.noreply.github.com> Date: Mon, 10 Jun 2019 18:55:12 +0200 Subject: [PATCH 14/15] Changed time of timestamp-update. Now re-calculates the timestamps after 5 seconds if the transcript has been edited or if the user saves the transcript manually with the save button --- packages/components/timed-text-editor/index.js | 12 +++++++++--- packages/components/transcript-editor/index.js | 2 ++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/packages/components/timed-text-editor/index.js b/packages/components/timed-text-editor/index.js index cc9dfbe0..738ec4a0 100644 --- a/packages/components/timed-text-editor/index.js +++ b/packages/components/timed-text-editor/index.js @@ -101,6 +101,13 @@ class TimedTextEditor extends React.Component { }.bind(this), pauseWhileTypingIntervalInMilliseconds); } } + + if (this.timestampTimer !== undefined) { + clearTimeout(this.timestampTimer); + } + this.timestampTimer = setTimeout(() => { + this.updateTimestampsForEditorState(); + }, 5000); } if (this.state.isEditable) { @@ -112,14 +119,14 @@ class TimedTextEditor extends React.Component { clearTimeout(this.saveTimer); } this.saveTimer = setTimeout(() => { - this.updateTimestampsForEditorState(); this.localSave(this.props.mediaUrl); - }, 5000); + }, 1000); }); } } updateTimestampsForEditorState() { + // Update timestamps according to the original state. const currentContent = convertToRaw(this.state.editorState.getCurrentContent()); const updatedContentRaw = updateTimestampsSSTAlign(currentContent, this.state.originalState); @@ -150,7 +157,6 @@ class TimedTextEditor extends React.Component { // Set the updated selection state on the new editor state const newEditorStateSelected = EditorState.forceSelection(newEditorState, selection); - this.setState({ editorState: newEditorStateSelected }); } diff --git a/packages/components/transcript-editor/index.js b/packages/components/transcript-editor/index.js index df834962..7a493d4a 100644 --- a/packages/components/transcript-editor/index.js +++ b/packages/components/transcript-editor/index.js @@ -270,6 +270,8 @@ class TranscriptEditor extends React.Component { handleSaveTranscript = () => { alert('The changes to this transcript have been saved in your browser'); + + this.timedTextEditorRef.current.updateTimestampsForEditorState(); return this.timedTextEditorRef.current.localSave(this.props.mediaUrl); }; From 4d62770e9d216406be368a50e8b39d046a281147 Mon Sep 17 00:00:00 2001 From: murezzda <47388020+murezzda@users.noreply.github.com> Date: Tue, 11 Jun 2019 20:36:33 +0200 Subject: [PATCH 15/15] Code cleanup --- .../UpdateTimestamps/index.js | 103 +----------------- .../UpdateTimestamps/stt-align-node.js | 23 ---- .../components/timed-text-editor/index.js | 4 +- 3 files changed, 4 insertions(+), 126 deletions(-) diff --git a/packages/components/timed-text-editor/UpdateTimestamps/index.js b/packages/components/timed-text-editor/UpdateTimestamps/index.js index d7104e91..d827ce05 100644 --- a/packages/components/timed-text-editor/UpdateTimestamps/index.js +++ b/packages/components/timed-text-editor/UpdateTimestamps/index.js @@ -1,6 +1,5 @@ import generateEntitiesRanges from '../../../stt-adapters/generate-entities-ranges/index.js'; import { createEntityMap } from '../../../stt-adapters/index.js'; -import DiffMatchPatch from 'diff-match-patch'; import alignWords from './stt-align-node.js'; const convertContentToText = (content) => { @@ -54,106 +53,8 @@ const createContentFromEntityList = (currentContent, newEntities) => { return { blocks: updatedBlockArray, entityMap: createEntityMap(updatedBlockArray) }; }; -// https://github.com/google/diff-match-patch/wiki/Line-or-Word-Diffs -const diffLineMode = (text1, text2) => { - var dmp = new DiffMatchPatch(); - var a = dmp.diff_linesToChars_(text1, text2); - var lineText1 = a.chars1; - var lineText2 = a.chars2; - var lineArray = a.lineArray; - var diffs = dmp.diff_main(lineText1, lineText2, false); - dmp.diff_charsToLines_(diffs, lineArray); - - return diffs; -}; - -// Update timestamps usign diff-match-patch. -const updateTimestamps = (currentContent, originalContent) => { - const currentText = convertContentToText(currentContent); - const originalText = convertContentToText(originalContent); - - const lineModeDiff = diffLineMode(originalText.join('\n') + '\n', currentText.join('\n') + '\n'); - const entities = originalContent.entityMap; - - var currentTextIdx = 0; - var entityIdx = 0; - var diffIdx = 0; - - var newEntities = []; - - while (diffIdx < lineModeDiff.length) { - const diffEntry = lineModeDiff[diffIdx]; - const diffType = diffEntry[0]; - const numberOfWords = (diffEntry[1].match(/\n/g) || []).length; - - if (diffType === 0) { - // Matched words. - for (var wordItr = 0; wordItr < numberOfWords; wordItr++) { - const word = currentText[currentTextIdx++]; - const entity = entities[entityIdx++].data; - - const newEntity = createEntity(entity.start, entity.end, 0.0, word, -1); - newEntities.push(newEntity); - } - } else if (diffType === -1) { - // Get the following entry too, as it is needed to check if a replacement was done - // If at the last entry of the lineDiffModeArray, set the nextDiffEntry to -1 - const nextDiffEntry = lineModeDiff[diffIdx + 1] || -1; - // Deletion - if (nextDiffEntry !== -1 && nextDiffEntry[0] === 1) { - // If next entry is a insert, the operation is a replacement. - const numberOfReplacements = (nextDiffEntry[1].match(/\n/g) || []).length; - if (numberOfReplacements === numberOfWords) { - // If the number of replacement words is equal to the number of original words - // it is easily possible to match them correctly. - // - // This is exactly the same code as for matched words, but due to the amount of - // side effects (e.g. currentTextIdx and entityIdx increments) the code is copied - // instead of refactored into its own function. - for (var wordItr = 0; wordItr < numberOfWords; wordItr++) { - const word = currentText[currentTextIdx++]; - const entity = entities[entityIdx++].data; - - const newEntity = createEntity(entity.start, entity.end, 0.0, word, -1); - newEntities.push(newEntity); - } - } else { - // Otherwise, we give the whole segment the same timestamp. - const entityStart = entities[entityIdx].data.start; - const entityEnd = entities[entityIdx + numberOfWords - 1].data.end; - - for (var wordItr = 0; wordItr < numberOfReplacements; wordItr++) { - const word = currentText[currentTextIdx++]; - const newEntity = createEntity(entityStart, entityEnd, 0.0, word, -1); - newEntities.push(newEntity); - } - entityIdx += numberOfWords; - } - diffIdx++; - } else { - // Deletions ignore the corresponding entity. - entityIdx += numberOfWords; - } - } else if (diffType === 1) { - // Insertions get the same timestamp as the previous entity - for (var wordItr = 0; wordItr < numberOfWords; wordItr++) { - const word = currentText[currentTextIdx++]; - const entity = entities[entityIdx].data; - - const newEntity = createEntity(entity.start, entity.end, 0.0, word, -1); - newEntities.push(newEntity); - } - } - diffIdx ++; - } - - const updatedContent = createContentFromEntityList(currentContent, newEntities); - - return updatedContent; -}; - // Update timestamps usign stt-align (bbc). -const updateTimestampsSSTAlign = (currentContent, originalContent) => { +const updateTimestamps = (currentContent, originalContent) => { const currentText = convertContentToText(currentContent); const entityMap = originalContent.entityMap; @@ -178,4 +79,4 @@ const updateTimestampsSSTAlign = (currentContent, originalContent) => { return updatedContent; }; -export { updateTimestamps, updateTimestampsSSTAlign }; +export default updateTimestamps; diff --git a/packages/components/timed-text-editor/UpdateTimestamps/stt-align-node.js b/packages/components/timed-text-editor/UpdateTimestamps/stt-align-node.js index 8af57a5b..e1e5ecd9 100644 --- a/packages/components/timed-text-editor/UpdateTimestamps/stt-align-node.js +++ b/packages/components/timed-text-editor/UpdateTimestamps/stt-align-node.js @@ -165,27 +165,4 @@ function alignWords(sttWords, transcriptWords) { return interpolate(transcriptData); } -function normaliseReferenceText(refText) { - // remove new lines - return refText.trim().replace(/\n\n/g, '').replace(/\n/g, ' '); -} - -/** - * - * @param {json} sttWords - stt transcript json - * @param {array} sttWords.words - * @param {float} sttWords.words[0].start - * @param {float} sttWords.words[0].end - * @param {float} sttWords.words[0].word - * @param {string} transcriptText - plain text corrected transcript, base text - */ -function alignJSONText(sttData, transcriptText) { - const sttWords = sttData.words; - const transcriptTextWithoutLineBreaks = normaliseReferenceText(transcriptText); - const transcriptTextArray = transcriptTextWithoutLineBreaks.split(' '); - const aligned = alignWords(sttWords, transcriptTextArray); - - return { 'text': transcriptText, 'words': aligned }; -} - export default alignWords; \ No newline at end of file diff --git a/packages/components/timed-text-editor/index.js b/packages/components/timed-text-editor/index.js index 738ec4a0..0f4e1438 100644 --- a/packages/components/timed-text-editor/index.js +++ b/packages/components/timed-text-editor/index.js @@ -20,7 +20,7 @@ import sttJsonAdapter from '../../stt-adapters'; // TODO: connect to local packages version import exportAdapter from '../../export-adapters'; // import exportAdapter from '../../Util/export-adapters/index.js'; -import { updateTimestamps, updateTimestampsSSTAlign } from './UpdateTimestamps/index.js'; +import updateTimestamps from './UpdateTimestamps/index.js'; import style from './index.module.css'; class TimedTextEditor extends React.Component { @@ -129,7 +129,7 @@ class TimedTextEditor extends React.Component { // Update timestamps according to the original state. const currentContent = convertToRaw(this.state.editorState.getCurrentContent()); - const updatedContentRaw = updateTimestampsSSTAlign(currentContent, this.state.originalState); + const updatedContentRaw = updateTimestamps(currentContent, this.state.originalState); const updatedContent = convertFromRaw(updatedContentRaw); // Update editor state