Apply a heuristic when censorship is off to attempt to undo the rest of the censorship that comes from the Web Speech API. Fixed an issue where the censorship option was not honored.
curtgrimes · Nov 23, 2018 · 28ba1a7 · 28ba1a7
1 parent 80d30b4
commit 28ba1a7
Show file tree

Hide file tree

Showing 8 changed files with 100 additions and 33 deletions.
diff --git a/app/assets/lang-csv/en-US.csv b/app/assets/lang-csv/en-US.csv
@@ -121,7 +121,7 @@ settings.appearance.xPosition,Use title case,X Position
 settings.appearance.yPosition,Use title case,Y Position
 settings.censor.censor,Use title case,Censor
 settings.censor.censorProfaneLanguage,,Censor profane language.
-settings.censor.usEnglishOnly,,(Currently available for US English only.)
+settings.censor.usEnglishOnly,,Currently available for US English only.
 settings.censor.censorProfaneLanguageDescription.text,"Do not translate {seeThisList} or {useWordReplacement}. The text at the key settings.censor.censorProfaneLanguageDescription.seeThisList and settings.censor.censorProfaneLanguageDescription.useWordReplacements will be placed here, respectively.","What's considered profane? {seeThisList} (note: profanity ahead!) If you need to censor additional words not included in this list, {useWordReplacements}."
 settings.censor.censorProfaneLanguageDescription.seeThisList,Will link to https://github.com/LDNOOBW/List-of-Dirty-Naughty-Obscene-and-Otherwise-Bad-Words,See this list
 settings.censor.censorProfaneLanguageDescription.useWordReplacements,,use word replacements

diff --git a/app/assets/lang-csv/pt-BR.csv b/app/assets/lang-csv/pt-BR.csv
@@ -121,7 +121,7 @@ settings.appearance.xPosition,Use title case,Posição X
 settings.appearance.yPosition,Use title case,Posição Y
 settings.censor.censor,Use title case,Censurar
 settings.censor.censorProfaneLanguage,,Censurar linguagem profana.
-settings.censor.usEnglishOnly,,(Atualmente disponível apenas para inglês dos EUA)
+settings.censor.usEnglishOnly,,Atualmente disponível apenas para inglês dos EUA.
 settings.censor.censorProfaneLanguageDescription.text,"Do not translate {seeThisList} or {useWordReplacement}. The text at the key settings.censor.censorProfaneLanguageDescription.seeThisList and settings.censor.censorProfaneLanguageDescription.useWordReplacements will be placed here, respectively.","O que é considerado profano? {seeThisList} (nota: palavrões à frente!) Se você precisar censurar palavras adicionais não incluídas nesta lista, {useWordReplacements}."
 settings.censor.censorProfaneLanguageDescription.seeThisList,Will link to https://github.com/LDNOOBW/List-of-Dirty-Naughty-Obscene-and-Otherwise-Bad-Words,Veja esta lista
 settings.censor.censorProfaneLanguageDescription.useWordReplacements,,usar substituições de palavras

diff --git a/app/lang/en-US.js b/app/lang/en-US.js
@@ -135,7 +135,7 @@ export default { common:
      censor: 
       { censor: 'Censor',
         censorProfaneLanguage: 'Censor profane language.',
-        usEnglishOnly: '(Currently available for US English only.)',
+        usEnglishOnly: 'Currently available for US English only.',
         censorProfaneLanguageDescription: 
          { text: 'What\'s considered profane? {seeThisList} (note: profanity ahead!) If you need to censor additional words not included in this list, {useWordReplacements}.',
            seeThisList: 'See this list',

diff --git a/app/lang/pt-BR.js b/app/lang/pt-BR.js
@@ -135,7 +135,7 @@ export default { common:
      censor: 
       { censor: 'Censurar',
         censorProfaneLanguage: 'Censurar linguagem profana.',
-        usEnglishOnly: '(Atualmente disponível apenas para inglês dos EUA)',
+        usEnglishOnly: 'Atualmente disponível apenas para inglês dos EUA.',
         censorProfaneLanguageDescription: 
          { text: 'O que é considerado profano? {seeThisList} (nota: palavrões à frente!) Se você precisar censurar palavras adicionais não incluídas nesta lista, {useWordReplacements}.',
            seeThisList: 'Veja esta lista',

diff --git a/app/mixins/data/profanity-uncensor-en.js b/app/mixins/data/profanity-uncensor-en.js
@@ -0,0 +1,38 @@
+// Heuristic "uncensor" table. Each entry maps a masked word (first letter followed by
+// asterisks) to the word it most likely stands for. The captioner store spreads these
+// entries into the parser's word replacements only when the censor setting is off.
+// Several words can share one mask, so every `to` value is a best guess.
+export default [
+    {
+        from: 'f***',
+        to: 'fuck',
+    },
+    {
+        from: 'f******',
+        to: 'fucking',
+    },
+    {
+        from: 'f*****',
+        to: 'fucked',
+    },
+    {
+        from: 's***',
+        to: 'shit', // "slut" has the same mask; "shit" is the chosen guess
+    },
+    {
+        from: 'a******',
+        to: 'asshole',
+    },
+    {
+        from: 'b****',
+        to: 'bitch',
+    },
+    {
+        from: 'b******',
+        to: 'bitches',
+    },
+    {
+        from: 'c**********',
+        to: 'cocksucker',
+    },
+    {
+        from: 'm***',
+        to: 'milf',
+    },
+];
diff --git a/app/pages/captioner/settings/censor/index.vue b/app/pages/captioner/settings/censor/index.vue
@@ -1,41 +1,55 @@
 <template>
-  <div class="settings-about-view">
+  <div>
     <div class="custom-control custom-checkbox mb-2">
       <input v-model="censor" class="custom-control-input" name="word-replacements-censor-profanity" type="checkbox" id="word-replacements-censor-profanity">
-      <label class="custom-control-label" for="word-replacements-censor-profanity">{{$t('settings.censor.censorProfaneLanguage')}} {{$t('settings.censor.usEnglishOnly')}}</label>
+      <label class="custom-control-label" for="word-replacements-censor-profanity">{{$t('settings.censor.censorProfaneLanguage')}}</label>
     </div>
-
-    <i18n path="settings.censor.censorProfaneLanguageDescription.text" tag="p" class="small">
-      <a place="seeThisList" href="https://github.com/LDNOOBW/List-of-Dirty-Naughty-Obscene-and-Otherwise-Bad-Words" target="_blank">{{$t('settings.censor.censorProfaneLanguageDescription.seeThisList')}}</a>
-      <router-link place="useWordReplacements" to="word-replacements">{{$t('settings.censor.censorProfaneLanguageDescription.useWordReplacements')}}</router-link>
-    </i18n>
-    <label class="col-form-label">{{$t('settings.censor.replaceCensoredWordsWith')}}</label>
-    <div class="custom-control custom-radio">
-      <input type="radio" id="customRadio1" v-model="censorReplaceWith" value="nothing" name="censorReplaceWith" class="custom-control-input">
-      <label class="custom-control-label" for="customRadio1">{{$t('settings.censor.nothing')}}</label>
+    <div v-if="censor" class="card card-body">
+      <!-- Shown while censoring is on: availability note, link to the word list, and the replacement style. -->
+      <!-- The word-list link opens in a new tab; rel="noopener noreferrer" keeps that page from reaching window.opener. -->
+      <p>
+        {{$t('settings.censor.usEnglishOnly')}}
+        <i18n path="settings.censor.censorProfaneLanguageDescription.text" tag="span">
+          <a place="seeThisList" href="https://github.com/LDNOOBW/List-of-Dirty-Naughty-Obscene-and-Otherwise-Bad-Words" target="_blank" rel="noopener noreferrer">{{$t('settings.censor.censorProfaneLanguageDescription.seeThisList')}}</a>
+          <router-link place="useWordReplacements" to="word-replacements">{{$t('settings.censor.censorProfaneLanguageDescription.useWordReplacements')}}</router-link>
+        </i18n>
+      </p>
+      <label class="col-form-label pt-0">{{$t('settings.censor.replaceCensoredWordsWith')}}</label>
+      <div class="custom-control custom-radio">
+        <input type="radio" id="customRadio1" v-model="censorReplaceWith" value="nothing" name="censorReplaceWith" class="custom-control-input">
+        <label class="custom-control-label" for="customRadio1">{{$t('settings.censor.nothing')}}</label>
+      </div>
+      <div class="custom-control custom-radio">
+        <input type="radio" id="customRadio2" v-model="censorReplaceWith" value="asterisks" name="censorReplaceWith" class="custom-control-input">
+        <label class="custom-control-label" for="customRadio2">{{$t('settings.censor.asterisks')}} (*****)</label>
+      </div>
     </div>
-    <div class="custom-control custom-radio">
-      <input type="radio" id="customRadio2" v-model="censorReplaceWith" value="asterisks" name="censorReplaceWith" class="custom-control-input">
-      <label class="custom-control-label" for="customRadio2">{{$t('settings.censor.asterisks')}} (*****)</label>
+    <div v-else class="card card-body">
+      <!-- Shown while censoring is off: explains that the speech service may still mask words and that a heuristic tries to undo it. -->
+      <!-- TODO(i18n): the two paragraphs below are hard-coded English, unlike the $t() strings used elsewhere in this template. -->
+      <p>Censorship is off. However, the speech-to-text service that Web Captioner runs on currently does not give an option to completely disable censorship. Web Captioner applies a heuristic to uncensor words that are returned from this service that still appear to be censored.</p>
+      <p class="mb-0">If you are running into issues with words being censored even when censorship is off, <a href="https://feedback.webcaptioner.com/">leave feedback</a> or <a href="https://m.me/webcaptioner">contact Web Captioner</a>.</p>
     </div>
 
-    <p class="mt-4"></p>
-
   </div>
 </template>
 
 <script>
 import { mapGetters, mapActions } from 'vuex'
 
 export default {
-  name: 'settings-censor-view',
   transition: 'fade',
   middleware: [
     'settings-meta',
   ],
   meta: {
     settingsPageTitleKey: 'settings.censor.censor',
   },
+  watch: {
+    // When the censor toggle changes while captioning is on, ask the captioner store
+    // (on the next tick) to restart and reinitialize the speech recognizer.
+    censor () {
+      if (!this.$store.state.captioner.on) {
+        return;
+      }
+      this.$nextTick(() => {
+        this.$store.dispatch('captioner/restartAndReinitializeSpeechRecognizer');
+      });
+    },
+  },
   computed: {
     censor: {
       get () {

diff --git a/app/store/modules/captioner/RecognitionResultParser.js b/app/store/modules/captioner/RecognitionResultParser.js
@@ -11,7 +11,7 @@ export default class {
                 return escapeRegExp(stringToCensor);
             });
 
-            replacement.fromRegex = new RegExp('\\b(' + stringsToCensor.join('|') + ')\\b', 'gi');
+            replacement.fromRegex = new RegExp('(^|\\b|\\s)(' + stringsToCensor.join('|') + ')(\\b|\\s|$)', 'gi');
             return replacement;
         });
     }
@@ -21,8 +21,10 @@ export default class {
             transcriptFinal = '';
 
         const makeReplacements = (text) => {
             for (let i = 0; i < this.wordReplacements.length; i++) {
-                text = text.replace(this.wordReplacements[i].fromRegex, this.wordReplacements[i].to);
+                // Re-emit groups 1 and 3 (leading/trailing whitespace, if any); the replacer function keeps "$" in `to` literal.
+                const { fromRegex, to } = this.wordReplacements[i];
+                text = text.replace(fromRegex, (match, leading, word, trailing) => leading + to + trailing);
             }
             return text;
         }

diff --git a/app/store/modules/captioner/index.js b/app/store/modules/captioner/index.js
@@ -2,6 +2,7 @@ import Vue from 'vue'
 import RecognitionResultParser from './RecognitionResultParser.js'
 import internalWordReplacements from '~/mixins/data/internalWordReplacements'
 import censoredProfanity from '~/mixins/data/profanity-en'
+import profanityUncensor from '~/mixins/data/profanity-uncensor-en'
 
 const SILENT_RESTART_AFTER_NO_RESULTS_MS = (2 * 1000);
 const SILENT_RESTART_WAIT_MS_AFTER_STARTING_CAPTIONING = (2.5 * 1000);
@@ -61,19 +62,25 @@ const actions = {
         commit('INIT_STORAGE_SESSION_DATE', null, {root:true});
     },
     start ({commit, state, rootState, getters, dispatch}) {
+        const CENSOR_ON = rootState.settings.censor.on;
+
         let parser = new RecognitionResultParser({
             wordReplacements: [
                 ...rootState.settings.wordReplacements,
                 ...internalWordReplacements,
 
-                // Add profanity
-                {
-                    from: censoredProfanity.join(','),
-                    to: (rootState.settings.censor.replaceWith === 'nothing'
-                        ? ''
-                        : '******' // 'asterisks',
-                    )
-                },
+                ...(CENSOR_ON
+                    // Add profanity censor
+                    ? [{
+                        from: censoredProfanity.join(','),
+                        to: (rootState.settings.censor.replaceWith === 'nothing'
+                            ? ''
+                            : '******' // 'asterisks',
+                        )
+                    }]
+                    // Apply a heuristic to attempt to fully uncensor speech
+                    : profanityUncensor
+                ),
             ],
         });
 
@@ -161,8 +168,7 @@ const actions = {
 
         speechRecognizer.onerror = function(error) {
             clearTimeout(microphonePermissionNeededTimeout);
-            // console.log('speechRecognizer error');
-            // console.log(error);
+
             if (event.error == 'not-allowed') {
                 commit('SET_CAPTIONER_OFF', {omitFromGoogleAnalytics: true});
                 commit('SET_SHOULD_BE_ON', { shouldBeOn: false });
@@ -233,6 +239,8 @@ const actions = {
         dispatch('donation/SHOW_DONATION_MESSAGE_IF_ELIGIBLE', null, {root:true});
     },
 
+
+    // Fast restart if possible
     restart ({commit, state, rootState, dispatch}) {
         if (state.transcript.interim) {
             commit('APPEND_TRANSCRIPT_FINAL', { transcriptFinal: state.transcript.interim });
@@ -255,6 +263,11 @@ const actions = {
         }
     },
 
+    // Sets `state.on` to false and then dispatches 'restart'. Presumably this makes
+    // 'restart' skip its fast path and fully re-create the recognizer and parser —
+    // confirm against the body of 'restart'.
+    // NOTE(review): `state.on` is assigned directly inside an action instead of through
+    // a committed mutation, which Vuex strict mode rejects. Confirm whether an existing
+    // mutation (e.g. 'SET_CAPTIONER_OFF') can be committed here instead.
+    restartAndReinitializeSpeechRecognizer ({commit, state, rootState, dispatch}) {
+        state.on = false;
+        dispatch('restart');
+    },
+
     trackWordCount ({}, {wordCount}) {
         if (wordCount > 0) {
             Vue.$ga.event({