
Commit

feat(blur) replace BodyPix with TFLite
Use the Google Meet model and SIMD optimized WASM.
tudor-phd committed Feb 17, 2021
1 parent f71e8a9 commit 946339a
Showing 14 changed files with 218 additions and 69 deletions.
2 changes: 2 additions & 0 deletions .eslintignore
@@ -6,6 +6,8 @@ build/*
flow-typed/*
libs/*

react/features/stream-effects/blur/vendor/*

# ESLint will by default ignore its own configuration file. However, there does
# not seem to be a reason why we will want to risk being inconsistent with our
# remaining JavaScript source code.
16 changes: 14 additions & 2 deletions Makefile
@@ -5,6 +5,8 @@ LIBJITSIMEET_DIR = node_modules/lib-jitsi-meet/
LIBFLAC_DIR = node_modules/libflacjs/dist/min/
OLM_DIR = node_modules/olm
RNNOISE_WASM_DIR = node_modules/rnnoise-wasm/dist/
TFLITE_WASM = react/features/stream-effects/blur/vendor/tflite
MEET_MODELS_DIR = react/features/stream-effects/blur/vendor/models/
NODE_SASS = ./node_modules/.bin/sass
NPM = npm
OUTPUT_DIR = .
@@ -26,7 +28,7 @@ clean:
rm -fr $(BUILD_DIR)

.NOTPARALLEL:
deploy: deploy-init deploy-appbundle deploy-rnnoise-binary deploy-lib-jitsi-meet deploy-libflac deploy-olm deploy-css deploy-local
deploy: deploy-init deploy-appbundle deploy-rnnoise-binary deploy-tflite deploy-meet-models deploy-lib-jitsi-meet deploy-libflac deploy-olm deploy-css deploy-local

deploy-init:
rm -fr $(DEPLOY_DIR)
@@ -82,6 +84,16 @@ deploy-rnnoise-binary:
$(RNNOISE_WASM_DIR)/rnnoise.wasm \
$(DEPLOY_DIR)

deploy-tflite:
cp \
$(TFLITE_WASM)/*.wasm \
$(DEPLOY_DIR)

deploy-meet-models:
cp \
$(MEET_MODELS_DIR)/*.tflite \
$(DEPLOY_DIR)

deploy-css:
$(NODE_SASS) $(STYLES_MAIN) $(STYLES_BUNDLE) && \
$(CLEANCSS) --skip-rebase $(STYLES_BUNDLE) > $(STYLES_DESTINATION) ; \
@@ -91,7 +103,7 @@ deploy-local:
([ ! -x deploy-local.sh ] || ./deploy-local.sh)

.NOTPARALLEL:
dev: deploy-init deploy-css deploy-rnnoise-binary deploy-lib-jitsi-meet deploy-libflac deploy-olm
dev: deploy-init deploy-css deploy-rnnoise-binary deploy-tflite deploy-meet-models deploy-lib-jitsi-meet deploy-libflac deploy-olm
$(WEBPACK_DEV_SERVER) --detect-circular-deps

source-package:
10 changes: 5 additions & 5 deletions package-lock.json

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion package.json
@@ -95,10 +95,10 @@
"redux-thunk": "2.2.0",
"rnnoise-wasm": "github:jitsi/rnnoise-wasm#566a16885897704d6e6d67a1d5ac5d39781db2af",
"rtcstats": "github:jitsi/rtcstats#v6.2.0",
"stackblur-canvas": "2.3.0",
"styled-components": "3.4.9",
"util": "0.12.1",
"uuid": "3.1.0",
"wasm-check": "2.0.1",
"windows-iana": "^3.1.0",
"xmldom": "0.1.27",
"zxcvbn": "4.4.2"
152 changes: 102 additions & 50 deletions react/features/stream-effects/blur/JitsiStreamBlurEffect.js
@@ -1,29 +1,32 @@
// @flow

import * as StackBlur from 'stackblur-canvas';

import {
CLEAR_TIMEOUT,
TIMEOUT_TICK,
SET_TIMEOUT,
timerWorkerScript
} from './TimerWorker';

const segmentationWidth = 256;
const segmentationHeight = 144;
const segmentationPixelCount = segmentationWidth * segmentationHeight;
const blurValue = '25px';

/**
* Represents a modified MediaStream that adds blur to video background.
* <tt>JitsiStreamBlurEffect</tt> does the processing of the original
* video stream.
*/
export default class JitsiStreamBlurEffect {
_bpModel: Object;
_model: Object;
_inputVideoElement: HTMLVideoElement;
_inputVideoCanvasElement: HTMLCanvasElement;
_onMaskFrameTimer: Function;
_maskFrameTimerWorker: Worker;
_maskInProgress: boolean;
_outputCanvasElement: HTMLCanvasElement;
_outputCanvasCtx: Object;
_segmentationMaskCtx: Object;
_segmentationMask: Object;
_segmentationMaskCanvas: Object;
_renderMask: Function;
_segmentationData: Object;
isEnabled: Function;
startEffect: Function;
stopEffect: Function;
@@ -35,7 +38,7 @@ export default class JitsiStreamBlurEffect {
* @param {BodyPix} bpModel - BodyPix model.
*/
constructor(bpModel: Object) {
this._bpModel = bpModel;
this._model = bpModel;

// Bind event handler so it is only bound once for every instance.
this._onMaskFrameTimer = this._onMaskFrameTimer.bind(this);
@@ -44,7 +47,6 @@ export default class JitsiStreamBlurEffect {
this._outputCanvasElement = document.createElement('canvas');
this._outputCanvasElement.getContext('2d');
this._inputVideoElement = document.createElement('video');
this._inputVideoCanvasElement = document.createElement('canvas');
}

/**
@@ -61,61 +63,108 @@ export default class JitsiStreamBlurEffect {
}

/**
* Loop function to render the background mask.
* Runs the post-processing: composites the sharp person pixels over a blurred background on the output canvas.
*
* @private
* @returns {void}
*/
async _renderMask() {
if (!this._maskInProgress) {
this._maskInProgress = true;
this._bpModel.segmentPerson(this._inputVideoElement, {
internalResolution: 'low', // resized to 0.5 times of the original resolution before inference
maxDetections: 1, // max. number of person poses to detect per image
segmentationThreshold: 0.7, // represents probability that a pixel belongs to a person
flipHorizontal: false,
scoreThreshold: 0.2
}).then(data => {
this._segmentationData = data;
this._maskInProgress = false;
});
}
const inputCanvasCtx = this._inputVideoCanvasElement.getContext('2d');

inputCanvasCtx.drawImage(this._inputVideoElement, 0, 0);
runPostProcessing() {
this._outputCanvasCtx.globalCompositeOperation = 'copy';

const currentFrame = inputCanvasCtx.getImageData(
// Draw segmentation mask.
this._outputCanvasCtx.filter = `blur(${blurValue})`;
this._outputCanvasCtx.drawImage(
this._segmentationMaskCanvas,
0,
0,
segmentationWidth,
segmentationHeight,
0,
this._inputVideoCanvasElement.width,
this._inputVideoCanvasElement.height
0,
this._inputVideoElement.width,
this._inputVideoElement.height
);

if (this._segmentationData) {
const blurData = new ImageData(currentFrame.data.slice(), currentFrame.width, currentFrame.height);

StackBlur.imageDataRGB(blurData, 0, 0, currentFrame.width, currentFrame.height, 12);
this._outputCanvasCtx.globalCompositeOperation = 'source-in';
this._outputCanvasCtx.filter = 'none';
this._outputCanvasCtx.drawImage(this._inputVideoElement, 0, 0);

for (let x = 0; x < this._outputCanvasElement.width; x++) {
for (let y = 0; y < this._outputCanvasElement.height; y++) {
const n = (y * this._outputCanvasElement.width) + x;
this._outputCanvasCtx.globalCompositeOperation = 'destination-over';
this._outputCanvasCtx.filter = `blur(${blurValue})`; // FIXME Does not work on Safari.
this._outputCanvasCtx.drawImage(this._inputVideoElement, 0, 0);
}

if (this._segmentationData.data[n] === 0) {
currentFrame.data[n * 4] = blurData.data[n * 4];
currentFrame.data[(n * 4) + 1] = blurData.data[(n * 4) + 1];
currentFrame.data[(n * 4) + 2] = blurData.data[(n * 4) + 2];
currentFrame.data[(n * 4) + 3] = blurData.data[(n * 4) + 3];
}
}
}
/**
* Runs the TensorFlow Lite inference and converts the output logits into the segmentation mask.
*
* @returns {void}
*/
runInference() {
this._model._runInference();
const outputMemoryOffset = this._model._getOutputMemoryOffset() / 4;

for (let i = 0; i < segmentationPixelCount; i++) {
const background = this._model.HEAPF32[outputMemoryOffset + (i * 2)];
const person = this._model.HEAPF32[outputMemoryOffset + (i * 2) + 1];
const shift = Math.max(background, person);
const backgroundExp = Math.exp(background - shift);
const personExp = Math.exp(person - shift);

// Sets only the alpha component of each pixel.
this._segmentationMask.data[(i * 4) + 3] = (255 * personExp) / (backgroundExp + personExp);
}
this._outputCanvasElement.getContext('2d').putImageData(currentFrame, 0, 0);
this._segmentationMaskCtx.putImageData(this._segmentationMask, 0, 0);
}

/**
* Loop function to render the background mask.
*
* @private
* @returns {void}
*/
_renderMask() {
this.resizeSource();
this.runInference();
this.runPostProcessing();

this._maskFrameTimerWorker.postMessage({
id: SET_TIMEOUT,
timeMs: 1000 / 30
});
}

/**
* Downscales the source frame to the segmentation size and writes it into the model's input buffer.
*
* @returns {void}
*/
resizeSource() {
this._segmentationMaskCtx.drawImage(
this._inputVideoElement,
0,
0,
this._inputVideoElement.width,
this._inputVideoElement.height,
0,
0,
segmentationWidth,
segmentationHeight
);

const imageData = this._segmentationMaskCtx.getImageData(
0,
0,
segmentationWidth,
segmentationHeight
);
const inputMemoryOffset = this._model._getInputMemoryOffset() / 4;

for (let i = 0; i < segmentationPixelCount; i++) {
this._model.HEAPF32[inputMemoryOffset + (i * 3)] = imageData.data[i * 4] / 255;
this._model.HEAPF32[inputMemoryOffset + (i * 3) + 1] = imageData.data[(i * 4) + 1] / 255;
this._model.HEAPF32[inputMemoryOffset + (i * 3) + 2] = imageData.data[(i * 4) + 2] / 255;
}
}

/**
* Checks if the local track supports this effect.
*
@@ -136,15 +185,18 @@ export default class JitsiStreamBlurEffect {
startEffect(stream: MediaStream) {
this._maskFrameTimerWorker = new Worker(timerWorkerScript, { name: 'Blur effect worker' });
this._maskFrameTimerWorker.onmessage = this._onMaskFrameTimer;

const firstVideoTrack = stream.getVideoTracks()[0];
const { height, frameRate, width }
= firstVideoTrack.getSettings ? firstVideoTrack.getSettings() : firstVideoTrack.getConstraints();

this._segmentationMask = new ImageData(segmentationWidth, segmentationHeight);
this._segmentationMaskCanvas = document.createElement('canvas');
this._segmentationMaskCanvas.width = segmentationWidth;
this._segmentationMaskCanvas.height = segmentationHeight;
this._segmentationMaskCtx = this._segmentationMaskCanvas.getContext('2d');
this._outputCanvasElement.width = parseInt(width, 10);
this._outputCanvasElement.height = parseInt(height, 10);
this._inputVideoCanvasElement.width = parseInt(width, 10);
this._inputVideoCanvasElement.height = parseInt(height, 10);
this._outputCanvasCtx = this._outputCanvasElement.getContext('2d');
this._inputVideoElement.width = parseInt(width, 10);
this._inputVideoElement.height = parseInt(height, 10);
this._inputVideoElement.autoplay = true;
36 changes: 26 additions & 10 deletions react/features/stream-effects/blur/index.js
@@ -1,8 +1,15 @@
// @flow

import * as bodyPix from '@tensorflow-models/body-pix';
import * as wasmCheck from 'wasm-check';

import JitsiStreamBlurEffect from './JitsiStreamBlurEffect';
import createTFLiteModule from './vendor/tflite/tflite';
import createTFLiteSIMDModule from './vendor/tflite/tflite-simd';

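// Segmentation models from Google Meet, keyed by mask height: '96' is the lite
// variant, '144' the full variant fetched below (it matches
// segmentationWidth x segmentationHeight = 256 x 144 in JitsiStreamBlurEffect.js).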
const models = {
'96': '/libs/segm_lite_v681.tflite',
'144': '/libs/segm_full_v679.tflite'
};

/**
* Creates a new instance of JitsiStreamBlurEffect. This loads the bodyPix model that is used to
@@ -14,15 +21,24 @@ export async function createBlurEffect() {
if (!MediaStreamTrack.prototype.getSettings && !MediaStreamTrack.prototype.getConstraints) {
throw new Error('JitsiStreamBlurEffect not supported!');
}
let tflite;

if (wasmCheck.feature.simd) {
tflite = await createTFLiteSIMDModule();
} else {
tflite = await createTFLiteModule();
}

const modelBufferOffset = tflite._getModelBufferMemoryOffset();
const modelResponse = await fetch(
models['144']
);

const model = await modelResponse.arrayBuffer();

tflite.HEAPU8.set(new Uint8Array(model), modelBufferOffset);

// An output stride of 16 and a multiplier of 0.5 are used for improved
// performance on a larger range of CPUs.
const bpModel = await bodyPix.load({
architecture: 'MobileNetV1',
outputStride: 16,
multiplier: 0.50,
quantBytes: 2
});
tflite._loadModel(model.byteLength);

return new JitsiStreamBlurEffect(bpModel);
return new JitsiStreamBlurEffect(tflite);
}
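
For context, a hypothetical consumer-side sketch (not part of this commit) of how the factory above can be applied to a local camera stream; `localVideoStream` and the relative import path are assumptions made for illustration:

```js
import { createBlurEffect } from '../../stream-effects/blur';

async function getBlurredStream(localVideoStream) {
    const blurEffect = await createBlurEffect();

    // startEffect renders the processed frames onto the output canvas and
    // returns a MediaStream captured from it; stopEffect ends the render loop.
    return blurEffect.startEffect(localVideoStream);
}
```
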
24 changes: 24 additions & 0 deletions react/features/stream-effects/blur/vendor/README.md
@@ -0,0 +1,24 @@
# Virtual Background stream effects

> Inspired by https://ai.googleblog.com/2020/10/background-features-in-google-meet.html and https://github.com/Volcomix/virtual-background.git
#### Canvas 2D + CPU

This rendering pipeline is largely the same as the one used with BodyPix. It relies on Canvas compositing properties to blend rendering layers according to the segmentation mask.
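
A minimal sketch of that compositing order (mirroring `runPostProcessing` from this commit; `ctx`, `outputWidth` and `outputHeight` stand in for the effect's output canvas context and dimensions, and the `_`-prefixed fields are shortened):

```js
// 1. Start from the upscaled segmentation mask; its alpha channel holds the
//    per-pixel person probability. Blurring it smooths the low-res mask edges.
ctx.globalCompositeOperation = 'copy';
ctx.filter = `blur(${blurValue})`;
ctx.drawImage(segmentationMaskCanvas,
    0, 0, segmentationWidth, segmentationHeight,
    0, 0, outputWidth, outputHeight);

// 2. Keep only the person pixels from the sharp input frame.
ctx.globalCompositeOperation = 'source-in';
ctx.filter = 'none';
ctx.drawImage(inputVideoElement, 0, 0);

// 3. Fill the remaining (background) pixels with a blurred copy of the frame.
ctx.globalCompositeOperation = 'destination-over';
ctx.filter = `blur(${blurValue})`;
ctx.drawImage(inputVideoElement, 0, 0);
```
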

Interactions with the TFLite inference tool run on the CPU: the input is converted from UInt8 to Float32 for the model, and a softmax is applied to the model output.
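
A sketch of those conversions (mirroring `resizeSource` and `runInference` from this commit; `tflite` is the loaded WASM module, `imageData` the frame downscaled to the segmentation size and `segmentationMask` the mask ImageData):

```js
// Input: RGBA UInt8 pixels -> normalized RGB Float32 in the model input buffer.
const inputOffset = tflite._getInputMemoryOffset() / 4;

for (let i = 0; i < segmentationPixelCount; i++) {
    tflite.HEAPF32[inputOffset + (i * 3)] = imageData.data[i * 4] / 255;
    tflite.HEAPF32[inputOffset + (i * 3) + 1] = imageData.data[(i * 4) + 1] / 255;
    tflite.HEAPF32[inputOffset + (i * 3) + 2] = imageData.data[(i * 4) + 2] / 255;
}

tflite._runInference();

// Output: two logits per pixel (background, person) -> softmax -> mask alpha.
const outputOffset = tflite._getOutputMemoryOffset() / 4;

for (let i = 0; i < segmentationPixelCount; i++) {
    const background = tflite.HEAPF32[outputOffset + (i * 2)];
    const person = tflite.HEAPF32[outputOffset + (i * 2) + 1];
    const shift = Math.max(background, person); // Shift for numerical stability.
    const personExp = Math.exp(person - shift);
    const backgroundExp = Math.exp(background - shift);

    segmentationMask.data[(i * 4) + 3] = (255 * personExp) / (backgroundExp + personExp);
}
```
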

The frame rate is higher and the quality looks better than with BodyPix.

#### SIMD and non-SIMD

How to test with SIMD (the effect also detects SIMD support at runtime; see the sketch below):
1. Go to chrome://flags/
2. Search for the SIMD flag.
3. Enable WebAssembly SIMD support (enables support for the WebAssembly SIMD proposal).
4. Relaunch Google Chrome.
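
At runtime the loader picks the SIMD build automatically; a sketch of that selection (mirroring `index.js` from this commit):

```js
import * as wasmCheck from 'wasm-check';

import createTFLiteModule from './vendor/tflite/tflite';
import createTFLiteSIMDModule from './vendor/tflite/tflite-simd';

async function loadTFLite() {
    // Prefer the SIMD-optimized WASM build when the browser supports it.
    return wasmCheck.feature.simd
        ? createTFLiteSIMDModule()
        : createTFLiteModule();
}
```
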

More details:
- [WebAssembly](https://webassembly.org/)
- [WebAssembly SIMD](https://github.com/WebAssembly/simd)
- [TFLite](https://blog.tensorflow.org/2020/07/accelerating-tensorflow-lite-xnnpack-integration.html)
Binary files (the TFLite models and WASM binaries under react/features/stream-effects/blur/vendor/) not shown.
