
Commit

feat(blur) replace BodyPix with TFLite
Use the Google Meet model and SIMD optimized WASM.
tudor-phd committed Feb 17, 2021
1 parent f71e8a9 commit 946339a
Showing 14 changed files with 218 additions and 69 deletions.
2 changes: 2 additions & 0 deletions .eslintignore
@@ -6,6 +6,8 @@ build/*
flow-typed/*
libs/*

react/features/stream-effects/blur/vendor/*

# ESLint will by default ignore its own configuration file. However, there does
# not seem to be a reason why we will want to risk being inconsistent with our
# remaining JavaScript source code.
16 changes: 14 additions & 2 deletions Makefile
@@ -5,6 +5,8 @@ LIBJITSIMEET_DIR = node_modules/lib-jitsi-meet/
LIBFLAC_DIR = node_modules/libflacjs/dist/min/
OLM_DIR = node_modules/olm
RNNOISE_WASM_DIR = node_modules/rnnoise-wasm/dist/
TFLITE_WASM = react/features/stream-effects/blur/vendor/tflite
MEET_MODELS_DIR = react/features/stream-effects/blur/vendor/models/
NODE_SASS = ./node_modules/.bin/sass
NPM = npm
OUTPUT_DIR = .
@@ -26,7 +28,7 @@ clean:
rm -fr $(BUILD_DIR)

.NOTPARALLEL:
deploy: deploy-init deploy-appbundle deploy-rnnoise-binary deploy-lib-jitsi-meet deploy-libflac deploy-olm deploy-css deploy-local
deploy: deploy-init deploy-appbundle deploy-rnnoise-binary deploy-tflite deploy-meet-models deploy-lib-jitsi-meet deploy-libflac deploy-olm deploy-css deploy-local

deploy-init:
rm -fr $(DEPLOY_DIR)
@@ -82,6 +84,16 @@ deploy-rnnoise-binary:
$(RNNOISE_WASM_DIR)/rnnoise.wasm \
$(DEPLOY_DIR)

deploy-tflite:
cp \
$(TFLITE_WASM)/*.wasm \
$(DEPLOY_DIR)

deploy-meet-models:
cp \
$(MEET_MODELS_DIR)/*.tflite \
$(DEPLOY_DIR)

deploy-css:
$(NODE_SASS) $(STYLES_MAIN) $(STYLES_BUNDLE) && \
$(CLEANCSS) --skip-rebase $(STYLES_BUNDLE) > $(STYLES_DESTINATION) ; \
@@ -91,7 +103,7 @@ deploy-local:
([ ! -x deploy-local.sh ] || ./deploy-local.sh)

.NOTPARALLEL:
dev: deploy-init deploy-css deploy-rnnoise-binary deploy-lib-jitsi-meet deploy-libflac deploy-olm
dev: deploy-init deploy-css deploy-rnnoise-binary deploy-tflite deploy-meet-models deploy-lib-jitsi-meet deploy-libflac deploy-olm
$(WEBPACK_DEV_SERVER) --detect-circular-deps

source-package:
10 changes: 5 additions & 5 deletions package-lock.json

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion package.json
@@ -95,10 +95,10 @@
"redux-thunk": "2.2.0",
"rnnoise-wasm": "github:jitsi/rnnoise-wasm#566a16885897704d6e6d67a1d5ac5d39781db2af",
"rtcstats": "github:jitsi/rtcstats#v6.2.0",
"stackblur-canvas": "2.3.0",
"styled-components": "3.4.9",
"util": "0.12.1",
"uuid": "3.1.0",
"wasm-check": "2.0.1",
"windows-iana": "^3.1.0",
"xmldom": "0.1.27",
"zxcvbn": "4.4.2"
152 changes: 102 additions & 50 deletions react/features/stream-effects/blur/JitsiStreamBlurEffect.js
@@ -1,29 +1,32 @@
// @flow

import * as StackBlur from 'stackblur-canvas';

import {
CLEAR_TIMEOUT,
TIMEOUT_TICK,
SET_TIMEOUT,
timerWorkerScript
} from './TimerWorker';

const segmentationWidth = 256;
const segmentationHeight = 144;
const segmentationPixelCount = segmentationWidth * segmentationHeight;
const blurValue = '25px';

/**
* Represents a modified MediaStream that adds blur to video background.
* <tt>JitsiStreamBlurEffect</tt> does the processing of the original
* video stream.
*/
export default class JitsiStreamBlurEffect {
_bpModel: Object;
_model: Object;
_inputVideoElement: HTMLVideoElement;
_inputVideoCanvasElement: HTMLCanvasElement;
_onMaskFrameTimer: Function;
_maskFrameTimerWorker: Worker;
_maskInProgress: boolean;
_outputCanvasElement: HTMLCanvasElement;
_outputCanvasCtx: Object;
_segmentationMaskCtx: Object;
_segmentationMask: Object;
_segmentationMaskCanvas: Object;
_renderMask: Function;
_segmentationData: Object;
isEnabled: Function;
startEffect: Function;
stopEffect: Function;
@@ -35,7 +38,7 @@ export default class JitsiStreamBlurEffect {
* @param {BodyPix} bpModel - BodyPix model.
*/
constructor(bpModel: Object) {
this._bpModel = bpModel;
this._model = bpModel;

// Bind event handler so it is only bound once for every instance.
this._onMaskFrameTimer = this._onMaskFrameTimer.bind(this);
@@ -44,7 +47,6 @@ export default class JitsiStreamBlurEffect {
this._outputCanvasElement = document.createElement('canvas');
this._outputCanvasElement.getContext('2d');
this._inputVideoElement = document.createElement('video');
this._inputVideoCanvasElement = document.createElement('canvas');
}

/**
@@ -61,61 +63,108 @@ export default class JitsiStreamBlurEffect {
}

/**
* Loop function to render the background mask.
* Runs the post-processing: composites the sharp person pixels over a blurred background on the output canvas.
*
* @private
* @returns {void}
*/
async _renderMask() {
if (!this._maskInProgress) {
this._maskInProgress = true;
this._bpModel.segmentPerson(this._inputVideoElement, {
internalResolution: 'low', // resized to 0.5 times of the original resolution before inference
maxDetections: 1, // max. number of person poses to detect per image
segmentationThreshold: 0.7, // represents probability that a pixel belongs to a person
flipHorizontal: false,
scoreThreshold: 0.2
}).then(data => {
this._segmentationData = data;
this._maskInProgress = false;
});
}
const inputCanvasCtx = this._inputVideoCanvasElement.getContext('2d');

inputCanvasCtx.drawImage(this._inputVideoElement, 0, 0);
runPostProcessing() {
this._outputCanvasCtx.globalCompositeOperation = 'copy';

const currentFrame = inputCanvasCtx.getImageData(
// Draw segmentation mask.
this._outputCanvasCtx.filter = `blur(${blurValue})`;
this._outputCanvasCtx.drawImage(
this._segmentationMaskCanvas,
0,
0,
segmentationWidth,
segmentationHeight,
0,
this._inputVideoCanvasElement.width,
this._inputVideoCanvasElement.height
0,
this._inputVideoElement.width,
this._inputVideoElement.height
);

if (this._segmentationData) {
const blurData = new ImageData(currentFrame.data.slice(), currentFrame.width, currentFrame.height);

StackBlur.imageDataRGB(blurData, 0, 0, currentFrame.width, currentFrame.height, 12);
this._outputCanvasCtx.globalCompositeOperation = 'source-in';
this._outputCanvasCtx.filter = 'none';
this._outputCanvasCtx.drawImage(this._inputVideoElement, 0, 0);

for (let x = 0; x < this._outputCanvasElement.width; x++) {
for (let y = 0; y < this._outputCanvasElement.height; y++) {
const n = (y * this._outputCanvasElement.width) + x;
this._outputCanvasCtx.globalCompositeOperation = 'destination-over';
this._outputCanvasCtx.filter = `blur(${blurValue})`; // FIXME Does not work on Safari.
this._outputCanvasCtx.drawImage(this._inputVideoElement, 0, 0);
}

if (this._segmentationData.data[n] === 0) {
currentFrame.data[n * 4] = blurData.data[n * 4];
currentFrame.data[(n * 4) + 1] = blurData.data[(n * 4) + 1];
currentFrame.data[(n * 4) + 2] = blurData.data[(n * 4) + 2];
currentFrame.data[(n * 4) + 3] = blurData.data[(n * 4) + 3];
}
}
}
/**
* Runs the TensorFlow Lite inference and converts the output logits into the segmentation mask.
*
* @returns {void}
*/
runInference() {
this._model._runInference();
const outputMemoryOffset = this._model._getOutputMemoryOffset() / 4;

for (let i = 0; i < segmentationPixelCount; i++) {
const background = this._model.HEAPF32[outputMemoryOffset + (i * 2)];
const person = this._model.HEAPF32[outputMemoryOffset + (i * 2) + 1];
const shift = Math.max(background, person);
const backgroundExp = Math.exp(background - shift);
const personExp = Math.exp(person - shift);

// Sets only the alpha component of each pixel.
this._segmentationMask.data[(i * 4) + 3] = (255 * personExp) / (backgroundExp + personExp);
}
this._outputCanvasElement.getContext('2d').putImageData(currentFrame, 0, 0);
this._segmentationMaskCtx.putImageData(this._segmentationMask, 0, 0);
}

/**
* Loop function to render the background mask.
*
* @private
* @returns {void}
*/
_renderMask() {
this.resizeSource();
this.runInference();
this.runPostProcessing();

this._maskFrameTimerWorker.postMessage({
id: SET_TIMEOUT,
timeMs: 1000 / 30
});
}

/**
* Downscales the source frame to the segmentation size and writes it into the model's input buffer.
*
* @returns {void}
*/
resizeSource() {
this._segmentationMaskCtx.drawImage(
this._inputVideoElement,
0,
0,
this._inputVideoElement.width,
this._inputVideoElement.height,
0,
0,
segmentationWidth,
segmentationHeight
);

const imageData = this._segmentationMaskCtx.getImageData(
0,
0,
segmentationWidth,
segmentationHeight
);
const inputMemoryOffset = this._model._getInputMemoryOffset() / 4;

for (let i = 0; i < segmentationPixelCount; i++) {
this._model.HEAPF32[inputMemoryOffset + (i * 3)] = imageData.data[i * 4] / 255;
this._model.HEAPF32[inputMemoryOffset + (i * 3) + 1] = imageData.data[(i * 4) + 1] / 255;
this._model.HEAPF32[inputMemoryOffset + (i * 3) + 2] = imageData.data[(i * 4) + 2] / 255;
}
}

/**
* Checks if the local track supports this effect.
*
@@ -136,15 +185,18 @@ export default class JitsiStreamBlurEffect {
startEffect(stream: MediaStream) {
this._maskFrameTimerWorker = new Worker(timerWorkerScript, { name: 'Blur effect worker' });
this._maskFrameTimerWorker.onmessage = this._onMaskFrameTimer;

const firstVideoTrack = stream.getVideoTracks()[0];
const { height, frameRate, width }
= firstVideoTrack.getSettings ? firstVideoTrack.getSettings() : firstVideoTrack.getConstraints();

this._segmentationMask = new ImageData(segmentationWidth, segmentationHeight);
this._segmentationMaskCanvas = document.createElement('canvas');
this._segmentationMaskCanvas.width = segmentationWidth;
this._segmentationMaskCanvas.height = segmentationHeight;
this._segmentationMaskCtx = this._segmentationMaskCanvas.getContext('2d');
this._outputCanvasElement.width = parseInt(width, 10);
this._outputCanvasElement.height = parseInt(height, 10);
this._inputVideoCanvasElement.width = parseInt(width, 10);
this._inputVideoCanvasElement.height = parseInt(height, 10);
this._outputCanvasCtx = this._outputCanvasElement.getContext('2d');
this._inputVideoElement.width = parseInt(width, 10);
this._inputVideoElement.height = parseInt(height, 10);
this._inputVideoElement.autoplay = true;
36 changes: 26 additions & 10 deletions react/features/stream-effects/blur/index.js
@@ -1,8 +1,15 @@
// @flow

import * as bodyPix from '@tensorflow-models/body-pix';
import * as wasmCheck from 'wasm-check';

import JitsiStreamBlurEffect from './JitsiStreamBlurEffect';
import createTFLiteModule from './vendor/tflite/tflite';
import createTFLiteSIMDModule from './vendor/tflite/tflite-simd';

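// Segmentation models from Google Meet, keyed by mask height: '96' is the lite
// variant, '144' the full variant fetched below (it matches
// segmentationWidth x segmentationHeight = 256 x 144 in JitsiStreamBlurEffect.js).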
const models = {
'96': '/libs/segm_lite_v681.tflite',
'144': '/libs/segm_full_v679.tflite'
};

/**
* Creates a new instance of JitsiStreamBlurEffect. This loads the bodyPix model that is used to
@@ -14,15 +21,24 @@ export async function createBlurEffect() {
if (!MediaStreamTrack.prototype.getSettings && !MediaStreamTrack.prototype.getConstraints) {
throw new Error('JitsiStreamBlurEffect not supported!');
}
let tflite;

if (wasmCheck.feature.simd) {
tflite = await createTFLiteSIMDModule();
} else {
tflite = await createTFLiteModule();
}

const modelBufferOffset = tflite._getModelBufferMemoryOffset();
const modelResponse = await fetch(
models['144']
);

const model = await modelResponse.arrayBuffer();

tflite.HEAPU8.set(new Uint8Array(model), modelBufferOffset);

// An output stride of 16 and a multiplier of 0.5 are used for improved
// performance on a larger range of CPUs.
const bpModel = await bodyPix.load({
architecture: 'MobileNetV1',
outputStride: 16,
multiplier: 0.50,
quantBytes: 2
});
tflite._loadModel(model.byteLength);

return new JitsiStreamBlurEffect(bpModel);
return new JitsiStreamBlurEffect(tflite);
}
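
For context, a hypothetical consumer-side sketch (not part of this commit) of how the factory above can be applied to a local camera stream; `localVideoStream` and the relative import path are assumptions made for illustration:

```js
import { createBlurEffect } from '../../stream-effects/blur';

async function getBlurredStream(localVideoStream) {
    const blurEffect = await createBlurEffect();

    // startEffect renders the processed frames onto the output canvas and
    // returns a MediaStream captured from it; stopEffect ends the render loop.
    return blurEffect.startEffect(localVideoStream);
}
```
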
24 changes: 24 additions & 0 deletions react/features/stream-effects/blur/vendor/README.md
@@ -0,0 +1,24 @@
# Virtual Background stream effects

> Inspired by https://ai.googleblog.com/2020/10/background-features-in-google-meet.html and https://github.com/Volcomix/virtual-background.git
#### Canvas 2D + CPU

This rendering pipeline is largely the same as the one used with BodyPix. It relies on Canvas compositing properties to blend rendering layers according to the segmentation mask.
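
A minimal sketch of that compositing order (mirroring `runPostProcessing` from this commit; `ctx`, `outputWidth` and `outputHeight` stand in for the effect's output canvas context and dimensions, and the `_`-prefixed fields are shortened):

```js
// 1. Start from the upscaled segmentation mask; its alpha channel holds the
//    per-pixel person probability. Blurring it smooths the low-res mask edges.
ctx.globalCompositeOperation = 'copy';
ctx.filter = `blur(${blurValue})`;
ctx.drawImage(segmentationMaskCanvas,
    0, 0, segmentationWidth, segmentationHeight,
    0, 0, outputWidth, outputHeight);

// 2. Keep only the person pixels from the sharp input frame.
ctx.globalCompositeOperation = 'source-in';
ctx.filter = 'none';
ctx.drawImage(inputVideoElement, 0, 0);

// 3. Fill the remaining (background) pixels with a blurred copy of the frame.
ctx.globalCompositeOperation = 'destination-over';
ctx.filter = `blur(${blurValue})`;
ctx.drawImage(inputVideoElement, 0, 0);
```
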

Interactions with the TFLite inference tool run on the CPU: the input is converted from UInt8 to Float32 for the model, and a softmax is applied to the model output.
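
A sketch of those conversions (mirroring `resizeSource` and `runInference` from this commit; `tflite` is the loaded WASM module, `imageData` the frame downscaled to the segmentation size and `segmentationMask` the mask ImageData):

```js
// Input: RGBA UInt8 pixels -> normalized RGB Float32 in the model input buffer.
const inputOffset = tflite._getInputMemoryOffset() / 4;

for (let i = 0; i < segmentationPixelCount; i++) {
    tflite.HEAPF32[inputOffset + (i * 3)] = imageData.data[i * 4] / 255;
    tflite.HEAPF32[inputOffset + (i * 3) + 1] = imageData.data[(i * 4) + 1] / 255;
    tflite.HEAPF32[inputOffset + (i * 3) + 2] = imageData.data[(i * 4) + 2] / 255;
}

tflite._runInference();

// Output: two logits per pixel (background, person) -> softmax -> mask alpha.
const outputOffset = tflite._getOutputMemoryOffset() / 4;

for (let i = 0; i < segmentationPixelCount; i++) {
    const background = tflite.HEAPF32[outputOffset + (i * 2)];
    const person = tflite.HEAPF32[outputOffset + (i * 2) + 1];
    const shift = Math.max(background, person); // Shift for numerical stability.
    const personExp = Math.exp(person - shift);
    const backgroundExp = Math.exp(background - shift);

    segmentationMask.data[(i * 4) + 3] = (255 * personExp) / (backgroundExp + personExp);
}
```
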

The frame rate is higher and the quality looks better than with BodyPix.

#### SIMD and non-SIMD

How to test with SIMD (the effect also detects SIMD support at runtime; see the sketch below):
1. Go to chrome://flags/
2. Search for the SIMD flag.
3. Enable WebAssembly SIMD support (enables support for the WebAssembly SIMD proposal).
4. Relaunch Google Chrome.
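
At runtime the loader picks the SIMD build automatically; a sketch of that selection (mirroring `index.js` from this commit):

```js
import * as wasmCheck from 'wasm-check';

import createTFLiteModule from './vendor/tflite/tflite';
import createTFLiteSIMDModule from './vendor/tflite/tflite-simd';

async function loadTFLite() {
    // Prefer the SIMD-optimized WASM build when the browser supports it.
    return wasmCheck.feature.simd
        ? createTFLiteSIMDModule()
        : createTFLiteModule();
}
```
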

More details:
- [WebAssembly](https://webassembly.org/)
- [WebAssembly SIMD](https://github.com/WebAssembly/simd)
- [TFLite](https://blog.tensorflow.org/2020/07/accelerating-tensorflow-lite-xnnpack-integration.html)
Binary files (the TFLite models and WASM binaries under react/features/stream-effects/blur/vendor/) not shown.
