Switch video upload to angular frontend instead of Apps Script
This removes the need to use Data URLs, which were slowing down the process
significantly and limiting uploads to ~20MB. The process is now much faster and
bounded only by browser limits (~4GB).

Change-Id: Ie4eae3f50a61b691ae0476a9129482c2ab88dc3d
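For context on the numbers above, a rough back-of-the-envelope sketch (not part of this commit) of why Data URLs were the bottleneck: base64 encodes every 3 bytes of video as 4 characters, so the payload handed to Apps Script grows by roughly a third before any service limits even apply.

```typescript
// Illustrative only: approximate length of a Data URL for a given file size.
// Base64 maps 3 input bytes to 4 output characters, plus the MIME prefix.
function approximateDataUrlLength(fileSizeBytes: number): number {
  const prefix = 'data:video/mp4;base64,'.length;
  return prefix + Math.ceil(fileSizeBytes / 3) * 4;
}

// A 20 MB video becomes a string of roughly 28 million characters that must
// round-trip through Apps Script; the new flow POSTs the raw Blob to GCS instead.
console.log(approximateDataUrlLength(20 * 1024 * 1024));
```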
mohabfekry committed Jun 10, 2024
1 parent ab61412 commit 5091eb5
Showing 8 changed files with 75 additions and 42 deletions.
6 changes: 5 additions & 1 deletion README.md
@@ -32,6 +32,10 @@ limitations under the License.

Update to the latest version by running `npm run update-app` after pulling the latest changes from the repository via `git pull --rebase --autostash`; you would need to redeploy the *UI* for features marked as `frontend`, and *GCP components* for features marked as `backend`.

* [June 2024]
* `frontend`: Enhanced file upload process to support >20MB files and up to browser-specific limits (~2-4GB).
* `frontend`: Improved variants generation prompt and enhanced its adherence to user instructions.
* `backend`: Improved Demand Gen text assets generation prompt. It is recommended to set the `CONFIG_MULTIMODAL_ASSET_GENERATION` environment variable to `'true'` for optimal asset quality.
* [May 2024]: Launch! 🚀

## Overview
@@ -139,7 +143,7 @@ The diagram below shows how Vigenair's components interact and communicate with
* First, background music and voice-over (if available) are separated via the [spleeter](https://github.com/deezer/spleeter) library, and the voice-over is transcribed.
* Transcription is done via the [faster-whisper](https://github.com/SYSTRAN/faster-whisper) library, which uses OpenAI's Whisper model under the hood. By default, Vigenair uses the [small](https://github.com/openai/whisper#available-models-and-languages) multilingual model which provides the optimal quality-performance balance. If you find that it is not working well for your target language you may change the model used by the Cloud Function by setting the `CONFIG_WHISPER_MODEL` variable in the [update_config.sh](service/update_config.sh) script, which can be used to update the function's runtime variables. The transcription output is stored in an `input.vtt` file, along with a `language.txt` file containing the video's primary language, in the same folder as the input video.
* Video analysis is done via the Cloud [Video AI API](https://cloud.google.com/video-intelligence), where visual shots, detected objects - with tracking, labels, people and faces, and recognised logos and any on-screen text within the input video are extracted. The output is stored in an `analysis.json` file in the same folder as the input video.
* Finally, *coherent* audio/video segments are created using the transcription and video intelligence outputs and then cut into individual video files and stored on GCS in an `av_segments_cuts` subfolder under the root video folder. These cuts are then and annotated via multimodal models on Vertex AI, which provides a description and a set of associated keywords / topics per segment. The fully annotated segments (including all information from the Video AI API) are then compiled into a `data.json` file that is stored in the same folder as the input video.
* Finally, *coherent* audio/video segments are created using the transcription and video intelligence outputs and then cut into individual video files and stored on GCS in an `av_segments_cuts` subfolder under the root video folder. These cuts are then annotated via multimodal models on Vertex AI, which provide a description and a set of associated keywords / topics per segment. The fully annotated segments (including all information from the Video AI API) are then compiled into a `data.json` file that is stored in the same folder as the input video.
3. The UI continuously queries GCS for updates while showing a preview of the uploaded video. <center><img src='./img/preview-waiting.png' width="600px" alt="Vigenair UI: Video preview while waiting for analysis results" /></center>
* Once the `input.vtt` is available, a transcription track is embedded onto the video preview.
* Once the `analysis.json` is available, [object tracking](https://cloud.google.com/video-intelligence/docs/object-tracking) results are displayed as bounding boxes directly on the video preview. These can be toggled on/off via the *Object tracking* toggle - which is set to *on* by default.
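The bullets above describe the UI polling GCS until `input.vtt` and `analysis.json` appear. A minimal sketch of that pattern against the GCS JSON API (not code from this repo; the poll interval, attempt cap, and token handling are assumptions):

```typescript
// Illustrative polling sketch. Assumes an OAuth access token with a Cloud
// Storage scope; the endpoint matches `endpointBase` in ui/src/config.ts.
async function pollForObject(
  bucket: string,
  objectPath: string, // e.g. `${folder}/analysis.json`
  accessToken: string,
  intervalMs = 5000,
  maxAttempts = 120
): Promise<string | null> {
  const url = `https://storage.googleapis.com/storage/v1/b/${bucket}/o/${encodeURIComponent(objectPath)}?alt=media`;
  for (let attempt = 0; attempt < maxAttempts; attempt++) {
    const response = await fetch(url, {
      headers: { Authorization: `Bearer ${accessToken}` },
    });
    if (response.ok) {
      return response.text(); // the VTT transcript or the analysis JSON
    }
    // A 404 simply means the backend has not produced the file yet.
    await new Promise(resolve => setTimeout(resolve, intervalMs));
  }
  return null; // gave up waiting
}
```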
1 change: 1 addition & 0 deletions ui/src/config.ts
@@ -19,6 +19,7 @@ export const CONFIG = {
bucket: '<gcs-bucket>',
endpointBase: 'https://storage.googleapis.com/storage/v1',
uploadEndpointBase: 'https://storage.googleapis.com/upload/storage/v1',
authenticatedEndpointBase: 'https://storage.mtls.cloud.google.com',
},
vertexAi: {
endpoint: 'aiplatform.googleapis.com',
13 changes: 3 additions & 10 deletions ui/src/index.ts
@@ -19,8 +19,8 @@
* Do not rename without ensuring all references are updated.
*/

import { CONFIG } from './config';
import { GenerationHelper } from './generation';
import { ScriptUtil } from './script-util';
import { StorageManager } from './storage';
import {
GenerationSettings,
@@ -54,15 +54,8 @@ function getFromGcs(filePath: string, mimeType: string): string | null {
}

// eslint-disable-next-line @typescript-eslint/no-unused-vars
function uploadVideo(
dataUrl: string,
uploadedFileName: string,
analyseAudio: boolean
) {
const folder = `${uploadedFileName}--${analyseAudio ? '' : 'n--'}${Date.now()}--${getEncodedUserId()}`;
StorageManager.uploadFile(dataUrl.split(',')[1], folder);
const videoFilePath = `https://storage.mtls.cloud.google.com/${CONFIG.cloudStorage.bucket}/${folder}/input.mp4`;
return [folder, videoFilePath];
function getUserAuthToken() {
return ScriptUtil.getOAuthToken();
}

// eslint-disable-next-line @typescript-eslint/no-unused-vars
6 changes: 5 additions & 1 deletion ui/src/script-util.ts
@@ -48,7 +48,7 @@ export class ScriptUtil {
method: method,
muteHttpExceptions: true,
headers: {
Authorization: `Bearer ${ScriptApp.getOAuthToken()}`,
Authorization: `Bearer ${ScriptUtil.getOAuthToken()}`,
},
};
const fullParams = Object.assign({}, baseParams, params || {});
@@ -59,4 +59,8 @@
)
);
}

static getOAuthToken() {
return ScriptApp.getOAuthToken();
}
}
6 changes: 5 additions & 1 deletion ui/src/ui/src/app/api-calls/api-calls.mock.service.ts
@@ -56,7 +56,11 @@ export class ApiCallsService implements ApiCalls {
loadPreviousRun(folder: string): string[] {
return ['assets', 'assets/input.mp4'];
}
uploadVideo(file: Blob, analyseAudio: boolean): Observable<string[]> {
uploadVideo(
file: Blob,
analyseAudio: boolean,
encodedUserId: string
): Observable<string[]> {
return new Observable(subscriber => {
setTimeout(() => {
this.ngZone.run(() => {
6 changes: 5 additions & 1 deletion ui/src/ui/src/app/api-calls/api-calls.service.interface.ts
@@ -101,7 +101,11 @@ export interface RenderedVariant {
}

export interface ApiCalls {
uploadVideo(file: Blob, analyseAudio: boolean): Observable<string[]>;
uploadVideo(
file: Blob,
analyseAudio: boolean,
encodedUserId: string
): Observable<string[]>;
loadPreviousRun(folder: string): string[];
deleteGcsFolder(folder: string): void;
getFromGcs(
77 changes: 50 additions & 27 deletions ui/src/ui/src/app/api-calls/api-calls.service.ts
@@ -14,8 +14,9 @@
* limitations under the License.
*/

import { HttpClient, HttpHeaders } from '@angular/common/http';
import { Injectable, NgZone } from '@angular/core';
import { Observable, retry } from 'rxjs';
import { Observable, of, retry, switchMap } from 'rxjs';
import { CONFIG } from '../../../../config';
import {
ApiCalls,
@@ -29,42 +30,64 @@ import {
providedIn: 'root',
})
export class ApiCallsService implements ApiCalls {
constructor(private ngZone: NgZone) {}

blobToDataURL(blob: Blob) {
return new Promise(resolve => {
const a = new FileReader();
a.onload = function (e) {
resolve(e.target!.result);
};
a.readAsDataURL(blob);
});
}
constructor(
private ngZone: NgZone,
private httpClient: HttpClient
) {}

loadPreviousRun(folder: string): string[] {
return [
folder,
`https://storage.mtls.cloud.google.com/${CONFIG.cloudStorage.bucket}/${folder}/input.mp4`,
`${CONFIG.cloudStorage.authenticatedEndpointBase}/${CONFIG.cloudStorage.bucket}/${folder}/input.mp4`,
];
}

uploadVideo(file: File, analyseAudio: boolean): Observable<string[]> {
return new Observable(subscriber => {
this.blobToDataURL(file).then(dataUrl => {
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
// @ts-ignore
google.script.run
.withSuccessHandler((response: string[]) => {
this.ngZone.run(() => {
subscriber.next(response);
subscriber.complete();
});
})
.uploadVideo(dataUrl, file.name, analyseAudio);
});
getUserAuthToken(): Observable<string> {
return new Observable<string>(subscriber => {
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
// @ts-ignore
google.script.run
.withSuccessHandler((userAuthToken: string) => {
this.ngZone.run(() => {
subscriber.next(userAuthToken);
subscriber.complete();
});
})
.getUserAuthToken();
});
}

uploadVideo(
file: File,
analyseAudio: boolean,
encodedUserId: string,
filename = 'input.mp4',
contentType = 'video/mp4'
): Observable<string[]> {
const folder = `${file.name}--${analyseAudio ? '' : 'n--'}${Date.now()}--${encodedUserId}`;
const fullName = encodeURIComponent(`${folder}/${filename}`);
const url = `${CONFIG.cloudStorage.uploadEndpointBase}/b/${CONFIG.cloudStorage.bucket}/o?uploadType=media&name=${fullName}`;

return this.getUserAuthToken().pipe(
switchMap(userAuthToken =>
this.httpClient
.post(url, file, {
headers: new HttpHeaders({
'Authorization': `Bearer ${userAuthToken}`,
'Content-Type': contentType,
}),
})
.pipe(
switchMap(response => {
console.log('Upload complete!', response);
const videoFilePath = `${CONFIG.cloudStorage.authenticatedEndpointBase}/${CONFIG.cloudStorage.bucket}/${folder}/input.mp4`;
return of([folder, videoFilePath]);
})
)
)
);
}

deleteGcsFolder(folder: string): void {
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
// @ts-ignore
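As an aside, the folder name built in `uploadVideo` above packs the original file name, an optional `n` marker for skipped audio analysis, the upload timestamp, and the encoded user id into one string. A hypothetical helper (not in the repo) that unpacks that convention:

```typescript
// Hypothetical parser for the `<fileName>--[n--]<timestamp>--<encodedUserId>`
// convention used by uploadVideo. Assumes file names do not end in a bare
// `--n` segment that would collide with the audio-analysis marker.
function parseRunFolder(folder: string) {
  const parts = folder.split('--');
  const encodedUserId = parts[parts.length - 1];
  const timestamp = Number(parts[parts.length - 2]);
  const analyseAudio = parts[parts.length - 3] !== 'n';
  const fileName = parts.slice(0, analyseAudio ? -2 : -3).join('--');
  return { fileName, analyseAudio, timestamp, encodedUserId };
}

// e.g. parseRunFolder('input.mp4--n--1718000000000--dXNlcklk')
//   -> { fileName: 'input.mp4', analyseAudio: false,
//        timestamp: 1718000000000, encodedUserId: 'dXNlcklk' }
```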
2 changes: 1 addition & 1 deletion ui/src/ui/src/app/app.component.ts
@@ -382,7 +382,7 @@ export class AppComponent {
uploadVideo() {
this.loading = true;
this.apiCallsService
.uploadVideo(this.selectedFile!, this.analyseAudio)
.uploadVideo(this.selectedFile!, this.analyseAudio, this.encodedUserId!)
.subscribe(response => {
this.processVideo(response[0], response[1]);
});
