huggingface · Honry · Feb 21, 2024 · Feb 28, 2024
diff --git a/examples/demo-site/src/index.html b/examples/demo-site/src/index.html
@@ -86,7 +86,7 @@ <h2 class="fw-bolder">Demo</h2>
           <label>Task: </label>
           <div class="col-12 mt-1">
             <select id="task" class="form-select">
-              <option value="translation" selected>
+              <option value="translation">
                 Translation w/ t5-small (78 MB)
               </option>
               <option value="text-generation">
@@ -119,7 +119,7 @@ <h2 class="fw-bolder">Demo</h2>
               <option value="image-to-text">
                 Image to text w/ vit-gpt2-image-captioning (246 MB)
               </option>
-              <option value="image-classification">
+              <option value="image-classification" selected>
                 Image classification w/ google/vit-base-patch16-224 (88 MB)
               </option>
               <option value="zero-shot-image-classification">

diff --git a/examples/demo-site/src/worker.js b/examples/demo-site/src/worker.js
@@ -45,13 +45,15 @@ self.addEventListener('message', async (event) => {
 class PipelineFactory {
     static task = null;
     static model = null;
+    static quantized = true;
 
     // NOTE: instance stores a promise that resolves to the pipeline
     static instance = null;
 
-    constructor(tokenizer, model) {
+    constructor(tokenizer, model, quantized) {
         this.tokenizer = tokenizer;
         this.model = model;
+        this.quantized = quantized;
     }
 
     /**
@@ -65,7 +67,8 @@ class PipelineFactory {
         }
         if (this.instance === null) {
             this.instance = pipeline(this.task, this.model, {
-                progress_callback: progressCallback
+                progress_callback: progressCallback,
+                quantized: this.quantized,
             });
         }
 
@@ -131,6 +134,7 @@ class ImageToTextPipelineFactory extends PipelineFactory {
 class ImageClassificationPipelineFactory extends PipelineFactory {
     static task = 'image-classification';
     static model = 'Xenova/vit-base-patch16-224';
+    static quantized = false;
 }
 
 

diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -38,12 +38,12 @@
   },
   "homepage": "https://github.com/xenova/transformers.js#readme",
   "dependencies": {
-    "onnxruntime-web": "1.14.0",
+    "onnxruntime-web": "1.18.0-dev.20240130-9f68a27c7a",
     "sharp": "^0.32.0",
     "@huggingface/jinja": "^0.1.0"
   },
   "optionalDependencies": {
-    "onnxruntime-node": "1.14.0"
+    "onnxruntime-node": "1.18.0-dev.20240130-9f68a27c7a"
   },
   "devDependencies": {
     "@types/jest": "^29.5.1",

diff --git a/src/backends/onnx.js b/src/backends/onnx.js
@@ -19,12 +19,13 @@
 // NOTE: Import order matters here. We need to import `onnxruntime-node` before `onnxruntime-web`.
 // In either case, we select the default export if it exists, otherwise we use the named export.
 import * as ONNX_NODE from 'onnxruntime-node';
-import * as ONNX_WEB from 'onnxruntime-web';
+import * as ONNX_WEB from 'onnxruntime-web/experimental';
 
 /** @type {import('onnxruntime-web')} The ONNX runtime module. */
 export let ONNX;
 
 export const executionProviders = [
+    // 'webnn',
     // 'webgpu',
     'wasm'
 ];

diff --git a/src/env.js b/src/env.js
@@ -59,8 +59,9 @@ const localModelPath = RUNNING_LOCALLY
 // In practice, users should probably self-host the necessary .wasm files.
 onnx_env.wasm.wasmPaths = RUNNING_LOCALLY
     ? path.join(__dirname, '/dist/')
-    : `https://cdn.jsdelivr.net/npm/@xenova/transformers@${VERSION}/dist/`;
-
+    // : `https://cdn.jsdelivr.net/npm/@xenova/transformers@${VERSION}/dist/`;
+    // NOTE: copy the onnxruntime-web .wasm files to examples/demo-site/src/dist/ so they can be loaded from `<origin>/dist/`.
+    : location.origin + '/dist/';
 
 /**
  * Global variable used to control execution. This provides users a simple way to configure Transformers.js.

diff --git a/src/models.js b/src/models.js
@@ -123,9 +123,25 @@ async function constructSession(pretrained_model_name_or_path, fileName, options
     let buffer = await getModelFile(pretrained_model_name_or_path, modelFileName, true, options);
 
     try {
-        return await InferenceSession.create(buffer, {
-            executionProviders,
-        });
+        let sessionOptions = { executionProviders };
+        if (pretrained_model_name_or_path === 'Xenova/vit-base-patch16-224') {
+            // Hard-coded example: run Xenova/vit-base-patch16-224 on the WebNN execution provider.
+            sessionOptions = {
+                executionProviders: [{
+                    name: "webnn",
+                    deviceType: "gpu",
+                }],
+                // Model input `pixel_values` is float32[batch_size, num_channels, height, width].
+                // WebNN only supports static-shape models, so pin every free dimension via freeDimensionOverrides.
+                freeDimensionOverrides: {
+                    batch_size: 1,
+                    num_channels: 3,
+                    height: 224,
+                    width: 224,
+                },
+            };
+        }
+        return await InferenceSession.create(buffer, sessionOptions);
     } catch (err) {
         // If the execution provided was only wasm, throw the error
         if (executionProviders.length === 1 && executionProviders[0] === 'wasm') {
@@ -203,9 +219,12 @@ function validateInputs(session, inputs) {
 async function sessionRun(session, inputs) {
     const checkedInputs = validateInputs(session, inputs);
     try {
-        // @ts-ignore
-        let output = await session.run(checkedInputs);
+        // Feed session.run() the ONNX Runtime tensor wrapped by each input (its `ort_tensor`).
+        const ortFeed = Object.fromEntries(Object.entries(checkedInputs).map(([k, v]) => [k, v.ort_tensor]));
+        let output = await session.run(ortFeed);
         output = replaceTensors(output);
+        // TODO: if GPU buffers are used for the KV cache, we own the `past_key_values*`
+        // inputs and must dispose() them here once the run has completed.
         return output;
     } catch (e) {
         // This usually occurs when the inputs are of the wrong type.

diff --git a/src/utils/tensor.js b/src/utils/tensor.js
@@ -17,6 +17,7 @@ import {
 
 const DataTypeMap = Object.freeze({
     float32: Float32Array,
+    float16: Uint16Array,
     float64: Float64Array,
     string: Array, // string[]
     int8: Int8Array,
@@ -39,33 +40,48 @@ const ONNXTensor = ONNX.Tensor;
 
 export class Tensor {
     /** @type {number[]} Dimensions of the tensor. */
-    dims;
+    get dims() {
+        // @ts-ignore
+        return this.ort_tensor.dims;
+    }
+    set dims(value) {
+        // FIXME: ONNXTensor declares dims as readonly so one needs to use the constructor() if dims change.
+        // @ts-ignore
+        this.ort_tensor.dims = value;
+    }
 
     /** @type {DataType} Type of the tensor. */
-    type;
+    get type() {
+        return this.ort_tensor.type;
+    }
 
     /** @type {DataArray} The data stored in the tensor. */
-    data;
+    get data() {
+        return this.ort_tensor.data;
+    }
 
     /** @type {number} The number of elements in the tensor. */
-    size;
+    get size() {
+        return this.ort_tensor.size;
+    }
+
+    /** @type {import('onnxruntime-common').Tensor} The ONNX Runtime tensor this Tensor wraps; dims/type/data/size are read from it. */
+    ort_tensor;
 
     /**
      * Create a new Tensor or copy an existing Tensor.
      * @param {[DataType, DataArray, number[]]|[import('onnxruntime-common').Tensor]} args
      */
     constructor(...args) {
         if (args[0] instanceof ONNXTensor) {
-            // Create shallow copy
-            Object.assign(this, args[0]);
-
+            this.ort_tensor = args[0];
         } else {
             // Create new tensor
-            Object.assign(this, new ONNXTensor(
+            this.ort_tensor = new ONNXTensor(
                 /** @type {DataType} */(args[0]),
                 /** @type {Exclude<import('./maths.js').AnyTypedArray, Uint8ClampedArray>} */(args[1]),
                 args[2]
-            ));
+            );
         }
 
         return new Proxy(this, {
@@ -89,6 +105,14 @@ export class Tensor {
         });
     }
 
+    /**
+     * Dispose of the wrapped ONNX Runtime tensor by delegating to its `dispose()`.
+     * NOTE: the `ort_tensor` reference is left in place (not cleared) afterwards.
+     */
+    dispose() {
+        this.ort_tensor.dispose();
+    }
+
     /**
      * Returns an iterator object for iterating over the tensor data in row-major order.
      * If the tensor has more than one dimension, the iterator will yield subarrays.