# WebLLM from JavaScript (JupyterLite)

This notebook uses the JavaScript kernel to talk to a WebLLM worker directly from the browser.

In [None]:
// Install the shared `globalThis.webllmHelpers` object once per kernel session.
// The setup is awaited so this cell only completes after the helpers exist and
// so a failed CDN import surfaces here instead of as an unhandled rejection.
await (async () => {
  if (!globalThis.webllmHelpers) {
    // Module-worker script: forwards every message to WebLLM's worker-side handler.
    const workerSource = `
      import { WebWorkerMLCEngineHandler } from 'https://esm.run/@mlc-ai/web-llm';
      const handler = new WebWorkerMLCEngineHandler();
      self.onmessage = (msg) => handler.onmessage(msg);
    `;
    // Import before creating the blob URL so a failed import leaves no URL behind.
    const module = await import('https://esm.run/@mlc-ai/web-llm');
    // The blob URL is kept alive (never revoked) because createEngine reuses it
    // for every worker it spawns.
    const workerURL = URL.createObjectURL(new Blob([workerSource], { type: 'application/javascript' }));
    globalThis.webllmHelpers = {
      module,
      workerURL,

      /**
       * Spawn a module worker from `workerURL` and create an engine bound to it.
       * @param {string} model - Model identifier forwarded to WebLLM.
       * @param {object} [options] - Forwarded unchanged to `CreateWebWorkerMLCEngine`.
       * @returns {Promise<object>} Whatever `CreateWebWorkerMLCEngine` resolves to.
       * @throws Rethrows the creation error after terminating the new worker.
       */
      async createEngine(model, options = {}) {
        const worker = new Worker(workerURL, { type: 'module' });
        try {
          return await module.CreateWebWorkerMLCEngine(worker, model, options);
        } catch (err) {
          // Do not leave an orphaned worker running when engine creation fails.
          worker.terminate();
          throw err;
        }
      },

      /**
       * Run a single chat-completion request.
       * @param {object} engine - Engine returned by `createEngine`.
       * @param {object} request - Passed as-is to `engine.chat.completions.create`.
       * @returns {Promise<object>} The value resolved by `engine.chat.completions.create`.
       */
      async chat(engine, request) {
        return await engine.chat.completions.create(request);
      },

      /**
       * Run a chat-completion request with `stream: true` forced on and hand
       * each chunk to `callbacks.onChunk`.
       * @param {object} engine - Engine returned by `createEngine`.
       * @param {object} request - Request fields; any `stream` value is overridden.
       * @param {{ onChunk?: (chunk: object) => void }} [callbacks] - Optional hooks;
       *   `null`/`undefined` means no hooks.
       * @returns {Promise<*>} Result of `engine.getMessage()` once the stream ends
       *   (presumably the full generated text; confirm against the WebLLM docs).
       */
      async stream(engine, request, callbacks = {}) {
        const stream = await engine.chat.completions.create({ ...request, stream: true });
        for await (const chunk of stream) {
          // `callbacks?.` tolerates an explicit null, which the default does not cover.
          callbacks?.onChunk?.(chunk);
        }
        return await engine.getMessage();
      }
    };
    console.log('WebLLM helpers initialised.');
  } else {
    console.log('WebLLM helpers already available.');
  }
})();


Create an engine that runs the chat model inside a Web Worker. Progress messages show download status.

In [None]:
// Load the model inside a worker; the text of every progress report is logged.
const engine = await webllmHelpers.createEngine(
  'SmolLM2-360M-Instruct-q0f16-MLC',
  {
    initProgressCallback: ({ text }) => {
      console.log(text);
    },
  },
);
// Bare trailing expression, kept as in the original cell.
engine;


Call the OpenAI-compatible chat API without streaming.

In [None]:
// One-shot (non-streaming) request through the chat helper.
const response = await webllmHelpers.chat(
  engine,
  {
    // Sampling settings first, then the conversation itself.
    max_tokens: 128,
    temperature: 0.3,
    messages: [
      { role: 'system', content: 'You are a succinct assistant living in a Web Worker.' },
      { role: 'user', content: 'Name three perks of browser-based LLMs.' },
    ],
  },
);
// Print just the reply text of the first choice.
console.log(response.choices[0].message.content);
// Bare trailing expression, kept as in the original cell.
response;


Stream tokens as they are generated.

In [None]:
// Accumulates every text fragment delivered by the stream.
let streamed = '';
await webllmHelpers.stream(
  engine,
  {
    messages: [
      { role: 'system', content: 'Compose poetry with a playful tone.' },
      { role: 'user', content: 'Write a limerick about WebLLM notebooks.' },
    ],
    temperature: 0.6,
    max_tokens: 128,
    stream_options: { include_usage: true },
  },
  {
    onChunk: (chunk) => {
      const piece = chunk.choices?.[0]?.delta?.content;
      // Chunks with no choices, no delta, or empty content add nothing.
      if (!piece) {
        return;
      }
      streamed += piece;
      console.log(piece);
    },
  },
);
// Bare trailing expression, kept as in the original cell.
streamed;
