# Installing WebLLM in Pyodide with Micropip

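This notebook shows how to load and use WebLLM from Pyodide. WebLLM is a JavaScript library, so the cells below load it from a CDN through Pyodide's built-in JavaScript interop rather than installing a Python package with micropip.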

In [None]:
# JavaScript interop is built into Pyodide - no need to install packages
# Import the built-in JavaScript bridge
from js import console, document, window
from pyodide.ffi import create_proxy
import asyncio

# Smoke-test the bridge: reading the user agent goes through the JS `window` proxy
print(
    "JavaScript interop ready!",
    f"Running in: {window.navigator.userAgent}",
    sep="\n",
)

In [None]:
# Load WebLLM from CDN using Pyodide's built-in JS access
from js import document, window, Promise
import asyncio

# Create script element to load WebLLM
script = document.createElement('script')
script.src = 'https://cdn.jsdelivr.net/npm/@mlc-ai/web-llm@0.2.46/lib/index.min.js'
script.type = 'module'

# Create a promise to wait for WebLLM to load
def on_load(event):
    console.log('WebLLM script loaded')
    # Set up WebLLM reference
    window.WebLLM = window.tvmjs.webllm if hasattr(window, 'tvmjs') else None

def on_error(event):
    console.error('Failed to load WebLLM script')

script.addEventListener('load', create_proxy(on_load))
script.addEventListener('error', create_proxy(on_error))

# Add script to document head
document.head.appendChild(script)

print("Loading WebLLM...")
# Give some time for the script to load
await asyncio.sleep(2)

if hasattr(window, 'tvmjs') and hasattr(window.tvmjs, 'webllm'):
    print("‚úÖ WebLLM loaded successfully!")
    window.WebLLM = window.tvmjs.webllm
else:
    print("‚ö†Ô∏è WebLLM might still be loading... try running the next cell")

In [None]:
# Create a Python wrapper for WebLLM in Pyodide
from js import window, console, Object
from pyodide.ffi import to_js, create_proxy
import json

class PyodideWebLLM:
    """Async Python wrapper around a WebLLM engine that lives on the JS side.

    ``check_webllm`` looks the library up at ``window.tvmjs.webllm``,
    ``load_model`` creates an engine through ``CreateMLCEngine``, and
    ``chat`` / ``complete`` send one request each to that engine.
    """

    def __init__(self):
        self.engine = None   # JS engine proxy, set by load_model()
        self.ready = False   # True once load_model() has succeeded
        self.webllm = None   # JS library proxy, set by check_webllm()

    @staticmethod
    def _to_js_request(payload):
        """Convert a (possibly nested) Python dict into plain JS objects.

        ``to_js`` turns Python dicts into JS ``Map`` instances unless a
        ``dict_converter`` is given; ``Object.fromEntries`` produces ordinary
        object literals instead, so property access on the JS side works.
        """
        return to_js(payload, dict_converter=Object.fromEntries)

    def check_webllm(self):
        """Check if WebLLM is available.

        Returns:
            True (and caches the library on ``self.webllm``) when
            ``window.tvmjs.webllm`` exists, otherwise False.
        """
        if hasattr(window, 'tvmjs') and hasattr(window.tvmjs, 'webllm'):
            self.webllm = window.tvmjs.webllm
            return True
        return False

    async def load_model(self, model_id="Llama-3.2-1B-Instruct-q4f16_1-MLC"):
        """Load a WebLLM model.

        Args:
            model_id: Model identifier handed to ``CreateMLCEngine``.

        Returns:
            True when the engine was created, False when WebLLM is missing or
            engine creation raised (the error is printed and logged, not raised).
        """
        if not self.check_webllm():
            print("‚ùå WebLLM not loaded. Run the previous cell first.")
            return False

        try:
            print(f"üîÑ Loading model: {model_id}")
            print("‚è≥ This may take several minutes for the first download...")

            # Create the engine using WebLLM's CreateMLCEngine
            self.engine = await self.webllm.CreateMLCEngine(model_id)
            self.ready = True
            print("‚úÖ Model loaded successfully!")
            return True
        except Exception as e:
            print(f"‚ùå Error loading model: {e}")
            console.error(e)
            return False

    async def chat(self, message, max_tokens=512, temperature=0.7):
        """Chat with the model.

        Args:
            message: Text sent as a single ``user`` message.
            max_tokens: Value of the request's ``max_tokens`` field.
            temperature: Value of the request's ``temperature`` field.

        Returns:
            The content of the first choice, or an error string when the model
            is not loaded or the request raised.
        """
        if not self.ready:
            return "‚ùå Model not loaded. Call load_model() first."

        try:
            # Build the whole request in Python and convert it once, so nested
            # dicts become plain JS objects as well.
            request = {
                "messages": [{"role": "user", "content": message}],
                "max_tokens": max_tokens,
                "temperature": temperature,
            }

            # Generate response
            response = await self.engine.chat.completions.create(
                self._to_js_request(request)
            )

            # Extract the response content
            return response.choices[0].message.content
        except Exception as e:
            error_msg = f"‚ùå Chat error: {e}"
            console.error(e)
            return error_msg

    async def complete(self, prompt, max_tokens=512, temperature=0.7):
        """Complete text.

        Args:
            prompt: Text placed in the request's ``prompt`` field.
            max_tokens: Value of the request's ``max_tokens`` field.
            temperature: Value of the request's ``temperature`` field.

        Returns:
            The text of the first choice, or an error string when the model is
            not loaded or the request raised.
        """
        if not self.ready:
            return "‚ùå Model not loaded. Call load_model() first."

        try:
            request = {
                "prompt": prompt,
                "max_tokens": max_tokens,
                "temperature": temperature,
            }

            response = await self.engine.completions.create(
                self._to_js_request(request)
            )
            return response.choices[0].text
        except Exception as e:
            error_msg = f"‚ùå Completion error: {e}"
            console.error(e)
            return error_msg

# Build the shared wrapper object; the model-loading cell calls llm.load_model()
llm = PyodideWebLLM()
print(
    "ü§ñ WebLLM wrapper created!",
    "üìù Next: Run the model loading cell",
    sep="\n",
)

In [None]:
# Load the model (this may take a few minutes for first download)
print("üöÄ Starting model load...")
print("üí° Tip: The first time will download ~1-2GB, please be patient!")

success = await llm.load_model()

if success:
    print("üéâ Ready to chat!")
    print("üí¨ You can now run the chat examples below")
else:
    print("‚ùå Model loading failed. Try reloading WebLLM in the previous cells.")

In [None]:
# Chat example
print("üí¨ Chat Example")
print("-" * 50)

question = "Explain what Pyodide is in simple terms"
print(f"üë§ User: {question}")
print("ü§ñ Assistant: ", end="")

response = await llm.chat(question)
print(response)
print("\n" + "="*50)

In [None]:
# Text completion example
print("‚úçÔ∏è Text Completion Example")
print("-" * 50)

prompt = "The advantages of running Python in the browser are:"
print(f"üìù Prompt: {prompt}")
print("ü§ñ Completion: ", end="")

response = await llm.complete(prompt)
print(response)
print("\n" + "="*50)

In [None]:
# Demo chat session with multiple examples
print("ü§ñ WebLLM Demo Chat Session")
print("="*50)

# Predefined test messages for demonstration
test_messages = [
    "Hello! What can you do?",
    "Explain machine learning in one sentence",
    "What makes WebLLM special?",
    "How does running AI in the browser help developers?"
]

for i, msg in enumerate(test_messages, 1):
    print(f"\nüí¨ Example {i}:")
    print("-" * 30)
    print(f"üë§ User: {msg}")
    print("ü§ñ Assistant: ", end="")
    
    response = await llm.chat(msg, max_tokens=256)
    print(response)

print("\nüéâ Demo complete!")
print("üí° Try modifying the test_messages list with your own questions!")

In [None]:
# Alternative: Using the WebLLM Extension

If the direct JavaScript approach above doesn't work, you can also use our custom WebLLM extension that should be automatically loaded in this JupyterLite environment.

```python
# Check if WebLLM extension is available
if hasattr(window, 'WebLLMHelper'):
    print("✅ WebLLM Extension found!")
    
    # Use the extension's helper class
    extension_llm = window.WebLLMHelper.new()
    
    # Initialize and use
    await extension_llm.initialize()
    response = await extension_llm.chat("Hello from the extension!")
    print(f"Extension response: {response}")
else:
    print("⚠️ WebLLM Extension not found - using direct approach above")
```

Both approaches should work, but the extension provides a more integrated experience.