In [None]:
#@title Setup
import os
import sys
import subprocess

# 1. Install or upgrade the MLC LLM nightly wheels (cu128 builds) from the MLC
#    wheel index. Note: `-U` upgrades in place; it does NOT force a reinstall.
print("📦 Installing MLC LLM and dependencies...")
!python -m pip install --pre -U -f https://mlc.ai/wheels mlc-llm-nightly-cu128 mlc-ai-nightly-cu128
# git-lfs is installed via apt; other cells call `git lfs install` before cloning model weights.
!apt-get install -y git-lfs


In [None]:
!git clone https://www.github.com/emscripten-core/emsdk.git
!cd /content/emsdk && ./emsdk install tot
!cd /content/emsdk && ./emsdk activate tot

In [None]:
import os

# 1. Install Rust (Standard script)
# We use -y to say "yes" to prompts automatically
!curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y

# 2. Add Rust to the system PATH for this session
# (Colab doesn't automatically load the path after install)
os.environ['PATH'] += ":/root/.cargo/bin"
!rustup target add wasm32-unknown-emscripten

# 2. Verify it is installed
print("✅ Target installed. Verifying...")
!rustup target list --installed

In [None]:
%%shell
set -e

# 1. Install Rust if missing, then load Cargo's environment into this shell.
if [ ! -f "$HOME/.cargo/env" ]; then
    echo "🦀 Installing Rust..."
    curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
fi
source "$HOME/.cargo/env"
# `rustup target add` is idempotent, so run it on every path: a Rust install
# that already existed may not have the Emscripten target yet.
rustup target add wasm32-unknown-emscripten

# 2. Install Emscripten if missing
if [ ! -d "/content/emsdk" ]; then
    echo "🔧 Installing Emscripten..."
    git clone https://github.com/emscripten-core/emsdk.git /content/emsdk
    cd /content/emsdk
    ./emsdk install latest
    ./emsdk activate latest
fi
source /content/emsdk/emsdk_env.sh

# 3. Clone MLC LLM (with submodules) if it doesn't exist
if [ ! -d "/content/mlc-llm" ]; then
    echo "📂 Cloning MLC LLM..."
    git clone --recursive https://github.com/mlc-ai/mlc-llm.git /content/mlc-llm
fi

# 4. Build the Web Runtime
cd /content/mlc-llm

# Pre-requisite: Prepare Emscripten dependencies
./web/prep_emcc_deps.sh

# Create build directory
mkdir -p build/wasm
cd build/wasm

# Configure with emcmake
emcmake cmake ../.. \
    -DCMAKE_BUILD_TYPE=Release \
    -DUSE_WEBGPU=ON \
    -DUSE_WASM=ON \
    -DCMAKE_CXX_FLAGS="-O3"

# Compile and install as two separate commands. Under `set -e`, a failure on
# the left-hand side of `&&` does NOT abort the script, so the original
# `make ... && make install` could fall through to the success message after
# a failed build.
make -j"$(nproc)"
make install
echo "✅ Build Complete!"

# Command

Convert the downloaded Vicuna weights to `q4f32_1` shards with:

```
python -m mlc_llm convert_weight /content/dist/models/vicuna-7b-v1.5/ --quantization q4f32_1 -o /content/vc7b
```


In [None]:
#@title gen config

import json
import os

# --- Settings -------------------------------------------------------------
# These must match the earlier conversion step; the weight shards live in
# /content/vc7b and the config file is written next to them.
OUTPUT_DIR = "/content/vc7b"
QUANTIZATION = "q4f32_1"

# --- Model architecture section (standard Vicuna 1.5 values) --------------
_vicuna_model_config = {
    "hidden_size": 4096,
    "intermediate_size": 11008,
    "num_attention_heads": 32,
    "num_hidden_layers": 32,
    "rms_norm_eps": 1e-05,
    "vocab_size": 32000,
    "position_embedding_base": 10000.0,
    "context_window_size": 4096,
    "prefill_chunk_size": 4096,
    "tensor_parallel_shards": 1,
    "head_dim": 128,
    "dtype": "float32",
}

# --- Conversation template (the part the CLI failed to generate) ----------
_vicuna_conv_template = {
    "name": "vicuna_v1.1",
    "system_template": "{system_message}",
    "system_message": "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.",
    "roles": {"user": "USER", "assistant": "ASSISTANT"},
    "role_msg_sep": " ",
    "role_empty_sep": " ",
    "seps": [" ", "</s>"],
    "stop_str": ["</s>"],
    "stop_token_ids": [2],
    "add_bos": True,
}

# --- Full chat config, assembled in the same key order as it is serialized -
config_data = {
    "model_type": "llama",
    "quantization": QUANTIZATION,
    "model_config": _vicuna_model_config,
    "vocab_size": 32000,
    "context_window_size": 4096,
    "sliding_window_size": -1,
    "prefill_chunk_size": 4096,
    "attention_sink_size": -1,
    "tensor_parallel_shards": 1,
    "conv_template": _vicuna_conv_template,
}

# --- Write mlc-chat-config.json -------------------------------------------
output_file = os.path.join(OUTPUT_DIR, "mlc-chat-config.json")

# The directory normally exists already (it holds the shards); create it if not.
os.makedirs(OUTPUT_DIR, exist_ok=True)

serialized_config = json.dumps(config_data, indent=2)
with open(output_file, "w") as f:
    f.write(serialized_config)

print(f"✅ Successfully created config file at: {output_file}")
print("   You can now proceed to upload the '/content/vc7b' folder to Hugging Face.")

In [None]:
# Clone MLC LLM together with its git submodules (`--recursive`) into
# ./mlc-llm, relative to the notebook's current working directory.
!git clone --recursive https://github.com/mlc-ai/mlc-llm.git

# New Section

Commands to run from a shell after cloning:

```
cd mlc-llm

ln -sf mlc_wasm_runtime.bc wasm_runtime.bc

./web/prep_emcc_deps.sh

cd web/dist/wasm

ln -sf mlc_wasm_runtime.bc wasm_runtime.bc

export TVM_LIBRARY_PATH=$PWD/web/dist/wasm

export TVM_HOME=$PWD/3rdparty/tvm
```



In [None]:
#@title get config
# Fetch the published mlc-chat-config.json from the Hugging Face model repo.
# wget saves the file into the notebook's current working directory.
!wget https://huggingface.co/ford442/vicuna-7b-q4f32-webllm/resolve/main/mlc-chat-config.json

Compile the model library to WASM with:

```
python -m mlc_llm compile /content/mlc-chat-config.json --device webgpu -o /content/vicuna_model.wasm
```

In [None]:
import os
from getpass import getpass

from huggingface_hub import login, HfApi

# 1. Hugging Face "WRITE" token.
#    SECURITY: never paste a token into the notebook. It is read from the
#    HF_TOKEN environment variable, or prompted for with hidden input.
#    The token that used to be embedded in this cell was exposed in the
#    notebook source and should be revoked on huggingface.co.
HF_TOKEN = os.environ.get("HF_TOKEN") or getpass("Hugging Face write token: ")

# 2. VERIFY THIS IS YOUR USERNAME
# If your username is NOT 'ford442', change it here.
USERNAME = "ford442"
MODEL_NAME = "vicuna-7b-q4f32-web"
REPO_ID = f"{USERNAME}/{MODEL_NAME}"

# Local folder whose contents are uploaded to the repo.
FOLDER_PATH = "/content/vc7b"

# --- Upload Process ---
print(f"🔑 Logging in...")
try:
    login(token=HF_TOKEN)
except Exception as e:
    # Reported but not re-raised; the create/upload step below will then fail
    # and print its own troubleshooting hints.
    print(f"❌ Login failed: {e}")

print(f"\n🚀 Creating repo: {REPO_ID}")
api = HfApi()

try:
    # Create repo (if it fails here, the token or username is still wrong)
    api.create_repo(repo_id=REPO_ID, repo_type="model", exist_ok=True)

    # Upload
    print(f"📤 Uploading files from {FOLDER_PATH}...")
    api.upload_folder(
        folder_path=FOLDER_PATH,
        repo_id=REPO_ID,
        repo_type="model"
    )
    print(f"\n✅ Success! Your model is live: https://huggingface.co/{REPO_ID}")

except Exception as e:
    print(f"\n❌ Error: {e}")
    print("Double check that:")
    print("1. Your token is a 'WRITE' token.")
    print(f"2. You are actually the user '{USERNAME}' on Hugging Face.")

In [None]:

import os
from getpass import getpass

from huggingface_hub import login, HfApi

# ---------------------------------------------------------
# Hugging Face WRITE token
# ---------------------------------------------------------
# SECURITY: never paste a token into the notebook. It is read from the
# HF_TOKEN environment variable, or prompted for with hidden input.
# The token that used to be embedded in this cell was exposed in the notebook
# source and should be revoked on huggingface.co.
HF_TOKEN = os.environ.get("HF_TOKEN") or getpass("Hugging Face write token: ")

# ---------------------------------------------------------
# Settings
# ---------------------------------------------------------
REPO_ID = "ford442/vicuna-7b-webllm-q4f32"
FOLDER_PATH = "/content/vc7b"

# 1. Login directly using the token string
print(f"🔑 Logging in with provided token...")
login(token=HF_TOKEN)

# 2. Upload
print(f"\n🚀 Uploading {FOLDER_PATH} to {REPO_ID}...")
api = HfApi()

# Create the repo if it doesn't exist
api.create_repo(repo_id=REPO_ID, repo_type="model", exist_ok=True)

# Upload folder
api.upload_folder(
    folder_path=FOLDER_PATH,
    repo_id=REPO_ID,
    repo_type="model"
)

print(f"✅ Done! Your model is live at: https://huggingface.co/{REPO_ID}")

In [None]:
%%shell
set -e

# 1. Install the MLC LLM Python package if it is not importable yet.
#    The nightly build is used to match the runtime setup.
if ! python -c "import mlc_llm" &> /dev/null; then
    echo "📦 Installing MLC LLM..."
    python -m pip install --pre -U -f https://mlc.ai/wheels mlc-llm-nightly-cu128 mlc-ai-nightly-cu128
fi

# git-lfs is checked independently of mlc_llm: a runtime can have the Python
# package without git-lfs, and `git lfs install` below would then abort the
# whole cell under `set -e`.
if ! git lfs version &> /dev/null; then
    echo "📦 Installing git-lfs..."
    apt-get install -y git-lfs
fi

# 2. Setup Emscripten Environment (Required for WASM compilation)
source /content/emsdk/emsdk_env.sh

# 3. Download Model
MODEL_ID="lmsys/vicuna-7b-v1.5"
MODEL_DIR="dist/models/vicuna-7b-v1.5"

echo "⬇️ Downloading $MODEL_ID..."
git lfs install
mkdir -p dist/models
if [ ! -d "$MODEL_DIR" ]; then
    git clone "https://huggingface.co/$MODEL_ID" "$MODEL_DIR"
else
    echo "   Model directory exists, skipping clone."
fi

# 4. Define Output Paths
QUANTIZATION="q4f32_1"
OUTPUT_NAME="vicuna-7b-v1.5-$QUANTIZATION-webllm"
OUTPUT_DIR="dist/$OUTPUT_NAME"

# 5. Convert Weights & Generate Config
echo "⚙️ Converting weights to $QUANTIZATION..."
python -m mlc_llm convert_weight "$MODEL_DIR/" \
    --quantization "$QUANTIZATION" \
    -o "$OUTPUT_DIR"

echo "📝 Generating config..."
python -m mlc_llm gen_config "$MODEL_DIR/" \
    --quantization "$QUANTIZATION" \
    --conv-template vicuna_v1.1 \
    -o "$OUTPUT_DIR"

# 6. Compile Model to WASM
echo "🔨 Compiling model to WASM..."
python -m mlc_llm compile "$OUTPUT_DIR/mlc-chat-config.json" \
    --device webgpu \
    -o "$OUTPUT_DIR/vicuna-7b-v1.5-$QUANTIZATION-webgpu.wasm"

echo "✅ Conversion and Compilation Complete!"
echo "📂 Output contents of $OUTPUT_DIR:"
ls -lh "$OUTPUT_DIR"

In [None]:
# @title WebLLM Model Converter
import os
import sys
import subprocess

# 1. Install MLC LLM Nightly (Using cu128 to match Colab's latest environment)
print("📦 Checking/Installing MLC LLM and dependencies...")
# We force reinstall to ensure we have the correct version matching the runtime
!{sys.executable} -m pip install --pre --force-reinstall mlc-llm-nightly-cu128 mlc-ai-nightly-cu128 -f https://mlc.ai/wheels
!apt-get install -y git-lfs

# 2. Configuration
MODEL_ID = "lmsys/vicuna-7b-v1.5"
MODEL_NAME = MODEL_ID.split("/")[-1]
QUANTIZATION = "q4f32_1"

# Create directories
!mkdir -p dist/models

# 3. Clone the Original Model
print(f"⬇️ Downloading {MODEL_ID} from HuggingFace...")
!git lfs install
if not os.path.exists(f"dist/models/{MODEL_NAME}"):
    !git clone https://huggingface.co/{MODEL_ID} dist/models/{MODEL_NAME}
else:
    print("   Model directory exists. Skipping clone (ensure it's complete).")

# 4. Conversion & Config Generation
output_name = f"{MODEL_NAME}-{QUANTIZATION}-MLC"
output_path = f"dist/{output_name}"

print(f"\n⚙️ Converting to {QUANTIZATION}...")
print(f"   Input: dist/models/{MODEL_NAME}")
print(f"   Output: {output_path}")

# Run conversion
convert_cmd = f"{sys.executable} -m mlc_llm convert_weight dist/models/{MODEL_NAME}/ --quantization {QUANTIZATION} -o {output_path}"
if os.system(convert_cmd) != 0:
    raise Exception("Weight conversion failed! (Possible OOM - Try restarting runtime)")

# Run config generation
print("\n📝 Generating Configuration...")
config_cmd = f"{sys.executable} -m mlc_llm gen_config dist/models/{MODEL_NAME}/ --quantization {QUANTIZATION} --conv-template vicuna_v1.1 -o {output_path}"
if os.system(config_cmd) != 0:
    raise Exception("Config generation failed!")

print(f"\n✅ Success! Model prepared at: {output_path}")

In [None]:
# @title 🚀 Final Upload Script
import os
from getpass import getpass

from huggingface_hub import login, HfApi

# ---------------------------------------------------------
# 1. HUGGING FACE "WRITE" TOKEN
# ---------------------------------------------------------
# SECURITY: never paste a token into the notebook. It is read from the
# HF_TOKEN environment variable, or prompted for with hidden input.
# The token that used to be embedded in this cell was exposed in the notebook
# source and should be revoked on huggingface.co.
HF_TOKEN = os.environ.get("HF_TOKEN") or getpass("Hugging Face write token: ")

# ---------------------------------------------------------
# 2. CONFIGURATION
# ---------------------------------------------------------
# Based on your previous logs, your username is ford442
USERNAME = "ford442"
MODEL_NAME = "vicuna-7b-q4f32-web"
REPO_ID = f"{USERNAME}/{MODEL_NAME}"
FOLDER_PATH = "/content/vc7b"

# ---------------------------------------------------------
# 3. UPLOAD
# ---------------------------------------------------------
print(f"🔑 Logging in...")
login(token=HF_TOKEN)

print(f"\n🚀 Deploying model to: https://huggingface.co/{REPO_ID}")
api = HfApi()

try:
    # Create repo if it doesn't exist
    api.create_repo(repo_id=REPO_ID, repo_type="model", exist_ok=True)

    # Upload all files
    api.upload_folder(
        folder_path=FOLDER_PATH,
        repo_id=REPO_ID,
        repo_type="model"
    )

    print(f"\n✅ SUCCESS! The model is live and ready for WebLLM.")
    print(f"🔗 Link: https://huggingface.co/{REPO_ID}")

except Exception as e:
    # Errors are reported rather than raised so the cell output stays readable.
    print(f"\n❌ Error: {e}")

In [None]:
!pip install paramiko

In [None]:
import os
from getpass import getpass

import paramiko

# --- Configuration ---
LOCAL_DIR = "dist/vicuna-7b-v1.5-q4f32_1-MLC"  # Directory containing the shards
REMOTE_HOST = "1ink.us"
REMOTE_PORT = 22
USERNAME = "ford442"
# SECURITY: never store the password in the notebook. It is read from the
# SFTP_PASSWORD environment variable, or prompted for with hidden input.
# The password that used to be embedded here was exposed in the notebook
# source and should be changed on the server.
PASSWORD = os.environ.get("SFTP_PASSWORD") or getpass(
    f"SFTP password for {USERNAME}@{REMOTE_HOST}: "
)
REMOTE_DIR = "files/vicuna"  # Destination folder on the server (relative to home)

# --- Upload Script ---
print(f"🚀 Connecting to {REMOTE_HOST}...")
# NOTE(review): the server host key is not verified here; consider passing
# `hostkey=` to `transport.connect` if the server's key is known.
transport = paramiko.Transport((REMOTE_HOST, REMOTE_PORT))
try:
    transport.connect(username=USERNAME, password=PASSWORD)
    sftp = paramiko.SFTPClient.from_transport(transport)
except Exception:
    # Do not leave a half-open transport behind when authentication fails.
    transport.close()
    raise

# helper to create remote dir recursively
def mkdir_p(sftp, remote_directory):
    """Create ``remote_directory`` and any missing parents over SFTP.

    Each path prefix is probed with ``sftp.stat``; when the probe raises
    ``IOError`` the prefix is created with ``sftp.mkdir``. A leading "/" makes
    the prefixes absolute, otherwise they are built as relative paths. Empty
    path components are skipped, and the bare root "/" is a no-op.

    Raises:
        IOError: re-raised (after printing a warning) when ``sftp.mkdir`` fails.
    """
    if remote_directory == "/":
        return

    is_absolute = remote_directory.startswith("/")
    segments = [piece for piece in remote_directory.split("/") if piece]

    prefix = "/" if is_absolute else ""
    for segment in segments:
        # The first component is glued directly onto "" or "/"; later ones
        # are joined with a separator.
        at_start = prefix in ("", "/")
        prefix = prefix + segment if at_start else f"{prefix}/{segment}"

        try:
            sftp.stat(prefix)
            continue  # already present on the server
        except IOError:
            print(f"📁 Creating remote directory: {prefix}")

        try:
            sftp.mkdir(prefix)
        except IOError as err:
            print(f"   ⚠️ Could not create {prefix}: {err}")
            raise

# Everything below runs inside try/finally so the SFTP session and transport
# are closed even if directory setup or the upload raises unexpectedly.
try:
    # Ensure remote directory exists
    try:
        mkdir_p(sftp, REMOTE_DIR)
    except Exception as e:
        print(f"❌ Error setting up directories: {e}")
        # Deliberately not fatal, to allow debugging; the per-file uploads
        # below will report their own failures if the directory is missing.

    # Upload files (regular files only; sub-directories are skipped)
    if os.path.exists(LOCAL_DIR):
        print(f"📤 Uploading files from {LOCAL_DIR} to {REMOTE_DIR}...")
        failed_files = []
        for filename in os.listdir(LOCAL_DIR):
            local_path = os.path.join(LOCAL_DIR, filename)
            remote_path = f"{REMOTE_DIR}/{filename}"

            if not os.path.isfile(local_path):
                continue

            print(f"   - Uploading {filename}...")
            try:
                sftp.put(local_path, remote_path)
            except Exception as e:
                # Best effort: keep going so one bad file does not block the rest.
                print(f"     ❌ Failed to upload {filename}: {e}")
                failed_files.append(filename)

        # Only claim success when every file actually made it across.
        if failed_files:
            print(f"⚠️ Upload process finished with {len(failed_files)} failed file(s): {', '.join(failed_files)}")
        else:
            print("✅ Upload process finished!")
    else:
        print(f"❌ Local directory {LOCAL_DIR} not found. Did the previous step finish?")
finally:
    sftp.close()
    transport.close()

In [None]:
import os
import shutil

import paramiko

# NOTE: LOCAL_DIR, REMOTE_HOST, REMOTE_PORT, USERNAME, PASSWORD and REMOTE_DIR
# are defined by the SFTP upload cell earlier in the notebook; run it first.

# 1. Create the alias locally: ndarray-cache.json is a copy of tensor-cache.json.
source_cache = os.path.join(LOCAL_DIR, "tensor-cache.json")
dest_cache = os.path.join(LOCAL_DIR, "ndarray-cache.json")

if os.path.exists(source_cache):
    shutil.copy(source_cache, dest_cache)
    print(f"✅ Created ndarray-cache.json from tensor-cache.json")
else:
    print(f"⚠️ Could not find {source_cache}")

# 2. Upload only the new file
local_file = dest_cache
remote_file = f"{REMOTE_DIR}/ndarray-cache.json"

if os.path.exists(local_file):
    print(f"🚀 Connecting to {REMOTE_HOST} to upload the alias...")
    transport = paramiko.Transport((REMOTE_HOST, REMOTE_PORT))
    sftp = None
    try:
        transport.connect(username=USERNAME, password=PASSWORD)
        sftp = paramiko.SFTPClient.from_transport(transport)

        print(f"📤 Uploading ndarray-cache.json...")
        try:
            # The transfer was previously commented out while the success
            # message was still printed; it is restored so the message is true.
            sftp.put(local_file, remote_file)
            print("✅ Upload success!")
        except Exception as e:
            print(f"❌ Upload failed: {e}")
    finally:
        # Always release the connection, including when connect() itself fails.
        if sftp is not None:
            sftp.close()
        transport.close()
else:
    # No point opening a connection when there is nothing to send.
    print(f"⚠️ Nothing to upload: {local_file} does not exist.")

In [None]:
%%shell
# @title
# `%%shell` must be the very first line of the cell: IPython only recognises a
# cell magic there, so a leading `# @title` line made the cell fail.
#sudo update-alternatives --set python3 /usr/bin/python3.13
pip install paramiko

In [None]:
local_path = "/content/vc7b.zip" #@param ["sh4.1ijs", "sh5.1ijs", "g3007.wasm", "g3008.wasm", "g3009.wasm", "sh6.1ijs", "g3010.wasm"] {allow-input: true}
loc_file = "vc7b.zip" #@param ["sh4.1ijs", "sh5.1ijs", "g3007.wasm", "g3008.wasm", "g3009.wasm", "sh6.1ijs", "g3010.wasm"] {allow-input: true}
dest_path = "1ink.us/files/" #@param ["sh4.1ijs", "sh5.1ijs", "g3007.wasm", "g3008.wasm", "g3009.wasm", "sh6.1ijs", "g3010.wasm"] {allow-input: true}
import os
import urllib
import requests as reqs
import re
from getpass import getpass
import paramiko

# Uploads the single file `local_path` to `dest_path + loc_file` on the server.
host = "1ink.us"
username  = "ford442"
# SECURITY: never store the password in the notebook. It is read from the
# SFTP_PASSWORD environment variable, or prompted for with hidden input.
# The password that used to be embedded here was exposed in the notebook
# source and should be changed on the server.
password  = os.environ.get("SFTP_PASSWORD") or getpass(f"SFTP password for {username}@{host}: ")
port = 22
file_name=loc_file
destination_path=dest_path+file_name

transport = paramiko.Transport((host, port))
try:
    transport.connect(username = username, password = password)
    sftp = paramiko.SFTPClient.from_transport(transport)
    try:
        sftp.put(local_path, destination_path)
    finally:
        sftp.close()
finally:
    # Close the transport even when authentication or the transfer fails.
    transport.close()

# Task
Compile the Vicuna-7b-v1.5 model into a WebAssembly (WASM) binary using MLC LLM, preparing it for deployment with WebLLM, and ensure the resulting WASM file is generated and its path is provided. This includes installing necessary build dependencies (git-lfs, cmake, Rust, Emscripten), cloning the MLC LLM repository, building the TVM Web Runtime, and then compiling the model.

## Install Build Dependencies

### Subtask:
Install essential build tools including git-lfs, cmake, Rust, and Emscripten. Clone the MLC LLM repository recursively and configure the Emscripten environment variables.


**Reasoning**:
The first step is to install the MLC LLM nightly package and git-lfs. Cell `pBK2wCzN7tI8` in the provided notebook handles this installation.



In [None]:
#@title Setup
import os
import sys
import subprocess

# 1. Install or upgrade the MLC LLM nightly wheels (cu128 builds) from the MLC
#    wheel index. Note: `-U` upgrades in place; it does NOT force a reinstall.
print("📦 Installing MLC LLM and dependencies...")
!python -m pip install --pre -U -f https://mlc.ai/wheels mlc-llm-nightly-cu128 mlc-ai-nightly-cu128
# git-lfs is installed via apt; other cells call `git lfs install` before cloning model weights.
!apt-get install -y git-lfs

# Testing Existing HuggingFace Model

This section demonstrates how to test the pre-converted `ford442/vicuna-7b-q4f32-webllm` model directly without reconversion.

In [None]:
#@title 📥 Download Pre-converted Model from HuggingFace
import os
import subprocess

# Model configuration
HF_MODEL_ID = "ford442/vicuna-7b-q4f32-webllm"
MODEL_DIR = "/content/ford442-vicuna-7b-q4f32"

print(f"📦 Downloading model: {HF_MODEL_ID}")
print(f"📂 Destination: {MODEL_DIR}\n")

# Install huggingface_hub if not present
try:
    from huggingface_hub import snapshot_download
except ImportError:
    print("Installing huggingface_hub...")
    !pip install -q huggingface_hub
    from huggingface_hub import snapshot_download

# Download the model
try:
    model_path = snapshot_download(
        repo_id=HF_MODEL_ID,
        local_dir=MODEL_DIR,
        local_dir_use_symlinks=False
    )
    print(f"\n✅ Model downloaded successfully to: {model_path}")
    
    # List downloaded files
    print("\n📋 Downloaded files:")
    !ls -lh {MODEL_DIR}
    
except Exception as e:
    print(f"❌ Error downloading model: {e}")
    raise

In [None]:
#@title 💬 CLI Chat Test
import os
import sys

# Ensure MLC LLM is installed
try:
    import mlc_llm
except ImportError:
    print("Installing MLC LLM...")
    !python -m pip install --pre -U -f https://mlc.ai/wheels mlc-llm-nightly-cu128 mlc-ai-nightly-cu128
    import mlc_llm

MODEL_DIR = "/content/ford442-vicuna-7b-q4f32"

print("🤖 Starting CLI chat with Vicuna-7B...")
print("   Model path:", MODEL_DIR)
print("   Type your message and press Enter.\n")

# Simple CLI chat test
from mlc_llm import MLCEngine

# Create model path
model_path = MODEL_DIR

# Initialize engine
print("⚙️ Initializing MLC Engine...\n")
try:
    engine = MLCEngine(model=model_path)
    
    # Test prompt
    test_message = "Hello! Tell me a short joke about AI."
    print(f"👤 User: {test_message}\n")
    print("🤖 Assistant: ", end="", flush=True)
    
    # Generate response
    response = ""
    for chunk in engine.chat.completions.create(
        messages=[{"role": "user", "content": test_message}],
        model=model_path,
        stream=True,
        max_tokens=256
    ):
        if chunk.choices:
            delta = chunk.choices[0].delta.content
            if delta:
                print(delta, end="", flush=True)
                response += delta
    
    print("\n\n✅ CLI chat test completed successfully!")
    
except Exception as e:
    print(f"\n❌ Error during CLI chat: {e}")
    import traceback
    traceback.print_exc()

In [None]:
#@title 🌐 Web Chat Demo (WebLLM)
import os
from IPython.display import HTML, display
import json

MODEL_DIR = "/content/ford442-vicuna-7b-q4f32"

print("🌐 Setting up WebLLM demo...\n")

# Create a simple HTML demo that uses WebLLM.
# The page is self-contained: inline CSS, a chat box, an input row, and a
# module script that imports WebLLM from esm.run, registers one custom model
# record (weights from the Hugging Face repo, model library from the
# binary-mlc-llm-libs repo) and streams chat completions into the page.
# Note: the page sends only the current message on each turn (no history).
html_content = '''
<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>WebLLM Chat Demo</title>
    <style>
        body { 
            font-family: Arial, sans-serif; 
            max-width: 800px; 
            margin: 20px auto; 
            padding: 20px;
            background: #f5f5f5;
        }
        #chat-box {
            background: white;
            border: 1px solid #ddd;
            border-radius: 8px;
            padding: 20px;
            height: 400px;
            overflow-y: auto;
            margin-bottom: 20px;
        }
        .message {
            margin: 10px 0;
            padding: 10px;
            border-radius: 5px;
        }
        .user-message { background: #e3f2fd; text-align: right; }
        .assistant-message { background: #f1f8e9; }
        .system-message { background: #fff3e0; font-style: italic; }
        #input-area {
            display: flex;
            gap: 10px;
        }
        #user-input {
            flex: 1;
            padding: 10px;
            border: 1px solid #ddd;
            border-radius: 5px;
            font-size: 14px;
        }
        button {
            padding: 10px 20px;
            background: #2196f3;
            color: white;
            border: none;
            border-radius: 5px;
            cursor: pointer;
            font-size: 14px;
        }
        button:hover { background: #1976d2; }
        button:disabled { background: #ccc; cursor: not-allowed; }
        #status { 
            margin: 10px 0; 
            padding: 10px;
            background: #fff;
            border-radius: 5px;
            border: 1px solid #ddd;
        }
    </style>
</head>
<body>
    <h1>🤖 WebLLM Chat Demo - Vicuna 7B</h1>
    <div id="status">⏳ Initializing WebGPU and loading model...</div>
    <div id="chat-box"></div>
    <div id="input-area">
        <input type="text" id="user-input" placeholder="Type your message..." disabled>
        <button id="send-btn" onclick="sendMessage()" disabled>Send</button>
    </div>

    <script type="module">
        import * as webllm from "https://esm.run/@mlc-ai/web-llm";

        let engine = null;
        const chatBox = document.getElementById('chat-box');
        const statusDiv = document.getElementById('status');
        const userInput = document.getElementById('user-input');
        const sendBtn = document.getElementById('send-btn');

        // Configure the model
        const modelConfig = {
            model_id: "ford442/vicuna-7b-q4f32-webllm",
            model: "https://huggingface.co/ford442/vicuna-7b-q4f32-webllm/resolve/main/",
            model_lib: "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/web-llm-models/v0_2_80/Llama-2-7b-chat-hf-q4f32_1-ctx4k_cs1k-webgpu.wasm",
            vram_required_MB: 4096,
            low_resource_required: false,
        };

        function addMessage(role, content) {
            const div = document.createElement('div');
            div.className = `message ${role}-message`;
            div.textContent = content;
            chatBox.appendChild(div);
            chatBox.scrollTop = chatBox.scrollHeight;
        }

        function updateStatus(message, isError = false) {
            statusDiv.textContent = message;
            statusDiv.style.background = isError ? '#ffebee' : '#fff';
        }

        async function initEngine() {
            try {
                updateStatus('🔧 Checking WebGPU support...');
                
                if (!navigator.gpu) {
                    throw new Error('WebGPU not supported in this browser');
                }

                updateStatus('📦 Initializing WebLLM engine...');
                engine = await webllm.CreateMLCEngine(
                    modelConfig.model_id,
                    { 
                        initProgressCallback: (progress) => {
                            updateStatus(`⏳ Loading: ${progress.text}`);
                        },
                        appConfig: {
                            model_list: [modelConfig]
                        }
                    }
                );

                updateStatus('✅ Model loaded! Ready to chat.');
                userInput.disabled = false;
                sendBtn.disabled = false;
                userInput.focus();
                
                addMessage('system', 'WebLLM initialized with Vicuna-7B model. Start chatting!');
            } catch (error) {
                updateStatus(`❌ Error: ${error.message}`, true);
                console.error('Initialization error:', error);
            }
        }

        window.sendMessage = async function() {
            const message = userInput.value.trim();
            if (!message || !engine) return;

            addMessage('user', message);
            userInput.value = '';
            userInput.disabled = true;
            sendBtn.disabled = true;
            updateStatus('🤖 Generating response...');

            try {
                const messages = [{ role: 'user', content: message }];
                let response = '';

                const chunks = await engine.chat.completions.create({
                    messages: messages,
                    temperature: 0.7,
                    max_tokens: 256,
                    stream: true
                });

                const assistantDiv = document.createElement('div');
                assistantDiv.className = 'message assistant-message';
                chatBox.appendChild(assistantDiv);

                for await (const chunk of chunks) {
                    const delta = chunk.choices[0]?.delta?.content;
                    if (delta) {
                        response += delta;
                        assistantDiv.textContent = response;
                        chatBox.scrollTop = chatBox.scrollHeight;
                    }
                }

                updateStatus('✅ Model loaded! Ready to chat.');
            } catch (error) {
                updateStatus(`❌ Error: ${error.message}`, true);
                addMessage('system', `Error: ${error.message}`);
            } finally {
                userInput.disabled = false;
                sendBtn.disabled = false;
                userInput.focus();
            }
        };

        // Handle Enter key
        userInput.addEventListener('keypress', (e) => {
            if (e.key === 'Enter' && !sendBtn.disabled) {
                sendMessage();
            }
        });

        // Initialize on load
        initEngine();
    </script>
</body>
</html>
'''

# Save HTML file
html_file = '/content/webllm_demo.html'
# The page declares <meta charset="utf-8"> and contains emoji, so it is written
# as UTF-8 explicitly instead of relying on the platform's default encoding.
with open(html_file, 'w', encoding='utf-8') as f:
    f.write(html_content)

print(f"✅ WebLLM demo HTML created: {html_file}\n")
print("📝 Note: In Colab, this demo requires:")
print("   1. WebGPU support (Chrome 113+ with flags enabled)")
print("   2. Proper CORS headers (use with local server or colab.research.google.com)")
print("\n🚀 To test locally, you can:")
print("   1. Download the HTML file")
print("   2. Open it in a WebGPU-enabled browser")
print("   3. Or serve it with: python -m http.server 8000\n")

# Display the HTML inline as well (note: full WebGPU may not work in Colab iframe)
print("📺 Displaying demo (WebGPU may be limited in Colab):")
display(HTML(html_content))

In [None]:
#@title 🎭 Simplified Joksters Routine Test
import os
import json
from IPython.display import HTML, display, Javascript
import subprocess
import time

MODEL_DIR = "/content/ford442-vicuna-7b-q4f32"

print("🎭 Setting up Joksters app test...\n")

# Clone the joksters repository if not present (skipped when REPO_DIR already exists)
REPO_DIR = "/content/the_jokesters"
if not os.path.exists(REPO_DIR):
    print("📥 Cloning the_jokesters repository...")
    !git clone https://github.com/ford442/the_jokesters.git {REPO_DIR}
else:
    print("✅ Repository already cloned.")

# Install Node.js dependencies
# NOTE: os.chdir changes the kernel's working directory for the rest of the
# session, so relative paths used after this point resolve inside REPO_DIR.
print("\n📦 Installing Node.js dependencies...")
os.chdir(REPO_DIR)
!npm install

# Create a test HTML that loads the Joksters app with the HF model
print("\n🔧 Creating test configuration...")

test_html = '''
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Joksters Test - Vicuna 7B</title>
    <style>
        body {
            margin: 0;
            padding: 20px;
            font-family: Arial, sans-serif;
            background: #1a1a1a;
            color: white;
        }
        #container {
            max-width: 1200px;
            margin: 0 auto;
        }
        h1 { color: #4fc3f7; }
        #status {
            background: #263238;
            padding: 15px;
            border-radius: 8px;
            margin: 20px 0;
            border-left: 4px solid #4fc3f7;
        }
        #chat-container {
            background: #263238;
            padding: 20px;
            border-radius: 8px;
            margin: 20px 0;
            min-height: 400px;
        }
        .message {
            margin: 10px 0;
            padding: 12px;
            border-radius: 6px;
            line-height: 1.5;
        }
        .agent-message {
            background: #37474f;
            border-left: 3px solid #4fc3f7;
        }
        .user-message {
            background: #1e3a5f;
            border-left: 3px solid #64b5f6;
        }
        .agent-name {
            font-weight: bold;
            color: #4fc3f7;
            margin-bottom: 5px;
        }
        button {
            background: #4fc3f7;
            color: #1a1a1a;
            border: none;
            padding: 12px 24px;
            border-radius: 6px;
            cursor: pointer;
            font-size: 16px;
            font-weight: bold;
            margin: 10px 5px;
        }
        button:hover { background: #81d4fa; }
        button:disabled {
            background: #555;
            cursor: not-allowed;
        }
        input {
            width: calc(100% - 24px);
            padding: 12px;
            background: #37474f;
            border: 1px solid #4fc3f7;
            border-radius: 6px;
            color: white;
            font-size: 14px;
            margin: 10px 0;
        }
    </style>
</head>
<body>
    <div id="container">
        <h1>🎭 The Joksters - Test Mode</h1>
        <p>Testing with Vicuna-7B model from HuggingFace (ford442/vicuna-7b-q4f32-webllm)</p>
        
        <div id="status">⏳ Initializing WebGPU and loading model...</div>
        
        <div id="controls" style="display:none;">
            <input type="text" id="user-input" placeholder="Type your message to the agents..." />
            <button onclick="sendMessage()">Send Message</button>
            <button onclick="startImprov()">Start Improv Scene</button>
        </div>
        
        <div id="chat-container"></div>
    </div>

    <script type="module">
        import * as webllm from "https://esm.run/@mlc-ai/web-llm";

        // DOM handles shared by every function in this script.
        const statusDiv = document.getElementById('status');
        const chatContainer = document.getElementById('chat-container');
        const controlsDiv = document.getElementById('controls');
        const userInput = document.getElementById('user-input');

        // The three personas. `temp` is passed as the sampling temperature of
        // each completion request; `color` tints the name label in the chat log.
        const agents = [
            { name: 'The Comedian', color: '#ff5252', temp: 0.9 },
            { name: 'The Philosopher', color: '#26c6da', temp: 0.7 },
            { name: 'The Scientist', color: '#42a5f5', temp: 0.3 }
        ];

        // Engine handle (stays null until initEngine assigns it) and the index
        // into `agents` of the persona that answers the next user message.
        let engine = null;
        let currentAgent = 0;

        /**
         * Replace the text of the status banner.
         * @param {string} message - Text to show in the #status element.
         */
        function updateStatus(message) {
            statusDiv.textContent = message;
        }

        /**
         * Append one entry to the chat log and scroll the log to the bottom.
         *
         * The label and body are written through textContent rather than being
         * interpolated into innerHTML, so any markup contained in user input or
         * model output is displayed literally instead of being parsed (and
         * possibly executed) by the browser.
         *
         * @param {string} agentName - Label shown above the message.
         * @param {string} content - Message body.
         * @param {string} color - CSS color applied to the label.
         */
        function addMessage(agentName, content, color) {
            const div = document.createElement('div');
            div.className = 'message agent-message';

            const nameDiv = document.createElement('div');
            nameDiv.className = 'agent-name';
            nameDiv.style.color = color;
            nameDiv.textContent = agentName;

            const bodyDiv = document.createElement('div');
            bodyDiv.textContent = content;

            div.appendChild(nameDiv);
            div.appendChild(bodyDiv);
            chatContainer.appendChild(div);
            chatContainer.scrollTop = chatContainer.scrollHeight;
        }

        /**
         * Check for WebGPU, create the WebLLM engine for the custom model
         * record, then reveal the controls and post a welcome message.
         * Any failure is shown in the status banner and logged to the console.
         */
        async function initEngine() {
            // Model record handed to WebLLM through appConfig.model_list.
            const vicunaRecord = {
                model_id: "ford442/vicuna-7b-q4f32-webllm",
                model: "https://huggingface.co/ford442/vicuna-7b-q4f32-webllm/resolve/main/",
                model_lib: "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/web-llm-models/v0_2_80/Llama-2-7b-chat-hf-q4f32_1-ctx4k_cs1k-webgpu.wasm",
                vram_required_MB: 4096,
                low_resource_required: false,
            };

            // Mirror the loader's progress text into the status banner.
            const reportProgress = (progress) => {
                updateStatus(`⏳ Loading: ${progress.text}`);
            };

            try {
                updateStatus('🔧 Checking WebGPU support...');
                if (!navigator.gpu) {
                    throw new Error('WebGPU not supported. Please use Chrome 113+ with WebGPU enabled.');
                }

                updateStatus('📦 Loading Vicuna-7B model from HuggingFace...');
                engine = await webllm.CreateMLCEngine(vicunaRecord.model_id, {
                    initProgressCallback: reportProgress,
                    appConfig: {
                        model_list: [vicunaRecord]
                    }
                });

                updateStatus('✅ Model loaded! The Joksters are ready to perform.');
                controlsDiv.style.display = 'block';

                addMessage('System', 'Welcome! Three AI agents are ready: The Comedian, The Philosopher, and The Scientist. Send a message or start an improv scene!', '#4fc3f7');
            } catch (error) {
                updateStatus(`❌ Error: ${error.message}`);
                console.error('Initialization error:', error);
            }
        }

        /**
         * Send the text in the input box to the agent whose turn it is, show
         * the streamed reply once it is complete, and advance to the next agent.
         * Does nothing when the input is blank or the engine is not loaded.
         */
        window.sendMessage = async function() {
            const message = userInput.value.trim();
            if (!message) {
                return;
            }
            if (!engine) {
                return;
            }

            const agent = agents[currentAgent];
            userInput.value = '';

            addMessage('You', message, '#64b5f6');
            updateStatus(`🤖 ${agent.name} is responding...`);

            try {
                // Persona sentence spliced into the system prompt for this agent.
                let persona;
                switch (agent.name) {
                    case 'The Comedian':
                        persona = 'You are witty and humorous.';
                        break;
                    case 'The Philosopher':
                        persona = 'You are thoughtful and profound.';
                        break;
                    default:
                        persona = 'You are logical and precise.';
                }
                const systemPrompt = `You are ${agent.name}. ${persona} Keep responses brief.`;

                const stream = await engine.chat.completions.create({
                    messages: [
                        { role: 'system', content: systemPrompt },
                        { role: 'user', content: message }
                    ],
                    temperature: agent.temp,
                    max_tokens: 150,
                    stream: true
                });

                // Collect the streamed text deltas; the reply is shown once complete.
                const pieces = [];
                for await (const chunk of stream) {
                    const piece = chunk.choices[0]?.delta?.content;
                    if (piece) {
                        pieces.push(piece);
                    }
                }

                addMessage(agent.name, pieces.join(''), agent.color);
                currentAgent = (currentAgent + 1) % agents.length;
                updateStatus('✅ Ready for next message');
            } catch (error) {
                updateStatus(`❌ Error: ${error.message}`);
                addMessage('System', `Error: ${error.message}`, '#ff5252');
            }
        };

        /**
         * Run a single improv round: each of the first three agents is asked,
         * one after another, to contribute to a fixed coffee-shop scene.
         * A failure for one agent is reported in the chat and the round
         * continues with the next agent. Does nothing if the engine is not loaded.
         */
        window.startImprov = async function() {
            if (!engine) {
                return;
            }

            updateStatus('🎭 Starting improv scene...');
            addMessage('System', 'Starting an improv comedy scene: "At the Coffee Shop"', '#4fc3f7');

            const scene = "Three friends meet at a coffee shop to discuss their latest adventures with AI.";

            // Every performer gets the same two-message prompt; earlier replies
            // are not fed into later requests.
            for (const performer of agents.slice(0, 3)) {
                updateStatus(`🎭 ${performer.name} is improvising...`);

                try {
                    const stream = await engine.chat.completions.create({
                        messages: [
                            { role: 'system', content: `You are ${performer.name} in an improv scene: "${scene}". Stay in character and contribute to the scene with a brief response.` },
                            { role: 'user', content: 'Continue the scene.' }
                        ],
                        temperature: performer.temp,
                        max_tokens: 100,
                        stream: true
                    });

                    const pieces = [];
                    for await (const chunk of stream) {
                        const piece = chunk.choices[0]?.delta?.content;
                        if (piece) {
                            pieces.push(piece);
                        }
                    }

                    addMessage(performer.name, pieces.join(''), performer.color);
                } catch (error) {
                    addMessage('System', `Error with ${performer.name}: ${error.message}`, '#ff5252');
                }
            }

            updateStatus('✅ Improv scene complete!');
        };

        // Pressing Enter in the input box sends the message.
        userInput.addEventListener('keypress', (event) => {
            if (event.key !== 'Enter') {
                return;
            }
            sendMessage();
        });

        // Begin loading the model as soon as the script runs.
        initEngine();
    </script>
</body>
</html>
'''

# Save the test HTML. The page declares <meta charset="UTF-8"> and contains
# emoji, so the file is written as UTF-8 explicitly rather than with the
# platform's default text encoding.
test_file = '/content/joksters_test.html'
with open(test_file, 'w', encoding='utf-8') as f:
    f.write(test_html)

print(f"\n✅ Joksters test HTML created: {test_file}")
print("\n📝 This simplified test demonstrates:")
print("   • Loading the Vicuna-7B model from HuggingFace")
print("   • Three AI agents with distinct personalities")
print("   • Interactive chat with rotating agents")
print("   • Improv scene generation")
print("   • WebGPU-powered inference in the browser")
print("\n🚀 To test:")
print("   1. Download the HTML file")
print("   2. Open in Chrome 113+ with WebGPU enabled")
print("   3. Wait for model to load (~4GB download)")
print("   4. Interact with the agents!\n")

# Render the page inline in the Colab output area as well.
print("📺 Displaying test interface:")
display(HTML(test_html))

## 📋 Testing Notes

### WebGPU Requirements

To run WebLLM with WebGPU in Colab:

1. **Browser Support**: Chrome 113+ or Edge 113+
2. **Enable WebGPU**: `chrome://flags/#enable-unsafe-webgpu`
3. **CORS Headers**: Required for cross-origin isolation
4. **GPU Access**: Browser needs access to GPU

### Model Information

- **Model**: ford442/vicuna-7b-q4f32-webllm
- **Size**: ~4GB download
- **Quantization**: q4f32_1 (4-bit weights, 32-bit activations)
- **Context**: 4096 tokens
- **VRAM**: ~4GB required

### Limitations in Colab

- WebGPU may not work in Colab's iframe environment
- Full testing requires downloading HTML and running locally
- CLI test (Cell 2) works fully in Colab with GPU runtime
- Web demos (Cells 3-4) are best tested outside Colab

### Alternative Testing

For full WebGPU testing:

1. Download the generated HTML files
2. Serve locally: `python -m http.server 8000`
3. Open in WebGPU-enabled browser
4. Ensure proper CORS headers are set
