In [None]:
#@title Setup
import os
import sys
import subprocess

# 1. Install or upgrade the MLC LLM nightly wheels (cu128 builds) from the MLC wheel index.
#    Note: `-U` upgrades packages that are already installed; it does NOT force a reinstall.
print("üì¶ Installing MLC LLM and dependencies...")
!python -m pip install --pre -U -f https://mlc.ai/wheels mlc-llm-nightly-cu128 mlc-ai-nightly-cu128
# git-lfs is installed through apt; other cells in this notebook run `git lfs install`
# before cloning model repositories.
!apt-get install -y git-lfs


In [None]:
!git clone https://www.github.com/emscripten-core/emsdk.git
!cd /content/emsdk && ./emsdk install tot
!cd /content/emsdk && ./emsdk activate tot

In [None]:
import os

# 1. Install Rust (Standard script)
# We use -y to say "yes" to prompts automatically
!curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y

# 2. Add Rust to the system PATH for this session
# (Colab doesn't automatically load the path after install)
os.environ['PATH'] += ":/root/.cargo/bin"
!rustup target add wasm32-unknown-emscripten

# 2. Verify it is installed
print("‚úÖ Target installed. Verifying...")
!rustup target list --installed

In [None]:
%%shell
# Builds the MLC LLM web runtime: makes sure Rust, Emscripten and the mlc-llm
# checkout are present, then configures and compiles under build/wasm.
set -e

# 1. Install Rust if missing
if [ ! -f "$HOME/.cargo/env" ]; then
    echo "ü¶Ä Installing Rust..."
    curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
fi
source "$HOME/.cargo/env"
# Ensure the wasm target on every run, not only right after a fresh install:
# a pre-existing Rust toolchain may not have it, and the command is a no-op
# when the target is already present.
rustup target add wasm32-unknown-emscripten

# 2. Install Emscripten if missing
if [ ! -d "/content/emsdk" ]; then
    echo "üîß Installing Emscripten..."
    git clone https://github.com/emscripten-core/emsdk.git /content/emsdk
    cd /content/emsdk
    ./emsdk install latest
    ./emsdk activate latest
fi
# Load the emsdk environment into this shell so emcmake is available below.
source /content/emsdk/emsdk_env.sh

# 3. Clone MLC LLM if it doesn't exist
if [ ! -d "/content/mlc-llm" ]; then
    echo "üìÇ Cloning MLC LLM..."
    git clone --recursive https://github.com/mlc-ai/mlc-llm.git /content/mlc-llm
fi

# 4. Build the Web Runtime
cd /content/mlc-llm

# Pre-requisite: Prepare Emscripten dependencies
./web/prep_emcc_deps.sh

# Create build directory
mkdir -p build/wasm
cd build/wasm

# Configure with emcmake
emcmake cmake ../.. \
    -DCMAKE_BUILD_TYPE=Release \
    -DUSE_WEBGPU=ON \
    -DUSE_WASM=ON \
    -DCMAKE_CXX_FLAGS="-O3"

# Compile
make -j$(nproc) && make install
echo "‚úÖ Build Complete!"

# Command

python -m mlc_llm convert_weight /content/dist/models/vicuna-7b-v1.5/ --quantization q4f32_1 -o /content/vc7b


In [None]:
#@title gen config

import json
import os

# Settings: the config is written next to the converted weight shards.
OUTPUT_DIR = "/content/vc7b"
QUANTIZATION = "q4f32_1"

# Architecture hyper-parameters, nested under "model_config" in the output.
model_config = {
    "hidden_size": 4096,
    "intermediate_size": 11008,
    "num_attention_heads": 32,
    "num_hidden_layers": 32,
    "rms_norm_eps": 1e-05,
    "vocab_size": 32000,
    "position_embedding_base": 10000.0,
    "context_window_size": 4096,
    "prefill_chunk_size": 4096,
    "tensor_parallel_shards": 1,
    "head_dim": 128,
    "dtype": "float32"
}

# Conversation template (named vicuna_v1.1), written by hand here.
conv_template = {
    "name": "vicuna_v1.1",
    "system_template": "{system_message}",
    "system_message": "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.",
    "roles": {
        "user": "USER",
        "assistant": "ASSISTANT"
    },
    "role_msg_sep": " ",
    "role_empty_sep": " ",
    "seps": [" ", "</s>"],
    "stop_str": ["</s>"],
    "stop_token_ids": [2],
    "add_bos": True
}

# Top-level document; key order is kept so the serialized JSON is unchanged.
config_data = {
    "model_type": "llama",
    "quantization": QUANTIZATION,
    "model_config": model_config,
    "vocab_size": 32000,
    "context_window_size": 4096,
    "sliding_window_size": -1,
    "prefill_chunk_size": 4096,
    "attention_sink_size": -1,
    "tensor_parallel_shards": 1,
    "conv_template": conv_template
}

# Make sure the target directory exists, then serialize the config into it.
output_file = os.path.join(OUTPUT_DIR, "mlc-chat-config.json")
os.makedirs(OUTPUT_DIR, exist_ok=True)

serialized_config = json.dumps(config_data, indent=2)
with open(output_file, "w") as f:
    f.write(serialized_config)

print(f"‚úÖ Successfully created config file at: {output_file}")
print("   You can now proceed to upload the '/content/vc7b' folder to Hugging Face.")

In [None]:
# Clone mlc-llm together with its submodules (--recursive) into the current
# working directory. No destination path is given, so the checkout lands in
# ./mlc-llm relative to wherever the notebook is running.
!git clone --recursive https://github.com/mlc-ai/mlc-llm.git

# New Section

cd mlc-llm

ln -sf mlc_wasm_runtime.bc wasm_runtime.bc

 ./web/prep_emcc_deps.sh

cd web/dist/wasm

ln -sf mlc_wasm_runtime.bc wasm_runtime.bc

export TVM_LIBRARY_PATH=$PWD/web/dist/wasm

export TVM_HOME=$PWD/3rdparty/tvm



In [None]:
#@title get config
!wget https://huggingface.co/ford442/vicuna-7b-q4f32-webllm/resolve/main/mlc-chat-config.json

Compile wasm command:

python -m mlc_llm compile /content/mlc-chat-config.json --device webgpu -o /content/vicuna_model.wasm

In [None]:
import os
from huggingface_hub import login, HfApi

# 1. Credentials
# SECURITY: never paste a token into the notebook source. A literal write token
# was previously stored on this line; it must be treated as leaked and revoked
# in the Hugging Face account settings.
# The token is read from the HF_TOKEN environment variable, or prompted for
# without echoing when that variable is unset.
from getpass import getpass

HF_TOKEN = os.environ.get("HF_TOKEN") or getpass("Hugging Face WRITE token: ")
if not HF_TOKEN:
    raise ValueError("No Hugging Face token provided.")

# 2. VERIFY THIS IS YOUR USERNAME
# If your username is NOT 'ford442', change it here.
USERNAME = "ford442"
MODEL_NAME = "vicuna-7b-q4f32-web"
REPO_ID = f"{USERNAME}/{MODEL_NAME}"

FOLDER_PATH = "/content/vc7b"

# --- Upload Process ---
print(f"üîë Logging in...")
try:
    login(token=HF_TOKEN)
except Exception as e:
    print(f"‚ùå Login failed: {e}")
    # Nothing below can succeed without authentication, so stop here instead
    # of continuing into a guaranteed second failure.
    raise

print(f"\nüöÄ Creating repo: {REPO_ID}")
api = HfApi()

try:
    # Create repo (if it fails here, the token or username is still wrong)
    api.create_repo(repo_id=REPO_ID, repo_type="model", exist_ok=True)

    # Upload
    print(f"üì§ Uploading files from {FOLDER_PATH}...")
    api.upload_folder(
        folder_path=FOLDER_PATH,
        repo_id=REPO_ID,
        repo_type="model"
    )
    print(f"\n‚úÖ Success! Your model is live: https://huggingface.co/{REPO_ID}")

except Exception as e:
    print(f"\n‚ùå Error: {e}")
    print("Double check that:")
    print("1. Your token is a 'WRITE' token.")
    print(f"2. You are actually the user '{USERNAME}' on Hugging Face.")

In [None]:

import os
from huggingface_hub import login, HfApi

# ---------------------------------------------------------
# Credentials
# SECURITY: never paste a token into the notebook source. A literal write token
# was previously stored here; it must be treated as leaked and revoked.
# The token is read from the HF_TOKEN environment variable, or prompted for
# without echoing when that variable is unset.
# ---------------------------------------------------------
from getpass import getpass

HF_TOKEN = os.environ.get("HF_TOKEN") or getpass("Hugging Face WRITE token: ")
if not HF_TOKEN:
    raise ValueError("No Hugging Face token provided.")

# ---------------------------------------------------------
# Settings
# ---------------------------------------------------------
REPO_ID = "ford442/vicuna-7b-webllm-q4f32"
FOLDER_PATH = "/content/vc7b"

# 1. Login directly using the token string
print(f"üîë Logging in with provided token...")
login(token=HF_TOKEN)

# 2. Upload
print(f"\nüöÄ Uploading {FOLDER_PATH} to {REPO_ID}...")
api = HfApi()

# Create the repo if it doesn't exist
api.create_repo(repo_id=REPO_ID, repo_type="model", exist_ok=True)

# Upload folder
api.upload_folder(
    folder_path=FOLDER_PATH,
    repo_id=REPO_ID,
    repo_type="model"
)

print(f"‚úÖ Done! Your model is live at: https://huggingface.co/{REPO_ID}")

In [None]:
%%shell
# Downloads vicuna-7b-v1.5, converts its weights, generates the chat config and
# compiles the model for the webgpu device. Relative paths (dist/...) resolve
# against the shell's working directory.
set -e

# 1. Install MLC LLM Python Package
# We use the nightly build to match the runtime setup
if ! python -c "import mlc_llm" &> /dev/null; then
    echo "üì¶ Installing MLC LLM..."
    python -m pip install --pre -U -f https://mlc.ai/wheels mlc-llm-nightly-cu128 mlc-ai-nightly-cu128
fi

# git-lfs is checked on its own: having mlc_llm installed says nothing about
# whether git-lfs is present, and `git lfs install` below needs it.
if ! command -v git-lfs &> /dev/null; then
    apt-get install -y git-lfs
fi

# 2. Setup Emscripten Environment (Required for WASM compilation)
source /content/emsdk/emsdk_env.sh

# 3. Download Model
MODEL_ID="lmsys/vicuna-7b-v1.5"
MODEL_DIR="dist/models/vicuna-7b-v1.5"

echo "‚¨áÔ∏è Downloading $MODEL_ID..."
git lfs install
mkdir -p dist/models
if [ ! -d "$MODEL_DIR" ]; then
    git clone "https://huggingface.co/$MODEL_ID" "$MODEL_DIR"
else
    echo "   Model directory exists, skipping clone."
fi

# 4. Define Output Paths
QUANTIZATION="q4f32_1"
OUTPUT_NAME="vicuna-7b-v1.5-$QUANTIZATION-webllm"
OUTPUT_DIR="dist/$OUTPUT_NAME"

# 5. Convert Weights & Generate Config
# Expansions are quoted so paths keep working if they ever contain spaces.
echo "‚öôÔ∏è Converting weights to $QUANTIZATION..."
python -m mlc_llm convert_weight "$MODEL_DIR/" \
    --quantization "$QUANTIZATION" \
    -o "$OUTPUT_DIR"

echo "üìù Generating config..."
python -m mlc_llm gen_config "$MODEL_DIR/" \
    --quantization "$QUANTIZATION" \
    --conv-template vicuna_v1.1 \
    -o "$OUTPUT_DIR"

# 6. Compile Model to WASM
echo "üî® Compiling model to WASM..."
python -m mlc_llm compile "$OUTPUT_DIR/mlc-chat-config.json" \
    --device webgpu \
    -o "$OUTPUT_DIR/vicuna-7b-v1.5-$QUANTIZATION-webgpu.wasm"

echo "‚úÖ Conversion and Compilation Complete!"
echo "üìÇ Output contents of $OUTPUT_DIR:"
ls -lh "$OUTPUT_DIR"

In [None]:
# @title WebLLM Model Converter
import os
import sys
import subprocess

# 1. Install MLC LLM Nightly (Using cu128 to match Colab's latest environment)
print("üì¶ Checking/Installing MLC LLM and dependencies...")
# We force reinstall to ensure we have the correct version matching the runtime
!{sys.executable} -m pip install --pre --force-reinstall mlc-llm-nightly-cu128 mlc-ai-nightly-cu128 -f https://mlc.ai/wheels
!apt-get install -y git-lfs

# 2. Configuration
MODEL_ID = "lmsys/vicuna-7b-v1.5"
MODEL_NAME = MODEL_ID.split("/")[-1]
QUANTIZATION = "q4f32_1"

# Create directories
!mkdir -p dist/models

# 3. Clone the Original Model
print(f"‚¨áÔ∏è Downloading {MODEL_ID} from HuggingFace...")
!git lfs install
if not os.path.exists(f"dist/models/{MODEL_NAME}"):
    !git clone https://huggingface.co/{MODEL_ID} dist/models/{MODEL_NAME}
else:
    print("   Model directory exists. Skipping clone (ensure it's complete).")

# 4. Conversion & Config Generation
output_name = f"{MODEL_NAME}-{QUANTIZATION}-MLC"
output_path = f"dist/{output_name}"

print(f"\n‚öôÔ∏è Converting to {QUANTIZATION}...")
print(f"   Input: dist/models/{MODEL_NAME}")
print(f"   Output: {output_path}")

# Run conversion
convert_cmd = f"{sys.executable} -m mlc_llm convert_weight dist/models/{MODEL_NAME}/ --quantization {QUANTIZATION} -o {output_path}"
if os.system(convert_cmd) != 0:
    raise Exception("Weight conversion failed! (Possible OOM - Try restarting runtime)")

# Run config generation
print("\nüìù Generating Configuration...")
config_cmd = f"{sys.executable} -m mlc_llm gen_config dist/models/{MODEL_NAME}/ --quantization {QUANTIZATION} --conv-template vicuna_v1.1 -o {output_path}"
if os.system(config_cmd) != 0:
    raise Exception("Config generation failed!")

print(f"\n‚úÖ Success! Model prepared at: {output_path}")

In [None]:
# @title üöÄ Final Upload Script
import os
from huggingface_hub import login, HfApi

# ---------------------------------------------------------
# 1. CREDENTIALS
# SECURITY: never paste a token into the notebook source. A literal write token
# was previously stored here; it must be treated as leaked and revoked.
# The token is read from the HF_TOKEN environment variable, or prompted for
# without echoing when that variable is unset.
# ---------------------------------------------------------
from getpass import getpass

HF_TOKEN = os.environ.get("HF_TOKEN") or getpass("Hugging Face WRITE token: ")
if not HF_TOKEN:
    raise ValueError("No Hugging Face token provided.")

# ---------------------------------------------------------
# 2. CONFIGURATION
# ---------------------------------------------------------
# Based on your previous logs, your username is ford442
USERNAME = "ford442"
MODEL_NAME = "vicuna-7b-q4f32-web"
REPO_ID = f"{USERNAME}/{MODEL_NAME}"
FOLDER_PATH = "/content/vc7b"

# ---------------------------------------------------------
# 3. UPLOAD
# ---------------------------------------------------------
print(f"üîë Logging in...")
login(token=HF_TOKEN)

print(f"\nüöÄ Deploying model to: https://huggingface.co/{REPO_ID}")
api = HfApi()

try:
    # Create repo if it doesn't exist
    api.create_repo(repo_id=REPO_ID, repo_type="model", exist_ok=True)

    # Upload all files
    api.upload_folder(
        folder_path=FOLDER_PATH,
        repo_id=REPO_ID,
        repo_type="model"
    )

    print(f"\n‚úÖ SUCCESS! The model is live and ready for WebLLM.")
    print(f"üîó Link: https://huggingface.co/{REPO_ID}")

except Exception as e:
    print(f"\n‚ùå Error: {e}")

In [None]:
!pip install paramiko

In [None]:
import os
import paramiko

# --- Configuration ---
LOCAL_DIR = "dist/vicuna-7b-v1.5-q4f32_1-MLC"  # Directory containing the shards
REMOTE_HOST = "1ink.us"
REMOTE_PORT = 22
USERNAME = "ford442"
# SECURITY: never store the password in the notebook source. A literal password
# was previously stored on this line; it must be treated as leaked and changed
# on the server. It is read from the SFTP_PASSWORD environment variable, or
# prompted for without echoing when that variable is unset.
from getpass import getpass

PASSWORD = os.environ.get("SFTP_PASSWORD") or getpass(f"SFTP password for {USERNAME}@{REMOTE_HOST}: ")
REMOTE_DIR = "files/vicuna"  # Destination folder on the server (relative to home)

# --- Upload Script ---
# Open an SSH transport with password authentication and start an SFTP session on it.
print(f"üöÄ Connecting to {REMOTE_HOST}...")
transport = paramiko.Transport((REMOTE_HOST, REMOTE_PORT))
transport.connect(username=USERNAME, password=PASSWORD)
sftp = paramiko.SFTPClient.from_transport(transport)

# helper to create remote dir recursively
def mkdir_p(sftp, remote_directory):
    """Create ``remote_directory`` and any missing parents over SFTP.

    Each path prefix is probed with ``sftp.stat``; a prefix whose probe raises
    ``IOError`` is created with ``sftp.mkdir``. Empty path components (leading,
    trailing or doubled slashes) are ignored, and the bare root ``'/'`` is a no-op.

    Args:
        sftp: Object exposing ``stat(path)`` and ``mkdir(path)``.
        remote_directory: Absolute or relative remote path using ``/`` separators.

    Raises:
        IOError: Re-raised from ``sftp.mkdir`` when a directory cannot be created.
    """
    if remote_directory == '/':
        return

    # Absolute paths keep their leading slash; relative ones start from nothing.
    root_prefix = "/" if remote_directory.startswith("/") else ""
    segments = [segment for segment in remote_directory.split("/") if segment]

    for depth in range(1, len(segments) + 1):
        current_dir = root_prefix + "/".join(segments[:depth])
        try:
            sftp.stat(current_dir)
        except IOError:
            # The probe failed, so this level is treated as missing and created.
            print(f"üìÅ Creating remote directory: {current_dir}")
            try:
                sftp.mkdir(current_dir)
            except IOError as e:
                print(f"   ‚ö†Ô∏è Could not create {current_dir}: {e}")
                raise

# Create the remote directory, upload every regular file found directly inside
# LOCAL_DIR, and always close the connection, even if something raises midway.
try:
    # Ensure remote directory exists
    try:
        mkdir_p(sftp, REMOTE_DIR)
        remote_dir_ready = True
    except Exception as e:
        print(f"‚ùå Error setting up directories: {e}")
        remote_dir_ready = False

    if not remote_dir_ready:
        # Uploading into a directory that could not be created would only fail
        # once per file, so skip the upload and report it once.
        print(f"Skipping upload: remote directory {REMOTE_DIR} is not available.")
    elif os.path.exists(LOCAL_DIR):
        print(f"üì§ Uploading files from {LOCAL_DIR} to {REMOTE_DIR}...")
        failed_files = []
        for filename in os.listdir(LOCAL_DIR):
            local_path = os.path.join(LOCAL_DIR, filename)
            remote_path = f"{REMOTE_DIR}/{filename}"

            # Sub-directories are not uploaded.
            if not os.path.isfile(local_path):
                continue

            print(f"   - Uploading {filename}...")
            try:
                sftp.put(local_path, remote_path)
            except Exception as e:
                # Best effort: record the failure and keep going with the rest.
                print(f"     ‚ùå Failed to upload {filename}: {e}")
                failed_files.append(filename)

        if failed_files:
            print(f"Upload finished with {len(failed_files)} failed file(s): {', '.join(failed_files)}")
        else:
            print("‚úÖ Upload process finished!")
    else:
        print(f"‚ùå Local directory {LOCAL_DIR} not found. Did the previous step finish?")
finally:
    sftp.close()
    transport.close()

In [None]:
import shutil

# 1. Create the alias locally: ndarray-cache.json is a copy of tensor-cache.json.
source_cache = os.path.join(LOCAL_DIR, "tensor-cache.json")
dest_cache = os.path.join(LOCAL_DIR, "ndarray-cache.json")

if os.path.exists(source_cache):
    shutil.copy(source_cache, dest_cache)
    print(f"‚úÖ Created ndarray-cache.json from tensor-cache.json")
else:
    print(f"‚ö†Ô∏è Could not find {source_cache}")

# 2. Upload only the new file
local_file = dest_cache
remote_file = f"{REMOTE_DIR}/ndarray-cache.json"

if os.path.exists(local_file):
    print(f"üöÄ Connecting to {REMOTE_HOST} to upload the alias...")
    transport = paramiko.Transport((REMOTE_HOST, REMOTE_PORT))
    try:
        transport.connect(username=USERNAME, password=PASSWORD)
        sftp = paramiko.SFTPClient.from_transport(transport)
        print(f"üì§ Uploading ndarray-cache.json...")
        try:
            # The transfer was previously commented out while the success
            # message was still printed; the upload is now actually performed.
            sftp.put(local_file, remote_file)
            print("‚úÖ Upload success!")
        except Exception as e:
            print(f"‚ùå Upload failed: {e}")
        finally:
            sftp.close()
    finally:
        # Close the transport on every path, including a failed connect.
        transport.close()
else:
    # No connection is opened when there is nothing to send.
    print(f"Nothing to upload: {local_file} does not exist.")

In [None]:
%%shell
# @title
# The %%shell cell magic is placed on the first line of the cell, where
# IPython looks for cell magics; the title line is a plain shell comment here.
#sudo update-alternatives --set python3 /usr/bin/python3.13
pip install paramiko

In [None]:
local_path = "/content/vc7b.zip" #@param ["sh4.1ijs", "sh5.1ijs", "g3007.wasm", "g3008.wasm", "g3009.wasm", "sh6.1ijs", "g3010.wasm"] {allow-input: true}
loc_file = "vc7b.zip" #@param ["sh4.1ijs", "sh5.1ijs", "g3007.wasm", "g3008.wasm", "g3009.wasm", "sh6.1ijs", "g3010.wasm"] {allow-input: true}
dest_path = "1ink.us/files/" #@param ["sh4.1ijs", "sh5.1ijs", "g3007.wasm", "g3008.wasm", "g3009.wasm", "sh6.1ijs", "g3010.wasm"] {allow-input: true}
import os
import urllib
import requests as reqs
import re
import paramiko
# Upload the single file at `local_path` to `dest_path + loc_file` over SFTP.
host = "1ink.us"
username  = "ford442"
# SECURITY: never store the password in the notebook source. A literal password
# was previously stored on this line; it must be treated as leaked and changed
# on the server. It is read from the SFTP_PASSWORD environment variable, or
# prompted for without echoing when that variable is unset.
from getpass import getpass

password  = os.environ.get("SFTP_PASSWORD") or getpass(f"SFTP password for {username}@{host}: ")
port = 22
file_name=loc_file
# NOTE(review): dest_path has no leading slash, so the server presumably resolves
# it relative to the login directory; confirm this is the intended location.
destination_path=dest_path+file_name
transport = paramiko.Transport((host, port))
try:
    transport.connect(username = username, password = password)
    sftp = paramiko.SFTPClient.from_transport(transport)
    try:
        sftp.put(local_path, destination_path)
    finally:
        sftp.close()
finally:
    # Close the transport even when connect or put raises.
    transport.close()

# Task
Compile the Vicuna-7b-v1.5 model into a WebAssembly (WASM) binary using MLC LLM, preparing it for deployment with WebLLM, and ensure the resulting WASM file is generated and its path is provided. This includes installing necessary build dependencies (git-lfs, cmake, Rust, Emscripten), cloning the MLC LLM repository, building the TVM Web Runtime, and then compiling the model.

## Install Build Dependencies

### Subtask:
Install essential build tools including git-lfs, cmake, Rust, and Emscripten. Clone the MLC LLM repository recursively and configure the Emscripten environment variables.


**Reasoning**:
The first step is to install the MLC LLM nightly packages and git-lfs. Cell `pBK2wCzN7tI8` in the provided notebook handles this installation.



In [None]:
#@title Setup
import os
import sys
import subprocess

# 1. Install or upgrade the MLC LLM nightly wheels (cu128 builds) from the MLC wheel index.
#    Note: `-U` upgrades packages that are already installed; it does NOT force a reinstall.
print("üì¶ Installing MLC LLM and dependencies...")
!python -m pip install --pre -U -f https://mlc.ai/wheels mlc-llm-nightly-cu128 mlc-ai-nightly-cu128
# git-lfs is installed through apt (one of the build dependencies listed in the task description).
!apt-get install -y git-lfs