<a href="https://colab.research.google.com/github/gitleon8301/MY-AI-Gizmo-working/blob/main/Colab-TextGen-GPU.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# oobabooga/text-generation-webui

After running both cells, a public Gradio URL will appear at the bottom in around 10 minutes. You can optionally generate an API link.

* Project page: https://github.com/oobabooga/text-generation-webui
* Gradio server status: https://status.gradio.app/

In [None]:
#@title 1. Keep this tab alive to prevent Colab from disconnecting you { display-mode: "form" }

#@markdown Press play on the music player that will appear below:
%%html
<audio src="https://oobabooga.github.io/silence.m4a" controls>

In [None]:
# PASTE THIS IN COLAB AND RUN - NO TOKEN NEEDED!
import os
import shlex
import shutil
import subprocess
import sys
import time
from pathlib import Path

# ---------- USER-EDITABLE ----------
# Repository that provides the web UI, and the directory name it is cloned into.
GIT_URL = "https://github.com/gitleon8301/MY-AI-Gizmo-working"
REPO_DIR_NAME = "text-generation-webui"

# Enable exactly one MODEL_REPO / SPECIFIC_FILE pair below.
# MODEL_REPO is a Hugging Face repo id; SPECIFIC_FILE is the file fetched from it.

# ✅ OPTION 1: Mistral 7B (recommended - good quality, no token needed)
MODEL_REPO = "TheBloke/Mistral-7B-Instruct-v0.2-GGUF"
SPECIFIC_FILE = "mistral-7b-instruct-v0.2.Q4_K_M.gguf"

# ✅ OPTION 2: Phi-3 Mini (smaller, faster, still good)
# MODEL_REPO = "microsoft/Phi-3-mini-4k-instruct-gguf"
# SPECIFIC_FILE = "Phi-3-mini-4k-instruct-q4.gguf"

# ✅ OPTION 3: TinyLlama (very small, very fast, lower quality)
# MODEL_REPO = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
# SPECIFIC_FILE = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"

# NO TOKEN NEEDED - these models are fully open!
# ------------------------------------

# Logs (relative paths: they resolve against the working directory that is
# current at the moment each log line is written).
DOWNLOAD_LOG = Path("download.log")
START_LOG = Path("webui.log")

# Setup paths
cwd = Path.cwd()
# This script later chdir()s into the clone, and in a notebook that change
# survives between runs. When the cell is re-run we therefore start *inside*
# the clone; step back out so repo_dir / models_dir resolve to the same
# locations as on the first run instead of nesting a second clone.
if cwd.name == REPO_DIR_NAME and (cwd / "server.py").is_file():
    cwd = cwd.parent
repo_dir = cwd / REPO_DIR_NAME
# One sub-directory per model repo, with "/" flattened to "_".
models_dir = cwd / "models" / MODEL_REPO.replace("/", "_")
models_dir.mkdir(parents=True, exist_ok=True)

# Environment inherited by every child process started below.
# NOTE(review): PYTHONPATH / MPLBACKEND are presumably dropped so notebook
# specific settings do not leak into the web UI's processes - confirm.
os.environ.pop('PYTHONPATH', None)
os.environ.pop('MPLBACKEND', None)
# Force CPU only: an empty device list hides all GPUs from CUDA.
os.environ["CUDA_VISIBLE_DEVICES"] = ""

def run(cmd, cwd=None, env=None, logfile=None, check=False):
    """Run a shell command, echoing its combined stdout/stderr as it arrives.

    Args:
        cmd: Command line, executed through the shell.
        cwd: Working directory for the child; ``None`` inherits the current one.
        env: Environment mapping for the child; falls back to ``os.environ``.
        logfile: Optional path; every output line is appended to this file.
        check: When true, a non-zero exit status raises instead of returning.

    Returns:
        A ``(returncode, output)`` tuple, where ``output`` is everything the
        child wrote to stdout and stderr (merged), as one string.

    Raises:
        subprocess.CalledProcessError: If ``check`` is true and the command
            exits with a non-zero status. The captured output is attached.
    """
    print(f"\n>>> RUN: {cmd}")
    captured = []
    # Open the log once instead of once per line. Line buffering keeps the
    # file current while a long-running command is still streaming.
    log = (
        open(logfile, "a", buffering=1, encoding="utf-8", errors="replace")
        if logfile
        else None
    )
    try:
        with subprocess.Popen(
            cmd,
            shell=True,
            cwd=cwd,
            env=env or os.environ,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            # Tools occasionally emit bytes that are invalid in the locale
            # encoding; substitute them rather than abort the stream.
            errors="replace",
        ) as proc:
            for line in proc.stdout:
                captured.append(line)
                if log is not None:
                    log.write(line)
                print(line, end="")
            returncode = proc.wait()
    finally:
        if log is not None:
            log.close()

    output = "".join(captured)
    if check and returncode != 0:
        raise subprocess.CalledProcessError(returncode, cmd, output=output)
    return returncode, output

# 1) Clone the repo if missing
if not repo_dir.exists():
    print("📦 Cloning web-UI repo...")
    # Clone into the absolute target (quoted) so the result does not depend on
    # the current directory or on the path being free of spaces.
    run(f"git clone {shlex.quote(GIT_URL)} {shlex.quote(str(repo_dir))}", check=True)

# Everything below (installer, logs, server start) runs relative to the clone.
os.chdir(repo_dir)

# 2) Install dependencies (skip auto-launch)
print("\n🔧 Running installation (CPU mode)...")
install_rc = run(
    'GPU_CHOICE=N LAUNCH_AFTER_INSTALL=FALSE INSTALL_EXTENSIONS=FALSE ./start_linux.sh',
    check=False, logfile=START_LOG)[0]
# The installer is best-effort (a failure does not abort the script), but a
# failed install should not pass silently either.
if install_rc != 0:
    print(f"\n⚠️  Installer exited with status {install_rc}; continuing anyway. "
          f"See {START_LOG} for details.")

# 3) Install huggingface_hub
print("\n📚 Installing huggingface_hub...")
# Use this interpreter's pip so the package lands where the import below
# will look for it.
run(f"{shlex.quote(sys.executable)} -m pip install -q huggingface_hub", logfile=DOWNLOAD_LOG)

# 4) Download model - NO TOKEN REQUIRED!
print(f"\n⬇️  Downloading {SPECIFIC_FILE} from {MODEL_REPO}...")
print("✅ This model is fully open - no authentication needed!")

target_file = models_dir / SPECIFIC_FILE

# Anything under ~1 MB is treated as a stub or partial file, not a real model.
# This check runs before importing huggingface_hub so that an already
# downloaded model never depends on that package being importable.
if target_file.exists() and target_file.stat().st_size > 1_000_000:
    print(f"✓ Model file already exists: {target_file}")
else:
    try:
        from huggingface_hub import hf_hub_download

        print(f"Downloading to: {models_dir}")
        # `resume_download` is deprecated in huggingface_hub (interrupted
        # downloads resume automatically), so it is no longer passed.
        downloaded_path = hf_hub_download(
            repo_id=MODEL_REPO,
            filename=SPECIFIC_FILE,
            local_dir=str(models_dir),
            # NO token parameter - not needed!
        )
        print(f"✓ Downloaded: {downloaded_path}")

    except Exception as e:
        # Deliberately broad: any failure (import, network, API change)
        # should fall through to the plain-HTTP fallback below.
        print(f"\n❌ Download failed: {repr(e)}")
        print("\nTrying alternative download method...")

        # Fallback: use wget
        try:
            file_url = f"https://huggingface.co/{MODEL_REPO}/resolve/main/{SPECIFIC_FILE}"
            target = models_dir / SPECIFIC_FILE
            run(f"wget -c {shlex.quote(file_url)} -O {shlex.quote(str(target))}",
                logfile=DOWNLOAD_LOG, check=True)
            print(f"✓ Downloaded via wget: {target}")
        except Exception as e2:
            print(f"❌ Wget also failed: {repr(e2)}")
            print("\n🔧 Manual download option:")
            print(f"1. Go to: https://huggingface.co/{MODEL_REPO}/tree/main")
            print(f"2. Download: {SPECIFIC_FILE}")
            print(f"3. Upload it to Colab into this folder: {models_dir}")
            raise SystemExit(1) from e2

# 5) Verify model file exists
# Sorted so the list is deterministic regardless of directory order.
gguf_files = sorted(models_dir.glob("*.gguf"))
if not gguf_files:
    print(f"\n❌ No .gguf files found in {models_dir}")
    raise SystemExit(1)

print(f"\n✓ Model ready! Files in {models_dir}:")
# Quote the path: it is interpolated into a shell command line.
run(f"ls -lh {shlex.quote(str(models_dir))}")

# 6) Start the web UI (CPU mode with --share)
print("\n🚀 Starting web UI in CPU mode...")
print("⚠️  CPU inference is slow - expect 1-5 tokens/second")
print("📝 Logs streaming to", START_LOG)

# The model is addressed by its directory name under --model-dir.
model_name = MODEL_REPO.replace('/', '_')
# NOTE(review): this runs whichever `python` is first on PATH - confirm it is
# the environment that ./start_linux.sh installed the dependencies into.
start_cmd = (
    f"python server.py "
    f"--cpu "
    f"--share "
    f"--model-dir {shlex.quote(str(models_dir.parent))} "
    f"--model {shlex.quote(model_name)} "
    f"--n-gpu-layers 0 "
    f"--threads {os.cpu_count() or 4}"
)

print("\n🌐 Look for the Gradio public URL in the output below!")
print("=" * 60)

try:
    # check=True so that a crashed server (non-zero exit) actually reaches
    # the failure handler below instead of ending silently.
    run(start_cmd, logfile=START_LOG, check=True)
except KeyboardInterrupt:
    print("\n\n⏹️  Stopped by user")
except Exception as e:
    print(f"\n❌ Web UI failed: {repr(e)}")
    print("\nLast 50 lines of log:")
    run(f"tail -n 50 {shlex.quote(str(START_LOG))}")
    raise