<a href="https://colab.research.google.com/github/gitleon8301/MY-AI-Gizmo-working/blob/main/Colab-TextGen-GPU.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# oobabooga/text-generation-webui

After running both cells, a public Gradio URL will appear at the bottom in about 10 minutes. You can optionally generate an API link.

* Project page: https://github.com/oobabooga/text-generation-webui
* Gradio server status: https://status.gradio.app/

In [None]:
#@title 1. Keep this tab alive to prevent Colab from disconnecting you { display-mode: "form" }

#@markdown Press play on the music player that will appear below:
%%html
<audio src="https://oobabooga.github.io/silence.m4a" controls>

In [None]:
# Paste this ENTIRE cell into Colab and run it
import os, shutil, subprocess, sys, time
from pathlib import Path

# ---------- USER-EDITABLE ----------
GIT_URL = "https://github.com/gitleon8301/MY-AI-Gizmo-working"
REPO_DIR_NAME = "text-generation-webui"   # directory the repo is cloned into; start_linux.sh is run from there
MODEL_REPO = "google/gemma-2-9b-it-GGUF"  # Hugging Face repo id of the GGUF model
SPECIFIC_FILE = "gemma-2-9b-it-Q4_K_M.gguf"  # single quantized file to fetch from MODEL_REPO
# If the repo requires authentication for large files, set HF_TOKEN in the
# environment before running this cell, e.g.:
# os.environ["HF_TOKEN"] = "hf_..."
# ------------------------------------

# Log files. Both paths are relative, so they resolve against the process
# working directory at the moment they are written.
DOWNLOAD_LOG = Path("download.log")
START_LOG = Path("webui.log")


def resolve_workspace_root(start, repo_name):
    """Return the directory that should contain the cloned repo.

    This cell changes the working directory into the repo later on, and in a
    notebook that change survives until the kernel restarts. When the cell is
    run a second time, ``start`` is therefore the repo itself; in that case
    the parent directory is returned so that paths stay the same across runs
    instead of nesting a second clone inside the first.

    Args:
        start: Directory to inspect (normally ``Path.cwd()``).
        repo_name: Name of the clone directory.

    Returns:
        ``start.parent`` when ``start`` is named ``repo_name`` and contains
        ``start_linux.sh``; otherwise ``start`` unchanged.
    """
    start = Path(start)
    if start.name == repo_name and (start / "start_linux.sh").exists():
        return start.parent
    return start


# Stable paths, independent of whether this cell already ran in this kernel.
cwd = resolve_workspace_root(Path.cwd(), REPO_DIR_NAME)
repo_dir = cwd / REPO_DIR_NAME
# NOTE(review): this folder sits next to the repo, not inside it — confirm
# that the web UI actually looks for models here.
models_dir = cwd / "models" / MODEL_REPO.replace("/", "_")
models_dir.mkdir(parents=True, exist_ok=True)

# Child processes inherit os.environ, so clean it up before launching any.
# Presumably PYTHONPATH/MPLBACKEND are dropped so notebook-specific values do
# not leak into the installer and web UI — TODO confirm.
os.environ.pop('PYTHONPATH', None)
os.environ.pop('MPLBACKEND', None)
# Force CPU only: an empty device list hides all GPUs from CUDA.
os.environ["CUDA_VISIBLE_DEVICES"] = ""

def run(cmd, cwd=None, env=None, logfile=None, check=False):
    """Run a shell command, echoing its output live and capturing it.

    stdout and stderr are merged into one stream. Each line is printed as it
    arrives, collected for the return value and, when ``logfile`` is given,
    appended to that file.

    Args:
        cmd: Command line, executed through the shell.
        cwd: Working directory for the command; ``None`` keeps the current one.
        env: Environment mapping for the command; a falsy value means
            ``os.environ``.
        logfile: Optional path of a file the output is appended to.
        check: When true, a non-zero exit status raises instead of returning.

    Returns:
        A ``(returncode, output)`` tuple, where ``output`` is the combined
        stdout/stderr text.

    Raises:
        subprocess.CalledProcessError: If ``check`` is true and the command
            exits with a non-zero status. The captured text is attached as
            ``output``.
    """
    print(f"\n>>> RUN: {cmd}")
    out_lines = []
    # Open the log once for the whole run instead of once per output line.
    log_handle = open(logfile, "a") if logfile else None
    try:
        # errors="replace" keeps the stream readable when the command emits
        # bytes that are not valid in the locale encoding.
        with subprocess.Popen(cmd, shell=True, cwd=cwd, env=env or os.environ,
                              stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                              text=True, errors="replace") as proc:
            for line in proc.stdout:
                out_lines.append(line)
                if log_handle is not None:
                    log_handle.write(line)
                    # Flush per line so the log stays current during long runs.
                    log_handle.flush()
                # Echo in real time so the user sees progress.
                print(line, end="")
    finally:
        if log_handle is not None:
            log_handle.close()
    # Leaving the Popen context has waited for the process, so returncode is set.
    output = "".join(out_lines)
    if check and proc.returncode != 0:
        raise subprocess.CalledProcessError(proc.returncode, cmd, output=output)
    return proc.returncode, output

# 1) Fetch the web-UI sources unless a checkout is already present
clone_cmd = f"git clone {GIT_URL} {REPO_DIR_NAME}"
if not repo_dir.exists():
    print("Cloning web-UI repo...")
    # check=True: nothing below can work without the repo, so fail loudly.
    run(clone_cmd, check=True)

# The later steps invoke ./start_linux.sh and download-model.py by relative
# path, so they must run from the repo root.
os.chdir(repo_dir)

# 2) Run the install script without auto-launching the UI afterwards.
# NOTE(review): GPU_CHOICE=A is passed here although the rest of this cell
# forces CPU mode — confirm this is the intended installer choice.
print("\nRunning install (LAUNCH_AFTER_INSTALL=FALSE)...")
try:
    install_rc, _ = run(
        'GPU_CHOICE=A LAUNCH_AFTER_INSTALL=FALSE INSTALL_EXTENSIONS=FALSE ./start_linux.sh',
        check=False,
        logfile=START_LOG,
    )
except Exception as e:
    # Best effort: a failure to even start the installer must not abort the cell.
    print("Install step could not be run, continuing:", repr(e))
else:
    # With check=False a failing installer does not raise, so the exit status
    # has to be inspected explicitly for the warning to ever be shown.
    if install_rc != 0:
        print("Install script returned non-zero, continuing (some installs print non-zero).")
        print(f"Installer exit code: {install_rc}. See {START_LOG} for details.")

# 3) Wipe the Hugging Face hub cache to avoid 416 partial-range issues
hf_cache = Path.home().joinpath(".cache", "huggingface", "hub")
if hf_cache.exists():
    print("Removing Hugging Face cache to avoid Range/partial-file issues...")
    try:
        shutil.rmtree(hf_cache)
    except Exception as err:
        # Best effort only: a cache that cannot be removed is reported, not fatal.
        print("Failed to remove HF cache:", err)

# 4) Attempt to download the specific GGUF file using download-model.py with retries
download_success = False
attempts = 3
# Start from an empty log so it only contains output from this run.
if DOWNLOAD_LOG.exists():
    DOWNLOAD_LOG.unlink()
# The command is identical for every attempt, so build it once.
cmd = f"python download-model.py {MODEL_REPO} --specific-file {SPECIFIC_FILE}"
for attempt in range(1, attempts + 1):
    print(f"\nDownload attempt {attempt}/{attempts} using download-model.py (specific file)...")
    ret, out = run(cmd, logfile=DOWNLOAD_LOG)
    if ret == 0:
        print("download-model.py succeeded.")
        download_success = True
        break
    if attempt < attempts:
        print(f"download-model.py failed (exit {ret}). Sleeping before retry...")
        # Linear back-off: 5 s, then 10 s.
        time.sleep(5 * attempt)
    else:
        # No retry follows the last attempt, so do not sleep before moving on.
        print(f"download-model.py failed (exit {ret}). No attempts left.")

# 5) Fallback: fetch the file directly with huggingface_hub when download-model.py gave up
if not download_success:
    print("\nPrimary downloader failed. Trying direct download via huggingface_hub.hf_hub_download fallback...")
    # Install on demand; pip output is appended to the download log.
    run("pip install -q huggingface_hub", logfile=DOWNLOAD_LOG)
    try:
        from huggingface_hub import hf_hub_download

        repo_id, filename = MODEL_REPO, SPECIFIC_FILE
        target_path = models_dir / filename
        if not target_path.exists():
            # A token is only passed along when HF_TOKEN is set in the environment.
            hf_token = os.environ.get("HF_TOKEN")
            if hf_token:
                print("Using HF_TOKEN from environment.")
            print(f"Downloading {filename} from repo {repo_id} to {target_path} ...")
            # Download and copy errors are reported separately from import errors.
            try:
                fetched = Path(
                    hf_hub_download(
                        repo_id=repo_id,
                        filename=filename,
                        cache_dir=str(models_dir),
                        token=hf_token,
                    )
                )
                if not fetched.exists():
                    print("hf_hub_download did not create expected file.")
                else:
                    # The returned path may sit in a nested folder below
                    # models_dir; make sure a copy exists at the top level.
                    dest = models_dir / fetched.name
                    if fetched.resolve() != dest.resolve():
                        shutil.copy(fetched, dest)
                    print("Downloaded to:", dest)
                    download_success = True
            except Exception as exc:
                print("hf_hub_download failed:", repr(exc))
        else:
            # Nothing to do: an earlier run already left the file in place.
            print("File already exists:", target_path)
            download_success = True
    except Exception as exc:
        print("Failed to import huggingface_hub or run fallback:", repr(exc))

# 6) Final check: look for any .gguf file in models dir.
# NOTE(review): download_success can be True while models_dir holds no .gguf
# (download-model.py runs from the repo directory) — confirm where it saves.
gguf_files = list(models_dir.glob("*.gguf"))
if not download_success and not gguf_files:
    print("\nERROR: model download still failed. Check download.log for details:")
    if DOWNLOAD_LOG.exists():
        print("\n--- Last 200 lines of download.log ---")
        run(f"tail -n 200 {DOWNLOAD_LOG}")
    else:
        print("download.log not found.")
    # Interpolate the folder name; the previous plain string printed the
    # "{MODEL_REPO.replace(...)}" placeholder literally.
    model_folder = MODEL_REPO.replace('/', '_')
    print(
        "\nPossible actions:\n"
        " - Ensure MODEL_REPO and SPECIFIC_FILE are correct.\n"
        " - If the repo is private, set HF_TOKEN environment variable (hf_... token) before running.\n"
        " - Manually download the desired .gguf file from Hugging Face and upload it to the Colab session into "
        f"`models/{model_folder}`.\n"
    )
    raise SystemExit(1)

print("\nModel file present. Files in", models_dir)
run(f"ls -lh {models_dir}")

# 7) Launch the web UI in CPU mode; its output is streamed here and appended to webui.log
print("\nStarting the web UI in CPU mode. Logs appended to", START_LOG)
# NOTE(review): the model is passed by folder-style name (repo id with "/"
# replaced by "_") — confirm this matches what the web UI expects.
model_arg = MODEL_REPO.replace('/', '_')
start_cmd = f"./start_linux.sh --cpu --share --model {model_arg}"
try:
    # Blocks for as long as the UI is running; this is the normal case.
    run(start_cmd, logfile=START_LOG, check=True)
except subprocess.CalledProcessError as launch_err:
    print(f"Web UI failed with exit code {launch_err.returncode}. See {START_LOG} for details.")
    # Show the end of the log, then re-raise so the cell is marked as failed.
    run(f"tail -n 200 {START_LOG}")
    raise
