<a href="https://colab.research.google.com/github/gitleon8301/MY-AI-Gizmo-working/blob/main/Colab-TextGen-GPU.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# oobabooga/text-generation-webui

After running both cells, a public gradio URL will appear at the bottom in around 10 minutes. You can optionally generate an API link.

* Project page: https://github.com/oobabooga/text-generation-webui
* Gradio server status: https://status.gradio.app/

In [None]:
#@title 1. Keep this tab alive to prevent Colab from disconnecting you { display-mode: "form" }

#@markdown Press play on the music player that will appear below:
%%html
<audio src="https://oobabooga.github.io/silence.m4a" controls>

In [None]:


import os
import subprocess
import shutil
import re
import time
import threading
from pathlib import Path

# ---------- User configuration ----------
REPO_ZIP = "https://github.com/gitleon8301/MY-AI-Gizmo-working/archive/refs/heads/main.zip"
WORK_DIR = Path("/content/text-generation-webui")             # where repo will live
DRIVE_ROOT = Path("/content/drive/MyDrive/MY-AI-Gizmo")      # persistent storage
LOG_DIR = DRIVE_ROOT / "logs"
HEARTBEAT_INTERVAL = 30  # seconds for silent-install heartbeat
MODEL_TO_DOWNLOAD = os.environ.get("MODEL_TO_DOWNLOAD")  # optional: model id or filename for download-model.py
# ----------------------------------------

def run_cmd(cmd, cwd=None, env=None, capture=False):
    """
    Execute ``cmd`` through the shell and wait for it to finish.

    Args:
        cmd: Command line string, interpreted by the shell.
        cwd: Working directory for the child (None = inherit).
        env: Environment mapping for the child (None = inherit).
        capture: When true, stdout/stderr are captured on the result
            instead of being inherited from this process.

    Returns:
        The ``subprocess.CompletedProcess`` for the finished command
        (text mode, so captured streams are ``str``).
    """
    completed = subprocess.run(
        cmd,
        shell=True,
        cwd=cwd,
        env=env,
        capture_output=capture,
        text=True,
    )
    return completed

def stream_cmd(cmd, cwd=None, env=None, out_path=None):
    """
    Run a shell command, echoing its combined stdout/stderr line by line.

    Args:
        cmd: Command line string, executed through the shell.
        cwd: Working directory for the child process (None = inherit).
        env: Environment mapping for the child process (None = inherit).
        out_path: Optional log file path. When given, every echoed line and
            every heartbeat message is appended to it; when omitted, output
            is only printed.

    Returns:
        The child's exit code.

    While the child runs, a daemon thread prints a heartbeat message whenever
    no output has arrived for HEARTBEAT_INTERVAL seconds. Errors while reading
    or logging are reported with an "[error]" line rather than raised.
    """
    proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                            cwd=cwd, env=env, text=True, bufsize=1)
    last_output = time.time()
    stop_flag = threading.Event()

    def heartbeat():
        # Event.wait() serves as both the sleep and the stop check: it returns
        # True (ending the loop) as soon as stop_flag is set.
        while not stop_flag.wait(HEARTBEAT_INTERVAL):
            # last_output is read from the enclosing scope, which the reader
            # loop below keeps updating.
            if time.time() - last_output >= HEARTBEAT_INTERVAL:
                message = f"[heartbeat] still working... (no new output for ~{HEARTBEAT_INTERVAL}s)"
                print(message)
                if out_path:
                    with open(out_path, "a", encoding="utf-8") as f:
                        f.write(message + "\n")

    hb = threading.Thread(target=heartbeat, daemon=True)
    hb.start()

    logfile = None
    try:
        # Open the log only when one was requested. None is not a context
        # manager, so the file is opened explicitly instead of through a
        # conditional `with`, which would fail whenever out_path is omitted.
        if out_path:
            logfile = open(out_path, "a", encoding="utf-8")
        for line in proc.stdout:
            last_output = time.time()
            print(line, end="")
            if logfile is not None:
                logfile.write(line)
    except Exception as e:
        print(f"[error] stream read error: {e}")
    finally:
        if logfile is not None:
            try:
                logfile.close()
            except OSError as e:
                # A failed flush/close must not prevent reaping the child.
                print(f"[error] log close error: {e}")
        proc.wait()
        stop_flag.set()
        hb.join(timeout=1)
    return proc.returncode

def ensure_drive_dirs():
    """
    Create the persistent folder layout under DRIVE_ROOT.

    Every listed sub-folder (and LOG_DIR) is created together with any
    missing parents; folders that already exist are left alone.
    """
    subdir_names = (
        "models", "loras", "training", "characters", "presets", "prompts",
        "settings", "chat-history", "instruct-history", "outputs", "images",
        "logs", "cache", "extensions", "softprompts",
    )
    for subdir in subdir_names:
        (DRIVE_ROOT / subdir).mkdir(parents=True, exist_ok=True)
    LOG_DIR.mkdir(parents=True, exist_ok=True)

def _remove_path(p: Path):
    try:
        if p.is_symlink():
            p.unlink()
        elif p.is_dir():
            shutil.rmtree(p)
        elif p.exists():
            p.unlink()
    except Exception:
        pass

def create_link_or_copy(src: Path, dest: Path):
    """
    Make ``dest`` refer to ``src``: by symlink if possible, otherwise by copy.

    First a symlink ``dest -> src`` is attempted (replacing anything already
    at ``dest``). If any step of that fails, the content of ``src`` is copied
    to ``dest`` instead (a tree copy for directories, a metadata-preserving
    file copy otherwise).

    Returns:
        True when either strategy succeeded, False when both failed.
    """
    def _link():
        # Replace whatever currently occupies dest with a symlink to src.
        dest.parent.mkdir(parents=True, exist_ok=True)
        if dest.exists() or dest.is_symlink():
            _remove_path(dest)
        os.symlink(str(src), str(dest), target_is_directory=src.is_dir())

    def _copy():
        # Fallback: duplicate the content rather than linking to it.
        if src.is_dir():
            if dest.exists():
                _remove_path(dest)
            shutil.copytree(src, dest)
            return
        dest.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(src, dest)

    # Strategies are tried in order; the first one that does not raise wins.
    for strategy in (_link, _copy):
        try:
            strategy()
        except Exception:
            continue
        return True
    return False

def cleanup_broken_models():
    """
    Delete suspiciously small model files under DRIVE_ROOT/models.

    Any entry (searched recursively) whose name matches one of the model
    extensions and whose size is below 100 KiB is collected and then
    unlinked. Stat and unlink errors are ignored. Does nothing when the
    models folder is absent.
    """
    models_dir = DRIVE_ROOT / "models"
    if not models_dir.exists():
        return

    size_floor = 100 * 1024
    patterns = ("*.gguf", "*.safetensors", "*.bin", "*.pth", "*.pt")

    # Collect first, delete afterwards, so the tree is not modified while
    # it is still being walked.
    undersized = []
    for pattern in patterns:
        for candidate in models_dir.rglob(pattern):
            try:
                is_small = candidate.stat().st_size < size_floor
            except Exception:
                continue
            if is_small:
                undersized.append(candidate)

    for victim in undersized:
        try:
            victim.unlink()
        except Exception:
            pass

def download_and_extract_repo():
    """
    Fetch the repository ZIP, unpack it under /content and move it to WORK_DIR.

    If WORK_DIR already exists nothing is downloaded. Otherwise the archive is
    fetched with wget, falling back to curl; a download counts as successful
    when the resulting file is larger than 1000 bytes. The first extracted
    folder matching ``MY-AI-Gizmo-working-*`` is renamed to WORK_DIR.

    Returns:
        True when WORK_DIR is available afterwards, False on download or
        extraction failure.
    """
    if WORK_DIR.exists():
        print(f"[info] WORK_DIR exists: {WORK_DIR}")
        return True

    archive = Path("/content/repo.zip")
    # Drop any stale archive so the size check below reflects this download.
    try:
        archive.unlink()
    except Exception:
        pass

    print("[info] downloading repository zip...")
    fetch_commands = (
        f"wget -q -O {archive} {REPO_ZIP}",
        f"curl -s -L -o {archive} {REPO_ZIP}",
    )
    for fetch in fetch_commands:
        run_cmd(fetch)
        # Success is judged by the file on disk, not by the exit status.
        if archive.exists() and archive.stat().st_size > 1000:
            break
    else:
        print("[error] download failed. Check network or REPO_ZIP.")
        return False

    print("[info] extracting...")
    try:
        run_cmd(f"unzip -q {archive} -d /content")
        candidates = Path("/content").glob("MY-AI-Gizmo-working-*")
        extracted = next(candidates, None)
        if extracted is None:
            print("[error] extracted but expected folder not found")
            return False
        extracted.rename(WORK_DIR)
        print(f"[info] repo extracted to {WORK_DIR}")
        return True
    except Exception as e:
        print(f"[error] unzip/extract failed: {e}")
        return False

def patch_fast_install_flags():
    """
    Move repositories/llama.cpp aside and build the installer env flags.

    If WORK_DIR/repositories/llama.cpp is a directory it is renamed to
    ``llama.cpp.disabled`` (replacing any earlier ``.disabled`` folder).
    A failure during that step is reported with a warning and otherwise
    ignored.

    Returns:
        A ``(renamed, env_flags)`` tuple: ``renamed`` is the Path of the
        moved-aside folder, or None when nothing was renamed; ``env_flags``
        is a dict of environment variables to pass to the installer.
    """
    source_dir = WORK_DIR / "repositories" / "llama.cpp"
    parked = None
    try:
        if source_dir.is_dir():
            parked = source_dir.with_name(source_dir.name + ".disabled")
            # Path.rename needs a free target, so clear an older parked copy.
            if parked.exists():
                shutil.rmtree(parked, ignore_errors=True)
            source_dir.rename(parked)
            print("[info] renamed repositories/llama.cpp to prevent auto-build")
    except Exception:
        print("[warn] unable to rename llama.cpp; installer flags will be used instead")
        parked = None

    flags = dict(
        GPU_CHOICE="A",
        LAUNCH_AFTER_INSTALL="FALSE",
        INSTALL_EXTENSIONS="FALSE",
        SKIP_LLAMACPP_BUILD="TRUE",
        SKIP_TORCH_TEST="TRUE",
        FORCE_CUDA="FALSE",
        MPLBACKEND="Agg",
    )
    return parked, flags

def maybe_download_model(env=None):
    """
    Run the repo's download-model.py for MODEL_TO_DOWNLOAD, if one is set.

    Args:
        env: Environment mapping handed to the download process
            (None = inherit).

    Returns None in every case. Nothing happens when MODEL_TO_DOWNLOAD is
    empty; a missing script or a non-zero exit code only produces a warning.
    Output is streamed and appended to LOG_DIR/download-model.log.
    """
    if not MODEL_TO_DOWNLOAD:
        return

    downloader = WORK_DIR / "download-model.py"
    if not downloader.exists():
        print("[warn] download-model.py not found in repo; skipping model download.")
        return

    print(f"[info] downloading model: {MODEL_TO_DOWNLOAD}")
    exit_code = stream_cmd(
        f'python "{downloader}" "{MODEL_TO_DOWNLOAD}"',
        cwd=str(WORK_DIR),
        env=env,
        out_path=str(LOG_DIR / "download-model.log"),
    )
    if exit_code != 0:
        print(f"[warn] model download exited with code {exit_code}")

# ---------- Main flow ----------

# 1) If in Colab, try mount Drive
# Colab is detected purely by whether the google.colab package imports.
IN_COLAB = False
try:
    import google.colab
    from google.colab import drive as gdrive
    IN_COLAB = True
except Exception:
    IN_COLAB = False

if IN_COLAB:
    try:
        gdrive.mount("/content/drive", force_remount=False)
    except Exception as e:
        # A failed mount is only a warning; the script keeps going.
        # NOTE(review): the folders created in step 2 would then presumably
        # live on the VM's local disk and not persist - confirm.
        print(f"[warn] google drive mount failed: {e}")

# 2) Ensure Drive dirs & cleanup
# Creates the folder layout under DRIVE_ROOT, then deletes model files
# smaller than 100 KiB from DRIVE_ROOT/models.
ensure_drive_dirs()
cleanup_broken_models()

# 3) Download or detect repo
# Abort only when the download failed AND no WORK_DIR is present.
ok = download_and_extract_repo()
if not ok and not WORK_DIR.exists():
    raise SystemExit("[fatal] cannot obtain repository. Fix REPO_ZIP or network and retry.")

# 4) Change cwd
os.chdir(str(WORK_DIR))

# 5) Create symlinks from repo -> Drive for persistence
# Each entry is (path relative to WORK_DIR, path relative to DRIVE_ROOT,
# is_settings flag marking the two settings-file entries).
links_map = [
    ("models", "models", False),
    ("loras", "loras", False),
    ("user_data/characters", "characters", False),
    ("user_data/presets", "presets", False),
    ("user_data/settings.yaml", "settings/settings.yaml", True),
    ("user_data/settings.json", "settings/settings.json", True),
    ("user_data/chat", "chat-history", False),
    ("outputs", "outputs", False),
]
for local, drive_folder, is_settings in links_map:
    drive_path = DRIVE_ROOT / drive_folder
    # ensure Drive path exists (settings file creation handled below)
    # With the current links_map both settings entries have a file suffix,
    # so this first branch is never taken.
    if is_settings and drive_path.suffix == "":
        # defensive: ensure parent exists
        drive_path.parent.mkdir(parents=True, exist_ok=True)
    else:
        # Conditional expression used as a statement: create the Drive folder
        # only when it is missing and has no file suffix; otherwise do nothing.
        drive_path.mkdir(parents=True, exist_ok=True) if not drive_path.exists() and not drive_path.suffix else None
    local_path = WORK_DIR / local
    # Whatever the repo has at the local path is removed before linking,
    # so the Drive copy becomes the only content at that location.
    try:
        if local_path.exists() or local_path.is_symlink():
            _remove_path(local_path)
    except Exception:
        pass
    # The Drive path is the link target (src); the repo path is the link (dest).
    ok = create_link_or_copy(drive_path, local_path)
    if not ok:
        print(f"[warn] failed to link or copy {local} -> {drive_path}")

# 6) Ensure settings file exists on Drive; copy to repo user_data if needed
drive_settings = DRIVE_ROOT / "settings" / "settings.yaml"
local_settings = WORK_DIR / "user_data" / "settings.yaml"
local_settings.parent.mkdir(parents=True, exist_ok=True)
# NOTE(review): when step 5 symlinked local_settings to drive_settings, both
# names refer to the same file; the copy2 calls below would then presumably
# raise SameFileError, which the except clauses swallow - confirm.
if drive_settings.exists() and drive_settings.stat().st_size > 0:
    # A non-empty Drive settings file wins: copy it into the repo.
    try:
        shutil.copy2(drive_settings, local_settings)
    except Exception:
        pass
else:
    # No usable Drive settings yet: write a minimal default and mirror it.
    try:
        local_settings.write_text("# minimal\nlisten: true\nshare: true\n")
        # ensure Drive copy exists
        drive_settings.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(local_settings, drive_settings)
    except Exception:
        pass

# 7) Installer preparation: avoid long llama.cpp build and prepare env
# env_marker is the interpreter path whose existence is used below as the
# "already installed" signal.
env_marker = WORK_DIR / "installer_files" / "env" / "bin" / "python"
renamed_llama, fast_env_flags = patch_fast_install_flags()

# 8) Make start script executable if exists
start_sh = WORK_DIR / "start_linux.sh"
if start_sh.exists():
    try:
        # Add the execute bit for user, group and others; keep other mode bits.
        start_sh.chmod(start_sh.stat().st_mode | 0o111)
    except Exception:
        pass

# 9) Run installer if needed (live stream + log)
# The log name carries a Unix timestamp, so each run gets its own file.
installer_log = LOG_DIR / f"installer_{int(time.time())}.log"
if env_marker.exists():
    print("[info] virtualenv already exists; skipping full install")
else:
    print("[info] running installer (logs -> {})".format(installer_log))
    # The installer gets the current environment plus the fast-install flags.
    # NOTE(review): how start_linux.sh interprets those flags is not visible
    # here - confirm against the installer script.
    env = os.environ.copy()
    env.update(fast_env_flags)
    # spawn installer; use stream_cmd to log and provide heartbeat
    if start_sh.exists():
        rc = stream_cmd("bash start_linux.sh", cwd=str(WORK_DIR), env=env, out_path=str(installer_log))
        if rc != 0:
            # A failed install is only a warning; the flow continues.
            print(f"[warn] installer exited with code {rc} (see {installer_log})")
    else:
        print("[warn] start_linux.sh not found; skipping installer step")

# 10) Optionally download a model (user sets MODEL_TO_DOWNLOAD)
# Receives a plain copy of os.environ, without the fast-install flags.
maybe_download_model(env=os.environ.copy())

# 11) Attempt to restore llama.cpp name (do not auto-build - user can build later)
# renamed_llama is None unless step 7 actually moved the folder aside.
if renamed_llama:
    try:
        dst = WORK_DIR / "repositories" / "llama.cpp"
        # Only restore when nothing has taken the original name meanwhile.
        if not dst.exists():
            renamed_llama.rename(dst)
            print("[info] restored repositories/llama.cpp to allow manual build later")
    except Exception:
        pass

# 12) Final CUDA fix (best effort)
def fix_cuda_library_path():
    """
    Put directories that contain libcuda at the front of LD_LIBRARY_PATH.

    Each candidate directory is kept only if it exists and holds at least one
    entry matching ``libcuda.so*``. Matching directories are prepended to the
    current LD_LIBRARY_PATH, so entries that were already configured are
    preserved instead of being overwritten. When no candidate matches, the
    variable is left untouched.

    Returns:
        None. The only effect is the update of ``os.environ``.
    """
    cuda_paths = [
        '/usr/local/cuda/lib64',
        '/usr/local/cuda-12/lib64',
        '/usr/lib/x86_64-linux-gnu',
        '/usr/local/nvidia/lib64',
    ]
    valid = [
        p for p in cuda_paths
        if Path(p).exists() and any(Path(p).glob("libcuda.so*"))
    ]
    if not valid:
        return

    # Keep existing entries after the detected ones; skip empty components
    # and anything already listed so the value does not accumulate duplicates.
    existing = os.environ.get('LD_LIBRARY_PATH', '')
    kept = [entry for entry in existing.split(':') if entry and entry not in valid]
    os.environ['LD_LIBRARY_PATH'] = ':'.join(valid + kept)

# Applied before the launch so the os.environ copy handed to the server
# below already carries any LD_LIBRARY_PATH change.
fix_cuda_library_path()

# 13) Launch server, streaming output to a log file on Drive so you keep the URL and logs
server_log = LOG_DIR / f"server_{int(time.time())}.log"
# Use the installer environment's interpreter when present, else python3.
python_exe = str(env_marker) if env_marker.exists() else "python3"
cmd = f'{python_exe} -u server.py --share --listen'
print(f"[info] launching server with: {cmd}")
# stream_cmd returns only after the server process has exited, so the
# lines below run at shutdown.
rc = stream_cmd(cmd, cwd=str(WORK_DIR), env=os.environ.copy(), out_path=str(server_log))
if rc != 0:
    print(f"[warn] server exited with code {rc} (see {server_log})")
else:
    print(f"[info] server terminated (see {server_log})")

# End
print("[done] launcher finished. Logs and persistent data are in:", str(DRIVE_ROOT))


✅ RECOMMENDED MODELS (COPY EXACTLY)
🔹 BEST GENERAL CHAT (START HERE)

Llama-2-7B-Chat

Repo: TheBloke/Llama-2-7B-Chat-GGUF
File: llama-2-7b-chat.Q4_K_M.gguf

🔹 FAST + LIGHT (LOW RAM)

TinyLlama-1.1B-Chat

Repo: TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF
File: tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf

🔹 STRONG CHAT (BETTER THAN LLAMA-2)

Mistral-7B-Instruct

Repo: TheBloke/Mistral-7B-Instruct-v0.2-GGUF
File: mistral-7b-instruct-v0.2.Q4_K_M.gguf

🔹 CODING MODEL

Code LLaMA-7B

Repo: TheBloke/CodeLlama-7B-GGUF
File: codellama-7b.Q4_K_M.gguf

🔹 ROLEPLAY / STORY

MythoMax-L2-13B (needs more RAM)

Repo: TheBloke/MythoMax-L2-13B-GGUF
File: mythomax-l2-13b.Q4_K_M.gguf

🔹 VERY FAST / TEST MODEL

Phi-2 (2.7B)

Repo: TheBloke/phi-2-GGUF
File: phi-2.Q4_K_M.gguf

⚙️ WHAT LOADER TO USE (IMPORTANT)

For ALL models above:

Loader: llama.cpp


Repo: TheBloke/Llama-2-7B-Chat-GGUF
File: llama-2-7b-chat.Q4_K_M.gguf
