<a href="https://colab.research.google.com/github/gitleon8301/MY-AI-Gizmo-working/blob/main/Colab-TextGen-GPU.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# oobabooga/text-generation-webui

After running both cells, a public Gradio URL will appear at the bottom in about 10 minutes. You can optionally generate an API link.

* Project page: https://github.com/oobabooga/text-generation-webui
* Gradio server status: https://status.gradio.app/

In [None]:
#@title 1. Keep this tab alive to prevent Colab from disconnecting you { display-mode: "form" }

#@markdown Press play on the music player that will appear below:
%%html
<!-- Audio player for silence.m4a; per the cell title, playing it is meant to keep this tab alive. -->
<audio src="https://oobabooga.github.io/silence.m4a" controls>

In [None]:
# ================================================================
# MY-AI-Gizmo • LAUNCHER WITH VERBOSE INSTALLATION (FIXED)
# - avoids long silent llama.cpp builds when possible
# - skips re-install if environment already exists
# - provides heartbeat when install is silent
# - more robust download + symlink fallback
# ================================================================

import os
import subprocess
import shutil
import re
import time
import threading
from pathlib import Path

try:
    from google.colab import drive
except Exception:
    drive = None

# ========== Configuration ==========
# Zip archive of the repository's main branch; fetched in Step 3 when WORK_DIR is missing.
REPO_ZIP = "https://github.com/gitleon8301/MY-AI-Gizmo-working/archive/refs/heads/main.zip"
# Local checkout directory; the extracted archive folder is renamed to this path.
WORK_DIR = Path("/content/text-generation-webui")
# Root folder on the mounted Drive that holds the persistent data folders.
DRIVE_ROOT = Path("/content/drive/MyDrive/MY-AI-Gizmo")
# Used by sh_live: a heartbeat line is printed when no output arrived for this long.
HEARTBEAT_INTERVAL = 30  # seconds
# ===================================

def sh(cmd, check=False, cwd=None, env=None):
    """Run *cmd* through the shell and return the finished process.

    Both stdout and stderr are captured as text on the returned
    ``subprocess.CompletedProcess``. With ``check=True`` a non-zero exit
    status raises ``subprocess.CalledProcessError``; *cwd* and *env* are
    forwarded unchanged to ``subprocess.run``.
    """
    run_options = {
        "shell": True,
        "capture_output": True,
        "text": True,
        "check": check,
        "cwd": cwd,
        "env": env,
    }
    return subprocess.run(cmd, **run_options)

def sh_live(cmd, cwd=None, env=None):
    """Run *cmd* through the shell, echoing its combined stdout/stderr live.

    A background thread prints a heartbeat line whenever no output has been
    seen for HEARTBEAT_INTERVAL seconds, so long silent phases do not look
    like a hang.

    Args:
        cmd: Shell command line to execute.
        cwd: Working directory for the child process, or None to inherit.
        env: Environment mapping for the child process, or None to inherit.

    Returns:
        The exit status of the child process.
    """
    proc = subprocess.Popen(
        cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
        # errors="replace": one undecodable byte in the child's output must not
        # raise in the read loop below and stop the pipe from being drained.
        text=True, errors="replace", bufsize=1, cwd=cwd, env=env
    )

    # Monotonic clock: the silence measurement is immune to wall-clock jumps.
    last_output = time.monotonic()
    stop_flag = threading.Event()

    def heartbeat():
        # Event.wait() returns False on timeout, so the loop body runs once per
        # interval until stop_flag is set.
        while not stop_flag.wait(HEARTBEAT_INTERVAL):
            if time.monotonic() - last_output >= HEARTBEAT_INTERVAL:
                print(f"‚è≥ still working... (no new output for ~{HEARTBEAT_INTERVAL}s)")

    hb = threading.Thread(target=heartbeat, daemon=True)
    hb.start()

    try:
        for line in proc.stdout:
            print(line, end='')
            last_output = time.monotonic()
    except Exception as exc:
        # Still best-effort, but say why streaming stopped instead of hiding it.
        print(f"sh_live: stopped streaming output: {exc}")
    finally:
        # Close our end of the pipe before waiting: if the loop ended early the
        # child would otherwise block on a full pipe and wait() would never return.
        proc.stdout.close()
        proc.wait()
        stop_flag.set()
        hb.join(timeout=1)

    return proc.returncode

# Environment setup
print("üîß Setting up environment...")
# Plain assignment replaces any previous MPLBACKEND value, so no separate
# removal step is needed before it.
os.environ["MPLBACKEND"] = "Agg"
print("‚úì Environment ready\n")

# CUDA helper
def fix_cuda_library_path(search_paths=None):
    """Put directories that contain ``libcuda.so*`` on LD_LIBRARY_PATH.

    Directories found are placed in front of whatever LD_LIBRARY_PATH already
    holds (duplicates removed, order preserved) rather than replacing it, so
    entries set up by the runtime or by the user are kept.

    Args:
        search_paths: Optional iterable of directories to probe. When None,
            the built-in list of usual CUDA library locations is used.

    Returns:
        True if at least one directory with ``libcuda.so*`` was found and
        LD_LIBRARY_PATH was updated, False otherwise (environment untouched).
    """
    print("üîß Fixing CUDA library paths...")
    if search_paths is None:
        search_paths = [
            '/usr/local/cuda/lib64',
            '/usr/local/cuda-12/lib64',
            '/usr/lib/x86_64-linux-gnu',
            '/usr/local/nvidia/lib64',
        ]
    valid_paths = []
    for path in search_paths:
        path = str(path)
        p = Path(path)
        if p.exists() and any(p.glob('libcuda.so*')):
            valid_paths.append(path)
            print(f"  ‚úì {path}")
    if not valid_paths:
        return False

    # Keep existing entries; dict.fromkeys de-duplicates while preserving order,
    # which also makes repeated calls idempotent.
    existing = [entry for entry in os.environ.get('LD_LIBRARY_PATH', '').split(':') if entry]
    merged = list(dict.fromkeys(valid_paths + existing))
    os.environ['LD_LIBRARY_PATH'] = ':'.join(merged)
    print("  ‚úì Set LD_LIBRARY_PATH")
    return True

# File helpers
def _ensure_drive_path(drive_path: Path, is_settings_file=False):
    """Make sure the Drive-side location for *drive_path* exists.

    A path without a suffix is treated as a directory and created (with
    parents). A path with a suffix is treated as a file: only its parent
    directory is created, and the file itself is touched into existence
    solely when *is_settings_file* is true and it does not exist yet.
    """
    if not drive_path.suffix:
        drive_path.mkdir(parents=True, exist_ok=True)
        return

    drive_path.parent.mkdir(parents=True, exist_ok=True)
    if is_settings_file and not drive_path.exists():
        drive_path.touch(exist_ok=True)

def _remove_path(path: Path):
    """Best-effort removal of *path*; never raises.

    Symlinks (including dangling ones) are unlinked without touching their
    target, real directories are removed recursively, and anything else that
    exists is unlinked. A missing path is a no-op. Any error is swallowed.
    """
    try:
        if path.is_symlink():
            path.unlink()
        elif not path.exists():
            return
        elif path.is_dir():
            shutil.rmtree(path)
        else:
            path.unlink()
    except Exception:
        pass

def _create_symlink_or_fallback(src: Path, dest: Path):
    """Make *dest* point at *src*, copying when a symlink cannot be created.

    The symlink attempt creates the parent of *dest* and clears anything
    already at *dest* first. If any step of that attempt raises, *src* is
    copied to *dest* instead (recursively for a directory, with metadata for
    a file).

    Returns:
        True when either the symlink or the copy succeeded, False when both
        attempts raised.
    """

    def _link():
        dest.parent.mkdir(parents=True, exist_ok=True)
        if dest.exists() or dest.is_symlink():
            _remove_path(dest)
        os.symlink(str(src), str(dest), target_is_directory=src.is_dir())

    def _copy():
        if src.is_dir():
            if dest.exists():
                _remove_path(dest)
            shutil.copytree(src, dest)
        else:
            dest.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy2(src, dest)

    # Attempts are tried in order; the first one that completes wins.
    for attempt in (_link, _copy):
        try:
            attempt()
        except Exception:
            continue
        return True
    return False

def cleanup_broken_files(drive_root: Path):
    """Delete undersized model files below ``drive_root / "models"``.

    Every file matching a known model extension whose size is under 100 KiB
    is treated as broken and unlinked. Errors while reading a size or while
    deleting are ignored. Progress is reported on stdout; nothing is returned.
    """
    print("\nüßπ Cleaning broken files...")
    models_dir = drive_root / "models"
    if not models_dir.exists():
        print("  No models directory yet")
        return

    size_floor = 100 * 1024

    def _is_undersized(candidate):
        # A file whose size cannot be read is left alone.
        try:
            return candidate.stat().st_size < size_floor
        except Exception:
            return False

    patterns = ("*.gguf", "*.safetensors", "*.bin", "*.pth", "*.pt")
    broken = [
        candidate
        for pattern in patterns
        for candidate in models_dir.rglob(pattern)
        if _is_undersized(candidate)
    ]

    if not broken:
        print("  ‚úì No broken files")
        return

    print(f"  Found {len(broken)} broken files - deleting...")
    for candidate in broken:
        try:
            candidate.unlink()
        except Exception:
            pass
    print("  ‚úì Cleaned")

# MAIN
print("=" * 60, "üöÄ MY-AI-Gizmo Setup (Verbose Mode)", "=" * 60, sep="\n")

# Step 1: Mount Drive (skipped when the google.colab import at the top failed)
print("\nüìÅ Step 1/6: Mounting Drive...")
if not drive:
    print("  Note: google.colab.drive not available in this environment")
else:
    try:
        drive.mount("/content/drive", force_remount=False)
        print("‚úì Mounted")
    except Exception as e:
        # A failed mount is reported but does not stop the script.
        print(f"‚ö†Ô∏è  {e}")

# Remove undersized model files before anything links to the models folder
cleanup_broken_files(DRIVE_ROOT)

# Step 2: Create the persistent folder layout under DRIVE_ROOT
print("\nüíæ Step 2/6: Creating folders...")
folders = [
    "models",
    "loras",
    "training",
    "characters",
    "presets",
    "prompts",
    "settings",
    "chat-history",
    "instruct-history",
    "outputs",
    "images",
    "logs",
    "cache",
    "extensions",
    "softprompts",
]
for f in folders:
    DRIVE_ROOT.joinpath(f).mkdir(parents=True, exist_ok=True)
print(f"‚úì {len(folders)} folders")

# Step 3: Download repo (if needed)
# Fetches REPO_ZIP into a temporary zip, extracts it under /content and renames
# the extracted folder to WORK_DIR. The script stops here when no checkout is
# available, because every later step operates inside WORK_DIR.
print("\nüì• Step 3/6: Repository...")
if not WORK_DIR.exists():
    tmp_zip = Path("/content/repo.zip")
    print("  Downloading...")
    # wget first, curl as the fallback. curl gets -f so an HTTP error page is
    # not saved as if it were the archive, and --retry for transient failures.
    download_cmds = (
        f'wget -q -O "{tmp_zip}" "{REPO_ZIP}"',
        f'curl -s -f -L --retry 3 -o "{tmp_zip}" "{REPO_ZIP}"',
    )
    got = False
    for download_cmd in download_cmds:
        # Start every attempt from a clean slate (no stale or partial zip).
        try:
            tmp_zip.unlink()
        except OSError:
            pass
        # sh() does not raise on a non-zero exit, so the status is checked
        # explicitly together with a minimum plausible archive size.
        r = sh(download_cmd)
        if r.returncode == 0 and tmp_zip.exists() and tmp_zip.stat().st_size > 1000:
            got = True
            break
    if not got:
        print("‚ö†Ô∏è  Download failed. Please check network or REPO_ZIP URL.")
    else:
        # -o: overwrite leftovers of an earlier attempt instead of prompting.
        r = sh(f'unzip -q -o "{tmp_zip}" -d /content')
        found = next(Path("/content").glob("MY-AI-Gizmo-working-*"), None)
        if r.returncode != 0:
            detail = r.stderr.strip() or f"exit code {r.returncode}"
            print(f"‚ö†Ô∏è  Unzip failed: {detail}")
        elif found is None:
            print("‚ö†Ô∏è  Extraction succeeded but expected folder not found")
        else:
            try:
                found.rename(WORK_DIR)
                print("‚úì Downloaded and extracted")
            except OSError as e:
                print(f"‚ö†Ô∏è  Unzip failed: {e}")
    # The archive is no longer needed once extraction was attempted.
    try:
        tmp_zip.unlink()
    except OSError:
        pass
else:
    print("‚úì Exists")

# Without a checkout the following steps would create a skeleton WORK_DIR made
# only of links, which would make every later run report "Exists" and skip the
# download. Stop the cell instead.
if not WORK_DIR.exists():
    raise SystemExit(
        f"Repository checkout missing at {WORK_DIR}; "
        "fix the download problem above and re-run this cell."
    )
os.chdir(WORK_DIR)

# Step 4: Symlinks
# Replaces selected paths inside WORK_DIR with links to their counterparts
# under DRIVE_ROOT.
print("\nüîó Step 4/6: Linking...")
# Each entry: (path relative to WORK_DIR, path relative to DRIVE_ROOT,
# flag passed to _ensure_drive_path as is_settings_file).
links_map = [
    ("models", "models", False),
    ("loras", "loras", False),
    ("user_data/characters", "characters", False),
    ("user_data/presets", "presets", False),
    ("user_data/settings.yaml", "settings/settings.yaml", True),
    ("user_data/settings.json", "settings/settings.json", True),
    ("user_data/chat", "chat-history", False),
    ("outputs", "outputs", False),
]

for local, drive_folder, is_settings in links_map:
    drive_path = DRIVE_ROOT / drive_folder
    # Create the Drive-side target first so the link has something to point at.
    _ensure_drive_path(drive_path, is_settings_file=is_settings)
    local_path = WORK_DIR / local
    # Whatever currently sits at the local path (file, directory or old link)
    # is removed before linking.
    if local_path.exists() or local_path.is_symlink():
        _remove_path(local_path)
    local_path.parent.mkdir(parents=True, exist_ok=True)
    # Returns False only when both the symlink and the copy fallback failed.
    ok = _create_symlink_or_fallback(drive_path, local_path)
    if not ok:
        print(f"  ‚ö†Ô∏è  Failed to link {local} -> {drive_path}")
# NOTE: printed unconditionally, even when some entries above failed.
print("‚úì Linked")

# Step 5: Settings
# Ensures WORK_DIR/user_data/settings.yaml is a regular file: a copy of the
# Drive settings when those are non-empty, otherwise a minimal default.
print("\n‚öôÔ∏è  Step 5/6: Settings...")
drive_settings = DRIVE_ROOT / "settings" / "settings.yaml"
local_settings = WORK_DIR / "user_data" / "settings.yaml"
local_settings.parent.mkdir(parents=True, exist_ok=True)
# Drop a link left at this path so the copy/write below creates a local file
# instead of writing through the link.
if local_settings.is_symlink():
    try:
        local_settings.unlink()
    except Exception:
        pass
# A zero-byte Drive file counts as "no settings".
if drive_settings.exists() and drive_settings.stat().st_size > 0:
    try:
        shutil.copy2(drive_settings, local_settings)
        print("‚úì Copied from Drive")
    except Exception as e:
        print(f"‚ö†Ô∏è  Copy failed: {e}")
else:
    try:
        # Minimal defaults written when Drive has no usable settings file.
        local_settings.write_text("# minimal\nlisten: true\nshare: true\n")
        print("‚úì Created")
    except Exception as e:
        print(f"‚ö†Ô∏è  Could not create settings: {e}")

# ====== PREPARE FOR INSTALL: avoid long silent llama.cpp builds ======
# Strategy:
#  - If a local prebuilt environment exists, skip full install.
#  - If repositories/llama.cpp exists, rename it to avoid automatic rebuild (safe fallback).
#  - Set environment flags to request a fast install.
print("\nüîç Preparing fast-install safeguards...")
# The interpreter inside installer_files/env serves as the marker for
# "environment already installed".
env_marker = WORK_DIR / "installer_files/env/bin/python"
if env_marker.exists():
    print("‚ö° Environment already exists ‚Äî installer will be skipped by default.")
else:
    # detect and disable llama.cpp auto-build by renaming the folder if present
    llama_dir = WORK_DIR / "repositories" / "llama.cpp"
    if llama_dir.exists() and llama_dir.is_dir():
        disabled = llama_dir.with_name(llama_dir.name + ".disabled")
        try:
            # Clear a stale ".disabled" folder first so the rename target is free.
            if disabled.exists():
                shutil.rmtree(disabled, ignore_errors=True)
            llama_dir.rename(disabled)
            print("‚ö° Renamed repositories/llama.cpp to prevent automatic rebuild (fast mode).")
        except Exception:
            print("  ‚ö†Ô∏è Could not rename llama.cpp; will attempt env flags instead.")

# Step 6: Install (LIVE)
# Runs start_linux.sh with streamed output unless the environment marker
# already exists, then reports the exit status and the elapsed time.
print("\nüì¶ Step 6/6: Installing dependencies...")
print("\n".join([
    "=" * 60,
    "üìä LIVE INSTALLATION OUTPUT (showing progress)",
    "=" * 60,
]))
print("\nThis may take several minutes on first run; if it looks silent, a heartbeat will be printed every 30s.\n")
print("\n".join([
    "Watch for these stages:",
    "  1Ô∏è‚É£  Creating environment",
    "  2Ô∏è‚É£  Installing PyTorch (~2GB)",
    "  3Ô∏è‚É£  Installing packages",
    "  4Ô∏è‚É£  (optional) Compiling llama-cpp (skipped in fast mode)",
    "  5Ô∏è‚É£  Finishing up",
]))
print("\n" + "=" * 60 + "\n")

# Ensure start script executable (best-effort)
try:
    sh("chmod +x start_linux.sh")
except Exception:
    pass

start_time = time.time()
returncode = 0
if not env_marker.exists():
    # Copy of the current environment plus installer hints. Whether each
    # variable is honoured depends on start_linux.sh (best-effort).
    install_env = {
        **os.environ,
        "GPU_CHOICE": "A",
        "LAUNCH_AFTER_INSTALL": "FALSE",
        "INSTALL_EXTENSIONS": "FALSE",
        "SKIP_LLAMACPP_BUILD": "TRUE",
        "SKIP_TORCH_TEST": "TRUE",
        "FORCE_CUDA": "FALSE",
        "MPLBACKEND": "Agg",
    }

    # Installer runs inside WORK_DIR with its output streamed live
    cmd = "bash start_linux.sh"
    try:
        returncode = sh_live(cmd, cwd=str(WORK_DIR), env=install_env)
    except Exception as e:
        print(f"‚ö†Ô∏è  Installer execution error: {e}")
        returncode = 1
else:
    print("‚ö° Skipping install; virtualenv already present.")

elapsed = time.time() - start_time
print("\n" + "=" * 60)
print(
    f"‚úÖ Installation finished ({elapsed:.1f}s)"
    if returncode == 0
    else f"‚ö†Ô∏è  Installation completed with warnings or errors (exit code {returncode}, took {elapsed:.1f}s)"
)
print("=" * 60)

# Attempt to restore llama.cpp folder name so user can opt into building later
# (undoes the ".disabled" rename performed before the install step).
renamed = WORK_DIR / "repositories" / "llama.cpp.disabled"
if renamed.exists():
    try:
        restored = renamed.with_name("llama.cpp")
        # Only rename back when nothing occupies the original name.
        if not restored.exists():
            renamed.rename(restored)
            print("‚ÑπÔ∏è  Restored repositories/llama.cpp (it won't be auto-built now unless start script forces it).")
    except Exception:
        # Best-effort: a failed restore leaves the ".disabled" folder in place.
        pass

# CUDA fix
# The return value (whether any libcuda directory was found) is not used here.
print("\nüîß Setting up CUDA (best-effort)...")
fix_cuda_library_path()

# LAUNCH
# Starts server.py from WORK_DIR, echoes its output and prints a banner with
# the URL once Gradio reports one.
print("\n" + "=" * 70)
print("üåê LAUNCHING UI")
print("=" * 70 + "\n")

# Stop a web UI left over from an earlier run of this cell so its port is free.
# The pattern targets only the command launched below. A bare
# `pkill -9 -f python` matches every process with "python" in its command
# line, which can include the notebook kernel running this very cell.
# An argument list (no shell) keeps the pattern out of any wrapper's own
# command line.
try:
    subprocess.run(
        ["pkill", "-9", "-f", r"python.* -u server\.py"],
        capture_output=True, check=False,
    )
except OSError:
    # pkill not installed: nothing to clean up with, carry on.
    pass
time.sleep(1.5)

env = os.environ.copy()
env["MPLBACKEND"] = "Agg"
env.pop("PYTHONPATH", None)

# Prefer the interpreter created by the installer; fall back to the system one.
python_exe = str(WORK_DIR / "installer_files/env/bin/python")
if not Path(python_exe).exists():
    python_exe = "python3"

cmd = f"{python_exe} -u server.py --share --listen"
proc = subprocess.Popen(
    cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
    # errors="replace": undecodable bytes in the server log must not abort the
    # read loop below.
    text=True, errors="replace", bufsize=1, env=env, cwd=str(WORK_DIR)
)

public_url_re = re.compile(r"(https?://[a-z0-9\-\._]+\.gradio\.live\S*)")
local_url_re = re.compile(r"(https?://\S+:\d+)")
shown = False        # True once the public (gradio.live) URL banner was printed
local_shown = False  # True once the local URL banner was printed
for line in proc.stdout:
    print(line, end="")
    if shown or "Running on " not in line:
        continue
    # The local and public URLs are tracked separately so that a local URL
    # reported first does not suppress the banner for the public link.
    m = public_url_re.search(line)
    is_public = m is not None
    if not is_public:
        if local_shown:
            continue
        m = local_url_re.search(line)
    if not m:
        continue
    print("\n" + "=" * 70)
    print("‚ú® SUCCESS! ‚ú®")
    print("=" * 70)
    print(f"\nüåç {m.group(1)}")
    print("\nüéØ Model tab ‚Üí Select model ‚Üí Load ‚Üí Chat!")
    print("=" * 70 + "\n")
    if is_public:
        shown = True
    else:
        local_shown = True

# Reap the child so its exit status is known and no zombie is left behind.
proc.wait()
if proc.returncode != 0:
    print(f"\nserver.py exited with code {proc.returncode}")

print("\n‚úì Done")


✅ RECOMMENDED MODELS (COPY EXACTLY)
🔹 BEST GENERAL CHAT (START HERE)

Llama-2-7B-Chat

Repo: TheBloke/Llama-2-7B-Chat-GGUF
File: llama-2-7b-chat.Q4_K_M.gguf

🔹 FAST + LIGHT (LOW RAM)

TinyLlama-1.1B-Chat

Repo: TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF
File: tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf

🔹 STRONG CHAT (BETTER THAN LLAMA-2)

Mistral-7B-Instruct

Repo: TheBloke/Mistral-7B-Instruct-v0.2-GGUF
File: mistral-7b-instruct-v0.2.Q4_K_M.gguf

🔹 CODING MODEL

Code LLaMA-7B

Repo: TheBloke/CodeLlama-7B-GGUF
File: codellama-7b.Q4_K_M.gguf

🔹 ROLEPLAY / STORY

MythoMax-L2-13B (needs more RAM)

Repo: TheBloke/MythoMax-L2-13B-GGUF
File: mythomax-l2-13b.Q4_K_M.gguf

🔹 VERY FAST / TEST MODEL

Phi-2 (2.7B)

Repo: TheBloke/phi-2-GGUF
File: phi-2.Q4_K_M.gguf

⚙️ WHAT LOADER TO USE (IMPORTANT)

For ALL models above:

Loader: llama.cpp


Repo: TheBloke/Llama-2-7B-Chat-GGUF
File: llama-2-7b-chat.Q4_K_M.gguf
