<a href="https://colab.research.google.com/github/gitleon8301/MY-AI-Gizmo-working/blob/main/Colab-TextGen-GPU.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# oobabooga/text-generation-webui

After running both cells, a public Gradio URL will appear at the bottom in around 10 minutes. You can optionally generate an API link.

* Project page: https://github.com/oobabooga/text-generation-webui
* Gradio server status: https://status.gradio.app/

In [None]:
#@title 1. Keep this tab alive to prevent Colab from disconnecting you { display-mode: "form" }

#@markdown Press play on the music player that will appear below:
%%html
<audio src="https://oobabooga.github.io/silence.m4a" controls></audio>

In [None]:
# LLAMA MODELS - NO TOKEN - SIMPLE DOWNLOAD
import os
import shlex
import shutil
import subprocess
import sys
import time
from pathlib import Path

# ---------- LLAMA MODEL CHOICES ----------
# Exactly one MODEL_REPO / SPECIFIC_FILE pair should be left uncommented.
# MODEL_REPO is the Hugging Face repository id and SPECIFIC_FILE is the GGUF
# file fetched from it (see the download URL built further down this cell).

# 🦙 OPTION 1: Llama 3.2 3B (NEWEST, SMALL, FAST)
MODEL_REPO = "bartowski/Llama-3.2-3B-Instruct-GGUF"
SPECIFIC_FILE = "Llama-3.2-3B-Instruct-Q5_K_M.gguf"
# Size: 2.6GB | Speed: 3-5 tok/s | RAM: 5GB | NO TOKEN NEEDED

# 🦙 OPTION 2: Llama 3.1 8B (BETTER QUALITY, STILL FAST)
# MODEL_REPO = "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF"
# SPECIFIC_FILE = "Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf"
# Size: 4.9GB | Speed: 2-3 tok/s | RAM: 7GB | NO TOKEN NEEDED

# 🦙 OPTION 3: Llama 2 7B Chat (CLASSIC, PROVEN)
# MODEL_REPO = "TheBloke/Llama-2-7B-Chat-GGUF"
# SPECIFIC_FILE = "llama-2-7b-chat.Q4_K_M.gguf"
# Size: 4.1GB | Speed: 2-3 tok/s | RAM: 7GB | NO TOKEN NEEDED

# 🦙 OPTION 4: Llama 3.1 70B (BEST QUALITY - 42GB!)
# MODEL_REPO = "bartowski/Meta-Llama-3.1-70B-Instruct-GGUF"
# SPECIFIC_FILE = "Meta-Llama-3.1-70B-Instruct-Q3_K_M.gguf"
# Size: 29GB | Speed: 0.3 tok/s | RAM: 35GB | NO TOKEN NEEDED
# NOTE(review): the heading says 42GB but the size line says 29GB - confirm
# the actual size of the Q3_K_M file before relying on either figure.
# ⚠️ TOO BIG FOR COLAB FREE - Need Colab Pro or local machine

# All these are COMMUNITY UPLOADS - NO TOKEN REQUIRED!
# ------------------------------------------

# Repository that is cloned, and the directory name it is cloned into.
GIT_URL = "https://github.com/gitleon8301/MY-AI-Gizmo-working"
REPO_DIR_NAME = "text-generation-webui"

# Download method: huggingface_hub is tried first (when enabled); wget is the
# fallback if it is disabled or fails.
USE_HF = True  # Set to False to skip HuggingFace library entirely

# Relative paths: they resolve against the process working directory at the
# time each log line is written.
DOWNLOAD_LOG = Path("download.log")
START_LOG = Path("webui.log")

cwd = Path.cwd()
# This cell later calls os.chdir(repo_dir), so on a re-run the kernel is
# already inside the clone. Step back out to the original workspace;
# otherwise the repo would be cloned into itself and models_dir would point
# at a different folder than the one used (or uploaded to) on the first run.
if cwd.name == REPO_DIR_NAME:
    cwd = cwd.parent
repo_dir = cwd / REPO_DIR_NAME
# One sub-folder per model repo, with "/" flattened to "_".
models_dir = cwd / "models" / MODEL_REPO.replace("/", "_")
models_dir.mkdir(parents=True, exist_ok=True)

# Force CPU
os.environ["CUDA_VISIBLE_DEVICES"] = ""

def run(cmd, cwd=None, env=None, logfile=None, check=False):
    """Run *cmd* through the shell, echoing its output as it is produced.

    stdout and stderr of the child are merged and printed line by line. When
    *logfile* is given, every line is also appended to that file.

    Args:
        cmd: Shell command line (executed with ``shell=True``).
        cwd: Working directory for the child, or None to inherit.
        env: Environment mapping for the child; a falsy value means
            ``os.environ``.
        logfile: Optional path that receives a copy of the output (append mode).
        check: When true, a non-zero exit status raises instead of returning.

    Returns:
        The child's exit status.

    Raises:
        subprocess.CalledProcessError: If *check* is true and the exit status
            is non-zero.
    """
    print(f"\n>>> {cmd}")
    # Open the log once instead of once per output line. Line buffering keeps
    # the file current for anyone following it while the command runs.
    log = open(logfile, "a", buffering=1, errors="replace") if logfile else None
    try:
        with subprocess.Popen(
            cmd,
            shell=True,
            cwd=cwd,
            env=env or os.environ,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            # Tool output is not guaranteed to be valid text in the locale
            # encoding; substitute bad bytes rather than abort the stream.
            errors="replace",
        ) as p:
            for line in p.stdout:
                if log is not None:
                    log.write(line)
                print(line, end="")
            p.wait()
    finally:
        if log is not None:
            log.close()
    if check and p.returncode != 0:
        raise subprocess.CalledProcessError(p.returncode, cmd)
    return p.returncode

# Startup banner: which model/file this run will use.
print(
    "=" * 70,
    f"ü¶ô MODEL: {MODEL_REPO}",
    f"üì¶ FILE: {SPECIFIC_FILE}",
    "‚úÖ NO TOKEN NEEDED - Community upload!",
    "=" * 70,
    sep="\n",
)

# 1) Clone repo
# Skipped when the target directory already exists (e.g. on a re-run).
if not repo_dir.exists():
    print("\nüì¶ Cloning web-UI...")
    run(f"git clone {GIT_URL} {REPO_DIR_NAME}", check=True)

# Everything below (installer, server.py, relative log paths) runs from
# inside the clone.
os.chdir(repo_dir)

# 2) Install
print("\nüîß Installing (CPU mode)...")
# The installer is driven non-interactively through environment variables.
install_status = run(
    'GPU_CHOICE=N LAUNCH_AFTER_INSTALL=FALSE INSTALL_EXTENSIONS=FALSE ./start_linux.sh',
    check=False, logfile=START_LOG)
# A failed install is still treated as non-fatal, but is no longer silent:
# the later server start would otherwise fail with no hint at the cause.
if install_status != 0:
    print(f"\nWARNING: installer exited with code {install_status}; see {START_LOG}")

# 3) Download model
# Strategies are tried in order, each only if the file is still missing:
# huggingface_hub, then wget, then printed manual instructions + SystemExit(1).
target_file = models_dir / SPECIFIC_FILE
# wget writes to this sidecar and it is renamed only after a clean exit, so an
# interrupted transfer is never mistaken for a finished model on the next run
# (the "already exists" check below only looks at the final name and size).
partial_file = target_file.with_name(target_file.name + ".part")
download_success = False

# Anything over ~1 MB under the final name is treated as a complete model.
if target_file.exists() and target_file.stat().st_size > 1_000_000:
    print(f"\n‚úì Model already exists: {target_file}")
    download_success = True

# METHOD 1: Try HuggingFace Hub (simple, resumable)
if not download_success and USE_HF:
    print("\n‚¨áÔ∏è  Method 1: Downloading via HuggingFace Hub...")
    print("üì• Installing huggingface_hub...")
    run("pip install -q huggingface_hub", logfile=DOWNLOAD_LOG)

    try:
        from huggingface_hub import hf_hub_download
        print(f"üì• Downloading {SPECIFIC_FILE}...")
        # resume_download is deliberately not passed: it is deprecated in
        # recent huggingface_hub releases, which resume whenever possible.
        downloaded = hf_hub_download(
            repo_id=MODEL_REPO,
            filename=SPECIFIC_FILE,
            local_dir=str(models_dir),
            # NO TOKEN PARAMETER - not needed!
        )
        print(f"‚úì Downloaded: {downloaded}")
        download_success = True
    except Exception as e:
        # Deliberately broad: any failure here (import, network, API change)
        # should fall through to the wget fallback rather than abort the cell.
        print(f"‚ùå HF download failed: {e}")
        print("Trying alternative method...")

# METHOD 2: Direct wget (no HF dependency)
if not download_success:
    print("\n‚¨áÔ∏è  Method 2: Direct download via wget (no HuggingFace needed)...")
    file_url = f"https://huggingface.co/{MODEL_REPO}/resolve/main/{SPECIFIC_FILE}"
    print(f"üì• URL: {file_url}")
    print(f"üìÅ Saving to: {target_file}")

    try:
        # wget -c allows resuming if interrupted (it continues the .part file).
        # shlex.quote keeps paths with spaces or quotes intact in the shell.
        ret = run(
            f"wget -c {shlex.quote(file_url)} -O {shlex.quote(str(partial_file))}",
            logfile=DOWNLOAD_LOG, check=False)
        if ret == 0 and partial_file.exists() and partial_file.stat().st_size > 1_000_000:
            # Publish under the final name only once the transfer is complete.
            partial_file.replace(target_file)
            print(f"‚úì Downloaded via wget: {target_file}")
            download_success = True
        else:
            print(f"‚ùå Wget failed (exit code {ret})")
    except Exception as e:
        print(f"‚ùå Wget error: {e}")

# METHOD 3: Manual instructions
if not download_success:
    print("\n" + "=" * 70)
    print("‚ùå AUTOMATIC DOWNLOAD FAILED")
    print("=" * 70)
    print("\nüì• MANUAL DOWNLOAD INSTRUCTIONS:")
    print("\n1. Open this URL in your browser:")
    print(f"   https://huggingface.co/{MODEL_REPO}/blob/main/{SPECIFIC_FILE}")
    print("\n2. Click the 'download' button")
    print("\n3. In Colab, click the folder icon (üìÅ) in left sidebar")
    print(f"\n4. Navigate to: {models_dir}")
    print("\n5. Click upload button and select the downloaded file")
    print("\n6. Re-run this cell after upload completes")
    print("\n" + "=" * 70)
    raise SystemExit(1)

# 4) Verify
# Sanity check: the model folder must contain at least one .gguf file
# (any name, not only SPECIFIC_FILE) before the server is started.
gguf_files = list(models_dir.glob("*.gguf"))
if not gguf_files:
    print(f"\n‚ùå No .gguf files in {models_dir}")
    raise SystemExit(1)

print("\n‚úì Model ready!")
# Quote the path so a workspace with spaces in its name still lists correctly.
run(f"ls -lh {shlex.quote(str(models_dir))}")

# 5) Start
print("\n" + "=" * 70)
print("üöÄ STARTING WEB UI")
print("=" * 70)

# The model is passed by folder name: --model-dir is the parent "models"
# directory and --model is the per-repo sub-folder created in the config above.
model_name = MODEL_REPO.replace('/', '_')
start_cmd = (
    f"python server.py "
    f"--cpu "
    f"--share "
    # Quoted so a workspace path containing spaces survives the shell.
    f"--model-dir {shlex.quote(str(models_dir.parent))} "
    f"--model {model_name} "
    f"--n-gpu-layers 0 "
    f"--threads {os.cpu_count() or 4}"
)

print("\nüåê Look for the Gradio public URL below...")
print("üìù Logs:", START_LOG)
print("=" * 70 + "\n")

try:
    # check=True makes a crashing server raise, so the handler below actually
    # runs; with the default check=False a non-zero exit was returned silently
    # and the log tail was never shown.
    run(start_cmd, logfile=START_LOG, check=True)
except KeyboardInterrupt:
    # Stopping the cell is the normal way to shut the server down.
    print("\n‚èπÔ∏è  Stopped")
except Exception as e:
    print(f"\n‚ùå Error: {e}")
    print("\nüìã Last 50 log lines:")
    run(f"tail -n 50 {shlex.quote(str(START_LOG))}")
    raise