<a href="https://colab.research.google.com/github/gitleon8301/MY-AI-Gizmo-working/blob/main/Colab-TextGen-GPU.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# oobabooga/text-generation-webui

After running both cells, a public Gradio URL will appear at the bottom in around 10 minutes. You can optionally generate an API link.

* Project page: https://github.com/oobabooga/text-generation-webui
* Gradio server status: https://status.gradio.app/

In [None]:
#@title 1. Keep this tab alive to prevent Colab from disconnecting you { display-mode: "form" }

#@markdown Press play on the music player that will appear below:
%%html
<audio src="https://oobabooga.github.io/silence.m4a" controls>

In [None]:
#!/usr/bin/env python3
# ================================================================
# MY-AI-Gizmo • CPU-OPTIMIZED LAUNCHER (FIXED VERSION)
# - Forces CPU-only llama.cpp installation (no CUDA dependencies)
# - Fixed URL capture with multiple fallback patterns
# - Improved Gradio share configuration
# - Better error handling and logging
# - Auto-retry on failures
# - Proper model loader configuration
# ================================================================

import os
import subprocess
import shutil
import re
import time
import threading
from pathlib import Path

try:
    from google.colab import drive as colab_drive
    IN_COLAB = True
except Exception:
    colab_drive = None
    IN_COLAB = False

# ---------- Configuration ----------
REPO_ZIP = "https://github.com/gitleon8301/MY-AI-Gizmo-working/archive/refs/heads/main.zip"
WORK_DIR = Path("/content/text-generation-webui")
DRIVE_ROOT = Path("/content/drive/MyDrive/MY-AI-Gizmo")
LOG_DIR = DRIVE_ROOT / "logs"
MPL_CONFIG_DIR = DRIVE_ROOT / "matplotlib"
HEARTBEAT_INTERVAL = 30  # seconds
PUBLIC_URL_FILE = DRIVE_ROOT / "public_url.txt"
# -----------------------------------

def sh(cmd, cwd=None, env=None, check=False):
    """Run *cmd* through the shell and return the finished process.

    stdout and stderr are captured as text on the returned
    ``subprocess.CompletedProcess``. With ``check=True`` a non-zero exit
    status raises ``subprocess.CalledProcessError``.
    """
    run_options = {
        "shell": True,
        "cwd": cwd,
        "env": env,
        "capture_output": True,
        "text": True,
        "check": check,
    }
    return subprocess.run(cmd, **run_options)

# URL patterns in priority order; the first pattern that matches a line wins.
_URL_PATTERNS = (
    # Gradio public URLs (highest priority)
    re.compile(r'Running on public URL:\s*(https?://[^\s]+\.gradio\.live[^\s,)\'\"]*)', re.IGNORECASE),
    re.compile(r'Public URL:\s*(https?://[^\s]+\.gradio\.live[^\s,)\'\"]*)', re.IGNORECASE),
    re.compile(r'(https?://[a-zA-Z0-9\-]+\.gradio\.live[^\s,)\'\"]*)', re.IGNORECASE),
    # Gradio app URLs
    re.compile(r'(https?://[^\s]+\.gradio\.app[^\s,)\'\"]*)', re.IGNORECASE),
    # Local URLs with port
    re.compile(r'Running on local URL:\s*(https?://[^\s]+:[0-9]+)', re.IGNORECASE),
    re.compile(r'(https?://(?:localhost|127\.0\.0\.1|0\.0\.0\.0):[0-9]+)', re.IGNORECASE),
    # Any numeric-host URL with a port
    re.compile(r'(https?://[0-9.]+:[0-9]+)', re.IGNORECASE),
)


def _find_url(line):
    """Return the URL matched by the highest-priority pattern in *line*, or None."""
    for pat in _URL_PATTERNS:
        m = pat.search(line)
        if m:
            return m.group(1).rstrip(').,\'"')
    return None


def _save_url(url, destination):
    """Best-effort write of *url* to *destination*; does nothing when it is falsy."""
    if not destination:
        return
    try:
        Path(destination).write_text(url, encoding="utf-8")
    except OSError:
        # Persisting the URL is a convenience; never fail the stream over it.
        pass


def stream_with_heartbeat(cmd, cwd=None, env=None, logfile_path=None, capture_url_to=None):
    """
    Run command, stream output line-by-line, print heartbeat if silent.

    Args:
        cmd: Shell command line to execute (run with ``shell=True``).
        cwd: Working directory for the child, or None to inherit.
        env: Environment mapping for the child, or None to inherit.
        logfile_path: Optional file that receives a copy of every output line
            and every heartbeat message (opened in append mode).
        capture_url_to: Optional file that is overwritten with the captured URL.

    Returns:
        ``(returncode, captured_url_or_None)``. A ``gradio.live`` URL always
        replaces a previously captured URL; any other URL is only kept when
        nothing has been captured yet.
    """
    # Decode explicitly and leniently: one undecodable byte in the child's
    # output must not abort the read loop (and with it the URL capture).
    proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                            cwd=cwd, env=env, text=True, bufsize=1,
                            encoding="utf-8", errors="replace")

    # Read by the heartbeat thread through the closure, rebound by the loop below.
    last_output = time.time()
    stop = threading.Event()
    captured_url = None

    def heartbeat():
        while not stop.wait(HEARTBEAT_INTERVAL):
            if time.time() - last_output >= HEARTBEAT_INTERVAL:
                msg = f"[heartbeat] still working... (no output for ~{HEARTBEAT_INTERVAL}s)\n"
                print(msg, end='')
                if logfile_path:
                    try:
                        with open(logfile_path, "a", encoding="utf-8") as f:
                            f.write(msg)
                    except OSError:
                        pass

    hb = threading.Thread(target=heartbeat, daemon=True)
    hb.start()

    logfile = None
    if logfile_path:
        try:
            # Line-buffered so the log can be tailed live and heartbeat lines
            # (appended through a second handle) land in chronological order.
            logfile = open(logfile_path, "a", encoding="utf-8", buffering=1)
        except OSError:
            logfile = None

    try:
        for line in proc.stdout:
            last_output = time.time()
            print(line, end='')
            if logfile:
                try:
                    logfile.write(line)
                except OSError:
                    pass

            candidate = _find_url(line)
            # Skip lines without a URL and repeats of the URL already reported.
            if not candidate or candidate == captured_url:
                continue

            if 'gradio.live' in candidate.lower():
                # Public share links always win, even over an earlier capture.
                captured_url = candidate
                print(f"\n{'='*70}")
                print(f"üåê PUBLIC URL FOUND: {captured_url}")
                print(f"{'='*70}\n")
                _save_url(captured_url, capture_url_to)
            elif not captured_url:
                # Use other URLs only if nothing has been captured yet.
                captured_url = candidate
                print(f"\nüîó URL DETECTED: {captured_url}\n")
                _save_url(captured_url, capture_url_to)

    except Exception as e:
        print(f"[stream error] {e}")
        # The child may still be writing. Drain what is left so it cannot block
        # on a full pipe while we wait for it below.
        try:
            for _ in proc.stdout:
                pass
        except (OSError, ValueError):
            pass
    finally:
        proc.wait()
        stop.set()
        hb.join(timeout=1)
        try:
            proc.stdout.close()
        except OSError:
            pass
        if logfile:
            try:
                logfile.close()
            except OSError:
                pass

    return proc.returncode, captured_url

def ensure_dirs():
    """Create DRIVE_ROOT, LOG_DIR and MPL_CONFIG_DIR, ignoring any failure."""
    required = [DRIVE_ROOT, LOG_DIR, MPL_CONFIG_DIR]
    for folder in required:
        try:
            folder.mkdir(parents=True, exist_ok=True)
        except Exception:
            # Best effort: a directory that cannot be created is simply skipped.
            continue

def download_repo_if_missing():
    """Download and unpack the repository into WORK_DIR unless it already exists.

    The archive at REPO_ZIP is fetched with ``wget`` and, if that fails, with
    ``curl``. It is unpacked under ``/content`` and the extracted
    ``MY-AI-Gizmo-working-*`` folder is renamed to WORK_DIR.

    Returns:
        True when WORK_DIR exists afterwards, False when the download or the
        extraction failed (a message is printed in that case).
    """
    if WORK_DIR.exists():
        print(f"[info] WORK_DIR exists: {WORK_DIR}")
        return True

    extract_root = Path("/content")
    tmp_zip = extract_root / "repo.zip"
    try:
        tmp_zip.unlink()
    except OSError:
        pass  # nothing left over from an earlier attempt

    print("[info] downloading repository...")
    ok = False
    # curl gets -f so an HTTP error page is not saved as if it were the archive.
    for cmd in (f"wget -q -O {tmp_zip} {REPO_ZIP}", f"curl -f -s -L -o {tmp_zip} {REPO_ZIP}"):
        try:
            result = sh(cmd)
            if result.returncode == 0 and tmp_zip.exists() and tmp_zip.stat().st_size > 1000:
                ok = True
                break
        except Exception:
            pass
    if not ok:
        print("[error] download failed. Check network/URL.")
        return False

    print("[info] extracting...")
    try:
        # -o: overwrite a stale partial extraction instead of stopping to prompt.
        result = sh(f"unzip -q -o {tmp_zip} -d {extract_root}")
        # unzip exits with 1 for warnings on an otherwise successful run.
        if result.returncode > 1:
            detail = (result.stderr or "").strip()
            print(f"[error] extract failed: unzip exited with code {result.returncode} {detail}")
            return False
        found = next(extract_root.glob("MY-AI-Gizmo-working-*"), None)
        if not found:
            print("[error] expected extracted folder not found")
            return False
        found.rename(WORK_DIR)
        print("[info] repo extracted to", WORK_DIR)
        return True
    except Exception as e:
        print("[error] extract failed:", e)
        return False
    finally:
        # The archive is only needed for the extraction step.
        try:
            tmp_zip.unlink()
        except OSError:
            pass

def ensure_symlinks_and_files():
    """Point the working copy's data paths at their persistent Drive locations.

    For each entry the Drive target is created (an empty file for settings
    entries, a directory otherwise), whatever currently sits at the local path
    is removed, and a symlink to the Drive target is put in its place. If the
    symlink cannot be created, the Drive content is copied to the local path
    instead. All filesystem errors are ignored.
    """
    # (path under WORK_DIR, path under DRIVE_ROOT, target is a single settings file)
    persisted = (
        ("models", "models", False),
        ("loras", "loras", False),
        ("user_data/characters", "characters", False),
        ("user_data/presets", "presets", False),
        ("user_data/settings.yaml", "settings/settings.yaml", True),
        ("user_data/settings.json", "settings/settings.json", True),
        ("user_data/chat", "chat-history", False),
        ("outputs", "outputs", False),
    )
    for rel_local, rel_drive, single_file in persisted:
        target = DRIVE_ROOT / rel_drive
        if not single_file:
            target.mkdir(parents=True, exist_ok=True)
        else:
            target.parent.mkdir(parents=True, exist_ok=True)
            if not target.exists():
                try:
                    target.write_text("", encoding="utf-8")
                except Exception:
                    pass

        link = WORK_DIR / rel_local

        # Clear the local path so the symlink can take its place.
        try:
            present = link.exists() or link.is_symlink()
            if present and not link.is_symlink() and link.is_dir():
                shutil.rmtree(link)
            elif present:
                link.unlink()
        except Exception:
            pass

        try:
            link.parent.mkdir(parents=True, exist_ok=True)
            os.symlink(str(target), str(link), target_is_directory=target.is_dir())
            continue
        except Exception:
            pass

        # Symlinking failed: fall back to a plain copy of the Drive content.
        try:
            if target.is_dir():
                shutil.copytree(target, link, dirs_exist_ok=True)
            else:
                link.parent.mkdir(parents=True, exist_ok=True)
                shutil.copy2(target, link)
        except Exception:
            pass

def prepare_settings_file():
    """Write the launcher's CPU-mode settings.yaml to the working copy and to Drive.

    The same content goes to ``WORK_DIR/user_data/settings.yaml`` first and
    then to ``DRIVE_ROOT/settings/settings.yaml``. Existing files are
    overwritten. A failure while writing is reported as a warning.
    """
    local_settings = WORK_DIR / "user_data" / "settings.yaml"
    drive_settings = DRIVE_ROOT / "settings" / "settings.yaml"
    local_settings.parent.mkdir(parents=True, exist_ok=True)

    # Comprehensive settings for CPU mode
    settings_content = """# MY-AI-Gizmo Settings - CPU Mode (Auto-configured)
# Network settings
listen: true
share: true
auto_launch: false

# CPU-optimized loader preferences
loader: llama.cpp

# Performance settings for CPU
n_ctx: 2048
n_batch: 512
threads: 4
n_gpu_layers: 0

# UI settings
chat_style: cai-chat
character: Assistant

# API settings
api: true
api_port: 5000
"""

    try:
        # Local copy first, then the persistent Drive copy.
        for destination in (local_settings, drive_settings):
            destination.parent.mkdir(parents=True, exist_ok=True)
            destination.write_text(settings_content, encoding="utf-8")
        print("[‚úì] Settings configured for CPU mode with public sharing")
    except Exception as e:
        print(f"[warn] Could not update settings: {e}")

def cleanup_broken_files(drive_root: Path):
    """Delete undersized model files below ``<drive_root>/models``.

    Every ``.gguf``, ``.safetensors``, ``.bin``, ``.pth`` or ``.pt`` file
    (searched recursively) that is smaller than 100 KiB is treated as an
    incomplete download and removed. Nothing happens when the models folder
    does not exist; stat and delete errors are ignored.
    """
    models_dir = drive_root / "models"
    if not models_dir.exists():
        return

    min_bytes = 100 * 1024
    undersized = []
    for glob_pattern in ("*.gguf", "*.safetensors", "*.bin", "*.pth", "*.pt"):
        for candidate in models_dir.rglob(glob_pattern):
            try:
                size = candidate.stat().st_size
            except Exception:
                continue
            if size < min_bytes:
                undersized.append(candidate)

    if not undersized:
        return

    print(f"[info] Removing {len(undersized)} broken/incomplete files from Drive models folder")
    for victim in undersized:
        try:
            victim.unlink()
        except Exception:
            pass

def force_cpu_only_llama_cpp():
    """
    Force CPU-only llama-cpp-python installation to avoid CUDA errors.

    Removes any existing llama-cpp-python / llama-cpp-python-cuda package from
    the installer's environment and reinstalls llama-cpp-python with CMake
    flags that switch the GPU backends off. Does nothing when the environment's
    Python interpreter does not exist yet. A failed install is reported but
    never raised.
    """
    print("\nüîß Ensuring CPU-only llama-cpp-python installation...")

    env_marker = WORK_DIR / "installer_files" / "env" / "bin" / "python"
    if not env_marker.exists():
        print("[info] Virtual environment not yet created, will be handled by installer")
        return

    # Drive pip through the environment's interpreter for both steps, so the
    # uninstall and the install are guaranteed to target the same environment
    # (a separate bin/pip script is not required to exist).
    pip_cmd = f'"{env_marker}" -m pip'

    # Uninstall any existing llama-cpp-python (might have CUDA)
    print("[info] Removing any existing llama-cpp-python...")
    sh(f'{pip_cmd} uninstall -y llama-cpp-python llama-cpp-python-cuda', check=False)

    # Install CPU-only version with explicit flags
    print("[info] Installing llama-cpp-python for CPU-only...")
    cpu_env = os.environ.copy()
    cpu_env.update({
        'CMAKE_ARGS': '-DLLAMA_CUDA=OFF -DLLAMA_CUBLAS=OFF -DLLAMA_METAL=OFF -DLLAMA_OPENCL=OFF -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS',
        'FORCE_CMAKE': '1',
        'CUDACXX': '',  # Prevent CUDA compiler detection
    })

    result = sh(
        f'{pip_cmd} install llama-cpp-python --no-cache-dir --force-reinstall --upgrade',
        env=cpu_env,
        check=False
    )

    if result.returncode == 0:
        print("[‚úì] llama-cpp-python installed for CPU")
    else:
        print(f"[warn] llama-cpp-python installation returned code {result.returncode}")
        # sh() captures pip's output; show the end of it so the failure can be diagnosed.
        for err_line in (result.stderr or "").strip().splitlines()[-15:]:
            print(f"[pip] {err_line}")
        print("[info] Will continue; transformers loader will work as fallback")

def create_model_loader_config():
    """Write ``model-config.yaml`` with the CPU loader defaults into WORK_DIR.

    Any existing file is overwritten. A failure is reported as a warning and
    not raised.
    """
    destination = WORK_DIR / "model-config.yaml"
    yaml_text = """# CPU-Optimized Model Loader Configuration
# This file helps configure models to load properly on CPU

default:
  loader: llama.cpp
  n_gpu_layers: 0
  n_ctx: 2048
  n_batch: 512
  threads: 4
  use_mmap: true
  use_mlock: false

# For GGUF models (recommended for CPU)
*.gguf:
  loader: llama.cpp
  n_gpu_layers: 0
  n_ctx: 2048
  threads: 4

# For transformers models
*.safetensors:
  loader: Transformers
  cpu: true
  load_in_8bit: false
  load_in_4bit: false
"""

    try:
        with destination.open("w", encoding="utf-8") as handle:
            handle.write(yaml_text)
        print(f"[‚úì] Created model loader config: {destination}")
    except Exception as e:
        print(f"[warn] Could not create model config: {e}")

def create_cpu_requirements():
    """Write ``requirements_cpu.txt`` (CPU-only dependency list) into WORK_DIR.

    Any existing file is overwritten. A failure is reported as a warning and
    not raised.
    """
    req_path = WORK_DIR / "requirements_cpu.txt"
    requirement_lines = [
        "# CPU-only requirements override",
        "torch",
        "torchvision",
        "torchaudio",
        "llama-cpp-python",
        "transformers>=4.35.0",
        "accelerate",
        "sentencepiece",
        "protobuf",
        "gradio>=3.50.0",
    ]
    # One entry per line, with a trailing newline after the last one.
    file_text = "\n".join(requirement_lines) + "\n"
    try:
        req_path.write_text(file_text, encoding="utf-8")
        print(f"[‚úì] Created {req_path}")
    except Exception as e:
        print(f"[warn] Could not create CPU requirements file: {e}")

def _add_share_kwarg(source):
    """Return *source* with ``share=True`` appended to every ``.launch(...)`` call.

    The closing parenthesis of each call is found by counting nesting depth, so
    empty calls and arguments that contain their own calls are handled.
    Parentheses inside string literals are not recognised. If a call's
    parentheses cannot be balanced, the rest of the text is left untouched.
    """
    marker = ".launch("
    pieces = []
    pos = 0
    while True:
        start = source.find(marker, pos)
        if start == -1:
            pieces.append(source[pos:])
            break
        args_start = start + len(marker)
        depth = 1
        i = args_start
        while i < len(source) and depth:
            if source[i] == "(":
                depth += 1
            elif source[i] == ")":
                depth -= 1
            i += 1
        if depth:
            pieces.append(source[pos:])
            break
        close = i - 1
        args = source[args_start:close]
        stripped = args.rstrip()
        if not stripped:
            sep = ""            # .launch()      -> .launch(share=True)
        elif stripped.endswith(","):
            sep = " "           # .launch(a=1,)  -> .launch(a=1, share=True)
        else:
            sep = ", "          # .launch(a=1)   -> .launch(a=1, share=True)
        pieces.append(source[pos:args_start] + stripped + sep + "share=True" + args[len(stripped):] + ")")
        pos = close + 1
    return "".join(pieces)


def patch_gradio_launch():
    """
    Patch the server.py to ensure share=True is properly set

    ``WORK_DIR/server.py`` is only modified when it contains a ``.launch(``
    call and no ``share=`` argument anywhere; in that case ``share=True`` is
    added to each launch call. Problems are reported as warnings, not raised.

    Earlier versions also inserted ``shared_settings["share"] = True`` after
    ``demo.queue()``. That name is not defined by this launcher and the line
    was inserted at a fixed indentation, so the injection is no longer done.
    """
    server_py = WORK_DIR / "server.py"
    if not server_py.exists():
        print("[warn] server.py not found, cannot patch")
        return

    try:
        content = server_py.read_text(encoding="utf-8")

        if 'share=' in content or '.launch(' not in content:
            print("[info] server.py needs no share patch, left unchanged")
            return

        patched = _add_share_kwarg(content)
        if patched == content:
            print("[info] server.py needs no share patch, left unchanged")
            return

        server_py.write_text(patched, encoding="utf-8")
        print("[‚úì] Patched server.py for public URL sharing")
    except Exception as e:
        print(f"[warn] Could not patch server.py: {e}")

# ---------- Main flow ----------
print("=" * 70)
print("MY-AI-Gizmo CPU-Optimized Launcher (FIXED)")
print("=" * 70)

# Mount Drive BEFORE creating anything below /content/drive. DRIVE_ROOT lives
# under the mount point, so creating it first would leave local folders there
# and the real Drive could no longer be mounted cleanly (see the google.colab
# drive.mount documentation on non-empty mount points).
if IN_COLAB:
    try:
        print("[info] Mounting Google Drive...")
        colab_drive.mount("/content/drive", force_remount=False)
        print("[‚úì] Google Drive mounted")
    except Exception as e:
        print(f"[warn] Could not mount Drive: {e}")

# Create the persistent folders (on Drive when mounted, local disk otherwise).
ensure_dirs()

cleanup_broken_files(DRIVE_ROOT)

# Download and extract repository
if not download_repo_if_missing() and not WORK_DIR.exists():
    raise SystemExit("Repository unavailable. Fix network/REPO_ZIP and retry.")

# Later shell commands use paths relative to the repository root.
os.chdir(WORK_DIR)

ensure_symlinks_and_files()
prepare_settings_file()
create_cpu_requirements()
create_model_loader_config()

# CRITICAL: Remove llama.cpp repository to prevent compilation attempts
# (the checkout is moved aside to llama.cpp.backup, replacing any older backup).
llama_dir = WORK_DIR / "repositories" / "llama.cpp"
llama_backup = WORK_DIR / "repositories" / "llama.cpp.backup"
try:
    if llama_dir.exists() and llama_backup.exists():
        shutil.rmtree(llama_backup, ignore_errors=True)
    if llama_dir.exists():
        print("[info] Moving repositories/llama.cpp to .backup (prevents build attempts)")
        llama_dir.rename(llama_backup)
except Exception as e:
    print(f"[warn] Could not move llama.cpp directory: {e}")

# Remove any existing CUDA-compiled llama.cpp binaries
# (only directories matching the patterns below the environment's lib folder).
llama_binaries = WORK_DIR / "installer_files" / "env" / "lib"
if llama_binaries.exists():
    for pattern in ("**/llama_cpp_binaries", "**/llama-cpp-python*"):
        for path in llama_binaries.glob(pattern):
            try:
                if not path.is_dir():
                    continue
                shutil.rmtree(path, ignore_errors=True)
                print(f"[info] Removed existing binaries: {path.name}")
            except Exception:
                pass

# Prepare environment for CPU-only installation
MPL_CONFIG_DIR.mkdir(parents=True, exist_ok=True)
start_sh = WORK_DIR / "start_linux.sh"
installer_log = LOG_DIR / f"installer_{int(time.time())}.log"
env_marker = WORK_DIR / "installer_files" / "env" / "bin" / "python"

# Current environment plus the overrides handed to the installer script.
install_env = dict(
    os.environ,
    # Matplotlib backend
    MPLBACKEND="Agg",
    MPLCONFIGDIR=str(MPL_CONFIG_DIR),
    # Installation options
    GPU_CHOICE="N",  # NO GPU
    LAUNCH_AFTER_INSTALL="FALSE",
    INSTALL_EXTENSIONS="FALSE",
    # Force CPU-only builds
    CUDA_VISIBLE_DEVICES="",  # Hide any GPU
    TORCH_CUDA_ARCH_LIST="",  # No CUDA architectures
    CMAKE_ARGS="-DLLAMA_CUDA=OFF -DLLAMA_CUBLAS=OFF -DLLAMA_METAL=OFF -DLLAMA_OPENCL=OFF",
    FORCE_CMAKE="1",
    CUDACXX="",  # Prevent CUDA compiler detection
    # Skip tests and builds
    SKIP_LLAMACPP_BUILD="TRUE",
    SKIP_TORCH_TEST="TRUE",
    FORCE_CUDA="FALSE",
)

print("\nüì¶ Installing dependencies (CPU-only mode)...")
print(f"Installer log -> {installer_log}")

# Without the installer script there is nothing to run: stop here.
if not start_sh.exists():
    print("[error] start_linux.sh not found!")
    raise SystemExit("Cannot proceed without installation script")

sh("chmod +x start_linux.sh")

if env_marker.exists():
    # The environment's interpreter is already there, so the install ran before.
    print("[info] Virtual environment exists, skipping full install")
else:
    print("[info] Running installer...")
    code, url = stream_with_heartbeat(
        "bash start_linux.sh",
        cwd=str(WORK_DIR),
        env=install_env,
        logfile_path=str(installer_log),
        capture_url_to=str(PUBLIC_URL_FILE)
    )

    if code == 0:
        print(f"[‚úì] Installer completed. Log: {installer_log}")
    else:
        print(f"[warn] Installer exited with code {code}. See {installer_log}")

# After installation, force CPU-only llama-cpp-python
force_cpu_only_llama_cpp()

# Patch server for public URL
patch_gradio_launch()

# Create enhanced server wrapper
# Writes WORK_DIR/_run_server_cpu.py: a small script that sets CPU-only
# environment variables, forces the matplotlib Agg backend, makes
# torch.cuda.is_available() report False and then runs server.py as __main__.
# NOTE(review): the launch command built further down runs
# _launch_with_share.py, not this file — confirm whether this wrapper is
# still needed.
wrapper = WORK_DIR / "_run_server_cpu.py"
# f-string: only {MPL_CONFIG_DIR} is substituted; the rest is written verbatim.
wrapper_code = f"""# CPU-optimized server wrapper with enhanced URL sharing
import os
import sys

# Force CPU-only environment
os.environ['CUDA_VISIBLE_DEVICES'] = ''
os.environ['MPLBACKEND'] = 'Agg'
os.environ['MPLCONFIGDIR'] = r'{MPL_CONFIG_DIR}'

# Force matplotlib Agg backend
try:
    import matplotlib
    matplotlib.use('Agg', force=True)
except Exception:
    pass

# Disable CUDA in PyTorch if present
try:
    import torch
    if hasattr(torch, 'cuda'):
        torch.cuda.is_available = lambda: False
        print("[CPU MODE] PyTorch CUDA disabled")
except Exception:
    pass

# Ensure gradio sharing is enabled
print("[INFO] Starting server with public URL sharing enabled...")

# Run the actual server
import runpy
runpy.run_path('server.py', run_name='__main__')
"""

# Best effort: a failure to write the wrapper is only reported as a warning.
try:
    wrapper.write_text(wrapper_code, encoding="utf-8")
    wrapper.chmod(0o755)
    print("[‚úì] Created CPU-optimized server wrapper")
except Exception as e:
    print(f"[warn] Could not create wrapper: {e}")

# Kill any stray processes
# (anything whose command line matches a Python server.py or gradio process,
# e.g. left over from an earlier run of this cell)
try:
    sh("pkill -9 -f 'python.*server.py'")
    sh("pkill -9 -f 'python.*gradio'")
except Exception:
    pass
# Short pause before the new server is started.
time.sleep(2)

# Launch the server with explicit share flags
server_log = LOG_DIR / f"server_{int(time.time())}.log"
# Prefer the interpreter of the installer's environment; fall back to python3.
python_exe = str(env_marker) if env_marker.exists() else "python3"

# Create enhanced launch wrapper
# Writes WORK_DIR/_launch_with_share.py, which appends --listen, --share and
# --auto-launch to sys.argv (when absent), applies the CPU-only tweaks and then
# runs server.py as __main__. Only {MPL_CONFIG_DIR} is substituted below.
launch_wrapper = WORK_DIR / "_launch_with_share.py"
launch_wrapper_code = f"""# Enhanced launch wrapper with guaranteed public URL sharing
import sys
import os

# CPU environment
os.environ['CUDA_VISIBLE_DEVICES'] = ''
os.environ['MPLBACKEND'] = 'Agg'
os.environ['MPLCONFIGDIR'] = r'{MPL_CONFIG_DIR}'

# Ensure these flags are passed to the server
# Add them to sys.argv before importing
if '--listen' not in sys.argv:
    sys.argv.append('--listen')
if '--share' not in sys.argv:
    sys.argv.append('--share')
if '--auto-launch' not in sys.argv:
    sys.argv.append('--auto-launch')

print("[INFO] Launch flags:", ' '.join(sys.argv))

try:
    import matplotlib
    matplotlib.use('Agg', force=True)
except:
    pass

try:
    import torch
    if hasattr(torch, 'cuda'):
        torch.cuda.is_available = lambda: False
        print("[CPU MODE] CUDA disabled")
except:
    pass

import runpy
runpy.run_path('server.py', run_name='__main__')
"""
# NOTE(review): a write failure is silently ignored here, yet the launch
# command below still runs this file.
try:
    launch_wrapper.write_text(launch_wrapper_code, encoding="utf-8")
    print("[‚úì] Created launch wrapper with share flags")
except Exception:
    pass

# The wrapper is referenced by bare file name; the command runs with cwd=WORK_DIR.
# -u keeps the child's output unbuffered so lines reach the streamer promptly.
launch_cmd = f'{python_exe} -u "{launch_wrapper.name}"'

# Environment for the server process: current environment plus CPU/Gradio overrides.
server_env = os.environ.copy()
server_env.update({
    "CUDA_VISIBLE_DEVICES": "",
    "MPLBACKEND": "Agg",
    "MPLCONFIGDIR": str(MPL_CONFIG_DIR),
    "GRADIO_SERVER_NAME": "0.0.0.0",  # Listen on all interfaces
    "GRADIO_SHARE": "1",  # Force sharing
})

print("\n" + "=" * 70)
print("üöÄ LAUNCHING WEB UI (CPU MODE WITH PUBLIC URL)")
print("=" * 70)
print(f"Server log -> {server_log}")
print("\n‚öôÔ∏è  CPU OPTIMIZATION TIPS:")
print("  ‚Ä¢ Use GGUF models (best CPU performance)")
print("  ‚Ä¢ Recommended: llama.cpp loader")
print("  ‚Ä¢ Fallback: Transformers loader")
print("  ‚Ä¢ Keep context length ‚â§ 2048 for faster response")
print("=" * 70 + "\n")

print("‚è≥ Starting server (this may take 1-2 minutes)...")
print("   Looking for public URL...\n")

# Blocks until the server process exits; `captured` is the URL found in its
# output (or None) and `code` its exit status.
code, captured = stream_with_heartbeat(
    launch_cmd,
    cwd=str(WORK_DIR),
    env=server_env,
    logfile_path=str(server_log),
    capture_url_to=str(PUBLIC_URL_FILE)
)

# Final report, printed once the server process has exited.
print("\n" + "=" * 70)
if captured:
    print("‚úÖ WEB UI AVAILABLE!")
    print(f"üåê PUBLIC URL: {captured}")
    print("=" * 70)
    try:
        PUBLIC_URL_FILE.write_text(captured, encoding="utf-8")
        print(f"[‚úì] URL saved to: {PUBLIC_URL_FILE}")
    except OSError as e:
        # Saving the URL is a convenience; report the problem and carry on.
        print(f"[warn] Could not save URL to {PUBLIC_URL_FILE}: {e}")
    print("\nüìã NEXT STEPS:")
    print("  1. Click the URL above to open the Web UI")
    print("  2. Go to 'Model' tab")
    print("  3. Select 'llama.cpp' as loader")
    print("  4. Download a GGUF model or use transformers")
    print("  5. Load the model and start chatting!")
else:
    print("‚ö†Ô∏è  NO PUBLIC URL CAPTURED")
    print("=" * 70)
    print("\nüîç TROUBLESHOOTING:")
    print(f"  ‚Ä¢ Check server log: {server_log}")
    print("  ‚Ä¢ The server may still be running locally")
    print("  ‚Ä¢ Try checking the last 50 lines of the log:")
    print(f"    tail -n 50 {server_log}")

    # Try to read the URL from file
    if PUBLIC_URL_FILE.exists():
        try:
            # Read with the same encoding the file is written with.
            saved_url = PUBLIC_URL_FILE.read_text(encoding="utf-8").strip()
            if saved_url:
                print(f"\n  ‚Ä¢ Previously saved URL found: {saved_url}")
        except (OSError, UnicodeError):
            # An unreadable file just means there is no hint to show.
            pass

if code != 0:
    print(f"\n[warn] Server exited with code {code}")
    print(f"[info] Check logs: {server_log}")
else:
    print("\n[info] Server terminated normally")

print("\n‚úÖ Persistent data location:", DRIVE_ROOT)
print("   ‚îú‚îÄ‚îÄ models/      <- Place your GGUF models here")
print("   ‚îú‚îÄ‚îÄ characters/  <- Character cards")
print("   ‚îú‚îÄ‚îÄ logs/        <- All logs")
print("   ‚îî‚îÄ‚îÄ settings/    <- Configuration files")
print("=" * 70)

✅ RECOMMENDED MODELS (COPY EXACTLY)
🔹 BEST GENERAL CHAT (START HERE)

Llama-2-7B-Chat

Repo: TheBloke/Llama-2-7B-Chat-GGUF
File: llama-2-7b-chat.Q4_K_M.gguf

🔹 FAST + LIGHT (LOW RAM)

TinyLlama-1.1B-Chat

Repo: TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF
File: tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf

🔹 STRONG CHAT (BETTER THAN LLAMA-2)

Mistral-7B-Instruct

Repo: TheBloke/Mistral-7B-Instruct-v0.2-GGUF
File: mistral-7b-instruct-v0.2.Q4_K_M.gguf

🔹 CODING MODEL

Code LLaMA-7B

Repo: TheBloke/CodeLlama-7B-GGUF
File: codellama-7b.Q4_K_M.gguf

🔹 ROLEPLAY / STORY

MythoMax-L2-13B (needs more RAM)

Repo: TheBloke/MythoMax-L2-13B-GGUF
File: mythomax-l2-13b.Q4_K_M.gguf

🔹 VERY FAST / TEST MODEL

Phi-2 (2.7B)

Repo: TheBloke/phi-2-GGUF
File: phi-2.Q4_K_M.gguf

⚙️ WHAT LOADER TO USE (IMPORTANT)

For ALL models above:

Loader: llama.cpp


Repo: TheBloke/Llama-2-7B-Chat-GGUF
File: llama-2-7b-chat.Q4_K_M.gguf
