<a href="https://colab.research.google.com/github/gitleon8301/MY-AI-Gizmo-working/blob/main/Colab-TextGen-GPU.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# oobabooga/text-generation-webui

After running both cells, a public Gradio URL will appear at the bottom in around 10 minutes. You can optionally generate an API link.

* Project page: https://github.com/oobabooga/text-generation-webui
* Gradio server status: https://status.gradio.app/

In [None]:
#@title 1. Keep this tab alive to prevent Colab from disconnecting you { display-mode: "form" }

#@markdown Press play on the music player that will appear below:
%%html
<audio src="https://oobabooga.github.io/silence.m4a" controls>

In [None]:
#@title Launch AI Gizmo with Llama 2 7B (Everything Saves to Drive)

import os
import re
import subprocess
from pathlib import Path
from google.colab import drive
import shutil

# Attach Google Drive at /content/drive; everything below is stored under it.
print("\033[1;32;1m\n --> Mounting Google Drive...\033[0;37;0m\n")
drive.mount('/content/drive')

# Root folder on Drive that holds the checkout, models and user data.
DRIVE_PATH = '/content/drive/MyDrive/MY-AI-Gizmo'
Path(DRIVE_PATH).mkdir(parents=True, exist_ok=True)

# Remove these variables from the notebook environment if they are set
# (presumably so the server subprocess does not inherit Colab-specific
# values -- TODO confirm).
for stale_var in ('PYTHONPATH', 'MPLBACKEND'):
    os.environ.pop(stale_var, None)

# Clone or update repository
if not Path(f'{DRIVE_PATH}/.git').exists():
  print("\033[1;32;1m\n --> Installing AI Gizmo in Google Drive (first time setup)...\033[0;37;0m\n")

  %cd /content/drive/MyDrive

  # Clone the repo
  !git clone https://github.com/gitleon8301/MY-AI-Gizmo-working.git MY-AI-Gizmo
  %cd MY-AI-Gizmo

  # Install dependencies
  print("\033[1;32;1m --> Installing dependencies...\033[0;37;0m\n")
  !pip install -q -r requirements.txt

else:
  print("\033[1;32;1m\n --> Using existing AI Gizmo installation from Drive\033[0;37;0m\n")
  %cd {DRIVE_PATH}

  # Update the repo
  !git pull

# Create necessary directories in Drive
for folder in ['models', 'characters', 'presets', 'prompts', 'user_data', 'logs', 'extensions']:
    os.makedirs(f'{DRIVE_PATH}/{folder}', exist_ok=True)

# Detect GPU.
# Sets `has_gpu` (bool) and, when a GPU is present, `gpu_name` (str).
# Detection is best-effort: any failure (torch not installed, driver error)
# falls back to CPU mode instead of aborting the cell.
has_gpu = False
try:
    import torch
    if torch.cuda.is_available():
        gpu_name = torch.cuda.get_device_name(0)
        has_gpu = True
except Exception as gpu_error:
    # `Exception` rather than a bare `except:` so that KeyboardInterrupt and
    # SystemExit still propagate; report the reason instead of hiding it.
    has_gpu = False
    print(f"\033[1;33;1m ⚠ GPU check failed: {gpu_error}\033[0;37;0m")

if has_gpu:
    print(f"\033[1;32;1m ✓ GPU detected: {gpu_name}\033[0;37;0m")
else:
    print("\033[1;33;1m ⚠ No GPU detected - using CPU mode\033[0;37;0m")

# Model configuration - Llama 2 7B Chat
model_url = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF"
model_file = "llama-2-7b-chat.Q4_K_M.gguf"  # 4-bit quantized, good balance
output_folder = "TheBloke_Llama-2-7B-Chat-GGUF"

model_path = Path(f"{DRIVE_PATH}/models/{output_folder}")

# Download model if not exists
if not model_path.exists():
    print(f"\033[1;32;1m --> Downloading Llama 2 7B Chat to Drive...\033[0;37;0m\n")
    os.makedirs(model_path, exist_ok=True)

    # Download using huggingface-cli
    !pip install -q huggingface_hub

    from huggingface_hub import hf_hub_download

    try:
        downloaded_file = hf_hub_download(
            repo_id="TheBloke/Llama-2-7B-Chat-GGUF",
            filename=model_file,
            local_dir=str(model_path),
            local_dir_use_symlinks=False
        )
        print(f"\033[1;32;1m ‚úì Model downloaded successfully!\033[0;37;0m\n")
    except Exception as e:
        print(f"\033[1;31;1m ‚úó Download failed: {e}\033[0;37;0m\n")
else:
    # Check if model file exists
    if (model_path / model_file).exists():
        print(f"\033[1;32;1m ‚úì Llama 2 7B already exists in Drive\033[0;37;0m\n")
    else:
        print(f"\033[1;33;1m ‚ö† Model folder exists but file missing. Re-downloading...\033[0;37;0m\n")

        !pip install -q huggingface_hub
        from huggingface_hub import hf_hub_download

        try:
            downloaded_file = hf_hub_download(
                repo_id="TheBloke/Llama-2-7B-Chat-GGUF",
                filename=model_file,
                local_dir=str(model_path),
                local_dir_use_symlinks=False
            )
            print(f"\033[1;32;1m ‚úì Model downloaded successfully!\033[0;37;0m\n")
        except Exception as e:
            print(f"\033[1;31;1m ‚úó Download failed: {e}\033[0;37;0m\n")

# Configure launch command.
# The command is one shell string: the program, the model argument, the
# device-specific flags, then the flags shared by both modes.
model_arg = f"--model {output_folder}/{model_file}"

if has_gpu:
    # GPU settings for T4
    device_flags = ["--n-gpu-layers 35"]
else:
    # CPU settings
    device_flags = ["--cpu", "--threads 2"]

shared_flags = ["--api", "--share", "--listen", "--verbose"]

# Five spaces between arguments keeps the resulting string identical to the
# previous multi-line literal (one trailing space plus a four-space indent).
cmd = "     ".join(["python server.py", model_arg, *device_flags, *shared_flags])

# Display storage info: where on Drive the install, model and data live.
# (Emoji restored; they had been corrupted by a wrong text encoding.)
print("\n" + "\033[1;36;1m" + "="*80)
print("💾 PERSISTENT STORAGE:")
print("="*80)
print(f"📁 Root:        {DRIVE_PATH}")
print(f"🤖 Model:       {DRIVE_PATH}/models/{output_folder}")
print(f"💬 Chats:       {DRIVE_PATH}/characters")
print("📝 All data saves automatically to Google Drive!")
print("="*80 + "\033[0;37;0m\n")

# Start the server
print("\n" + "\033[1;35;1m" + "="*80)
print("🚀 STARTING LLAMA 2 7B CHAT")
print("="*80 + "\033[0;37;0m\n")

# Launch the server from the checkout root, with stderr merged into stdout so
# one line-buffered text stream carries all of its output.
process = subprocess.Popen(
    cmd,
    shell=True,
    stdout=subprocess.PIPE,
    stderr=subprocess.STDOUT,
    universal_newlines=True,
    bufsize=1,
    cwd=DRIVE_PATH
)

# Monitor for URLs
local_url = None
public_url = None
urls_displayed = False

# Compiled once instead of on every output line.
local_url_pattern = re.compile(r'(http://(?:127\.0\.0\.1|0\.0\.0\.0|localhost):\d+)')
public_url_pattern = re.compile(r'(https://[a-z0-9\-]+\.gradio\.live)')

try:
    # Echo every server line; runs until the server closes its output.
    for line in iter(process.stdout.readline, ''):
        print(line, end='')

        # Capture URLs (first occurrence of each wins)
        if not local_url:
            local_match = local_url_pattern.search(line)
            if local_match:
                local_url = local_match.group(1)

        if not public_url:
            public_match = public_url_pattern.search(line)
            if public_match:
                public_url = public_match.group(1)

        # Display the summary banner once, when both URLs are known
        if local_url and public_url and not urls_displayed:
            print("\n" + "\033[1;32;1m" + "="*80)
            print("🎉 LLAMA 2 7B CHAT IS READY!")
            print("="*80)
            print(f"\n📍 LOCAL URL:  {local_url}")
            print(f"🌐 PUBLIC URL: {public_url}")
            print("\n💡 Click the PUBLIC URL to start chatting!")
            print(f"💾 All conversations save to: {DRIVE_PATH}/characters")
            print("="*80 + "\033[0;37;0m\n")
            urls_displayed = True

    process.wait()
finally:
    # If the cell is interrupted or the loop raises, do not leave the server
    # running in the background or the pipe open.
    # NOTE(review): with shell=True this signals the shell; whether the
    # python child also exits depends on the shell -- confirm on Colab.
    if process.poll() is None:
        process.terminate()
        try:
            process.wait(timeout=10)
        except subprocess.TimeoutExpired:
            process.kill()
            process.wait()
    process.stdout.close()