In [1]:
# 🚀 SETUP: Mount Drive and Install Dependencies
import os, subprocess, time, requests, threading, re
from google.colab import drive

# 🚀 Mount Google Drive and install system packages
from google.colab import drive
drive.mount('/content/drive')

!apt -y update && apt -y install -qq build-essential aria2
!pip install -q huggingface_hub

Mounted at /content/drive
Get:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,632 B]
Hit:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Hit:3 http://archive.ubuntu.com/ubuntu jammy InRelease
Hit:4 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Get:5 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
Get:6 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
Get:7 https://r2u.stat.illinois.edu/ubuntu jammy InRelease [6,555 B]
Get:8 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease [24.3 kB]
Get:9 https://r2u.stat.illinois.edu/ubuntu jammy/main amd64 Packages [2,749 kB]
Hit:10 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Get:11 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy/main amd64 Packages [51.0 kB]
Get:12 http://archive.ubuntu.com/ubuntu jammy-backports InRelease [127 kB]
Get:13 http://archive

In [2]:
import shutil

# Paths
drive_path = '/content/drive/MyDrive/koboldcpp_build'
local_path = '/content/koboldcpp'

# Build or restore KoboldCPP
if os.path.exists(drive_path) and os.path.exists(f'{drive_path}/koboldcpp_cublas.so'):
    print("Restoring KoboldCPP from Drive...")
    !mkdir -p {local_path}
    !cp -r {drive_path}/* {local_path}/
    print("✅ KoboldCPP restored!")
else:
    print("Building KoboldCPP from scratch...")
    !apt -y install -qq build-essential
    !git clone https://github.com/LostRuins/koboldcpp.git
    %cd koboldcpp
    !make LLAMA_CUBLAS=1 CUDA_DOCKER_ARCH=compute_80,code=sm_80
    !mkdir -p {drive_path}
    !cp -r /content/koboldcpp/* {drive_path}/
    print("✅ KoboldCPP built and saved to Drive!")

# Check GPU
!nvidia-smi


Restoring KoboldCPP from Drive...
✅ KoboldCPP restored!
Wed Jul  2 18:13:19 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA A100-SXM4-40GB          Off |   00000000:00:04.0 Off |                    0 |
| N/A   31C    P0             48W /  400W |       0MiB /  40960MiB |      0%      Default |
|                                         |                        |             Disabled |
+-----------------------------------------+------------------------+----------------

In [4]:
# 📥 Fetch the quantized GGUF weights from the Hugging Face Hub
from huggingface_hub import hf_hub_download

GGUF_REPO = "mradermacher/DeepSeek-R1-Distill-Qwen-32B-Uncensored-GGUF"
GGUF_FILE = "DeepSeek-R1-Distill-Qwen-32B-Uncensored.Q5_K_M.gguf"

# hf_hub_download returns the local path of the fetched file;
# model_path is kept as a notebook-level name for later cells.
model_path = hf_hub_download(repo_id=GGUF_REPO, filename=GGUF_FILE)

print(f"✅ Model downloaded to: {model_path}")


DeepSeek-R1-Distill-Qwen-32B-Uncensored.(…):   0%|          | 0.00/23.3G [00:00<?, ?B/s]

✅ Model downloaded to: /root/.cache/huggingface/hub/models--mradermacher--DeepSeek-R1-Distill-Qwen-32B-Uncensored-GGUF/snapshots/4b072f663b4b51bd05813ba5a3c812e16e4fba0d/DeepSeek-R1-Distill-Qwen-32B-Uncensored.Q5_K_M.gguf


In [5]:
# 🎨 Set up Stable Diffusion WebUI with epicRealismXL from Drive
import os

%cd /content
if not os.path.exists("stable-diffusion-webui"):
    !git clone https://github.com/AUTOMATIC1111/stable-diffusion-webui.git

%cd stable-diffusion-webui

model_in_drive = "/content/drive/MyDrive/EpicRealism/epicrealismXL_vxviiCrystalclear.safetensors"
target_path = "models/Stable-diffusion/epicRealismXL.safetensors"
!mkdir -p models/Stable-diffusion

if os.path.exists(model_in_drive):
    if not os.path.exists(target_path):
        !ln -s "{model_in_drive}" "{target_path}"
        print("✅ Model linked from Drive")
    else:
        print("🔁 Model already linked")
else:
    print("❌ Model not found in Drive")


/content
Cloning into 'stable-diffusion-webui'...
remote: Enumerating objects: 34968, done.[K
remote: Total 34968 (delta 0), reused 0 (delta 0), pack-reused 34968 (from 1)[K
Receiving objects: 100% (34968/34968), 35.54 MiB | 14.17 MiB/s, done.
Resolving deltas: 100% (24409/24409), done.
/content/stable-diffusion-webui
✅ Model linked from Drive


In [6]:
# 📥 Download cloudflared
!wget -q -O /content/cloudflared https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64
!chmod +x /content/cloudflared

In [7]:
# 🚀 Launch Stable Diffusion WebUI in background
import subprocess, time, requests

%cd /content/stable-diffusion-webui

webui_cmd = [
    "python", "launch.py", "--deepdanbooru",
    "--api", "--listen", "--port", "7860",
    "--skip-torch-cuda-test", "--no-half-vae",
    "--ckpt", "models/Stable-diffusion/epicRealismXL.safetensors"
]

subprocess.Popen(webui_cmd)
print("✅ WebUI launched in background. Give it 60–90 seconds to initialize.")

# Optional: wait for API to become available
print("⏳ Waiting for Stable Diffusion API...")
for i in range(600):
    try:
        r = requests.get("http://127.0.0.1:7860/sdapi/v1/sd-models", timeout=5)
        if r.status_code == 200:
            print("✅ Stable Diffusion API is ready!")
            break
    except:
        pass
    if i % 30 == 0 and i > 0:
        print(f"Still waiting... ({i}s)")
    time.sleep(1)
else:
    print("❌ Stable Diffusion API did not respond in time.")


/content/stable-diffusion-webui
✅ WebUI launched in background. Give it 60–90 seconds to initialize.
⏳ Waiting for Stable Diffusion API...
Still waiting... (30s)
Still waiting... (60s)
Still waiting... (90s)
Still waiting... (120s)
Still waiting... (150s)
✅ Stable Diffusion API is ready!


In [8]:
# 🌐 Start Cloudflare tunnel to expose WebUI
import subprocess, re, threading

tunnel_cmd = "/content/cloudflared tunnel --url http://localhost:7860 --no-autoupdate"

print("⏳ Starting Cloudflare tunnel for WebUI...")
# stderr is merged into stdout so the URL is found whichever stream cloudflared logs to.
tunnel_proc = subprocess.Popen(tunnel_cmd.split(), stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)


def _drain(stream):
    """Read and discard the rest of `stream` so the child never blocks on a full pipe."""
    for _ in stream:
        pass


# Echo the log until the public URL appears.
for line in tunnel_proc.stdout:
    print(line.strip())
    match = re.search(r'https://[a-zA-Z0-9\-]+\.trycloudflare\.com', line)
    if match:
        print(f"\n🌐 Public WebUI URL: {match.group(0)}")
        # cloudflared keeps logging after the URL is printed. Keep consuming its output in a
        # daemon thread; otherwise the pipe buffer fills up and the tunnel process stalls.
        threading.Thread(target=_drain, args=(tunnel_proc.stdout,), daemon=True).start()
        break
else:
    # stdout closed without a URL: the process exited (or never started the tunnel).
    print(f"❌ cloudflared exited without reporting a tunnel URL (exit code {tunnel_proc.wait()}).")

⏳ Starting Cloudflare tunnel for WebUI...
2025-07-02T18:30:40Z INF Thank you for trying Cloudflare Tunnel. Doing so, without a Cloudflare account, is a quick way to experiment and try it out. However, be aware that these account-less Tunnels have no uptime guarantee, are subject to the Cloudflare Online Services Terms of Use (https://www.cloudflare.com/website-terms/), and Cloudflare reserves the right to investigate your use of Tunnels for violations of such terms. If you intend to use Tunnels in production you should use a pre-created named tunnel by following: https://developers.cloudflare.com/cloudflare-one/connections/connect-apps
2025-07-02T18:30:40Z INF Requesting new quick Tunnel on trycloudflare.com...
2025-07-02T18:30:45Z INF +--------------------------------------------------------------------------------------------+
2025-07-02T18:30:45Z INF |  Your quick Tunnel has been created! Visit it at (it may take some time to be reachable):  |
2025-07-02T18:30:45Z INF |  https://fol

In [None]:
# Launch the KoboldCpp server from its checkout directory.
# `model_path` must already be defined in the kernel; it is interpolated into the command below.
# The command runs via `!`, so the cell stays busy for as long as the server process is running.
%cd /content/koboldcpp

# Flags: serve the GGUF model on port 5001 on all interfaces, let KoboldCpp open its own
# remote tunnel (--remotetunnel), use the CUDA backend (--usecublas), offload 32 layers
# to the GPU, use 4 CPU threads and a 4096-token context window.
!python3 koboldcpp.py \
  --model {model_path} \
  --port 5001 \
  --host 0.0.0.0 \
  --remotetunnel \
  --threads 4 \
  --usecublas \
  --gpulayers 32 \
  --contextsize 4096


/content/koboldcpp
***
Welcome to KoboldCpp - Version 1.95.1
cloudflared-linux-amd64 already exists, using existing file.
Attempting to start tunnel thread...
Loading Chat Completions Adapter: /content/koboldcpp/kcpp_adapters/AutoGuess.json
Chat Completions Adapter Loaded
Detected Available GPU Memory: 40960 MB
Detected Available RAM: 81827 MB
Initializing dynamic library: koboldcpp_cublas.so
Starting Cloudflare Tunnel for Linux, please wait...
Namespace(model=['/root/.cache/huggingface/hub/models--mradermacher--DeepSeek-R1-Distill-Qwen-32B-Uncensored-GGUF/snapshots/4b072f663b4b51bd05813ba5a3c812e16e4fba0d/DeepSeek-R1-Distill-Qwen-32B-Uncensored.Q5_K_M.gguf'], model_param='/root/.cache/huggingface/hub/models--mradermacher--DeepSeek-R1-Distill-Qwen-32B-Uncensored-GGUF/snapshots/4b072f663b4b51bd05813ba5a3c812e16e4fba0d/DeepSeek-R1-Distill-Qwen-32B-Uncensored.Q5_K_M.gguf', port=5001, port_param=5001, host='0.0.0.0', launch=False, config=None, threads=4, usecublas=[], usevulkan=None, usecl