In [1]:
# Attach Google Drive to this Colab runtime at the path below.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [2]:
!pip install flask flask-ngrok torch transformers diffusers peft ngrok flask_cors

Collecting flask-ngrok
  Downloading flask_ngrok-0.0.25-py3-none-any.whl.metadata (1.8 kB)
Collecting ngrok
  Downloading ngrok-1.4.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (19 kB)
Collecting flask_cors
  Downloading flask_cors-5.0.1-py3-none-any.whl.metadata (961 bytes)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-1

In [None]:
import io
import subprocess
from flask import Flask, request, send_file, abort
from flask_cors import CORS
import torch
from PIL import Image
from diffusers import (
    StableDiffusionImg2ImgPipeline,
    DPMSolverMultistepScheduler,
    AutoencoderKL,
    UNet2DConditionModel
)
from transformers import CLIPTextModel, CLIPTokenizer
from huggingface_hub import login

# 1. Install dependencies
def install_dependencies(packages=None):
    """Install the packages this notebook needs with pip.

    pip is invoked as ``<current interpreter> -m pip`` so the packages land in
    the environment of the running kernel rather than whichever ``pip``
    executable happens to be first on PATH.

    Args:
        packages: Optional iterable of pip requirement strings. When omitted,
            the notebook's default package list is installed. An empty
            iterable is a no-op.

    Raises:
        subprocess.CalledProcessError: If pip exits with a non-zero status.
    """
    import sys  # local import so this block does not depend on the cell's import list

    if packages is None:
        packages = ('pyngrok', 'flask', 'torch', 'diffusers', 'transformers',
                    'accelerate', 'peft', 'pillow')

    requirements = list(packages)
    if not requirements:
        # `pip install` with no requirements exits with an error; nothing to do.
        return

    subprocess.run(
        [sys.executable, '-m', 'pip', 'install', *requirements],
        check=True
    )

install_dependencies()

# 2. Flask app setup
app = Flask(__name__)
CORS(app)

# 3. Ngrok setup (optional)
import os

# Imported here, after install_dependencies(), because that call installs pyngrok.
from pyngrok import ngrok

# Stop any ngrok process already started through pyngrok before opening a new tunnel.
ngrok.kill()

# Never hardcode the authtoken in the notebook: read it from the environment.
# An empty/missing value is skipped so a blank token is not written to the ngrok config.
ngrok_auth_token = os.environ.get("NGROK_AUTHTOKEN", "").strip()
if ngrok_auth_token:
    ngrok.set_auth_token(ngrok_auth_token)
else:
    print(" * NGROK_AUTHTOKEN is not set; using any authtoken already configured for ngrok")

# The tunnel targets port 5051, the port the Flask server is started on below.
public_url = ngrok.connect(5051).public_url
print(f" * Ngrok tunnel: {public_url}")

# 4. Initialize SD2.1 pipeline with explicit components
pipe = None
def initialize_pipeline():
    """Build the Stable Diffusion 2.1 img2img pipeline and store it in the global ``pipe``.

    The text encoder, tokenizer, VAE, UNet and scheduler are each loaded from
    their subfolder of the ``stabilityai/stable-diffusion-2-1`` repository, the
    model weights in float16, and assembled into a
    ``StableDiffusionImg2ImgPipeline`` without a safety checker. The pipeline
    is moved to the CUDA device and attention slicing is enabled.

    Authentication: if the ``HF_TOKEN`` environment variable is set, it is used
    to log in to the Hugging Face Hub; otherwise no login is attempted.

    Raises:
        RuntimeError: If no CUDA device is available.
    """
    global pipe

    import os  # local import so this block does not depend on the cell's import list

    # Fail fast, before downloading the model weights, when there is no GPU to run on.
    if not torch.cuda.is_available():
        raise RuntimeError(
            "initialize_pipeline() requires a CUDA device, but torch.cuda.is_available() is False"
        )

    # Never hardcode the Hub token in the notebook: read it from the environment.
    hf_token = os.environ.get("HF_TOKEN", "").strip()
    if hf_token:
        login(token=hf_token)

    model_id = "stabilityai/stable-diffusion-2-1"
    weights_dtype = torch.float16

    # Load each component explicitly from its subfolder of the model repository.
    text_encoder = CLIPTextModel.from_pretrained(
        model_id, subfolder="text_encoder",
        torch_dtype=weights_dtype
    )
    tokenizer = CLIPTokenizer.from_pretrained(
        model_id, subfolder="tokenizer"
    )
    vae = AutoencoderKL.from_pretrained(
        model_id, subfolder="vae",
        torch_dtype=weights_dtype
    )
    unet = UNet2DConditionModel.from_pretrained(
        model_id, subfolder="unet",
        torch_dtype=weights_dtype
    )
    scheduler = DPMSolverMultistepScheduler.from_pretrained(
        model_id, subfolder="scheduler"
    )

    # Assemble the pipeline from the components above; the safety checker is disabled.
    pipe = StableDiffusionImg2ImgPipeline(
        vae=vae,
        text_encoder=text_encoder,
        tokenizer=tokenizer,
        unet=unet,
        scheduler=scheduler,
        safety_checker=None,
        feature_extractor=None,
        requires_safety_checker=False
    ).to("cuda")

    pipe.enable_attention_slicing()

initialize_pipeline()

# 5. Modified /generate endpoint
@app.route('/generate', methods=['POST'])
def generate():
    """Handle ``POST /generate``: run the global ``pipe`` on an uploaded image and prompt.

    Expects a form field ``prompt`` and an uploaded file ``image``.

    Returns:
        The first generated image, encoded as PNG (``image/png``).

    Aborts with HTTP 400 when either input is missing, or when the upload
    cannot be opened, converted to RGB, or resized.
    """
    # Guard clause: both inputs must be present.
    if not ('prompt' in request.form and 'image' in request.files):
        abort(400, description="Missing 'prompt' or 'image'")

    prompt_text = request.form['prompt']
    uploaded_file = request.files['image']

    try:
        # Decode the upload, normalise to RGB, and resize to 768x768
        # (the original note says SD2.1-768 requires 768px).
        source_image = Image.open(uploaded_file)
        source_image = source_image.convert("RGB")
        source_image = source_image.resize((768, 768))
    except Exception as e:
        abort(400, description=f"Image error: {e}")

    result = pipe(
        image=source_image,
        prompt=prompt_text,
        num_inference_steps=50,
        guidance_scale=7.5,
        strength=0.7
    )

    # Serialise the first output image into an in-memory PNG and send it back.
    png_buffer = io.BytesIO()
    first_image = result.images[0]
    first_image.save(png_buffer, format='PNG')
    png_buffer.seek(0)
    return send_file(png_buffer, mimetype='image/png')

if __name__ == '__main__':
    # Start Flask's built-in server on all interfaces, on port 5051 —
    # the same port the ngrok tunnel above is connected to.
    server_options = {'host': '0.0.0.0', 'port': 5051}
    app.run(**server_options)

 * Ngrok tunnel: https://c6fb-104-197-100-49.ngrok-free.app


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/633 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/1.36G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/824 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.06M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/460 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/611 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


diffusion_pytorch_model.safetensors:   0%|          | 0.00/335M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/939 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


diffusion_pytorch_model.safetensors:   0%|          | 0.00/3.46G [00:00<?, ?B/s]

scheduler_config.json:   0%|          | 0.00/345 [00:00<?, ?B/s]

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5051
 * Running on http://172.28.0.12:5051
INFO:werkzeug:[33mPress CTRL+C to quit[0m
Token indices sequence length is longer than the specified maximum sequence length for this model (91 > 77). Running this sequence through the model will result in indexing errors
The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['aesthetic poster design , instagram ad style , high - resolution , commercial look']


  0%|          | 0/35 [00:00<?, ?it/s]

INFO:werkzeug:127.0.0.1 - - [16/May/2025 00:35:48] "POST /generate HTTP/1.1" 200 -
The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ['aesthetic poster design , instagram ad style , high - resolution , commercial look']


  0%|          | 0/35 [00:00<?, ?it/s]

INFO:werkzeug:127.0.0.1 - - [16/May/2025 00:36:03] "POST /generate HTTP/1.1" 200 -
