In [1]:
# Install dependencies
!pip install flask pyngrok diffusers transformers accelerate torch torchvision --quiet

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m40.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m36.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m57.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m11.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [3]:
# Mount Google Drive at /content/drive; the /generate endpoint later copies
# its output.zip into a folder under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:

# --- Imports ---
import torch
from flask import Flask, request, jsonify, send_file
from pyngrok import ngrok
import shutil
import zipfile
import os
from PIL import Image
from diffusers import StableDiffusionPipeline
from transformers import BlipProcessor, BlipForConditionalGeneration
from google.colab import drive # Add this import


In [None]:
# --- Config ---
# Read the ngrok auth token from the NGROK_AUTH_TOKEN environment variable so
# the real secret never has to be saved inside the notebook. "XXXXX" is only
# the placeholder used when the variable is not set.
NGROK_AUTH_TOKEN = os.environ.get("NGROK_AUTH_TOKEN", "XXXXX")

In [6]:
# Authenticate ngrok
# Hands the configured token to pyngrok so that tunnels opened later in the
# notebook are created under that account.
ngrok.set_auth_token(NGROK_AUTH_TOKEN)



In [8]:

# --- Initialize Models ---

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Half precision on GPU, full precision on CPU.
SD_DTYPE = torch.float16 if device == "cuda" else torch.float32

# Stable Diffusion (image generation)
# Checkpoint to load. Other checkpoints that were tried in this notebook:
#   "runwayml/stable-diffusion-v1-5"
#   "nitrosocke/Arcane-Diffusion"
#   "Lykon/dreamshaper-8"
#   "stabilityai/stable-diffusion-2-1"
SD_MODEL_ID = "hakurei/waifu-diffusion"

sd_pipe = StableDiffusionPipeline.from_pretrained(
    SD_MODEL_ID, torch_dtype=SD_DTYPE
).to(device)

# BLIP (caption generation)
# The processor and the model are loaded from the same checkpoint.
BLIP_MODEL_ID = "Salesforce/blip-image-captioning-base"
blip_processor = BlipProcessor.from_pretrained(BLIP_MODEL_ID)
blip_model = BlipForConditionalGeneration.from_pretrained(BLIP_MODEL_ID).to(device)

# --- Helper Functions ---

def generate_images(prompt, num_images=5):
    """Render `num_images` pictures for `prompt` and return their file paths.

    Every picture comes from its own `sd_pipe(prompt)` call (first image of
    the result) and is saved in the current working directory as
    ``img_<index>.png``.

    Args:
        prompt: Text prompt forwarded unchanged to the diffusion pipeline.
        num_images: How many pictures to produce.

    Returns:
        list[str]: The saved file names, in generation order.
    """
    def _render(index):
        # One pipeline call per picture; only the first returned image is kept.
        picture = sd_pipe(prompt).images[0]
        target = f"img_{index}.png"
        picture.save(target)
        return target

    return [_render(index) for index in range(num_images)]

def generate_captions(image_paths):
    """Produce one BLIP caption per image file.

    Args:
        image_paths: Iterable of paths to image files that PIL can open.

    Returns:
        list[str]: Decoded captions (special tokens stripped), in the same
        order as `image_paths`.
    """
    captions = []
    for path in image_paths:
        # Close the underlying file as soon as the RGB copy exists; the
        # original code left every opened image file handle dangling.
        with Image.open(path) as opened:
            rgb_image = opened.convert("RGB")
        inputs = blip_processor(rgb_image, return_tensors="pt").to(device)
        out = blip_model.generate(**inputs)
        # Only the first generated sequence is decoded.
        captions.append(blip_processor.decode(out[0], skip_special_tokens=True))
    return captions


# --- Flask App ---

app = Flask(__name__)

# ngrok.connect() returns an NgrokTunnel object rather than a string; printing
# or formatting it yields 'NgrokTunnel: "https://…" -> "http://localhost:5000"'.
# Keep only its public URL so the value can be embedded directly in links.
tunnel = ngrok.connect(5000)
public_url = tunnel.public_url
print("🚀 Backend running at:", public_url)

@app.route("/generate", methods=["POST"])
def generate():
    """Generate captioned images for a prompt and package them as output.zip.

    Expects a JSON body containing a "prompt" entry. The images and their
    captions are written to ./output, zipped into ./output.zip, and the
    archive is copied to Google Drive.

    Returns:
        JSON with "download_url" and "drive_status" on success, or a JSON
        "error" message with HTTP 400 when the prompt is missing or empty.

    NOTE(review): the working names ("output", "output.zip", img_<i>.png) are
    shared by every request, so overlapping requests would overwrite each
    other's files.
    """
    # silent=True yields None instead of raising when the body is absent or is
    # not valid JSON, so bad requests get a clear 400 rather than a 500.
    payload = request.get_json(silent=True)
    prompt = payload.get("prompt") if isinstance(payload, dict) else None
    if not prompt:
        return jsonify({"error": "JSON body must contain a non-empty 'prompt'"}), 400

    # Clean up old outputs
    if os.path.exists("output"):
        shutil.rmtree("output")
    os.makedirs("output", exist_ok=True)

    # 1. Generate images
    image_paths = generate_images(prompt)

    # 2. Generate captions
    captions = generate_captions(image_paths)

    # 3. Save to output dir: each image is moved next to a .txt file holding its caption
    for i, (img_path, caption) in enumerate(zip(image_paths, captions)):
        shutil.move(img_path, f"output/img_{i}.png")
        with open(f"output/img_{i}.txt", "w", encoding="utf-8") as f:
            f.write(caption)

    # 4. Zip the results; the context manager closes the archive even if a write fails
    with zipfile.ZipFile("output.zip", "w") as zipf:
        for file in sorted(os.listdir("output")):
            zipf.write(os.path.join("output", file), arcname=file)

    # 5. Copy output.zip to Google Drive
    google_drive_path = "/content/drive/MyDrive/GeneratedImages/"  # You can change this path
    os.makedirs(google_drive_path, exist_ok=True)  # Create the folder if it doesn't exist

    # The Drive copy always uses the same file name, so each request replaces
    # the previous archive; add a timestamp to the name to keep a history.
    drive_zip_path = os.path.join(google_drive_path, "output.zip")
    shutil.copy("output.zip", drive_zip_path)
    print(f"✅ output.zip saved to Google Drive at: {drive_zip_path}")

    # public_url may be the NgrokTunnel object returned by ngrok.connect() or a
    # plain URL string; formatting the tunnel object itself would embed its
    # repr in the link instead of the URL.
    base_url = getattr(public_url, "public_url", public_url)
    return jsonify({"download_url": f"{base_url}/download", "drive_status": "File saved to Google Drive"})

@app.route("/download", methods=["GET"])
def download():
    """Send the most recently built output.zip as a file attachment.

    Returns:
        The archive as a download, or a JSON "error" message with HTTP 404
        when no archive exists yet (previously this surfaced as a 500).
    """
    # /generate writes output.zip relative to the working directory, so resolve
    # it the same way; an absolute path leaves no ambiguity about what a
    # relative path would be resolved against.
    zip_path = os.path.abspath("output.zip")
    if not os.path.isfile(zip_path):
        return jsonify({"error": "No archive available yet; POST to /generate first"}), 404
    return send_file(zip_path, as_attachment=True)

# Run the Flask app
# Serves on port 5000, the same port the ngrok tunnel opened above forwards to.
# This call blocks the cell until the server is stopped.
app.run(port=5000)

model_index.json:   0%|          | 0.00/577 [00:00<?, ?B/s]

Fetching 15 files:   0%|          | 0/15 [00:00<?, ?it/s]

preprocessor_config.json:   0%|          | 0.00/518 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

config.json:   0%|          | 0.00/620 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/4.89k [00:00<?, ?B/s]

scheduler_config.json:   0%|          | 0.00/341 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.36G [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/460 [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/3.46G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/819 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.00k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.06M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/601 [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/335M [00:00<?, ?B/s]

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

🚀 Backend running at: NgrokTunnel: "https://bf44-104-196-236-186.ngrok-free.app" -> "http://localhost:5000"
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
Token indices sequence length is longer than the specified maximum sequence length for this model (84 > 77). Running this sequence through the model will result in indexing errors
The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: [', depth of field , fantasy atmosphere']


  0%|          | 0/50 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: [', depth of field , fantasy atmosphere']


  0%|          | 0/50 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: [', depth of field , fantasy atmosphere']


  0%|          | 0/50 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: [', depth of field , fantasy atmosphere']


  0%|          | 0/50 [00:00<?, ?it/s]

The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: [', depth of field , fantasy atmosphere']


  0%|          | 0/50 [00:00<?, ?it/s]

Potential NSFW content was detected in one or more images. A black image will be returned instead. Try again with a different prompt and/or seed.
INFO:werkzeug:127.0.0.1 - - [15/Jun/2025 10:54:12] "POST /generate HTTP/1.1" 200 -


✅ output.zip saved to Google Drive at: /content/drive/MyDrive/GeneratedImages/output.zip
