In [10]:
pip install deep_translator

Collecting deep_translator
  Downloading deep_translator-1.11.4-py3-none-any.whl.metadata (30 kB)
Downloading deep_translator-1.11.4-py3-none-any.whl (42 kB)
Installing collected packages: deep_translator
Successfully installed deep_translator-1.11.4
Note: you may need to restart the kernel to use updated packages.


In [13]:
import re
import time
from tqdm import tqdm
import torch
from PIL import Image, ImageDraw, ImageOps
from diffusers import DiffusionPipeline
from deep_translator import GoogleTranslator

# Load the Stable Diffusion v1.5 weights in float16, then move the pipeline to the GPU.
pipeline = DiffusionPipeline.from_pretrained(
    "stable-diffusion-v1-5/stable-diffusion-v1-5",
    torch_dtype=torch.float16,
)
pipeline = pipeline.to("cuda")
# Turn on the pipeline's attention-slicing mode
# (NOTE(review): intended to lower peak GPU memory use — confirm against the diffusers docs).
pipeline.enable_attention_slicing()

def extract_count_from_prompt(prompt, default=1, max_images=10):
    """Return the number of images requested in ``prompt``.

    Scans for the first "<number> <noun>" phrase, where the noun is one of
    image(s), picture(s), photo(s) or "gambar" (case-insensitive), e.g.
    "4 images" or "2 photos".

    Args:
        prompt: Free-text prompt to scan.
        default: Value returned when the prompt contains no count phrase.
        max_images: Upper bound applied to a detected count.

    Returns:
        ``default`` when no count phrase is found; otherwise the detected
        number capped at ``max_images`` and never lower than 1.
    """
    # The noun is mandatory. An empty alternative in the noun group would let
    # any number followed by whitespace ("Formula 1 car", "a 2024 show") be
    # misread as an image count. The trailing \b rejects partial words.
    match = re.search(
        r"(?<!\w)(\d+)\s+(?:images?|pictures?|photos?|gambar)\b",
        prompt,
        re.IGNORECASE,
    )
    if match is None:
        return default
    # Floor of 1 so that a request such as "0 images" never produces an empty
    # batch for the downstream generation / grid code.
    return max(1, min(int(match.group(1)), max_images))

def clean_prompt_text(prompt):
    """Remove "<number> <noun> [of]" count phrases from ``prompt``.

    The noun is one of image(s), picture(s), photo(s) or "gambar"
    (case-insensitive). A directly following "of" is removed as well, so
    "Make 4 images of a cat" becomes "Make a cat". Numbers that are not
    followed by one of these nouns (e.g. "Formula 1") are left untouched.

    Args:
        prompt: Free-text prompt, possibly containing a count phrase.

    Returns:
        The prompt with every count phrase removed and surrounding
        whitespace stripped.
    """
    # The noun is mandatory (no empty alternative) and "of" is optional, so
    # the phrase stripped here is the same phrase a count would be read from.
    cleaned = re.sub(
        r"(?<!\w)\d+\s+(?:images?|pictures?|photos?|gambar)\b(?:\s+of\b)?\s*",
        "",
        prompt,
        flags=re.IGNORECASE,
    )
    return cleaned.strip()

def generate_images(prompt, negative_prompt, count=4, seed=123456789, steps=40, guidance=7.5):
    """Run the module-level ``pipeline`` ``count`` times and collect one image per run.

    Args:
        prompt: Text prompt passed to the pipeline.
        negative_prompt: Negative prompt passed to the pipeline.
        count: Number of pipeline calls (and returned images).
        seed: Base seed; call number ``i`` (starting at 0) uses ``seed + i``.
        steps: Forwarded as ``num_inference_steps``.
        guidance: Forwarded as ``guidance_scale``.

    Returns:
        A list holding the first image of each pipeline result, in call order.
    """

    def _render(offset):
        # A fresh CUDA generator per image, seeded deterministically from the base seed.
        rng = torch.Generator("cuda").manual_seed(seed + offset)
        output = pipeline(
            prompt=prompt,
            negative_prompt=negative_prompt,
            generator=rng,
            num_inference_steps=steps,
            guidance_scale=guidance,
        )
        return output.images[0]

    return [_render(offset) for offset in tqdm(range(count), desc="Generating images")]

def make_grid(images, rows=None, cols=None, resize_to=256, border_width=2, border_color="black", filename=None):
    """Tile ``images`` into one bordered grid image, save it, and return it.

    Every image is resized to ``resize_to`` x ``resize_to`` pixels, wrapped in
    a border, and pasted left-to-right, top-to-bottom onto a white RGB canvas.

    Args:
        images: Non-empty sequence of PIL images.
        rows: Number of grid rows; derived from ``cols`` when omitted.
        cols: Number of grid columns; derived from ``rows`` when omitted.
            When both are omitted, at most 4 columns are used.
        resize_to: Side length in pixels of each tile before the border.
        border_width: Border thickness in pixels added around each tile.
        border_color: Fill colour of the border.
        filename: Output path; defaults to ``grid_<unix timestamp>.png``.

    Returns:
        The assembled grid image.

    Raises:
        ValueError: If ``images`` is empty, or if ``rows`` / ``cols`` is
            given but smaller than 1.
    """
    n = len(images)
    # Fail with a clear message instead of a ZeroDivisionError / IndexError
    # further down when there is nothing to lay out.
    if n == 0:
        raise ValueError("make_grid() requires at least one image")
    for label, value in (("rows", rows), ("cols", cols)):
        if value is not None and value < 1:
            raise ValueError(f"{label} must be a positive integer, got {value!r}")

    # Fill in whichever dimension is missing using ceiling division.
    if rows is None and cols is not None:
        rows = (n + cols - 1) // cols
    elif cols is None and rows is not None:
        cols = (n + rows - 1) // rows
    elif rows is None and cols is None:
        cols = min(4, n)
        rows = (n + cols - 1) // cols
    # NOTE: when both rows and cols are supplied and rows * cols < len(images),
    # the surplus tiles get paste coordinates beyond the canvas bounds.

    tiles = [
        ImageOps.expand(img.resize((resize_to, resize_to)), border=border_width, fill=border_color)
        for img in images
    ]

    # All tiles share one size (resize_to plus the border on each side).
    w, h = tiles[0].size
    grid = Image.new("RGB", (cols * w, rows * h), "white")

    for idx, tile in enumerate(tiles):
        row, col = divmod(idx, cols)
        grid.paste(tile, (col * w, row * h))

    if filename is None:
        # Timestamped default keeps separate runs from overwriting each other.
        filename = f"grid_{int(time.time())}.png"

    grid.save(filename)
    print(f"Saved grid to {filename}")
    return grid

def main():
    """Translate a hard-coded prompt to English, generate the requested images, and save them as a grid."""
    seed = 123456789
    # Prompt input — can be modified dynamically or used from CLI
    source_prompt = "Buatkan 4 gambar adegan balap mobil Formula 1 dengan latar belakang penonton yang ramai, buat dengan mendekati keadaan sesungguhnya di dunia nyata"

    # Translate to English; the source language is left to the translator's auto mode.
    english_prompt = GoogleTranslator(source='auto', target='en').translate(source_prompt)

    # Pull the requested image count out of the prompt, then strip the count phrase.
    num_images = extract_count_from_prompt(english_prompt, default=1)
    clean_prompt = clean_prompt_text(english_prompt)

    print(f" Detected request for {num_images} image(s)")
    print(f" Cleaned prompt: '{clean_prompt}'")

    # Generate one image per requested count, seeded from the fixed base seed.
    generated = generate_images(
        prompt=clean_prompt,
        negative_prompt="blurry, low quality, distorted, grainy",
        count=num_images,
        seed=seed,
    )

    # Grid layout: at most 4 columns, with as many rows as needed (ceiling division).
    cols = min(4, num_images)
    full_rows, leftover = divmod(num_images, cols)
    rows = full_rows + (1 if leftover else 0)
    make_grid(generated, rows=rows, cols=cols, resize_to=512)

# Entry point: run main() only when this code executes as the top-level
# module (``__name__ == "__main__"``), not when it is imported.
if __name__ == "__main__":
    main()

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

 Detected request for 4 image(s)
 Cleaned prompt: 'Make formula 1 car racing scenes with a large audience background, by approaching the real world'


Generating images:   0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

Generating images:  25%|██▌       | 1/4 [00:01<00:04,  1.58s/it]

  0%|          | 0/40 [00:00<?, ?it/s]

Generating images:  50%|█████     | 2/4 [00:03<00:03,  1.52s/it]

  0%|          | 0/40 [00:00<?, ?it/s]

Generating images:  75%|███████▌  | 3/4 [00:04<00:01,  1.54s/it]

  0%|          | 0/40 [00:00<?, ?it/s]

Generating images: 100%|██████████| 4/4 [00:06<00:00,  1.57s/it]


Saved grid to grid_1751949864.png


In [18]:
from deep_translator import GoogleTranslator

# Standalone translator check: translate an English sentence to Russian and print the result.
text = "We're halfway there, living on a prayer. Take my hand, we'll make it I swear"
translated_text = GoogleTranslator(
    source='auto',
    target='ru',
).translate(text=text)
print(translated_text)

Мы на полпути, живем на молитве. Возьми меня за руку, мы сделаем это, я клянусь
