<a href="https://colab.research.google.com/github/anaumghori/Sketch-to-Image/blob/main/Sketch_to_Image.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Requirements**




In [None]:
!pip install gradio controlnet_aux transformers torchvision mediapipe xformers diffusers

# **Import Necessary Libraries and Models**





In [None]:
import random
import cv2
import torch
import numpy as np
from PIL import Image
from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, AutoencoderKL
from diffusers import EulerAncestralDiscreteScheduler
from controlnet_aux import HEDdetector
import gradio as gr

In [None]:
# Prefer the GPU when CUDA is available; otherwise run on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Half precision is only requested on CUDA. On CPU, float16 inference is slow
# and, depending on the PyTorch version, unsupported for some ops, so the
# CPU fallback loads the weights in float32 instead.
model_dtype = torch.float16 if device == "cuda" else torch.float32

# Scribble ControlNet that conditions SDXL on a line drawing.
controlnet = ControlNetModel.from_pretrained(
    "xinsir/controlnet-scribble-sdxl-1.0", torch_dtype=model_dtype
).to(device)

# Replacement VAE and scheduler handed to the pipeline below.
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=model_dtype).to(device)
scheduler = EulerAncestralDiscreteScheduler.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", subfolder="scheduler")

# NOTE(review): SDXL pipelines do not appear to define a `safety_checker`
# component, so this kwarg is presumably ignored -- confirm before removing.
pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet,
    vae=vae,
    safety_checker=None,
    torch_dtype=model_dtype,
    scheduler=scheduler,
).to(device)

# Edge detector used to turn uploaded images into scribble-like control images.
hed_processor = HEDdetector.from_pretrained("lllyasviel/Annotators")

# **List of pre-available styles and prompts**

In [None]:
# Each style is a dict with a display name, a positive prompt fragment and a
# negative prompt. The entries are declared as (name, prompt, negative_prompt)
# tuples and expanded into dicts so the three keys are spelled only once.
style_list = [
    {"name": style_name, "prompt": positive, "negative_prompt": negative}
    for style_name, positive, negative in (
        (
            "(No Style)",
            "{prompt}",
            "longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality, blurry, distorted, artifacts",
        ),
        (
            "Anime",
            "vibrant anime artwork, sharp outlines, vivid colors, detailed characters, dynamic lighting, expressive faces, cinematic compositions, dramatic storytelling, vivid backgrounds, key visual aesthetic, polished designs, studio anime",
            "blurred, low-quality, flat colors, overexposed lighting, poorly proportioned characters, dull backgrounds, lack of detail, realistic proportions, muted colors, harsh shadows, distorted perspectives, unrefined lines, noise, grain",
        ),
        (
            "Neon",
            "futuristic cyberpunk scene, glowing neon lights, hues of blue and purple, pink accents, sleek urban designs, towering skyscrapers, tech-enhanced outfits, glowing elements, reflective surfaces, rain-soaked streets, high-tech dystopia, moody atmosphere, stunningly beautiful, crisp, detailed, sleek, ultramodern, magenta highlights",
            "dull colors, natural landscapes, low-tech, medieval themes, low-resolution details, lack of neon glow, warm tones, absence of urban environment, soft lighting, lack of reflections, mundane designs, poorly lit scenes, blurred, noise, grain",
        ),
        (
            "Manga",
            "highly detailed manga-style artwork, bold black-and-white contrasts, clean linework, dynamic shading, expressive characters, intricate patterns, storytelling focus, dramatic compositions, traditional Japanese manga aesthetic, visually striking panels, detailed expressions, classic manga styles, polished designs",
            "lack of detail, faint lines, unbalanced contrast, lack of storytelling, inconsistent shading, muted tones, overuse of gray, poor composition, blurred designs, dull visuals, excessive colors, lack of texture, noise, grain",
        ),
        (
            "Digital",
            "hyper-realistic digital portrait artwork, intricate facial details, luminous skin textures, expressive and captivating eyes, smooth and polished surfaces, soft gradients, dynamic yet subtle lighting, professional digital rendering, highly detailed and stylized character design, artistic precision, visually compelling compositions, painterly yet refined quality",
            "blurred or smudged details, overly harsh shadows, low resolution, unbalanced lighting, unrealistic or flat expressions, unpolished textures, lack of detail in the eyes, excessive noise or grain, dull or uninspired compositions, overly simplistic designs, lack of sharpness or refinement",
        ),
        (
            "Fantasy",
            "immersive fantasy artwork, magical landscapes, ethereal lighting, enchanted forests, epic scenes, intricate details, vivid colors, glowing effects, dramatic skies, imaginative worlds, majestic compositions, awe-inspiring designs",
            "dull colors, lack of detail, low resolution, overly simplistic designs, absence of fantasy elements, muted tones, mundane scenes, poor composition, blurred, noise, lack of imagination, grain, harsh shadows, realistic styles",
        ),
    )
]

# **Core functions**

In [None]:
def nms(x, t, s):
    """
    Thin an edge map with directional non-maximum suppression.

    The input is converted to float32 and Gaussian-blurred with sigma ``s``.
    A pixel is kept when it equals the dilation of the blurred image along
    at least one of four 3x3 line kernels (horizontal, vertical and the two
    diagonals). Kept pixels whose blurred value exceeds ``t`` become 255 in
    the returned uint8 array; everything else is 0.
    """
    blurred = cv2.GaussianBlur(x.astype(np.float32), (0, 0), s)

    # One 3x3 line-shaped structuring element per direction.
    direction_patterns = (
        [[0, 0, 0], [1, 1, 1], [0, 0, 0]],
        [[0, 1, 0], [0, 1, 0], [0, 1, 0]],
        [[1, 0, 0], [0, 1, 0], [0, 0, 1]],
        [[0, 0, 1], [0, 1, 0], [1, 0, 0]],
    )

    peaks = np.zeros_like(blurred)
    for pattern in direction_patterns:
        kernel = np.array(pattern, dtype=np.uint8)
        # A pixel equal to its dilation is the maximum along that direction.
        is_directional_max = cv2.dilate(blurred, kernel=kernel) == blurred
        np.putmask(peaks, is_directional_max, blurred)

    binary = np.zeros_like(peaks, dtype=np.uint8)
    binary[peaks > t] = 255
    return binary


def process_sketch(sketch):
    """
    Turn a sketchpad value into a binary control image.

    Reads ``sketch["composite"]``, converts it to grayscale and binarizes it
    at 127 (values above become 255, the rest 0). Returns a PIL image.
    """
    composite = np.array(sketch["composite"])
    gray = cv2.cvtColor(composite, cv2.COLOR_RGB2GRAY)
    # Single-pass threshold; keep the grayscale array's dtype.
    binary = np.where(gray > 127, 255, 0).astype(gray.dtype)
    return Image.fromarray(binary)


def process_image(image_path):
    """
    Converts an image file into a controlnet-compatible scribble.

    Args:
        image_path: Path of the image to load with OpenCV.

    Returns:
        A PIL image containing only 0/255 values, or ``None`` when the file
        cannot be read or any processing step fails (the error is printed).
    """
    try:
        image = cv2.imread(image_path)
        if image is None:
            raise ValueError("Image not found or invalid.")

        # cv2.imread yields BGR channel order; convert to RGB before handing
        # the array to the HED detector, which treats arrays as RGB images.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Edge detection using HED or similar processor
        controlnet_img = hed_processor(image, scribble=False)
        controlnet_img = np.array(controlnet_img)

        # Apply Gaussian blur and NMS
        controlnet_img = cv2.GaussianBlur(controlnet_img, (0, 0), 3)
        controlnet_img = nms(controlnet_img, t=100, s=1.0)

        # Simulate a human-like sketch
        # NOTE(review): nms() in this file already returns only 0 and 255, so
        # this random threshold currently leaves the array unchanged. It would
        # only matter if the blur ran after nms() -- confirm intended order.
        random_val = int(round(random.uniform(0.01, 0.10), 2) * 255)
        controlnet_img[controlnet_img > random_val] = 255
        controlnet_img[controlnet_img < 255] = 0

        return Image.fromarray(controlnet_img)
    except Exception as e:
        # Best-effort: callers treat None as "could not process the input".
        print(f"Error processing image: {e}")
        return None

In [None]:
def apply_style_to_prompt(style_name, user_prompt):
    """
    Combines the user prompt with the selected style.

    Args:
        style_name: Name of an entry in ``style_list``. Unknown names and
            ``None`` (e.g. nothing selected in the dropdown) fall back to
            the "(No Style)" entry.
        user_prompt: Text entered by the user.

    Returns:
        A ``(positive_prompt, negative_prompt)`` tuple. If the style's prompt
        contains the ``{prompt}`` placeholder, the user prompt is substituted
        into it; otherwise the style text is appended after the user prompt.
    """
    def find_style(name):
        # First match wins, mirroring a linear search over style_list.
        return next((style for style in style_list if style["name"] == name), None)

    style_data = find_style(style_name)
    if style_data is None:
        style_data = find_style("(No Style)")
    if style_data is None:
        # No usable style at all: pass the user prompt through unchanged.
        return user_prompt, ""

    template = style_data["prompt"]
    if "{prompt}" in template:
        # Avoid emitting a literal "{prompt}" token into the final prompt.
        positive_prompt = template.replace("{prompt}", user_prompt)
    else:
        positive_prompt = f"{user_prompt}, {template}"
    return positive_prompt, style_data["negative_prompt"]


def generate_with_pipeline(
    controlnet_img,
    prompt,
    negative_prompt,
    width,
    height,
    num_inference_steps=50,
    guidance_scale=7.5,
    controlnet_conditioning_scale=1.2,
    generator=None,
):
    """
    Generates an image using the pipeline.

    Args:
        controlnet_img: Control image passed to the pipeline as ``image``.
        prompt: Positive text prompt.
        negative_prompt: Negative text prompt.
        width: Output width in pixels.
        height: Output height in pixels.
        num_inference_steps: Number of denoising steps (default 50).
        guidance_scale: Guidance scale (default 7.5).
        controlnet_conditioning_scale: ControlNet conditioning scale
            (default 1.2).
        generator: Optional ``torch.Generator`` for reproducible sampling.

    Returns:
        The first generated image, or ``None`` when the pipeline returns no
        images or raises (the error is printed).
    """
    try:
        images = pipe(
            prompt=prompt,
            negative_prompt=negative_prompt,
            image=controlnet_img,
            controlnet_conditioning_scale=controlnet_conditioning_scale,
            guidance_scale=guidance_scale,
            width=width,
            height=height,
            num_inference_steps=num_inference_steps,
            generator=generator,
        ).images
        return images[0] if images else None
    except Exception as e:
        # Best-effort: report the failure and signal it with None.
        print(f"Error during image generation: {e}")
        return None

In [None]:
def generate_image(sketch, user_prompt, selected_style, input_type, image=None, num_steps=25, guidance_scale=7.5, controlnet_conditioning_scale=1.2, seed=0, randomize_seed=True):
    """
    Main function for generating images based on sketch or image input.

    Args:
        sketch: Sketchpad value; a mapping with a "composite" entry, used
            when ``input_type`` is "Sketch".
        user_prompt: Text description entered by the user.
        selected_style: Style name passed to ``apply_style_to_prompt``.
        input_type: Either "Sketch" or "Image".
        image: File path of the uploaded image, used when ``input_type`` is
            "Image".
        num_steps: Number of denoising steps.
        guidance_scale: Guidance scale.
        controlnet_conditioning_scale: ControlNet conditioning scale.
        seed: Seed for the random generator (ignored when ``randomize_seed``).
        randomize_seed: When true, draw a fresh seed in [0, 9999].

    Returns:
        The generated image, or ``None`` if anything fails (the error is
        printed).
    """
    try:
        if randomize_seed:
            seed = random.randint(0, 9999)

        if input_type == "Sketch":
            controlnet_img = process_sketch(sketch)
        elif input_type == "Image" and image:
            controlnet_img = process_image(image)
        else:
            raise ValueError("Invalid input type or missing image.")

        if controlnet_img is None:
            raise ValueError("Failed to process input.")

        # Resize image for compatibility
        controlnet_img = controlnet_img.resize((1024, 1024))

        # Prepare prompts
        positive_prompt, negative_prompt = apply_style_to_prompt(selected_style, user_prompt)

        # Seed a dedicated generator so the chosen seed actually drives sampling.
        generator = torch.Generator(device="cpu").manual_seed(int(seed))

        # Call the pipeline directly so the user-selected steps, guidance,
        # conditioning scale and seed are honored.
        images = pipe(
            prompt=positive_prompt,
            negative_prompt=negative_prompt,
            image=controlnet_img,
            controlnet_conditioning_scale=float(controlnet_conditioning_scale),
            guidance_scale=float(guidance_scale),
            width=1024,
            height=1024,
            num_inference_steps=int(num_steps),  # sliders may deliver floats
            generator=generator,
        ).images
        return images[0] if images else None
    except Exception as e:
        # Best-effort: report the failure and leave the output empty.
        print(f"Error: {e}")
        return None

# **Gradio Interface**

In [None]:
def gradio_interface():
    """
    Gradio interface for the Sketch-to-Image generator with examples.

    Builds the Blocks layout (input selector, sketchpad or image upload,
    prompt, style dropdown, advanced sliders, examples), wires the
    "Generate" button to ``generate_image`` and launches the app.
    """
    style_names = [style["name"] for style in style_list]

    with gr.Blocks() as demo:
        with gr.Row():
            gr.Markdown(
                """
                <h1 style="text-align: center;">Advanced Sketch-to-Art Generator</h1>
                <p style="text-align: center;">Draw or input an image or sketch, enter your prompt, and see the magic happen!</p>
                """
            )

        # Input and Output Section
        with gr.Row():
            with gr.Column():
                with gr.Group():
                    input_type = gr.Radio(
                        choices=["Sketch", "Image"],
                        label="Input Type",
                        value="Sketch"
                    )
                    sketch_input = gr.Sketchpad(
                        label="Draw a sketch",
                        visible=True,
                        height=450,
                        width=450
                    )
                    # Hidden until the user switches the input type to "Image".
                    image_input = gr.Image(
                        type="filepath",
                        label="Upload an image",
                        visible=False
                    )
                    user_prompt = gr.Textbox(label="Describe your image", lines=2)
                    # Preselect the first style so an untouched dropdown never
                    # submits None as the style name.
                    style_dropdown = gr.Dropdown(
                        label="Select Style",
                        choices=style_names,
                        value=style_names[0] if style_names else None
                    )

                run_button = gr.Button("Generate")

            with gr.Column():
                output_image = gr.Image(
                    label="Generated Image",
                    height=450,
                    width=450,
                )

        # Advanced options with notes about defaults
        with gr.Accordion("Advanced Options", open=False):
            gr.Markdown(
                """
                <p><b>Defaults:</b> Steps = 50, Conditioning Scale = 1.2, Guidance = 7.5.</p>
                """
            )
            num_steps = gr.Slider(
                label="Steps",
                minimum=1,
                maximum=50,
                value=50,
                step=1
            )
            guidance_scale = gr.Slider(
                label="Guidance",
                minimum=1,
                maximum=10,
                value=7.5,
                step=0.1
            )
            controlnet_conditioning_scale = gr.Slider(
                label="Conditioning Scale",
                minimum=0.5,
                maximum=5.0,
                value=1.2,
                step=0.1
            )
            seed = gr.Slider(
                label="Seed",
                minimum=0,
                maximum=9999,
                value=0,
                step=1
            )
            randomize_seed = gr.Checkbox(
                label="Randomize Seed",
                value=True
            )

        # Examples Section
        # Each row is [input type, image path, prompt, style], matching the
        # `inputs` list given to gr.Examples below.
        examples = [
            ["Image", "/content/drive/MyDrive/face.webp", "Boy, curly hair, t-shirt", "Digital"],
            ["Image", "/content/drive/MyDrive/superhero.webp", "Superhero in pink armor, cute and lighthearted, kawaii, chibi, pastel colors, whimsical atmosphere", "(No Style)"],
            ["Image", "/content/drive/MyDrive/grim_reaper.webp", "Grim Reaper, bones", "Neon"],
            ["Image", "/content/drive/MyDrive/fox.webp", "Orange Fox sitting with red fruit on head, lighthearted, kawaii, cute, adorable", "Anime"],
            ["Image", "/content/drive/MyDrive/women.webp", "Women sitting in park reading book, trees, ", "Manga"],
            ["Image", "/content/drive/MyDrive/book.webp", "Glowing green stone on open book", "Fantasy"],
            ["Image", "/content/drive/MyDrive/car.webp", "Car driving fast, movie-like composition, film grainy, vignette, highly detailed, high budget, bokeh, cinemascope", "(No Style)"]
        ]

        gr.Examples(
            examples=examples,
            inputs=[input_type, image_input, user_prompt, style_dropdown],
            label="Try these examples",
        )

        # Input and output mapping
        def toggle_inputs(input_type):
            """Show the sketchpad for "Sketch" and the uploader for "Image"."""
            return (
                gr.update(visible=input_type == "Sketch"),
                gr.update(visible=input_type == "Image"),
            )

        input_type.change(
            toggle_inputs,
            inputs=[input_type],
            outputs=[sketch_input, image_input]
        )

        # Button click handler
        # The input order must match generate_image's positional parameters.
        run_button.click(
            fn=generate_image,
            inputs=[
                sketch_input, user_prompt, style_dropdown, input_type,
                image_input, num_steps, guidance_scale,
                controlnet_conditioning_scale, seed, randomize_seed
            ],
            outputs=[output_image]
        )

    demo.launch(debug=True)

# Build the Gradio UI and launch the app when this cell runs.
gradio_interface()