<a href="https://colab.research.google.com/github/club20s/t2i/blob/main/Teks_To_Image_Model_Stable_Diffusion.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

TEKS KE GAMBAR DENGAN STABLE DIFFUSION

In [None]:
# Libraries that must be installed in the Python environment before running the notebook

!pip install diffusers transformers ipywidgets matplotlib torch tqdm accelerate
!pip install tensorflow
!pip install torch torchvision
!pip install seaborn
!pip install opencv-python
!pip install --upgrade gradio
!pip install googletrans==4.0.0-rc1
!pip install deep-translator
!pip install httpx==0.23.0


In [None]:
import os
import torch
from transformers import CLIPTokenizer, CLIPTextModel
from diffusers import StableDiffusionPipeline
from PIL import Image
import matplotlib.pyplot as plt
import torchvision.transforms as transforms
from skimage.metrics import structural_similarity as ssim
import ipywidgets as widgets
from IPython.display import display
import time


# Set up device configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Half precision is only requested when running on CUDA. On the CPU fallback
# the pipeline is loaded in float32, because a float16 pipeline moved to CPU
# is reported by diffusers as failing or being very slow.
_pipeline_dtype = torch.float16 if device.type == "cuda" else torch.float32

# Initialize pipeline, tokenizer, and text encoder
pipeline = StableDiffusionPipeline.from_pretrained(
    "CompVis/stable-diffusion-v1-4", torch_dtype=_pipeline_dtype
).to(device)
tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32")
text_encoder = CLIPTextModel.from_pretrained("openai/clip-vit-base-patch32").to(device)

# Create a folder for saving images and results
output_folder = "generated_images"
os.makedirs(output_folder, exist_ok=True)

# Function to define the default beta schedule
def default_beta_schedule(num_steps, beta_start=0.1, beta_end=0.2):
    """Build a linear beta schedule.

    Returns a 1-D float32 tensor of ``num_steps`` evenly spaced values running
    from ``beta_start`` to ``beta_end``, placed on the module-level ``device``.
    """
    schedule = torch.linspace(beta_start, beta_end, num_steps, dtype=torch.float32)
    return schedule.to(device)

# Function to calculate noise on image tensor
def calculate_noise(image_tensor):
    """Return the mean absolute deviation of ``image_tensor`` from its own mean.

    The result is converted to a plain Python number with ``.item()``.
    """
    deviation = image_tensor - image_tensor.mean()
    return deviation.abs().mean().item()

# Function to calculate noise percentage
def calculate_noise_percentage(noise_level, max_pixel_value=1.0):
    """Express ``noise_level`` as a percentage of ``max_pixel_value``."""
    fraction = noise_level / max_pixel_value
    return fraction * 100

# Forward diffusion function
def forward_diffusion(latents, beta, noise):
    """Apply one noising step: ``sqrt(1 - beta) * latents + sqrt(beta) * noise``.

    ``beta`` must be a tensor; a detached copy of it is moved to the device of
    ``latents`` before use, so the caller's tensor is left untouched.
    """
    step_beta = beta.clone().detach().to(latents.device)
    signal_scale = torch.sqrt(1 - step_beta)
    noise_scale = torch.sqrt(step_beta)
    return signal_scale * latents + noise_scale * noise

# Reverse diffusion function
def reverse_diffusion(latents, beta, noise):
    """Invert one noising step: ``(latents - sqrt(beta) * noise) / sqrt(1 - beta)``.

    ``beta`` must be a tensor; a detached copy of it is moved to the device of
    ``latents`` before use, so the caller's tensor is left untouched.
    """
    step_beta = beta.clone().detach().to(latents.device)
    without_noise = latents - torch.sqrt(step_beta) * noise
    signal_scale = torch.sqrt(1 - step_beta)
    return without_noise / signal_scale

# Function to save and display generated image
def display_image(image, title="Generated Image", save_path=None):
    """Show ``image`` with matplotlib and optionally write the figure to disk.

    The axes are hidden and ``title`` is drawn above the image. When
    ``save_path`` is truthy the current figure is saved there before it is
    shown.
    """
    plt.imshow(image)
    plt.axis("off")
    plt.title(title)
    wants_file = bool(save_path)
    if wants_file:
        plt.savefig(save_path)
    plt.show()

# Function to calculate PSNR
def calculate_psnr(img1, img2):
    """Return the peak signal-to-noise ratio between two images, in dB.

    The computation uses 1.0 as the peak signal value.

    Args:
        img1: A tensor, or any image accepted by ``transforms.ToTensor``
            (for example a PIL image).
        img2: Same as ``img1``. Shapes must be broadcast-compatible.

    Returns:
        float: ``inf`` when the mean squared error is exactly zero, otherwise
        the PSNR value as a plain Python float (never a tensor), so callers
        can format or store it directly.
    """
    def _as_device_tensor(img):
        # Both inputs get the same treatment: tensors are moved to the
        # module-level device, anything else is converted with ToTensor first.
        if isinstance(img, torch.Tensor):
            return img.to(device)
        return transforms.ToTensor()(img).to(device)

    first = _as_device_tensor(img1)
    second = _as_device_tensor(img2)
    mse = torch.mean((first - second) ** 2)
    if mse == 0:
        return float('inf')
    # The small epsilon keeps the logarithm's argument finite for tiny errors.
    psnr = 20 * torch.log10(1.0 / torch.sqrt(mse + 1e-8))
    return psnr.item()

# Function to calculate SSIM
def calculate_ssim(img1, img2, win_size=3):
    """Return the structural similarity between two channels-first images.

    Args:
        img1: A tensor, or any image accepted by ``transforms.ToTensor``.
            A 4-D tensor has its leading dimension squeezed.
        img2: Same as ``img1``.
        win_size: Side length of the SSIM window.

    Returns:
        The value returned by ``skimage.metrics.structural_similarity`` with
        ``data_range=1.0``.

    Raises:
        ValueError: If the first image is not 3-D after conversion, or if its
            two trailing dimensions are smaller than ``win_size``.
    """
    arrays = []
    for img in (img1, img2):
        tensor = img if isinstance(img, torch.Tensor) else transforms.ToTensor()(img)
        # Only drop a leading batch dimension; a 3-D single-channel image must
        # keep its channel axis so the layout stays (C, H, W).
        if tensor.dim() == 4:
            tensor = tensor.squeeze(0)
        arrays.append(tensor.detach().cpu().numpy())
    img1_np, img2_np = arrays

    if img1_np.ndim != 3:
        raise ValueError(f"Expected a 3-D (C, H, W) image for SSIM; got shape {img1_np.shape}.")
    if img1_np.shape[1] < win_size or img1_np.shape[2] < win_size:
        raise ValueError(f"Image too small to compute SSIM; minimum size {win_size}x{win_size} required.")

    # The arrays are channels-first, so the channel axis is 0. ``channel_axis``
    # replaces the deprecated ``multichannel`` flag (scikit-image >= 0.19),
    # which assumed channels on the last axis.
    return ssim(img1_np, img2_np, channel_axis=0, win_size=win_size, data_range=1.0)

# Function to evaluate image
def evaluate_image(reverse_noised_image, generated_tensor):
    """Score ``reverse_noised_image`` against ``generated_tensor``.

    Returns a ``(psnr, ssim)`` pair. PSNR is always computed first; if the
    SSIM computation raises ``ValueError``, the second element is the error
    message text instead of a score.
    """
    psnr_score = calculate_psnr(reverse_noised_image, generated_tensor)
    try:
        return psnr_score, calculate_ssim(reverse_noised_image, generated_tensor)
    except ValueError as err:
        return psnr_score, str(err)


# Function to track PSNR and SSIM across diffusion steps
def track_metrics(generated_tensor, betas, num_steps=100):
    """Track PSNR and SSIM while noising and then de-noising an image.

    One noise sample is drawn and reused for every step. The image is pushed
    through ``forward_diffusion`` for each beta in order, then through
    ``reverse_diffusion`` for the same betas in reverse order. After every
    step the current image is compared with ``generated_tensor``.

    Args:
        generated_tensor: The reference image tensor.
        betas: Indexable sequence of per-step beta tensors.
        num_steps: Number of steps to run in each direction. It is capped at
            ``len(betas)`` so the schedule is never indexed out of range.

    Returns:
        ``(psnr_values, ssim_values)``: two lists of floats, forward-pass
        entries first and reverse-pass entries after them. An SSIM entry is
        ``nan`` when the SSIM computation raised ``ValueError``.
    """
    steps = min(num_steps, len(betas))
    psnr_values = []
    ssim_values = []
    noise = torch.randn_like(generated_tensor).to(device)
    noisy_image = generated_tensor.clone()

    def _record(current_image):
        # Append both metrics for the current image against the reference.
        psnr_values.append(float(calculate_psnr(generated_tensor, current_image)))
        try:
            ssim_value = calculate_ssim(generated_tensor, current_image)
        except ValueError:
            # float("nan") avoids relying on numpy, which this file never imports.
            ssim_value = float("nan")
        ssim_values.append(float(ssim_value))

    # Forward diffusion: take the beta for step i and add noise.
    for i in range(steps):
        noisy_image = forward_diffusion(noisy_image, betas[i], noise)
        _record(noisy_image)

    # Reverse diffusion: undo the steps in the opposite order.
    for i in reversed(range(steps)):
        noisy_image = reverse_diffusion(noisy_image, betas[i], noise)
        _record(noisy_image)

    return psnr_values, ssim_values



# Function to process image generation with Self-Attention and Cross-Attention
def process_image(prompt, selected_theme, selected_style):
    """Generate an image for a prompt combined with a theme and a style.

    The three strings are joined with single spaces and passed to the
    module-level Stable Diffusion ``pipeline``. The tokenized prompt and its
    decoded form are printed for inspection only; the pipeline receives the
    raw prompt string, not these tokens.

    Args:
        prompt: Free-text prompt.
        selected_theme: Theme text appended to the prompt (may be empty).
        selected_style: Style text appended after the theme (may be empty).

    Returns:
        The first generated image converted with ``ToTensor`` and given a
        leading batch dimension, on the module-level ``device``.
    """
    combined_prompt = f"{prompt} {selected_theme} {selected_style}"

    # Tokenize the prompt and show both the ids and their decoded text.
    text_input = tokenizer(combined_prompt, return_tensors="pt").input_ids.to(device)
    print("Prompt yang telah di-tokenisasi:", text_input)
    decoded_text = tokenizer.decode(text_input.squeeze(0))
    print("Prompt yang telah didekode:", decoded_text)

    # No separate text-encoder pass is run here: its output was never consumed,
    # so it only cost compute on an untruncated token sequence.
    generated_image = pipeline(prompt=combined_prompt, guidance_scale=7.5, num_inference_steps=50)["images"][0]
    return transforms.ToTensor()(generated_image).unsqueeze(0).to(device)

# Choices offered by the theme dropdown; the leading empty string means "no theme".
_THEME_OPTIONS = [
    '', 'Flying', 'Underwater', 'Space', 'Fantasy', 'Cyberpunk', 'Steampunk',
    'Forest', 'Desert', 'Cityscape', 'Mountain', 'Ocean', 'Abstract',
    'Sci-fi', 'Medieval', 'Winter', 'Summer', 'Spring', 'Autumn',
    'Future', 'Wild West', 'Jungle', 'Deep Sea', 'Outer Space',
    'Robot World', 'Alien Planet', 'Dreamscape', 'Apocalypse',
    'Neon City', 'Magic Realm', 'Steampunk City', 'Mythical Creatures',
    'Post-Apocalyptic', 'Enchanted Forest', 'Galactic Empire', 'Vintage',
    'Art Deco', 'Fairytale', 'Futuristic City', 'Retro', 'Alien Landscape',
    'Cybernetic World', 'Celestial', 'Whimsical', 'Dark Fantasy',
]

# Choices offered by the style dropdown; the leading empty string means "no style".
_STYLE_OPTIONS = [
    '', 'Van Gogh', 'Edvard Munch', 'Pablo Picasso', 'Johannes Vermeer',
    'Studio Ghibli', 'Leonardo Da Vinci', 'Claude Monet', 'Henri Matisse',
    'Jackson Pollock', 'Salvador Dalí', 'Frida Kahlo', 'Gustav Klimt',
    'Andy Warhol', 'Piet Mondrian', 'Wassily Kandinsky', 'Paul Cézanne',
    'Yayoi Kusama', 'Keith Haring', 'Damien Hirst', 'Renaissance',
    'Surrealism', 'Impressionism', 'Cubism', 'Modern Art', 'Pop Art',
    'Minimalism', 'Anime', 'Realism', 'Futurism', 'Baroque',
    'Expressionism', 'Gothic', 'Photorealism', 'Graffiti', 'Pixel Art',
    'Fantasy Art', 'Comic Book Style', 'Retro Futurism', 'Noir',
    'Abstract Art', 'Flat Art', '3D Art', 'High Contrast', 'Concept Art',
    'Art Deco',
]

# Input widgets: free-text prompt plus theme and style dropdowns.
prompt_widget = widgets.Text(
    placeholder='Masukkan prompt untuk pembuatan gambar',
    description='Prompt:',
)
theme_widget = widgets.Dropdown(options=_THEME_OPTIONS, description='Tema:')
style_widget = widgets.Dropdown(options=_STYLE_OPTIONS, description='Gaya:')

# Button that triggers image generation.
generate_button = widgets.Button(description="Buat Gambar")

def on_generate_clicked(b):
    """Button callback: generate an image, noise/de-noise it, and report metrics.

    Reads the prompt, theme, and style from the module-level widgets, generates
    an image, tracks PSNR/SSIM over the full beta schedule, then applies a
    single forward and reverse diffusion step with the first beta. Each image
    is saved under ``output_folder`` and displayed, noise levels and quality
    metrics are printed, and the tracked metrics are plotted.

    Args:
        b: The object passed by the click handler (unused).
    """
    prompt = prompt_widget.value
    selected_theme = theme_widget.value
    selected_style = style_widget.value
    to_pil = transforms.ToPILImage()

    def _to_displayable(tensor):
        # ToPILImage scales float tensors to uint8 without clamping, so values
        # pushed outside [0, 1] by the added noise would wrap around. Clamp first.
        return to_pil(tensor.squeeze(0).clamp(0, 1).cpu())

    # Start timing
    start_time = time.time()

    # Step 1: Process image generation
    generated_tensor = process_image(prompt, selected_theme, selected_style)

    # Save and display the initial generated image
    initial_image_path = os.path.join(output_folder, "initial_generated_image.png")
    display_image(_to_displayable(generated_tensor), title="Gambar Awal yang Dihasilkan", save_path=initial_image_path)

    # Step 2: Forward diffusion (adding noise)
    num_steps = 50  # Number of diffusion steps
    betas = default_beta_schedule(num_steps)
    beta = betas[0]  # The single-step demo below uses only the first beta

    psnr_values, ssim_values = track_metrics(generated_tensor, betas, num_steps)
    print(f"PSNR Values: {psnr_values}")
    print(f"SSIM Values: {ssim_values}")

    noise = torch.randn_like(generated_tensor).to(device)
    forward_noised_image = forward_diffusion(generated_tensor, beta, noise)

    # Convert and display the image with noise from forward diffusion.
    # The noise statistics are computed on the unclamped tensor.
    forward_noised_image_pil = _to_displayable(forward_noised_image)
    forward_noised_image_path = os.path.join(output_folder, "forward_noised_image.png")
    forward_noise_level = calculate_noise(forward_noised_image)
    forward_noise_percentage = calculate_noise_percentage(forward_noise_level)

    print(f"Forward diffusion: Tingkat Noise: {forward_noise_level}, Persentase Noise: {forward_noise_percentage:.2f}%")
    display_image(forward_noised_image_pil, title="Gambar dengan Noise Forward", save_path=forward_noised_image_path)

    # Step 3: Reverse diffusion (removing noise), using the unclamped noised tensor
    reverse_noised_image = reverse_diffusion(forward_noised_image, beta, noise)
    reverse_noised_image_pil = _to_displayable(reverse_noised_image)
    reverse_noised_image_path = os.path.join(output_folder, "reverse_noised_image.png")
    reverse_noise_level = calculate_noise(reverse_noised_image)
    reverse_noise_percentage = calculate_noise_percentage(reverse_noise_level)
    print(f"Reverse diffusion: Tingkat Noise: {reverse_noise_level}, Persentase Noise: {reverse_noise_percentage:.2f}%")

    print("Evaluating image quality...")
    psnr_value, ssim_value = evaluate_image(reverse_noised_image_pil, generated_tensor)
    # float() prints a plain number whether PSNR arrives as a float or a 0-dim tensor.
    print(f"PSNR: {float(psnr_value)} dB, SSIM: {ssim_value}")

    # Save and display the reverse diffusion image
    display_image(reverse_noised_image_pil, title="Gambar dengan Noise Reverse", save_path=reverse_noised_image_path)

    # Plot PSNR and SSIM values side by side
    plt.figure(figsize=(12, 6))

    # Plot PSNR values
    plt.subplot(1, 2, 1)
    plt.plot(range(len(psnr_values)), psnr_values, label='PSNR')
    plt.xlabel('Diffusion Step')
    plt.ylabel('PSNR')
    plt.title('PSNR across Diffusion Steps')

    # Plot SSIM values
    plt.subplot(1, 2, 2)
    plt.plot(range(len(ssim_values)), ssim_values, label='SSIM', color='orange')
    plt.xlabel('Diffusion Step')
    plt.ylabel('SSIM')
    plt.title('SSIM across Diffusion Steps')

    # End timing (taken before the metric plot is shown)
    end_time = time.time()
    duration = end_time - start_time
    print(f"Waktu mulai: {start_time:.2f} detik")
    print(f"Waktu selesai: {end_time:.2f} detik")
    print(f"Durasi total: {duration:.2f} detik")

    plt.tight_layout()
    plt.show()


# Register the click callback on the button, then render the prompt, theme,
# and style inputs together with the button.
generate_button.on_click(on_generate_clicked)
display(prompt_widget, theme_widget, style_widget, generate_button)


# The dB (decibel) unit is used to measure the ratio between the maximum value of the original signal (its peak) and the level of noise in that signal.
