In [None]:
!nvidia-smi

Wed Mar 20 05:41:20 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.161.07             Driver Version: 535.161.07   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA RTX A4000               On  | 00000000:83:00.0 Off |                  Off |
| 41%   35C    P8              15W / 100W |      4MiB / 16376MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [None]:
import os
import json
import accelerate
# import wandb
import random
from typing import Tuple, Union, List
from datasets import Dataset, load_dataset, Features, Value
from PIL import Image
import numpy as np
import cv2
import datasets
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from transformers import CLIPTextModel, CLIPTokenizer, DataCollatorWithPadding, AutoImageProcessor, UperNetForSemanticSegmentation

import diffusers
from diffusers import (
    AutoencoderKL,
    DDPMScheduler,
    StableDiffusionInpaintPipeline,
    StableDiffusionControlNetInpaintPipeline,
    ControlNetModel,
    UNet2DConditionModel,
    UniPCMultistepScheduler
)
from diffusers.loaders import AttnProcsLayers
from diffusers.models.attention_processor import LoRAAttnProcessor
from colors import ade_palette
from utils import map_colors_rgb

In [None]:
# !unzip -q '/development_data-v0.2.zip' -d '/'

### Finetuning inpainting with accelerate launch command

In [None]:
!git clone https://github.com/anilbhatt1/interior_design_challenge_022024.git

In [None]:
len(os.listdir('/ikea_room_images')), len(os.listdir('/ikea_mask_broaden')), len(os.listdir('/ikea_image_mask_broaden'))

In [None]:
# Launch LoRA fine-tuning of the inpainting model through `accelerate`, using the
# training script from the repository cloned above. The flags below select the
# "runwayml/stable-diffusion-inpainting" base weights, the three IKEA image
# directories plus the caption JSON as training data, wandb reporting, and
# resuming from the latest checkpoint in the "output" directory.
!accelerate launch '/interior_design_challenge_022024/terrain-diffusion-interior-clean/scripts/train_text_to_image_lora_sd2_inpaint.py' \
  --pretrained_model_name_or_path="runwayml/stable-diffusion-inpainting" \
  --dataset_name="custom" \
  --caption_column="text" \
  --mask_mode="512train-very-large" \
  --mixed_precision="no" \
  --train_batch_size=8 \
  --gradient_accumulation_steps=4 \
  --num_train_epochs=3 \
  --checkpointing_steps=90 \
  --learning_rate=1e-03 \
  --lr_scheduler="cosine" \
  --seed=2 \
  --validation_epochs=1 \
  --validation_file="/val_images_rose/validation.jsonl"\
  --output_dir="output" \
  --enable_xformers_memory_efficient_attention \
  --report_to="wandb" \
  --resume_from_checkpoint='latest' \
  --image_dir1="/ikea_room_images/" \
  --image_dir2="/ikea_mask_broaden/" \
  --image_dir3="/ikea_image_mask_broaden/" \
  --caption_file_path='/ikea_captions_summarized.json' \
  --val_image_dir='/val_images_rose/' \
  --val_image_save_dir='/val_images_save/'

### Inferencing using saved safetensors

In [None]:
# !gdown 1Vb9KAewPVK3UjDHZFtFXx8muehFzf_nm  #run1403.zip
!gdown 1CcOFTM4lWs8RF4ZoanAWSEC0fGYGcJi8 #run1803.zip

Downloading...
From (original): https://drive.google.com/uc?id=1CcOFTM4lWs8RF4ZoanAWSEC0fGYGcJi8
From (redirected): https://drive.google.com/uc?id=1CcOFTM4lWs8RF4ZoanAWSEC0fGYGcJi8&confirm=t&uuid=83ab03d3-1390-4bac-98a2-2c81d39bdd73
To: /runs1803.zip
100%|████████████████████████████████████████| 893M/893M [00:12<00:00, 68.8MB/s]


In [None]:
!unzip -q '/demo_dataset.zip' -d '/'
# !unzip -q '/runs1803.zip' -d '/'

In [None]:
!unzip -q '/runs1803.zip' -d '/'

In [None]:
def filter_items(
    colors_list: Union[List, np.ndarray],
    items_list: Union[List, np.ndarray],
    items_to_retain: Union[List, np.ndarray]
) -> Tuple[Union[List, np.ndarray], Union[List, np.ndarray]]:
    """
    Keep only the items listed in ``items_to_retain``, together with the
    color found at the same position in ``colors_list``.

    Args:
        colors_list: Colors, aligned index-by-index with ``items_list``.
        items_list: Item labels, one per color.
        items_to_retain: Labels that should survive the filter; every other
            item (and its color) is dropped.

    Returns:
        A tuple ``(filtered_colors, filtered_items)`` of two lists, in the
        original order, containing only the retained entries.
    """
    # Pair up colors and labels first so both outputs stay aligned.
    kept_pairs = [
        (color, item)
        for color, item in zip(colors_list, items_list)
        if item in items_to_retain
    ]
    filtered_colors = [color for color, _ in kept_pairs]
    filtered_items = [item for _, item in kept_pairs]
    return filtered_colors, filtered_items

def get_segmentation_pipeline(
) -> Tuple[AutoImageProcessor, UperNetForSemanticSegmentation]:
    """Load the pretrained segmentation image processor and UperNet model.

    Both are created with ``from_pretrained`` from the
    "openmmlab/upernet-convnext-large" checkpoint.

    Returns:
        Tuple[AutoImageProcessor, UperNetForSemanticSegmentation]: the image
        processor followed by the segmentation model.
    """
    # Processor and model come from the same checkpoint id.
    checkpoint = "openmmlab/upernet-convnext-large"
    processor = AutoImageProcessor.from_pretrained(checkpoint)
    segmentor = UperNetForSemanticSegmentation.from_pretrained(checkpoint)
    return processor, segmentor


@torch.inference_mode()
@torch.autocast('cuda')
def segment_image(
        image: Image,
        image_processor: AutoImageProcessor,
        image_segmentor: UperNetForSemanticSegmentation
) -> Image:
    """
    Run semantic segmentation on an image and render the result as a color map.

    Args:
        image (Image): Input PIL image.
        image_processor (AutoImageProcessor): Prepares the image for the model
            and post-processes the model output into a label map.
        image_segmentor (UperNetForSemanticSegmentation): The segmentation
            model.

    Returns:
        Image: An RGB PIL image of the same size as ``image`` in which every
            pixel carries the ``ade_palette()`` color of its predicted class.
            Pixels whose label has no palette entry stay black.
    """
    model_inputs = image_processor(image, return_tensors="pt")
    with torch.no_grad():
        outputs = image_segmentor(model_inputs.pixel_values)

    # PIL's ``size`` is (width, height); it is reversed before being passed
    # as the target size for the label map.
    target_hw = image.size[::-1]
    label_map = image_processor.post_process_semantic_segmentation(
        outputs, target_sizes=[target_hw])[0]

    # Paint each class id with its palette color on a black canvas.
    colored = np.zeros((label_map.shape[0], label_map.shape[1], 3), dtype=np.uint8)
    for class_id, rgb in enumerate(np.array(ade_palette())):
        colored[label_map == class_id, :] = rgb

    return Image.fromarray(colored).convert('RGB')

def resize_dimensions(dimensions, target_size):
    """
    Compute new (width, height) so the longer side equals ``target_size``
    while the aspect ratio is kept.

    Args:
        dimensions: ``(width, height)`` of the source image.
        target_size: Desired length of the longer side.

    Returns:
        ``dimensions`` unchanged when both sides are already smaller than
        ``target_size``; otherwise a new ``(width, height)`` tuple whose longer
        side is ``target_size`` and whose shorter side is scaled and truncated
        to an int.
    """
    width, height = dimensions

    # Never upscale: images strictly inside the target box are left alone.
    if max(width, height) < target_size:
        return dimensions

    if width > height:
        # Landscape: width becomes the target, height follows the ratio.
        scale = height / width
        return (target_size, int(target_size * scale))

    # Portrait or square: height becomes the target, width follows the ratio.
    scale = width / height
    return (int(target_size * scale), target_size)


In [None]:
# Load only the UNet of the inpainting checkpoint as a standalone model
# (this is a separate object from the UNet inside the pipeline built later),
# freeze its weights and move it to the GPU in float32.
unet = UNet2DConditionModel.from_pretrained(
    "runwayml/stable-diffusion-inpainting", subfolder="unet")
print(f'unet done')
unet.requires_grad_(False)  # disable gradients on all UNet parameters
weight_dtype = torch.float32
unet.to('cuda', dtype=weight_dtype)
print(f'unet moved to cuda')

unet/config.json:   0%|          | 0.00/748 [00:00<?, ?B/s]

unet/diffusion_pytorch_model.bin:   0%|          | 0.00/3.44G [00:00<?, ?B/s]

unet done
unet moved to cuda


In [None]:
# Build one rank-64 LoRA attention processor per attention layer of the
# standalone `unet`, keyed by the processor name.
# NOTE(review): these processors are for the standalone `unet`, not for
# `pipeline.unet`, which is loaded separately — confirm this cell is still needed.
lora_attn_procs = {}
for name in unet.attn_processors.keys():
    # print(f'name in unet : {name}')
    # "attn1" processors get no cross-attention dimension; all others use the
    # UNet's configured cross_attention_dim.
    cross_attention_dim = (
        None
        if name.endswith("attn1.processor")
        else unet.config.cross_attention_dim
    )
    # The hidden size is looked up from block_out_channels according to which
    # block the processor lives in; the block index is the single character
    # right after the "up_blocks." / "down_blocks." prefix.
    if name.startswith("mid_block"):
        hidden_size = unet.config.block_out_channels[-1]
    elif name.startswith("up_blocks"):
        block_id = int(name[len("up_blocks.")])
        hidden_size = list(reversed(unet.config.block_out_channels))[block_id]
    elif name.startswith("down_blocks"):
        block_id = int(name[len("down_blocks.")])
        hidden_size = unet.config.block_out_channels[block_id]
    # NOTE(review): a name matching none of the prefixes would reuse the
    # previous iteration's hidden_size (or raise NameError on the first one).

    lora_attn_procs[name] = LoRAAttnProcessor(
        hidden_size=hidden_size,
        cross_attention_dim=cross_attention_dim,
        rank=64,
    )

In [None]:
unet.set_attn_processor(lora_attn_procs)

In [None]:
lora_layers = AttnProcsLayers(unet.attn_processors)

In [None]:
controlnet_seg = ControlNetModel.from_pretrained("BertChristiaens/controlnet-seg-room", torch_dtype=torch.float32)

config.json:   0%|          | 0.00/989 [00:00<?, ?B/s]

diffusion_pytorch_model.bin:   0%|          | 0.00/723M [00:00<?, ?B/s]

The config attributes {'dropout': 0.0, 'sample_size': 32} were passed to ControlNetModel, but are not expected and will be ignored. Please verify your config.json configuration file.


In [None]:
# Assemble the ControlNet inpainting pipeline from the base inpainting
# checkpoint plus the room-segmentation ControlNet loaded above, in float32
# and with the safety checker disabled.
pipeline = StableDiffusionControlNetInpaintPipeline.from_pretrained(
            "runwayml/stable-diffusion-inpainting",
            controlnet=controlnet_seg,
            safety_checker=None,
            torch_dtype=torch.float32
        )
# Swap the default scheduler for UniPC, reusing the existing scheduler config.
pipeline.scheduler = UniPCMultistepScheduler.from_config(pipeline.scheduler.config)
pipeline.enable_xformers_memory_efficient_attention()
pipeline = pipeline.to('cuda')
# Seed used for the torch.Generator passed to the pipeline in gen_image.
seed = 2

model_index.json:   0%|          | 0.00/548 [00:00<?, ?B/s]

unet/diffusion_pytorch_model.safetensors not found


Fetching 14 files:   0%|          | 0/14 [00:00<?, ?it/s]

(…)ature_extractor/preprocessor_config.json:   0%|          | 0.00/342 [00:00<?, ?B/s]

scheduler/scheduler_config.json:   0%|          | 0.00/313 [00:00<?, ?B/s]

tokenizer/tokenizer_config.json:   0%|          | 0.00/806 [00:00<?, ?B/s]

text_encoder/config.json:   0%|          | 0.00/617 [00:00<?, ?B/s]

tokenizer/special_tokens_map.json:   0%|          | 0.00/472 [00:00<?, ?B/s]

tokenizer/vocab.json:   0%|          | 0.00/1.06M [00:00<?, ?B/s]

tokenizer/merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

vae/config.json:   0%|          | 0.00/552 [00:00<?, ?B/s]

text_encoder/pytorch_model.bin:   0%|          | 0.00/492M [00:00<?, ?B/s]

vae/diffusion_pytorch_model.bin:   0%|          | 0.00/335M [00:00<?, ?B/s]

Loading pipeline components...:   0%|          | 0/6 [00:00<?, ?it/s]

You have disabled the safety checker for <class 'diffusers.pipelines.controlnet.pipeline_controlnet_inpaint.StableDiffusionControlNetInpaintPipeline'> by passing `safety_checker=None`. Ensure that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered results in services or applications open to the public. Both the diffusers team and Hugging Face strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling it only for use-cases that involve analyzing network behavior or auditing its results. For more information, please have a look at https://github.com/huggingface/diffusers/pull/254 .


In [None]:
# Fingerprint of the pipeline's UNet before any LoRA weights are loaded:
# the sum of the per-parameter means. The same quantity is recomputed after
# loading the fine-tuned weights so the two printed values can be compared.
# Note: `unet_original` is a reference to `pipeline.unet`, not a copy.
unet_original = pipeline.unet

mean_unet_original = 0
for param in unet_original.parameters():
    mean_unet_original += param.mean()

print(f"Mean original: {mean_unet_original}")

Mean original: 41.74258041381836


In [None]:
# Load fine-tuned LoRA attention-processor weights (safetensors) into the
# pipeline's UNet. Earlier candidate paths are kept commented out below.
# NOTE(review): the active path points at a "run1403" file while the download
# cell above fetches runs1803.zip — confirm this is the intended checkpoint.
# unet_weight_path = 'fine-tuning/Run_details_mar13_2107ist/pytorch_lora_weights.safetensors'
# unet_weight_path = '/inpainting/output/pytorch_lora_weights.safetensors'
unet_weight_path = '/int_ch/models/unet_fine_tuned_weights/pytorch_lora_weights_run1403.safetensors'
pipeline.unet.load_attn_procs(unet_weight_path, use_safetensors=True)

In [None]:
# Mean based on Sij's run
# Same fingerprint as for the original UNet (sum of per-parameter means),
# computed now that the LoRA weights have been loaded into `pipeline.unet`.
# `unet_fine_tuned` refers to the same object as `pipeline.unet`.
unet_fine_tuned = pipeline.unet

mean_unet_fine_tuned = 0
for param in unet_fine_tuned.parameters():
    mean_unet_fine_tuned += param.mean()

print(f"Mean fine_tuned: {mean_unet_fine_tuned}")

Mean fine_tuned: 41.7419319152832


In [None]:
# Mean based on Anils run - Mean fine_tuned: 41.74186706542969
# unet_fine_tuned = pipeline.unet

# mean_unet_fine_tuned = 0
# for param in unet_fine_tuned.parameters():
#     mean_unet_fine_tuned += param.mean()

# print(f"Mean fine_tuned: {mean_unet_fine_tuned}")

Mean fine_tuned: 41.74186706542969


In [None]:
seg_image_processor, image_segmentor = get_segmentation_pipeline()

preprocessor_config.json:   0%|          | 0.00/372 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/8.76k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/940M [00:00<?, ?B/s]

In [None]:
# Standalone CLIP text encoder and tokenizer from the same inpainting
# checkpoint. They are used by tokenize_function / get_pipeline_embeds_mod to
# build prompt embeddings outside the pipeline.
text_encoder = CLIPTextModel.from_pretrained(
    "runwayml/stable-diffusion-inpainting",
    subfolder="text_encoder")

tokenizer = CLIPTokenizer.from_pretrained(
    "runwayml/stable-diffusion-inpainting",
    subfolder="tokenizer")

# Collator used in gen_image to pad the tokenized positive and negative
# prompts into one batch tensor.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

In [None]:
def tokenize_function(caption):
    """Tokenize ``caption`` with the notebook-level ``tokenizer``.

    Truncation is switched off, so the returned encoding may be longer than
    the tokenizer's ``model_max_length``.
    """
    encoded = tokenizer(caption, truncation=False)
    return encoded

def get_pipeline_embeds_mod(input_ids, negative_ids):
    """Embed positive and negative prompt ids in windows of ``model_max_length``.

    Both id tensors are sliced along their last axis into consecutive windows
    of ``tokenizer.model_max_length`` tokens; every window is run through the
    notebook-level ``text_encoder`` and the resulting hidden states are
    concatenated along dimension 1.

    Args:
        input_ids: Token ids of the positive prompt, indexed as [batch, token].
        negative_ids: Token ids of the negative prompt, indexed the same way.

    Returns:
        Tuple ``(positive_embeds, negative_embeds)`` of concatenated encoder
        outputs.
    """
    window = tokenizer.model_max_length
    longest = max(input_ids.shape[-1], negative_ids.shape[-1])

    # Encode the positive and negative window for each start offset, in order.
    encoded_pairs = [
        (
            text_encoder(input_ids[:, start: start + window])[0],
            text_encoder(negative_ids[:, start: start + window])[0],
        )
        for start in range(0, longest, window)
    ]
    positive_chunks = [pos for pos, _ in encoded_pairs]
    negative_chunks = [neg for _, neg in encoded_pairs]

    return torch.cat(positive_chunks, dim=1), torch.cat(negative_chunks, dim=1)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
demo_dataset = pd.read_csv('/demo_dataset/demo_dataset_prompts.tsv', sep='\t')
demo_dataset.head()

Unnamed: 0,image,prompt
0,image_0.jpg,A Bauhaus-inspired living room with a sleek bl...
1,image_1.jpg,A glamorous master bedroom in Hollywood Regenc...
2,image_2.jpg,"A vibrant living room with a tropical theme, c..."
3,image_3.jpg,An industrial-chic loft space featuring expose...
4,image_4.jpg,A mid-century modern dining room showcasing a ...


In [None]:
aicrowd_dataset = pd.read_csv('//development_data-v0.2/all_input_list.csv', sep='\t')
aicrowd_dataset.head()

Unnamed: 0,img_path,text_prompt
0,bedroom_1.jpg,An elegantly appointed bedroom in the Art Deco...
1,bedroom_2.jpg,A bedroom that exudes French country charm wit...
2,bedroom_3.jpg,"A bedroom with a bohemian spirit, centered aro..."
3,children_room_1.jpg,A whimsical bohemian-themed children's room ad...
4,children_room_2.jpg,A serene and playful Scandinavian-style childr...


In [None]:
neg_prompt = "lowres, watermark, banner, logo, contactinfo, text, deformed, blurry, blur, \
out of focus, out of frame, surreal, ugly, distortion, low-res, poor quality, "
additional_quality_suffix = "interior design, 4K, high resolution"

In [None]:
neg_const1 = "cluttered space, Low ceilings, Mismatched colors and patterns, Poor lighting, Empty and uninviting space, "
neg_const2 = "furniture with illogical or non-functional designs, missing parts or unbalanced bases, "
neg_const3 = "plants growing inside furniture (e.g., stools, baskets), No plants defying gravity \
(e.g., floating or hanging unrealistically), "
neg_const4 = "furniture merging with other objects (e.g., sofa with table, bed with floor), "
neg_const5 = "columns and pillars not affixed to ground, columns and pillars defying gravity, "

In [None]:
pos_const1 = "Sofas, tables, bed cots, chairs if present in the image must have sturdy bases, "
pos_const2 = "Bed if present must be smooth and of queen size. Bed cot Headboard must be smooth, simple, "
pos_const3 = "Ceilings and walls must be smooth, "
pos_const4 = "Table/s if present must have a level top with a flat, stable surface made of glass, wooden or marble tops, "
pos_const5 = "Plants if present must be placed in proper terracotta pots, "
pos_const6 = "Furnitures and objects must maintain their distinct boundaries, "
pos_const7 = "Columns and pillars must have their bases firmly affixed to the floor, "
pos_const8 = "Scenaries outside windows should remain the same"

In [None]:
# pos_const_lst = [pos_const1, pos_const2, pos_const3, pos_const4, pos_const5, pos_const6]
pos_const_lst = []

In [None]:
# neg_const_lst = [neg_const1, neg_const2, neg_const3, neg_const4]
neg_const_lst = []

In [None]:
control_items = ["floor;flooring", "rug;carpet;carpeting", "wall", "ceiling"]

In [None]:
def gen_image(neg_prompt):
    """
    Generate and display redesigned rooms for the first rows of ``demo_dataset``.

    For every row the image is loaded, resized with ``resize_dimensions``
    (target 768) and segmented. The inpainting mask is white (255) on all
    segments whose label IS in the notebook-level ``control_items`` list
    (selected through ``filter_items(..., items_to_retain=...)``). The prompts
    are tokenized without truncation, embedded via ``get_pipeline_embeds_mod``
    and passed to ``pipeline`` together with the image, the mask and the
    segmentation map. The input, the mask and the result (resized back to the
    original size) are displayed with matplotlib.

    Args:
        neg_prompt: Base negative prompt. Each entry of the notebook-level
            ``neg_const_lst`` is appended to it as ``', <entry>'``.

    Notes:
        Reads these notebook globals at call time: ``demo_dataset``,
        ``control_items``, ``pos_const_lst``, ``neg_const_lst``,
        ``additional_quality_suffix``, ``seg_image_processor``,
        ``image_segmentor``, ``data_collator``, ``pipeline`` and ``seed``.
        The loop stops after the first row whose index is greater than 3.
    """
    # Build the negative prompt ONCE. Appending inside the row loop (as the
    # previous version did) re-added every constraint on each row, so the
    # negative prompt kept growing from image to image.
    full_neg_prompt = neg_prompt
    for neg_const in neg_const_lst:
        full_neg_prompt = full_neg_prompt + f', {neg_const}'

    for idx, row in demo_dataset.iterrows():

        image_name = row['image']
        pos_prompt = row['prompt']

        empty_room_image = Image.open('demo_dataset/'+image_name)
        orig_w, orig_h = empty_room_image.size
        new_width, new_height = resize_dimensions(empty_room_image.size, 768)
        input_image = empty_room_image.resize((new_width, new_height))

        plt.imshow(input_image)
        plt.axis('off')
        plt.show()
        print("Prompt:", pos_prompt)

        # Segment the room and translate every color present in the map
        # back to its item label.
        real_seg = np.array(segment_image(input_image,
                                      seg_image_processor,
                                      image_segmentor))
        unique_colors = np.unique(real_seg.reshape(-1, real_seg.shape[2]), axis=0)
        unique_colors = [tuple(color) for color in unique_colors]
        segment_items = [map_colors_rgb(i) for i in unique_colors]
        chosen_colors, segment_items = filter_items(
            colors_list=unique_colors,
            items_list=segment_items,
            items_to_retain=control_items
        )

        # Mask is 1 wherever the pixel color belongs to a chosen segment.
        mask = np.zeros_like(real_seg)
        for color in chosen_colors:
            color_matches = (real_seg == color).all(axis=2)
            mask[color_matches] = 1

        plt.imshow((mask*255).astype(int))
        plt.axis('off')
        plt.show()

        image_np = np.array(input_image)
        image = Image.fromarray(image_np).convert("RGB")
        segmentation_cond_image = Image.fromarray(real_seg).convert("RGB")
        mask_image = Image.fromarray((mask * 255).astype(np.uint8)).convert("RGB")

        # The positive prompt is rebuilt from the row on every iteration.
        pos_prompt += f', {additional_quality_suffix},'
        for pos_const in pos_const_lst:
            pos_prompt = pos_prompt + f'{pos_const}'
        if idx == 0:
            print("Final POS Prompt:", pos_prompt)

        if idx == 0:
            print("Final NEG Prompt:", full_neg_prompt)

        # Tokenize both prompts, pad them to a common length, and embed them
        # in model_max_length-sized chunks.
        prompt_lst = [pos_prompt, full_neg_prompt]
        prompt_token_lst = []
        for prompt in prompt_lst:
            prompt_dict = tokenize_function(prompt)
            prompt_token_lst.append(prompt_dict)
        prompt_tensors = data_collator(prompt_token_lst)
        prompt_ids = prompt_tensors['input_ids']
        pos_prompt_ids = prompt_ids[0, :].unsqueeze(0)
        neg_prompt_ids = prompt_ids[1, :].unsqueeze(0)
        pos_prompt_embed, neg_prompt_embed = get_pipeline_embeds_mod(pos_prompt_ids, neg_prompt_ids)
        print("prompt embedding shape", pos_prompt_embed.size(), neg_prompt_embed.size())

        generated_image = pipeline(
            prompt_embeds=pos_prompt_embed,
            negative_prompt_embeds=neg_prompt_embed,
            num_inference_steps=50,
            strength=1.0,
            guidance_scale=7.0,
            generator=[torch.Generator(device="cuda").manual_seed(seed)],
            image=image,
            mask_image=mask_image,
            control_image=segmentation_cond_image,
        ).images[0]

        # Bring the result back to the size of the source image.
        design_image = generated_image.resize(
                (orig_w, orig_h), Image.Resampling.LANCZOS)

        plt.imshow(design_image)
        plt.axis('off')
        plt.show()

        if idx > 3:
            break

### No Pos or neg consts

In [None]:
pos_const_lst = []

In [None]:
neg_const_lst = []

In [None]:
gen_image(neg_prompt)

### Neg prompt = "Objects other than those mentioned in prompt"

In [None]:
neg_const1 = "Objects other than those mentioned in prompt, "

In [None]:
neg_const_lst = [neg_const1]

In [None]:
gen_image(neg_prompt)

### Pos prompt = "No objects should be included other than those mentioned in prompt"

In [None]:
pos_const1 = "No objects should be included other than those mentioned in prompt, "

In [None]:
pos_const_lst = [pos_const1]

In [None]:
neg_const_lst = []

In [None]:
gen_image(neg_prompt)

### No additional prompts
control_items_to_mask = ["stairs;steps",
"step;stair",
"stairway;staircase",
"radiator",
"screen;door;screen",
"windowpane;window",
"door;double;door",
"countertop",
"fireplace;hearth;open;fireplace",]

In [None]:
control_items = ["stairs;steps", "step;stair", "stairway;staircase", "radiator", "screen;door;screen", \
                 "windowpane;window", "door;double;door", "countertop", "fireplace;hearth;open;fireplace",]

In [None]:
def filter_items(
    colors_list: Union[List, np.ndarray],
    items_list: Union[List, np.ndarray],
    items_to_mask: Union[List, np.ndarray]
) -> Tuple[Union[List, np.ndarray], Union[List, np.ndarray]]:
    """
    Drop the items listed in ``items_to_mask`` and return what is left,
    together with the color found at the same position in ``colors_list``.

    Args:
        colors_list: Colors, aligned index-by-index with ``items_list``.
        items_list: Item labels, one per color.
        items_to_mask: Labels to exclude from the result.

    Returns:
        A tuple ``(filtered_colors, filtered_items)`` of two lists, in the
        original order, containing only entries whose label is NOT in
        ``items_to_mask``.
    """
    # Pair up colors and labels first so both outputs stay aligned.
    surviving_pairs = [
        (color, item)
        for color, item in zip(colors_list, items_list)
        if item not in items_to_mask
    ]
    filtered_colors = [color for color, _ in surviving_pairs]
    filtered_items = [item for _, item in surviving_pairs]
    return filtered_colors, filtered_items

In [None]:
def gen_image(neg_prompt):
    """
    Generate and display redesigned rooms for the first rows of ``demo_dataset``.

    For every row the image is loaded, resized with ``resize_dimensions``
    (target 768) and segmented. The inpainting mask is white (255) on all
    segments whose label is NOT in the notebook-level ``control_items`` list
    (selected through ``filter_items(..., items_to_mask=...)``). The prompts
    are tokenized without truncation, embedded via ``get_pipeline_embeds_mod``
    and passed to ``pipeline`` together with the image, the mask and the
    segmentation map. The input, the mask and the result (resized back to the
    original size) are displayed with matplotlib.

    Args:
        neg_prompt: Base negative prompt. Each entry of the notebook-level
            ``neg_const_lst`` is appended to it as ``', <entry>'``.

    Notes:
        Reads these notebook globals at call time: ``demo_dataset``,
        ``control_items``, ``pos_const_lst``, ``neg_const_lst``,
        ``additional_quality_suffix``, ``seg_image_processor``,
        ``image_segmentor``, ``data_collator``, ``pipeline`` and ``seed``.
        The loop stops after the first row whose index is greater than 3.
    """
    # Build the negative prompt ONCE. Appending inside the row loop (as the
    # previous version did) re-added every constraint on each row, so the
    # negative prompt kept growing from image to image.
    full_neg_prompt = neg_prompt
    for neg_const in neg_const_lst:
        full_neg_prompt = full_neg_prompt + f', {neg_const}'

    for idx, row in demo_dataset.iterrows():

        image_name = row['image']
        pos_prompt = row['prompt']

        empty_room_image = Image.open('demo_dataset/'+image_name)
        orig_w, orig_h = empty_room_image.size
        new_width, new_height = resize_dimensions(empty_room_image.size, 768)
        input_image = empty_room_image.resize((new_width, new_height))

        plt.imshow(input_image)
        plt.axis('off')
        plt.show()
        print("Prompt:", pos_prompt)

        # Segment the room and translate every color present in the map
        # back to its item label.
        real_seg = np.array(segment_image(input_image,
                                      seg_image_processor,
                                      image_segmentor))
        unique_colors = np.unique(real_seg.reshape(-1, real_seg.shape[2]), axis=0)
        unique_colors = [tuple(color) for color in unique_colors]
        segment_items = [map_colors_rgb(i) for i in unique_colors]
        chosen_colors, segment_items = filter_items(
            colors_list=unique_colors,
            items_list=segment_items,
            items_to_mask=control_items
        )

        # Mask is 1 wherever the pixel color belongs to a chosen segment.
        mask = np.zeros_like(real_seg)
        for color in chosen_colors:
            color_matches = (real_seg == color).all(axis=2)
            mask[color_matches] = 1

        plt.imshow((mask*255).astype(int))
        plt.axis('off')
        plt.show()

        image_np = np.array(input_image)
        image = Image.fromarray(image_np).convert("RGB")
        segmentation_cond_image = Image.fromarray(real_seg).convert("RGB")
        mask_image = Image.fromarray((mask * 255).astype(np.uint8)).convert("RGB")

        # The positive prompt is rebuilt from the row on every iteration.
        pos_prompt += f', {additional_quality_suffix},'
        for pos_const in pos_const_lst:
            pos_prompt = pos_prompt + f'{pos_const}'
        if idx == 0:
            print("Final POS Prompt:", pos_prompt)

        if idx == 0:
            print("Final NEG Prompt:", full_neg_prompt)

        # Tokenize both prompts, pad them to a common length, and embed them
        # in model_max_length-sized chunks.
        prompt_lst = [pos_prompt, full_neg_prompt]
        prompt_token_lst = []
        for prompt in prompt_lst:
            prompt_dict = tokenize_function(prompt)
            prompt_token_lst.append(prompt_dict)
        prompt_tensors = data_collator(prompt_token_lst)
        prompt_ids = prompt_tensors['input_ids']
        pos_prompt_ids = prompt_ids[0, :].unsqueeze(0)
        neg_prompt_ids = prompt_ids[1, :].unsqueeze(0)
        pos_prompt_embed, neg_prompt_embed = get_pipeline_embeds_mod(pos_prompt_ids, neg_prompt_ids)
        print("prompt embedding shape", pos_prompt_embed.size(), neg_prompt_embed.size())

        generated_image = pipeline(
            prompt_embeds=pos_prompt_embed,
            negative_prompt_embeds=neg_prompt_embed,
            num_inference_steps=50,
            strength=1.0,
            guidance_scale=7.0,
            generator=[torch.Generator(device="cuda").manual_seed(seed)],
            image=image,
            mask_image=mask_image,
            control_image=segmentation_cond_image,
        ).images[0]

        # Bring the result back to the size of the source image.
        design_image = generated_image.resize(
                (orig_w, orig_h), Image.Resampling.LANCZOS)

        plt.imshow(design_image)
        plt.axis('off')
        plt.show()

        if idx > 3:
            break

In [None]:
neg_const_lst = []

In [None]:
pos_const_lst = []

In [None]:
gen_image(neg_prompt)

### 30% wall masking + No extra prompts
control_items_to_mask = ["stairs;steps", "step;stair", "stairway;staircase", "radiator", "screen;door;screen", "windowpane;window", "door;double;door", "countertop", "fireplace;hearth;open;fireplace",]

In [None]:
control_items_mask = ["stairs;steps", "step;stair", "stairway;staircase", "radiator", "screen;door;screen", \
                 "windowpane;window", "door;double;door", "countertop", "fireplace;hearth;open;fireplace",]

In [None]:
control_items_retain = ["floor;flooring", "rug;carpet;carpeting", "wall", "ceiling"]

In [None]:
def filter_items_mask(colors_list,items_list,items_to_mask):
    """
    Drop the items listed in ``items_to_mask`` and return what is left,
    together with the color found at the same position in ``colors_list``.

    Args:
        colors_list: A list or numpy array of colors, aligned with
            ``items_list``.
        items_list: A list or numpy array of item labels.
        items_to_mask: A list or numpy array of labels to exclude.

    Returns:
        A tuple of two lists: the colors and the items whose label is NOT in
        ``items_to_mask``, in their original order.
    """
    surviving_pairs = [
        (color, item)
        for color, item in zip(colors_list, items_list)
        if item not in items_to_mask
    ]
    filtered_colors = [color for color, _ in surviving_pairs]
    filtered_items = [item for _, item in surviving_pairs]
    return filtered_colors, filtered_items

In [None]:
def filter_items_retain(colors_list,items_list,items_to_retain):
    """
    Keep only the items listed in ``items_to_retain``, together with the
    color found at the same position in ``colors_list``.

    Args:
        colors_list: A list or numpy array of colors, aligned with
            ``items_list``.
        items_list: A list or numpy array of item labels.
        items_to_retain: A list or numpy array of labels to keep; every other
            item (and its color) is dropped.

    Returns:
        A tuple of two lists: the colors and the items whose label IS in
        ``items_to_retain``, in their original order.
    """
    kept_pairs = [
        (color, item)
        for color, item in zip(colors_list, items_list)
        if item in items_to_retain
    ]
    filtered_colors = [color for color, _ in kept_pairs]
    filtered_items = [item for _, item in kept_pairs]
    return filtered_colors, filtered_items

In [None]:
def gen_image(neg_prompt, wall_keep_fraction=0.30):
    """
    Generate and display redesigned rooms for every row of ``demo_dataset``,
    excluding the upper part of each wall from the inpainting mask.

    Per image:
      1. Load, resize with ``resize_dimensions`` (target 768) and segment.
      2. Build the base mask: 255 on segments whose label is NOT in the
         notebook-level ``control_items_mask`` list, 0 elsewhere.
      3. Build a wall map in which wall pixels are 0 and everything else 255.
      4. For every pixel column, find the top-most and bottom-most wall row
         and zero out the top ``wall_keep_fraction`` of that span in an
         otherwise all-255 array. Columns without any wall pixel fall back to
         the span (0, 6), as in the original implementation.
      5. AND the base mask with that array and pass the result to
         ``pipeline`` as ``mask_image``, with the image and segmentation map.
    The input, both masks and the result (resized back to the original size)
    are displayed with matplotlib.

    Args:
        neg_prompt: Base negative prompt. Each entry of the notebook-level
            ``neg_const_lst`` is appended to it as ``', <entry>'``.
        wall_keep_fraction: Fraction of each column's wall span, measured
            from the top, that is removed from the mask. Defaults to 0.30,
            the value previously hard-coded.

    Notes:
        Reads these notebook globals at call time: ``demo_dataset``,
        ``control_items_mask``, ``pos_const_lst``, ``neg_const_lst``,
        ``additional_quality_suffix``, ``seg_image_processor``,
        ``image_segmentor``, ``data_collator``, ``pipeline`` and ``seed``.
    """
    # Build the negative prompt ONCE. Appending inside the row loop (as the
    # previous version did) re-added every constraint on each row, so the
    # negative prompt kept growing from image to image.
    full_neg_prompt = neg_prompt
    for neg_const in neg_const_lst:
        full_neg_prompt = full_neg_prompt + f', {neg_const}'

    for idx, row in demo_dataset.iterrows():

        image_name = row['image']
        pos_prompt = row['prompt']
        # pos_prompt = pos_prompt.replace('plants','plants++').replace('plant', 'plant++')

        empty_room_image = Image.open('demo_dataset/'+image_name)
        orig_w, orig_h = empty_room_image.size
        new_width, new_height = resize_dimensions(empty_room_image.size, 768)
        input_image = empty_room_image.resize((new_width, new_height))

        plt.imshow(input_image)
        plt.axis('off')
        plt.show()
        print("Prompt:", pos_prompt)

        # Segment the room and translate every color present in the map
        # back to its item label.
        real_seg = np.array(segment_image(input_image,
                                      seg_image_processor,
                                      image_segmentor))
        unique_colors = np.unique(real_seg.reshape(-1, real_seg.shape[2]), axis=0)
        unique_colors = [tuple(color) for color in unique_colors]
        segment_items = [map_colors_rgb(i) for i in unique_colors]

        # Base mask: 1 on every segment that is not in control_items_mask.
        chosen_colors, _ = filter_items_mask(
            colors_list=unique_colors,
            items_list=segment_items,
            items_to_mask=control_items_mask,
        )
        mask = np.zeros_like(real_seg)
        for color in chosen_colors:
            color_matches = (real_seg == color).all(axis=2)
            mask[color_matches] = 1

        plt.imshow((mask*255).astype(int))
        plt.axis('off')
        plt.show()

        image_np = np.array(input_image)
        image = Image.fromarray(image_np).convert("RGB")
        segmentation_cond_image = Image.fromarray(real_seg).convert("RGB")

        # Single-channel (mode "L") version of the base mask.
        mask_0_array = (mask * 255).astype(np.uint8)
        mask_1_array = np.array(Image.fromarray(mask_0_array).convert("L"))

        # Wall map: every segment except "wall" is set to 255, so wall
        # pixels are the ones left at 0.
        object_items_2 = ["wall"]
        chosen_colors_2, _ = filter_items_mask(
            colors_list=unique_colors,
            items_list=segment_items,
            items_to_mask=object_items_2,
        )
        mask_2 = np.zeros_like(real_seg)
        for color in chosen_colors_2:
            color_matches = (real_seg == color).all(axis=2)
            mask_2[color_matches] = 1

        mask_2_array = (mask_2 * 255).astype(np.uint8)
        mask_3_array = np.array(Image.fromarray(mask_2_array).convert("L"))

        height, width = mask_3_array.shape
        white_image_array = np.full((height, width), 255, dtype=np.uint8)

        # For each column, black out the top part of the wall span. The
        # previous version wrapped this assignment in a second loop over all
        # columns that repeated the identical write `width` times.
        for col_idx in range(width):
            wall_rows = np.nonzero(mask_3_array[:, col_idx] == 0)[0]
            if wall_rows.size == 0:
                # No wall in this column: keep the original fallback span.
                min_, max_ = 0, 6
            else:
                min_, max_ = wall_rows.min(), wall_rows.max()
            mask_wall_ht = int(wall_keep_fraction * (max_ - min_))
            white_image_array[min_: min_ + mask_wall_ht, col_idx] = 0

        print(f'mask_1_array.shape : {mask_1_array.shape} & white_image_array.shape : {white_image_array.shape}')

        # A pixel stays in the final mask only if it is in the base mask AND
        # outside the blacked-out upper wall band.
        combined_mask_array = cv2.bitwise_and(mask_1_array, white_image_array)
        final_mask_image = Image.fromarray((combined_mask_array).astype(np.uint8)).convert("RGB")
        plt.imshow((combined_mask_array).astype(np.uint8))
        plt.axis('off')
        plt.show()

        # The positive prompt is rebuilt from the row on every iteration.
        pos_prompt += f', {additional_quality_suffix},'
        for pos_const in pos_const_lst:
            pos_prompt = pos_prompt + f'{pos_const}'
        if idx == 0:
            print("Final POS Prompt:", pos_prompt)

        if idx == 0:
            print("Final NEG Prompt:", full_neg_prompt)

        # Tokenize both prompts, pad them to a common length, and embed them
        # in model_max_length-sized chunks.
        prompt_lst = [pos_prompt, full_neg_prompt]
        prompt_token_lst = []
        for prompt in prompt_lst:
            prompt_dict = tokenize_function(prompt)
            prompt_token_lst.append(prompt_dict)
        prompt_tensors = data_collator(prompt_token_lst)
        prompt_ids = prompt_tensors['input_ids']
        pos_prompt_ids = prompt_ids[0, :].unsqueeze(0)
        neg_prompt_ids = prompt_ids[1, :].unsqueeze(0)
        pos_prompt_embed, neg_prompt_embed = get_pipeline_embeds_mod(pos_prompt_ids, neg_prompt_ids)
        print("prompt embedding shape", pos_prompt_embed.size(), neg_prompt_embed.size())

        generated_image = pipeline(
            prompt_embeds=pos_prompt_embed,
            negative_prompt_embeds=neg_prompt_embed,
            num_inference_steps=50,
            strength=1.0,
            guidance_scale=7.0,
            generator=[torch.Generator(device="cuda").manual_seed(seed)],
            image=image,
            mask_image=final_mask_image,
            control_image=segmentation_cond_image,
        ).images[0]

        # Bring the result back to the size of the source image.
        design_image = generated_image.resize(
                (orig_w, orig_h), Image.Resampling.LANCZOS)

        plt.imshow(design_image)
        plt.axis('off')
        plt.show()

        # if idx > 3:
        #     break

In [None]:
neg_const_lst = []

In [None]:
pos_const_lst = []

In [None]:
gen_image(neg_prompt)

### 30% wall masking (retain-list variant) + No extra prompts
control_items_retain = ["floor;flooring", "rug;carpet;carpeting", "wall", "ceiling"]

In [None]:
control_items_retain = ["floor;flooring", "rug;carpet;carpeting", "wall", "ceiling"]

In [None]:
neg_const_lst = []

In [None]:
pos_const_lst = []

In [None]:
def gen_image(neg_prompt, wall_ht_pct=0.30):
    """
    Inpaint and display a design for every row of ``demo_dataset``.

    Per row: the image ``demo_dataset/<row['image']>`` is resized with
    ``resize_dimensions(..., 768)``, segmented with ``segment_image``, and a
    mask is built that is 255 on every segment whose label is listed in
    ``control_items_retain``. A band at the top of each column (see
    ``wall_ht_pct``) is then cleared from that mask before ``pipeline`` is
    called with the image, the mask and the segmentation map as control image.
    The resized input, both masks and the result (resized back to the original
    dimensions) are shown with matplotlib.

    Depends on notebook globals: ``demo_dataset``, ``control_items_retain``,
    ``additional_quality_suffix``, ``pos_const_lst``, ``neg_const_lst``,
    ``seed``, ``pipeline``, ``seg_image_processor``, ``image_segmentor``,
    ``tokenize_function``, ``data_collator``, ``get_pipeline_embeds_mod``,
    ``resize_dimensions``, ``segment_image``, ``filter_items_retain``,
    ``map_colors_rgb`` and ``plt``.

    Args:
        neg_prompt: Base negative prompt. Every entry of ``neg_const_lst`` is
            appended to it, separately for each row (the base prompt no longer
            grows from one row to the next).
        wall_ht_pct: Fraction of each column's measured span that is cleared
            from the mask, starting at the top of the span. Defaults to 0.30,
            the value that used to be hard-coded.

    Returns:
        None. Images are displayed, not returned or saved.
    """
    for idx, row in demo_dataset.iterrows():

        image_name = row['image']
        pos_prompt = row['prompt']

        empty_room_image = Image.open('demo_dataset/'+image_name)
        orig_w, orig_h = empty_room_image.size
        new_width, new_height = resize_dimensions(empty_room_image.size, 768)
        input_image = empty_room_image.resize((new_width, new_height))

        plt.imshow(input_image)
        plt.axis('off')
        plt.show()
        print("Prompt:", pos_prompt)

        # Segmentation map as an (H, W, C) colour array; every distinct colour
        # is translated to its label through map_colors_rgb.
        real_seg = np.array(segment_image(input_image,
                                          seg_image_processor,
                                          image_segmentor))
        unique_colors = np.unique(real_seg.reshape(-1, real_seg.shape[2]), axis=0)
        unique_colors = [tuple(color) for color in unique_colors]
        segment_items = [map_colors_rgb(i) for i in unique_colors]

        # Mask 1: set on every segment whose label is in control_items_retain.
        chosen_colors, _ = filter_items_retain(
            colors_list=unique_colors,
            items_list=segment_items,
            items_to_retain=control_items_retain
        )
        mask = np.zeros_like(real_seg)
        for color in chosen_colors:
            color_matches = (real_seg == color).all(axis=2)
            mask[color_matches] = 1

        plt.imshow((mask*255).astype(int))
        plt.axis('off')
        plt.show()

        image_np = np.array(input_image)
        image = Image.fromarray(image_np).convert("RGB")
        segmentation_cond_image = Image.fromarray(real_seg).convert("RGB")

        mask_0_array = (mask * 255).astype(np.uint8)
        mask_1_image = Image.fromarray(mask_0_array).convert("L")
        mask_1_array = np.array(mask_1_image)

        # Mask 2: set on the "wall" segment only.
        object_items_2 = ["wall"]
        chosen_colors_2, _ = filter_items_retain(
            colors_list=unique_colors,
            items_list=segment_items,
            items_to_retain=object_items_2
        )
        mask_2 = np.zeros_like(real_seg)
        for color in chosen_colors_2:
            color_matches = (real_seg == color).all(axis=2)
            mask_2[color_matches] = 1

        mask_2_array = (mask_2 * 255).astype(np.uint8)
        mask_2_image = Image.fromarray(mask_2_array).convert("L")

        # For each column, record the first and last row whose mask-2 value
        # is 0.
        # NOTE(review): mask 2 is 255 on wall pixels here, so the zero pixels
        # measured below are the NON-wall pixels. If the intent is to measure
        # the wall itself, the comparison should be "!= 0" - confirm.
        mask_3_array = np.array(mask_2_image)
        wall_heights = []
        for col in range(mask_3_array.shape[1]):
            black_indices = np.nonzero(mask_3_array[:, col] == 0)[0]
            if black_indices.size == 0:
                # No zero pixel in this column: fall back to the fixed span 0..6.
                min_ = 0
                max_ = 6
            else:
                min_ = black_indices.min()
                max_ = black_indices.max()
            wall_heights.append((min_, max_))

        height, width = mask_3_array.shape
        white_image_array = np.full((height, width), 255, dtype=np.uint8)

        # Clear the top `wall_ht_pct` of every column's span. One slice
        # assignment per column is enough; the former inner loop repeated the
        # same write `width` times.
        for col_idx, (min_, max_) in enumerate(wall_heights):
            mask_wall_ht = int(wall_ht_pct * (max_ - min_))
            white_image_array[min_: min_ + mask_wall_ht, col_idx] = 0

        print(f'mask_1_array.shape : {mask_1_array.shape} & white_image_array.shape : {white_image_array.shape}')

        # Final mask = mask 1 with the cleared bands removed.
        combined_mask_array = cv2.bitwise_and(mask_1_array, white_image_array)
        final_mask_image = Image.fromarray((combined_mask_array).astype(np.uint8)).convert("RGB")
        plt.imshow((combined_mask_array).astype(np.uint8))
        plt.axis('off')
        plt.show()

        pos_prompt += f', {additional_quality_suffix},'
        pos_prompt += ''.join(f'{pos_const}' for pos_const in pos_const_lst)
        if idx == 0:
            print("Final POS Prompt:", pos_prompt)

        # Build the negative prompt in a per-row variable so the constraints
        # are not appended again on every row.
        row_neg_prompt = neg_prompt + ''.join(f', {neg_const}' for neg_const in neg_const_lst)
        if idx == 0:
            print("Final NEG Prompt:", row_neg_prompt)

        prompt_token_lst = [tokenize_function(prompt) for prompt in (pos_prompt, row_neg_prompt)]
        prompt_tensors = data_collator(prompt_token_lst)
        prompt_ids = prompt_tensors['input_ids']
        pos_prompt_ids = prompt_ids[0, :].unsqueeze(0)
        neg_prompt_ids = prompt_ids[1, :].unsqueeze(0)
        pos_prompt_embed, neg_prompt_embed = get_pipeline_embeds_mod(pos_prompt_ids, neg_prompt_ids)
        print("prompt embedding shape", pos_prompt_embed.size(), neg_prompt_embed.size())

        # A new generator seeded with the global `seed` is created for each row.
        generated_image = pipeline(
            prompt_embeds=pos_prompt_embed,
            negative_prompt_embeds=neg_prompt_embed,
            num_inference_steps=50,
            strength=1.0,
            guidance_scale=7.0,
            generator=[torch.Generator(device="cuda").manual_seed(seed)],
            image=image,
            mask_image=final_mask_image,
            control_image=segmentation_cond_image,
        ).images[0]

        design_image = generated_image.resize(
                (orig_w, orig_h), Image.Resampling.LANCZOS)

        plt.imshow(design_image)
        plt.axis('off')
        plt.show()

        # if idx > 3:
        #     break

In [None]:
# Generate and display a design for every row of demo_dataset, using the base
# negative prompt.
gen_image(neg_prompt)

### AI Crowd submission - Mar 18 with pytorch_lora_weights_run1803.safetensors & 30% wall masking
control_items_to_mask = ["stairs;steps", "step;stair", "stairway;staircase", "radiator", "screen;door;screen", "windowpane;window", "door;double;door", "countertop", "fireplace;hearth;open;fireplace",]
**No extra prompts**

In [None]:
!python3 '/int_ch/local_evaluation.py'

  torch.utils._pytree._register_pytree_node(
  torch.utils._pytree._register_pytree_node(
The config attributes {'dropout': 0.0, 'sample_size': 32} were passed to ControlNetModel, but are not expected and will be ignored. Please verify your config.json configuration file.
Loading pipeline components...: 100%|█████████████| 6/6 [00:03<00:00,  1.88it/s]
You have disabled the safety checker for <class 'diffusers.pipelines.controlnet.pipeline_controlnet_inpaint.StableDiffusionControlNetInpaintPipeline'> by passing `safety_checker=None`. Ensure that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered results in services or applications open to the public. Both the diffusers team and Hugging Face strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling it only for use-cases that involve analyzing network behavior or auditing its results. For more information, please have a look at https://github.com/huggingface/di

### 20%, 10% wall masking + No extra prompts
control_items_to_mask = ["stairs;steps", "step;stair", "stairway;staircase", "radiator", "screen;door;screen", "windowpane;window", "door;double;door", "countertop", "fireplace;hearth;open;fireplace",]

In [None]:
# Segment labels handed to filter_items_mask as `items_to_mask`: the colours of
# these segments are left out when the gen_image* functions build their mask.
control_items_mask = [
    "stairs;steps",
    "step;stair",
    "stairway;staircase",
    "radiator",
    "screen;door;screen",
    "windowpane;window",
    "door;double;door",
    "countertop",
    "fireplace;hearth;open;fireplace",
    "column;pillar",
]

In [None]:
# Segment labels for filter_items_retain's `items_to_retain` argument.
control_items_retain = [
    "floor;flooring",
    "rug;carpet;carpeting",
    "wall",
    "ceiling",
]

In [None]:
def filter_items_mask(colors_list, items_list, items_to_mask):
    """
    Drop the masked items and return what is left, with the matching colors.

    Args:
        colors_list: Colors, positionally aligned with ``items_list``.
        items_list: Item labels, one per color.
        items_to_mask: Labels to leave out of the result.

    Returns:
        A ``(colors, items)`` tuple of two lists holding every pair whose
        item is not in ``items_to_mask``, in their original order.
    """
    kept_pairs = [
        (color, item)
        for color, item in zip(colors_list, items_list)
        if item not in items_to_mask
    ]
    kept_colors = [color for color, _ in kept_pairs]
    kept_items = [item for _, item in kept_pairs]
    return kept_colors, kept_items

In [None]:
def filter_items_retain(colors_list, items_list, items_to_retain):
    """
    Keep only the requested items and return them with the matching colors.

    Args:
        colors_list: Colors, positionally aligned with ``items_list``.
        items_list: Item labels, one per color.
        items_to_retain: Labels to keep in the result.

    Returns:
        A ``(colors, items)`` tuple of two lists holding every pair whose
        item is in ``items_to_retain``, in their original order.
    """
    kept_pairs = [
        (color, item)
        for color, item in zip(colors_list, items_list)
        if item in items_to_retain
    ]
    kept_colors = [color for color, _ in kept_pairs]
    kept_items = [item for _, item in kept_pairs]
    return kept_colors, kept_items

In [None]:
def gen_image(neg_prompt, wall_ht_pct=0.30):
    """
    Inpaint and display a design for every row of ``demo_dataset``.

    Per row: the image ``demo_dataset/<row['image']>`` is resized with
    ``resize_dimensions(..., 768)``, segmented with ``segment_image``, and a
    mask is built that is 255 on every segment whose label is NOT listed in
    ``control_items_mask``. The top ``wall_ht_pct`` of the wall's vertical
    span in each column is then cleared from that mask before ``pipeline`` is
    called with the image, the mask and the segmentation map as control image.
    The resized input, both masks and the result (resized back to the original
    dimensions) are shown with matplotlib.

    Depends on notebook globals: ``demo_dataset``, ``control_items_mask``,
    ``additional_quality_suffix``, ``pos_const_lst``, ``neg_const_lst``,
    ``seed``, ``pipeline``, ``seg_image_processor``, ``image_segmentor``,
    ``tokenize_function``, ``data_collator``, ``get_pipeline_embeds_mod``,
    ``resize_dimensions``, ``segment_image``, ``filter_items_mask``,
    ``map_colors_rgb`` and ``plt``.

    Args:
        neg_prompt: Base negative prompt. Every entry of ``neg_const_lst`` is
            appended to it, separately for each row (the base prompt no longer
            grows from one row to the next).
        wall_ht_pct: Fraction of each column's wall span that is cleared from
            the mask, starting at the top of the span. Defaults to 0.30, the
            value that used to be hard-coded.

    Returns:
        None. Images are displayed, not returned or saved.
    """
    for idx, row in demo_dataset.iterrows():

        image_name = row['image']
        pos_prompt = row['prompt']

        empty_room_image = Image.open('demo_dataset/'+image_name)
        orig_w, orig_h = empty_room_image.size
        new_width, new_height = resize_dimensions(empty_room_image.size, 768)
        input_image = empty_room_image.resize((new_width, new_height))

        plt.imshow(input_image)
        plt.axis('off')
        plt.show()
        print("Prompt:", pos_prompt)

        # Segmentation map as an (H, W, C) colour array; every distinct colour
        # is translated to its label through map_colors_rgb.
        real_seg = np.array(segment_image(input_image,
                                          seg_image_processor,
                                          image_segmentor))
        unique_colors = np.unique(real_seg.reshape(-1, real_seg.shape[2]), axis=0)
        unique_colors = [tuple(color) for color in unique_colors]
        segment_items = [map_colors_rgb(i) for i in unique_colors]

        # Mask 1: set on every segment whose label is not in control_items_mask.
        chosen_colors, _ = filter_items_mask(
            colors_list=unique_colors,
            items_list=segment_items,
            items_to_mask=control_items_mask,
        )
        mask = np.zeros_like(real_seg)
        for color in chosen_colors:
            color_matches = (real_seg == color).all(axis=2)
            mask[color_matches] = 1

        plt.imshow((mask*255).astype(int))
        plt.axis('off')
        plt.show()

        image_np = np.array(input_image)
        image = Image.fromarray(image_np).convert("RGB")
        segmentation_cond_image = Image.fromarray(real_seg).convert("RGB")

        mask_0_array = (mask * 255).astype(np.uint8)
        mask_1_image = Image.fromarray(mask_0_array).convert("L")
        mask_1_array = np.array(mask_1_image)

        # Mask 2: set on everything except the "wall" segment, so wall pixels
        # are the zero pixels.
        object_items_2 = ["wall"]
        chosen_colors_2, _ = filter_items_mask(
            colors_list=unique_colors,
            items_list=segment_items,
            items_to_mask=object_items_2,
        )
        mask_2 = np.zeros_like(real_seg)
        for color in chosen_colors_2:
            color_matches = (real_seg == color).all(axis=2)
            mask_2[color_matches] = 1

        mask_2_array = (mask_2 * 255).astype(np.uint8)
        mask_2_image = Image.fromarray(mask_2_array).convert("L")

        # For each column, record the first and last row that holds a wall
        # (zero) pixel.
        mask_3_array = np.array(mask_2_image)
        wall_heights = []
        for col in range(mask_3_array.shape[1]):
            black_indices = np.nonzero(mask_3_array[:, col] == 0)[0]
            if black_indices.size == 0:
                # No wall pixel in this column: fall back to the fixed span 0..6.
                min_ = 0
                max_ = 6
            else:
                min_ = black_indices.min()
                max_ = black_indices.max()
            wall_heights.append((min_, max_))

        height, width = mask_3_array.shape
        white_image_array = np.full((height, width), 255, dtype=np.uint8)

        # Clear the top `wall_ht_pct` of every column's wall span. One slice
        # assignment per column is enough; the former inner loop repeated the
        # same write `width` times.
        for col_idx, (min_, max_) in enumerate(wall_heights):
            mask_wall_ht = int(wall_ht_pct * (max_ - min_))
            white_image_array[min_: min_ + mask_wall_ht, col_idx] = 0

        print(f'mask_1_array.shape : {mask_1_array.shape} & white_image_array.shape : {white_image_array.shape}')

        # Final mask = mask 1 with the cleared wall bands removed.
        combined_mask_array = cv2.bitwise_and(mask_1_array, white_image_array)
        final_mask_image = Image.fromarray((combined_mask_array).astype(np.uint8)).convert("RGB")
        plt.imshow((combined_mask_array).astype(np.uint8))
        plt.axis('off')
        plt.show()

        pos_prompt += f', {additional_quality_suffix},'
        pos_prompt += ''.join(f'{pos_const}' for pos_const in pos_const_lst)
        if idx == 0:
            print("Final POS Prompt:", pos_prompt)

        # Build the negative prompt in a per-row variable so the constraints
        # are not appended again on every row.
        row_neg_prompt = neg_prompt + ''.join(f', {neg_const}' for neg_const in neg_const_lst)
        if idx == 0:
            print("Final NEG Prompt:", row_neg_prompt)

        prompt_token_lst = [tokenize_function(prompt) for prompt in (pos_prompt, row_neg_prompt)]
        prompt_tensors = data_collator(prompt_token_lst)
        prompt_ids = prompt_tensors['input_ids']
        pos_prompt_ids = prompt_ids[0, :].unsqueeze(0)
        neg_prompt_ids = prompt_ids[1, :].unsqueeze(0)
        pos_prompt_embed, neg_prompt_embed = get_pipeline_embeds_mod(pos_prompt_ids, neg_prompt_ids)
        print("prompt embedding shape", pos_prompt_embed.size(), neg_prompt_embed.size())

        # A new generator seeded with the global `seed` is created for each row.
        generated_image = pipeline(
            prompt_embeds=pos_prompt_embed,
            negative_prompt_embeds=neg_prompt_embed,
            num_inference_steps=50,
            strength=1.0,
            guidance_scale=7.0,
            generator=[torch.Generator(device="cuda").manual_seed(seed)],
            image=image,
            mask_image=final_mask_image,
            control_image=segmentation_cond_image,
        ).images[0]

        design_image = generated_image.resize(
                (orig_w, orig_h), Image.Resampling.LANCZOS)

        plt.imshow(design_image)
        plt.axis('off')
        plt.show()

        # if idx > 3:
        #     break

In [None]:
# No extra negative-prompt constraints for this run.
neg_const_lst = []

In [None]:
# No extra positive-prompt constraints for this run.
pos_const_lst = []

In [None]:
# Generate and display a design for every row of demo_dataset, using the base
# negative prompt.
gen_image(neg_prompt)

In [None]:
def gen_image_aicrowd(neg_prompt, wall_ht_pct=0.20):
    """
    Inpaint and display a design for every row of ``aicrowd_dataset``.

    Per row: the image ``development_data-v0.2/<row['img_path']>`` is resized
    with ``resize_dimensions(..., 768)``, segmented with ``segment_image``,
    and a mask is built that is 255 on every segment whose label is NOT listed
    in ``control_items_mask``. The top ``wall_ht_pct`` of the wall's vertical
    span in each column is then cleared from that mask before ``pipeline`` is
    called with the image, the mask and the segmentation map as control image.
    The resized input, both masks and the result (resized back to the original
    dimensions) are shown with matplotlib.

    Depends on notebook globals: ``aicrowd_dataset``, ``control_items_mask``,
    ``additional_quality_suffix``, ``pos_const_lst``, ``neg_const_lst``,
    ``seed``, ``pipeline``, ``seg_image_processor``, ``image_segmentor``,
    ``tokenize_function``, ``data_collator``, ``get_pipeline_embeds_mod``,
    ``resize_dimensions``, ``segment_image``, ``filter_items_mask``,
    ``map_colors_rgb`` and ``plt``.

    Args:
        neg_prompt: Base negative prompt. Every entry of ``neg_const_lst`` is
            appended to it, separately for each row (the base prompt no longer
            grows from one row to the next).
        wall_ht_pct: Fraction of each column's wall span that is cleared from
            the mask, starting at the top of the span. Defaults to 0.20, the
            value that used to be hard-coded.

    Returns:
        None. Images are displayed, not returned or saved.
    """
    for idx, row in aicrowd_dataset.iterrows():

        image_name = row['img_path']
        pos_prompt = row['text_prompt']

        empty_room_image = Image.open('development_data-v0.2/'+image_name)
        orig_w, orig_h = empty_room_image.size
        new_width, new_height = resize_dimensions(empty_room_image.size, 768)
        input_image = empty_room_image.resize((new_width, new_height))

        plt.imshow(input_image)
        plt.axis('off')
        plt.show()
        print("Prompt:", pos_prompt)

        # Segmentation map as an (H, W, C) colour array; every distinct colour
        # is translated to its label through map_colors_rgb.
        real_seg = np.array(segment_image(input_image,
                                          seg_image_processor,
                                          image_segmentor))
        unique_colors = np.unique(real_seg.reshape(-1, real_seg.shape[2]), axis=0)
        unique_colors = [tuple(color) for color in unique_colors]
        segment_items = [map_colors_rgb(i) for i in unique_colors]

        # Mask 1: set on every segment whose label is not in control_items_mask.
        chosen_colors, _ = filter_items_mask(
            colors_list=unique_colors,
            items_list=segment_items,
            items_to_mask=control_items_mask,
        )
        mask = np.zeros_like(real_seg)
        for color in chosen_colors:
            color_matches = (real_seg == color).all(axis=2)
            mask[color_matches] = 1

        plt.imshow((mask*255).astype(int))
        plt.axis('off')
        plt.show()

        image_np = np.array(input_image)
        image = Image.fromarray(image_np).convert("RGB")
        segmentation_cond_image = Image.fromarray(real_seg).convert("RGB")

        mask_0_array = (mask * 255).astype(np.uint8)
        mask_1_image = Image.fromarray(mask_0_array).convert("L")
        mask_1_array = np.array(mask_1_image)

        # Mask 2: set on everything except the "wall" segment, so wall pixels
        # are the zero pixels.
        object_items_2 = ["wall"]
        chosen_colors_2, _ = filter_items_mask(
            colors_list=unique_colors,
            items_list=segment_items,
            items_to_mask=object_items_2,
        )
        mask_2 = np.zeros_like(real_seg)
        for color in chosen_colors_2:
            color_matches = (real_seg == color).all(axis=2)
            mask_2[color_matches] = 1

        mask_2_array = (mask_2 * 255).astype(np.uint8)
        mask_2_image = Image.fromarray(mask_2_array).convert("L")

        # For each column, record the first and last row that holds a wall
        # (zero) pixel.
        mask_3_array = np.array(mask_2_image)
        wall_heights = []
        for col in range(mask_3_array.shape[1]):
            black_indices = np.nonzero(mask_3_array[:, col] == 0)[0]
            if black_indices.size == 0:
                # No wall pixel in this column: fall back to the fixed span 0..6.
                min_ = 0
                max_ = 6
            else:
                min_ = black_indices.min()
                max_ = black_indices.max()
            wall_heights.append((min_, max_))

        height, width = mask_3_array.shape
        white_image_array = np.full((height, width), 255, dtype=np.uint8)

        # Clear the top `wall_ht_pct` of every column's wall span. One slice
        # assignment per column is enough; the former inner loop repeated the
        # same write `width` times.
        for col_idx, (min_, max_) in enumerate(wall_heights):
            mask_wall_ht = int(wall_ht_pct * (max_ - min_))
            white_image_array[min_: min_ + mask_wall_ht, col_idx] = 0

        print(f'mask_1_array.shape : {mask_1_array.shape} & white_image_array.shape : {white_image_array.shape}')

        # Final mask = mask 1 with the cleared wall bands removed.
        combined_mask_array = cv2.bitwise_and(mask_1_array, white_image_array)
        final_mask_image = Image.fromarray((combined_mask_array).astype(np.uint8)).convert("RGB")
        plt.imshow((combined_mask_array).astype(np.uint8))
        plt.axis('off')
        plt.show()

        pos_prompt += f', {additional_quality_suffix},'
        pos_prompt += ''.join(f'{pos_const}' for pos_const in pos_const_lst)
        if idx == 0:
            print("Final POS Prompt:", pos_prompt)

        # Build the negative prompt in a per-row variable so the constraints
        # are not appended again on every row.
        row_neg_prompt = neg_prompt + ''.join(f', {neg_const}' for neg_const in neg_const_lst)
        if idx == 0:
            print("Final NEG Prompt:", row_neg_prompt)

        prompt_token_lst = [tokenize_function(prompt) for prompt in (pos_prompt, row_neg_prompt)]
        prompt_tensors = data_collator(prompt_token_lst)
        prompt_ids = prompt_tensors['input_ids']
        pos_prompt_ids = prompt_ids[0, :].unsqueeze(0)
        neg_prompt_ids = prompt_ids[1, :].unsqueeze(0)
        pos_prompt_embed, neg_prompt_embed = get_pipeline_embeds_mod(pos_prompt_ids, neg_prompt_ids)
        print("prompt embedding shape", pos_prompt_embed.size(), neg_prompt_embed.size())

        # A new generator seeded with the global `seed` is created for each row.
        generated_image = pipeline(
            prompt_embeds=pos_prompt_embed,
            negative_prompt_embeds=neg_prompt_embed,
            num_inference_steps=50,
            strength=1.0,
            guidance_scale=7.0,
            generator=[torch.Generator(device="cuda").manual_seed(seed)],
            image=image,
            mask_image=final_mask_image,
            control_image=segmentation_cond_image,
        ).images[0]

        design_image = generated_image.resize(
                (orig_w, orig_h), Image.Resampling.LANCZOS)

        plt.imshow(design_image)
        plt.axis('off')
        plt.show()

        # if idx > 3:
        #     break

In [None]:
# Generate and display a design for every row of aicrowd_dataset (20% wall band).
gen_image_aicrowd(neg_prompt)

In [None]:
def gen_image_aicrowd_5pct(neg_prompt, wall_ht_pct=0.05):
    """
    Inpaint and display a design for every row of ``aicrowd_dataset``.

    Per row: the image ``development_data-v0.2/<row['img_path']>`` is resized
    with ``resize_dimensions(..., 768)``, segmented with ``segment_image``,
    and a mask is built that is 255 on every segment whose label is NOT listed
    in ``control_items_mask``. The top ``wall_ht_pct`` of the wall's vertical
    span in each column is then cleared from that mask before ``pipeline`` is
    called with the image, the mask and the segmentation map as control image.
    The resized input, both masks and the result (resized back to the original
    dimensions) are shown with matplotlib.

    Depends on notebook globals: ``aicrowd_dataset``, ``control_items_mask``,
    ``additional_quality_suffix``, ``pos_const_lst``, ``neg_const_lst``,
    ``seed``, ``pipeline``, ``seg_image_processor``, ``image_segmentor``,
    ``tokenize_function``, ``data_collator``, ``get_pipeline_embeds_mod``,
    ``resize_dimensions``, ``segment_image``, ``filter_items_mask``,
    ``map_colors_rgb`` and ``plt``.

    Args:
        neg_prompt: Base negative prompt. Every entry of ``neg_const_lst`` is
            appended to it, separately for each row (the base prompt no longer
            grows from one row to the next).
        wall_ht_pct: Fraction of each column's wall span that is cleared from
            the mask, starting at the top of the span. Defaults to 0.05, the
            value that used to be hard-coded.

    Returns:
        None. Images are displayed, not returned or saved.
    """
    for idx, row in aicrowd_dataset.iterrows():

        image_name = row['img_path']
        pos_prompt = row['text_prompt']

        empty_room_image = Image.open('development_data-v0.2/'+image_name)
        orig_w, orig_h = empty_room_image.size
        new_width, new_height = resize_dimensions(empty_room_image.size, 768)
        input_image = empty_room_image.resize((new_width, new_height))

        plt.imshow(input_image)
        plt.axis('off')
        plt.show()
        print("Prompt:", pos_prompt)

        # Segmentation map as an (H, W, C) colour array; every distinct colour
        # is translated to its label through map_colors_rgb.
        real_seg = np.array(segment_image(input_image,
                                          seg_image_processor,
                                          image_segmentor))
        unique_colors = np.unique(real_seg.reshape(-1, real_seg.shape[2]), axis=0)
        unique_colors = [tuple(color) for color in unique_colors]
        segment_items = [map_colors_rgb(i) for i in unique_colors]

        # Mask 1: set on every segment whose label is not in control_items_mask.
        chosen_colors, _ = filter_items_mask(
            colors_list=unique_colors,
            items_list=segment_items,
            items_to_mask=control_items_mask,
        )
        mask = np.zeros_like(real_seg)
        for color in chosen_colors:
            color_matches = (real_seg == color).all(axis=2)
            mask[color_matches] = 1

        plt.imshow((mask*255).astype(int))
        plt.axis('off')
        plt.show()

        image_np = np.array(input_image)
        image = Image.fromarray(image_np).convert("RGB")
        segmentation_cond_image = Image.fromarray(real_seg).convert("RGB")

        mask_0_array = (mask * 255).astype(np.uint8)
        mask_1_image = Image.fromarray(mask_0_array).convert("L")
        mask_1_array = np.array(mask_1_image)

        # Mask 2: set on everything except the "wall" segment, so wall pixels
        # are the zero pixels.
        object_items_2 = ["wall"]
        chosen_colors_2, _ = filter_items_mask(
            colors_list=unique_colors,
            items_list=segment_items,
            items_to_mask=object_items_2,
        )
        mask_2 = np.zeros_like(real_seg)
        for color in chosen_colors_2:
            color_matches = (real_seg == color).all(axis=2)
            mask_2[color_matches] = 1

        mask_2_array = (mask_2 * 255).astype(np.uint8)
        mask_2_image = Image.fromarray(mask_2_array).convert("L")

        # For each column, record the first and last row that holds a wall
        # (zero) pixel.
        mask_3_array = np.array(mask_2_image)
        wall_heights = []
        for col in range(mask_3_array.shape[1]):
            black_indices = np.nonzero(mask_3_array[:, col] == 0)[0]
            if black_indices.size == 0:
                # No wall pixel in this column: fall back to the fixed span 0..6.
                min_ = 0
                max_ = 6
            else:
                min_ = black_indices.min()
                max_ = black_indices.max()
            wall_heights.append((min_, max_))

        height, width = mask_3_array.shape
        white_image_array = np.full((height, width), 255, dtype=np.uint8)

        # Clear the top `wall_ht_pct` of every column's wall span. One slice
        # assignment per column is enough; the former inner loop repeated the
        # same write `width` times.
        for col_idx, (min_, max_) in enumerate(wall_heights):
            mask_wall_ht = int(wall_ht_pct * (max_ - min_))
            white_image_array[min_: min_ + mask_wall_ht, col_idx] = 0

        print(f'mask_1_array.shape : {mask_1_array.shape} & white_image_array.shape : {white_image_array.shape}')

        # Final mask = mask 1 with the cleared wall bands removed.
        combined_mask_array = cv2.bitwise_and(mask_1_array, white_image_array)
        final_mask_image = Image.fromarray((combined_mask_array).astype(np.uint8)).convert("RGB")
        plt.imshow((combined_mask_array).astype(np.uint8))
        plt.axis('off')
        plt.show()

        pos_prompt += f', {additional_quality_suffix},'
        pos_prompt += ''.join(f'{pos_const}' for pos_const in pos_const_lst)
        if idx == 0:
            print("Final POS Prompt:", pos_prompt)

        # Build the negative prompt in a per-row variable so the constraints
        # are not appended again on every row.
        row_neg_prompt = neg_prompt + ''.join(f', {neg_const}' for neg_const in neg_const_lst)
        if idx == 0:
            print("Final NEG Prompt:", row_neg_prompt)

        prompt_token_lst = [tokenize_function(prompt) for prompt in (pos_prompt, row_neg_prompt)]
        prompt_tensors = data_collator(prompt_token_lst)
        prompt_ids = prompt_tensors['input_ids']
        pos_prompt_ids = prompt_ids[0, :].unsqueeze(0)
        neg_prompt_ids = prompt_ids[1, :].unsqueeze(0)
        pos_prompt_embed, neg_prompt_embed = get_pipeline_embeds_mod(pos_prompt_ids, neg_prompt_ids)
        print("prompt embedding shape", pos_prompt_embed.size(), neg_prompt_embed.size())

        # A new generator seeded with the global `seed` is created for each row.
        generated_image = pipeline(
            prompt_embeds=pos_prompt_embed,
            negative_prompt_embeds=neg_prompt_embed,
            num_inference_steps=50,
            strength=1.0,
            guidance_scale=7.0,
            generator=[torch.Generator(device="cuda").manual_seed(seed)],
            image=image,
            mask_image=final_mask_image,
            control_image=segmentation_cond_image,
        ).images[0]

        design_image = generated_image.resize(
                (orig_w, orig_h), Image.Resampling.LANCZOS)

        plt.imshow(design_image)
        plt.axis('off')
        plt.show()

        # if idx > 3:
        #     break

In [None]:
# Generate and display a design for every row of aicrowd_dataset (5% wall band).
gen_image_aicrowd_5pct(neg_prompt)

In [None]:
# Extra prompt constraints for the next run: none.
# The two assignments that used to precede these were overwritten immediately
# and needed pos_const7, pos_const8 and neg_const5 to be defined already; they
# are kept here as a reference for re-enabling that experiment:
#   pos_const_lst = [pos_const7, pos_const8]
#   neg_const_lst = [neg_const5]
pos_const_lst = []
neg_const_lst = []

In [None]:
def gen_image_aicrowd_10pct(neg_prompt, wall_ht_pct=0.11):
    """
    Inpaint and display a design for every row of ``aicrowd_dataset``.

    Per row: the image ``development_data-v0.2/<row['img_path']>`` is resized
    with ``resize_dimensions(..., 768)``, segmented with ``segment_image``,
    and a mask is built that is 255 on every segment whose label is NOT listed
    in ``control_items_mask``. The top ``wall_ht_pct`` of the wall's vertical
    span in each column is then cleared from that mask before ``pipeline`` is
    called with the image, the mask and the segmentation map as control image.
    The resized input, both masks and the result (resized back to the original
    dimensions) are shown with matplotlib; the labels that stay in the mask
    are printed for every row.

    Depends on notebook globals: ``aicrowd_dataset``, ``control_items_mask``,
    ``additional_quality_suffix``, ``pos_const_lst``, ``neg_const_lst``,
    ``seed``, ``pipeline``, ``seg_image_processor``, ``image_segmentor``,
    ``tokenize_function``, ``data_collator``, ``get_pipeline_embeds_mod``,
    ``resize_dimensions``, ``segment_image``, ``filter_items_mask``,
    ``map_colors_rgb`` and ``plt``.

    Args:
        neg_prompt: Base negative prompt. Every entry of ``neg_const_lst`` is
            appended to it, separately for each row (the base prompt no longer
            grows from one row to the next).
        wall_ht_pct: Fraction of each column's wall span that is cleared from
            the mask, starting at the top of the span. Defaults to 0.11, the
            value that used to be hard-coded.
            NOTE(review): the function name says 10% but the value is 0.11 -
            confirm which one is intended.

    Returns:
        None. Images are displayed, not returned or saved.
    """
    for idx, row in aicrowd_dataset.iterrows():

        image_name = row['img_path']
        pos_prompt = row['text_prompt']

        empty_room_image = Image.open('development_data-v0.2/'+image_name)
        orig_w, orig_h = empty_room_image.size
        new_width, new_height = resize_dimensions(empty_room_image.size, 768)
        input_image = empty_room_image.resize((new_width, new_height))

        plt.imshow(input_image)
        plt.axis('off')
        plt.show()
        print(f"idx : {idx} Prompt: {pos_prompt}")

        # Segmentation map as an (H, W, C) colour array; every distinct colour
        # is translated to its label through map_colors_rgb.
        real_seg = np.array(segment_image(input_image,
                                          seg_image_processor,
                                          image_segmentor))
        unique_colors = np.unique(real_seg.reshape(-1, real_seg.shape[2]), axis=0)
        unique_colors = [tuple(color) for color in unique_colors]
        segment_items = [map_colors_rgb(i) for i in unique_colors]

        # Mask 1: set on every segment whose label is not in control_items_mask.
        chosen_colors, segment_items_1 = filter_items_mask(
            colors_list=unique_colors,
            items_list=segment_items,
            items_to_mask=control_items_mask,
        )
        if idx == 0:
            print(f'control_items_mask : {control_items_mask}')

        print(f'segment_items for inpainting : {segment_items_1}')

        mask = np.zeros_like(real_seg)
        for color in chosen_colors:
            color_matches = (real_seg == color).all(axis=2)
            mask[color_matches] = 1

        plt.imshow((mask*255).astype(int))
        plt.axis('off')
        plt.show()

        image_np = np.array(input_image)
        image = Image.fromarray(image_np).convert("RGB")
        segmentation_cond_image = Image.fromarray(real_seg).convert("RGB")

        mask_0_array = (mask * 255).astype(np.uint8)
        mask_1_image = Image.fromarray(mask_0_array).convert("L")
        mask_1_array = np.array(mask_1_image)

        # Mask 2: set on everything except the "wall" segment, so wall pixels
        # are the zero pixels.
        object_items_2 = ["wall"]
        chosen_colors_2, _ = filter_items_mask(
            colors_list=unique_colors,
            items_list=segment_items,
            items_to_mask=object_items_2,
        )
        mask_2 = np.zeros_like(real_seg)
        for color in chosen_colors_2:
            color_matches = (real_seg == color).all(axis=2)
            mask_2[color_matches] = 1

        mask_2_array = (mask_2 * 255).astype(np.uint8)
        mask_2_image = Image.fromarray(mask_2_array).convert("L")

        # For each column, record the first and last row that holds a wall
        # (zero) pixel.
        mask_3_array = np.array(mask_2_image)
        wall_heights = []
        for col in range(mask_3_array.shape[1]):
            black_indices = np.nonzero(mask_3_array[:, col] == 0)[0]
            if black_indices.size == 0:
                # No wall pixel in this column: fall back to the fixed span 0..6.
                min_ = 0
                max_ = 6
            else:
                min_ = black_indices.min()
                max_ = black_indices.max()
            wall_heights.append((min_, max_))

        height, width = mask_3_array.shape
        white_image_array = np.full((height, width), 255, dtype=np.uint8)

        # Clear the top `wall_ht_pct` of every column's wall span. One slice
        # assignment per column is enough; the former inner loop repeated the
        # same write `width` times.
        for col_idx, (min_, max_) in enumerate(wall_heights):
            mask_wall_ht = int(wall_ht_pct * (max_ - min_))
            white_image_array[min_: min_ + mask_wall_ht, col_idx] = 0

        print(f'mask_1_array.shape : {mask_1_array.shape} & white_image_array.shape : {white_image_array.shape}')

        # Final mask = mask 1 with the cleared wall bands removed.
        combined_mask_array = cv2.bitwise_and(mask_1_array, white_image_array)
        final_mask_image = Image.fromarray((combined_mask_array).astype(np.uint8)).convert("RGB")
        plt.imshow((combined_mask_array).astype(np.uint8))
        plt.axis('off')
        plt.show()

        pos_prompt += f', {additional_quality_suffix},'
        pos_prompt += ''.join(f'{pos_const}' for pos_const in pos_const_lst)
        if idx == 0:
            print("Final POS Prompt:", pos_prompt)

        # Build the negative prompt in a per-row variable so the constraints
        # are not appended again on every row.
        row_neg_prompt = neg_prompt + ''.join(f', {neg_const}' for neg_const in neg_const_lst)
        if idx == 0:
            print("Final NEG Prompt:", row_neg_prompt)

        prompt_token_lst = [tokenize_function(prompt) for prompt in (pos_prompt, row_neg_prompt)]
        prompt_tensors = data_collator(prompt_token_lst)
        prompt_ids = prompt_tensors['input_ids']
        pos_prompt_ids = prompt_ids[0, :].unsqueeze(0)
        neg_prompt_ids = prompt_ids[1, :].unsqueeze(0)
        pos_prompt_embed, neg_prompt_embed = get_pipeline_embeds_mod(pos_prompt_ids, neg_prompt_ids)
        print("prompt embedding shape", pos_prompt_embed.size(), neg_prompt_embed.size())

        # A new generator seeded with the global `seed` is created for each row.
        generated_image = pipeline(
            prompt_embeds=pos_prompt_embed,
            negative_prompt_embeds=neg_prompt_embed,
            num_inference_steps=50,
            strength=1.0,
            guidance_scale=7.0,
            generator=[torch.Generator(device="cuda").manual_seed(seed)],
            image=image,
            mask_image=final_mask_image,
            control_image=segmentation_cond_image,
        ).images[0]

        design_image = generated_image.resize(
                (orig_w, orig_h), Image.Resampling.LANCZOS)

        plt.imshow(design_image)
        plt.axis('off')
        plt.show()

        # if idx > 3:
        #     break

In [None]:
# Generate and display a design for every row of aicrowd_dataset
# (wall band fraction 0.11, as set in gen_image_aicrowd_10pct).
gen_image_aicrowd_10pct(neg_prompt)

### 20% wall masking + No extra prompts + Window border masking
control_items_mask = ["stairs;steps", "step;stair", "stairway;staircase", "radiator", "screen;door;screen", \
                 "door;double;door", "countertop", "fireplace;hearth;open;fireplace", "column;pillar"]

In [None]:
# Base negative prompt, and the suffix that the generation functions append to
# every positive prompt.
neg_prompt = (
    "lowres, watermark, banner, logo, contactinfo, text, deformed, blurry, blur, "
    "out of focus, out of frame, surreal, ugly, distortion, low-res, poor quality, "
)
additional_quality_suffix = "interior design, 4K, high resolution"

# Optional negative-prompt snippets; only those placed in neg_const_lst are used.
neg_const1 = "cluttered space, Low ceilings, Mismatched colors and patterns, Poor lighting, Empty and uninviting space, "
neg_const2 = "furniture with illogical or non-functional designs, missing parts or unbalanced bases, "
neg_const3 = (
    "plants growing inside furniture (e.g., stools, baskets), No plants defying gravity "
    "(e.g., floating or hanging unrealistically), "
)
neg_const4 = "furniture merging with other objects (e.g., sofa with table, bed with floor), "
neg_const5 = "columns and pillars not affixed to ground, columns and pillars defying gravity, "

# Optional positive-prompt snippets; only those placed in pos_const_lst are used.
pos_const1 = "Sofas, tables, bed cots, chairs if present in the image must have sturdy bases, "
pos_const2 = "Bed if present must be smooth and of queen size. Bed cot Headboard must be smooth, simple, "
pos_const3 = "Ceilings and walls must be smooth, "
pos_const4 = "Table/s if present must have a level top with a flat, stable surface made of glass, wooden or marble tops, "
pos_const5 = "Plants if present must be placed in proper terracotta pots, "
pos_const6 = "Furnitures and objects must maintain their distinct boundaries, "
pos_const7 = "Columns and pillars must have their bases firmly affixed to the floor, "
pos_const8 = "Scenaries outside windows should remain the same"

# Active selection for this experiment: no extra constraints on either side.
# Earlier selections, kept for reference:
#   pos_const_lst = [pos_const1, pos_const2, pos_const3, pos_const4, pos_const5, pos_const6]
#   neg_const_lst = [neg_const1, neg_const2, neg_const3, neg_const4]
pos_const_lst = []
neg_const_lst = []

In [None]:
# Segment labels meant for filter_items_mask's `items_to_mask` argument in the
# window-border experiment. Unlike the list used for the earlier runs, this
# one does not contain "windowpane;window".
control_items_mask = [
    "stairs;steps",
    "step;stair",
    "stairway;staircase",
    "radiator",
    "screen;door;screen",
    "door;double;door",
    "countertop",
    "fireplace;hearth;open;fireplace",
    "column;pillar",
]

In [None]:
# Segment labels for filter_items_retain's `items_to_retain` argument.
control_items_retain = [
    "floor;flooring",
    "rug;carpet;carpeting",
    "wall",
    "ceiling",
]

In [None]:
def filter_items_mask(colors_list, items_list, items_to_mask):
    """
    Drop every (color, item) pair whose item appears in ``items_to_mask``.

    ``colors_list`` and ``items_list`` are walked in parallel (pairing stops
    at the shorter of the two); relative order of the surviving pairs is kept.

    Args:
        colors_list: Sequence of colors, one per entry of ``items_list``.
        items_list: Sequence of item labels.
        items_to_mask: Container of item labels to exclude from the result.

    Returns:
        A ``(filtered_colors, filtered_items)`` tuple of two lists holding
        the colors and items that were NOT in ``items_to_mask``.
    """
    surviving_pairs = [
        pair for pair in zip(colors_list, items_list)
        if pair[1] not in items_to_mask
    ]
    filtered_colors = [color for color, _ in surviving_pairs]
    filtered_items = [item for _, item in surviving_pairs]
    return filtered_colors, filtered_items

In [None]:
def filter_items_retain(colors_list, items_list, items_to_retain):
    """
    Keep only the (color, item) pairs whose item appears in ``items_to_retain``.

    ``colors_list`` and ``items_list`` are walked in parallel (pairing stops
    at the shorter of the two); relative order of the surviving pairs is kept.

    Args:
        colors_list: Sequence of colors, one per entry of ``items_list``.
        items_list: Sequence of item labels.
        items_to_retain: Container of item labels to keep.

    Returns:
        A ``(filtered_colors, filtered_items)`` tuple of two lists holding
        the colors and items that WERE in ``items_to_retain``.
    """
    retained_pairs = [
        pair for pair in zip(colors_list, items_list)
        if pair[1] in items_to_retain
    ]
    filtered_colors = [color for color, _ in retained_pairs]
    filtered_items = [item for _, item in retained_pairs]
    return filtered_colors, filtered_items

In [None]:
# Row indices of `aicrowd_dataset` that gen_image_aicrowd_with_window_mask
# SKIPS: despite the name, rows whose index is listed here are passed over,
# not inspected.
idx_to_check = [999]

In [None]:
def _show(image_like):
    """Display ``image_like`` with matplotlib, axes hidden."""
    plt.imshow(image_like)
    plt.axis('off')
    plt.show()


def _mask_for_colors(seg_array, colors):
    """Return an array shaped like ``seg_array`` that is 1 wherever a pixel equals one of ``colors``, else 0."""
    mask = np.zeros_like(seg_array)
    for color in colors:
        mask[(seg_array == color).all(axis=2)] = 1
    return mask


def _to_gray_mask(mask):
    """Scale a 0/1 mask to 0/255 and collapse it to a single-channel ("L") uint8 array via PIL."""
    return np.array(Image.fromarray((mask * 255).astype(np.uint8)).convert("L"))


def gen_image_aicrowd_with_window_mask(neg_prompt, wall_ht_pct, win_border_thickness, win_border_colors):
    """
    Generate and display one furnished-room image per row of ``aicrowd_dataset``.

    For every row (except those whose index is in ``idx_to_check``) the
    function:

    1. opens ``'development_data-v0.2/' + row['img_path']`` and resizes it to
       the dimensions returned by ``resize_dimensions(size, 768)``;
    2. segments the resized image with ``segment_image`` and builds a mask
       covering every segment whose label is NOT in ``control_items_mask``;
    3. clears from that mask, column by column, the top ``wall_ht_pct``
       fraction of the span between the first and last wall pixel;
    4. if a "windowpane;window" segment exists, outlines it in (77, 77, 77)
       on a copy of the segmentation map and clears from the mask every pixel
       whose colour is in ``win_border_colors``;
    5. builds the prompts, embeds them with ``get_pipeline_embeds_mod`` and
       calls ``pipeline`` with the image, the final mask and the segmentation
       map as control image;
    6. resizes the result back to the original size and displays it.

    Intermediate images and masks are displayed with matplotlib and progress
    is printed. Nothing is saved or returned.

    Relies on notebook globals: ``aicrowd_dataset``, ``idx_to_check``,
    ``resize_dimensions``, ``segment_image``, ``seg_image_processor``,
    ``image_segmentor``, ``map_colors_rgb``, ``control_items_mask``,
    ``additional_quality_suffix``, ``pos_const_lst``, ``neg_const_lst``,
    ``tokenize_function``, ``data_collator``, ``get_pipeline_embeds_mod``,
    ``pipeline``, ``seed`` and ``plt``.

    Args:
        neg_prompt: Base negative prompt; each entry of ``neg_const_lst`` is
            appended to it once (not once per row).
        wall_ht_pct: Fraction of each column's wall span, measured from its
            top, that is removed from the inpainting mask.
        win_border_thickness: ``thickness`` passed to ``cv2.drawContours``
            when outlining window segments.
        win_border_colors: Iterable of colours; pixels of the outlined
            segmentation map equal to any of them are removed from the mask.

    Returns:
        None.
    """
    # Build the negative prompt once. Appending inside the row loop (as the
    # code used to) re-added every constraint for each processed row, so the
    # prompt kept growing whenever neg_const_lst was non-empty.
    full_neg_prompt = neg_prompt
    for neg_const in neg_const_lst:
        full_neg_prompt = full_neg_prompt + f', {neg_const}'

    for idx, row in aicrowd_dataset.iterrows():
        if idx in idx_to_check:
            continue

        image_name = row['img_path']
        pos_prompt = row['text_prompt']

        empty_room_image = Image.open('development_data-v0.2/'+image_name)
        orig_w, orig_h = empty_room_image.size
        new_width, new_height = resize_dimensions(empty_room_image.size, 768)
        input_image = empty_room_image.resize((new_width, new_height))

        _show(input_image)
        print(f"idx : {idx} Prompt: {pos_prompt}")

        # --- Segmentation and base inpainting mask -------------------------
        real_seg = np.array(segment_image(input_image,
                                          seg_image_processor,
                                          image_segmentor))
        unique_colors = np.unique(real_seg.reshape(-1, real_seg.shape[2]), axis=0)
        unique_colors = [tuple(color) for color in unique_colors]
        segment_items = [map_colors_rgb(i) for i in unique_colors]
        chosen_colors, segment_items_1 = filter_items_mask(
            colors_list=unique_colors,
            items_list=segment_items,
            items_to_mask=control_items_mask,
        )
        if idx == 0:
            print(f'control_items_mask : {control_items_mask}')

        print(f'segment_items for inpainting : {segment_items_1}')

        mask = _mask_for_colors(real_seg, chosen_colors)
        _show((mask*255).astype(int))

        image = Image.fromarray(np.array(input_image)).convert("RGB")
        segmentation_cond_image = Image.fromarray(real_seg).convert("RGB")
        mask_1_array = _to_gray_mask(mask)

        # --- Keep the upper part of the walls out of the mask --------------
        # Selecting every colour except "wall" leaves wall pixels at 0.
        non_wall_colors, _ = filter_items_mask(
            colors_list=unique_colors,
            items_list=segment_items,
            items_to_mask=["wall"],
        )
        wall_gray = _to_gray_mask(_mask_for_colors(real_seg, non_wall_colors))

        height, width = wall_gray.shape
        white_image_array = np.full((height, width), 255, dtype=np.uint8)
        for col_idx in range(width):
            wall_rows = np.nonzero(wall_gray[:, col_idx] == 0)[0]
            if wall_rows.size == 0:
                # No wall in this column: fall back to the span (0, 6).
                # NOTE(review): 6 is an unexplained magic number carried over
                # from the original code -- confirm its intent.
                min_, max_ = 0, 6
            else:
                min_, max_ = wall_rows.min(), wall_rows.max()
            new_max_ = min_ + int(wall_ht_pct * (max_ - min_))
            # One assignment per column; the former inner loop over all
            # columns repeated this identical write `width` times.
            white_image_array[min_: new_max_, col_idx] = 0

        print(f'mask_1_array.shape : {mask_1_array.shape} & white_image_array.shape : {white_image_array.shape}')

        combined_mask_array = cv2.bitwise_and(mask_1_array, white_image_array)
        combined_mask_image = Image.fromarray((combined_mask_array).astype(np.uint8)).convert("RGB")
        _show((combined_mask_array).astype(np.uint8))

        # --- Keep a border around windows out of the mask ------------------
        chosen_colors_win, _ = filter_items_retain(
            colors_list=unique_colors,
            items_list=segment_items,
            items_to_retain=["windowpane;window"])

        win_color = np.array(chosen_colors_win)
        if win_color.shape == (0,):
            # No window segment found: use the mask as it is.
            final_mask_image = combined_mask_image
        else:
            print(f'win_color.shape : {win_color.shape}, real_seg.shape : {real_seg.shape}')
            win_mask = cv2.inRange(real_seg, win_color, win_color)
            win_contours, _ = cv2.findContours(win_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            # Draw on a copy so the segmentation map itself is not mutated.
            # NOTE(review): the outline colour is hard-coded; it only takes
            # effect below if (77, 77, 77) is listed in win_border_colors.
            bordered_win_mask = cv2.drawContours(
                real_seg.copy(), win_contours, -1, (77, 77, 77), thickness=win_border_thickness)
            _show((bordered_win_mask).astype(np.uint8))

            # 0 where a border colour was found, 255 elsewhere.
            win_interim_array = cv2.bitwise_not(
                _to_gray_mask(_mask_for_colors(bordered_win_mask, win_border_colors)))

            final_mask_array = cv2.bitwise_and(combined_mask_array, win_interim_array)
            final_mask_image = Image.fromarray((final_mask_array).astype(np.uint8)).convert("RGB")
            _show((final_mask_array).astype(np.uint8))

        # --- Prompts --------------------------------------------------------
        pos_prompt += f', {additional_quality_suffix},'
        for pos_const in pos_const_lst:
            pos_prompt = pos_prompt + f'{pos_const}'
        if idx == 0:
            print("Final POS Prompt:", pos_prompt)
            print("Final NEG Prompt:", full_neg_prompt)

        # Tokenize both prompts together so the collator pads them alike.
        prompt_token_lst = [tokenize_function(prompt) for prompt in (pos_prompt, full_neg_prompt)]
        prompt_tensors = data_collator(prompt_token_lst)
        prompt_ids = prompt_tensors['input_ids']
        pos_prompt_ids = prompt_ids[0, :].unsqueeze(0)
        neg_prompt_ids = prompt_ids[1, :].unsqueeze(0)
        pos_prompt_embed, neg_prompt_embed = get_pipeline_embeds_mod(pos_prompt_ids, neg_prompt_ids)
        print("prompt embedding shape", pos_prompt_embed.size(), neg_prompt_embed.size())

        # --- Generation -----------------------------------------------------
        generated_image = pipeline(
            prompt_embeds=pos_prompt_embed,
            negative_prompt_embeds=neg_prompt_embed,
            num_inference_steps=50,
            strength=1.0,
            guidance_scale=7.0,
            generator=[torch.Generator(device="cuda").manual_seed(seed)],
            image=image,
            mask_image=final_mask_image,
            control_image=segmentation_cond_image,
        ).images[0]

        # Return to the resolution of the source photograph.
        design_image = generated_image.resize(
                (orig_w, orig_h), Image.Resampling.LANCZOS)

        _show(design_image)

In [None]:
# Fraction of each column's wall span (measured from its top) that is kept out
# of the inpainting mask.
wall_ht_pct = 0.1
# Thickness handed to cv2.drawContours for the outline drawn around windows.
win_border_thickness = 10
# Colours removed from the final mask; (77, 77, 77) is the colour in which the
# window outlines are drawn.
win_border_colors = [(77, 77, 77)]
gen_image_aicrowd_with_window_mask(
    neg_prompt=neg_prompt,
    wall_ht_pct=wall_ht_pct,
    win_border_thickness=win_border_thickness,
    win_border_colors=win_border_colors,
)