In [1]:
from mmseg.registry import DATASETS, MODELS
from mmengine.registry import init_default_scope
from mmengine import Config

# Load the experiment config from disk.
cfg = Config.fromfile('configs/ldm_ad/test.py')
# pop() removes the 'dataset' entry from cfg.train_dataloader. cfg is re-read
# from disk on the line above, so re-running this cell starts from a fresh config.
dataset_cfg = cfg.train_dataloader.pop('dataset')
# Select 'mmseg' as the default registry scope before building from the registry.
init_default_scope('mmseg')
print(dataset_cfg)
# Instantiate the dataset described by dataset_cfg through the DATASETS registry.
dataset = DATASETS.build(dataset_cfg)


{'type': 'CityscapesWithAnomaliesDataset', 'data_root': 'data/cityscapes/', 'data_prefix': {'img_path': 'leftImg8bit/train', 'seg_map_path': 'gtFine/train'}, 'pipeline': [{'type': 'LoadImageFromFile'}, {'type': 'LoadAnnotations'}, {'type': 'ConcatAnomalies'}]}


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
from PIL import Image
import numpy as np
import torch
import cv2

# Reload the anomaly crops and masks stored under samples/ into
# dataset.anomalies, one entry per index in range(dataset.num_anomalies).
for i in range(dataset.num_anomalies):
    img_path = f'samples/images/{i}.jpg'
    mask_path = f'samples/masks/{i}.jpg'

    # cv2.imread does not raise on a missing/unreadable file, it returns None.
    # Fail here with the offending path instead of an obscure error later on.
    img = cv2.imread(img_path)
    if img is None:
        raise FileNotFoundError(f'could not read anomaly image: {img_path}')
    mask = cv2.imread(mask_path)
    if mask is None:
        raise FileNotFoundError(f'could not read anomaly mask: {mask_path}')

    # The image stays in the BGR channel order returned by cv2.imread
    # (the BGR -> RGB conversion is intentionally not applied).
    # The mask is read as a 3-channel image and collapsed to a single channel.
    # NOTE(review): masks are stored as JPEG, which is lossy, so values may not
    # be strictly 0/255 - confirm the consumer of 'mask' tolerates that.
    mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)

    dataset.anomalies[i] = {'image': img, 'mask': mask}

In [6]:
# Build the model described by the 'model' section of the config.
# The section is read without removing it from cfg: with cfg.pop('model') the
# key is gone after the first run, so re-running this cell (for example after
# interrupting a slow build) raised KeyError until cfg was reloaded.
model_cfg = cfg.model
print(model_cfg)
model = MODELS.build(model_cfg)

{'type': 'EncoderDecoderLDM', 'data_preprocessor': {'type': 'SegDataPreProcessor', 'mean': [123.675, 116.28, 103.53], 'std': [58.395, 57.12, 57.375], 'bgr_to_rgb': True, 'pad_val': 0, 'seg_pad_val': 255, 'size': (512, 1024), 'test_cfg': {'size_divisor': 32}}, 'backbone': {'type': 'ResNet', 'depth': 50, 'deep_stem': False, 'num_stages': 4, 'out_indices': (0, 1, 2, 3), 'frozen_stages': -1, 'norm_cfg': {'type': 'SyncBN', 'requires_grad': False}, 'style': 'pytorch', 'init_cfg': {'type': 'Pretrained', 'checkpoint': 'torchvision://resnet50'}}, 'ldm': {'type': 'DDIMSampler', 'model': 'configs/ldm_ad/cldm_v15.yaml', 'ldm_pretrain': 'checkpoints/v1-5-pruned.ckpt', 'control_pretrain': 'checkpoints/control_v11p_sd15_scribble.pth'}, 'decode_head': {'type': 'Mask2FormerHead', 'in_channels': [256, 512, 1024, 2048], 'strides': [4, 8, 16, 32], 'feat_channels': 256, 'out_channels': 256, 'num_classes': 19, 'num_queries': 100, 'num_transformer_feat_level': 3, 'align_corners': False, 'pixel_decoder': {'ty

KeyboardInterrupt: 

In [None]:
# Move the wrapped diffusion model (model.ldm.model) to the GPU and rebind the
# attribute to the returned module. Only this sub-module is moved here.
model.ldm.model = model.ldm.model.cuda()

In [None]:
import random
import numpy as np
import time
from PIL import Image
import cv2
import torch

def plot_mask_on_img(img, mask, idx):
    """Save an image, its mask, and a tinted overlay preview under ``samples/``.

    Three files are written, all named ``{idx}.jpg``:
    ``samples/images/`` (the image as given), ``samples/masks/`` (the mask as
    given) and ``samples/mask_on_image/`` (the image with masked pixels tinted).

    Args:
        img: H x W x 3 array. Channel 0 is the one that gets tinted
            (red if the array is in RGB order - TODO confirm with callers).
        mask: H x W array; only pixels exactly equal to 255 are highlighted.
        idx: value used as the file stem of the three output files.

    The input arrays are left untouched. Earlier the tint was written into
    ``img`` itself, so callers that kept using the array after this call
    silently got the red-tinted version.
    """
    Image.fromarray(img).save(f'samples/images/{idx}.jpg')
    Image.fromarray(mask).save(f'samples/masks/{idx}.jpg')

    selected = mask == 255
    overlay = img.copy()  # blend on a private copy, never on the caller's array
    # 50/50 blend of channel 0 with full intensity on the masked pixels; the
    # float result is truncated when written back into the integer array.
    overlay[selected, 0] = 0.5 * overlay[selected, 0] + 0.5 * 255
    Image.fromarray(overlay).save(f'samples/mask_on_image/{idx}.jpg')

# Object vocabulary: every non-blank line of the file is evaluated as a Python
# expression and its 'name' entry is kept.
# NOTE(security): eval() executes arbitrary code found in the file. This is
# only acceptable because ldm/object365.txt is a trusted local file; if every
# line is a plain literal, ast.literal_eval is the safe replacement.
with open('ldm/object365.txt', 'r') as f:
    # Blank lines are skipped: eval() on an empty line raises SyntaxError.
    content = [line for line in f if line.strip()]
objects = [eval(c)['name'] for c in content]

# Seed the RNGs before anything random happens. The seed used to be computed
# but never applied; printing it makes a given run repeatable.
seed = int(time.time()) % 1000000
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
print(f'seed: {seed}')

# Prompts: "a photo of a <object>, best quality" for randomly drawn objects.
interval = 100
num_samples = interval
prompts = [['a', 'photo', 'of', 'a'] for _ in range(interval)]
a_prompt = 'best quality'
select_objects = random.choices(objects, k=num_samples)
p_prompts = [' '.join(s + [ob]) + ', ' + a_prompt for s, ob in zip(prompts, select_objects)]
n_prompt = 'lowres, bad anatomy, bad hands, cropped, worst quality'

# Sampling configuration.
image_resolution = 512
detect_resolution = 512
ddim_steps = 40
control_start_step = 20
control_end_step = 40
guess_mode = False
self_control = True
strength = 1.4
scale = 9.0
eta = 1.0

# Number of samples generated per sampler call.
batch_size = 4

for idx in range(0, num_samples, batch_size):
    # The last batch may be smaller than batch_size.
    num_s = min(batch_size, num_samples - idx)
    cond = {"c_concat": None, "c_crossattn": [model.ldm.model.get_learned_conditioning(p_prompts[idx: idx + num_s])]}
    un_cond = {"c_concat": None, "c_crossattn": [model.ldm.model.get_learned_conditioning([n_prompt] * num_s)]}

    H, W = image_resolution, image_resolution
    # Per-sample shape handed to the sampler: 4 channels at 1/8 of the image
    # resolution (presumably the latent shape - TODO confirm).
    shape = (4, H // 8, W // 8)
    model.ldm.model.control_scales = [strength * (0.825 ** float(12 - i)) for i in range(13)] if guess_mode else ([strength] * 13)
    imgs, intermediates = model.ldm.sample_create_image_mask(ddim_steps, num_s,
                                                    shape, cond, verbose=False, eta=eta,
                                                    unconditional_guidance_scale=scale,
                                                    unconditional_conditioning=un_cond,
                                                    control_start_step=control_start_step,
                                                    control_end_step=control_end_step,
                                                    self_control=self_control)

    # Decode the sampler output into image tensors; from here on H and W are the
    # decoded height and width.
    imgs = model.ldm.model.decode_first_stage(imgs)
    B, C, H, W = imgs.shape

    # Channels-last uint8 images: (x + 1) / 2 * 255, clipped to [0, 255]
    # (assumes the decoder output lies in [-1, 1] - TODO confirm).
    imgs = ((imgs.permute(0, 2, 3, 1) + 1) / 2 * 255).cpu().numpy().clip(0, 255).astype(np.uint8)
    masks = (intermediates['pseudo_masks'].squeeze(1).cpu().numpy() * 255).astype(np.uint8)
    contours = intermediates['contours']

    for i, (img, mask, contour) in enumerate(zip(imgs, masks, contours)):
        # Crop to the contour's bounding box and rescale so that the longer
        # side spans the full decoded size; never let a side collapse to 0.
        x, y, w, h = cv2.boundingRect(contour)
        longest = max(w, h)
        new_w = max(1, int(w / longest * W))
        new_h = max(1, int(h / longest * H))
        extracted_img = cv2.resize(img[y:y+h, x:x+w], (new_w, new_h))
        # Nearest-neighbour keeps the mask's original value set; the default
        # bilinear interpolation would invent intermediate gray levels.
        extracted_mask = cv2.resize(mask[y:y+h, x:x+w], (new_w, new_h),
                                    interpolation=cv2.INTER_NEAREST)
        # Hand the plot helper a copy: it tints the image it receives, and
        # torch.from_numpy below shares memory with extracted_img, so the stored
        # anomaly would otherwise carry the red overlay.
        plot_mask_on_img(extracted_img.copy(), extracted_mask, idx + i)
        dataset.anomalies[idx + i] = {
            'image': torch.from_numpy(extracted_img).permute(2, 0, 1),
            'mask': torch.from_numpy(extracted_mask),
        }


DDIM Sampler: 100%|██████████| 20/20 [00:11<00:00,  1.73it/s]
DDIM Sampler: 100%|██████████| 20/20 [00:13<00:00,  1.49it/s]
DDIM Sampler: 100%|██████████| 20/20 [00:09<00:00,  2.10it/s]
DDIM Sampler: 100%|██████████| 20/20 [00:13<00:00,  1.49it/s]
DDIM Sampler: 100%|██████████| 20/20 [00:09<00:00,  2.09it/s]
DDIM Sampler: 100%|██████████| 20/20 [00:13<00:00,  1.48it/s]
DDIM Sampler: 100%|██████████| 20/20 [00:09<00:00,  2.08it/s]
DDIM Sampler: 100%|██████████| 20/20 [00:13<00:00,  1.48it/s]
DDIM Sampler: 100%|██████████| 20/20 [00:09<00:00,  2.09it/s]
DDIM Sampler: 100%|██████████| 20/20 [00:13<00:00,  1.49it/s]
DDIM Sampler: 100%|██████████| 20/20 [00:09<00:00,  2.08it/s]
DDIM Sampler: 100%|██████████| 20/20 [00:13<00:00,  1.48it/s]
DDIM Sampler: 100%|██████████| 20/20 [00:09<00:00,  2.08it/s]
DDIM Sampler: 100%|██████████| 20/20 [00:13<00:00,  1.48it/s]
DDIM Sampler: 100%|██████████| 20/20 [00:09<00:00,  2.07it/s]
DDIM Sampler: 100%|██████████| 20/20 [00:13<00:00,  1.48it/s]
DDIM Sam

In [4]:
# Fetch sample 0 from the dataset and list the keys of the returned mapping.
dataset[0].keys()

IndexError: too many indices for array: array is 2-dimensional, but 3 were indexed

In [10]:
# Keep one sample (index 2) around for the inspection cells below.
sample = dataset[2]

In [11]:
# Write the sample's image to test1.jpg for visual inspection, then show its shape.
cv2.imwrite('test1.jpg', sample['img'])
sample['img'].shape

(1024, 2048, 3)

In [12]:
# Build a mask that is 255 wherever the segmentation map equals 19 and 0
# elsewhere, then write it to testm1.jpg.
# NOTE(review): 19 is presumably the label given to pasted anomalies (the model
# config lists num_classes=19, i.e. ids 0-18) - confirm against ConcatAnomalies.
mask = np.zeros_like(sample['gt_seg_map'])
mask[sample['gt_seg_map'] == 19] = 255
cv2.imwrite('testm1.jpg', mask)

True

In [None]:
# List the distinct label values present in sample 4's segmentation map.
np.unique(dataset[4]['gt_seg_map'])

array([  0,   1,   2,   4,   5,   6,   7,   8,   9,  10,  11,  13,  17,
        18, 255], dtype=uint8)

In [4]:
# Take the first entry of sample 0's 'anomalies' and unpack its image and mask.
anomaly = dataset[0]['anomalies'][0]
image = anomaly['image']
mask = anomaly['mask']

In [5]:
# Write the anomaly image and mask to disk, then show both shapes.
# These are the same file names used by the sample-inspection cells, so those
# files get overwritten.
cv2.imwrite('test1.jpg', image)
cv2.imwrite('testm1.jpg', mask)
image.shape, mask.shape

((512, 347, 3), (512, 347))

In [6]:
# cv2.resize takes the target size as (width, height): both results are
# 256 rows x 128 columns.
# No interpolation flag is passed, so OpenCV's default interpolation is used for
# the mask too, which can produce values between the original mask levels.
# image/mask are rebound here, so re-running this cell resizes the already
# resized arrays.
image = cv2.resize(image, (128, 256))
mask = cv2.resize(mask, (128, 256))
cv2.imwrite('test2.jpg', image)
cv2.imwrite('testm2.jpg', mask)

True

In [7]:
# List the distinct values currently present in mask.
np.unique(mask)

array([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,
        13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,
        26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,
        39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
        52,  53,  54,  55,  56,  58,  59,  60,  61,  63,  66,  67,  68,
        69,  70,  72,  73,  74,  75,  76,  78,  79,  80,  81,  82,  84,
        85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,
        98,  99, 101, 103, 104, 107, 108, 109, 110, 112, 113, 114, 115,
       116, 119, 121, 122, 125, 126, 127, 129, 130, 132, 134, 135, 139,
       140, 141, 142, 143, 144, 145, 147, 148, 149, 150, 151, 152, 155,
       156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168,
       169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181,
       183, 184, 185, 186, 187, 188, 190, 191, 192, 193, 194, 196, 197,
       198, 199, 200, 201, 202, 203, 204, 205, 208, 209, 210, 21

In [None]:
# Save image and mask through PIL. Unlike the cv2.imwrite cells, this expects
# torch tensors: image is permuted from channels-first to channels-last before
# conversion to numpy, and mask is converted as-is.
Image.fromarray(image.permute(1, 2, 0).numpy()).save('test1.jpg')
Image.fromarray(mask.numpy()).save('testm1.jpg')

In [None]:
from torchvision import transforms
# Resize both tensors with torchvision to size [201, 256] (height, width) and
# save them. The mask gets a leading dimension added for the resize and removed
# again afterwards.
Image.fromarray(transforms.Resize([201, 256])(image).permute(1, 2, 0).numpy()).save('test2.jpg')
Image.fromarray(transforms.Resize([201, 256])(mask.unsqueeze(0)).squeeze(0).numpy()).save('testm2.jpg')



In [None]:
# Largest value in the mask after resizing it to [201, 256].
transforms.Resize([201, 256])(mask.unsqueeze(0)).squeeze(0).numpy().max()



255

In [None]:
# Resize with a single int: torchvision matches the shorter edge to 100 and
# keeps the aspect ratio; show the resulting shape.
transforms.Resize(100)(dataset[0]['anomalies'][0]['image']).shape



torch.Size([3, 100, 128])

In [None]:
# Check what kind of object is stored per entry in dataset.anomalies.
type(dataset.anomalies[0])

dict

In [None]:
import cv2
import random
import numpy as np

# Create a blank canvas of 512 rows x 1024 columns.
original_image = np.zeros((512, 1024, 3), dtype=np.uint8)

# Randomly choose how many objects (1 to 3) to try to place.
num_objects = random.randint(1, 3)
object_sizes = []  # currently unused

# (x, y, width, height) of every object placed so far.
placed_objects = []

# Upper bound on position retries per object, so the loop always terminates
# even if no free spot exists.
max_attempts = 50

for _ in range(num_objects):
    # Random object size as (height, width), each side between 16 and 256 px.
    object_size = (random.randint(16, 256), random.randint(16, 256))
    obj_h, obj_w = object_size

    # Draw positions until one does not overlap any placed object. Previously a
    # single overlapping draw dropped the object instead of retrying.
    for _attempt in range(max_attempts):
        # Position range keeps the object fully inside the canvas.
        x = random.randint(0, original_image.shape[1] - obj_w)
        y = random.randint(0, original_image.shape[0] - obj_h)

        # Axis-aligned rectangle intersection test against each placed object.
        overlap = any(
            x < px + pw and x + obj_w > px and
            y < py + ph and y + obj_h > py
            for px, py, pw, ph in placed_objects
        )
        if not overlap:
            break
    else:
        # No free position found within max_attempts: skip this object.
        continue

    # Draw the object as a filled rectangle in a random colour. cv2.rectangle
    # treats both corners as inclusive, hence the -1 to get exactly
    # obj_w x obj_h pixels, matching the overlap test above.
    color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
    cv2.rectangle(original_image, (x, y), (x + obj_w - 1, y + obj_h - 1), color, -1)

    # Remember where the object was placed.
    placed_objects.append((x, y, obj_w, obj_h))

# Show the canvas inline. cv2.imshow opens a GUI window that needs a
# cv2.waitKey event loop and does not render inside a notebook; a PIL image as
# the last expression is displayed by Jupyter directly.
from PIL import Image
Image.fromarray(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB))

In [None]:
import random
# Draw one value from [0, 1, 2, 3] with relative weights 2:10:5:2; the result
# is a one-element list.
# NOTE(review): the traceback recorded under this cell mentions choice(), which
# does not match this call - the output looks stale; re-run to confirm.
random.choices([0, 1, 2, 3], weights=[2, 10, 5, 2], k = 1)

TypeError: choice() got an unexpected keyword argument 'weights'