In [1]:
import base64
import html
import io
import random
import sqlite3
from pathlib import Path

import torch
from PIL import Image, ImageDraw
from tqdm import tqdm

In [3]:
def markup_watermarked_image(image: Image.Image, boxes: bytes):
	"""Outline every box packed in ``boxes`` on ``image`` in red.

	``boxes`` is interpreted as a flat buffer of float16 values and grouped
	into rows of four: ``(xmin, ymin, xmax, ymax)``. Every coordinate -- x and
	y alike -- is multiplied by the longer side of ``image`` before drawing.
	NOTE(review): this presumably means the boxes are normalised against a
	square whose side is the longer image edge; confirm with whatever produces
	``watermark_boxes``.

	The image is modified in place and nothing is returned.
	"""
	# bytes objects are read-only, so the data is copied into a mutable
	# bytearray before torch wraps it.
	packed = torch.frombuffer(bytearray(boxes), dtype=torch.float16)
	rows = packed.reshape(-1, 4).tolist()

	canvas = ImageDraw.Draw(image)
	longest_side = max(image.size)

	for row in rows:
		# One scale factor for all four coordinates (see docstring).
		corners = [coord * longest_side for coord in row]
		canvas.rectangle(corners, outline="red", width=2)


# Fetch (path, watermark, watermark_boxes) for every image that has both an
# embedding and a watermark label, then shuffle so that any prefix of the
# lists below is a random sample.
conn = sqlite3.connect('../data/clip-embeddings.sqlite3')
cursor = conn.cursor()

cursor.execute(
	"SELECT path, watermark, watermark_boxes FROM images "
	"WHERE embedding IS NOT NULL AND watermark IS NOT NULL"
)
all_paths = list(cursor.fetchall())
random.shuffle(all_paths)

# Split on the truthiness of the watermark column (index 1), keeping only the
# path (index 0) and the packed boxes (index 2) for each row.
watermarked = [(row[0], row[2]) for row in all_paths if row[1]]
non_watermarked = [(row[0], row[2]) for row in all_paths if not row[1]]


def write_images(paths: list[tuple[str, bytes]], max_side: int = 512):
	"""Build a standalone HTML page that embeds a thumbnail of every image.

	Each image is opened from disk, scaled so that its longer side is
	``max_side`` pixels, converted to RGB, optionally annotated through
	``markup_watermarked_image`` and embedded as a base64 WebP data URI. A tqdm
	progress bar tracks the loop.

	Args:
		paths: ``(path, boxes)`` pairs. ``boxes`` is forwarded to
			``markup_watermarked_image``; ``None`` or an empty value means
			nothing is drawn on that image.
		max_side: Length in pixels of the longer side of each thumbnail.

	Returns:
		The complete HTML document as a single string.
	"""
	# Collect fragments and join once instead of repeatedly re-copying a
	# string that grows by a whole base64 image per iteration.
	parts = ["<html><body>"]

	for path, boxes in tqdm(paths):
		# The context manager releases the source file handle as soon as the
		# resized copy exists instead of leaving it to garbage collection.
		with Image.open(path) as source:
			scale = max_side / max(source.size)
			# Clamp to 1px so very elongated images do not round to a zero side.
			thumb_size = [max(1, round(side * scale)) for side in source.size]
			image = source.resize(thumb_size)
		image = image.convert("RGB")

		if boxes is not None and len(boxes) > 0:
			markup_watermarked_image(image, boxes)

		with io.BytesIO() as output:
			image.save(output, format="webp")
			data = base64.b64encode(output.getvalue()).decode()

		# The payload is WebP, so the data URI is labelled as such. The path is
		# escaped because it may contain quotes or angle brackets that would
		# otherwise break out of the alt attribute.
		alt_text = html.escape(str(path))
		parts.append(f'<img src="data:image/webp;base64,{data}" alt="{alt_text}">')

	parts.append("</body></html>")

	return "".join(parts)


# Render the first 1024 entries of each list into a standalone HTML gallery.
# UTF-8 is requested explicitly so that non-ASCII image paths in the alt text
# neither depend on nor crash under the platform's default text encoding.
with open("watermarked.html", "w", encoding="utf-8") as f:
	f.write(write_images(watermarked[:1024]))

with open("non_watermarked.html", "w", encoding="utf-8") as f:
	f.write(write_images(non_watermarked[:1024]))

100%|██████████| 1024/1024 [01:05<00:00, 15.58it/s]
100%|██████████| 1024/1024 [01:19<00:00, 12.88it/s]
