In [None]:
from init_notebook import *

In [None]:
# Local directory that holds everything this notebook downloads or extracts.
PATH = Path("~/prog/data/pixilart").expanduser()
# create the directory (including missing parents); a no-op if it already exists
PATH.mkdir(parents=True, exist_ok=True)

In [None]:
# HTTP session shared by the scraping code below.
# Only the User-Agent is overridden (with a desktop Firefox string). Using
# `update` keeps the session's other default headers and its case-insensitive
# header mapping, instead of replacing the whole mapping with a plain dict.
session = requests.Session()
session.headers.update({
    "user-agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/113.0"
})

In [None]:
def scrape_page(index: int):
    """
    Download one page of the pixilart "highlighted" gallery into ``PATH``.

    For every entry of the ``"art"`` list in the JSON response, the entry itself
    is stored as ``<unqid>.json`` and the image behind ``full_image_url`` is
    stored under the last path component of that url. Images that already exist
    on disk are not requested again.

    :param index: int, number inserted into the gallery API url
        (presumably the page index)
    :raises RuntimeError: if the gallery request or one of the image requests
        does not return HTTP status 200
    """
    url = f"https://www.pixilart.com/api/w/gallery/{index}/0/highlighted?user=true&liked=true"
    response = session.get(url)
    if response.status_code != 200:
        raise RuntimeError(response.text)

    for art in response.json()["art"]:
        meta_filename = PATH / f'{art["unqid"]}.json'
        meta_filename.write_text(json.dumps(art, indent=2))

        image_url = art["full_image_url"]
        image_filename = PATH / image_url.rsplit("/", 1)[-1]
        if image_filename.exists():
            continue

        print(image_url)
        image_response = session.get(image_url)
        # Never store an error page under an image file name: the `exists()`
        # check above would treat it as a finished download on every later run.
        if image_response.status_code != 200:
            raise RuntimeError(f"{image_url}: HTTP {image_response.status_code}")
        image_filename.write_bytes(image_response.content)
        # throttle: pause one second after each image download
        time.sleep(1)
        
# NOTE: this does not work - the requests seem to get blocked, presumably by Cloudflare
#scrape_page(0)

# Instead, I recorded a HAR file of the gallery in the browser

In [None]:
# Load the browser recording (HAR = HTTP Archive, a JSON document).
# HAR files are UTF-8 encoded, so the encoding is passed explicitly instead of
# relying on the platform's default text encoding.
with (PATH / "pixilart-com-2024-01-25.har").open(encoding="utf-8") as f:
    har_data = json.load(f)

In [None]:
def iter_images():
    """
    Yield all images stored in the recorded HAR file.

    Walks the entries of ``har_data["log"]["entries"]`` (with a progress bar) and
    decodes every base64 encoded ``image/png`` or ``image/webp`` response body
    whose request url ends with ``.png``.

    Bodies that can not be decoded are reported with a ``FAILED`` message and
    skipped.

    :return: generator of ``(PIL.Image.Image, filename)`` tuples, where
        ``filename`` is the last path component of the request url
    """
    for entry in tqdm(har_data["log"]["entries"]):
        request = entry.get("request")
        if not request or not request["url"].endswith(".png"):
            continue
        url = request["url"]

        response = entry.get("response")
        content = response.get("content") if response else None
        if not content:
            continue

        if content.get("mimeType") not in ("image/png", "image/webp"):
            continue
        if content.get("encoding") != "base64":
            continue

        try:
            data = base64.b64decode(content["text"].encode("ascii"))
            image = PIL.Image.open(io.BytesIO(data))
            # `open` is lazy and only identifies the file; force the decoding
            # here so that truncated or corrupt bodies are caught and skipped in
            # this place instead of raising later in the consumer
            image.load()
        except Exception as ex:
            print("FAILED", url, ex)
            continue

        yield image, url.rsplit("/", 1)[-1]
                        

# Preview: print the file name and display the first (up to) 10 decoded images.
# `range(10)` as first `zip` argument ends the loop after ten items.
for i, (image, filename) in zip(range(10), iter_images()):
    print(filename)
    display(image)
    

## resize images to smallest scale

This turns out to be a harder problem than expected. The approach below does not work well, so I keep the 400px previews as they are.

In [None]:
import torchvision.transforms.functional as VF
import torch
import math

def resize(img, scale: float, mode: VF.InterpolationMode = VF.InterpolationMode.NEAREST):
    """
    Rescale an image by a factor, without antialiasing.

    :param img: a PIL image, or an object whose ``shape`` ends with the two
        extents to scale (e.g. an image tensor)
    :param scale: float, factor applied to both extents; each resulting extent
        is truncated to an int and is at least 1
    :param mode: interpolation mode handed to ``VF.resize``
    :return: the result of ``VF.resize``
    """
    is_pil = isinstance(img, PIL.Image.Image)
    extents = (img.height, img.width) if is_pil else img.shape[-2:]
    target_size = [max(1, int(extent * scale)) for extent in extents]
    return VF.resize(img, target_size, mode, antialias=False)

def iter_small_images():
    """
    Yield a downscaled version of every image from ``iter_images()``.

    Each image is converted to an RGB tensor and shrunk by the integer factors
    2 to 8 with nearest-neighbour sampling. Every candidate is scaled back up
    bilinearly to the original size and compared with the original; the error is
    the fraction of tensor values that differ. Candidates whose error is not
    below 0.5 are ignored. An accepted candidate replaces the current selection
    if it is the first one, if its error is not larger than the selected one,
    or if its error is below 0.4.

    :return: generator of ``(image_tensor, filename, error)`` tuples; if no
        candidate was accepted, the unscaled tensor is yielded with ``None``
        as error
    """
    for pil_image, filename in iter_images():
        original = VF.to_tensor(pil_image.convert("RGB"))
        full_size = original.shape[-2:]

        # fall back to the unscaled image when no factor is accepted
        best_image, best_error = original, None

        for factor in range(2, 9):
            candidate = VF.resize(
                original,
                [extent // factor for extent in full_size],
                VF.InterpolationMode.NEAREST,
                antialias=False,
            )
            restored = VF.resize(
                candidate, full_size, VF.InterpolationMode.BILINEAR, antialias=False
            )
            error = (original != restored).float().mean()

            # larger factors come later, so an acceptable error below .4 makes
            # the smaller candidate win even if its error is higher than before
            if error < 0.5 and (best_error is None or error <= best_error or error < .4):
                best_error = error
                best_image = candidate

        yield best_image, filename, best_error


# Preview the selected downscaled version of the first (up to) 10 images.
for i, (image, filename, e) in zip(range(10), iter_small_images()):
    display(VF.to_pil_image(image))

# store

In [None]:
# Write every image of the HAR recording, converted to RGB, into PATH/raw
# under the file name taken from its request url.
(PATH / "raw").mkdir(parents=True, exist_ok=True)

for image, filename in iter_images():
    target = PATH / "raw" / filename
    image.convert("RGB").save(target)

## patch dataset

In [None]:
class PixilartPatchDataset(BaseIterableDataset):
    """
    Random patches of the stored pixilart images at several scales.

    Interleaves four ``RandomImagePatchIterableDataset`` instances that all read
    from the same image folder: three on rescaled versions of the images and
    one on the unscaled images.
    """

    def __init__(
        self,
        shape: Tuple[int, int, int] = (3, 64, 64),
        interpolation: VT.InterpolationMode = VT.InterpolationMode.BILINEAR,
        interleave_images: Optional[int] = 20,
        shuffle_images: bool = True,
    ):
        """
        :param shape: shape of the patches, handed to each patch dataset
        :param interpolation: interpolation mode used for the rescaled variants
        :param interleave_images: optional int, handed to each patch dataset
        :param shuffle_images: bool, handed as ``shuffle`` to the image folder dataset
        """
        self._ds_image = ImageFolderIterableDataset(
            Path("~/prog/data/pixilart/raw").expanduser(),
            shuffle=shuffle_images,
        )

        # (scale, patches_per_image_factor) of the rescaled variants.
        # The first scale presumably shrinks the 400px previews to about one patch.
        # NOTE(review): `shape[2:]` only holds the last entry of a 3-tuple, so the
        # patch height is ignored here - confirm this is intended for non-square patches.
        scaled_variants = (
            (min(shape[2:])/400, 1.),
            (.25, 2.),
            (.5, 3.),
        )
        patch_datasets = [
            RandomImagePatchIterableDataset(
                self._ds_image.scale(scale, interpolation=interpolation), shape,
                patches_per_image_factor=factor,
                interleave_images=interleave_images,
            )
            for scale, factor in scaled_variants
        ]
        # the unscaled images, with the default number of patches per image
        patch_datasets.append(
            RandomImagePatchIterableDataset(
                self._ds_image, shape,
                interleave_images=interleave_images,
            )
        )
        self._ds = InterleaveIterableDataset(tuple(patch_datasets))

    def __iter__(self):
        """Yield the items of the interleaved patch datasets."""
        yield from self._ds

# Build the patch dataset and show 14*14 samples of it in a grid with 14 images per row.
ds = PixilartPatchDataset(shape=(3, 64, 64), interleave_images=20)#.shuffle(10_000)

VF.to_pil_image(make_grid(ds.sample(14*14), nrow=14))
#VF.to_pil_image(resize(make_grid(ds.sample(64), nrow=4), 3))


In [None]:
# Iterate once through the whole dataset and discard the items;
# the tqdm progress bar then shows the number of items and the iteration speed.
for _ in tqdm(ds):
    pass

In [None]:
class RandomImagePatchIterableDataset(ImagePatchIterableDataset):
    def __init__(
            self,
            dataset: Union[Dataset, IterableDataset, Iterable[torch.Tensor], Iterable[Tuple[torch.Tensor, ...]]],
            shape: Union[int, Iterable[int]],
            patches_per_image_factor: float = 1.,
            interleave_images: Optional[int] = None,
            with_pos: bool = False,
    ):
        """
        Yields randomly positioned patches of each source image

        :param dataset: source dataset
        :param shape: one int, or several ints whose last two entries define
            the height and width of the patches
        :param patches_per_image_factor: float, multiplier for the number of patches per image;
            the base number is the number of non-overlapping patches that fit into the image
        :param interleave_images: optional int,
            number of source images to create patches from at the same time
        :param with_pos: bool, insert the patch rectangle position as second output argument
        """
        self.dataset = dataset
        self.shape = (shape, shape) if isinstance(shape, int) else tuple(shape)
        self.interleave_images = interleave_images
        self.with_pos = bool(with_pos)
        self.patches_per_image_factor = patches_per_image_factor
        # NOTE(review): presumably an attribute expected by the base class - TODO confirm
        self.max_size = None

    def _iter_image_patches(self, image: torch.Tensor):
        """
        Yield random patches of a single image.

        :param image: tensor that is indexed as (channel, y, x)
        :return: generator of patches, or of ``(patch, (y, x))`` tuples if ``with_pos`` is set;
            nothing is yielded if the image is smaller than the patch size
        """
        patch_h, patch_w = self.shape[-2:]
        height, width = image.shape[-2:]

        # Compare the spatial extents only; `self.shape` may start with a
        # channel entry that says nothing about the patch size.
        if height < patch_h or width < patch_w:
            return

        count = (height // patch_h) * (width // patch_w)
        count = int(count * self.patches_per_image_factor)

        for _ in range(count):
            # The stop value of `randrange` is exclusive: the `+ 1` makes the last
            # valid offset reachable, so patches can touch the bottom/right border.
            pos = (
                random.randrange(0, height - patch_h + 1),
                random.randrange(0, width - patch_w + 1),
            )

            patch = image[:, pos[0]: pos[0] + patch_h, pos[1]: pos[1] + patch_w]
            if self.with_pos:
                yield patch, pos
            else:
                yield patch
            

# Show 14*14 samples of `ds` in a grid with 14 images per row.
# NOTE(review): when the cells run top to bottom, `ds` was created before the
# class in this cell was defined, so it presumably still uses the previously
# imported implementation - re-create `ds` to test this one.
VF.to_pil_image(make_grid(ds.sample(14*14), nrow=14))


In [None]:
# Display every image of `ds_image` after `scale(.125)` (presumably 1/8 of the original size).
# NOTE(review): `ds_image` is not defined in any cell visible here; the image
# dataset above is only stored as `ds._ds_image` - TODO confirm where it comes from.
# This also rebinds `ds`, which held the patch dataset before.
ds = ds_image.scale(.125)
for i in ds:
    display(VF.to_pil_image(i))
