In [None]:
import sys
sys.path.append("..")

import io
import os
import random
import math
import time
import json
import shutil
from io import BytesIO
from pathlib import Path
from collections import OrderedDict
from typing import Optional, Callable, List, Tuple, Iterable, Generator, Union

import PIL.Image
import PIL.ImageDraw
import plotly
import plotly.express as px
plotly.io.templates.default = "plotly_dark"

from tqdm import tqdm
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, TensorDataset, IterableDataset
import torchvision.transforms as VT
import torchvision.transforms.functional as VF
from torchvision.utils import make_grid
from IPython.display import display

from src.datasets import *
from src.util.image import *
from src.util import *
from src.algo import *
from src.models.decoder import *
from src.models.transform import *
from src.models.util import *
from experiments import datasets
from experiments.denoise.resconv import ResConv

def resize(img, scale: float, mode: VF.InterpolationMode = VF.InterpolationMode.NEAREST):
    """
    Rescale a PIL image or an image tensor by a uniform factor.

    :param img: a ``PIL.Image.Image``, or an object whose ``shape[-2:]``
        holds (height, width)
    :param scale: factor applied to both height and width
    :param mode: interpolation mode handed to ``VF.resize``
    :return: the result of ``VF.resize`` for the scaled size
    """
    source_hw = (img.height, img.width) if isinstance(img, PIL.Image.Image) else img.shape[-2:]

    target_hw = []
    for extent in source_hw:
        # int() truncates; the floor of 1 keeps tiny scales from requesting an empty axis.
        target_hw.append(max(1, int(extent * scale)))

    return VF.resize(img, target_hw, mode, antialias=False)

In [None]:
import yaml
import ipywidgets
from src.clipig.clipig_task import ClipigTask

In [None]:
class ImageWidget(ipywidgets.Image):
    """An ``ipywidgets.Image`` that can be filled from a PIL image or a tensor."""

    def set_pil(self, image: PIL.Image.Image):
        """Encode ``image`` as PNG and use the bytes as the widget content."""
        with io.BytesIO() as buffer:
            image.save(buffer, "png")
            png_bytes = buffer.getvalue()
        self.format = "png"
        self.value = png_bytes

    def set_tensor(self, image: torch.Tensor):
        """Convert ``image`` with ``VF.to_pil_image`` and show the result."""
        self.set_pil(VF.to_pil_image(image))
        
# Smoke test for ImageWidget: display an empty widget, then push an image file into it.
image_widget = ImageWidget()
display(image_widget)

# NOTE(review): absolute, machine-specific path -- this line only works where that file exists.
image_widget.set_pil(PIL.Image.open("/home/bergi/Pictures/bob/bob-trans-back.png"))
# (unfinished leftover) image.value =

In [None]:
# Print the text of a preset YAML file; the path is relative to the notebook's working directory.
print(Path("../src/clipig/presets/fractal-224.yaml").read_text())

In [None]:
# YAML settings kept as a string, in the form run_config() expects
# (it parses the text with yaml.safe_load and passes the dict to ClipigTask).
# Text targets: the r'lyeh prompt with weight 1.0 and "words, letters" with weight -1.
# Optimizer entry: Adam. Active transformations: repeat, random_affine, random_crop;
# multiplication and blur are listed but switched off.
# NOTE: run_config() overwrites num_iterations, so the 10000 below is not used there.
config_1 = """
clip_model_name: ViT-B/32
device: auto
initialize: random
num_iterations: 10000
source_model:
  name: pixels
  params:
    channels: RGB
    size:
    - 224
    - 224
targets:
- batch_size: 5
  optimizer:
    betas:
    - 0.9
    - 0.999
    learnrate: 0.02
    optimizer: Adam
    weight_decay: 1.0e-06
  target_features:
  - text: cthulhu's cave in the city of r'lyeh
    weight: 1.0
  #- text: fractal patterns
  #  weight: 0.2
  - text: words, letters
    weight: -1.
  transformations:
  - name: repeat
    params:
      active: true
      repeat_xy:
      - 2
      - 2
  - name: random_affine
    params:
      active: true
      degrees_min_max:
      - -10.6
      - 10.0
      interpolation: bilinear
      scale_min_max:
      - 0.9
      - 1.1
      shear_min_max:
      - -15.0
      - 15.0
      translate_xy:
      - 0.01
      - 0.01
  - name: random_crop
    params:
      active: true
      size: 224
  - name: multiplication
    params:
      active: false
      add: 0.3
      multiply: 0.5
  - name: blur
    params:
      active: false
      kernel_size:
      - 3
      - 3
      mix: 0.7
      sigma:
      - 1.0
      - 1.0
"""

In [None]:
# YAML settings kept as a string, in the form run_config() expects
# (it parses the text with yaml.safe_load and passes the dict to ClipigTask).
# Text targets: the unicorn prompt with weight 1.0 and "words, letters" with weight -1.
# Optimizer entry: RAdam. Active transformations: repeat, random_affine, random_crop
# and multiplication (add 0., multiply 0.2); blur is listed but switched off.
# NOTE: run_config() overwrites num_iterations, so the 10000 below is not used there.
config_2 = """
clip_model_name: ViT-B/32
device: auto
initialize: random
num_iterations: 10000
source_model:
  name: pixels
  params:
    channels: RGB
    size:
    - 224
    - 224
targets:
- batch_size: 5
  optimizer:
    betas:
    - 0.9
    - 0.999
    learnrate: 0.02
    optimizer: RAdam
    weight_decay: 1.0e-06
  target_features:
  - text: unicorns flying through the clouds
    weight: 1.0
  #- text: fractal patterns
  #  weight: 0.2
  - text: words, letters
    weight: -1.
  transformations:
  - name: repeat
    params:
      active: true
      repeat_xy:
      - 2
      - 2
  - name: random_affine
    params:
      active: true
      degrees_min_max:
      - -10.6
      - 10.0
      interpolation: bilinear
      scale_min_max:
      - 0.9
      - 1.1
      shear_min_max:
      - -15.0
      - 15.0
      translate_xy:
      - 0.01
      - 0.01
  - name: random_crop
    params:
      active: true
      size: 224
  - name: multiplication
    params:
      active: true
      add: 0.
      multiply: 0.2
  - name: blur
    params:
      active: false
      kernel_size:
      - 3
      - 3
      mix: 0.7
      sigma:
      - 1.0
      - 1.0
"""

In [None]:
# YAML settings kept as a string; this is the config passed to run_config() further down
# (it parses the text with yaml.safe_load and passes the dict to ClipigTask).
# Text targets: the "fisheye view ..." prompt with weight 1.0 and "words, letters" with weight -1.0;
# unlike config_1/config_2 each target also carries `image: ''` and `type: text`.
# batch_size is 1. All listed transformations are active: padding, random_affine,
# random_crop, multiplication and blur.
# NOTE: run_config() overwrites num_iterations, so the 10000 below is not used there.
config_3 = """
clip_model_name: ViT-B/32
device: auto
initialize: random
num_iterations: 10000
source_model:
  name: pixels
  params:
    channels: RGB
    size:
    - 224
    - 224
targets:
- batch_size: 1
  optimizer:
    betas:
    - 0.9
    - 0.999
    learnrate: 0.02
    optimizer: Adam
    weight_decay: 1.0e-06
  target_features:
  - image: ''
    text: fisheye view of a cthulhu fractal
    type: text
    weight: 1.0
  - image: ''
    text: words, letters
    type: text
    weight: -1.0
  transformations:
  - name: padding
    params:
      active: true
      pad_left: 100
      pad_right: 100
      pad_top: 50
      pad_bottom: 150
      padding_mode: symmetric
  - name: random_affine
    params:
      active: true
      degrees_min_max:
      - -5.6
      - 5.0
      interpolation: bilinear
      scale_min_max:
      - 0.9
      - 1.1
      shear_min_max:
      - -15.0
      - 15.0
      translate_xy:
      - 0.01
      - 0.01
  - name: random_crop
    params:
      active: true
      pad_if_needed: true
      padding_mode: constant
      size: 224
  - name: multiplication
    params:
      active: true
      add: 0.5
      multiply: 0.1
  - name: blur
    params:
      active: true
      kernel_size:
      - 3
      - 3
      mix: 0.7
      sigma:
      - 1.0
      - 1.0
"""

In [None]:
from src.clipig.transformations.value_trans import Denoising

# Denoising transformation that run_config() applies to every frame before it is
# stored and displayed; run_config() looks it up as the module-level name `denoiser`.
denoiser = Denoising(
    model="denoise-mid-64x64-150k",
    mix=.5,
    overlap=(7, 7),
)
# Prefer the GPU, but fall back to the CPU instead of raising on machines without CUDA.
denoiser.model.to("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
def perspective_transform(
        image: torch.Tensor,
        top: float = 1.,
        left: float = 1.,
        bottom: float = 1.,
        right: float = 1.,
):
    """
    Warp ``image`` with ``VF.perspective`` by moving its four corners.

    Each factor is turned into a corner offset of ``(factor - 1) * extent / 2``.
    ``top`` and ``bottom`` use the width and shift the x coordinates of the two
    corners on that edge in opposite directions; ``left`` and ``right`` use the
    height and shift the y coordinates likewise. A factor of 1 gives an offset
    of 0, so with all defaults the end points equal the start points.

    :param image: tensor whose last two dimensions are (height, width)
    :param top: factor for the corners (0, 0) and (w, 0), applied to x
    :param left: factor for the corners (0, 0) and (0, h), applied to y
    :param bottom: factor for the corners (w, h) and (0, h), applied to x
    :param right: factor for the corners (w, 0) and (w, h), applied to y
    :return: the result of ``VF.perspective`` with bilinear interpolation
    """
    height, width = image.shape[-2:]

    def corner_offset(factor: float, extent: int):
        # The lower bound keeps two opposite corners from moving past each other.
        return max(-extent // 2 + 1, (factor - 1.) * extent / 2)

    d_top = corner_offset(top, width)
    d_bottom = corner_offset(bottom, width)
    d_left = corner_offset(left, height)
    d_right = corner_offset(right, height)

    start_points = [[0, 0], [width, 0], [width, height], [0, height]]
    end_points = [
        [-d_top, -d_left],
        [width + d_top, -d_right],
        [width + d_bottom, height + d_right],
        [-d_bottom, height + d_left],
    ]
    return VF.perspective(
        image,
        start_points,
        end_points,
        interpolation=VF.InterpolationMode.BILINEAR,
    )

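# Disabled example call. Note that `image` is only defined in a later cell,
# so that cell has to run first before this can be re-enabled.
# VF.to_pil_image(perspective_transform(
#     image,
#     # top=2,
#     # bottom=.5,
#     left=1.5,
#     right=1.1,
# ))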

In [None]:
def run_config(
        config: str,
        length_seconds: float = 60,
        fps: int = 30,
        frame_stride: int = 10,
        store_directory: Optional[Union[str, Path]] = None,
        reset: bool = False,
        dummy: bool = False,
) -> None:
    """
    Run a ClipigTask from a YAML string and turn its output into animation frames.

    The YAML text is parsed with ``yaml.safe_load``; ``num_iterations`` is
    replaced by ``int(length_seconds * fps * frame_stride)`` and
    ``pixel_yield_delay_sec`` is set to 0. While the task runs, every
    ``frame_stride``-th event carrying ``"pixels"`` becomes one frame: the
    pixels are clamped, passed through the module-level ``denoiser``, shown in
    an ``ImageWidget`` at twice the size and, if requested, written to disk.
    After each frame the clamped (not denoised) pixels are warped slightly with
    ``perspective_transform`` and ``VF.affine`` and written back into the task
    via ``task.source_model.set_image``, so the next optimisation steps start
    from the warped image.

    A ``KeyboardInterrupt`` ends the loop quietly after printing "stopped".

    :param config: YAML text describing the task
    :param length_seconds: intended clip length; only used to compute ``num_iterations``
    :param fps: frames per second; used for ``num_iterations`` and for the
        animation clock that drives the warp
    :param frame_stride: number of pixel events per stored frame
    :param store_directory: where ``frame-XXXXXXXX.png`` files are written;
        nothing is stored when this is empty/None
    :param reset: if true and ``store_directory`` exists, the directory is
        deleted first; otherwise existing ``*.png`` files make the run resume
        from the last of them
    :param dummy: if true, ``config["dummy_mode"]`` is set to True
    :return: None

    NOTE(review): relies on the global ``denoiser`` defined in another cell.
    """
    # One frame per `frame_stride` pixel events -> this many events fill the clip.
    num_iterations = int(length_seconds * fps * frame_stride)
    
    # From here on `config` is the parsed dict, not the YAML text.
    fp = io.StringIO(config)
    config = yaml.safe_load(fp)
    config["num_iterations"] = num_iterations
    config["pixel_yield_delay_sec"] = 0.
    
    image_widget = ImageWidget()
    status_widget = ipywidgets.Text()
    display(image_widget)
    display(status_widget)

    # Both counters start at -1 so the first pixel event becomes index 0 and is kept as frame 0.
    image_idx = -1
    frame_idx = -1
    second = 0

    if store_directory:
        store_directory = Path(store_directory)
        if store_directory.exists():
            if reset:
                # Destructive: drops every previously stored frame.
                shutil.rmtree(store_directory)
            else:
                filenames = sorted(store_directory.glob("*.png"))
                if filenames:
                    # Resume: continue the numbering after the files already present
                    # and start from the last stored frame (which is a denoised one).
                    # Assumes the existing files are numbered without gaps -- TODO confirm.
                    frame_idx = len(filenames) - 1
                    image_idx = frame_idx * frame_stride
                    config["initialize"] = "input"
                    config["input_image"] = VF.to_tensor(PIL.Image.open(str(filenames[-1])))

        os.makedirs(store_directory, exist_ok=True)

    if dummy:
        config["dummy_mode"] = True
    task = ClipigTask(config)    
    status = "requested"
    
    try:
        with tqdm(total=num_iterations) as progress:
            for event in task.run():
                if "status" in event:
                    status = event["status"]
        
                if "pixels" in event:
                    image_idx += 1
                    progress.update(1)
                    if image_idx % frame_stride == 0:
                        frame_idx += 1
                        # Animation clock in seconds; drives the oscillation of the warp below.
                        second = frame_idx / fps
                        
                        pixels = event["pixels"].clamp(0, 1)
                        with torch.no_grad():
                            # NOTE(review): 0.004 is close to 1/255 -- presumably compensates for
                            # truncation when converting to 8 bit; confirm the intent.
                            pixels_denoised = (denoiser(pixels) + 0.004).clamp(0, 1)
                    
                        pixels_pil = VF.to_pil_image(pixels_denoised)
                        if store_directory:
                            pixels_pil.save(str(store_directory / f"frame-{frame_idx:08}.png"))
                            
                        image_widget.set_pil(resize(pixels_pil, 2))

                        # Feedback step: warp the clamped (not denoised) pixels a little
                        # and hand them back to the task as its new source image.
                        f = 0.005
                        s = math.sin(second / 3.)
                        pixels = perspective_transform(
                            pixels,
                            top=1 - f/5,
                            bottom=1 + f,
                        )
                        pixels = VF.affine(
                            pixels, 
                            angle=-s / 5, 
                            translate=[0., 0.],
                            scale=1 + f/4, 
                            shear=[0., 0.],
                            #shear=[0.1*math.sin(second/1.3), 0.1*math.sin(second/1.7)],
                            interpolation=VF.InterpolationMode.BILINEAR,
                            # The centre oscillates horizontally with `s` around the middle of the image.
                            center=[pixels.shape[-1] * (.5 + .4*s), pixels.shape[-2] * .5],
                        )
                        
                        task.source_model.set_image(pixels.clamp(0, 1))               
    
                # Refreshed on every event, not only on pixel events.
                status_widget.value = (
                    f"status: {status}"
                    f", second={second:.2f}"
                    f", image_idx={image_idx}, frame_idx={frame_idx}"
                    
                )
                
    except KeyboardInterrupt:
        # Interrupting the kernel is the intended way to stop a run early.
        print("stopped")
        pass

# Render config_3 with the default length_seconds=60 and fps=30, keeping one frame
# per 30 pixel events.
# NOTE: reset=True deletes an existing ./clipig-frames/cthulhu-fractal directory
# before the run starts; set it to False to resume from the stored frames instead.
run_config(
    config_3,
    store_directory="./clipig-frames/cthulhu-fractal",
    reset=True,
    frame_stride=30,
    # quick test variant: dummy=True, frame_stride=1,
)

In [None]:
# Load a test picture as a tensor for the perspective experiments and show it again as a PIL image.
# NOTE(review): absolute, machine-specific path -- this cell only works where that file exists.
image = VF.to_tensor(PIL.Image.open("/home/bergi/Pictures/bob/9872432.jpeg"))
VF.to_pil_image(image)

In [None]:
# Manual experiment with VF.perspective on the `image` tensor loaded above:
# the points [0, 0] and [w, 0] stay where they are, while [w, h] and [0, h]
# are moved apart by f in their first coordinate.
f = 10
h, w = image.shape[-2:]
VF.to_pil_image(VF.perspective(
    image,
    [[0, 0], [w, 0], [w, h], [0, h]],
    [[0, 0], [w, 0], [w+f, h], [0-f, h]],
    # Earlier end-point attempts, kept for reference:
    #[[1, 0], [1, 1], [0, 1], [0, 0]],
    #[[1.001, 0.001], [1, 1], [0, 1], [0, 0]],
    #[[1, -0.1], [1, 1.1], [0, 1], [0, 0]],
    interpolation=VF.InterpolationMode.BILINEAR,
))

In [None]:
# Show the documentation of VF.perspective. Uses the built-in help() rather than
# the IPython-only `?` suffix, which is not valid Python outside an IPython session.
help(VF.perspective)