In [None]:
from init_notebook import *

In [None]:
import yaml
import ipywidgets
from src.clipig.clipig_task import ClipigTask

In [None]:
# Print the contents of the fractal-224 preset file.
preset_path = Path("../src/clipig/presets/fractal-224.yaml")
print(preset_path.read_text())

In [None]:
# CLIPig task config (YAML text): a 224x224 RGB pixel image, Adam optimizer, batch size 5.
# Text targets: the cthulhu prompt with weight 1.0 and "words, letters" with weight -1.
# Active transformations: repeat 2x2 -> random_affine -> random_crop(224);
# multiplication and blur are present but switched off.
config_1 = """
clip_model_name: ViT-B/32
device: auto
initialize: random
num_iterations: 10000
source_model:
  name: pixels
  params:
    channels: RGB
    size:
    - 224
    - 224
targets:
- batch_size: 5
  optimizer:
    betas:
    - 0.9
    - 0.999
    learnrate: 0.02
    optimizer: Adam
    weight_decay: 1.0e-06
  target_features:
  - text: cthulhu's cave in the city of r'lyeh
    weight: 1.0
  #- text: fractal patterns
  #  weight: 0.2
  - text: words, letters
    weight: -1.
  transformations:
  - name: repeat
    params:
      active: true
      repeat_xy:
      - 2
      - 2
  - name: random_affine
    params:
      active: true
      degrees_min_max:
      - -10.6
      - 10.0
      interpolation: bilinear
      scale_min_max:
      - 0.9
      - 1.1
      shear_min_max:
      - -15.0
      - 15.0
      translate_xy:
      - 0.01
      - 0.01
  - name: random_crop
    params:
      active: true
      size: 224
  - name: multiplication
    params:
      active: false
      add: 0.3
      multiply: 0.5
  - name: blur
    params:
      active: false
      kernel_size:
      - 3
      - 3
      mix: 0.7
      sigma:
      - 1.0
      - 1.0
"""

In [None]:
# CLIPig task config (YAML text): a 224x224 RGB pixel image, RAdam optimizer, batch size 5.
# Text targets: the unicorn prompt with weight 1.0 and "words, letters" with weight -1.
# Active transformations: repeat 2x2 -> random_affine -> random_crop(224) -> multiplication;
# blur is present but switched off.
config_2 = """
clip_model_name: ViT-B/32
device: auto
initialize: random
num_iterations: 10000
source_model:
  name: pixels
  params:
    channels: RGB
    size:
    - 224
    - 224
targets:
- batch_size: 5
  optimizer:
    betas:
    - 0.9
    - 0.999
    learnrate: 0.02
    optimizer: RAdam
    weight_decay: 1.0e-06
  target_features:
  - text: unicorns flying through the clouds
    weight: 1.0
  #- text: fractal patterns
  #  weight: 0.2
  - text: words, letters
    weight: -1.
  transformations:
  - name: repeat
    params:
      active: true
      repeat_xy:
      - 2
      - 2
  - name: random_affine
    params:
      active: true
      degrees_min_max:
      - -10.6
      - 10.0
      interpolation: bilinear
      scale_min_max:
      - 0.9
      - 1.1
      shear_min_max:
      - -15.0
      - 15.0
      translate_xy:
      - 0.01
      - 0.01
  - name: random_crop
    params:
      active: true
      size: 224
  - name: multiplication
    params:
      active: true
      add: 0.
      multiply: 0.2
  - name: blur
    params:
      active: false
      kernel_size:
      - 3
      - 3
      mix: 0.7
      sigma:
      - 1.0
      - 1.0
"""

In [None]:
# 16:9 aspect ratio (same as 1920/1080): a height of 225 gives a width of 400,
# which is the `size` pair used in config_3.
225 * 16/9

In [None]:
# CLIPig task config (YAML text): pixel image with size [400, 225], Adam optimizer, batch size 1.
# Text targets: "expressions of anger" (1.0) and "fires and explosions" (0.5) with positive
# weights; "words, letters", "people" and "repetitive" with weight -0.5 each.
# Transformations (all active): symmetric padding -> random_affine ->
# random_crop(224, pad_if_needed) -> multiplication -> blur.
config_3 = """
clip_model_name: ViT-B/32
device: auto
initialize: random
num_iterations: 10000
source_model:
  name: pixels
  params:
    channels: RGB
    size:
    - 400
    - 225
targets:
- batch_size: 1
  optimizer:
    betas:
    - 0.9
    - 0.999
    learnrate: 0.02
    optimizer: Adam
    weight_decay: 1.0e-06
  target_features:
  - image: ''
    #text: norwegian landscape, huge flowers in the foreground
    text: expressions of anger
    type: text
    weight: 1.0
  - image: ''
    text: fires and explosions  
    type: text
    weight: 0.5
  - image: ''
    text: words, letters
    type: text
    weight: -0.5
  - image: ''
    text: people 
    type: text
    weight: -0.5
  - image: ''
    text: repetitive 
    type: text
    weight: -0.5
  transformations:
  - name: padding
    params:
      active: true
      pad_left: 100
      pad_right: 100
      pad_top: 50
      pad_bottom: 150
      padding_mode: symmetric
  - name: random_affine
    params:
      active: true
      degrees_min_max:
      - -5.6
      - 5.0
      interpolation: bilinear
      scale_min_max:
      - 0.9
      - 1.1
      shear_min_max:
      - -15.0
      - 15.0
      translate_xy:
      - 0.01
      - 0.01
  - name: random_crop
    params:
      active: true
      pad_if_needed: true
      padding_mode: constant
      size: 224
  - name: multiplication
    params:
      active: true
      add: 0.5
      multiply: 0.1
  - name: blur
    params:
      active: true
      kernel_size:
      - 3
      - 3
      mix: 0.7
      sigma:
      - 1.0
      - 1.0
"""

In [None]:
from src.clipig.transformations.value_trans import Denoising

# Module-level denoiser used by the rendering cells further down.
# Alternative `model` values kept for reference:
#   "denoise-mid-64x64-150k", "degradient-mid-64x64-150k", "declip-1"
denoiser = Denoising(model="denoise-heavy-2", mix=0.5, overlap=(7, 7))
# move the wrapped network to the GPU (requires CUDA)
denoiser.model.cuda()

In [None]:
def perspective_transform(
        image: torch.Tensor,
        top: float = 1.,
        left: float = 1.,
        bottom: float = 1.,
        right: float = 1.,
):
    """
    Perspective-warp `image` by stretching or shrinking each of its four edges.

    Every edge argument is a relative factor where 1.0 means "unchanged".
    `top` / `bottom` move the two corners of that edge horizontally by
    `(factor - 1) * width / 2` pixels each (outwards for factors > 1),
    `left` / `right` move the corners of that edge vertically by
    `(factor - 1) * height / 2` pixels each.

    :param image: tensor whose last two dimensions are (height, width)
    :param top: factor for the top edge
    :param left: factor for the left edge
    :param bottom: factor for the bottom edge
    :param right: factor for the right edge
    :return: the result of `VF.perspective` with bilinear interpolation
    """
    height, width = image.shape[-2:]

    def corner_offset(factor, extent):
        # Pixel displacement of an edge's corners. The lower bound limits how far
        # the two corners of one edge can move towards each other.
        return max(-extent // 2 + 1, (factor - 1.) * extent / 2)

    d_top = corner_offset(top, width)
    d_bottom = corner_offset(bottom, width)
    d_left = corner_offset(left, height)
    d_right = corner_offset(right, height)

    # corner order: top-left, top-right, bottom-right, bottom-left
    source_corners = [[0, 0], [width, 0], [width, height], [0, height]]
    target_corners = [
        [-d_top, -d_left],
        [width + d_top, -d_right],
        [width + d_bottom, height + d_right],
        [-d_bottom, height + d_left],
    ]
    return VF.perspective(
        image,
        source_corners,
        target_corners,
        interpolation=VF.InterpolationMode.BILINEAR,
    )

#VF.to_pil_image(perspective_transform(
#    image,
    #top=2,
    #bottom=.5,
#    left=1.5,
#    right=1.1,
#))

In [None]:
def run_config_video(
        config: str,
        length_seconds: float = 120,
        fps: int = 30,
        frame_stride: int = 10,
        store_directory: Optional[Union[str, Path]] = None,
        reset: bool = False,
        dummy: bool = False,
):
    """
    Run a CLIPig task and turn every `frame_stride`-th yielded image into a video frame.

    Each video frame is passed through the module-level `denoiser`, optionally stored
    as PNG and shown in a notebook widget. A blend of raw and denoised image is then
    warped (perspective + affine, driven by sine waves of the video time) and written
    back into the task's source model, so the optimization continues from the moved image.

    :param config: YAML text of the CLIPig task configuration
    :param length_seconds: video length; the task runs
        `length_seconds * fps * frame_stride` iterations
    :param fps: video frames per second, used to derive the current `second`
    :param frame_stride: number of yielded images per video frame
    :param store_directory: if given, frames are saved there as `frame-XXXXXXXX.png`.
        If it already contains PNG files (and `reset` is False), counting resumes after
        them and the last file is used as the initial image.
    :param reset: delete an existing `store_directory` before starting
    :param dummy: set `dummy_mode` in the task config
    """
    num_iterations = int(length_seconds * fps * frame_stride)
    
    fp = io.StringIO(config)
    config = yaml.safe_load(fp)
    # the run length is defined by the video parameters, not by the YAML text
    config["num_iterations"] = num_iterations
    config["pixel_yield_delay_sec"] = 0.
    
    image_widget = ImageWidget()
    status_widget = ipywidgets.Text()
    display(image_widget)
    display(status_widget)

    # both counters are incremented before they are used, so the first image/frame gets index 0
    image_idx = -1
    frame_idx = -1
    second = 0

    if store_directory:
        store_directory = Path(store_directory)
        if store_directory.exists():
            if reset:
                shutil.rmtree(store_directory)
            else:
                # resume: continue numbering after the stored frames and start from the last one
                # (assumes the PNG files are the contiguous frames written by this function)
                filenames = sorted(store_directory.glob("*.png"))
                if filenames:
                    frame_idx = len(filenames) - 1
                    image_idx = frame_idx * frame_stride
                    config["initialize"] = "input"
                    config["input_image"] = VF.to_tensor(PIL.Image.open(str(filenames[-1])))

        os.makedirs(store_directory, exist_ok=True)

    if dummy:
        config["dummy_mode"] = True
    task = ClipigTask(config)    
    status = "requested"
    
    try:
        with tqdm(total=num_iterations) as progress:
            for event in task.run():
                if "status" in event:
                    status = event["status"]
        
                if "pixels" in event:
                    image_idx += 1
                    progress.update(1)
                    # only every `frame_stride`-th image becomes a video frame
                    if image_idx % frame_stride == 0:
                        frame_idx += 1
                        second = frame_idx / fps
                        
                        pixels = event["pixels"].clamp(0, 1)
                        with torch.no_grad():
                            pixels_denoised = (denoiser(pixels) + 0.004).clamp(0, 1)

                        # 70% denoised / 30% raw: this blend is what gets warped and fed back
                        pixels = pixels + .7 * (pixels_denoised - pixels)
                    
                        # the stored / displayed frame is the fully denoised image
                        pixels_pil = VF.to_pil_image(pixels_denoised)
                        if store_directory:
                            pixels_pil.save(str(store_directory / f"frame-{frame_idx:08}.png"))
                            
                        image_widget.set_pil(resize(pixels_pil, 2))

                        # camera movement: two sine waves of the video time drive the
                        # perspective factors and the rotation / scale / shear below
                        f = 0.01
                        s = math.sin(second / 4.7)
                        s2 = math.sin(second / 3.)
                        pixels = perspective_transform(
                            pixels,
                            top=1 - f/5,
                            bottom=1 + f * (2. + 8. * abs(s)),
                            left=1 + f * s,
                            right=1 + f * s2,
                        )
                        pixels = VF.affine(
                            pixels, 
                            angle=-s / 4., 
                            translate=[0.0, 0.0],
                            scale=1 + .5 * f * (.5 + s2 - s), 
                            shear=[-s2, s],
                            #shear=[0.1*math.sin(second/1.3), 0.1*math.sin(second/1.7)],
                            interpolation=VF.InterpolationMode.BILINEAR,
                            #center=[pixels.shape[-1] * (.5 + .4*s), pixels.shape[-2] * .5],
                        )
                        
                        # continue optimizing from the moved image
                        task.source_model.set_image(pixels.clamp(0, 1))               
    
                # refreshed on every event, not only on pixel events
                status_widget.value = (
                    f"status: {status}"
                    f", second={second:.2f}"
                    f", image_idx={image_idx}, frame_idx={frame_idx}"
                    
                )
                
    except KeyboardInterrupt:
        # interrupting the kernel stops rendering without a traceback
        print("stopped")
        pass

# Render config_3, keeping one video frame per 60 yielded images.
# Optional toggles: `reset=True` removes previously stored frames;
# `dummy=True, frame_stride=1` sets `dummy_mode` in the config and stores every image.
run_config_video(
    config=config_3,
    frame_stride=60,
    store_directory="./clipig-frames/movement",
)

In [None]:
# NOTE(review): absolute, machine-specific path -- this cell only runs where that file exists.
image_path = "/home/bergi/Pictures/bob/9872432.jpeg"
image = VF.to_tensor(PIL.Image.open(image_path))
VF.to_pil_image(image)

In [None]:
# Perspective experiment on `image`: the top corners stay where they are,
# both bottom corners are pushed outwards by `f` pixels.
f = 10
h, w = image.shape[-2:]
startpoints = [[0, 0], [w, 0], [w, h], [0, h]]
endpoints = [[0, 0], [w, 0], [w + f, h], [0 - f, h]]
# earlier point-list variants, kept for reference:
#   [[1, 0], [1, 1], [0, 1], [0, 0]]
#   [[1.001, 0.001], [1, 1], [0, 1], [0, 0]]
#   [[1, -0.1], [1, 1.1], [0, 1], [0, 0]]
warped = VF.perspective(
    image,
    startpoints,
    endpoints,
    interpolation=VF.InterpolationMode.BILINEAR,
)
VF.to_pil_image(warped)

In [None]:
from src.clipig.parameters import get_complete_clipig_task_config
from copy import deepcopy


In [None]:
class ClipigVideoRenderer:
    """
    Runs a `ClipigTask` and converts the images it yields into a sequence of video frames.

    Two hooks can be overridden by subclasses:

    - `transform` is called every `transformation_frame_stride` CLIPig images; its
      result is written back into the task's source model.
    - `post_process` is called every `video_frame_stride` CLIPig images, right before
      the frame is stored and/or displayed.
    """

    def __init__(
            self,
            config: Union[dict, str, Path],
            fps: int = 30,
            video_frame_stride: int = 1,
            transformation_frame_stride: Optional[int] = None,
            store_directory: Optional[Union[str, Path]] = None,
            display_jupyter: bool = False,
    ):
        """
        :param config: CLIPig task configuration, either an already parsed dict,
            YAML text, or a `Path` to a YAML file
        :param fps: video frames per second
        :param video_frame_stride: number of CLIPig images per video frame
        :param transformation_frame_stride: number of CLIPig images between calls
            to `transform`, defaults to `video_frame_stride`
        :param store_directory: if given, frames are stored there as `frame-XXXXXXXX.png`
        :param display_jupyter: show the current frame and a status line as notebook widgets
        """
        if isinstance(config, Path):
            # a Path is part of the declared signature: read the YAML text from that file
            config = config.read_text()
        if not isinstance(config, dict):
            config = yaml.safe_load(config)

        self.config = get_complete_clipig_task_config(config)
        self.store_directory = Path(store_directory) if store_directory is not None else None
        self.fps = fps
        self.video_frame_stride = video_frame_stride
        self.transformation_frame_stride = video_frame_stride if transformation_frame_stride is None else transformation_frame_stride
        self.display_jupyter = display_jupyter

        self.clipig_frame = 0
        self.video_frame = 0
        self.task: Optional[ClipigTask] = None

        if self.display_jupyter:
            # imported lazily so the class also works without a notebook frontend
            from IPython.display import display
            import ipywidgets
            from src.util.widgets import ImageWidget
            self._image_widget = ImageWidget()
            self._status_widget = ipywidgets.Text()
            display(self._image_widget)
            display(self._status_widget)

    @property
    def second(self) -> float:
        """Video time in seconds that corresponds to the current CLIPig frame counter."""
        return self.clipig_frame / self.video_frame_stride / self.fps

    @staticmethod
    def _num_iterations(seconds: float, video_frame_stride: int, fps: int) -> int:
        """Number of CLIPig iterations for `seconds` of video, always a whole number."""
        return int(seconds * video_frame_stride * fps)

    def transform(self, pixels: torch.Tensor, delta: float) -> torch.Tensor:
        """
        Hook: modify the image that CLIPig continues to work on.

        :param pixels: current image, clamped to [0, 1]
        :param delta: video time in seconds since the previous call
        :return: the image to put back into the source model (default: unchanged)
        """
        return pixels

    def post_process(self, pixels: torch.Tensor, delta: float) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
        """
        post-processing step before video frame writing.

        If returning a tuple, the second argument is put back into clipig
        """
        return pixels

    def run(
            self,
            seconds: float,
            reset: bool = False,
    ):
        """
        Render `seconds` of video.

        If `store_directory` already contains PNG files and `reset` is False, frame
        numbering continues after them and the last stored frame becomes the initial
        image. A KeyboardInterrupt stops the rendering without raising.

        :param seconds: length of video to render
        :param reset: delete an existing `store_directory` before starting
        """
        # a float `seconds` must not produce a fractional iteration count
        num_iterations = self._num_iterations(seconds, self.video_frame_stride, self.fps)

        config = deepcopy(self.config)
        config["num_iterations"] = num_iterations
        config["pixel_yield_delay_sec"] = 0.

        image_idx = 0
        frame_idx = 0

        if self.store_directory is not None:
            if self.store_directory.exists():
                if reset:
                    shutil.rmtree(self.store_directory)
                else:
                    filenames = sorted(self.store_directory.glob("*.png"))
                    if filenames:
                        # resume after the frames that are already on disk
                        frame_idx = len(filenames)
                        image_idx = frame_idx * self.video_frame_stride
                        config["initialize"] = "input"
                        # context manager releases the file handle once the tensor is built
                        with PIL.Image.open(str(filenames[-1])) as last_frame:
                            config["input_image"] = VF.to_tensor(last_frame)

            os.makedirs(self.store_directory, exist_ok=True)

        self.video_frame = frame_idx
        self.clipig_frame = image_idx
        self.task = ClipigTask(config)
        status = "requested"

        last_video_frame = self.clipig_frame
        last_transformation_frame = self.clipig_frame
        try:
            with tqdm(total=num_iterations) as progress:
                for event in self.task.run():
                    if "status" in event:
                        status = event["status"]

                    if "pixels" in event:
                        progress.update(1)
                        # self.clipig_frame is only advanced at the end of this branch,
                        # so the hooks still see the previous value through `self.second`
                        clipig_frame = self.clipig_frame + 1
                        pixels = event["pixels"].clamp(0, 1)

                        if clipig_frame - last_transformation_frame >= self.transformation_frame_stride:
                            delta = (clipig_frame - last_transformation_frame) / self.video_frame_stride / self.fps
                            last_transformation_frame = clipig_frame

                            with torch.no_grad():
                                pixels = self.transform(pixels, delta).clamp(0, 1)
                                self.task.source_model.set_image(pixels)

                        if clipig_frame - last_video_frame >= self.video_frame_stride:
                            delta = (clipig_frame - last_video_frame) / self.video_frame_stride / self.fps
                            last_video_frame = clipig_frame
                            with torch.no_grad():
                                pixels = self.post_process(pixels, delta)
                                if isinstance(pixels, (list, tuple)):
                                    # second element is fed back into CLIPig
                                    pixels, source_pixels = pixels
                                    self.task.source_model.set_image(source_pixels.clamp(0, 1))
                                pixels = pixels.clamp(0, 1)

                            if self.store_directory is not None or self.display_jupyter:
                                pixels_pil = VF.to_pil_image(pixels)
                                if self.store_directory is not None:
                                    pixels_pil.save(self.store_directory / f"frame-{self.video_frame:08}.png")

                                if self.display_jupyter:
                                    self._image_widget.set_pil(image_minimum_size(pixels_pil, width=500))

                            self.video_frame += 1

                        self.clipig_frame += 1

                    if self.display_jupyter:
                        self._status_widget.value = (
                            f"status: {status}"
                            f", second={self.second:.2f}"
                            f", video_frame={self.video_frame}, clipg_frame={self.clipig_frame}"

                        )

        except KeyboardInterrupt:
            # interrupting the kernel is the intended way to stop early
            print("stopped")


class MyRenderer(ClipigVideoRenderer):
    """Renderer with a time-varying perspective warp; video frames go through the denoiser network."""

    def transform(self, pixels: torch.Tensor, delta: float):
        """Stretch all four edges by amounts that oscillate with the video time."""
        now = self.second
        wave = math.sin(now)
        wave_fast = math.sin(now * 1.3)
        # opposite edges get complementary amounts: (1 - x) versus (1 + x)
        edge_factors = {
            "top": 1. + (1 - wave_fast) * delta,
            "bottom": 1. + (1 + wave_fast) * delta,
            "left": 1. + (1 + wave) * delta,
            "right": 1. + (1 - wave) * delta,
        }
        return perspective_transform(pixels, **edge_factors)

    def post_process(self, pixels: torch.Tensor, delta: float):
        """Run the wrapped denoiser network on the frame (as a batch of one)."""
        # alternative: call the whole transformation, i.e. `denoiser(pixels)`
        batch = pixels.unsqueeze(0)
        return denoiser.model(batch)[0]
        

# One stored video frame per 20 CLIPig images, `transform` applied after every image.
renderer = MyRenderer(
    config=config_3,
    store_directory="./clipig-frames/denoise",
    display_jupyter=True,
    video_frame_stride=20,
    transformation_frame_stride=1,
)
renderer.run(seconds=100)

In [None]:
# CLIPig task config (YAML text): pixel image with size [400, 225], RAdam optimizer, batch size 1.
# Only two text targets have a non-zero weight: "desolated streets" (1.0) and
# "words, letters" (-1.0); the remaining targets are kept in the list with weight 0.
# Transformations (all active): symmetric padding (100 on each side) -> random_affine ->
# random_crop(224, pad_if_needed) -> multiplication -> blur.
config_4 = """
clip_model_name: ViT-B/32
device: auto
initialize: random
num_iterations: 10000
source_model:
  name: pixels
  params:
    channels: RGB
    size:
    - 400
    - 225
targets:
- batch_size: 1
  optimizer:
    betas:
    - 0.9
    - 0.999
    learnrate: 0.02
    optimizer: RAdam
    weight_decay: 1.0e-06
  target_features:
  - image: ''
    #text: norwegian landscape, huge flowers in the foreground
    text: desolated streets
    type: text
    weight: 1.0
  - image: ''
    text: fires and explosions  
    type: text
    weight: 0.0
  - image: ''
    text: words, letters
    type: text
    weight: -1.0
  - image: ''
    text: people 
    type: text
    weight: -0.0
  - image: ''
    text: repetitive 
    type: text
    weight: -0.0
  transformations:
  - name: padding
    params:
      active: true
      pad_left: 100
      pad_right: 100
      pad_top: 100
      pad_bottom: 100
      padding_mode: symmetric
  - name: random_affine
    params:
      active: true
      degrees_min_max:
      - -5.6
      - 5.0
      interpolation: bilinear
      scale_min_max:
      - 0.9
      - 1.1
      shear_min_max:
      - -15.0
      - 15.0
      translate_xy:
      - 0.01
      - 0.01
  - name: random_crop
    params:
      active: true
      pad_if_needed: true
      padding_mode: constant
      size: 224
  - name: multiplication
    params:
      active: true
      add: 0.5
      multiply: 0.1
  - name: blur
    params:
      active: true
      kernel_size:
      - 3
      - 3
      mix: 0.7
      sigma:
      - 1.0
      - 1.0
"""

In [None]:
class MyRenderer(ClipigVideoRenderer):
    """
    Renderer that distorts the image with a fixed perlin-noise displacement field
    and feeds the denoised video frame back into CLIPig.
    """

    def post_process(self, pixels: torch.Tensor, delta: float):
        """Denoise the frame; the tuple return also puts the denoised image back into CLIPig."""
        # alternative: call the whole transformation, i.e. `denoiser(pixels)`
        pixels = denoiser.model(pixels.unsqueeze(0))[0]
        return pixels, pixels

    def transform(self, pixels: torch.Tensor, delta: float):
        """
        Blend the image 60% towards an elastically displaced copy of itself.

        The displacement field is created on first use and cached on the instance.
        Its size is hard-coded to 225 x 400, matching the `size` pair of config_4.
        """
        # Ideas tried before and kept for reference:
        #   pixels = VF.adjust_hue(pixels, delta * 10.)
        #   pixels = perspective_transform(pixels, top=1., bottom=1.+delta/10.)
        #   a sin/cos displacement built from Space2d([2, *pixels.shape[-2:]]).space()
        #   cropping one pixel per side and F.pad(..., mode="circular")
        if not hasattr(self, "_space"):
            space = numpy_perlin_noise_2d((400*2, 400), (10, 5))
            # build the field on the image's device instead of assuming CUDA
            space = torch.Tensor(space).to(pixels.device)
            # (800, 400) -> two 400x400 planes -> crop to 225 rows -> (1, 225, 400, 2)
            space = space.view(2, 400, 400)[:, :225, :].permute(1, 2, 0).unsqueeze(0)
            self._space = space

        displaced = VF.elastic_transform(pixels, delta/5. * self._space)
        # out-of-place blend: the caller's tensor is left untouched
        return pixels + .6 * (displaced - pixels)

# One stored video frame per 50 CLIPig images, `transform` applied every 10 images.
renderer = MyRenderer(
    config=config_4,
    store_directory="./clipig-frames/subgenius",
    display_jupyter=True,
    video_frame_stride=50,
    transformation_frame_stride=10,
)
# pass `reset=True` to discard previously stored frames instead of resuming
renderer.run(seconds=100)

In [None]:
# FIXME: unfinished import -- the original line `from src.models.` is a SyntaxError
# and stops "Run All" here. Complete the module path or delete this cell.

In [None]:
# A Conv2d(1 -> 5 channels, 3x3) layer, a bank of 4 alternative weight tensors of the
# same shape, and one mixing coefficient per (sample, weight tensor).
conv = nn.Conv2d(1, 5, 3)
conv_weights = torch.randn(4, *conv.weight.shape)
bs = 2
# torch.Tensor(bs, 4) returns uninitialized memory (arbitrary values, possibly NaN/inf);
# draw well-defined random coefficients instead
feature = torch.randn(bs, 4)
conv.weight.shape

In [None]:
# Per-sample kernels: scale every weight tensor of the bank by the sample's coefficient
# and sum over the bank. Broadcasting supplies the batch axis, so no explicit expand()
# is needed: (1, 4, 5, 1, 3, 3) * (bs, 4, 1, 1, 1, 1) -> (bs, 4, 5, 1, 3, 3)
cur_w = (conv_weights[None] * feature[:, :, None, None, None, None]).sum(dim=1)
cur_w.shape

In [None]:
# F.conv2d only accepts a 4-D weight (out_channels, in_channels / groups, kH, kW), so the
# per-sample kernels in `cur_w` (bs, out, in, kH, kW) cannot be passed directly.
# Fold the batch into the channel axis and use one group per sample instead:
# group g convolves input sample g with cur_w[g].
conv_input = torch.zeros(bs, 1, 10, 10)
conv_out = F.conv2d(
    conv_input.reshape(1, -1, *conv_input.shape[-2:]),   # (1, bs * in, H, W)
    cur_w.reshape(-1, *cur_w.shape[2:]),                 # (bs * out, in, kH, kW)
    groups=bs,
)
# unfold the batch axis again: (1, bs * out, H', W') -> (bs, out, H', W')
conv_out = conv_out.reshape(bs, -1, *conv_out.shape[-2:])
conv_out.shape

In [None]:
# Interactive help lookup used while exploring the signature; disabled so the
# notebook runs top to bottom without opening the pager:
# F.conv2d?