In [5]:
%pip install cairosvg

Collecting cairosvg
  Using cached CairoSVG-2.7.1-py3-none-any.whl.metadata (2.7 kB)
Collecting cairocffi (from cairosvg)
  Using cached cairocffi-1.7.1-py3-none-any.whl.metadata (3.3 kB)
Collecting cssselect2 (from cairosvg)
  Using cached cssselect2-0.7.0-py3-none-any.whl.metadata (2.9 kB)
Collecting defusedxml (from cairosvg)
  Using cached defusedxml-0.7.1-py2.py3-none-any.whl.metadata (32 kB)
Collecting pillow (from cairosvg)
  Downloading pillow-11.1.0-cp312-cp312-macosx_11_0_arm64.whl.metadata (9.1 kB)
Collecting tinycss2 (from cairosvg)
  Using cached tinycss2-1.4.0-py3-none-any.whl.metadata (3.0 kB)
Collecting cffi>=1.1.0 (from cairocffi->cairosvg)
  Using cached cffi-1.17.1-cp312-cp312-macosx_11_0_arm64.whl.metadata (1.5 kB)
Collecting webencodings (from cssselect2->cairosvg)
  Using cached webencodings-0.5.1-py2.py3-none-any.whl.metadata (2.1 kB)
Collecting pycparser (from cffi>=1.1.0->cairocffi->cairosvg)
  Using cached pycparser-2.22-py3-none-any.whl.metadata (943 bytes)
U

In [None]:
import os
import urllib.request
import cairosvg
from PIL import Image

# knots_count[i] is the number of knots with i crossings represented in the table at
# https://prideout.net/blog/svg_knots/knottable_v1.svg
knots_count = [1, 0, 0, 1, 1, 2, 3, 7, 21, 49, 3]

IMG_SIZE = 480 # size of the output image in px; influences the complexity of the filling
               # 480 is the size of the pictures in our 801-knots database, so we can use that one too

# RGBA pixel values used by the clean-up pass below
BLACK_PX = (0, 0, 0, 255)
WHITE_PX = (255, 255, 255, 255)

# create the directories that receive the downloaded SVGs and the rendered PNGs
for save_dir in ("/content/new_knots_svg/", "/content/new_knots_png/"):
    try:
        os.makedirs(save_dir)
    except FileExistsError:
        print(f"One or more directories in '{save_dir}' already exist.")
    except PermissionError:
        print(f"Permission denied: Unable to create '{save_dir}'.")
    except Exception as e:
        print(f"An error occurred: {e}")

for cross_num, knots_with_cross_num in enumerate(knots_count):
  for i in range(knots_with_cross_num):
    knot_name = f"{cross_num}_{i+1}"
    url = f"https://prideout.net/blog/svg_knots/knots/{knot_name}.svg"
    full_path = f"/content/new_knots_svg/{knot_name}.svg"
    png_path = f"/content/new_knots_png/{knot_name}.png"
    urllib.request.urlretrieve(url, full_path)

    # svg to png (read the SVG through a context manager so the handle is closed)
    with open(full_path, 'rb') as svg_file:
      svg_bytes = svg_file.read()
    cairosvg.svg2png(bytestring=svg_bytes, write_to=png_path, output_width=IMG_SIZE, output_height=IMG_SIZE)

    # fill png with white: paste the rendered knot onto an opaque white canvas,
    # using the knot image itself as the paste mask
    bg = Image.new("RGBA", (IMG_SIZE, IMG_SIZE), "WHITE")
    with Image.open(png_path) as fg:
      bg.paste(fg, (0, 0), fg)
    # (no intermediate save: the cleaned image written below goes to the same path)

    # clear the image of errors: a pixel becomes black when it or any of its
    # 8 neighbours is pure black in `bg`, otherwise white; the 1px border is always white
    pixels_bg = bg.load()
    img = Image.new(bg.mode, bg.size)
    pixels = img.load()
    for x in range(IMG_SIZE):
      for y in range(IMG_SIZE):
        on_border = x == 0 or y == 0 or x == IMG_SIZE-1 or y == IMG_SIZE-1
        if not on_border and any(
            pixels_bg[x+dx, y+dy] == BLACK_PX
            for dx in (-1, 0, 1)
            for dy in (-1, 0, 1)
        ):
          pixels[x, y] = BLACK_PX
        else:
          pixels[x, y] = WHITE_PX
    img.save(png_path)

In [None]:
import torch
from PIL import Image
import torchvision.transforms as transforms

def png_to_tensor(path: "str | None" = None, png_image: "Image.Image | None" = None) -> torch.Tensor:
  """Convert an image into a tensor with torchvision's ``PILToTensor``.

  Args:
    path: location of an image file to open. When given, it takes precedence
      and ``png_image`` is ignored.
    png_image: an already loaded PIL image, used only when ``path`` is None.

  Returns:
    The tensor produced by ``transforms.PILToTensor()`` for the image.

  Raises:
    TypeError: if neither ``path`` nor ``png_image`` is provided.
  """
  if path is not None:
    # Convert while the file is still open, then let the context manager
    # release the file handle.
    with Image.open(path) as opened_image:
      return transforms.PILToTensor()(opened_image)
  if png_image is None:
    raise TypeError("png_to_tensor() requires either `path` or `png_image`")
  return transforms.PILToTensor()(png_image)

def tensor_to_png(tensor_image) -> Image:
  """Convert an image tensor into a PIL image with torchvision's ``ToPILImage``.

  Args:
    tensor_image: the value handed to ``transforms.ToPILImage()``.

  Returns:
    Whatever ``ToPILImage`` produces for ``tensor_image``.
  """
  return transforms.ToPILImage()(tensor_image)

In [None]:
import requests
from pathlib import Path

# Download the plotting helpers from the torchvision gallery (if not already downloaded)
if Path("helpers.py").is_file():
  print("helpers.py already exists, skipping download")
else:
  print("Downloading helpers.py")
  request = requests.get(
      "https://raw.githubusercontent.com/pytorch/vision/refs/heads/main/gallery/transforms/helpers.py",
      timeout=30,  # do not hang the notebook forever on a stalled connection
  )
  # Fail loudly on 4xx/5xx instead of saving an error page as helpers.py,
  # which would then be skipped as "already exists" on the next run.
  request.raise_for_status()
  with open("helpers.py", "wb") as f:
    f.write(request.content)

In [None]:
def fill_to(str, num):
  """Left-pad ``str`` with "0" characters up to a total length of ``num``.

  Strings that are already ``num`` characters or longer are returned unchanged.
  Unlike ``zfill``, a leading sign gets no special treatment.
  """
  return str.rjust(num, "0")

**About uniformly distributed transformations**

Suppose we have $N(m)$ knots with $m$ crossings and $k$ available transformations. We want to know how many transformed pictures to generate per knot so that every crossing number ends up with roughly the same number of output images. Let $n(m)$ be the number of transformed pictures generated for each knot with $m$ crossings. The first and most naive way to distribute the transforms evenly is to follow a simple proportion: $n(m) \cdot N(m) = n(j) \cdot N(j) \implies n(m) = n(j) \frac{N(j)}{N(m)}$. Since the smallest nonzero value of $N$ is $1$ (attained at $m = 0, 3, 4$) and the maximum value of $n$ is $2^{k}$ (the number of distinct subsets of the $k$ transformations), we can set $n(0) = n(3) = n(4) = 2^{k}$. So, for each crossing number $m$ (and because we want every knot to appear in the data), $n(m) = \max(1, round(\frac{2^{k}}{N(m)}))$ (taking $j = 0$), where $round(x) = [x]$ if $\{x\} < 0.5$ and $[x]+1$ otherwise, with $\{x\}$ and $[x]$ denoting the fractional and integer parts of $x$ respectively.

In the following considerations we will omit $m$ as we'll deal only with one particular number of crossings.
Let $f_{i}$ be the number of times a decision about the $i$-th transform has to be made. We want uniformly distributed transformations, so the ideal case is $f_i = f_j$. Let's use a bit string $s$ whose $i$-th bit represents whether the $i$-th transformation is applied or not. Thus, $f_i$ counts how many times we set the $i$-th bit. We cannot have different $f$'s, since each $s$ requires us to define every bit, so all the values of $f$ must be the same. Therefore, $f_i = n$.

We want the $i$-th transform to be applied completely at random, so $p(s[i] = 0) = \frac{1}{2}$. We therefore set $s[i] = 0$ in $\lceil \frac{f_i}{2} \rceil$ cases and $s[i] = 1$ in $\lfloor \frac{f_i}{2} \rfloor$ cases (we could also do it the other way round, but that brings no benefit and increases the average runtime, since it is biased towards applying the transformation).
Suppose that at the $l$-th step there are $a$ cases with $s[i] = 0$ and $b$ cases with $s[i] = 1$ left. Then, with probability $p(s[i] = 0) = \frac{a}{a+b}$, we do not apply the $i$-th transform and update $a = a - 1$; with probability $p(s[i] = 1) = \frac{b}{a+b}$, we apply it and update $b = b - 1$.

Doing that for each $s[i]$, we obtain a completely random bit string $s$, and hence a randomized application of the transforms.

In [None]:
# apply transforms to them!
import os
import math
from torchvision.transforms import v2
from helpers import plot
import random

# number of available transformations
k = 6

# Quick visual check: rotate one source knot by a random angle and plot the result.
# NOTE(review): despite its name, `blurrer` here is a random-rotation transform.
blurrer = v2.RandomRotation(degrees=(0, 180), expand=True, fill=255)
sample_tensor = png_to_tensor("/content/new_knots_png/4_1.png")
rotated_sample = tensor_to_png(blurrer(sample_tensor))
plot([rotated_sample])

def _thicken_black_pixels(img):
  """Return a black/white copy of `img` in which black strokes are grown by one pixel.

  A pixel of the result is opaque black when the pixel itself or any of its
  8 neighbours in `img` equals (0, 0, 0, 255); every other pixel, and the
  whole 1px border, is opaque white. Pixels are compared against 4-tuples,
  so `img` is expected to be in RGBA mode.
  """
  black = (0, 0, 0, 255)
  white = (255, 255, 255, 255)
  width, height = img.size  # use the real size instead of assuming IMG_SIZE
  src = img.load()
  cleaned = Image.new(img.mode, img.size)
  dst = cleaned.load()
  for x in range(width):
    for y in range(height):
      on_border = x == 0 or y == 0 or x == width-1 or y == height-1
      if not on_border and any(
          src[x+dx, y+dy] == black
          for dx in (-1, 0, 1)
          for dy in (-1, 0, 1)
      ):
        dst[x, y] = black
      else:
        dst[x, y] = white
  return cleaned


def save_transforms(filename, orig_dir):
  """Save randomly transformed variants of one knot image.

  The number of variants is n = max(1, round(2**k / N)), where N is
  `knots_count[m]` and m is the integer before the first "_" in `filename`.
  For every variant a bit string of length `k` selects which of the six
  transforms (perspective, rotation, elastic, blur, horizontal flip,
  vertical flip) are applied. Each bit is drawn without replacement from a
  budget of ceil(n/2) zeros and floor(n/2) ones.

  Args:
    filename: image name without the ".png" extension, e.g. "4_1".
    orig_dir: directory prefix of the source image; it is concatenated with
      `filename` as-is, so it must end with a path separator.

  Side effects:
    Creates the directory "/content/saved_knots/{filename}.png", writes one
    PNG per variant into it (named "{filename}_{bits}_{occurrence}.png") and
    appends every written path to the global list `saved_imgs`. Also reads
    the globals `knots_count` and `k`.

  Returns:
    None. Problems loading the image or creating the directory are printed
    and end the call early.
  """
  try:
    orig_img = png_to_tensor(path=orig_dir + filename + ".png")
  except Exception as e:
    print(f"Error: {e}")
    return

  # see https://pytorch.org/vision/0.20/auto_examples/transforms/plot_transforms_illustrations.html#sphx-glr-auto-examples-transforms-plot-transforms-illustrations-py

  # random perspective
  perspective_transformer = v2.RandomPerspective(distortion_scale=0.6, p=1.0, fill=255)

  # random rotation
  rotater = v2.RandomRotation(degrees=(0, 180), expand=True, fill=255)

  # elastic
  elastic_transformer = v2.ElasticTransform(alpha=250.0, fill=255)

  # gaussian blur
  blurrer = v2.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5.))

  # horizontal flip
  hflipper = v2.RandomHorizontalFlip(p=1)

  # vertical flip
  vflipper = v2.RandomVerticalFlip(p=1)

  # the position in this list is the index of the controlling bit in `s`
  list_of_transforms = [perspective_transformer, rotater, elastic_transformer, blurrer, hflipper, vflipper]

  # create the output directory
  save_dir = f"/content/saved_knots/{filename}.png"
  try:
      os.makedirs(save_dir)
  except FileExistsError:
      print(f"One or more directories in '{save_dir}' already exist.")
  except PermissionError:
      print(f"Permission denied: Unable to create '{save_dir}'.")
      return
  except Exception as e:
      print(f"An error occurred: {e}")
      return

  # uniformly distributed transformations

  m = int(filename.split('_')[0])
  N = knots_count[m]
  if N <= 0:
    # knots_count has no knots for this crossing number; avoid dividing by zero
    print(f"Error: no knots registered with {m} crossings, skipping '{filename}'.")
    return
  # at least one output per knot, so that every knot appears in the data
  n = max(1, round(2**k/N))

  # remaining budget of 0-bits / 1-bits for every transform
  zeros_left = [math.ceil(n/2)]*k
  ones_left = [math.floor(n/2)]*k
  # how often each bit string has been produced so far (keeps file names unique)
  occurrences = {}

  for _ in range(n):
    # set bits of s
    s = []
    for j in range(k):
      rand_choice = random.randint(1, zeros_left[j]+ones_left[j])
      if rand_choice <= zeros_left[j]:
        s.append(0)
        zeros_left[j] -= 1
      else:
        s.append(1)
        ones_left[j] -= 1
    bits = ''.join(str(bit) for bit in s)
    occurrences[bits] = occurrences.get(bits, 0) + 1

    cur_img = orig_img
    # for clarity of the image: perspective and elastic transforms go first
    # and are followed by the black/white clean-up pass
    if s[0] or s[2]:
      if s[0]: cur_img = list_of_transforms[0](cur_img)
      if s[2]: cur_img = list_of_transforms[2](cur_img)
      cur_img = png_to_tensor(png_image=_thicken_black_pixels(tensor_to_png(cur_img)))

    # remaining transforms (index 2 was already handled above)
    for j in range(1, k):
      if s[j] and j != 2:
        cur_img = list_of_transforms[j](cur_img)

    out_path = save_dir + f"/{filename}_{bits}" + f"_{occurrences[bits]}.png"
    tensor_to_png(cur_img).save(out_path, "png")
    saved_imgs.append(out_path)

In [None]:
import zipfile

directory = os.fsencode("/content/new_knots_png")

# Limit on how many knots to transform:
#   N = -(number of knots)  -> stop after that many files
#   N > 0                   -> transform every file
#   N = 0                   -> transform nothing
N = 1

saved_imgs = []

# aka for knot_image in knots: save transforms of image
# (sorted so that a limited run always picks the same files)
for file in sorted(os.listdir(directory)):
    # The counter must be advanced on its own line: written as
    # `if(not N): break; N += 1` the increment belongs to the `if` body,
    # sits after `break`, and therefore never runs.
    if not N:
        break
    N += 1
    filename = os.path.splitext(os.fsdecode(file))[0]
    save_transforms(str(filename), "/content/new_knots_png/")

# save all to the .zip file
with zipfile.ZipFile("knots_aug.zip", "w") as saved_imgs_arch:
    for imgname in saved_imgs:
      saved_imgs_arch.write(imgname)