<a href="https://colab.research.google.com/github/bambokianr/syntheticImagesGenerator/blob/main/syntheticImagesGenerator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy
from matplotlib import pyplot
from scipy import interpolate
from io import BytesIO
from PIL import Image
import random
import re

#### **Image dimensions (in _pixels_)**

In [None]:
def generateRandomDataBasedOnUniformDistribution(min_val, max_val, sample_size, seed):
  """Draw a reproducible integer sample from a uniform distribution.

  NumPy's global random generator is re-seeded with `seed` on every call, so
  equal arguments always produce the same sample.

  Args:
    min_val: lower bound handed to numpy.random.uniform.
    max_val: upper bound handed to numpy.random.uniform.
    sample_size: number of values to draw.
    seed: seed for NumPy's global random generator.

  Returns:
    A numpy array with the drawn values cast to int.
  """
  numpy.random.seed(seed)
  float_samples = numpy.random.uniform(low = min_val, high = max_val, size = sample_size)
  return float_samples.astype(int)

def generateRandomDataBasedOnNormalDistribution(mean, standard_deviation, sample_size, seed):
  """Draw a reproducible integer sample from a normal distribution.

  NumPy's global random generator is re-seeded with `seed` on every call, so
  equal arguments always produce the same sample.

  Args:
    mean: center of the distribution (numpy.random.normal `loc`).
    standard_deviation: spread of the distribution (numpy.random.normal `scale`).
    sample_size: number of values to draw.
    seed: seed for NumPy's global random generator.

  Returns:
    A numpy array with the drawn values cast to int.
  """
  numpy.random.seed(seed)
  float_samples = numpy.random.normal(loc = mean, scale = standard_deviation, size = sample_size)
  return float_samples.astype(int)

In [None]:
def plotImagensDimensionsDistribution(W_dist, H_dist, figure_suptitle = "", W_title = "", H_title = ""):
  """Draw two side-by-side histograms: image widths on the left, heights on the right.

  Args:
    W_dist: width values (in pixels) for the left histogram.
    H_dist: height values (in pixels) for the right histogram.
    figure_suptitle: bold title placed above both panels.
    W_title: title of the width panel.
    H_title: title of the height panel.
  """
  figure, (width_axis, height_axis) = pyplot.subplots(nrows = 1, ncols = 2, figsize = (15, 5))
  figure.suptitle(figure_suptitle, fontsize = 14, fontweight = "bold")

  # One row per panel: (axis, samples, panel title, x-axis label).
  panels = (
    (width_axis, W_dist, W_title, "largura (px)"),
    (height_axis, H_dist, H_title, "altura (px)"),
  )
  for axis, samples, panel_title, x_label in panels:
    axis.set_title(panel_title)
    axis.set_xlabel(x_label)
    axis.set_ylabel("count")
    axis.hist(samples)

#### **Image sizes (in _bytes_)**

In [None]:
def generateRandomDataBasedOnBoxPlot(q0, q1, q2, q3, q4, sample_size, seed):
  """Draw a reproducible integer sample that follows a five-number summary.

  Uniform random numbers in [0, 1) are mapped through a piecewise-linear
  function that sends the cumulative probabilities 0, 0.25, 0.5, 0.75 and 1
  to q0..q4, so roughly a quarter of the sample lands between each pair of
  consecutive quartiles.

  Args:
    q0: minimum of the box plot.
    q1: first quartile.
    q2: median.
    q3: third quartile.
    q4: maximum of the box plot.
    sample_size: number of values to draw.
    seed: seed for NumPy's global random generator (re-seeded on every call).

  Returns:
    A list of ints with `sample_size` elements.
  """
  numpy.random.seed(seed)
  # The last knot must be 1 (the maximum sits at cumulative probability 1.0).
  # A larger value would squeeze the whole upper quarter right next to q3.
  interp = interpolate.interp1d([0, 0.25, 0.5, 0.75, 1], [q0, q1, q2, q3, q4])
  values = interp(numpy.random.random(sample_size))

  return values.astype(int).tolist()

In [None]:
def plotImagesSizesDistribution(image_size_in_bytes_list_arr, positions_arr, title = ""):
  """Show box plots of image sizes, one box per entry of the input.

  Args:
    image_size_in_bytes_list_arr: data handed to pyplot.boxplot (the image
      sizes, in bytes, to summarize).
    positions_arr: positions of the boxes along the x axis.
    title: bold title of the chart.
  """
  title_style = { "fontsize": 14, "fontweight": "bold" }

  pyplot.boxplot(image_size_in_bytes_list_arr, positions=positions_arr)
  pyplot.title(title, **title_style)
  pyplot.xlabel("índice de execução")
  pyplot.ylabel("tamanho (em bytes)")
  pyplot.show()

#### **Synthetic images generator**

In [None]:
def getImageSizeInBytes(image_ref, format = "jpeg"):
  """Measure how many bytes an image takes once encoded.

  The image is saved to an in-memory buffer (nothing is written to disk) and
  the buffer position after saving is reported as the size.

  Args:
    image_ref: object exposing a `save(stream, format)` method.
    format: encoding passed to `save`; "jpeg" by default.

  Returns:
    The stream position after the save, in bytes.
  """
  encoded_buffer = BytesIO()
  image_ref.save(encoded_buffer, format)
  return encoded_buffer.tell()

def getRandomRGBColor():
  """Return a random RGB color as a list of three channel values in 0..255.

  The values are drawn from NumPy's global random generator.
  """
  # 256 (not 255): the upper bound is exclusive and 255 is a valid channel value.
  return list(numpy.random.choice(256, size = 3))

def parseSizeErrorMargin(size_error_margin_str):
  """Parse a textual size tolerance into a {"type", "value"} dict.

  The string is checked against the following forms, in this order:
    * a percentage such as "5%" or "12.5%" -> type "PERCENTUAL", value is
      the fraction (0.05, 0.125);
    * a decimal number such as "0.05"      -> type "PERCENTUAL", value is
      the number itself;
    * a byte count such as "5bytes" or "5 bytes" -> type "BYTES", value is
      the integer count.
  Anything else yields {"type": "BYTES", "value": 0}.

  Args:
    size_error_margin_str: the tolerance as text.

  Returns:
    A dict with keys "type" ("PERCENTUAL" or "BYTES") and "value".
  """
  # The optional decimal part keeps "12.5%" from being read as "5%".
  percent_match = re.search(r"(\d+(?:\.\d+)?)%", size_error_margin_str)
  if percent_match:
    return { "type": "PERCENTUAL", "value": float(percent_match.group(1)) / 100 }

  fraction_match = re.search(r"\d+\.\d+", size_error_margin_str)
  if fraction_match:
    return { "type": "PERCENTUAL", "value": float(fraction_match.group(0)) }

  # Capture the number attached to "bytes", not just the first number in the string.
  bytes_match = re.search(r"(\d+)\s?bytes", size_error_margin_str)
  if bytes_match:
    return { "type": "BYTES", "value": int(bytes_match.group(1)) }

  return { "type": "BYTES", "value": 0 }


def createImage(dimensions_in_pixels_tuple, expected_size_in_bytes, size_error_margin_str):
  """Build an RGB image whose encoded size approximates a target size.

  Starts from an all-black image and replaces one pixel at a time (column by
  column) with a random color. Before each replacement, and once more after
  the last one, the image is encoded through getImageSizeInBytes and the
  result compared with the target. This means one full encode per pixel, so
  large images are slow.

  Args:
    dimensions_in_pixels_tuple: (width, height) of the image, in pixels.
    expected_size_in_bytes: target encoded size, in bytes.
    size_error_margin_str: accepted deviation from the target, in any form
      understood by parseSizeErrorMargin. A "PERCENTUAL" margin is taken
      relative to expected_size_in_bytes.

  Returns:
    (image, image_size_in_bytes) when a size within the margin is reached.
    None otherwise, after printing the reason: the untouched black image is
    already too large, the size jumped past the margin, or every pixel was
    randomized without reaching the target.
  """
  blackColor = [0, 0, 0]

  width, height = dimensions_in_pixels_tuple[0], dimensions_in_pixels_tuple[1]
  channel = 3

  RGB_pixel_array = numpy.full((height, width, channel), blackColor, dtype=('uint8'))

  size_error_margin_dict = parseSizeErrorMargin(size_error_margin_str)
  if size_error_margin_dict["type"] == "PERCENTUAL":
    size_error_margin = expected_size_in_bytes * size_error_margin_dict["value"]
  else:
    size_error_margin = size_error_margin_dict["value"]

  total_pixels = width * height
  # One extra iteration so the image is also measured after the last pixel change.
  for randomized_pixels in range(total_pixels + 1):
    image_ref = Image.fromarray(RGB_pixel_array, mode = "RGB")
    image_size_in_bytes = getImageSizeInBytes(image_ref)

    # The margin check comes first so a small overshoot inside the margin is accepted.
    if abs(expected_size_in_bytes - image_size_in_bytes) <= size_error_margin:
      print(f"[CREATED] image created with {image_size_in_bytes} bytes")
      return image_ref, image_size_in_bytes

    if image_size_in_bytes > expected_size_in_bytes:
      if randomized_pixels == 0:
        # Even the plain black image is larger than the target.
        print(f"[NOT CREATED] expected_size_in_bytes is less than {image_size_in_bytes} bytes")
      else:
        # The last pixel change jumped over the whole accepted window.
        print(f"[NOT CREATED] size_error_margin is {size_error_margin} bytes but it should be {abs(expected_size_in_bytes - image_size_in_bytes)} bytes")
      return None

    if randomized_pixels < total_pixels:
      # Same traversal order as nested "for w / for h" loops: column by column.
      w, h = divmod(randomized_pixels, height)
      RGB_pixel_array[h, w] = getRandomRGBColor()

  print(f"[NOT CREATED] every pixel was randomized but the image has only {image_size_in_bytes} bytes")
  return None

In [None]:
def syntheticImagesGenerator(width_in_pixels_list, height_in_pixels_list, expected_size_in_bytes_list, sample_size, size_error_margin_str = "5bytes"):
  """Create one synthetic image per (width, height, expected size) triple.

  Widths, heights and expected sizes are each sorted in ascending order and
  then combined by position, so the smallest width is paired with the
  smallest height and the smallest expected size, and so on.

  Args:
    width_in_pixels_list: image widths in pixels (numpy array or sequence).
    height_in_pixels_list: image heights in pixels (numpy array or sequence).
    expected_size_in_bytes_list: target encoded sizes, in bytes; needs at
      least as many entries as there are (width, height) pairs.
    sample_size: not used by this function; kept so existing calls keep working.
    size_error_margin_str: tolerance forwarded to createImage.

  Returns:
    [width_in_pixels_list, height_in_pixels_list, image_size_in_bytes_list],
    where the first two are the inputs as received (unsorted) and the last
    holds the sizes of the images that were actually created.
  """
  # numpy.asarray lets plain lists through as well as numpy arrays.
  sorted_widths = sorted(numpy.asarray(width_in_pixels_list).tolist())
  sorted_heights = sorted(numpy.asarray(height_in_pixels_list).tolist())
  dimensions_tuples_list = list(zip(sorted_widths, sorted_heights))
  expected_size_in_bytes_list = sorted(expected_size_in_bytes_list)

  image_size_in_bytes_list = []
  for image_idx, dimensions_tuple in enumerate(dimensions_tuples_list):
    print(f"[image idx = {image_idx}] width = {dimensions_tuple[0]} px | height = {dimensions_tuple[1]} px | expected size = {expected_size_in_bytes_list[image_idx]} bytes")

    result = createImage(dimensions_tuple, expected_size_in_bytes_list[image_idx], size_error_margin_str)
    # createImage yields None when no image could be produced. Testing for it
    # explicitly (rather than catching TypeError on unpacking) keeps genuine
    # TypeErrors raised inside createImage visible.
    if result is None:
      print("Cannot unpack non-iterable NoneType object")
      continue

    _, image_size_in_bytes = result
    image_size_in_bytes_list.append(image_size_in_bytes)

  return [width_in_pixels_list, height_in_pixels_list, image_size_in_bytes_list]