<a href="https://colab.research.google.com/github/jackschedel/AutoCalibr/blob/main/AutoCalibr.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Dataset importing

In [None]:
#@title Define imports and constants
# `os` and `time` are used by cells further down (path handling in PlyObject.from_file,
# time.time() in the processing cells); importing them here lets the notebook run
# top-to-bottom on a fresh kernel.
import os
import shutil
import time

from google.colab import drive

# Mount Google Drive so the paths below (under MyDrive/AutoCalibr) are reachable.
drive.mount('/content/drive', force_remount=True)

DRIVE = '/content/drive/MyDrive/AutoCalibr/'
DATASET = DRIVE + 'dataset/'

# intermediate folders
INTERMEDIATES = '/content/intermediates'
CONVERTED_PLY = INTERMEDIATES + '/converted_ply/'
NORMALIZED_PLY = INTERMEDIATES + '/normalized_ply/'

# sometimes used for debug outputting into non-cluttered directory
DIR = '/content/'

# Remove the sample_data folder (if present) and make sure the intermediates folder exists.
# Both calls do nothing when there is nothing to do, so the cell can be re-run safely.
shutil.rmtree('sample_data', ignore_errors=True)
os.makedirs(INTERMEDIATES, exist_ok=True)

Collecting wandb
  Downloading wandb-0.15.8-py3-none-any.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
Collecting GitPython!=3.1.29,>=1.0.0 (from wandb)
  Downloading GitPython-3.1.32-py3-none-any.whl (188 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m188.5/188.5 kB[0m [31m16.9 MB/s[0m eta [36m0:00:00[0m
Collecting sentry-sdk>=1.0.0 (from wandb)
  Downloading sentry_sdk-1.29.2-py2.py3-none-any.whl (215 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m215.6/215.6 kB[0m [31m16.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting docker-pycreds>=0.4.0 (from wandb)
  Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)
Collecting pathtools (from wandb)
  Downloading pathtools-0.1.2.tar.gz (11 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting setproctitle (from wandb)
  Downloading setproctitle-1.3.2-cp310-cp310-manylinux_2_5_x86_64.manylin

In [None]:
#@title Convert FBX to PLY {vertical-output: true}

# Converts every .fbx file in DATASET/fbx/ into an ASCII .ply file in CONVERTED_PLY,
# using Blender's Python module (bpy) for both the import and the export.

import os

# bpy is installed on demand: the pip install only runs when the import fails
try:
  import bpy
except ImportError:
  !pip install bpy
  print('-' * 50)
  import bpy

# Start from an empty output folder so files from an earlier run are not mixed in
!rm -r {CONVERTED_PLY} 2>/dev/null
!mkdir {CONVERTED_PLY}

bpy.ops.wm.read_factory_settings()

# needs to be rotated counterclockwise (left) 90 degrees - could be mostly automated (noted below)
# Names are removed from this list as they are matched in the loop, so anything
# still in it afterwards was never encountered (reported at the end of the cell).
needs_extra_rotation = ["Ace Of Spades", "Cantata-57", "Cloudstrike", "Dead Mans Tale", "Duality", "False Promises", "Fugue 55", "Hawkmoon", "Jack Queen King 3", "Mindbenders Ambition", "No Time To Explain", "Ruinous Effigy", "Seven Seraph Carbine", "Seventh Seraph CQC-12", "Seventh Seraph Officer Revolver", "Seventh Seraph SAW", "Seventh Seraph SI-2", "Seventh Seraph VY-7", "Trustee", "Witherhoard"]

fbx_dir = DATASET + 'fbx/'

for f in os.listdir(fbx_dir):
  # Only files whose name ends in lowercase '.fbx' are converted
  if f.endswith('.fbx'):
    # Isolate the name of the .fbx file (without extension)
    name_no_ext = os.path.splitext(os.path.basename(f))[0]

    print(f"Object: {name_no_ext}\n")

    # Delete all mesh objects to avoid exporting multiple models into the same file
    bpy.ops.object.select_all(action='DESELECT')
    bpy.ops.object.select_by_type(type='MESH')
    bpy.ops.object.delete()

    # Load in FBX file
    bpy.ops.import_scene.fbx(filepath=os.path.join(fbx_dir, f))

    # Select the object
    # NOTE(review): assumes the import leaves at least one object selected;
    # an empty selection would raise IndexError here -- confirm.
    obj_object = bpy.context.selected_objects[0]
    bpy.context.view_layer.objects.active = obj_object

    # Models on the manual override list are exported with a different forward axis
    if name_no_ext in needs_extra_rotation:
      forwards_dir = 'Z'
      needs_extra_rotation.remove(name_no_ext)
    else:
      forwards_dir = '-X'

    # Export object to PLY
    # The output name is derived with str.replace, which swaps every '.fbx' occurrence in the file name
    bpy.ops.export_mesh.ply(filepath=os.path.join(CONVERTED_PLY, f.replace('.fbx', '.ply')), use_ascii=True, use_mesh_modifiers=True, use_normals=False, use_uv_coords=False, use_colors=False, axis_forward=forwards_dir, axis_up='Y')

    print('-' * 50)

# Ensure that any models that were supposed to receive extra rotation were hit
if len(needs_extra_rotation) > 0:
  print(f"\nThe following manually-specified models were not hit (check for typos): {needs_extra_rotation}")

# Dataset processing

In [None]:
# @title Define PlyObject Class
import random
import math
from random import choice
import numpy as np
import copy


def random_list_of_sum(length, total_sum):
  """Return `length` random positive integers that add up to `total_sum`.

  Every entry starts at 1; the remaining `total_sum - length` units are spread
  over the entries with a uniform multinomial draw.
  """
  uniform_weights = np.full(length, 1.0 / length)
  spare_units = total_sum - length
  extras = np.random.multinomial(spare_units, uniform_weights)

  return (extras + 1).tolist()


class Vertex:
  """A mutable point in 3D space, compared and hashed by its coordinates."""

  def __init__(self, x, y, z):
    self.x, self.y, self.z = x, y, z


  def copy(self):
    """Return a new Vertex holding the same coordinates."""
    return Vertex(*self.to_list())


  def distance(self, other_vertex):
    """Return the straight-line (Euclidean) distance to `other_vertex`."""
    delta_x = self.x - other_vertex.x
    delta_y = self.y - other_vertex.y
    delta_z = self.z - other_vertex.z

    return math.sqrt(delta_x**2 + delta_y**2 + delta_z**2)


  def to_list(self):
    """Return the coordinates as an [x, y, z] list."""
    return [self.x, self.y, self.z]


  def scale(self, scale_x, scale_y, scale_z):
    """Multiply each coordinate by its per-axis factor, in place."""
    self.x = self.x * scale_x
    self.y = self.y * scale_y
    self.z = self.z * scale_z


  def translate(self, offset_x, offset_y, offset_z):
    """Add a per-axis offset to each coordinate, in place."""
    self.x = self.x + offset_x
    self.y = self.y + offset_y
    self.z = self.z + offset_z


  def __hash__(self):
    # Hash of the coordinate triple, so vertices at the same position share a hash
    return hash(tuple(self.to_list()))


  def __eq__(self, other):
    # Anything that is not a Vertex is never equal
    if not isinstance(other, Vertex):
      return False
    return self.x == other.x and self.y == other.y and self.z == other.z


class Face:
  """A polygon stored as a list of indices into the owning object's vertex list.

  Equality and hashing both ignore the order of the indices, so two faces over
  the same set of vertices count as the same face (and collapse in a set/dict).
  Ordering (`<`) is lexicographic on the stored index order.
  """

  def __init__(self, vertices):
    # vertices is a list of indexes (of the object's vertices list) of the connected vertices that form the face
    self.vertices = vertices


  def copy(self):
    """Return a new Face with its own copy of the index list."""
    return Face(self.vertices.copy())


  def __lt__(self, other):
    # Lexicographic comparison; when one list is a prefix of the other, the shorter one sorts first
    return list(self.vertices) < list(other.vertices)


  def __hash__(self):
    # Must agree with __eq__, which compares the *sorted* indices: equal faces
    # have to produce equal hashes or set/dict de-duplication silently misses them.
    return hash(tuple(sorted(self.vertices)))


  def __eq__(self, other):
    if isinstance(other, Face):
      return (sorted(self.vertices) == sorted(other.vertices))
    return False


class PlyObject:
  def __init__(self, name, vertices, faces, id):
    self.name = name
    self.vertices = vertices
    self.faces = faces
    # lazy me doesnt want to refactor every processing loop so doing this for printing
    self.id = id


  def copy(self):
    return PlyObject(self.name, [v.copy() for v in self.vertices], [f.copy() for f in self.faces], self.id)


  def get_2d_face_list(self):
    return np.array([face.vertices for face in self.faces])


  def get_2d_encoded_face_list(self):
    """Pack consecutive pairs of faces into one (len(faces) // 2, 3) int32 array.

    Faces are consumed two at a time (i and i + 1). For each of the first three
    vertex indices, the two values are combined into a single number that is
    stored in row i // 2 of the result.

    NOTE(review): the inline comment describes an "alternating digits"
    (bit-interleaved) encoding, but np.left_shift(value, np.arange(16)*2) shifts
    the *whole* value by each of the offsets 0, 2, ..., 30 and the results are
    OR-ed together, rather than spreading individual bits -- confirm this is the
    intended encoding before relying on it being reversible.
    NOTE(review): with an odd number of faces the last iteration reads
    self.faces[i + 1] past the end of the list -- confirm callers always pass
    an even face count.
    NOTE(review): indices are cast to np.int16, so this presumably assumes every
    vertex index fits in 16 bits -- confirm.
    """
    # Create an empty 2D numpy array with the correct shape
    encoded_faces = np.empty((len(self.faces) // 2, 3), dtype=np.int32)

    # Process two faces at a time
    for i in range(0, len(self.faces), 2):
      face1 = self.faces[i]
      face2 = self.faces[i + 1]

      # Process each property (x, y, z)
      for j in range(3):
        # Cast to 16-bit, then encode together as a 32-bit value using alternating digits
        value1_16 = np.int16(face1.vertices[j])
        value2_16 = np.int16(face2.vertices[j])
        # face1's value is shifted by the even offsets, face2's by the odd offsets; all shifted copies are OR-ed
        encoded = np.bitwise_or.reduce(np.bitwise_or(np.left_shift(value1_16, np.arange(16)*2), np.left_shift(value2_16, np.arange(16)*2 + 1)))

        # Store in the result array
        encoded_faces[i // 2, j] = encoded

    return encoded_faces


  def get_2d_vertex_list(self):
    return np.array([[vertex.x, vertex.y, vertex.z] for vertex in self.vertices])


  def pad_with_random(self, max_vertices, max_faces):
    self.subdivide_faces_as_padding(max_vertices, max_faces)

    vertices_to_add = max_vertices - len(self.vertices)
    faces_to_add = max_faces - len(self.faces)

    if vertices_to_add > 0:
      self.add_random_duplicate_vertices(vertices_to_add)
    if faces_to_add > 0:
      self.add_random_duplicate_faces(faces_to_add)

    self.sort_tri_data(True)


  def export_random_to_model(self, count, max_vertices, max_faces):
    vertex_list_3d = np.zeros((count, max_vertices, 3))
    face_list_3d = np.zeros((count, max_faces, 3))

    for i in range(count):
      new_variation = self

      original_vertices = self.vertices.copy()
      original_faces = self.faces.copy()

      self.pad_with_random(max_vertices, max_faces)

      vertex_list_3d[i] = self.get_2d_vertex_list()
      face_list_3d[i] = self.get_2d_face_list()

      self.vertices = original_vertices
      self.faces = original_faces

    return { 'vertices': vertex_list_3d, 'faces': face_list_3d }


  def save_file(self, filename):
    with open(filename, "w") as file:
      file.write("ply\n")
      file.write("format ascii 1.0\n")
      file.write("comment Created by PlyObject class\n")
      file.write(f"element vertex {len(self.vertices)}\n")
      file.write("property float x\n")
      file.write("property float y\n")
      file.write("property float z\n")
      file.write(f"element face {len(self.faces)}\n")
      file.write("property list uchar uint vertex_indices\n")
      file.write("end_header\n")

      for vertex in self.vertices:
        file.write(f"{vertex.x} {vertex.y} {vertex.z}\n")

      for face in self.faces:
        formatted_vertices = ' '.join(str(v) for v in face.vertices)
        file.write(f"{len(face.vertices)} {formatted_vertices}\n")


  def scale(self, scale_x, scale_y, scale_z):
    for vertex in self.vertices:
      vertex.scale(scale_x, scale_y, scale_z)


  def translate(self, offset_x, offset_y, offset_z):
    for vertex in self.vertices:
      vertex.translate(offset_x, offset_y, offset_z)


  def calculate_volume(self):
    volume = 0
    for face in self.faces:
      v0 = self.vertices[face.vertices[0]]
      v1 = self.vertices[face.vertices[1]]
      v2 = self.vertices[face.vertices[2]]
      volume += (-v0.x*v1.y*v2.z + v1.x*v0.y*v2.z + v0.x*v2.y*v1.z - v2.x*v0.y*v1.z + v2.x*v1.y*v0.z - v1.x*v2.y*v0.z)
    return abs(volume) / 6.0


  def remove_overlapping(self):
    vert_dict = {}
    convert_dict = {}

    new_vertices = []
    # iterate over existing vertices to identify and save unique ones
    for idx, vertex in enumerate(self.vertices):
      if vertex in vert_dict:
        convert_dict[idx] = vert_dict[vertex]
      else:
        # assign a unique index to each vertex
        vert_dict[vertex] = len(new_vertices)
        convert_dict[idx] = len(new_vertices)
        new_vertices.append(vertex)

    # replace original vertices with new, duplicate-free list
    self.vertices = new_vertices

    # apply convert_dict to update face vertices to match new unique indexing
    for face in self.faces:
      face.vertices = [convert_dict[vertex] for vertex in face.vertices]

    # convert faces to set to remove any potential duplicate faces
    self.faces = set(self.faces)
    self.faces = list(set(self.faces))


  # not great for scale since re-sorting is needed, use a batch version after dataset processing
  def add_random_duplicate_vertices(self, count_to_add):
    # note: will need to re-sort tri data if already sorted
    for _ in range(count_to_add):
      to_duplicate = choice(self.vertices)
      new_vertex = Vertex(to_duplicate.x, to_duplicate.y, to_duplicate.z)
      self.vertices.append(new_vertex)


  # not great for scale since re-sorting is needed, use a batch version after dataset processing
  def add_random_duplicate_faces(self, count_to_add):
    # note: will need to re-sort tri data if already sorted
    for _ in range(count_to_add):
      to_duplicate = choice(self.faces)
      new_face = Face(to_duplicate.vertices)
      self.faces.append(new_face)


  def subdivide_face(self, face_index):
    """Split one triangle in two by inserting a vertex halfway along its first edge.

    Adds exactly one vertex (the midpoint of the face's first two vertices) and
    one face: the original slot is overwritten with the first half and the
    second half is appended to the face list.
    """
    target = self.faces[face_index]

    # The edge being split runs between the face's first two vertices
    start_index = target.vertices[0]
    end_index = target.vertices[1]
    start, end = self.vertices[start_index], self.vertices[end_index]

    midpoint = Vertex((start.x + end.x) / 2, (start.y + end.y) / 2, (start.z + end.z) / 2)

    # The midpoint goes at the end of the vertex list; remember where it landed
    self.vertices.append(midpoint)
    midpoint_index = len(self.vertices) - 1

    # Both halves keep the original index order around the inserted midpoint
    # so the winding (outward normal) is maintained
    first_half = Face([start_index, midpoint_index, target.vertices[2]])
    second_half = Face([midpoint_index, end_index, target.vertices[2]])

    # Replace the old face with the new faces
    self.faces[face_index] = first_half
    self.faces.append(second_half)



  def subdivide_faces_as_padding(self, max_vertices, max_faces):
    # note: loses sorted status
    while len(self.faces) < max_faces and len(self.vertices) < max_vertices:
      face_index = random.randint(0, len(self.faces) - 1)
      obj.subdivide_face(face_index)


  def categorize_faces(self):
    face_dict = {}

    for face in obj.faces:
      length = len(face.vertices)

      if length in face_dict:
        face_dict[length] = face_dict[length] + 1
      else:
        face_dict[length] = 1

    return face_dict


  def get_value_extrema(self):
    min_val = float('inf')
    max_val = float('-inf')

    for v in self.vertices:
      min_val = min(min_val, v.x, v.y, v.z)
      max_val = max(max_val, v.x, v.y, v.z)

    return {'min': min_val, 'max': max_val}


  def get_max_values(self):
    max_values = {'x': None, 'y': None, 'z': None}

    for vertex in self.vertices:
      if max_values['x'] is None or vertex.x > max_values['x']:
        max_values['x'] = vertex.x
      if max_values['y'] is None or vertex.y > max_values['y']:
        max_values['y'] = vertex.y
      if max_values['z'] is None or vertex.z > max_values['z']:
        max_values['z'] = vertex.z

    return max_values


  def get_min_values(self):
    min_values = {'x': None, 'y': None, 'z': None}

    for vertex in self.vertices:
      if min_values['x'] is None or vertex.x < min_values['x']:
        min_values['x'] = vertex.x
      if min_values['y'] is None or vertex.y < min_values['y']:
        min_values['y'] = vertex.y
      if min_values['z'] is None or vertex.z < min_values['z']:
        min_values['z'] = vertex.z

    return min_values


  def center_object(self):
    max_vals = self.get_max_values()
    min_vals = self.get_min_values()

    offset_vals = {'x': 0, 'y': 0, 'z': 0}

    for dim in offset_vals:
      center = (max_vals[dim] + min_vals[dim]) / 2
      offset_vals[dim] = -center

    self.translate(offset_vals['x'], offset_vals['y'], offset_vals['z'])


  def normalize_scale(self):
    x_coordinates = [vertex.x for vertex in self.vertices]
    y_coordinates = [vertex.y for vertex in self.vertices]
    z_coordinates = [vertex.z for vertex in self.vertices]

    max_distance = max(x_coordinates + y_coordinates + z_coordinates)
    min_distance = min(x_coordinates + y_coordinates + z_coordinates)
    normalization_range = max_distance - min_distance

    if normalization_range == 0:
      raise ValueError("Normalization range cannot be zero")

    for vertex in self.vertices:
      vertex.x = 2 * (vertex.x - min_distance) / normalization_range - 1
      vertex.y = 2 * (vertex.y - min_distance) / normalization_range - 1
      vertex.z = 2 * (vertex.z - min_distance) / normalization_range - 1


  def squares_to_tris(self):
    new_faces = []
    for face in self.faces:
      if len(face.vertices) == 4:
        new_faces.append(Face([face.vertices[0], face.vertices[1], face.vertices[2]]))
        new_faces.append(Face([face.vertices[0], face.vertices[2], face.vertices[3]]))
      else:
        new_faces.append(face)

    self.faces = new_faces


  def delete_plane(self):
    # Note: only call if the object has a plane artifact!
    max_x_index = max(range(len(self.vertices)), key = lambda index: self.vertices[index].x)
    min_x_index = min(range(len(self.vertices)), key = lambda index: self.vertices[index].x)
    max_z_index = max(range(len(self.vertices)), key = lambda index: self.vertices[index].z)
    min_z_index = min(range(len(self.vertices)), key = lambda index: self.vertices[index].z)

    indices_to_remove = set([max_x_index, min_x_index, max_z_index, min_z_index])

    for f, face in enumerate(self.faces):
      if set(face.vertices).intersection(indices_to_remove) == indices_to_remove:
        del self.faces[f]
        break
    else:
      raise ValueError("Face with all vertices not found")

    for index in sorted(indices_to_remove, reverse=True):
      del self.vertices[index]

    for face in self.faces:
      face.vertices = [idx if idx not in indices_to_remove else -1 for idx in face.vertices]

    self.remove_disconnected_vertices()


  def remove_disconnected_vertices(self):
    connected_vertices = set()
    for face in self.faces:
      connected_vertices |= set(face.vertices)

    shift_indices = []
    new_vertices = []
    for idx, vertex in enumerate(self.vertices):
      if idx in connected_vertices:
        new_vertices.append(vertex)
        shift_indices.append(len(new_vertices) - 1)
      else:
        shift_indices.append(None)

    self.vertices = new_vertices

    for face in self.faces:
      face.vertices = [shift_indices[vertex] if shift_indices[vertex] is not None else None for vertex in face.vertices]
      face.vertices = [vertex for vertex in face.vertices if vertex is not None]

    self.faces = [face for face in self.faces if face.vertices]


  def stretch_to_max(self):
    """Scale the mesh by the module-level global_stretch_scale_x/y/z factors."""
    # note: update scale globals before calling
    stretch_factors = (global_stretch_scale_x, global_stretch_scale_y, global_stretch_scale_z)
    self.scale(*stretch_factors)


  def revert_stretch(self):
    """Undo stretch_to_max by scaling with the reciprocals of the global stretch factors."""
    inverse_x = 1/global_stretch_scale_x
    inverse_y = 1/global_stretch_scale_y
    inverse_z = 1/global_stretch_scale_z
    self.scale(inverse_x, inverse_y, inverse_z)


  def dimension_range(self):
    xmin = xmax = obj.vertices[0].x
    ymin = ymax = obj.vertices[0].y
    zmin = zmax = obj.vertices[0].z

    for vertex in obj.vertices:
      xmin = min(xmin, vertex.x)
      xmax = max(xmax, vertex.x)
      ymin = min(ymin, vertex.y)
      ymax = max(ymax, vertex.y)
      zmin = min(zmin, vertex.z)
      zmax = max(zmax, vertex.z)

    xrang = xmax - xmin
    yrang = ymax - ymin
    zrang = zmax - zmin

    return {'x':xrang, 'y':yrang, 'z':zrang}


  def sort_tri_data(self, break_normals = True):
    index_map = {}

    # sort the vertices by x first, then y, then z
    sorted_vertices = sorted(
      enumerate(self.vertices),
      key=lambda pair: (pair[1].x, pair[1].y, pair[1].z)
    )
    self.vertices = [pair[1] for pair in sorted_vertices]

    # record the new indices of the vertices in the map
    for i, pair in enumerate(sorted_vertices):
      old_index, _ = pair
      index_map[old_index] = i

    # convert old face lists to new face lists using the index map
    new_faces = []
    for face in self.faces:
      new_face_vertices = [index_map[i] for i in face.vertices]
      # new_face_vertices.sort()
      if break_normals:
        new_face_vertices.sort()
      new_faces.append(Face(new_face_vertices))

    # replace old face list with new face list, which we first sort
    new_faces.sort()

    self.faces = new_faces


  @classmethod
  def from_file(cls, filepath, index):
    with open(filepath, 'r') as f:
      if next(f).strip() != "ply":
        raise ValueError("The file being read is not a PLY file.")

      for _ in range(2):
        next(f)

      n_vertices = int(next(f).split()[-1])

      for _ in range(3):
        next(f)

      n_faces = int(next(f).split()[-1])

      for _ in range(2):
        next(f)

      vertices = []
      for _ in range(n_vertices):
        x, y, z = map(float, next(f).split())
        vertices.append(Vertex(x, y, z))

      faces = []
      for _ in range(n_faces):
        face_vertices = list(map(int, next(f).split()[1:]))
        faces.append(Face(face_vertices))

    name = os.path.splitext(os.path.basename(filepath))[0]

    return cls(name, vertices, faces, index)

  @classmethod
  def from_model(cls, name, vertices_input, faces_input, index):
    vertices_input = vertices_input.numpy()
    faces_input = faces_input.numpy()

    vertices = []
    for vertex in vertices_input:
      vertices.append(Vertex(vertex[0], vertex[1], vertex[2]))

    faces = []
    for face in faces_input:
      faces.append(Face([round(face[0]), round(face[1]), round(face[2])]))

    return cls(name, vertices, faces, index)

In [None]:
#@title Process PLY files as PlyObject {vertical-output: true}

import os
import time

# Toggle between using full dataset and just preconverted subset
use_fbx_dataset = False
use_ply_dataset = True
# if both are false, it will use a single test file from the ply dataset

PRECONVERTED_PLY = DATASET + 'ply/'
SINGLE_TEST_FILE = 'Blind Perdition.ply'


def load_ply_object(filepath, index, source_label):
  """Load one PLY file as a PlyObject, print a short timing/size report and return it.

  `source_label` only affects the printed text ('auto-converted' or 'pre-converted').
  """
  start_time = time.time()
  loaded = PlyObject.from_file(filepath, index)
  elapsed_time = time.time() - start_time

  print(f"Object: {loaded.name}\n")
  print(f'Processed {source_label} PLY into PlyObject (took {elapsed_time:.2f} s)')
  print(f'\nVertice count: {len(loaded.vertices)}')
  print(f'Face count: {len(loaded.faces)}')
  print('-' * 50)
  return loaded


ply_objs = []

# Folders to load, in order: freshly converted files first, then the pre-converted set
ply_sources = []
if use_fbx_dataset:
  ply_sources.append((CONVERTED_PLY, 'auto-converted'))
if use_ply_dataset:
  ply_sources.append((PRECONVERTED_PLY, 'pre-converted'))

idx = 0
for directory, source_label in ply_sources:
  # Sorted so the object order (and therefore each object's id) is the same on every run
  for f in sorted(os.listdir(directory)):
    if f.endswith('.ply'):
      obj = load_ply_object(os.path.join(directory, f), idx, source_label)
      ply_objs.append(obj)
      idx += 1

# Neither dataset selected: fall back to a single known test file
if not ply_sources:
  f = PRECONVERTED_PLY + SINGLE_TEST_FILE
  if not os.path.isfile(f):
    raise FileNotFoundError(f'{f} not found!')

  obj = load_ply_object(f, 0, 'pre-converted')
  ply_objs.append(obj)

Object: Line in the Sand

Processed pre-converted PLY into PlyObject (took 0.30 s)

Vertice count: 24644
Face count: 29692
--------------------------------------------------
Object: Rat King

Processed pre-converted PLY into PlyObject (took 0.50 s)

Vertice count: 12219
Face count: 24308
--------------------------------------------------
Object: Outbreak Perfected

Processed pre-converted PLY into PlyObject (took 0.63 s)

Vertice count: 86406
Face count: 28802
--------------------------------------------------
Object: Prometheus Lens

Processed pre-converted PLY into PlyObject (took 0.26 s)

Vertice count: 13902
Face count: 24149
--------------------------------------------------
Object: Blind Perdition

Processed pre-converted PLY into PlyObject (took 0.50 s)

Vertice count: 13044
Face count: 14070
--------------------------------------------------


In [None]:
#@title Remove plane artifacts (dataset specific cleaning) {vertical-output: true}

# Models listed here contain a giant rectangular plane originally used as a background; it needs to be filtered out.
# Names are removed from the list as they are matched, so whatever is left at the end was never found.
has_plane_artifact = ['Abbadon', 'Blind Perdition', 'Ex Machina', 'Komodo-4FR', 'Nova Mortis', 'Trespasser', 'Vestian Dynasty', 'Vouchsafe', 'Hereafter']


def _print_dimension_range(dimension_range):
  """Print the X, Y and Z extents of a dimension-range dict, one per line."""
  for axis in ('x', 'y', 'z'):
    print(f'{axis.upper()}:', dimension_range[axis])


# Remove plane artifact from manually specified objects

# Could also iterate over all objects and use exceptions,
# but would need to give delete_plane function stricter pre-deletion checking
for obj in ply_objs:
  if obj.name not in has_plane_artifact:
    continue
  has_plane_artifact.remove(obj.name)

  obj_range = obj.dimension_range()

  print(f"Object ({obj.id + 1}/{len(ply_objs)}): {obj.name}\n")
  print("Dimension Range:")
  _print_dimension_range(obj_range)

  try:
    start_time = time.time()
    obj.delete_plane()
    elapsed_time = time.time() - start_time

    obj_range = obj.dimension_range()
  except ValueError:
    # delete_plane signals "no matching plane face" with ValueError
    print("\nNo plane found!")
  else:
    print(f"\nPlane detected and deleted (took {elapsed_time:.2f} s)")
    print(f"\nDimension Range (plane deleted in {elapsed_time:.2f} s):")
    _print_dimension_range(obj_range)

  print('-' * 50)

if has_plane_artifact:
  print(f"\nThe following manually-specified models were not hit (check for typos): {has_plane_artifact}")

Object: Blind Perdition

Dimension Range:
X: 1.831408
Y: 0.47424900000000003
Z: 3.43575

Plane detected and deleted (took 0.06 s)

Dimension Range (plane deleted in 0.06 s):
X: 0.10221
Y: 0.46234
Z: 1.319475
--------------------------------------------------

The following manually-specified models were not hit (check for typos): ['Abbadon', 'Ex Machina', 'Komodo-4FR', 'Nova Mortis', 'Trespasser', 'Vestian Dynasty', 'Vouchsafe', 'Hereafter']


In [None]:
#@title Categorize face data to check for any bad n-gons

# Number of faces per vertex count, accumulated over every loaded object
face_lengths = {}

for obj in ply_objs:
  temp_face_lengths = obj.categorize_faces()

  for length in temp_face_lengths:
    face_lengths[length] = face_lengths.get(length, 0) + temp_face_lengths[length]

# Print the complete tally first so it is visible even when the check below raises
for length in face_lengths:
  print(f'Faces with {length} vertices: {face_lengths[length]} instances')

# Anything that is neither a triangle nor a quad is rejected
for length in face_lengths:
  if not 3 <= length <= 4:
    raise Exception(f'\nFace of unsupported size {length}!')

Faces with 3 vertices: 119728 instances
Faces with 4 vertices: 1292 instances


In [None]:
#@title Convert any objects with square faces to tris {vertical-output: true}

# this could probably be done using bpy before exporting as a PLY but this allows this to be done for any PlyObject

# reset the shared tally; this cell does not populate it
face_lengths = {}
square_obj_count = 0
for obj in ply_objs:

  face_data = obj.categorize_faces()

  # Objects without four-vertex faces need no conversion
  if 4 not in face_data:
    continue

  square_obj_count = square_obj_count + 1

  print(f"Object ({obj.id + 1}/{len(ply_objs)}): {obj.name}\n")
  # A missing key means there are no faces of that size
  print(f"Tris: {face_data.get(3, 0)}")
  print(f"Squares: {face_data[4]}")

  start_time = time.time()

  obj.squares_to_tris()

  elapsed_time = time.time() - start_time

  print(f'\nConverted square faces to tris (took {elapsed_time:.2f} s)\n')
  face_data = obj.categorize_faces()
  print(f"Tris: {face_data.get(3, 0)}")
  print(f"Squares: {face_data.get(4, 0)}")

  print('-' * 50)

if square_obj_count == 0:
  print("No objects containing squares found. All objects contain only tris.")

Object: Prometheus Lens

Tris: 22857
Squares: 1292

Converted square faces to tris (took 0.01 s)

Tris: 25441
Squares: 0
--------------------------------------------------


In [None]:
#@title Merge overlapping/duplicate vertices and faces {vertical-output: true}

for obj in ply_objs:
  # Sizes before merging, so the number of removed duplicates can be reported afterwards
  initial_vertex_count, initial_face_count = len(obj.vertices), len(obj.faces)

  print(
    f"Object ({obj.id + 1}/{len(ply_objs)}): {obj.name}\n",
    f'Vertex count: {initial_vertex_count}',
    f'Face count: {initial_face_count}',
    sep='\n',
  )

  start_time = time.time()
  obj.remove_overlapping()
  elapsed_time = time.time() - start_time

  merged_vertex_count, merged_face_count = len(obj.vertices), len(obj.faces)

  print(
    f'\nDuplicate vertices: {initial_vertex_count - merged_vertex_count}',
    f'Duplicate faces: {initial_face_count - merged_face_count}',
    f'\nMerged any overlaping vertices and faces (took {elapsed_time:.2f} s)',
    f'\nVertex count: {merged_vertex_count}',
    f'Face count: {merged_face_count}',
    '-' * 50,
    sep='\n',
  )

Object: Line in the Sand

Vertex count: 24644
Face count: 29692

Duplicate vertices: 8148
Duplicate faces: 3

Merged any overlaping vertices and faces (took 0.09 s)

Vertex count: 16496
Face count: 29689
--------------------------------------------------
Object: Rat King

Vertex count: 12219
Face count: 24308

Duplicate vertices: 0
Duplicate faces: 0

Merged any overlaping vertices and faces (took 0.05 s)

Vertex count: 12219
Face count: 24308
--------------------------------------------------
Object: Outbreak Perfected

Vertex count: 86406
Face count: 28802

Duplicate vertices: 78934
Duplicate faces: 609

Merged any overlaping vertices and faces (took 0.18 s)

Vertex count: 7472
Face count: 28193
--------------------------------------------------
Object: Prometheus Lens

Vertex count: 13902
Face count: 25441

Duplicate vertices: 1312
Duplicate faces: 44

Merged any overlaping vertices and faces (took 0.05 s)

Vertex count: 12590
Face count: 25397
--------------------------------------

In [None]:
#@title Center the objects to the origin {vertical-output: true}

for obj in ply_objs:
  # Time only the centering itself, not the printing
  start_time = time.time()
  obj.center_object()
  elapsed_time = time.time() - start_time

  print(
    f"Object ({obj.id + 1}/{len(ply_objs)}): {obj.name}\n",
    f'Centered to origin (took {elapsed_time:.2f} s)',
    '-' * 50,
    sep='\n',
  )

Object: Line in the Sand

Centered to origin (took 0.03 s)
--------------------------------------------------
Object: Rat King

Centered to origin (took 0.01 s)
--------------------------------------------------
Object: Outbreak Perfected

Centered to origin (took 0.02 s)
--------------------------------------------------
Object: Prometheus Lens

Centered to origin (took 0.02 s)
--------------------------------------------------
Object: Blind Perdition

Centered to origin (took 0.01 s)
--------------------------------------------------


In [None]:
#@title Normalize individual object scale to perfectly fit boundaries {vertical-output: true}

def _print_extrema(heading, extrema):
  """Print a heading followed by the pooled minimum and maximum coordinate."""
  print(heading)
  print(f"Minimum: {extrema['min']}")
  print(f"Maximum: {extrema['max']}")


for obj in ply_objs:

  # Extrema before normalising, for comparison with the values printed afterwards
  extrema = obj.get_value_extrema()

  print(f"Object ({obj.id + 1}/{len(ply_objs)}): {obj.name}\n")
  _print_extrema("Extrema in any Dimension:", extrema)

  start_time = time.time()
  obj.normalize_scale()
  elapsed_time = time.time() - start_time

  print(f'\nNormalised object scale to boundaries (took {elapsed_time:.2f} s)')

  extrema = obj.get_value_extrema()

  _print_extrema("\nExtrema in any Dimension:", extrema)
  print('-' * 50)

Object: Line in the Sand

Extrema in any Dimension:
Minimum: -1.0
Maximum: 1.0

Normalised object scale to boundaries (took 0.02 s)

Extrema in any Dimension:
Minimum: -1.0
Maximum: 1.0
--------------------------------------------------
Object: Rat King

Extrema in any Dimension:
Minimum: -0.1960095
Maximum: 0.1960095

Normalised object scale to boundaries (took 0.02 s)

Extrema in any Dimension:
Minimum: -1.0
Maximum: 1.0
--------------------------------------------------
Object: Outbreak Perfected

Extrema in any Dimension:
Minimum: -0.4440425
Maximum: 0.4440425

Normalised object scale to boundaries (took 0.01 s)

Extrema in any Dimension:
Minimum: -1.0
Maximum: 1.0
--------------------------------------------------
Object: Prometheus Lens

Extrema in any Dimension:
Minimum: -1.0
Maximum: 1.0

Normalised object scale to boundaries (took 0.02 s)

Extrema in any Dimension:
Minimum: -1.0
Maximum: 1.0
--------------------------------------------------
Object: Blind Perdition

Extrema in

In [None]:
#@title Calculate minimum volume of any object

# Per-object results, keyed by object name; read again by the scaling cell
volumes = {}
volume_calc_times = {}
min_volume = float('inf')

for obj in ply_objs:
  start_time = time.time()
  volumes[obj.name] = obj.calculate_volume()
  volume_calc_times[obj.name] = time.time() - start_time

  # Keep the smallest volume seen so far
  if volumes[obj.name] < min_volume:
    min_volume = volumes[obj.name]

print(f'Global minimum volume: {min_volume:.6f}')

Global minimum volume: 0.028479


In [None]:
#@title Scale each object to match the minimum global volume {vertical-output: true}

# just scaled to boundaries so the objects must be scaled down, not up

import math

for obj in ply_objs:
  print(
    f"Object ({obj.id + 1}/{len(ply_objs)}): {obj.name}\n",
    f'Volume calculated as: {volumes[obj.name]:.6f} (took {volume_calc_times[obj.name]:.2f} s)',
    sep='\n',
  )

  # Volume scales with the cube of a uniform factor, hence the cube root of the volume ratio
  start_time = time.time()
  to_scale = math.pow(min_volume / volumes[obj.name], 1/3)
  obj.scale(to_scale, to_scale, to_scale)
  elapsed_time = time.time() - start_time
  print(f'\nScaled by {to_scale} (took {elapsed_time:.2f} s)')

  # Recompute the volume to confirm it now matches the global minimum
  start_time = time.time()
  volume = obj.calculate_volume()
  elapsed_time = time.time() - start_time
  print(
    f'\nVolume calculated as: {volume:.6f} (took {elapsed_time:.2f} s)',
    '-' * 50,
    sep='\n',
  )

Object: Line in the Sand

Volume calculated as: 0.028479 (took 0.08 s)

Scaled by 1.0 (took 0.01 s)

Volume calculated as: 0.028479 (took 0.07 s)
--------------------------------------------------
Object: Rat King

Volume calculated as: 0.272199 (took 0.06 s)

Scaled by 0.47121092908297674 (took 0.01 s)

Volume calculated as: 0.028479 (took 0.05 s)
--------------------------------------------------
Object: Outbreak Perfected

Volume calculated as: 0.065950 (took 0.07 s)

Scaled by 0.7558567953805982 (took 0.01 s)

Volume calculated as: 0.028479 (took 0.07 s)
--------------------------------------------------
Object: Prometheus Lens

Volume calculated as: 0.050188 (took 0.05 s)

Scaled by 0.8278999959990196 (took 0.01 s)

Volume calculated as: 0.028479 (took 0.06 s)
--------------------------------------------------
Object: Blind Perdition

Volume calculated as: 0.044829 (took 0.03 s)

Scaled by 0.8596580597884671 (took 0.00 s)

Volume calculated as: 0.028479 (took 0.03 s)
-------------

In [None]:
#@title Analyze dimension ranges to pseudo-verify object orientation

# could do anywhere before doing global stretch
# choosing to do it after all other processing

# used for checking if I missed any rotation overrides
# each tally maps an axis name to how often it took that podium place
count_smallest = dict.fromkeys('xyz', 0)
count_middle = dict.fromkeys('xyz', 0)
count_largest = dict.fromkeys('xyz', 0)

for obj in ply_objs:
  obj_range = obj.dimension_range()

  # rank the axes by their range, ascending, and tally each placing
  sorted_keys = sorted(obj_range, key=lambda axis: obj_range[axis])
  smallest_key, middle_key, largest_key = sorted_keys[0], sorted_keys[1], sorted_keys[2]

  count_smallest[smallest_key] += 1
  count_middle[middle_key] += 1
  count_largest[largest_key] += 1

# report largest first, then middle, then smallest
for heading, tally in (
    ("Times with largest dimensional range:", count_largest),
    ("\nTimes with middle dimensional range:", count_middle),
    ("\nTimes with smallest dimensional range:", count_smallest),
):
  print(heading)
  for axis in 'xyz':
    print(f"{axis.upper()}: {tally[axis]}")

Times with largest dimensional range:
X: 0
Y: 0
Z: 5

Times with middle dimensional range:
X: 0
Y: 5
Z: 0

Times with smallest dimensional range:
X: 5
Y: 0
Z: 0


In [None]:
#@title Calculate global stretch values given global dimensional extrema

min_x = min_y = min_z = float('inf')
max_x = max_y = max_z = float('-inf')

# widen the dataset-wide bounding box with every vertex of every object
for obj in ply_objs:
  for vertex in obj.vertices:
    min_x, max_x = min(min_x, vertex.x), max(max_x, vertex.x)
    min_y, max_y = min(min_y, vertex.y), max(max_y, vertex.y)
    min_z, max_z = min(min_z, vertex.z), max(max_z, vertex.z)

# 2 / extent is the factor that maps an axis' global extent onto a span of 2
scale_x, scale_y, scale_z = (
    2 / (upper - lower)
    for lower, upper in ((min_x, max_x), (min_y, max_y), (min_z, max_z))
)

print("Dimensional minima:")
for label, value in (('X', min_x), ('Y', min_y), ('Z', min_z)):
  print(f"{label}: {value}")

print("\nDimensional maxima:")
print(f"X: {max_x}\nY: {max_y}\nZ: {max_z}\n")

print('-' * 50)

print("\nDerived Stretch Value:")
for label, value in (('X', scale_x), ('Y', scale_y), ('Z', scale_z)):
  print(f"{label}: {value}")

Dimensional minima:
X: -0.12194939584895402
Y: -0.33554149999999994
Z: -1.0

Dimensional maxima:
X: 0.12194939584895394
Y: 0.33554149999999994
Z: 1.0

--------------------------------------------------

Derived Stretch Value:
X: 8.200122624950072
Y: 2.9802572856114673
Z: 1.0


In [None]:
#@title Stretch non-bounded dimensions using constant values to tighten scope {vertical-output: true}

# publish the derived factors under the global_stretch_scale_* names
# NOTE(review): presumably read by stretch_to_max(), which is called without arguments below - confirm
global_stretch_scale_x, global_stretch_scale_y, global_stretch_scale_z = scale_x, scale_y, scale_z

for obj in ply_objs:
  obj_range = obj.dimension_range()

  print(f"Object ({obj.id + 1}/{len(ply_objs)}): {obj.name}\n")
  print("Dimension Range:")
  for axis in ('x', 'y', 'z'):
    print(f'{axis.upper()}:', obj_range[axis])

  start_time = time.time()
  obj.stretch_to_max()
  elapsed_time = time.time() - start_time

  # query the ranges again so the report shows the stretched object
  obj_range = obj.dimension_range()

  print(f"\nStretched non-bounded dimensions using global constants (took {elapsed_time:.2f} s):")
  print(f"\nDimension Range (stretched in {elapsed_time:.2f} s):")
  for axis in ('x', 'y', 'z'):
    print(f'{axis.upper()}:', obj_range[axis])
  print('-' * 50)

Object: Line in the Sand

Dimension Range:
X: 0.14593999999999996
Y: 0.6710829999999999
Z: 2.0

Stretched non-bounded dimensions using global constants (took 0.01 s):

Dimension Range (stretched in 0.01 s):
X: 1.1967258958852132
Y: 2.0
Z: 2.0
--------------------------------------------------
Object: Rat King

Dimension Range:
X: 0.14158240522736318
Y: 0.5979953838336013
Z: 0.9424218581659535

Stretched non-bounded dimensions using global constants (took 0.01 s):

Dimension Range (stretched in 0.01 s):
X: 1.1609930843997502
Y: 1.7821800994321162
Z: 0.9424218581659535
--------------------------------------------------
Object: Outbreak Perfected

Dimension Range:
X: 0.24389879169790796
Y: 0.5689507899464725
Z: 1.5117135907611965

Stretched non-bounded dimensions using global constants (took 0.01 s):

Dimension Range (stretched in 0.01 s):
X: 2.0
Y: 1.6956197368923742
Z: 1.5117135907611965
--------------------------------------------------
Object: Prometheus Lens

Dimension Range:
X: 0.12

In [None]:
#@title Offset objects to pin Z and Y object maximums to upper boundaries to localize similar features (dataset specific scope reduction) {vertical-output: true}

# pinning all objects against back wall

for obj in ply_objs:
  start_time = time.time()

  # distance each maximum has to travel to land exactly on the +1 boundary
  max_values = obj.get_max_values()
  max_z, max_y = max_values['z'], max_values['y']
  z_offset, y_offset = 1 - max_z, 1 - max_y

  # X is left where it is; only Y and Z are shifted
  obj.translate(0, y_offset, z_offset)

  # read the maxima back to show the effect of the translation
  max_values = obj.get_max_values()
  new_max_z, new_max_y = max_values['z'], max_values['y']

  elapsed_time = time.time() - start_time

  print(f"Object ({obj.id + 1}/{len(ply_objs)}): {obj.name}\n")
  print(f"\nMaximum Z: {max_z}")
  print(f"\nMaximum Y: {max_y}")
  print(f'\nOffset object to boundaries (took {elapsed_time:.2f} s)')
  print(f"\nMaximum Z: {new_max_z}")
  print(f"\nMaximum Y: {new_max_y}")
  print('-' * 50)

Object: Line in the Sand

Maximum Z: 1.0

Maximum Y: 1.0

Offset object to boundaries (took 0.03 s)

Maximum Z: 1.0

Maximum Y: 1.0
--------------------------------------------------
Object: Rat King

Maximum Z: 0.47121092908297674

Maximum Y: 0.8910900497160581

Offset object to boundaries (took 0.02 s)

Maximum Z: 1.0

Maximum Y: 1.0
--------------------------------------------------
Object: Outbreak Perfected

Maximum Z: 0.7558567953805982

Maximum Y: 0.8478098684461871

Offset object to boundaries (took 0.02 s)

Maximum Z: 1.0

Maximum Y: 1.0
--------------------------------------------------
Object: Prometheus Lens

Maximum Z: 0.8278999959990196

Maximum Y: 0.8251934392958161

Offset object to boundaries (took 0.02 s)

Maximum Z: 1.0

Maximum Y: 1.0
--------------------------------------------------
Object: Blind Perdition

Maximum Z: 0.8596580597884671

Maximum Y: 0.8977177250156672

Offset object to boundaries (took 0.02 s)

Maximum Z: 1.0

Maximum Y: 1.0
-----------------------

In [None]:
#@title Verify that dataset dimensional extrema are properly constrained

# per-axis extrema over the whole dataset, seeded so any real value replaces them
track_min = dict.fromkeys('xyz', float('inf'))
track_max = dict.fromkeys('xyz', float('-inf'))

for obj in ply_objs:
  max_values = obj.get_max_values()
  min_values = obj.get_min_values()

  for dimension in 'xyz':
    if min_values[dimension] < track_min[dimension]:
      track_min[dimension] = min_values[dimension]
    if max_values[dimension] > track_max[dimension]:
      track_max[dimension] = max_values[dimension]

for heading, tracked in (
    ("Dimensional Minima:", track_min),
    ("\nDimensional Maxima:", track_max),
):
  print(heading)
  for dimension in 'xyz':
    print(f'{dimension.upper()}:', tracked[dimension])

Dimensional Minima:
X: -1.0000000000000002
Y: -1.0
Z: -1.0

Dimensional Maxima:
X: 0.9999999999999997
Y: 1.0
Z: 1.0


In [None]:
#@title Sort order of vertices and faces numerically to remove arbitrary sample noise {vertical-output: true}

for obj in ply_objs:
    # time only the sort itself
    # NOTE(review): the meaning of the True argument is defined by sort_tri_data, which is not shown here
    start_time = time.time()
    obj.sort_tri_data(True)
    elapsed_time = time.time() - start_time

    # one print call, three lines: header (followed by a blank line), result, separator
    print(
        f"Object ({obj.id + 1}/{len(ply_objs)}): {obj.name}\n",
        f'Sorted object vertices and faces numerically (took {elapsed_time:.2f} s)',
        '-' * 50,
        sep='\n',
    )

Object: Line in the Sand

Sorted object vertices and faces numerically (took 0.89 s)
--------------------------------------------------
Object: Rat King

Sorted object vertices and faces numerically (took 0.37 s)
--------------------------------------------------
Object: Outbreak Perfected

Sorted object vertices and faces numerically (took 1.00 s)
--------------------------------------------------
Object: Prometheus Lens

Sorted object vertices and faces numerically (took 0.39 s)
--------------------------------------------------
Object: Blind Perdition

Sorted object vertices and faces numerically (took 0.54 s)
--------------------------------------------------


In [None]:
#@title Export normalized PLY files {vertical-output: true}

# start from an empty NORMALIZED_PLY folder (errors from rm are silenced,
# e.g. when the folder does not exist yet)
!rm -r {NORMALIZED_PLY} 2>/dev/null
!mkdir {NORMALIZED_PLY}

for obj in ply_objs:
  start_time = time.time()

  # todo: remove before final model use
  # the file is written from a copy with the stretch reverted
  # NOTE(review): copy.copy is a shallow copy - if revert_stretch() mutates
  # shared vertex objects in place, obj itself is changed too; confirm, or
  # switch to copy.deepcopy
  obj_copy = copy.copy(obj)
  obj_copy.revert_stretch()

  # one file per object, named after the object
  obj_copy.save_file(NORMALIZED_PLY + obj.name + '.ply')
  elapsed_time = time.time() - start_time


  print(f"Object ({obj.id + 1}/{len(ply_objs)}): {obj.name}\n")
  print(f'Exported normalized PLY file (took {elapsed_time:.2f} s)')
  print('-' * 50)

# note - resets normalized_ply directory in drive if set to True
# (removes the files matching normalized_ply/* under DRIVE, then copies the
# local NORMALIZED_PLY folder into DRIVE)
send_to_google_drive = False
if send_to_google_drive:
  !rm -f {DRIVE + 'normalized_ply/*'} 2>/dev/null
  !cp -r {NORMALIZED_PLY} {DRIVE}

Object: Line in the Sand

Exported normalized PLY file (took 0.24 s)
--------------------------------------------------
Object: Rat King

Exported normalized PLY file (took 0.15 s)
--------------------------------------------------
Object: Outbreak Perfected

Exported normalized PLY file (took 0.14 s)
--------------------------------------------------
Object: Prometheus Lens

Exported normalized PLY file (took 0.16 s)
--------------------------------------------------
Object: Blind Perdition

Exported normalized PLY file (took 0.08 s)
--------------------------------------------------


# Dataset size padding as random variations

In [None]:
#@title Define imports and functions

import time
import os
import numpy as np

# when True, only normalized PLY files whose name starts with 'Lu' are
# imported, the wandb run is labelled "partial", and Drive-backed variations
# go to the "variation_gen_partial/" folder
use_partial_dataset = True
# when True, variation .npy files are stored under DRIVE; otherwise under DIR
variations_use_gdrive = False

def extract_num_from_string(filenames, string):
  """Collect the integer indexes of ``.npy`` files named after ``string``.

  A filename contributes an index only when it contains ``string``, ends with
  ``.npy``, and the text between the first occurrence of ``string`` and the
  extension consists solely of ASCII digits. For example
  ``vertex_input_list_12.npy`` gives ``12`` for
  ``string='vertex_input_list_'``. Digits elsewhere in the name (a prefix, or
  digits inside ``string`` itself) are ignored instead of being concatenated
  onto the index, and names whose index slot holds anything but digits are
  skipped.

  Args:
    filenames: iterable of file name strings.
    string: marker text that directly precedes the index in matching names.

  Returns:
    set of int: the distinct indexes found; empty when nothing matches.
  """
  extension = ".npy"
  numbers = set()
  for filename in filenames:
    if not filename.endswith(extension):
      continue
    marker_at = filename.find(string)
    if marker_at == -1:
      continue
    # the index slot is whatever sits between the marker and the extension
    index_text = filename[marker_at + len(string):-len(extension)]
    # isascii() keeps out unicode digits such as superscripts, which
    # isdigit() accepts but int() rejects
    if index_text.isascii() and index_text.isdigit():
      numbers.add(int(index_text))
  return numbers


def highest_variation_index_in_path(folder_path):
  """Return the last index of the gap-free run of saved variations starting at 0.

  An index counts as completed when extract_num_from_string reports it for
  both the 'vertex_input_list_' and the 'face_input_list_' marker among the
  entries of ``folder_path``.

  Args:
    folder_path: directory to scan.

  Returns:
    int: the largest ``i`` such that every index ``0..i`` is completed, or
    ``-1`` when index 0 is not completed, the folder is empty, or the folder
    does not exist.
  """
  try:
    filenames = os.listdir(folder_path)
  except FileNotFoundError:
    # a folder that was never created holds no variations
    return -1

  vertex_numbers = extract_num_from_string(filenames, 'vertex_input_list_')
  face_numbers = extract_num_from_string(filenames, 'face_input_list_')
  completed_variations = sorted(vertex_numbers & face_numbers)

  # walk the sorted indexes while they match their position; the first gap
  # ends the run (an empty list simply leaves the result at -1)
  last_consecutive_index = -1
  for expected_index, found_index in enumerate(completed_variations):
    if found_index != expected_index:
      break
    last_consecutive_index = expected_index

  return last_consecutive_index

In [None]:
#@title Import pre-normalized PLY files {vertical-output: true}

# pulling from google drive

ply_objs = []

idx = 0
for f in os.listdir(DRIVE + 'normalized_ply/'):
  # skip anything that is not a PLY file and, for the partial dataset,
  # anything whose name does not start with 'Lu'
  if not f.endswith('.ply'):
    continue
  if use_partial_dataset and not f.startswith('Lu'):
    continue

  # idx is handed to from_file and advances only for files that were loaded
  start_time = time.time()
  obj = PlyObject.from_file(os.path.join(DRIVE + 'normalized_ply/', f), idx)
  ply_objs.append(obj)
  elapsed_time = time.time() - start_time
  idx += 1

  print(f"Object: {obj.name}\n")
  print(f'Processed pre-normalized PLY file into PlyObject (took {elapsed_time:.2f} s)')
  print(f'\nVertice count: {len(obj.vertices)}')
  print(f'Face count: {len(obj.faces)}')
  print('-' * 50)

Object: Luna's Howl

Processed pre-normalized PLY file into PlyObject (took 0.04 s)

Vertice count: 5502
Face count: 10168
--------------------------------------------------
Object: Lumina

Processed pre-normalized PLY file into PlyObject (took 0.47 s)

Vertice count: 9249
Face count: 17312
--------------------------------------------------


In [None]:
#@title Analyze object tri data to determine global target padding size and autoencoder input layer size

# largest vertex / face counts over all loaded objects (0 when there are none)
max_vertices = max((len(obj.vertices) for obj in ply_objs), default=0)
max_faces = max((len(obj.faces) for obj in ply_objs), default=0)

print(f'Object count: {len(ply_objs)}')

print(
    f'\nMaximum vertices for any object: {max_vertices}',
    f'Maximum faces for any object: {max_faces}',
    sep='\n',
)

# padding targets: 120% of the largest counts, truncated to an integer
vertex_target_count = int(max_vertices * 1.2)
face_target_count = int(max_faces * 1.2)

# make max_faces even so we can encode 2 faces onto eachother
#face_target_count = face_target_count + face_target_count % 2

print(
    f'\n120% of vertex count: {vertex_target_count}',
    f'120% of face count: {face_target_count}',
    sep='\n',
)

# one input row per (padded) vertex and per (padded) face
vertex_input_size = vertex_target_count
#face_input_size = face_target_count // 2
face_input_size = face_target_count

# NOTE(review): the "(2 faces per)" label below dates from the disabled
# face pairing above; the width is currently one face per row
print(
    f'\nVertex input layer width: {vertex_input_size}',
    f'Face input layer width (2 faces per): {face_input_size}',
    sep='\n',
)

print(
    f'\nVertex neurons (3 per): {vertex_input_size * 3}',
    f'Face neurons (3 per): {face_input_size * 3}',
    f'Total Input Neurons: {vertex_input_size * 3 + face_input_size * 3}\n',
    sep='\n',
)

Object count: 2

Maximum vertices for any object: 9249
Maximum faces for any object: 17312

120% of vertex count: 11098
120% of face count: 20774

Vertex input layer width: 11098
Face input layer width (2 faces per): 20774

Vertex neurons (3 per): 33294
Face neurons (3 per): 62322
Total Input Neurons: 95616



In [None]:
#@title Randomly pad PlyObjects to match global target size and export to numpy array files {vertical-output: true}

# batch processing notes (post-fix):
# all 129 items
# 100 variations
# 12047 seconds
# ~1 second per variation

# if this is false, it will iterate the file name
delete_old_file_variations = False
# todo: final model should be very large
variations_to_generate = 2


if variations_use_gdrive:
  if use_partial_dataset:
    path_to_save = DRIVE + "variation_gen_partial/"
  else:
    path_to_save = DRIVE + "variation_gen/"
else:
  path_to_save = DIR + "variation_gen/"

if delete_old_file_variations:
  !rm -r {path_to_save} 2>/dev/null
  !mkdir {path_to_save} 2>/dev/null
  idx = 0
else:
  !mkdir {path_to_save} 2>/dev/null
  idx = highest_variation_index_in_path(path_to_save) + 1

for obj in ply_objs:
  initial_vertex_count = len(obj.vertices)
  initial_face_count = len(obj.faces)
  print(f"Object ({obj.id + 1}/{len(ply_objs)}): {obj.name}\n")
  print(f'Vertex count: {len(obj.vertices)}')
  print(f'Face count: {len(obj.faces)}')
  start_time = time.time()

  variations = obj.export_random_to_model(variations_to_generate, vertex_target_count, face_target_count)

  variation_vertices = variations['vertices']
  variation_faces = variations['faces']

  first_idx = idx

  for i in range(len(variation_vertices)):
    np.save(f"{path_to_save}vertex_input_list_{idx}.npy", variation_vertices[i])
    np.save(f"{path_to_save}face_input_list_{idx}.npy", variation_faces[i])

    idx += 1

  elapsed_time = time.time() - start_time
  if variations_to_generate == 1:
    print(f'\nFor 1 variation (file index {first_idx})')
  else:
    print(f'\nFor {variations_to_generate} variations (file indexes {first_idx}-{idx})')

  print(f'Added vertices: {vertex_target_count - initial_vertex_count}')
  print(f'Added faces: {face_target_count - initial_face_count}')
  if variations_to_generate == 1:
      print(f'\nGenerated 1 random variation (took {elapsed_time:.2f} s)')
  else:
    print(f'\nGenerated {variations_to_generate} random variation(s) (took {elapsed_time:.2f} s)')
  print('-' * 50)


# Run this step before defining VAE models to avoid system memory cap

Object (1/2): Luna's Howl

Vertex count: 11098
Face count: 19587

For 2 variations (file indexes 0-2)
Added vertices: 0
Added faces: 1187

Generated 2 random variation(s) (took 1.42 s)
--------------------------------------------------
Object (2/2): Lumina

Vertex count: 9249
Face count: 17312

For 2 variations (file indexes 2-4)
Added vertices: 1849
Added faces: 3462

Generated 2 random variation(s) (took 1.92 s)
--------------------------------------------------


In [None]:
#@title Import pre-computed random variations (to allow for runtime restart) {vertical-output: true}

# same folder selection as the cell that generates the variations
if variations_use_gdrive:
  if use_partial_dataset:
    path_to_save = DRIVE + "variation_gen_partial/"
  else:
    path_to_save = DRIVE + "variation_gen/"
else:
  path_to_save = DIR + "variation_gen/"

# number of gap-free variations (index 0..dataset_size-1) found on disk
dataset_size = highest_variation_index_in_path(path_to_save) + 1

if dataset_size > 0:
  # only the shapes are needed, so map the first files read-only
  # (open_memmap defaults to mode='r+', which requires write access) and
  # keep no reference to the mapping afterwards
  vertex_input_size = np.lib.format.open_memmap(
      f"{path_to_save}vertex_input_list_0.npy", mode='r').shape[0]
  face_input_size = np.lib.format.open_memmap(
      f"{path_to_save}face_input_list_0.npy", mode='r').shape[0]

# preallocate (variation, row, 3) arrays and fill them file by file
# when no variation exists, vertex_input_size / face_input_size keep whatever
# value an earlier cell assigned
vertex_input_list = np.zeros((dataset_size, vertex_input_size, 3))
face_input_list = np.zeros((dataset_size, face_input_size, 3))

for idx in range(dataset_size):
  vertex_input_list[idx] = np.load(f"{path_to_save}vertex_input_list_{idx}.npy")
  face_input_list[idx] = np.load(f"{path_to_save}face_input_list_{idx}.npy")

  print(f'Vertex data file: vertex_input_list_{idx}.npy')
  print(f'Face data file: face_input_list_{idx}.npy')
  print(f'\nImported variation {idx + 1}/{dataset_size}')
  print("-" * 50)

Vertex data file: vertex_input_list_0.npy
Face data file: face_input_list_0.npy

Imported variation 1/4
--------------------------------------------------
Vertex data file: vertex_input_list_1.npy
Face data file: face_input_list_1.npy

Imported variation 2/4
--------------------------------------------------
Vertex data file: vertex_input_list_2.npy
Face data file: face_input_list_2.npy

Imported variation 3/4
--------------------------------------------------
Vertex data file: vertex_input_list_3.npy
Face data file: face_input_list_3.npy

Imported variation 4/4
--------------------------------------------------


In [None]:
#@title Analyze random padding results to verify dataset validity

# (label, array) pairs so the per-array counts below share one code path
named_arrays = (
    ('vertex_input_list', vertex_input_list),
    ('face_input_list', face_input_list),
)

print(f'Vertex list shape: {vertex_input_list.shape}')
print(f'Face list shape: {face_input_list.shape}')
for label, values in named_arrays:
  print(f'Number of zeros in {label}: {np.count_nonzero(values == 0)}')

print(f'\nMean of vertex_input_list values: {np.mean(vertex_input_list)}, Std Dev: {np.std(vertex_input_list)}')
print(f'Mean absolute value of vertex_input_list: {np.mean(np.abs(vertex_input_list))}')
print(f'Mean of face_input_list values: {np.mean(face_input_list)}, Std Dev: {np.std(face_input_list)}')

# the first NaN line carries the blank line that separates this section
for lead, (label, values) in zip(('\n', ''), named_arrays):
  print(f'{lead}Number of NaNs in {label}: {np.isnan(values).sum()}')

Vertex list shape: (4, 11098, 3)
Face list shape: (4, 20774, 3)
Number of zeros in vertex_input_list: 0
Number of zeros in face_input_list: 26

Mean of vertex_input_list values: 0.367421866525923, Std Dev: 0.3982629537978347
Mean absolute value of vertex_input_list: 0.4498698553557993
Mean of face_input_list values: 5541.2629849812265, Std Dev: 3223.9122948922786

Number of NaNs in vertex_input_list: 0
Number of NaNs in face_input_list: 0


# Autoencoder initialization and training

In [None]:
#@title Define imports and sampling function

import time
import os
import numpy as np
import tensorflow as tf
from tensorflow import keras
from keras import layers
from keras.utils import pad_sequences
from tensorflow.keras.layers.experimental import preprocessing
from tensorflow.keras.models import load_model
import time
try:
  import wandb
except ImportError:
  !pip install wandb
  import wandb
from wandb.keras import WandbMetricsLogger, WandbModelCheckpoint

# NOTE(review): not read anywhere in the cells shown here; presumably selects
# Google Drive as the model save location (cf. variations_use_gdrive) - confirm
model_use_gdrive = False

class Sampling(layers.Layer):
  """Draws a latent sample z from (z_mean, z_log_var) using random normal noise."""

  def call(self, inputs):
    """Return z_mean + exp(0.5 * z_log_var) * epsilon.

    inputs is a pair (z_mean, z_log_var); epsilon is drawn from
    tf.random.normal with a shape built from the first two dimensions of
    z_mean.
    """
    z_mean, z_log_var = inputs
    mean_shape = tf.shape(z_mean)
    noise = tf.random.normal(shape=(mean_shape[0], mean_shape[1]))
    # exp(0.5 * log_var) is the standard deviation that scales the noise
    std_dev = tf.exp(0.5 * z_log_var)
    return z_mean + std_dev * noise

In [None]:
#@title Normalize our final variation data

# NOTE(review): this cell overwrites vertex_input_list / face_input_list, so
# running it a second time re-normalizes already-normalized data

# vertices: cast, fit a Normalization layer on the data, keep its statistics,
# then replace the array with its normalized version
vertex_input_list = tf.cast(vertex_input_list, tf.float32)
normalizer_ver = preprocessing.Normalization(axis=-1)
normalizer_ver.adapt(vertex_input_list)
mean_ver = normalizer_ver.mean
variance_ver = normalizer_ver.variance
vertex_input_list = normalizer_ver(vertex_input_list)

# faces: same steps with a separate layer
face_input_list = tf.cast(face_input_list, tf.float32)
normalizer_face = preprocessing.Normalization(axis=-1)
normalizer_face.adapt(face_input_list)
mean_face = normalizer_face.mean
variance_face = normalizer_face.variance
face_input_list = normalizer_face(face_input_list)

print(f'\nMean of vertex_input_list values: {np.mean(vertex_input_list)}, Std Dev: {np.std(vertex_input_list)}')
print(f'Mean of face_input_list values: {np.mean(face_input_list)}, Std Dev: {np.std(face_input_list)}')


Mean of vertex_input_list values: -4.078909796589869e-07, Std Dev: 1.0000001192092896
Mean of face_input_list values: 1.4184929568727966e-05, Std Dev: 1.0000032186508179


In [None]:
#@title Initialize our run in wandb

if use_partial_dataset:
  dataset_string = "partial"
else:
  dataset_string = "full"

# hyperparameters and run metadata tracked with the run; the encoder cell
# reads the size multipliers and latent_dim back from wandb.config
run_config = {
  "vertex_input_size": vertex_input_size,
  "face_input_size": face_input_size,
  "learning_rate": 0.005,
  "dataset": dataset_string,
  "global_size_multiplier": 0.2,
  "vertex_size_multiplier": 1.0,
  "face_size_multiplier": 0.2,
  "layer_1_size_red": 0.5,
  "layer_2_size_red": 0.7,
  "combination_layer_1_size_red": 0.7,
  "combination_layer_2_size_red": 0.7,
  "latent_dim": 32,
  "prenormalized_vertex_mean": mean_ver,
  "prenormalized_vertex_variance": variance_ver,
  "prenormalized_face_mean": mean_face,
  "prenormalized_face_variance": variance_face,
}

# project is the wandb project where this run will be logged
wandb.init(project="AutoCalibr", config=run_config)

config = wandb.config

NameError: ignored

In [None]:
#@title Initialize encoder model

# drop the reference to the loaded objects; the model only needs the arrays
ply_objs = None

# hyperparameters are read back from the wandb run config
# should likely be max 0.2. Start around 0.05
global_size_multiplier = config.global_size_multiplier

vertex_size_multiplier = config.vertex_size_multiplier
face_size_multiplier = config.face_size_multiplier

layer_1_size_red = config.layer_1_size_red
layer_2_size_red = config.layer_2_size_red
combination_layer_1_size_red = config.combination_layer_1_size_red
combination_layer_2_size_red = config.combination_layer_2_size_red

latent_dim = config.latent_dim

# two inputs: one row of 3 values per vertex and per face
vertex_inputs = keras.Input(shape=(vertex_input_size, 3), name='vertex_input')
face_inputs = keras.Input(shape=(face_input_size, 3), name='face_input')

# per-branch base widths; the decoder cell reuses these names
vertex_size_weighted = vertex_input_size * vertex_size_multiplier
face_size_weighted = face_input_size * face_size_multiplier

# current_layer_multiplier is updated between layers, so statement order
# below determines every layer width
# NOTE(review): the widths passed to Dense are float products, not ints
current_layer_multiplier = 3 * global_size_multiplier * layer_1_size_red
vertex_layer = layers.Flatten(name='vertex_flatten')(vertex_inputs)
face_layer = layers.Flatten(name='face_flatten')(face_inputs)

# first dense layer of each branch
vertex_layer = layers.Dense(vertex_size_weighted * current_layer_multiplier, activation="relu", name='vertex_dense_1')(vertex_layer)
face_layer = layers.Dense(face_size_weighted * current_layer_multiplier, activation="relu", name='face_dense_1')(face_layer)

# second dense layer of each branch, narrowed by layer_2_size_red
current_layer_multiplier *= layer_2_size_red
vertex_layer = layers.Dense(vertex_size_weighted * current_layer_multiplier, activation="relu", name='vertex_dense_2')(vertex_layer)
face_layer = layers.Dense(face_size_weighted * current_layer_multiplier, activation="relu", name='face_dense_2')(face_layer)

# merge both branches; concatenate_result_size is the nominal merged width
# used to size the combined layers
combined_layer = layers.Concatenate(name='concatenate')([vertex_layer, face_layer])
concatenate_result_size = (vertex_size_weighted + face_size_weighted) * current_layer_multiplier
vertex_layer = None
face_layer = None

# two combined dense layers; the multiplier restarts here and is relative
# to concatenate_result_size
current_layer_multiplier = combination_layer_1_size_red
combined_layer = layers.Dense(concatenate_result_size * current_layer_multiplier, activation="relu", name='combined_dense_1')(combined_layer)

current_layer_multiplier *= combination_layer_2_size_red
combined_layer = layers.Dense(concatenate_result_size * current_layer_multiplier, activation="relu", name='combined_dense_2')(combined_layer)

# latent heads plus the Sampling layer that draws z from them
z_mean = layers.Dense(latent_dim, name="z_mean")(combined_layer)
z_log_var = layers.Dense(latent_dim, name="z_log_var")(combined_layer)
z = Sampling()([z_mean, z_log_var])

# the encoder returns all three tensors
encoder = keras.Model((vertex_inputs, face_inputs), (z_mean, z_log_var, z), name="encoder")
encoder.summary()

Model: "encoder"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 vertex_input (InputLayer)      [(None, 11098, 3)]   0           []                               
                                                                                                  
 face_input (InputLayer)        [(None, 20774, 3)]   0           []                               
                                                                                                  
 vertex_flatten (Flatten)       (None, 33294)        0           ['vertex_input[0][0]']           
                                                                                                  
 face_flatten (Flatten)         (None, 62322)        0           ['face_input[0][0]']             
                                                                                            

In [None]:
#@title Initialize decoder model

# the decoder runs the encoder's width schedule in reverse: latent vector ->
# three widening shared dense layers -> separate vertex / face branches ->
# tensors shaped like the encoder inputs
latent_inputs = keras.Input(shape=(latent_dim,), name='latent')

# recompute the encoder's final per-branch multiplier and nominal merged width
decode_dense_multiplier = 3 * global_size_multiplier * layer_1_size_red * layer_2_size_red
concatenate_result_size = (vertex_size_weighted + face_size_weighted) * decode_dense_multiplier
current_layer_multiplier = combination_layer_1_size_red * combination_layer_2_size_red

# shared layers, narrowest first
combined_layer = layers.Dense(concatenate_result_size * current_layer_multiplier, activation="relu", name='from_latent_dense_1')(latent_inputs)

current_layer_multiplier = combination_layer_1_size_red
combined_layer = layers.Dense(concatenate_result_size * current_layer_multiplier, activation="relu", name='from_latent_dense_2')(combined_layer)

combined_layer = layers.Dense(concatenate_result_size, activation="relu", name='from_latent_dense_3')(combined_layer)

# split into the vertex and face branches
vertex_layer = layers.Dense(vertex_size_weighted * decode_dense_multiplier, activation="relu", name='decode_vertex_dense_1')(combined_layer)
face_layer = layers.Dense(face_size_weighted * decode_dense_multiplier, activation="relu", name='decode_face_dense_1')(combined_layer)
combined_layer = None

# undo the layer_2_size_red narrowing for the second branch layer
decode_dense_multiplier /= layer_2_size_red
vertex_layer = layers.Dense(vertex_size_weighted * decode_dense_multiplier, activation="relu", name='decode_vertex_dense_2')(vertex_layer)
face_layer = layers.Dense(face_size_weighted * decode_dense_multiplier, activation="relu", name='decode_face_dense_2')(face_layer)

# output layers are linear: the vertex / face tensors are standardized to
# mean ~0 by the Normalization layers before training, so the targets contain
# negative values that a ReLU output could never produce
vertex_layer = layers.Dense(vertex_input_size * 3, activation="linear", name='decode_vertex_dense_3')(vertex_layer)
face_layer = layers.Dense(face_input_size * 3, activation="linear", name='decode_face_dense_3')(face_layer)

# reshape the flat outputs back to one row of 3 values per vertex / face
vertex_layer = layers.Reshape((vertex_input_size, 3), name='vertex_output')(vertex_layer)
face_layer = layers.Reshape((face_input_size, 3), name='face_output')(face_layer)

decoder = keras.Model(latent_inputs, (vertex_layer, face_layer), name="decoder")
decoder.summary()

Model: "decoder"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 latent (InputLayer)            [(None, 32)]         0           []                               
                                                                                                  
 from_latent_dense_1 (Dense)    (None, 1569)         51777       ['latent[0][0]']                 
                                                                                                  
 from_latent_dense_2 (Dense)    (None, 2242)         3519940     ['from_latent_dense_1[0][0]']    
                                                                                                  
 from_latent_dense_3 (Dense)    (None, 3203)         7184329     ['from_latent_dense_2[0][0]']    
                                                                                            

In [None]:
#@title Define VAE class

# when True, VAE.train_step prints the checked input, latent and
# reconstruction tensors
print_shape_debug = False
# NOTE(review): not read in the part of the VAE class shown here; presumably
# toggles printing of the loss terms - confirm
print_loss_debug = True

class VAE(keras.Model):
  """Variational autoencoder that wraps a separate encoder and decoder model.

  The encoder is called with a (vertex, face) pair and must return
  (z_mean, z_log_var, z); the decoder is called with z and must return a
  (vertex, face) reconstruction pair.

  Args:
    encoder: model mapping (vertex, face) inputs to (z_mean, z_log_var, z).
    decoder: model mapping z to (vertex_reconstruction, face_reconstruction).
    beta: weight applied to the KL term when forming the total loss.
    **kwargs: forwarded to keras.Model.
  """

  def __init__(self, encoder, decoder, beta=0.1, **kwargs):
    super().__init__(**kwargs)
    self.encoder = encoder
    self.decoder = decoder
    self.beta = beta
    # Running means of each loss component, reported from train_step.
    self.total_loss_tracker = keras.metrics.Mean(name="total_loss")
    self.reconstruction_loss_tracker = keras.metrics.Mean(
      name="reconstruction_loss"
    )
    self.kl_loss_tracker = keras.metrics.Mean(name="kl_loss")

  @property
  def metrics(self):
    """Return the three loss trackers updated by train_step."""
    return [
      self.total_loss_tracker,
      self.reconstruction_loss_tracker,
      self.kl_loss_tracker,
    ]

  def call(self, inputs):
    """Forward pass: encode `inputs`, then decode the sampled latent z."""
    z_mean, z_log_var, z = self.encoder(inputs)
    decoded = self.decoder(z)
    return decoded

  def train_step(self, data):
    """Run one optimisation step and return the running loss metrics.

    Args:
      data: batch as delivered by fit(); data[0] is unpacked into the
        (vertex, face) input pair.

    Returns:
      Dict with the running means of "loss", "reconstruction_loss" and
      "kl_loss".
    """
    with tf.GradientTape() as tape:
      data_vertex, data_face = data[0]

      check_data_vertex = tf.debugging.check_numerics(data_vertex, "data_vertex Nan or Inf")
      check_data_face = tf.debugging.check_numerics(data_face, "data_face Nan or Inf")

      if print_shape_debug:
        print(f"data_vertex : {check_data_vertex}")
        print(f"data_face : {check_data_face}")

      z_mean, z_log_var, z = self.encoder((data_vertex, data_face))
      check_z_mean = tf.debugging.check_numerics(z_mean, "z_mean Nan or Inf")
      check_z_log_var = tf.debugging.check_numerics(z_log_var, "z_log_var Nan or Inf")
      # Validate the sampled latent itself (previously z_log_var was checked
      # a second time here, so z was never validated).
      check_z = tf.debugging.check_numerics(z, "z Nan or Inf")

      if print_shape_debug:
        print(f"z_mean : {check_z_mean}")
        print(f"z_log_var : {check_z_log_var}")
        print(f"z : {check_z}")

      reconstruction = self.decoder(z)
      reconstruction_vertex, reconstruction_face = reconstruction

      check_reconstruction_vertex = tf.debugging.check_numerics(reconstruction_vertex, "reconstruction_vertex Nan or Inf")
      check_reconstruction_face = tf.debugging.check_numerics(reconstruction_face, "reconstruction_face Nan or Inf")

      if print_shape_debug:
        print(f"reconstruction_vertex : {check_reconstruction_vertex}")
        print(f"reconstruction_face : {check_reconstruction_face}")

      # Vertex reconstruction loss: MSE, summed over axis 1, then averaged
      # over the remaining (batch) axis.
      mse_loss_vertex = keras.losses.mean_squared_error(data_vertex, reconstruction_vertex)
      tf.debugging.check_numerics(mse_loss_vertex, "mse_loss_vertex Nan or Inf")
      sum_loss_vertex = tf.reduce_sum(mse_loss_vertex, axis=(1))
      tf.debugging.check_numerics(sum_loss_vertex, "sum_loss_vertex Nan or Inf")
      reconstruction_loss_vertex = tf.reduce_mean(sum_loss_vertex)
      tf.debugging.check_numerics(reconstruction_loss_vertex, "reconstruction_loss_vertex Nan or Inf")

      # Face reconstruction loss, computed the same way as the vertex loss.
      mse_loss_face = keras.losses.mean_squared_error(data_face, reconstruction_face)
      tf.debugging.check_numerics(mse_loss_face, "mse_loss_face Nan or Inf")
      sum_loss_face = tf.reduce_sum(mse_loss_face, axis=(1))
      tf.debugging.check_numerics(sum_loss_face, "sum_loss_face Nan or Inf")
      reconstruction_loss_face = tf.reduce_mean(sum_loss_face)
      tf.debugging.check_numerics(reconstruction_loss_face, "reconstruction_loss_face Nan or Inf")

      reconstruction_loss = reconstruction_loss_vertex + reconstruction_loss_face

      # KL term computed from z_mean and z_log_var, summed over axis 1 and
      # averaged over the batch.
      kl_loss = -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
      kl_loss = tf.reduce_mean(tf.reduce_sum(kl_loss, axis=1))
      total_loss = reconstruction_loss + self.beta * kl_loss
    grads = tape.gradient(total_loss, self.trainable_weights)
    self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
    self.total_loss_tracker.update_state(total_loss)
    self.reconstruction_loss_tracker.update_state(reconstruction_loss)
    self.kl_loss_tracker.update_state(kl_loss)
    return {
      "loss": self.total_loss_tracker.result(),
      "reconstruction_loss": self.reconstruction_loss_tracker.result(),
      "kl_loss": self.kl_loss_tracker.result(),
    }

In [None]:
#@title Train the VAE with our artificially expanded dataset {vertical-output: true}

# model size for 208mil encoder + 218mil decoder = 1.6GB

# Drop the reference to the parsed PLY objects (presumably to release memory
# before training — confirm nothing later in the notebook still needs them).
ply_objs = None

# Initialize VAE and try one prediction to allow weight import/export
vae = VAE(encoder, decoder)
vae.compile(optimizer=keras.optimizers.Adam(config.learning_rate))
_ = vae.predict([vertex_input_list[:1], face_input_list[:1]])

start_epoch = 0
end_epoch = 100000
model_checkpoint_frequency = 5000 if use_partial_dataset else 2000

# None means "no Drive backup". Resetting it here also discards any stale
# path left in the kernel by a previous run with model_use_gdrive enabled.
backup_checkpoint_path = None

if model_use_gdrive:
  if use_partial_dataset:
    backup_checkpoint_path = DRIVE + "models/partialdataset_model_weights.h5"
  else:
    backup_checkpoint_path = DRIVE + "models/model_weights.h5"

  # Resume from the Drive backup when one exists; otherwise start fresh.
  try:
    vae.load_weights(backup_checkpoint_path)
  except FileNotFoundError:
    print('Model weights not found.')

# Train in chunks of model_checkpoint_frequency epochs so the weights can be
# backed up between chunks.
for i in range(start_epoch, end_epoch, model_checkpoint_frequency):
  hist = vae.fit([vertex_input_list, face_input_list], initial_epoch = i,
                 epochs = min(i+model_checkpoint_frequency, end_epoch), batch_size = dataset_size, callbacks=[
                      WandbMetricsLogger(log_freq=5),
                      WandbModelCheckpoint("models")
                    ])

  # Only back up when a Drive path was configured; previously this line
  # raised NameError (or wrote to a stale path) when model_use_gdrive was off.
  if backup_checkpoint_path is not None:
    vae.save_weights(backup_checkpoint_path)

Epoch 1/5000
Epoch 2/5000
Epoch 3/5000
Epoch 4/5000
Epoch 5/5000
Epoch 6/5000
Epoch 7/5000
Epoch 8/5000
Epoch 9/5000
Epoch 10/5000
Epoch 11/5000
Epoch 12/5000
Epoch 13/5000
Epoch 14/5000
Epoch 15/5000
Epoch 16/5000
Epoch 17/5000
Epoch 18/5000
Epoch 19/5000
Epoch 20/5000
Epoch 21/5000
Epoch 22/5000
Epoch 23/5000
Epoch 24/5000
Epoch 25/5000
Epoch 26/5000
Epoch 27/5000
Epoch 28/5000
Epoch 29/5000
Epoch 30/5000
Epoch 31/5000
Epoch 32/5000
Epoch 33/5000
Epoch 34/5000
Epoch 35/5000
Epoch 36/5000
Epoch 37/5000
Epoch 38/5000
Epoch 39/5000
Epoch 40/5000
Epoch 41/5000
Epoch 42/5000
Epoch 43/5000
Epoch 44/5000
Epoch 45/5000
Epoch 46/5000
Epoch 47/5000
Epoch 48/5000
Epoch 49/5000
Epoch 50/5000
Epoch 51/5000
Epoch 52/5000
Epoch 53/5000
Epoch 54/5000
Epoch 55/5000
Epoch 56/5000
Epoch 57/5000
Epoch 58/5000
Epoch 59/5000
Epoch 60/5000
Epoch 61/5000
Epoch 62/5000
Epoch 63/5000
Epoch 64/5000
Epoch 65/5000
Epoch 66/5000
Epoch 67/5000
Epoch 68/5000
Epoch 69/5000
Epoch 70/5000
Epoch 71/5000
Epoch 72/5000
E

KeyboardInterrupt: ignored

In [None]:
# to run if training is halted via interrupt
# Recomputes the Drive backup path for the current dataset mode.
# NOTE(review): this cell only sets the path; it does not save any weights —
# confirm whether a save call was intended here.
if model_use_gdrive:
  backup_checkpoint_path = DRIVE + (
    "models/partialdataset_model_weights.h5" if use_partial_dataset
    else "models/model_weights.h5"
  )

In [None]:
#@title Export model output as PLY file {vertical-output: true}

# TODO: still using input_list as fake model output
# (the first entry of each input list stands in for a real decoder output).
vertex_output = vertex_input_list[0]
face_output = face_input_list[0]

# De-normalize the output data: scale by the standard deviation (sqrt of the
# variance) and add the mean back.
# NOTE(review): mean_*/variance_* are defined outside this cell — presumably
# the statistics used when the inputs were normalized; confirm.
vertex_output = vertex_output * tf.sqrt(variance_ver) + mean_ver
face_output = face_output * tf.sqrt(variance_face) + mean_face

# Print summary statistics of the de-normalized values. The labels mention
# "input_list" because the values above are placeholder inputs.
print(f'Mean of vertex_input_list values: {np.mean(vertex_output)}, Std Dev: {np.std(vertex_output)}')
print(f'\nMean of face_input_list values: {np.mean(face_output)}, Std Dev: {np.std(face_output)}')

# NOTE(review): vertex_output/face_output were already indexed with [0] above
# and are indexed with [0] again here — confirm this matches the shape that
# PlyObject.from_model expects.
obj_from_model = PlyObject.from_model("from_tf_arr", vertex_output[0], face_output[0], 0)

# Save under /content/ using the object's own name (presumably writes a PLY
# file — save_file is defined elsewhere).
obj_from_model.save_file(f"/content/{obj_from_model.name}.ply")

Mean of vertex_input_list values: 0.3851780891418457, Std Dev: 0.34732112288475037

Mean of face_input_list values: 5532.9873046875, Std Dev: 3210.900146484375


# Side Adventure: Characterize performance for random list generation

In [None]:
#@title {vertical-output: true}

# algorithm specification:
# generate an evenly distributed random list that sums to total_sum, minimum value of any element = 1

# this will be used extensively during padding, so it's worth spending time optimizing and performance testing

import random
import numpy as np
import pandas as pd


def verify_random_list(lst, length, total_sum):
  """Check a generated list against the algorithm specification.

  The checks run in order and stop at the first failure: `lst` must contain
  `length` elements, its elements must sum to `total_sum`, and its smallest
  element must be at least 1.

  Raises:
    ValueError: "Incorrect length", "Incorrect sum" or "Minimum value not 1",
      depending on which check failed first.
  """
  if len(lst) != length:
    raise ValueError("Incorrect length")

  if sum(lst) != total_sum:
    raise ValueError("Incorrect sum")

  if min(lst) < 1:
    raise ValueError("Minimum value not 1")


def random_list_simple(length, total_sum):
  """Return `length` integers built by random single-unit increments.

  Every element starts at 1; the remaining `total_sum - length` units are
  then handed out one at a time to uniformly chosen positions. When
  `total_sum` does not exceed `length`, no units are handed out and the list
  of ones is returned unchanged.
  """
  counts = [1 for _ in range(length)]
  surplus = total_sum - length

  # Distribute the surplus one unit at a time
  for _ in range(surplus):
    slot = random.randint(0, length - 1)
    counts[slot] += 1

  return counts


def random_list_dist_fill_float(length, total_sum, fill_float, max_attempts=1000):
  """Generate `length` integers >= 1 summing to `total_sum`, retrying on overshoot.

  Each element is first drawn uniformly from [1, upper_bound], where
  upper_bound = int(fill_float * (total_sum - length) / length). If the draw
  already exceeds `total_sum` it is discarded and redrawn; otherwise the
  remaining units are added one at a time to random positions.

  Args:
    length: number of elements to generate.
    total_sum: required sum of the returned list.
    fill_float: scales the per-element upper bound of the initial draw.
    max_attempts: maximum number of initial draws before giving up. The
      retry used to be unbounded recursion, which ended in RecursionError
      for fill_float values that overshoot almost every time.

  Returns:
    A list of `length` ints. Falls back to random_list_simple when the
    computed upper bound is below 1.

  Raises:
    RuntimeError: if every one of `max_attempts` draws exceeded `total_sum`.
  """
  # Range for random value generation for each element
  upper_bound = int(fill_float * ((total_sum - length) / length))

  if upper_bound < 1:
    return random_list_simple(length, total_sum)

  for _attempt in range(max_attempts):
    # Generate initial list and calculate the current sum
    values = [random.randint(1, upper_bound) for _ in range(length)]
    current_sum = sum(values)

    # If current_sum already exceeds total_sum, discard this draw and retry
    if current_sum > total_sum:
      continue

    # Distribute the remaining sum across the list
    for _ in range(current_sum, total_sum):
      values[random.randint(0, length - 1)] += 1

    return values

  raise RuntimeError(
    f"random_list_dist_fill_float: initial draw exceeded total_sum in all "
    f"{max_attempts} attempts (fill_float={fill_float} is likely too large)"
  )


def random_list_try_dist_then_simple(length, total_sum, fill_float):
  """Try one bounded random draw; fall back to random_list_simple otherwise.

  Each element is drawn uniformly from [1, cap], where
  cap = int(fill_float * (total_sum - length) / length). If the cap is at
  least 1 and the draw does not exceed `total_sum`, the missing units are
  added one at a time to random positions and the list is returned. In every
  other case the result comes from random_list_simple (no retry).
  """
  per_element_cap = int(fill_float * ((total_sum - length) / length))

  if per_element_cap >= 1:
    # Single attempt at the bounded initial draw
    draws = [random.randint(1, per_element_cap) for _ in range(length)]
    drawn_total = sum(draws)

    if drawn_total <= total_sum:
      # Top up to total_sum one unit at a time
      for _ in range(drawn_total, total_sum):
        draws[random.randint(0, length - 1)] += 1
      return draws

  # Cap too small, or the draw overshot: use the simple generator instead
  return random_list_simple(length, total_sum)


def random_list_np_multinomal(length, total_sum):
  """Generate the list with a single NumPy multinomial draw.

  The `total_sum - length` surplus units are spread over `length` equally
  likely slots by np.random.multinomial, and 1 is then added to every slot.
  The result is returned as a plain Python list.
  """
  uniform_probs = np.ones(length) / length
  surplus = total_sum - length
  counts = np.random.multinomial(surplus, uniform_probs)

  return (counts + 1).tolist()


# Benchmark scenarios: name -> (length, total_sum), unpacked positionally into
# each generator. The "Nx" in a name is roughly total_sum / length.
scenarios = {
  "Small Scale 10x": (100, 1000),
  "Large Scale 5x": (4000, 21000),
  "Large Scale 2x": (10500, 21000),
  "Large Scale 1.3x": (16000, 21000),
}

# Generators called as func(length, total_sum)
functions = [
  random_list_simple,
  random_list_np_multinomal
]

# Generators called as func(length, total_sum, fill_float)
float_functions = [
  random_list_dist_fill_float,
  random_list_try_dist_then_simple,
]

# Number of times each generator is called per scenario
num_iterations = 100

# fill_float values tried with every entry of float_functions
float_values = [1.6, 1.8, 1.95, 2.0]

# Imported here so the cell does not depend on `time` having been imported by
# an earlier cell.
import time


def _benchmark_generator(label, generate, scenario_values, iterations, error_prefix=""):
  """Call `generate` repeatedly, verify every result and print a report.

  Args:
    label: text printed in front of " ran in ... s." in the report header.
    generate: zero-argument callable returning one generated list.
    scenario_values: (length, total_sum) tuple passed to verify_random_list.
    iterations: number of times `generate` is called.
    error_prefix: text printed before the comma-joined error messages.

  Only the `generate` calls are timed; verification and the pandas summary
  statistics are excluded so the reported time reflects the generator itself.
  """
  results = []
  errors = set()
  elapsed_time = 0.0

  for _ in range(iterations):
    start_time = time.perf_counter()
    try:
      res = generate()
      elapsed_time += time.perf_counter() - start_time
      results.append(res)
      verify_random_list(res, *scenario_values)
    except ValueError as e:
      # Collect distinct failure messages instead of aborting the benchmark
      errors.add(str(e))

  error_message = 'Failed' if errors else 'Succeeded'
  print(f"\n{label} ran in {elapsed_time:.2f} s. {error_message}.")

  if errors:
    print(f"{error_prefix}{', '.join(errors)}")
    return

  # One row per iteration, one column per list position
  results_df = pd.DataFrame(results)
  std_dev = results_df.std()
  avg_std_dev = np.mean(std_dev)
  max_value = results_df.max().max()
  min_value = results_df.min().min()
  times_max_occurred = results_df.eq(max_value).sum().sum()
  times_min_occurred = results_df.eq(min_value).sum().sum()

  print(f"Average standard deviation across iterations: {avg_std_dev:.2f}")
  print(f"Max value: {max_value} (occurred {times_max_occurred} times)")
  print(f"Min value: {min_value} (occurred {times_min_occurred} times)")


for scenario_name, scenario_values in scenarios.items():
  print(f"Scenario: {scenario_name} (repeated {num_iterations} iterations)")
  for func in functions:
    _benchmark_generator(
      func.__name__,
      lambda: func(*scenario_values),
      scenario_values,
      num_iterations,
      error_prefix="Errors: ",
    )

  # try float functions with different float values
  for float_func in float_functions:
    for float_value in float_values:
      _benchmark_generator(
        f"{float_func.__name__} (float {float_value})",
        lambda: float_func(*scenario_values, float_value),
        scenario_values,
        num_iterations,
      )

  print('-' * 50)