In [1]:
# Imports
import tensorflow as tf
import tensorflow.keras as keras
import numpy as np
import random
from datetime import datetime
import math
import json
import training
import color

In [2]:
# Constants
# Tunable settings for the notebook; the cells below read these names directly.

# Network Structure
CONTEXT_SIZE = 5      # How many other voxels are considered for a training example (sequence length of both model inputs)
EMBEDDING_SIZE = 520    # Dimensionality of the voxel embedding vector
STACKED_LAYERS = 1      # How many times the network structure repeats itself
ATTENTION_HEADS = 10    # Number of heads in each multi-headed attention mechanism

# Training Hyperparameters
# NOTE(review): CHECK_RADIUS and CENTER_FOCUS are not read by any cell visible in this
# notebook - presumably they are meant for the `training` module; confirm they are wired up.
CHECK_RADIUS = 7        # How far away voxels can be to be part of a training example
CENTER_FOCUS = 0.3      # How much to focus on picking voxels close to the center of the cube. Must be between 0 and 1.
LEARNING_RATE = 1e-3    # Learning rate given to the Adam optimizer used by the custom training loop
TRAINING_EXAMPLES = 4   # Number of examples requested from training.generate_training_examples

In [3]:
# Load voxel palette
# The model emits a 255-dimensional probability vector, one entry per selectable
# palette index; the palette file decides which colors those indices stand for.
#
# Palette index layout:
#   0      -> reserved 'undecided' voxel
#   1      -> reserved 'air' voxel
#   2..255 -> actual colors (254 of them)
with open('data/palette.json', 'r') as json_file:
    raw_palette = json.load(json_file)['colors']

# Expanding and measuring the palette only needs the parsed data, so the file
# handle is already closed at this point.
palette = color.expand_palette(raw_palette)
palette_size = len(palette)

print(f"Palette has {palette_size} colors")

Palette has 256 colors


In [4]:
# Create model
def main_model():
    """Build the voxel-prediction network as a Keras functional model.

    The model takes two inputs, each shaped (CONTEXT_SIZE, EMBEDDING_SIZE) per
    example:

      * 'input'          - the embedded context; it is layer-normalised and run
                           through STACKED_LAYERS repetitions of causally masked
                           self-attention and a feedforward layer, each followed
                           by a residual add and layer normalisation.
      * 'input_next_pos' - bypasses the stack and is concatenated onto the
                           stack output along axis 2.

    Returns:
        keras.Model whose output is, for every context slot, a softmax over
        palette_size-1 classes.
    """
    layers = keras.layers

    context_in = keras.Input(shape=(CONTEXT_SIZE, EMBEDDING_SIZE), name='input')
    next_pos_in = keras.Input(shape=(CONTEXT_SIZE, EMBEDDING_SIZE), name='input_next_pos')

    # Normalise the raw embeddings before the first block
    hidden = layers.LayerNormalization(name='normalization_start_a')(context_in)

    for i in range(STACKED_LAYERS):
        # Self-attention: `hidden` serves as both query and value, with a causal mask
        attention = layers.MultiHeadAttention(
            num_heads=ATTENTION_HEADS,
            key_dim=EMBEDDING_SIZE,
            name=f'multi_head_attention_{i}',
        )
        attended = attention(hidden, hidden, use_causal_mask=True)
        hidden = layers.Add(name=f'residual_connection_{i}a')([hidden, attended])
        hidden = layers.LayerNormalization(name=f'normalization_{i}a')(hidden)

        # Feedforward sub-layer: Dense followed by LeakyReLU
        transformed = layers.Dense(EMBEDDING_SIZE, name=f'feedforward_{i}')(hidden)
        transformed = layers.LeakyReLU(name=f'relu_{i}')(transformed)
        hidden = layers.Add(name=f'residual_connection_{i}b')([hidden, transformed])
        hidden = layers.LayerNormalization(name=f'normalization_{i}b')(hidden)

    # Append the next-position embedding to every slot of the stack output
    merged = layers.Concatenate(axis=2, name='concatenate_next_pos')([hidden, next_pos_in])

    # palette_size-1 outputs, because the "undecided" voxel must not be selectable
    scores = layers.Dense(palette_size - 1, name='feedforward_final')(merged)
    probabilities = layers.Softmax(name='softmax')(scores)

    return keras.Model(inputs=[context_in, next_pos_in], outputs=probabilities)

# Instantiate the network and print its layer summary.
# The output head is a softmax over palette classes, so the compiled loss is
# categorical cross-entropy. Binary cross-entropy would treat every class as an
# independent yes/no decision and make Keras resolve 'accuracy' to binary accuracy.
# Training itself goes through the custom train_step (with its own loss_function
# and optimizer); these compile settings only matter for model.evaluate/fit and
# for what model.save stores alongside the weights.
model = main_model()
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input (InputLayer)             [(None, 5, 520)]     0           []                               
                                                                                                  
 normalization_start_a (LayerNo  (None, 5, 520)      1040        ['input[0][0]']                  
 rmalization)                                                                                     
                                                                                                  
 multi_head_attention_0 (MultiH  (None, 5, 520)      10832120    ['normalization_start_a[0][0]',  
 eadAttention)                                                    'normalization_start_a[0][0]']  
                                                                                              

In [5]:
# Set up loss and optimizer
# Both objects are used by the custom training step: the optimizer applies the
# gradients and the loss compares the one-hot targets with the softmax output.
# `keras` is the tensorflow.keras alias from the imports cell.
optimizer = keras.optimizers.Adam(learning_rate=LEARNING_RATE)
loss_function = keras.losses.CategoricalCrossentropy()

In [14]:
# Design training examples
# Ask the training module for TRAINING_EXAMPLES examples built around a context
# of CONTEXT_SIZE voxels.
training_examples = training.generate_training_examples(TRAINING_EXAMPLES, CONTEXT_SIZE)
print(f"{len(training_examples)} training examples created.")

# Show one randomly chosen example as a quick sanity check of the data format
sample_example = random.choice(training_examples)
print(sample_example)

4 training examples created.
[((0, 2, 5), 231), ((1, 2, 5), 1), ((2, 2, 5), 1), ((4, 4, 4), 231), ((4, 5, 4), 1), ((4, 4, 5), 231)]


In [15]:
def encode_training_input(example):
    """Embed an example's voxels into a list of exactly CONTEXT_SIZE+1 vectors.

    Each voxel is embedded with training.embed from its index in the example,
    voxel[0] and voxel[1]. Voxels beyond the first CONTEXT_SIZE+1 are ignored;
    shorter examples are padded with all-zero vectors of length EMBEDDING_SIZE.

    Args:
        example: Sequence of voxels; presumably (position, palette_index)
            pairs as printed by the example-generation cell - verify against
            training.generate_training_examples.

    Returns:
        A new list with CONTEXT_SIZE+1 entries.
    """
    slot_count = CONTEXT_SIZE + 1
    encoded = []

    # Embed voxels until every slot is filled
    for index, voxel in enumerate(example):
        if len(encoded) >= slot_count:
            break
        encoded.append(training.embed(index, voxel[0], voxel[1], palette, EMBEDDING_SIZE))

    # Fill whatever is left with zero vectors
    missing = slot_count - len(encoded)
    if missing > 0:
        blank = [0] * EMBEDDING_SIZE
        encoded.extend([blank] * missing)

    return encoded

def encode_training_input_next_pos(example, next_pos=None):
    """Build the CONTEXT_SIZE+1 "next position" vectors for an example.

    Two modes:

    * ``next_pos is None`` (training): every voxel of ``example`` is embedded
      from its index and voxel[0], with -1 passed in place of the palette
      index. The result is truncated / zero-padded to CONTEXT_SIZE+1 entries.
    * ``next_pos`` given (generation): all slots are zero vectors except slot
      ``len(example)``, which holds the embedding of ``next_pos`` (again with
      -1 in place of the palette index).

    Args:
        example: Sequence of voxels (the current context).
        next_pos: Position of the voxel about to be generated, or None.

    Returns:
        A new list with CONTEXT_SIZE+1 entries.

    Raises:
        IndexError: If ``next_pos`` is given and ``example`` already has more
            than CONTEXT_SIZE voxels, since there is no slot left for it.
    """
    slot_count = CONTEXT_SIZE + 1
    zero_elem = [0,] * EMBEDDING_SIZE

    # Identity test: `== None` would be evaluated element-wise for array-like positions
    if next_pos is None:
        # zip with range() both numbers the voxels and stops once every slot is filled
        inputEntry = [
            training.embed(index, voxel[0], -1, palette, EMBEDDING_SIZE)
            for index, voxel in zip(range(slot_count), example)
        ]

        # Pad remainder of context with zeros (a non-positive count adds nothing)
        inputEntry += [zero_elem,] * (slot_count - len(inputEntry))
        return inputEntry

    # Generation mode: only the slot right after the current context is populated
    inputEntry = [zero_elem,] * slot_count
    inputEntry[len(example)] = training.embed(len(example), next_pos, -1, palette, EMBEDDING_SIZE)
    return inputEntry

def encode_training_output(example):
    """Return one target vector per voxel in ``example``.

    Each vector is produced by training.encode_one_hot from voxel[1] and
    palette_size. Unlike the input encoders, the result is neither truncated
    nor padded: it has exactly as many entries as ``example``.
    """
    return [training.encode_one_hot(voxel[1], palette_size) for voxel in example]

# Reformat training examples into tensor format
def encode_training_examples(training_examples):
    """Encode raw training examples into the three tensors used for training.

    For every example the context input, the next-position input and the
    target output are encoded, then shifted against each other so that slot k
    of the context input lines up with voxel k+1 in the other two:

      * context input   - last slot dropped
      * next-pos input  - first slot dropped
      * target output   - first slot dropped

    Args:
        training_examples: Iterable of examples as accepted by
            encode_training_input / encode_training_output.

    Returns:
        Tuple ``(inputs, inputs_next_pos, outputs)`` of float32 tensors, each
        with the examples along the first axis.
    """
    training_inputs = []
    training_inputs_next_pos = []
    training_outputs = []

    for example in training_examples:
        # The encoders return fresh lists, so slicing is equivalent to popping
        training_inputs.append(encode_training_input(example)[:-1])
        training_inputs_next_pos.append(encode_training_input_next_pos(example)[1:])
        training_outputs.append(encode_training_output(example)[1:])

    # The data is fixed, so plain tensors are used instead of tf.Variable.
    # The dtype is passed by keyword: tf.Variable's second positional parameter
    # is `trainable`, so the former `tf.Variable(x, tf.float64)` never set a
    # dtype. float32 matches what was actually produced and what the model uses.
    training_input_tensor = tf.convert_to_tensor(training_inputs, dtype=tf.float32)
    training_input_next_pos_tensor = tf.convert_to_tensor(training_inputs_next_pos, dtype=tf.float32)
    training_output_tensor = tf.convert_to_tensor(training_outputs, dtype=tf.float32)

    return training_input_tensor, training_input_next_pos_tensor, training_output_tensor

# Encode every generated example into the (context, next-position, target) tensors
(
    training_input_tensor,
    training_input_next_pos_tensor,
    training_output_tensor,
) = encode_training_examples(training_examples)

# Debug aid: uncomment to inspect the first example, skipping the first 10 embedding columns
# print(training_input_tensor[0, :, 10:])
# print(training_input_next_pos_tensor[0, :, 10:])
# print(training_output_tensor[0, :, 10:])

tf.Tensor(
[[ 0.0000000e+00  1.0000000e+00  0.0000000e+00 ...  1.0000000e+00
   6.2099438e-08  1.0000000e+00]
 [ 8.4147096e-01  5.4030228e-01  7.2094172e-01 ...  1.0000000e+00
   6.2099438e-08  1.0000000e+00]
 [ 9.0929741e-01 -4.1614684e-01  9.9921900e-01 ...  1.0000000e+00
   6.2099438e-08  1.0000000e+00]
 [-7.5680250e-01 -6.5364361e-01 -7.8966245e-02 ...  1.0000000e+00
   4.9679549e-08  1.0000000e+00]
 [-7.5680250e-01 -6.5364361e-01 -7.8966245e-02 ...  1.0000000e+00
   4.9679549e-08  1.0000000e+00]], shape=(5, 510), dtype=float32)
tf.Tensor(
[[ 8.4147096e-01  5.4030228e-01  7.2094172e-01 ...  1.0000000e+00
   6.2099438e-08  1.0000000e+00]
 [ 9.0929741e-01 -4.1614684e-01  9.9921900e-01 ...  1.0000000e+00
   6.2099438e-08  1.0000000e+00]
 [-7.5680250e-01 -6.5364361e-01 -7.8966245e-02 ...  1.0000000e+00
   4.9679549e-08  1.0000000e+00]
 [-7.5680250e-01 -6.5364361e-01 -7.8966245e-02 ...  1.0000000e+00
   4.9679549e-08  1.0000000e+00]
 [-7.5680250e-01 -6.5364361e-01 -7.8966245e-02 ...  1.

In [None]:
# Batch training data
# The three streams are batched separately and zipped back together in the
# training loop, so they must share one batch size to stay aligned. Keeping it
# in a single constant prevents the three calls from drifting apart.
BATCH_SIZE = 128

dataset_input = tf.data.Dataset.from_tensor_slices(training_input_tensor)
dataset_input_next_pos = tf.data.Dataset.from_tensor_slices(training_input_next_pos_tensor)
dataset_output = tf.data.Dataset.from_tensor_slices(training_output_tensor)

training_input_batched = dataset_input.batch(BATCH_SIZE)
training_input_next_pos_batched = dataset_input_next_pos.batch(BATCH_SIZE)
training_output_batched = dataset_output.batch(BATCH_SIZE)

In [None]:
# Construct an example sculpture using the model's current progress
# TODO: Move this to a separate file so we can do multithreading and other improvements
def build_sculpture(count, base=None, temperature=1.0):
    """Generate a sculpture voxel by voxel by sampling from the current model.

    A single voxel with a random palette index is placed at a random position,
    then ``count - 1`` further voxels are added. For each one, the position is
    chosen by training.pick_next_voxel and the palette index is sampled from
    the model's output distribution for that position.

    Args:
        count: Total number of voxels to place, including the starting voxel.
        base: Currently unused - a random one-voxel base is always created.
        temperature: Currently unused - sampling temperature is not implemented.

    Returns:
        dict mapping ``training.ttos(position)`` to the chosen palette index.
    """
    # TODO: honour `base` when one is supplied
    # Named start_color so the imported `color` module is not shadowed
    start_color = int(random.random() * 254) + 1
    voxels = {}
    start_pos = (
        int(random.random() * training.SIZE[0]),
        int(random.random() * training.SIZE[1]),
        int(random.random() * training.SIZE[2]),
    )
    voxels[training.ttos(start_pos)] = start_color

    # Build context vector for the sculpture
    context = [(start_pos, start_color)]

    for _ in range(count - 1):
        # Determine where the next voxel will go
        next_pos = training.pick_next_voxel(voxels, context)

        # Context input: same encoding and shift (drop last slot) as in training.
        # dtype is passed by keyword; tf.Variable's second positional parameter is
        # `trainable`, so the former `tf.Variable(x, tf.float64)` never set a dtype.
        input_data = encode_training_input(context)
        input_data.pop(-1)
        input_tensor = tf.reshape(
            tf.convert_to_tensor(input_data, dtype=tf.float32),
            [1, -1, EMBEDDING_SIZE],
        )

        # Next-position input: drop the first slot, which moves the embedded
        # next_pos to slot len(context)-1 - the slot the prediction is read from.
        input_next_pos_data = encode_training_input_next_pos(context, next_pos)
        input_next_pos_data.pop(0)
        # Bug fix: this used to reshape `input_tensor`, so the model received the
        # context twice and the next-position embedding was thrown away.
        input_next_pos_tensor = tf.reshape(
            tf.convert_to_tensor(input_next_pos_data, dtype=tf.float32),
            [1, -1, EMBEDDING_SIZE],
        )

        output = model([input_tensor, input_next_pos_tensor], training=False)
        # Pull the distribution out as Python floats once, instead of doing one
        # tensor op per palette entry in the sampling loop below
        output_probabilities = output[0][len(context) - 1].numpy().tolist()

        # Sample a palette index from the output distribution (inverse-CDF walk).
        # Output slot k stands for palette index k+1 because the model cannot emit
        # index 0 ('undecided'). Falls back to 1 if rounding leaves the walk unfinished.
        # (Taking the argmax instead would make generation greedy.)
        # TODO: Implement temperature
        choice = random.random()
        chosen_voxel = 1
        for slot, probability in enumerate(output_probabilities):
            choice -= probability
            if choice < 0:
                chosen_voxel = slot + 1
                break

        # Build the voxel
        voxels[training.ttos(next_pos)] = chosen_voxel
        context.append((next_pos, chosen_voxel))
        # Keep the context within the model's window
        if len(context) > CONTEXT_SIZE:
            context = training.remove_farthest(context, next_pos)

    return voxels

In [None]:
# Function for saving sculptures to json
def save_sculpture(sculpture, filename):
    """Write a sculpture to ``filename`` as indented JSON.

    The document has two top-level keys: "size", holding the first three
    entries of training.SIZE as x / y / z, and "voxels", holding ``sculpture``
    unchanged.

    Args:
        sculpture: JSON-serialisable voxel mapping (e.g. from build_sculpture).
        filename: Destination path; opened in write mode, so an existing file
            is overwritten.
    """
    dimensions = {
        "x": training.SIZE[0],
        "y": training.SIZE[1],
        "z": training.SIZE[2],
    }
    json_data = {"size": dimensions, "voxels": sculpture}

    with open(filename, 'w') as output_file:
        json.dump(json_data, output_file, indent=2)

In [None]:
# Training step
@tf.function
def train_step(input_data, input_next_pos_data, output_data):
    """Run one optimisation step on a single mini-batch.

    Args:
        input_data: Batch of context inputs for the model's 'input'.
        input_next_pos_data: Batch of inputs for the model's 'input_next_pos'.
        output_data: Batch of target vectors compared against the model output.

    Returns:
        The loss for this batch (callers are free to ignore it).
    """
    # Only the forward pass and the loss need to be recorded on the tape
    with tf.GradientTape() as tape:
        output = model([input_data, input_next_pos_data], training=True)
        loss = loss_function(output_data, output)

    # Differentiate and update outside the tape context, so the tape does not
    # also record the backward pass
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    return loss

# Training loop
def train(epochs):
    """Train the model for ``epochs`` epochs, saving example sculptures along the way.

    After each epoch the elapsed wall-clock time is checked. Once it reaches
    the current threshold (initially 3 seconds), a full-size sculpture is
    generated and written to examples/json/, and the threshold is moved to 1.3x
    the time elapsed at that point, so examples become progressively rarer.

    Args:
        epochs: Number of passes over the batched training data.
    """
    time_started = datetime.now()
    # POSIX timestamp of the run start, used as a per-run id in example filenames.
    # Replaces `time_started - datetime.utcfromtimestamp(0)`, which mixed naive
    # local time with a UTC epoch and relies on a deprecated API.
    run_id = time_started.timestamp()
    example_time = 3  # elapsed seconds after which the next example is built
    sculptures_made = 0

    def elapsed_seconds():
        return (datetime.now() - time_started).total_seconds()

    # Epochs
    for epoch in range(epochs):
        # Minibatches: the three batched datasets are iterated in lockstep
        for (batch_input, batch_input_next_pos, batch_output) in zip(
            training_input_batched, training_input_next_pos_batched, training_output_batched
        ):
            train_step(batch_input, batch_input_next_pos, batch_output)

        # Check if we should output an example sculpture
        if elapsed_seconds() >= example_time:
            print(f"Epoch {epoch}")
            print("Building example sculpture...")
            # One voxel per grid cell
            sculpture = build_sculpture(training.SIZE[0] * training.SIZE[1] * training.SIZE[2])
            sculptures_made += 1
            # NOTE: save_sculpture opens this path directly, so examples/json/ must already exist
            sculpture_filename = f"examples/json/example_{run_id}_{sculptures_made}.json"
            save_sculpture(sculpture, sculpture_filename)
            print("Done")
            # Measured after the build, so the build time itself is included
            example_time = elapsed_seconds() * 1.3

# Start training. The epoch count is large enough that in practice the run is
# ended by interrupting the kernel rather than by finishing the loop.
train(epochs=10_000_000)

In [None]:
# Save the model to 'sorpok.h5'. This cell comes after the train(...) call, so on a
# top-to-bottom run it only executes once training has returned.
model.save('sorpok.h5')