In [1]:
# Imports
import tensorflow as tf
import tensorflow.keras as keras
import numpy as np
import matplotlib.pyplot as plt
import random
import time
import math
import json
import training

In [2]:
# Constants

# Network Structure
CONTEXT_SIZE = 8        # How many other voxels are considered for a training example
EMBEDDING_SIZE = 64     # Dimensionality of the voxel embedding vector
STACKED_LAYERS = 2      # How many times the network structure repeats itself
ATTENTION_HEADS = 2     # Number of heads in each multi-headed attention mechanism

# Training Hyperparameters
CHECK_RADIUS = 7        # How far away voxels can be to be part of a training example
CENTER_FOCUS = 0.3      # How much to focus on picking voxels close to the center of the cube. Must be between 0 and 1.
LEARNING_RATE = 1e-4
TRAINING_EXAMPLES = 100

In [3]:
# Load voxel palette
# The output is a 255-dimensional vector of probabilities for different colors
# Which 255 colors can be generated is decided by the palette file

# Index 0 is reserved as 'undecided' voxel
# Index 1 is reserved as 'air' voxel
# Index 2-255 are colors. So there are 254 possible colors.
with open('data/palette.json', 'r') as json_file:
    palette = json.load(json_file)['colors']
    palette_size = len(palette)

print(f"Palette has {palette_size} colors")


Palette has 256 colors


In [4]:
# Create model
def main_model():
    input = keras.Input(shape=(CONTEXT_SIZE, EMBEDDING_SIZE,), name='input_layer')

    x = input

    # Stacked layers
    for i in range(STACKED_LAYERS):
        # Multi-headed attention
        fx = keras.layers.MultiHeadAttention(
            num_heads=ATTENTION_HEADS,
            key_dim=EMBEDDING_SIZE,
            name=f'multi_head_attention_{i}',
        )(x, x)

        # Normalization
        fx = keras.layers.LayerNormalization(name=f'normalization_{i}a')(x)

        # Residual connection
        x = keras.layers.Add(name=f'residual_connection_{i}a')([x,fx])

        # Feedforward
        fx = keras.layers.Dense(EMBEDDING_SIZE, name=f'feedforward_{i}')(x)
        fx = keras.layers.LeakyReLU(name=f'relu_{i}')(fx)

        # Normalization
        fx = keras.layers.LayerNormalization(name=f'normalization_{i}b')(fx)

        # Residual connection
        x = keras.layers.Add(name=f'residual_connection_{i}b')([x,fx])
    
    # Final feedforward layer
    # Output size should be palette_size-1, since we don't want it to be able to choose "undecided"
    x = keras.layers.Dense(palette_size-1, name='feedforward_final')(x)

    # Softmax
    x = keras.layers.Softmax(name='softmax')(x)
    
    # Build and return model
    return keras.Model(inputs=input, outputs=x)

model = main_model()
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_layer (InputLayer)       [(None, 8, 64)]      0           []                               
                                                                                                  
 normalization_0a (LayerNormali  (None, 8, 64)       128         ['input_layer[0][0]']            
 zation)                                                                                          
                                                                                                  
 residual_connection_0a (Add)   (None, 8, 64)        0           ['input_layer[0][0]',            
                                                                  'normalization_0a[0][0]']       
                                                                                              

In [5]:
# Set up loss and optimizer
loss_function = tf.keras.losses.CategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam(LEARNING_RATE)

In [19]:
def encode(pos, dim, size):
    w = pos/(10000**((2*dim)/size))
    if dim % 2 == 0:
        w = math.sin(w)
    else:
        w = math.cos(w)
    return w

# Define embedding function
def embed(position, index):
    # Get rgb value
    if index <= 1:
        rgb = (-10, -10, -10)
    else:
        rgb = palette[index]
        rgb[0] /= 255
        rgb[1] /= 255
        rgb[2] /= 255

    # Spread rgb across the embedding size (r, g, b, r, g, b, r, g, b, etc.)
    scale = int((EMBEDDING_SIZE+2)/3)
    embedding = list(rgb) * scale

    # Cut off the remainder
    while len(embedding) > EMBEDDING_SIZE:
        embedding.pop()

    # Add positional encoding
    dimension_length = int(EMBEDDING_SIZE / 3)
    for i in range(dimension_length):
        # X
        embedding[i] += encode(position[0], i, dimension_length)
        # Y
        embedding[i + dimension_length] += encode(position[1], i, dimension_length)
        # Z
        embedding[i + dimension_length*2] += encode(position[2], i, dimension_length)
    
    # Return
    return embedding

# Encode a palette index into a one-hot-encoded output vector
def encode_one_hot(index):
    # Change UNDECIDED into AIR
    if index < 1:
        index = 1
    
    # Create the zeros vector
    ret = np.zeros((palette_size-1,))

    # Add the one at the right index
    # We remove the zero index because the model should never output UNDECIDED
    ret[index-1] = 1.0
    return ret

In [20]:
# Design training examples
training_examples = training.generate_training_examples(TRAINING_EXAMPLES, CONTEXT_SIZE)

print(f"{len(training_examples)} training examples created.")
print(training_examples[0])

# Reformat training examples into tensor format
training_inputs = []
training_outputs = []
for example in training_examples:
    inputEntry = []
    outputEntry = []
    for voxel in example:
        inputEntry.append(embed(voxel[0], voxel[1]))
        outputEntry.append(encode_one_hot(voxel[1]))
    
    # Shift the output
    inputEntry.pop(0)
    outputEntry.pop(-1)

    # Push to training example list
    training_inputs.append(inputEntry)
    training_outputs.append(outputEntry)

training_input_tensor = tf.Variable(training_inputs, tf.float64)
training_output_tensor = tf.Variable(training_outputs, tf.float64)

print(training_inputs[0])
print(training_outputs[0])

100 training examples created.
[((7, 5, 7), 247), ((9, 7, 8), 247), ((8, 2, 8), 247), ((6, -1, 9), 247), ((6, 1, 9), 247), ((7, 0, 8), 247), ((2, 2, 2), 247), ((2, 1, 2), 247), ((2, 2, 1), 247)]
[[0.4121184852417566, -0.8241971521534709, 0.9999072470297011, 0.7974632995076811, 0.2661742906796119, 0.9937269805470542, 0.04659839157698506, 0.9998120215418507, 0.00806526707599458, 0.9999943725485259, 0.0013954637489467698, 0.9999998315377371, 0.00024144261922937773, 0.9999999949569518, 4.1774299490365034e-05, 0.9999999998490324, 7.227771499189438e-06, 0.9999999999954806, 1.2505459449354991e-06, 0.9999999999998647, 2.1636892651589597e-07, 0.6569865987187891, -0.9736893839936586, 0.9360167858093346, 0.8757640820029914, 0.2080201664904841, 0.9962036408312902, 0.036248379994106544, 0.9998862832288925, 0.006273012371676211, 0.9999965957379703, 0.0010853608327887425, 0.9999998980907288, 0.0001877887045658654, 0.9999999969492671, 3.249112182957281e-05, 0.9999999999086739, 5.6216000549444545e-06, 

In [None]:
# Training step
@tf.function
def train_step(input_data, output_data):
    # Set up tape
    with tf.GradientTape() as tape:
      output = model(input_data, training=True)

      loss = loss_function(output_data, output)

      gradients = tape.gradient(loss, model.trainable_variables)

      optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    
    # Evaluate
    #input_data_test, output_data_test = generate_data(100)
    #output = model(input_data_test, training=False)

# Training loop
def train(epochs):
  # For each epoch...
  for epoch in range(epochs):
    
    
    # For each minibatch...
    for _ in range(1):
      train_step(training_examples)

      print()

  
train(1000000)