# Fifth attempt at a world model
## Basic Process

In [1]:
# Settings and setup
import tensorflow as tf

# Enable memory growth on every visible GPU so TensorFlow allocates GPU memory
# on demand instead of reserving it all at start-up.
for gpu in tf.config.list_physical_devices('GPU'):
    tf.config.experimental.set_memory_growth(gpu, True)

# Main Settings
# ROLE selects which player's inputs are loaded and which score formula is used
# when the dataset is built ("TAGGER" or anything else for the taggee).
ROLE = "TAGGER"
# Number of frames between a sample and the future score it is paired with.
PRED_FRAMES_IN_ADVANCE = 10

# Encoder Settings
# Shape of one input frame as fed to the encoder's Input layer.
IMG_INPUT_SHAPE=(640, 640, 3)

# Number of dense-block + transition-down stages (one extra dense block follows them).
NUM_DBS = 8
# Convolutions per dense block.
NUM_LAYERS_PER_DB = 1
# Multiplied with NUM_LAYERS_PER_DB to get the filter-count increase per dense block.
NUM_FILTERS_PER_LAYER_OF_DB = 2

# The encoder head has DOWNSCALING_LAYERS + 1 Dense layers whose widths are
# DOWNSCALING_FACTOR ** (DOWNSCALING_LAYERS - i) * OUTPUT_SIZE, ending at OUTPUT_SIZE.
DOWNSCALING_LAYERS = 4
DOWNSCALING_FACTOR = 2
# Width of the encoding consumed by the cost calculator and the predictor.
OUTPUT_SIZE = 64
ENC_DROPOUT = 0.25

# PREDICTOR
# Number of candidate outcomes the predictor emits per (action, encoding) pair.
PRED_POSSIBLE_OUTCOMES = 8
PRED_ACTION_LAYERS = 2
PRED_ENCODED_LAYERS = 2
PRED_INTER_LAYERS = 2
PRED_INTER_LAYER_SIZE = 2 * PRED_POSSIBLE_OUTCOMES * OUTPUT_SIZE # This should scale with the number of predictions it makes and the size of each prediction
PRED_DROPOUT = 0.25

# Cost Calculator Settings
CC_INTER_LAYERS = 6
CC_INTER_LAYER_SIZE = OUTPUT_SIZE * 4
CC_DROPOUT = 0.25
CC_TOPLINE_DROPOUT = 0.5 # Dropout applied directly to the cost calculator's input; meant to address the difficulties and uncertainties of the predictor's job by pushing the encoder towards somewhat generalizable encodings

# Action Generator Settings
# Width of the action ("keys") input of the predictor.
ACTIONS = 1
# Number of candidate actions evaluated when choosing a move.
NUM_POSSIBLE_ACTIONS = 100
# Moving-average window used to smooth predicted costs; the same number of
# candidates is trimmed from each end of the action range.
ACTION_SPACE_SMOOTHING = 5
# NOTE(review): the ACT_* values below are not referenced by any cell visible in this notebook — confirm whether they are still needed.
ACT_INTER_LAYERS = 2 + PRED_INTER_LAYERS + CC_INTER_LAYERS # So that it is large enough to understand the other two models in play
ACT_INTER_LAYER_SIZE = 1024
ACT_DROPOUT = 0.1

2023-11-09 16:38:29.108038: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-11-09 16:38:29.176956: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Encoder

In [2]:
# Image segmenter base
from tensorflow.keras import layers, models

def create_dense_block(x, num_of_layers, num_filters, kernel_size=5):
    """Append a densely connected stack of convolutions to ``x``.

    Every convolution reads the concatenation of the block input and the
    outputs of all earlier convolutions in the block.

    Args:
        x: Input tensor of the block.
        num_of_layers: Number of convolutions to add.
        num_filters: Filters used by each convolution.
        kernel_size: Kernel size of each convolution.

    Returns:
        The concatenation of the block input with every convolution output,
        or ``x`` itself when ``num_of_layers`` is 0.
    """
    feature_maps = [x]
    current = x
    for _ in range(num_of_layers):
        conv = layers.Conv2D(num_filters, kernel_size, activation="elu", padding="same")
        feature_maps.append(conv(current))
        # Re-concatenate so the next convolution sees everything produced so far.
        current = layers.Concatenate()(feature_maps)
    return current

def create_transistion_down(x, layer_size_change, num_filters=None):
    """Down-sample ``x`` with max pooling, optionally after a 1x1 convolution.

    Args:
        x: Input tensor.
        layer_size_change: Pool size passed to ``MaxPool2D``.
        num_filters: When truthy, a 1x1 ELU convolution with this many filters
            is applied before pooling; otherwise ``x`` is pooled directly.

    Returns:
        The pooled tensor.
    """
    to_pool = x
    if num_filters:
        pointwise = layers.Conv2D(num_filters, 1, activation="elu", padding="same")
        to_pool = pointwise(x)

    pool = layers.MaxPool2D(layer_size_change)
    return pool(to_pool)

# Filter-count increase applied for every dense block.
filters_per_block = NUM_LAYERS_PER_DB * NUM_FILTERS_PER_LAYER_OF_DB

# Outputs of each dense block (before down-sampling) and the running filter count.
skip_connections = []
previous_filters = [filters_per_block]

# Stem: a single 9x9 convolution on the raw frame.
model_in = layers.Input(shape=IMG_INPUT_SHAPE)
x = layers.Conv2D(previous_filters[-1], 9, activation="elu", padding="same")(model_in)

# NUM_DBS dense blocks, each followed by a 2x down-sampling transition, then one
# final dense block with no transition after it.
for block_index in range(NUM_DBS + 1):
    previous_filters.append(filters_per_block + previous_filters[-1])
    x = create_dense_block(x, NUM_LAYERS_PER_DB, previous_filters[-1])
    if block_index < NUM_DBS:
        skip_connections.append(x)
        x = create_transistion_down(x, 2, num_filters=previous_filters[-1])

dense_net = models.Model(model_in, x)

dense_net.summary()

2023-11-09 16:38:30.884821: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1639] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22161 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4090, pci bus id: 0000:18:00.0, compute capability: 8.9
2023-11-09 16:38:30.885363: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1639] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 22451 MB memory:  -> device: 1, name: NVIDIA GeForce RTX 3090, pci bus id: 0000:af:00.0, compute capability: 8.6


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 640, 640, 3)]        0         []                            
                                                                                                  
 conv2d (Conv2D)             (None, 640, 640, 2)          488       ['input_1[0][0]']             
                                                                                                  
 conv2d_1 (Conv2D)           (None, 640, 640, 4)          204       ['conv2d[0][0]']              
                                                                                                  
 concatenate (Concatenate)   (None, 640, 640, 6)          0         ['conv2d[0][0]',              
                                                                     'conv2d_1[0][0]']        

In [3]:
# Encoder (Simple downscaler)

# The encoder randomly flips the frame, runs it through dense_net, flattens the
# result and narrows it with Dense+Dropout pairs down to OUTPUT_SIZE units.
encoder_layers = [
    layers.Input(shape=IMG_INPUT_SHAPE),
    layers.RandomFlip(),
    dense_net,
    layers.Flatten(),
]

for step in range(DOWNSCALING_LAYERS + 1):
    # Widths shrink by DOWNSCALING_FACTOR per step; the last one equals OUTPUT_SIZE.
    width = DOWNSCALING_FACTOR ** (DOWNSCALING_LAYERS - step) * OUTPUT_SIZE
    encoder_layers.append(layers.Dense(width, activation="elu"))  # kernel_regularizer='l2'
    encoder_layers.append(layers.Dropout(ENC_DROPOUT))

encoder = models.Sequential(encoder_layers)

# encoder.add(layers.BatchNormalization(axis=-1))
encoder.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 random_flip (RandomFlip)    (None, 640, 640, 3)       0         
                                                                 
 model (Functional)          (None, 2, 2, 38)          35780     
                                                                 
 flatten (Flatten)           (None, 152)               0         
                                                                 
 dense (Dense)               (None, 1024)              156672    
                                                                 
 dropout (Dropout)           (None, 1024)              0         
                                                                 
 dense_1 (Dense)             (None, 512)               524800    
                                                                 
 dropout_1 (Dropout)         (None, 512)               0

# Cost Calculator

In [4]:
# Cost calculator: maps an encoding to a single sigmoid-activated cost.
cc_layers = [
    layers.Input(shape=OUTPUT_SIZE),
    # Dropout applied directly to the incoming encoding.
    layers.Dropout(CC_TOPLINE_DROPOUT),
]

# Internal Dense+Dropout pairs
for _ in range(CC_INTER_LAYERS):
    cc_layers.append(layers.Dense(CC_INTER_LAYER_SIZE, activation="elu"))  # kernel_regularizer='l2'
    cc_layers.append(layers.Dropout(CC_DROPOUT))

# Two narrowing Dense layers without activation, then the sigmoid output unit.
cc_layers += [
    layers.Dense(CC_INTER_LAYER_SIZE // 2),
    layers.Dense(CC_INTER_LAYER_SIZE // 4),
    layers.Dense(1, activation="sigmoid"),
]

cost_calculator = models.Sequential(cc_layers)

cost_calculator.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dropout_5 (Dropout)         (None, 64)                0         
                                                                 
 dense_5 (Dense)             (None, 256)               16640     
                                                                 
 dropout_6 (Dropout)         (None, 256)               0         
                                                                 
 dense_6 (Dense)             (None, 256)               65792     
                                                                 
 dropout_7 (Dropout)         (None, 256)               0         
                                                                 
 dense_7 (Dense)             (None, 256)               65792     
                                                                 
 dropout_8 (Dropout)         (None, 256)              

# Predictor

In [5]:
# Build new predictor model

def _dense_dropout_stack(tensor, units, depth):
    """Apply ``depth`` pairs of Dense(units, elu) + Dropout(PRED_DROPOUT) to ``tensor``."""
    for _ in range(depth):
        tensor = layers.Dense(units, activation="elu")(tensor)
        tensor = layers.Dropout(PRED_DROPOUT)(tensor)
    return tensor


# Action branch
keys = layers.Input(shape=ACTIONS)
pred_keys = _dense_dropout_stack(keys, PRED_INTER_LAYER_SIZE // 2, PRED_ACTION_LAYERS)

# Encoded-frame branch
encoded_frame = layers.Input(shape=OUTPUT_SIZE)
pred_frame = _dense_dropout_stack(encoded_frame, PRED_INTER_LAYER_SIZE // 2, PRED_ENCODED_LAYERS)

# Join both branches and process them together
joint = tf.concat([pred_keys, pred_frame], axis=-1)
joint = _dense_dropout_stack(joint, PRED_INTER_LAYER_SIZE, PRED_INTER_LAYERS)

# Narrow towards one sigmoid value per possible outcome
for shrink in (1, 2, 4):
    joint = layers.Dense(PRED_POSSIBLE_OUTCOMES * (OUTPUT_SIZE // shrink), activation="elu")(joint)
joint = layers.Dense(PRED_POSSIBLE_OUTCOMES, activation="sigmoid")(joint)

# Output shape: (batch, PRED_POSSIBLE_OUTCOMES, 1)
joint = tf.reshape(joint, (tf.shape(keys)[0], PRED_POSSIBLE_OUTCOMES, 1))

predictor = models.Model([keys, encoded_frame], joint)
predictor.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_4 (InputLayer)        [(None, 1)]                  0         []                            
                                                                                                  
 input_5 (InputLayer)        [(None, 64)]                 0         []                            
                                                                                                  
 dense_14 (Dense)            (None, 512)                  1024      ['input_4[0][0]']             
                                                                                                  
 dense_16 (Dense)            (None, 512)                  33280     ['input_5[0][0]']             
                                                                                            

# Training

In [6]:
# Settings
# Learning rate for the joint stage, where the encoder is trained together with
# the cost calculator and with the predictor.
ENCODER_AND_COST_AND_PRED_LR = 1 * 10**-3
# Learning rates for the later stages that train the cost calculator and the
# predictor on their own.
COST_LR = 1 * 10**-4
PRED_LR = 1 * 10**-4

# Upper bound on epochs for every stage; early stopping uses MAX_EPOCHS // 10 as patience.
MAX_EPOCHS = 200
BATCH_SIZE = 16

In [None]:
# Create dataset
import tensorflow as tf
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import RobustScaler
import numpy as np

def _load_run_tensor(run, name, dtype):
    """Deserialize the tensor stored at ``runs/<run>/<name>.proto_tensor``."""
    return tf.io.parse_tensor(tf.io.read_file("runs/%d/%s.proto_tensor" % (run, name)), dtype)


def _scale_to_unit_range(values):
    """Robust-scale a 1-D tensor and min-max rescale the result into [0, 1].

    Args:
        values: 1-D tensor of samples.

    Returns:
        ``(scaled, low, high)`` where ``scaled`` is a float16 column of shape
        (n, 1) and ``low`` / ``high`` are the min and max of the robust-scaled
        values that were used for the final rescale.
    """
    column = tf.expand_dims(values, axis=-1)
    column = tf.numpy_function(lambda x: RobustScaler().fit_transform(x), [column], float)
    low = tf.math.reduce_min(column, keepdims=True)
    high = tf.math.reduce_max(column, keepdims=True)
    return tf.cast((column - low) / (high - low), tf.float16), low, high


def _show_histogram(values, title):
    """Plot a 100-bin histogram of the first column of ``values``."""
    plt.hist(values[:, 0].numpy(), bins=100)
    plt.title(title)
    plt.tight_layout()
    plt.show()


# Fixed seed so that train/valid/test membership is identical on every iteration.
SPLIT_SEED = 0

with tf.device("cpu:0"):
    runs = pd.read_csv("game_runs.csv")[["Run", "Winner"]].sample(frac=1).values.tolist()
    all_frames = []
    all_next_frames = []
    all_input = []
    all_scores = []

    # Length of the frame diagonal; constant for every run, so computed once.
    scores_normalizer = tf.math.sqrt(tf.cast(tf.math.square(IMG_INPUT_SHAPE[0]) + tf.math.square(IMG_INPUT_SHAPE[1]), float))
    # scores_normalizer = IMG_INPUT_SHAPE[0]

    inputs_name = "tagger_inputs" if ROLE == "TAGGER" else "taggee_inputs"

    for run, winner in runs[:75]:
        frames = tf.cast(_load_run_tensor(run, "frames", tf.uint8), tf.float16)/255.0
        all_frames.append(frames)

        all_input.append(_load_run_tensor(run, inputs_name, float))

        tagger_poses = _load_run_tensor(run, "tagger_poses", float)
        taggee_poses = _load_run_tensor(run, "taggee_poses", float)

        # Euclidean distance between both players, per frame.
        distance = tf.math.sqrt(tf.math.reduce_sum(tf.math.square(tagger_poses - taggee_poses), axis=1))
        if ROLE == "TAGGER":
            scores = distance/scores_normalizer
        else:
            scores = (scores_normalizer - distance)/scores_normalizer

        # if winner != ROLE:
        #     # # Set all last scores to be 1
        #     # # scores = tf.concat([scores[:-5:], tf.repeat(tf.constant([1.0]), repeats=5)], axis=0)

        #     # Make scores at the end more linear
        #     dist = 0.75 - scores[-10].numpy()
        #     smoothed_end = [scores[-10].numpy() + (dist / 10 * (i + 1)) for i in range(10)]
        #     scores = tf.concat([scores[:-10:], tf.constant(smoothed_end, float)], axis=0)

        all_scores.append(scores)
        print(run)

    # Pair every sample with the score PRED_FRAMES_IN_ADVANCE frames later, so the
    # last PRED_FRAMES_IN_ADVANCE frames of each run have no target and are dropped.
    # all_next_frames = tf.concat([frame[PRED_FRAMES_IN_ADVANCE:] for frame in all_frames], axis=0)
    all_frames = tf.concat([run_frames[:-PRED_FRAMES_IN_ADVANCE] for run_frames in all_frames], axis=0)
    all_input = tf.cast(tf.concat([run_input[:-PRED_FRAMES_IN_ADVANCE] for run_input in all_input], axis=0), tf.float16)
    all_future_score_diffs = tf.cast(tf.concat([score[PRED_FRAMES_IN_ADVANCE:] - score[:-PRED_FRAMES_IN_ADVANCE] for score in all_scores], axis=0), tf.float16)
    all_scores = tf.cast(tf.concat([score[:-PRED_FRAMES_IN_ADVANCE] for score in all_scores], axis=0), tf.float16)

    # Normalize scores
    all_scores, all_scores_min, all_scores_max = _scale_to_unit_range(all_scores)

    # Normalize future scores
    all_future_score_diffs, all_future_score_diffs_min, all_future_score_diffs_max = _scale_to_unit_range(all_future_score_diffs)

    # Collapse the two input columns into one signed value (first minus second)
    mult_inputs = tf.constant([[1, -1]], tf.float16)
    print(all_input.shape, mult_inputs.shape)
    all_input *= mult_inputs
    all_input = tf.math.reduce_sum(all_input, axis=-1, keepdims=True)

    # Round the inputs to two decimals
    all_input *= 100
    all_input = tf.math.round(all_input)/100

    print(tf.shape(all_frames), tf.shape(all_input), tf.shape(all_scores), tf.shape(all_future_score_diffs))
    print(tf.math.reduce_max(all_scores), tf.math.reduce_min(all_scores))

    _show_histogram(all_scores, "Score Distribution")
    _show_histogram(all_input, "Input Distribution")
    _show_histogram(all_future_score_diffs, "Change in Score Distribution")

    # Plot how inputs relate to scores
    inputs_np = all_input[:, 0].numpy().astype(float)
    diffs_np = all_future_score_diffs[:, 0].numpy().astype(float)
    plt.scatter(inputs_np, diffs_np)
    a, b = np.polyfit(inputs_np, diffs_np, 1)
    plt.plot(inputs_np, a*inputs_np+b, label="LofBF: m: %.5f, b: %.1f" % (a, b), c="tab:orange")
    plt.xlabel("Current Input")
    plt.ylabel("Change in Score")
    plt.title("Inputs Vs Changes in Future Scores")
    plt.legend()
    plt.tight_layout()
    plt.show()

    # plt.hist(all_delta_scores[:, 0].numpy(), bins=100)
    # plt.title("Delta Score Distribution")
    # plt.show()

    # Make DS
    full_ds = tf.data.Dataset.from_tensor_slices((all_frames, all_input, all_scores, all_future_score_diffs))
    num_samples = int(full_ds.cardinality())

    # Shuffle ONCE with a fixed order. With the default reshuffle_each_iteration=True
    # every split (and every epoch) would see a different permutation, so samples
    # would leak between train, validation and test.
    full_ds = full_ds.shuffle(num_samples, seed=SPLIT_SEED, reshuffle_each_iteration=False)

    # Disjoint 80/10/10 split by position in the fixed permutation. take/skip is
    # used for all three parts so that no index can land in two splits.
    num_train = (8 * num_samples)//10
    num_valid = num_samples//10
    big_train_ds = full_ds.take(num_train)
    big_valid_ds = full_ds.skip(num_train).take(num_valid)
    big_test_ds = full_ds.skip(num_train + num_valid)

In [None]:
# Make the histogram of inputs completely even

# Approach: using a very small epsilon e, ensure that there is only one x value within each of the ranges (-1, -1 + e), (-1 + e, -1 + 2*e), ...  The epsilon should be very small.

In [None]:
# Train encoder, predictor, and cost calculaor together
from tensorflow.keras import optimizers

def _squared_errors(y_true, y_pred):
    """Squared error between each target and every candidate prediction.

    Args:
        y_true: Targets of shape (batch, dims).
        y_pred: Candidates of shape (batch, candidates, dims). A prediction
            whose trailing size-1 axis has been removed, i.e. shape
            (batch, candidates), is also accepted.

    Returns:
        Tensor of shape (batch, candidates, dims).
    """
    # Keras' loss/metric wrappers may squeeze a trailing size-1 axis off y_pred
    # before calling a custom function. Without restoring it, the broadcast below
    # would pair each target with the candidates of every sample in the batch.
    if len(y_pred.shape) == len(y_true.shape):
        y_pred = tf.expand_dims(y_pred, axis=-1)
    y_true = tf.cast(y_true, y_pred.dtype)
    return tf.math.square(tf.expand_dims(y_true, axis=1) - y_pred)


def minimum_mse(y_true, y_pred):
    """Per-sample MSE of the candidate prediction that is closest to the target."""
    per_candidate = tf.math.reduce_mean(_squared_errors(y_true, y_pred), axis=2)  # axis 0 is batch, axis 1 the candidates
    return tf.math.reduce_min(per_candidate, axis=1)  # best candidate per sample


def mse(y_true, y_pred):
    """Per-sample MSE averaged over all candidate predictions."""
    return tf.math.reduce_mean(_squared_errors(y_true, y_pred), axis=[1, 2])  # axis 0 is the batch axis


def combined_loss(y_true, y_pred, mse_weight=0.25):
    """``minimum_mse`` plus ``mse_weight`` times ``mse``, per sample."""
    return minimum_mse(y_true, y_pred) + mse_weight * mse(y_true, y_pred)

# Uncomment these three lines to resume from previously saved models instead of training from scratch
# encoder = tf.keras.models.load_model("encoder_%s.keras" % ROLE)
# cost_calculator = tf.keras.models.load_model("cost_calculator_%s.keras" % ROLE)
# predictor = tf.keras.models.load_model("predictor_%s.keras" % ROLE)

# Model 1: frame -> encoder -> cost calculator, trained on the current score.
frame_and_score = lambda w, x, y, z: (w, y)
enc_cost_calc = models.Sequential([layers.Input(shape=IMG_INPUT_SHAPE), encoder, cost_calculator])
enc_cost_calc.compile(optimizers.Adam(learning_rate=ENCODER_AND_COST_AND_PRED_LR), loss="mse", metrics=["mse"])
enc_cost_calc.summary()

train_ds_frame_and_score = big_train_ds.map(frame_and_score, num_parallel_calls=tf.data.AUTOTUNE, deterministic=False).batch(BATCH_SIZE).cache()
valid_ds_frame_and_score = big_valid_ds.map(frame_and_score, num_parallel_calls=tf.data.AUTOTUNE, deterministic=False).batch(BATCH_SIZE).cache()

# Model 2: (frame, action) -> encoder -> predictor, trained on the future score change.
frame_key_next_frame = lambda w, x, y, z: ((w, x), z)
cur_frame_in = layers.Input(shape=IMG_INPUT_SHAPE)
cur_key_in = layers.Input(shape=ACTIONS)
enc_pred_out = predictor((cur_key_in, encoder(cur_frame_in)))
enc_pred_enc = models.Model([cur_frame_in, cur_key_in], enc_pred_out)
enc_pred_enc.compile(optimizers.Adam(learning_rate=ENCODER_AND_COST_AND_PRED_LR), loss=combined_loss, metrics=[minimum_mse, mse])
enc_pred_enc.summary()

train_ds_frame_key_next_frame = big_train_ds.map(frame_key_next_frame, num_parallel_calls=tf.data.AUTOTUNE, deterministic=False).batch(BATCH_SIZE).cache()
valid_ds_frame_key_next_frame = big_valid_ds.map(frame_key_next_frame, num_parallel_calls=tf.data.AUTOTUNE, deterministic=False).batch(BATCH_SIZE).cache()

best_encoder, best_cost_calculator, best_pred = None, None, None
min_cc_losses, min_pred_losses = 10**10, 10**10

# Number of consecutive epochs in which no checkpoint was written.
time_without_save = 0

for i in range(MAX_EPOCHS):
    # Alternate one batch of each task so the shared encoder is updated by both.
    # zip stops as soon as either dataset is exhausted.
    for (cc_frames, cc_scores), (pred_inputs, pred_targets) in zip(train_ds_frame_and_score, train_ds_frame_key_next_frame):
        # Fit cost calculator and encoder to one batch
        enc_cost_calc.train_on_batch(cc_frames, cc_scores)

        # Fit predictor and encoder to one batch
        enc_pred_enc.train_on_batch(pred_inputs, pred_targets)

    enc_cc_loss = enc_cost_calc.evaluate(valid_ds_frame_and_score, return_dict=True)["loss"]
    enc_pred_loss = enc_pred_enc.evaluate(valid_ds_frame_key_next_frame, return_dict=True)["loss"]

    # Checkpoint only when BOTH validation losses are at least as good as the best seen so far.
    if enc_cc_loss <= min_cc_losses and enc_pred_loss <= min_pred_losses:
        print(i, "Saving")

        min_cc_losses = enc_cc_loss
        min_pred_losses = enc_pred_loss

        encoder.save("encoder_%s.keras" % ROLE)
        cost_calculator.save("cost_calculator_%s.keras" % ROLE)
        predictor.save("predictor_%s.keras" % ROLE)
        time_without_save = 0
    else:
        print(i)
        time_without_save += 1

    # Early stopping
    if time_without_save > MAX_EPOCHS//10:
        break

# Continue with the last saved checkpoint rather than the final in-memory weights.
encoder = tf.keras.models.load_model("encoder_%s.keras" % ROLE)
cost_calculator = tf.keras.models.load_model("cost_calculator_%s.keras" % ROLE)
predictor = tf.keras.models.load_model("predictor_%s.keras" % ROLE, custom_objects={"combined_loss": combined_loss, "minimum_mse": minimum_mse, "mse": mse})

# The encoder is frozen for the stages that follow.
encoder.trainable = False

In [None]:
# Train cost calculator without encoder and lower lr

cc_optimizer = optimizers.Adam(learning_rate=COST_LR)
cost_calculator.compile(cc_optimizer, loss="mse", metrics=["mse"])
cost_calculator.summary()


def encodings_and_score(w, x, y, z):
    """Map a batch (frames, inputs, scores, future diffs) to (encoder(frames), scores)."""
    return encoder(w), y


def _encoded_score_batches(dataset):
    """Batch ``dataset``, replace frames by their encodings and cache the result."""
    batched = dataset.batch(BATCH_SIZE)
    encoded = batched.map(encodings_and_score, num_parallel_calls=tf.data.AUTOTUNE, deterministic=False)
    return encoded.cache()


encoding_cost_train_ds = _encoded_score_batches(big_train_ds)
encoding_cost_valid_ds = _encoded_score_batches(big_valid_ds)

cc_early_stopping = tf.keras.callbacks.EarlyStopping(patience=MAX_EPOCHS//10, restore_best_weights=True)

cost_calculator.fit(
    encoding_cost_train_ds,
    validation_data=encoding_cost_valid_ds,
    epochs=MAX_EPOCHS,
    callbacks=[cc_early_stopping],
)

# Persist the fine-tuned weights, then freeze the model.
cost_calculator.save("cost_calculator_%s.keras" % ROLE)
cost_calculator.trainable = False

In [None]:
# Train prediction

with tf.device("cpu:0"):
    # Dataset elements are (frames w, inputs x, scores y, future score diffs z).
    # The predictor's inputs are (action, encoded frame), so the action x must be
    # passed here. Passing the current score y has the same shape and therefore
    # fails silently, but trains the predictor on the wrong signal.
    apply_encoder = lambda w, x, y, z: ((x, tf.cast(encoder(w), tf.float16)), z)
    pred_train_ds = big_train_ds.batch(BATCH_SIZE).map(apply_encoder, num_parallel_calls=tf.data.AUTOTUNE, deterministic=False).cache()
    pred_valid_ds = big_valid_ds.batch(BATCH_SIZE).map(apply_encoder, num_parallel_calls=tf.data.AUTOTUNE, deterministic=False).cache()

with tf.device("gpu:0"):
    predictor.compile(optimizers.Adam(learning_rate=PRED_LR), loss=combined_loss, metrics=[minimum_mse, mse])
    predictor.summary()

    predictor.fit(
        pred_train_ds,
        validation_data=pred_valid_ds,
        epochs=MAX_EPOCHS,
        callbacks=[tf.keras.callbacks.EarlyStopping(patience=MAX_EPOCHS//10, restore_best_weights=True)]
    )

# Persist the fine-tuned weights, then freeze the model.
predictor.save("predictor_%s.keras" % ROLE)
predictor.trainable=False

In [None]:
# Train action generator

@tf.function
def define_action(encoded_frames, smoothing=ACTION_SPACE_SMOOTHING, num_actions=NUM_POSSIBLE_ACTIONS):
    """Pick, per encoded frame, the candidate action with the lowest smoothed predicted cost.

    ``num_actions`` candidates evenly spaced over [-1, 1] are each passed through
    ``predictor`` together with ``encoded_frames``. The predicted outcomes are
    averaged per action, smoothed with a moving average of width ``smoothing``
    (which also trims ``smoothing`` candidates from each end), and the argmin is
    returned as the chosen move.

    Args:
        encoded_frames: Batch of encodings in the form ``predictor`` expects as
            its second input.
        smoothing: Moving-average window; must be at least 1 and small enough
            that ``num_actions - 2 * smoothing`` stays positive.
        num_actions: Number of candidate actions to evaluate.

    Returns:
        Tuple ``(moves, predictions, smoothed_predictions)``:
        ``moves`` has shape (batch, 1) and holds the chosen action value,
        ``predictions`` has shape (batch, num_actions, PRED_POSSIBLE_OUTCOMES),
        ``smoothed_predictions`` has shape (batch, num_actions - 2 * smoothing).
    """
    batch_size = tf.shape(encoded_frames)[0]

    # tf.linspace is documented for floating-point start/stop, so pass floats.
    action_values = tf.linspace(-1.0, 1.0, num_actions)  # (num_actions,)

    # One copy of every candidate per batch element: (num_actions, batch_size, 1)
    actions = tf.cast(tf.reshape(action_values, (num_actions, 1, 1)), tf.float16)
    actions = tf.repeat(actions, repeats=batch_size, axis=1)

    predictions = tf.map_fn(
        lambda x: predictor((x, encoded_frames)),
        actions,
        fn_output_signature=tf.TensorSpec((None, PRED_POSSIBLE_OUTCOMES, 1), float)
    )

    # Above returns the predicted scores in shape (num_actions, batch_size, num_predictions, 1)
    # Reshape to (num_actions, batch_size, num_predictions) then (batch_size, num_actions, num_predictions) then avg to (batch_size, num_actions)
    predictions = predictions[:, :, :, 0]
    predictions = tf.transpose(predictions, perm=[1, 0, 2])
    avg_predictions = tf.math.reduce_mean(predictions, axis=-1)

    # Use numpy to smooth each set of actions
    act_num_actions = num_actions - 2 * smoothing
    smoothing_func = lambda y: tf.numpy_function(lambda x: (np.convolve(x, np.ones(smoothing), 'same')[smoothing:-smoothing]/smoothing).astype(np.single), [y], float)
    smoothed_predictions = tf.map_fn(smoothing_func, avg_predictions, fn_output_signature=tf.TensorSpec((act_num_actions,), float))

    # Smoothed index j corresponds to candidate j + smoothing, so reuse the values
    # that were actually evaluated instead of re-deriving a grid with another step.
    pos_actions = action_values[smoothing:num_actions - smoothing]

    # Return the argmin (mapped back to the action value) plus the cost tensors.
    best_index = tf.math.argmin(smoothed_predictions, axis=-1)
    moves = tf.expand_dims(tf.gather(pos_actions, best_index), axis=-1)
    return moves, predictions, smoothed_predictions


class KerasGenActionLayer(layers.Layer):
    """Keras layer wrapper around ``define_action`` that outputs only the chosen move."""

    def call(self, x, training=None):
        """Return the move selected by ``define_action`` for the encodings ``x``.

        ``training`` is accepted for Keras compatibility and is not used.
        """
        moves, _, _ = define_action(x, smoothing=ACTION_SPACE_SMOOTHING, num_actions=NUM_POSSIBLE_ACTIONS)
        return moves


# End-to-end action generator: frame -> encoder -> action selection layer.
inp = layers.Input(shape=IMG_INPUT_SHAPE)
encoded_input = encoder(inp)
chosen_action = KerasGenActionLayer()(encoded_input)

action_gen = models.Model(inp, chosen_action)

action_gen.summary()
action_gen.save("action_gen_%s.keras" % ROLE)

In [None]:
# Plot sanity test: how does turning effect score
import matplotlib.pyplot as plt

test_cc_ds = big_valid_ds.batch(16).take(1)

# define_action evaluates NUM_POSSIBLE_ACTIONS candidates evenly spaced over [-1, 1]
# and trims ACTION_SPACE_SMOOTHING of them from each end when smoothing; use the
# same grids for the x-axes so the curves line up with the actions they belong to.
action_grid = np.linspace(-1, 1, NUM_POSSIBLE_ACTIONS)
smoothed_action_grid = action_grid[ACTION_SPACE_SMOOTHING:NUM_POSSIBLE_ACTIONS - ACTION_SPACE_SMOOTHING]

for elements in test_cc_ds.map(lambda w, x, y, z: (encoder(w), w, y, z)):
    move, costs, smoothed_costs = define_action(elements[0])
    for i in range(int(tf.shape(move)[0])):
        fig, ax = plt.subplots(1, 3, figsize=(15,10))

        # Plot image
        ax[0].imshow(elements[1][i].numpy().astype(float))
        ax[0].set_title("Current frame")

        # Plot scores for each action (one line per predicted outcome)
        ax[1].plot(action_grid, costs[i])
        ax[1].set_title("Predicted future costs")
        ax[1].set_xlabel("Steering input")
        ax[1].set_ylabel("Predicted Change in Utility (Lower is Better)")

        # Plot smoothed scores for each action
        ax[2].plot(smoothed_action_grid, smoothed_costs[i])
        ax[2].set_title("Smoothed predicted future costs")
        ax[2].set_xlabel("Steering input")
        ax[2].set_ylabel("Predicted Change in Utility (Lower is Better)")

        plt.tight_layout()
        plt.show()
        # Release the figure; one is created per sample.
        plt.close(fig)

        print(move[i])

# test_cc_ds = big_valid_ds.take(10)
# for element in test_cc_ds:
#     fig, ax = plt.subplots(1, 3, figsize=(15,10))
#     ax[0].imshow(element[0].numpy().astype(float))
#     ax[0].set_title("Current frame")

#     actions = tf.cast(tf.expand_dims((tf.range(200)/100) - 1, axis=-1), tf.float16)
#     act_results = tf.map_fn(lambda x: cost_calculator(predictor((x, encoder(tf.expand_dims(element[0], axis=0))))[0, :, :]), actions, fn_output_signature=float)
#     actions = tf.cast(actions, float)
    
#     ax[1].plot(actions[:, 0], act_results[:, :, 0])
#     ax[1].set_title("Predicted future costs")
#     ax[1].set_xlabel("Steering input")
#     ax[1].set_ylabel("Predicted Utility (Lower is Better)")
    
#     real_cost = tf.cast(element[3][0], float)
#     calc_cost = cost_calculator(encoder(tf.expand_dims(element[0], axis=0)))[0, 0]

#     # decision_functions = tf.numpy_function(lambda x, y: np.polynomial.polynomial.polyfit(x, np.convolve(y, np.ones(5), 'same')/5, 1), [actions[:, 0], act_results[:, -1, 0]], float)
#     # evaled_functions = tf.map_fn(lambda x: tf.math.polyval(list(decision_functions.numpy()), x), actions[:, 0])
#     # evaled_functions = tf.transpose(tf.cast(evaled_functions, float))
    
#     ax[2].plot(actions[5:-5, 0], np.convolve(tf.math.reduce_mean(act_results[:, :, 0], axis=-1).numpy(), np.ones(5), 'same')[10:-10]/10 - calc_cost)
#     ax[2].set_title("Utility Minimization Function")
#     ax[2].set_xlabel("Steering input")
#     ax[2].set_ylabel("Predicted Utility (Lower is Better)")

#     plt.tight_layout()
#     plt.show()
    
#     print("Real cost, calculated cost, difference", real_cost.numpy(), calc_cost.numpy(), (real_cost - calc_cost).numpy())

#     calc_next_cost = cost_calculator(encoder(tf.expand_dims(element[1], axis=0)))[:, 0]
#     pred_next_cost = cost_calculator(predictor((element[2], encoder(tf.expand_dims(element[0], axis=0))))[0, :, :])[:, 0]

#     print("Caclulated next frame cost, predicted next frame cost cost, difference", calc_next_cost[0].numpy(), pred_next_cost.numpy(), (calc_next_cost - pred_next_cost).numpy())

In [None]:
# Run the action generator on every batch of the test sample and keep all moves
# (not just those of the last batch).
moves = tf.concat([action_gen(element)[:, 0] for element in test_cc_ds.map(lambda w, x, y, z: w)], axis=0)
print(moves)

# Plot histogram of the generated actions
plt.hist(moves.numpy(), bins=100)
plt.title("Generated Action Distribution")
plt.tight_layout()
plt.show()