In [None]:
import os
import time
from pathlib import Path
import random
from pprint import pprint

# try to suppress TF output before any potentially tf-importing modules
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"

import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
# import kerastuner as kt

In [None]:
# Gzip-compressed CSV with the training samples; it is read once with pandas
# (schema preview) and once with tf.data (training input) further down.
DATASET_PATH = "../../../data/fixed-map-rands/main.csv.gzip"
# Validation set path; disabled together with every validation step in this notebook.
# VALIDATION_DATASET_PATH = "../../../data/fixed-map-vps-1v1/main.csv.gzip"

In [None]:
# Read just the first ten rows: the column names are all that later cells
# need from this frame, so there is no reason to load the whole file here.
data = pd.read_csv(
    DATASET_PATH,
    compression="gzip",
    nrows=10,
)
# Schema check against the validation set (disabled with the validation path):
# validation_data = pd.read_csv(VALIDATION_DATASET_PATH, nrows=10, compression='gzip')
# assert (data.columns == validation_data.columns).all()

# Keep the frame as the cell's last expression so it is rendered as a table.
data

In [None]:
# ===== Read Dataset
# Model inputs: every "F_"-prefixed column of the preview frame, except the
# "F_PORT*" and "F_TILE*" groups.
_EXCLUDED_PREFIXES = ("F_PORT", "F_TILE")
INPUT_COLUMNS = [
    column
    for column in data.columns
    if column.startswith("F_") and not column.startswith(_EXCLUDED_PREFIXES)
]
# pprint(INPUT_COLUMNS)
NUM_FEATURES = len(INPUT_COLUMNS)
LABEL_COLUMN = "RETURN"
BATCH_SIZE = 1024

# Optional input normalization from precomputed statistics (off by default).
NORMALIZATION = False
NORMALIZATION_DIRECTORY = "data/reachability"
NORMALIZATION_MEAN_PATH = Path(NORMALIZATION_DIRECTORY) / "samples-mean.npy"
NORMALIZATION_VARIANCE_PATH = Path(NORMALIZATION_DIRECTORY) / "samples-variance.npy"

# Reader settings, gathered in one place so a second (validation) dataset can
# reuse exactly the same configuration.
csv_reader_options = dict(
    batch_size=BATCH_SIZE,
    num_epochs=1,
    label_name=LABEL_COLUMN,
    select_columns=INPUT_COLUMNS + [LABEL_COLUMN],
    compression_type="GZIP",
    # Original author's note: shuffling is applied at the element level.
    shuffle=True,
    shuffle_seed=1,
    shuffle_buffer_size=1000,
    prefetch_buffer_size=100,
)

# Streams (column-dict, label) batches straight from the compressed CSV.
dataset = tf.data.experimental.make_csv_dataset(
    str(DATASET_PATH), **csv_reader_options
)

# Validation counterpart, disabled along with VALIDATION_DATASET_PATH:
# validation_dataset = tf.data.experimental.make_csv_dataset(
#     str(VALIDATION_DATASET_PATH), **csv_reader_options
# )

def preprocess(batch, label):
    """Convert one CSV batch into a feature tensor and its labels.

    Args:
        batch: Mapping from column name to the tensor holding that column's
            values for the current batch.
        label: Label values for the same rows.

    Returns:
        A ``(features, labels)`` tuple. ``features`` is the float32 cast of
        every column of ``batch`` whose name is in ``INPUT_COLUMNS``, stacked
        along axis 1 in the iteration order of ``batch``. ``labels`` is
        ``tf.stack(label)``.
    """
    # Select by name, preserving the mapping's own ordering.
    selected_names = [name for name in batch if name in INPUT_COLUMNS]
    float_columns = [tf.cast(batch[name], tf.float32) for name in selected_names]

    feature_matrix = tf.stack(float_columns, axis=1)
    return feature_matrix, tf.stack(label)

# Replace each (column-dict, label) batch with the stacked tensors built by preprocess.
dataset = dataset.map(map_func=preprocess)
# validation_dataset = validation_dataset.map(map_func=preprocess)

In [None]:
# ===== Configuration
# NOTE(review): EPOCHS and PREFETCH_BUFFER_SIZE are not read anywhere in the
# visible notebook (training passes epochs=50 and the CSV reader uses a
# prefetch buffer of 100) — confirm which values are intended.
EPOCHS = 100
PREFETCH_BUFFER_SIZE = 10

MODEL_NAME = "1v1-value-network"
MODEL_PATH = f"data/models/{MODEL_NAME}"
# One TensorBoard log directory per run, keyed by the current Unix timestamp.
LOG_DIR = f"data/logs/{MODEL_NAME}/{int(time.time())}"

# ===== REGULAR MODEL
# NOTE(review): `init` is created but never handed to a layer below.
init = tf.keras.initializers.HeUniform()
inputs = tf.keras.Input(shape=(NUM_FEATURES,))
outputs = inputs

if NORMALIZATION:
    # NOTE(review): FEATURE_INDICES is not defined in the visible notebook, so
    # turning NORMALIZATION on raises NameError until it is supplied.
    mean, variance = (
        np.load(statistics_path)[FEATURE_INDICES]
        for statistics_path in (
            NORMALIZATION_MEAN_PATH,
            NORMALIZATION_VARIANCE_PATH,
        )
    )
    normalizer_layer = tf.keras.layers.experimental.preprocessing.Normalization(
        mean=mean, variance=variance
    )
    outputs = normalizer_layer(outputs)

# Hidden layers that are currently disabled:
# outputs = tf.keras.layers.BatchNormalization()(outputs)
# outputs = tf.keras.layers.Dense(352, activation="relu")(outputs)
# outputs = tf.keras.layers.Dense(320, activation="relu")(outputs)
# outputs = tf.keras.layers.Dense(160, activation="relu")(outputs)
# outputs = tf.keras.layers.Dense(512, activation="relu")(outputs)
# outputs = tf.keras.layers.Dense(352, activation="relu")(outputs)
# outputs = tf.keras.layers.Dense(64, activation="relu")(outputs)
# outputs = tf.keras.layers.Dense(32, activation="relu")(outputs)

# The only active hidden layer: 8 ReLU units.
hidden_layer = tf.keras.layers.Dense(
    units=8,
    activation="relu",
    kernel_initializer="random_normal",
)
outputs = hidden_layer(outputs)

# BINARY CLASSIFICATION SETUP
# A single sigmoid unit with an L2 penalty on its kernel.
prediction_layer = tf.keras.layers.Dense(
    units=1,
    activation="sigmoid",
    kernel_initializer="random_normal",
    kernel_regularizer="l2",
)
outputs = prediction_layer(outputs)

model = tf.keras.Model(inputs=inputs, outputs=outputs)

optimizer = tf.keras.optimizers.Adam(learning_rate=3e-4, clipnorm=1)
model.compile(
    optimizer=optimizer,
    loss="binary_crossentropy",
    metrics=["mae", "accuracy"],
)

model.summary()

In [None]:
# ===== Fit Final Model
# NOTE(review): the epoch count is hard-coded to 50 here while the
# configuration cell defines EPOCHS = 100 — confirm which one is intended.
start = time.time()

# Logs weight histograms every epoch and the model graph under LOG_DIR.
tensorboard_callback = tf.keras.callbacks.TensorBoard(
    log_dir=LOG_DIR, histogram_freq=1, write_graph=True
)
# Disabled: needs validation data to monitor "val_mae".
# early_stopping_callback = tf.keras.callbacks.EarlyStopping(
#     monitor="val_mae", patience=1, min_delta=0.0001
# )

history = model.fit(
    x=dataset,
    epochs=50,
    callbacks=[tensorboard_callback],
    # Options currently switched off:
    # steps_per_epoch=10,
    # validation_steps=10,
    # validation_data=validation_dataset,
    # class_weight=class_weight,
)

elapsed_seconds = time.time() - start
print("Training took", elapsed_seconds)

In [None]:
# Accuracy per epoch, using the "accuracy" metric requested in model.compile.
# Each curve carries its own label, so the legend can only list series that
# were actually drawn (the old fixed ['train', 'test'] legend named a 'test'
# curve that was never plotted).
fig, ax = plt.subplots()
ax.plot(history.history["accuracy"], label="train")
# The validation curve appears on its own once fit() is given validation data.
if "val_accuracy" in history.history:
    ax.plot(history.history["val_accuracy"], label="test")
ax.set_title("model accuracy")
ax.set_ylabel("accuracy")
ax.set_xlabel("epoch")
ax.legend(loc="upper left")
plt.show()

In [None]:
# Loss per epoch. Each curve carries its own label, so the legend can only
# list series that were actually drawn (the old fixed ['train', 'test']
# legend named a 'test' curve that was never plotted).
fig, ax = plt.subplots()
ax.plot(history.history["loss"], label="train")
# The validation curve appears on its own once fit() is given validation data.
if "val_loss" in history.history:
    ax.plot(history.history["val_loss"], label="test")
ax.set_title("model loss")
ax.set_ylabel("loss")
ax.set_xlabel("epoch")
ax.legend(loc="upper left")
plt.show()

In [None]:

# Persist the trained model under MODEL_PATH and report where it went.
model.save(filepath=MODEL_PATH)
print("Saved model at:", MODEL_PATH)