In [None]:
import os
# Set before TensorFlow is imported so the setting is already in the
# environment when the library loads.
# NOTE(review): level '3' presumably silences TensorFlow's native log output — confirm.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 

import tensorflow as tf
import tensorflow_decision_forests as tfdf
# NOTE(review): `tft` is not referenced by any of the cells visible here.
import tensorflow_transform as tft
import numpy as np
import matplotlib.pyplot as plt
# NOTE(review): `os` is already imported above; this line only re-imports os.path.
import os.path

# Disable annoying "warnings" from tf that aren't relevant:
# only messages at ERROR level or above are let through by absl.
import absl.logging
absl.logging.set_verbosity(absl.logging.ERROR)

# Raise the root logger threshold to CRITICAL. Because this is the root
# logger, the setting applies process-wide, not just to TensorFlow.
import logging
logger = logging.getLogger()
logger.setLevel(logging.CRITICAL)

In [None]:
# Load Data and Labels
contentPath = "GBT_data/"

# Layer identifier; it is only used to name the saved model directories.
# NOTE(review): appears to mirror the "_layer_NN" suffix of fileName — keep
# the two in sync when switching layers.
inputlayer = 20
#fileName = "activation_20_layer_26"
#fileName = "dense_11_layer_25"
#fileName = "activation_19_layer_24"
fileName = "batch_normalization_16_layer_20"

dataSuffix = "_data.npy"
labelsSuffix = "_labels.npy"
imgSuffix = "img.npy"
batchSize = 99999


def load_split(split):
  """Load one data split from disk and wrap it in a batched tf.data.Dataset.

  Args:
    split: Sub-directory of `contentPath` to read from
      ("train", "validation" or "test").

  Returns:
    A tuple `(x, y, y_img, dataset)` where `x` and `y` are the arrays stored
    for `fileName`, `y_img` is the array stored in the split's `img.npy`
    label file, and `dataset` yields `(x, y)` batches of size `batchSize`.
  """
  x = np.load(contentPath + split + "/data/" + fileName + dataSuffix)
  y = np.load(contentPath + split + "/labels/" + fileName + labelsSuffix)
  y_img = np.load(contentPath + split + "/labels/" + imgSuffix)
  dataset = (
      tf.data.Dataset.from_tensor_slices((x, y))
      .batch(batchSize)
      .prefetch(tf.data.AUTOTUNE)
  )
  return x, y, y_img, dataset


train_x, train_y, train_y_img, train_dataset = load_split("train")
valid_x, valid_y, valid_y_img, valid_dataset = load_split("validation")
test_x, test_y, test_y_img, test_dataset = load_split("test")

# Quick sanity check of what was loaded.
print(train_x.shape)
print(valid_x.shape)
print(test_x.shape)

print(train_y_img.shape)

In [None]:
import time

# Hyper-parameter sweep configuration.
# Both tuples are unpacked straight into range(), so they read as
# (start, stop_exclusive, step). The commented-out pairs below are earlier
# sweep settings kept for reference.

# depth_range = (10, 19, 1)
# trees_range = (160, 341, 20)

# depth_range = (12, 18, 5)
# trees_range = (160, 400, 20)

# depth_range = (12, 18, 5)
# trees_range = (280, 431, 5)

# depth_range = (12, 13, 5)
# trees_range = (330, 385, 1)

# Current setting: range(12, 13, 5) -> [12] and range(367, 368, 1) -> [367],
# i.e. exactly one model (max_depth=12, num_trees=367).
depth_range = (12, 13, 5)
trees_range = (367, 368, 1)

# Directory where trained models are saved and from which they are re-loaded.
# NOTE(review): absolute, user-specific path — the notebook will retrain
# everything (or fail to save) on any other machine.
modelSaveDir = "/home/prateek/training/models/classification"
@tf.autograph.experimental.do_not_convert
def train_model(layer, max_depth, num_trees):
  """Return a gradient boosted trees model for the given hyper-parameters.

  The model is cached on disk under `modelSaveDir`, in a directory named
  after `layer`, `max_depth` and `num_trees`. If that directory already
  exists the model is loaded from it; otherwise a new model is fitted on the
  global `train_dataset`, compiled with an accuracy metric, and saved there.

  Args:
    layer: Identifier used only to build the save directory name.
    max_depth: Maximum tree depth passed to the model.
    num_trees: Number of trees passed to the model.

  Returns:
    The loaded or newly trained model.
  """
  print(f'Training GBT with max_depth={max_depth} and num_trees={num_trees}:')
  save_path = f"{modelSaveDir}/{layer}_{max_depth}_{num_trees}_model"
  started = time.perf_counter()

  # Cache miss: train from scratch and persist the result.
  if not os.path.isdir(save_path):
    gbt = tfdf.keras.GradientBoostedTreesModel(
        max_depth=max_depth,
        num_trees=num_trees,
        early_stopping="NONE",
        verbose=0,
    )
    gbt.fit(train_dataset)
    gbt.compile(metrics=["accuracy"])
    gbt.save(save_path)
    elapsed = time.perf_counter() - started
    # Minutes are truncated (not rounded) to two decimals.
    print(f' - Took {int(elapsed)}s ({int(elapsed/60*100)/100}m)!')
    return gbt

  # Cache hit: reuse the previously saved model.
  gbt = tf.keras.saving.load_model(save_path)
  elapsed = time.perf_counter() - started
  print(f' - Loaded from file in {int(elapsed)}s!')
  return gbt

In [None]:
# Train Gradient Boosted Trees Model
# One [max_depth, num_trees, model] entry is collected per grid point.
# train_model() re-loads a saved model when one exists, so the count printed
# at the end includes models that were loaded rather than freshly trained.
models = []
for max_depth in range(*depth_range):
  for num_trees in range(*trees_range):
    gbt = train_model(inputlayer, max_depth, num_trees)
    models.append([max_depth, num_trees, gbt])

print(f'{len(models)} models trained!')

In [None]:
# Get accuracies of the trained models
# Both lists are filled by get_accuracies(), which appends tuples (not lists).
accuracies = [] # validation set: [(max_depth, num_trees, accuracy, model), ...]
accuracies_train = [] # training set: [(max_depth, num_trees, accuracy, model), ...]

def get_accuracies(ds, lst):
    """Evaluate every model in the global `models` list on `ds`.

    For each `[max_depth, num_trees, model]` entry, one
    `(max_depth, num_trees, accuracy, model)` tuple is appended to `lst`,
    in the same order as `models`. `lst` is modified in place and nothing
    is returned.
    """
    for entry in models:
        gbt = entry[2]
        metrics = gbt.evaluate(ds, return_dict=True, verbose=0)
        lst.append((entry[0], entry[1], metrics["accuracy"], gbt))

# Evaluate all models on the validation split first, then on the training split.
for message, dataset, results in (
    ("Getting validation accuracies...", valid_dataset, accuracies),
    ("Getting training accuracies...", train_dataset, accuracies_train),
):
  print(message)
  get_accuracies(dataset, results)

# Sort by accuracy
#accuracies.sort(key=lambda x: -x[2])

# Only the training accuracies are printed here.
for d, n, a, _ in accuracies_train:
  print(f'd={d}, n={n}, a={a}')

In [None]:
# Plot accuracy heatmap
# Axis values of the grid: one row per max_depth, one column per num_trees.
depths = list(range(*depth_range))
trees = list(range(*trees_range))

# Each cell holds the first training accuracy recorded for that
# (max_depth, num_trees) pair; pairs without a result are shown as 0.
values = np.array([
    [
        next(
            (rec[2] for rec in accuracies_train
             if rec[0] == depth and rec[1] == n_trees),
            0,
        )
        for n_trees in trees
    ]
    for depth in depths
])

fig, ax = plt.subplots()
im = ax.imshow(values)

ax.set_yticks(np.arange(len(depths)), labels=depths)
ax.set_xticks(np.arange(len(trees)), labels=trees)

# Write each accuracy into its cell, truncated (not rounded) to 4 decimals.
for i, j in np.ndindex(len(depths), len(trees)):
    ax.text(j, i, int(values[i, j]*10000)/10000,
            ha="center", va="center", color="black", size=8)

fig.set_figwidth(15)
fig.colorbar(im, ax=ax)

ax.set_title("Accuracy for Training Set\n(Higher is Better)")
plt.xlabel("Number of Trees")
plt.ylabel("Maximum Depth of Trees")

# Final figure size; this overrides the width set above.
fig.set_size_inches(23,3)

plt.show()

In [None]:
# Create confusion matrix
# Indexed as matrix[true, predicted]: rows are the true class, columns the
# predicted class.
CATEGORIES = ['Angry', 'Disgust', 'Fear', 'Happy', 'Neutral', 'Sad', 'Surprise']
num_classes = len(CATEGORIES)

# Use the model with the highest validation accuracy. `accuracies` is not
# sorted, so the best entry is selected explicitly instead of taking index 0.
best_depth, best_trees, _best_accuracy, bestModel = max(
    accuracies, key=lambda entry: entry[2])

# Defining `prediction` here keeps the cell runnable on a fresh kernel.
# NOTE(review): assumes predict() returns one score per class for each sample
# and that test_y holds integer class ids in [0, num_classes) — confirm.
# NOTE(review): the previous version took argmax(test_x[n]) and was titled
# "(CNN)"; that is only meaningful if test_x holds per-class scores, which
# does not look like the case for the currently selected layer.
prediction = bestModel.predict(test_dataset)
predicted_labels = np.argmax(prediction, axis=1)
true_labels = np.asarray(test_y).astype(int).reshape(-1)

matrix = np.zeros((num_classes, num_classes))
# np.add.at accumulates correctly when the same (true, predicted) pair repeats.
np.add.at(matrix, (true_labels, predicted_labels), 1)

# Normalize each row to sum to 1; rows for classes with no test samples stay
# at 0 instead of becoming NaN.
row_sums = matrix.sum(axis=1, keepdims=True)
matrix = np.divide(matrix, row_sums, out=np.zeros_like(matrix), where=row_sums > 0)

#Render Image
fig, ax = plt.subplots()
im = ax.imshow(matrix)

ax.set_yticks(np.arange(num_classes), labels=CATEGORIES)
ax.set_xticks(np.arange(num_classes), labels=CATEGORIES)

plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
         rotation_mode="anchor")

# Write each rate into its cell, truncated (not rounded) to 4 decimals.
for i in range(num_classes):
    for j in range(num_classes):
        val = int(matrix[i, j]*10000)/10000
        ax.text(j, i, val, ha="center", va="center", color="r", size=7)

fig.set_figwidth(15)
fig.colorbar(im, ax=ax)

ax.set_title("Confusion Matrix for Test Dataset\n"
             f"(GBT, max_depth={best_depth}, num_trees={best_trees})")

# imshow draws rows along the y-axis, so the y-axis is the true class.
ax.set_xlabel("Predicted")
ax.set_ylabel("Truth")

plt.show()

In [None]:
# Create graph of classification error vs. number of trees for one fixed max_depth
FIXED_DEPTH = 12  # change here only; the filters and the title follow


def _error_curve(results, depth):
  """Return (num_trees list, error list) for the entries trained at `depth`.

  `results` holds (max_depth, num_trees, accuracy, model) tuples; the error
  is 1 - accuracy. Entries keep the order they have in `results`.
  """
  selected = [entry for entry in results if entry[0] == depth]
  return [entry[1] for entry in selected], [1 - entry[2] for entry in selected]


resultsValidation_N, resultsValidation_Error = _error_curve(accuracies, FIXED_DEPTH)
resultsTraining_N, resultsTraining_Error = _error_curve(accuracies_train, FIXED_DEPTH)

fig, ax = plt.subplots()
# Markers keep the curves visible when the sweep contains a single tree count,
# where a bare line through one point would draw nothing.
ax.plot(resultsValidation_N, resultsValidation_Error, marker='o', label='Validation')
ax.plot(resultsTraining_N, resultsTraining_Error, marker='o', label='Training', color='#ff7f0e')

ax.set_xlabel("Number of Trees")
ax.set_ylabel("Error")
ax.set_title(f"Classification Error for GBTs with max depth of {FIXED_DEPTH}\n(Lower is Better)")
ax.legend(loc='best')
plt.show()