## Setup and Libraries

In [None]:
# Colab environment setup: install the third-party packages imported below.
# NOTE(review): none of these installs pins a version, so the resolved
# TensorFlow / Keras / transformers combination can differ between runs.
# Consider pinning versions once a working combination is known.
!pip install -U tensorflow-text --quiet
!pip install transformers --quiet
!pip install tensorflow_decision_forests --quiet
!pip install optuna --quiet
!pip install --upgrade pandas



In [None]:
## Library imports
import gzip
import io
import pandas as pd
import warnings
import math
import numpy as np

import tensorflow as tf
from tensorflow import keras
import tensorflow_decision_forests as tfdf
import optuna

import matplotlib.pyplot as plt

from google.colab import files

# Other options
warnings.filterwarnings("ignore", category=DeprecationWarning)

## Data load

In [None]:
## Load datasets
from google.colab import drive

# Mount Google Drive so the project folder is reachable under /content/drive
drive.mount('/content/drive')
# Folder path reserved for model checkpoints (not used by the loading code in this cell)
model_checkpoints_path = '/content/drive/My Drive/ds266proj/model_checkpoints'

# Read the Hearthstone (hs) and Netrunner (nr) card datasets from Google Drive
hs_all_data = pd.read_csv('/content/drive/My Drive/ds266proj/hs_cards_data_text.csv')
nr_all_data = pd.read_csv('/content/drive/My Drive/ds266proj/nr_cards_data_text.csv')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Baseline Model Experimentation

The baseline models use decision-tree ensembles (a random forest and gradient-boosted trees) and ignore the text of the cards entirely. They are expected to perform quite poorly.

### Data preparation

In [None]:
## Set up the feature subsets of each dataset that are used for modeling

## Hearthstone: the label column ('classes') plus every non-text feature,
## including all the mechanics. A copy is taken so later edits do not touch hs_all_data.
hs_features_allnontext = hs_all_data[['classes',
                                      # Key info
                                      'health', 'cost', 'attack', 'durability', 'armor',
                                      # Card descriptors
                                      'Hero', 'Location', 'Minion', 'Spell', 'Weapon',
                                      # tribes
                                      'MECH', 'QUILBOAR', 'DEMON', 'PIRATE', 'TOTEM', 'NAGA', 'ELEMENTAL',
                                      'ALL', 'BEAST', 'MURLOC', 'DRAGON', 'UNDEAD',
                                      # spell schools
                                      'ARCANE', 'FEL', 'FIRE', 'FROST', 'HOLY', 'NATURE', 'SHADOW',
                                      # keywords
                                      'QUICKDRAW', 'COLLECTIONMANAGER_FILTER_MANA_ODD', 'ADJACENT_BUFF', 'GEARS',
                                      'DEAL_DAMAGE', 'FORGETFUL', 'MODULAR', 'SECRET', 'ECHO',
                                      'MULTIPLY_BUFF_VALUE', 'CHOOSE_ONE', 'FORGE', 'WHELP', 'HONORABLE_KILL',
                                      'TWINSPELL', 'QUEST', 'FRENZY', 'OVERHEAL', 'CHARGE', 'FREEZE',
                                      'OVERKILL', 'SPELLPOWER', 'DIVINE_SHIELD', 'AFFECTED_BY_SPELL_POWER',
                                      'ENRAGED', 'COMBO', 'DEATHRATTLE', 'REBORN', 'CORRUPT', 'HIDE_STATS',
                                      'SILENCE', 'TOPDECK', 'POISONOUS', 'WINDFURY', 'TAUNT', 'KABAL',
                                      'COLOSSAL', 'GRIMY_GOONS', 'TRADEABLE', 'OVERLOAD', 'INSPIRE',
                                      'NON_KEYWORD_ECHO', 'SUMMON', 'EXCAVATE', 'AURA', 'BATTLECRY',
                                      'OUTCAST', 'MANATHIRST', 'RUSH', 'HEROPOWER_DAMAGE',
                                      'RECEIVES_DOUBLE_SPELLDAMAGE_BONUS', 'JADE_LOTUS', 'FINALE',
                                      'DEATH_KNIGHT', 'DREDGE', 'INFUSE',
                                      'COLLECTIONMANAGER_FILTER_MANA_EVEN', 'TRIGGER_VISUAL', 'LIFESTEAL',
                                      'START_OF_GAME_KEYWORD', 'IMP', 'RESTORE_HEALTH', 'JADE_GOLEM',
                                      'DISCOVER', 'STEALTH', 'FINISH_ATTACK_SPELL_ON_DAMAGE']].copy()

## Netrunner: the label column ('faction_code') plus every non-text feature.
## A copy is taken so later edits do not touch nr_all_data.
nr_features_allnontext = nr_all_data[['faction_code',
                                      # Key info
                                      'runner', 'corp', 'ice', 'upgrade', 'hardware', 'resource', 'identity',
                                      'operation', 'agenda', 'program', 'asset', 'event',
                                      # Card descriptors
                                      'influence', 'unique', 'cost', 'trash_cost', 'memory_cost', 'strength',
                                      'advancement_cost', 'agenda_points', 'base_link',
                                      'influence_limit', 'minimum_deck_size', 'deck_limit',
                                      # keywords
                                      'morph', 'job', 'location', 'deepnet', 'ambush', 'blackops',
                                      'advertisement', 'lockdown', 'codegate', 'enforcer', 'genetics',
                                      'sabotage', 'chip', 'source', 'grayops', 'consumergrade', 'division',
                                      'priority', 'killer', 'link', 'directive', 'sensie', 'caissa',
                                      'trap', 'remote', 'companion', 'deflector', 'hostile', 'vehicle', 'seedy',
                                      'bioroid', 'terminal', 'harmonic', 'deva', 'political', 'decoder', 'beanstalk',
                                      'psi', 'megacorp', 'cast', 'orgcrime', 'clone', 'reprisal', 'console', 'mythic',
                                      'corporation', 'clan', 'grail', 'ai', 'barrier', 'research', 'character', 'trojan',
                                      'stealth', 'triple', 'icebreaker', 'academic', 'securityprotocol', 'cyborg',
                                      'digital', 'connection', 'transaction', 'ritzy', 'fracter', 'region', 'expendable',
                                      'virus', 'expansion', 'offsite', 'industrial', 'daemon', 'virtual', 'observer',
                                      'facility', 'current', 'alliance', 'cloud', 'executive', 'condition', 'policedepartment',
                                      'weapon', 'destroyer', 'tracer', 'unorthodox', 'natural', 'sentry', 'subsidiary',
                                      'security', 'gear', 'sysop', 'next', 'run', 'double', 'gmod', 'ap',
                                      'cybernetic', 'initiative', 'illicit', 'mod', 'government', 'public']].copy()


In [None]:
## CODE FOR TRAIN AND TEST SPLITS
##===========================================================

def splitTrainTest(dataset,
                   label,
                   test_ratio = 0.3,
                   seed = 2319):
  """Encode the label column as integer codes and randomly split the rows.

  Args:
    dataset: pandas DataFrame holding the features and the label column.
    label: name of the label column in `dataset`.
    test_ratio: approximate fraction of rows sent to the test split. Each row
      is assigned independently, so the realised fraction varies slightly.
    seed: value passed to `np.random.seed` before drawing the split, which
      makes the split reproducible. Defaults to the original hard-coded 2319.

  Returns:
    A `(train_df, test_df)` tuple of DataFrames. In both, `label` holds the
    integer position of the original value in the printed class list.
  """
  # Work on a copy so the caller's frame keeps its original label values
  modeling_df = dataset.copy()
  classes = modeling_df[label].unique().tolist()
  print(f"Label classes: {classes}")
  # Convert the label to a numeric value. The column is replaced outright
  # (instead of `.loc[:, label] = ...`) because an in-place `.loc` write keeps
  # the old object dtype on recent pandas, leaving integer codes stored as objects.
  modeling_df[label] = modeling_df[label].map(classes.index)

  # Split into training and test data with one uniform draw per row
  np.random.seed(seed)
  test_indices = np.random.rand(len(modeling_df)) < test_ratio
  train_df = modeling_df[~test_indices]
  test_df = modeling_df[test_indices]
  print("{} examples in training, {} examples for testing.".format(
    len(train_df), len(test_df)))

  return train_df, test_df

# Build the train/test splits for both games; the label column differs per dataset.
hs_train, hs_test = splitTrainTest(hs_features_allnontext, label = 'classes')
nr_train, nr_test = splitTrainTest(nr_features_allnontext, label = 'faction_code')

Label classes: ['PRIEST', 'WARRIOR', 'WARLOCK', 'SHAMAN', 'DRUID', 'PALADIN', 'MAGE', 'HUNTER', 'ROGUE', 'DEMONHUNTER', 'DEATHKNIGHT', 'NEUTRAL']
5234 examples in training, 2291 examples for testing.
Label classes: ['neutral-corp', 'neutral-runner', 'weyland-consortium', 'nbn', 'jinteki', 'haas-bioroid', 'shaper', 'criminal', 'anarch']
1495 examples in training, 628 examples for testing.


### Decision Tree and Gradient Boosted Tree Modeling

In [None]:
## CODE TO GENERATE A TREE MODEL USING AN ARBITRARY DATASET
## Basic structure taken from https://www.tensorflow.org/decision_forests/tutorials/beginner_colab
##===========================================================

def fitTreeModel(training_data,
                 test_data,
                 label,
                 tree_type = 'RandomForest',
                 num_trees = 500):
  """Fit a TensorFlow Decision Forests model and evaluate it on the test data.

  Args:
    training_data: pandas DataFrame used for fitting (features plus `label`).
    test_data: pandas DataFrame used for evaluation (features plus `label`).
    label: name of the label column in both frames.
    tree_type: 'RandomForest' or 'GBTree' (gradient boosted trees).
    num_trees: number of trees passed to the model constructor.

  Returns:
    A `(model, evaluation)` tuple, where `evaluation` is the dict returned by
    `model.evaluate(..., return_dict=True)`.

  Raises:
    ValueError: if `tree_type` is not one of the supported values.
  """
  # Validate first so a bad argument fails fast with a clear message, rather
  # than returning None and breaking callers that unpack the result.
  supported_tree_types = ('RandomForest', 'GBTree')
  if tree_type not in supported_tree_types:
    raise ValueError(
      f"Unsupported tree type {tree_type!r}; expected one of {supported_tree_types}")

  # Convert the pandas frames into TensorFlow datasets
  train_ds = tfdf.keras.pd_dataframe_to_tf_dataset(training_data, label=label)
  test_ds = tfdf.keras.pd_dataframe_to_tf_dataset(test_data, label=label)

  # Specify the model type
  if tree_type == 'RandomForest':
    model = tfdf.keras.RandomForestModel(num_trees = num_trees)
  else:
    model = tfdf.keras.GradientBoostedTreesModel(num_trees = num_trees)

  # Fit the selected model
  model.fit(train_ds)

  # Model evaluation: compile after fitting only to attach the accuracy metric
  model.compile(metrics=["accuracy"])
  evaluation = model.evaluate(test_ds, return_dict=True)

  return model, evaluation


In [None]:
## Train some models on the input datasets
##--------------------------------------------------
# hs_model, hs_model_eval = fitTreeModel(training_data = hs_train,
#                                        test_data = hs_test,
#                                        label = 'classes')

# hs_model_gb, hs_model_eval_gb = fitTreeModel(training_data = hs_train,
#                                              test_data = hs_test,
#                                              label = 'classes',
#                                              tree_type = 'GBTree')

# nr_model, nr_model_eval = fitTreeModel(training_data = nr_train,
#                                        test_data = nr_test,
#                                        label = 'faction_code')

# nr_model_gb, nr_model_eval_gb = fitTreeModel(training_data = nr_train,
#                                              test_data = nr_test,
#                                              label = 'faction_code',
#                                              tree_type = 'GBTree')

#### Neural Nets

In [None]:
## Input features should be arrays

def genModelArrays(training_data,
                   test_data,
                   label_col):
  """Turn train/test DataFrames into float32 feature and label arrays.

  Args:
    training_data: DataFrame with the training features and the label column.
    test_data: DataFrame with the test features and the label column.
    label_col: name of the label column, which is removed from the features.

  Returns:
    `(train_data, train_labels, test_data, test_labels)` as float32 numpy arrays.
  """
  def _features_and_labels(frame):
    # Every column except the label is a feature
    feature_columns = frame.columns[frame.columns != label_col]
    feature_array = np.asarray(frame[feature_columns]).astype('float32')
    label_array = np.asarray(frame[label_col]).astype('float32')
    return feature_array, label_array

  train_features, train_targets = _features_and_labels(training_data)
  test_features, test_targets = _features_and_labels(test_data)

  return train_features, train_targets, test_features, test_targets

# Generate the neural-net input arrays for both games (label column differs per dataset)
hsnn_train_data, hsnn_train_labels, hsnn_test_data, hsnn_test_labels = genModelArrays(hs_train, hs_test, label_col = 'classes')
nrnn_train_data, nrnn_train_labels, nrnn_test_data, nrnn_test_labels = genModelArrays(nr_train, nr_test, label_col = 'faction_code')

# Sanity check: features are (n_examples, n_features) and labels are (n_examples,)
print(hsnn_train_data.shape)
print(hsnn_train_labels.shape)

print(nrnn_train_data.shape)
print(nrnn_train_labels.shape)

(5234, 95)
(5234,)
(1495, 125)
(1495,)


In [None]:
## Code to create a neural net for classification

def buildNNClassifier(n_input_features,
                      hidden_layer_dims,
                      n_classes = 12,
                      hidden_activation = 'relu',
                      dropout_rate = 0.25,
                      learning_rate = 0.001):
  """Build and compile a feed-forward softmax classifier.

  Args:
    n_input_features: number of feature columns per example.
    hidden_layer_dims: iterable of hidden-layer widths, one Dense+Dropout pair
      per entry, applied in order.
    n_classes: number of output classes (size of the softmax layer).
    hidden_activation: activation used by every hidden Dense layer.
    dropout_rate: dropout rate applied after every hidden layer.
    learning_rate: learning rate for the Adam optimizer.

  Returns:
    A compiled Keras model using sparse categorical cross-entropy and accuracy.
  """
  ## Set up the input layer
  # NOTE(review): the input is declared int32 while the arrays built elsewhere in
  # this notebook are float32; confirm all features are integer-valued, since any
  # fractional feature would presumably be truncated by the cast.
  input_layer = tf.keras.layers.Input(shape=(n_input_features,), dtype='int32', name = 'input')

  last_hidden_output = input_layer

  ## Set up the hidden layers
  for i, h_dim in enumerate(hidden_layer_dims):
    last_hidden_output = tf.keras.layers.Dense(h_dim, activation=hidden_activation, name='hidden_%d' % i)(last_hidden_output)
    last_hidden_output = tf.keras.layers.Dropout(dropout_rate)(last_hidden_output)

  ## Set up the classification layer, sized from n_classes (was hard-coded to 12,
  ## which silently ignored the parameter)
  classification = tf.keras.layers.Dense(n_classes, activation='softmax', name='classification')(last_hidden_output)

  ## Compile the model
  nn_model = tf.keras.models.Model(inputs=input_layer, outputs=[classification])
  nn_model.compile(loss='sparse_categorical_crossentropy',
                   optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                   metrics=['accuracy'])

  return nn_model

In [None]:
## Neural Net Experiment
##========================================================

## Using the optuna package to search for most performant NN model

def _nnTrialObjective(trial, train_data, train_labels, test_data, test_labels):
  """Shared Optuna objective: sample a network, train it, return 1 - accuracy.

  Args:
    trial: the `optuna.trial.Trial` used to sample hyperparameters.
    train_data, train_labels: arrays the model is fitted on.
    test_data, test_labels: arrays passed to Keras as `validation_data`.

  Returns:
    1 minus the validation accuracy of the final epoch (lower is better).
  """
  # First create the search space
  #----------------------------------------
  # Try activation functions
  activation = trial.suggest_categorical("hidden_activation", ["sigmoid", "relu"])
  # Try numbers of layers
  num_hidden_layers = trial.suggest_int("num_hidden_layers", 1, 3)
  # For the hidden layers try various layer sizes. Each layer gets its own
  # parameter name: re-suggesting one name inside a trial returns the value
  # already drawn, which forced every layer to the same width.
  hidden_layer_dims = [
    trial.suggest_int(f"hidden_size_{layer_n}", 10, 20, step=1)
    for layer_n in range(num_hidden_layers)
  ]
  trial.set_user_attr('hidden_layer_dims', hidden_layer_dims)
  # Try some number of epochs
  num_epochs = trial.suggest_int("num_epochs", 10, 25, step=1)

  # Next set up the model, run it, and check the finalized accuracy
  #----------------------------------------
  # Labels are integer codes starting at 0, so the largest code + 1 is the
  # number of output units needed.
  n_classes = int(max(train_labels.max(), test_labels.max())) + 1
  trial_model = buildNNClassifier(n_input_features=train_data.shape[1],
                                  hidden_layer_dims=hidden_layer_dims,
                                  n_classes=n_classes,
                                  hidden_activation = activation,
                                  learning_rate = 0.001)
  # NOTE(review): the test split doubles as the validation set here, so the
  # best trial's accuracy is optimistically biased by the search itself.
  trial_model_history = trial_model.fit(train_data,
                                        train_labels,
                                        validation_data = (test_data, test_labels),
                                        epochs = num_epochs,
                                        verbose = 0)

  # Finally define the thing to minimize, in this case 1 minus the model validation accuracy
  #------------------------------------------
  return 1 - trial_model_history.history['val_accuracy'][-1]

def objectiveHS(trial):
  """Optuna objective for the Hearthstone arrays (hsnn_*); returns 1 - val accuracy."""
  return _nnTrialObjective(trial,
                           hsnn_train_data, hsnn_train_labels,
                           hsnn_test_data, hsnn_test_labels)

def objectiveNR(trial):
  """Optuna objective for the Netrunner arrays (nrnn_*); returns 1 - val accuracy."""
  return _nnTrialObjective(trial,
                           nrnn_train_data, nrnn_train_labels,
                           nrnn_test_data, nrnn_test_labels)

In [None]:
# Hearthstone hyperparameter search: 100 trials of objectiveHS, whose return
# value (1 - validation accuracy) is the quantity being minimized.
hsnn_experiment = optuna.create_study(study_name="HSNN_Experiment")
hsnn_experiment.optimize(objectiveHS, n_trials=100)

[I 2024-03-19 01:07:41,074] A new study created in memory with name: HSNN_Experiment
[I 2024-03-19 01:08:11,961] Trial 0 finished with value: 0.6769969463348389 and parameters: {'hidden_activation': 'relu', 'num_hidden_layers': 2, 'hidden_size': 19, 'num_epochs': 15}. Best is trial 0 with value: 0.6769969463348389.
[I 2024-03-19 01:08:40,125] Trial 1 finished with value: 0.7145351469516754 and parameters: {'hidden_activation': 'sigmoid', 'num_hidden_layers': 1, 'hidden_size': 14, 'num_epochs': 14}. Best is trial 0 with value: 0.6769969463348389.
[I 2024-03-19 01:09:04,758] Trial 2 finished with value: 0.7066783010959625 and parameters: {'hidden_activation': 'sigmoid', 'num_hidden_layers': 1, 'hidden_size': 10, 'num_epochs': 19}. Best is trial 0 with value: 0.6769969463348389.
[I 2024-03-19 01:09:33,501] Trial 3 finished with value: 0.6983849704265594 and parameters: {'hidden_activation': 'relu', 'num_hidden_layers': 2, 'hidden_size': 11, 'num_epochs': 15}. Best is trial 0 with value: 0

In [None]:
# Report the best Hearthstone trial: sampled hyperparameters, the stored
# hidden-layer sizes, and the accuracy recovered from the objective (1 - value).
print(hsnn_experiment.best_trial.params)
print(hsnn_experiment.best_trial.user_attrs)
print(f"Validation Acc: {1 - hsnn_experiment.best_trial.values[0]}")


{'hidden_activation': 'relu', 'num_hidden_layers': 1, 'hidden_size': 20, 'num_epochs': 24}
{'hidden_layer_dims': [20]}
Validation Acc: 0.36883458495140076


In [None]:
# Netrunner hyperparameter search: 100 trials of objectiveNR, whose return
# value (1 - validation accuracy) is the quantity being minimized.
nrnn_experiment = optuna.create_study(study_name="NRNN_Experiment")
nrnn_experiment.optimize(objectiveNR, n_trials=100)

[I 2024-03-19 01:51:37,291] A new study created in memory with name: NRNN_Experiment
[I 2024-03-19 01:51:56,662] Trial 0 finished with value: 0.579617828130722 and parameters: {'hidden_activation': 'relu', 'num_hidden_layers': 2, 'hidden_size': 12, 'num_epochs': 14}. Best is trial 0 with value: 0.579617828130722.
[I 2024-03-19 01:52:26,593] Trial 1 finished with value: 0.7659235596656799 and parameters: {'hidden_activation': 'sigmoid', 'num_hidden_layers': 3, 'hidden_size': 20, 'num_epochs': 25}. Best is trial 0 with value: 0.579617828130722.
[I 2024-03-19 01:52:41,442] Trial 2 finished with value: 0.6815286576747894 and parameters: {'hidden_activation': 'sigmoid', 'num_hidden_layers': 2, 'hidden_size': 20, 'num_epochs': 16}. Best is trial 0 with value: 0.579617828130722.
[I 2024-03-19 01:53:06,229] Trial 3 finished with value: 0.5971337556838989 and parameters: {'hidden_activation': 'sigmoid', 'num_hidden_layers': 1, 'hidden_size': 13, 'num_epochs': 16}. Best is trial 0 with value: 0.

## Transformer Models



### Data Preparation

In [None]:
## HEARTHSTONE LANGUAGE DATA PREP
#===============================================================
# The various pieces of the card need to be combined into a single, sensible text description

# Constants: names of the indicator columns that generateDescriptionHSData
# checks (== 1) when building a card description.
# Tribe columns, checked for minion cards
TRIBES = ['MECH', 'QUILBOAR', 'DEMON', 'PIRATE', 'TOTEM', 'NAGA', 'ELEMENTAL', 'ALL', 'BEAST', 'MURLOC', 'DRAGON', 'UNDEAD']
# Spell-school columns, checked for spell cards
SPELLSCHOOLS = ['ARCANE', 'FEL', 'FIRE', 'FROST', 'HOLY', 'NATURE', 'SHADOW']
# Mechanic/keyword columns, checked for every card
MECHANICS = ['QUICKDRAW', 'COLLECTIONMANAGER_FILTER_MANA_ODD', 'ADJACENT_BUFF', 'GEARS',
            'DEAL_DAMAGE', 'FORGETFUL', 'MODULAR', 'SECRET', 'ECHO',
            'MULTIPLY_BUFF_VALUE', 'CHOOSE_ONE', 'FORGE', 'WHELP', 'HONORABLE_KILL',
            'TWINSPELL', 'QUEST', 'FRENZY', 'OVERHEAL', 'CHARGE', 'FREEZE',
            'OVERKILL', 'SPELLPOWER', 'DIVINE_SHIELD', 'AFFECTED_BY_SPELL_POWER',
            'ENRAGED', 'COMBO', 'DEATHRATTLE', 'REBORN', 'CORRUPT', 'HIDE_STATS',
            'SILENCE', 'TOPDECK', 'POISONOUS', 'WINDFURY', 'TAUNT', 'KABAL',
            'COLOSSAL', 'GRIMY_GOONS', 'TRADEABLE', 'OVERLOAD', 'INSPIRE',
            'NON_KEYWORD_ECHO', 'SUMMON', 'EXCAVATE', 'AURA', 'BATTLECRY',
            'OUTCAST', 'MANATHIRST', 'RUSH', 'HEROPOWER_DAMAGE',
            'RECEIVES_DOUBLE_SPELLDAMAGE_BONUS', 'JADE_LOTUS', 'FINALE',
            'DEATH_KNIGHT', 'DREDGE', 'INFUSE',
            'COLLECTIONMANAGER_FILTER_MANA_EVEN', 'TRIGGER_VISUAL', 'LIFESTEAL',
            'START_OF_GAME_KEYWORD', 'IMP', 'RESTORE_HEALTH', 'JADE_GOLEM',
            'DISCOVER', 'STEALTH', 'FINISH_ATTACK_SPELL_ON_DAMAGE']

def extendDescription(desc, tag):
  """Append a tag to a comma-separated description string.

  The tag is lower-cased and its underscores become spaces. An empty `desc`
  yields just the readable tag; otherwise the tag is appended after ", ".
  """
  readable_tag = tag.lower().replace("_", " ")
  if desc != "":
    return desc + ", " + readable_tag
  return desc + readable_tag

def generateDescriptionHSData(row):
  """Build a natural-language description of one Hearthstone card.

  Args:
    row: a mapping (e.g. a DataFrame row) with 'name', 'cost', 'text', the stat
      columns ('health', 'attack', 'durability', 'armor'), the card-type flags
      ('Minion', 'Spell', 'Weapon', 'Hero', 'Location') and one indicator column
      per entry of TRIBES, SPELLSCHOOLS and MECHANICS.

  Returns:
    A string of the form "The card named <name> is a <cost> cost <type> <stats>,
    and includes the effects <effects>. The card text says: <text>". Parts that
    do not apply are left out, with no stray spaces, and the card-text sentence
    is omitted when the text is missing.
  """
  def _describe_flags(tags):
    # Comma-separated readable names of every tag whose column equals 1
    desc = ""
    for tag in tags:
      if row[tag] == 1:
        desc = extendDescription(desc, tag)
    return desc

  ## Get together the subtype for minions/spell schools, or otherwise
  type_modifier = ""
  stats_modifier = ""
  if row['Minion'] == 1:
    type_modifier = _describe_flags(TRIBES) + " minion"
    stats_modifier = f"with {row['health']} health and {row['attack']} attack"
    if row['armor'] > 0:
      stats_modifier = stats_modifier + f" and {row['armor']} armor"
  elif row['Spell'] == 1:
    type_modifier = _describe_flags(SPELLSCHOOLS) + " spell"
  elif row['Weapon'] == 1:
    type_modifier = "weapon"
    stats_modifier = f"with {row['attack']} attack and {row['durability']} durability"
  elif row['Hero'] == 1:
    type_modifier = "hero"
    stats_modifier = f"with {row['armor']} armor"
  elif row['Location'] == 1:
    type_modifier = "location"
    stats_modifier = f"with {row['health']} health"

  ## Add info about various special mechanics
  special_effects = _describe_flags(MECHANICS)

  ## Pull together the pieces of the description. Empty pieces are dropped and
  ## the rest joined with single spaces, so a card with no tribe or no stats no
  ## longer produces "cost  minion" or a space before the full stop.
  sentence_parts = [f"The card named {row['name']} is a {row['cost']} cost",
                    type_modifier.strip(),
                    stats_modifier]
  finaldesc = " ".join(part for part in sentence_parts if part)
  if special_effects:
    finaldesc = finaldesc + ", and includes the effects " + special_effects
  finaldesc = finaldesc + "."

  # Missing text (NaN/None) used to be rendered as the literal string "nan"
  card_text = row['text']
  if not pd.isna(card_text):
    cleansed_text = str(card_text).replace('\n', ' ')
    finaldesc = finaldesc + f" The card text says: {cleansed_text}"

  return finaldesc

# Add a generated 'description' column, built row by row, to a copy of the card data.
# NOTE(review): hs_cards_data_augment is not defined in any cell shown in this
# notebook; confirm where it comes from, otherwise this fails on a fresh kernel.
hs_cards_data_text = hs_cards_data_augment.copy()
hs_cards_data_text['description'] = hs_cards_data_text.apply(lambda row: generateDescriptionHSData(row), axis=1)


  and should_run_async(code)
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


In [None]:
## Saving the dataset to look at elsewhere
##------------------------------------------------------------
# Write the frame (without its index) to a CSV in the working directory, then
# hand that file to google.colab's `files.download`.
hs_cards_data_text.to_csv('hs_cards_data_text.csv', index=False)
files.download('hs_cards_data_text.csv')




<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

#### BERT Classification

In [None]:
from transformers import BertTokenizer, TFBertModel, AutoTokenizer

from transformers import logging
logging.set_verbosity_error()


In [None]:
# import tensorflow_datasets as tfds
# train_data, test_data = tfds.load(
#     name="imdb_reviews",
#     split=('train[:80%]', 'test[80%:]'),
#     as_supervised=True)
# train_examples, train_labels = next(iter(train_data.batch(20000)))
# test_examples, test_labels = next(iter(test_data.batch(5000)))

In [None]:
# Pretrained checkpoint shared by the tokenizers here and the BERT model loaded later
checkpoint = 'bert-base-cased'
bert_tokenizer = BertTokenizer.from_pretrained(checkpoint)
# NOTE(review): auto_tokenizer is loaded from the same checkpoint but is not used
# by any cell shown in this notebook.
auto_tokenizer = AutoTokenizer.from_pretrained(checkpoint)

  and should_run_async(code)


In [None]:
## Tokenizer smoke test; good testing vals: 4, 7
# NOTE(review): train_ds_hs_desc is not defined in any cell shown in this
# notebook; confirm where it is created.
pd.reset_option('display.max_colwidth')
# `[7]` indexes the Series directly, whereas the tokenizer call below takes
# position 5 of the plain list, so the two lines need not refer to the same card.
test_str = train_ds_hs_desc['description'][7]
print(test_str)
print(bert_tokenizer.tokenize(train_ds_hs_desc['description'].tolist()[5]))

The card named Cenarius is a 8 cost  minion with 8 health and 5 attack, and includes the effects choose one. The card text says: Choose One plus Give your other minions plus 2 dash plus 2; or Summon two 2 dash 2 Treants with Taunt.
['The', 'card', 'named', 'Ce', '##nar', '##ius', 'is', 'a', '8', 'cost', 'mini', '##on', 'with', '8', 'health', 'and', '5', 'attack', ',', 'and', 'includes', 'the', 'effects', 'choose', 'one', '.', 'The', 'card', 'text', 'says', ':', 'Cho', '##ose', 'One', 'plus', 'Give', 'your', 'other', 'mini', '##ons', 'plus', '2', 'dash', 'plus', '2', ';', 'or', 'Su', '##mm', '##on', 'two', '2', 'dash', '2', 'T', '##rea', '##nts', 'with', 'Tau', '##nt', '.']


In [None]:
# print(train_ds_hs_desc['description'].tolist())
# print(train_ds_hs_desc['description'])
# print(train_ds_hs_desc['classes'].tolist())
# print(train_ds_hs_desc['classes'])

In [None]:
## Inspiration taken from https://colab.research.google.com/github/NielsRogge/Transformers-Tutorials/blob/master/BERT/Fine_tuning_BERT_(and_friends)_for_multi_label_text_classification.ipynb#scrollTo=AFWlSsbZaRLc
# Fixed token length for every encoded description; also the input width of the BERT model
MAX_SEQ_LENGTH = 512
## Encode the descriptions: pad or truncate each one to MAX_SEQ_LENGTH and return TensorFlow tensors
encoding = bert_tokenizer(train_ds_hs_desc['description'].tolist(),
                          padding="max_length",
                          truncation=True,
                          max_length=MAX_SEQ_LENGTH,
                          return_tensors = 'tf')
# Attach the class labels (as a plain Python list) alongside the encoded inputs
encoding['labels'] = train_ds_hs_desc['classes'].tolist()


In [None]:
print(encoding['input_ids'])

tf.Tensor(
[[ 101 1109 3621 ...    0    0    0]
 [ 101 1109 3621 ...    0    0    0]
 [ 101 1109 3621 ...    0    0    0]
 ...
 [ 101 1109 3621 ...    0    0    0]
 [ 101 1109 3621 ...    0    0    0]
 [ 101 1109 3621 ...    0    0    0]], shape=(5071, 512), dtype=int32)


  and should_run_async(code)


In [None]:
## Simplistic BERT model creation
def create_bert_classification_model(bert_model,
                                     num_train_layers = 0,
                                     hidden_size = 200,
                                     dropout = 0.3,
                                     learning_rate = 0.00005,
                                     n_classes = 12):
    """
    Build a simple classification model on top of BERT.

    The hidden state of the first ([CLS]) token from BERT's last layer feeds a
    Dense + Dropout hidden layer followed by a softmax classification layer.

    Args:
        bert_model: a TFBertModel instance; its `trainable` flags are modified in place.
        num_train_layers: how many of the outermost encoder layers to fine-tune.
            0 (or less) freezes all of BERT; a value >= the number of encoder
            layers trains all of BERT.
        hidden_size: width of the hidden Dense layer.
        dropout: dropout rate applied after the hidden layer.
        learning_rate: learning rate for the Adam optimizer.
        n_classes: number of output classes (size of the softmax layer).

    Returns:
        A compiled Keras model taking [input_ids, token_type_ids, attention_mask],
        each of shape (batch, MAX_SEQ_LENGTH).
    """
    n_encoder_layers = bert_model.config.num_hidden_layers
    if num_train_layers <= 0:
        # Freeze all layers of pre-trained BERT model
        bert_model.trainable = False
    elif num_train_layers >= n_encoder_layers:
        # Train all layers of the BERT model
        bert_model.trainable = True
    else:
        # Restrict training to the num_train_layers outer transformer layers.
        # Start from a fully trainable model so the outcome does not depend on
        # an earlier call having frozen it, then freeze through the public
        # `trainable` attribute. The previous substring match on weight names
        # could also hit model-name suffixes such as 'tf_bert_model_3'.
        bert_model.trainable = True
        bert_main = bert_model.bert
        bert_main.embeddings.trainable = False
        pooler = getattr(bert_main, 'pooler', None)
        if pooler is not None:
            # The pooler output is unused below; keep it frozen as before
            pooler.trainable = False
        first_trainable = n_encoder_layers - num_train_layers
        for encoder_block in bert_main.encoder.layer[:first_trainable]:
            encoder_block.trainable = False
        print('retrain layers: ', list(range(first_trainable, n_encoder_layers)))

    input_ids = tf.keras.layers.Input(shape=(MAX_SEQ_LENGTH,), dtype=tf.int64, name='input_ids_layer')
    token_type_ids = tf.keras.layers.Input(shape=(MAX_SEQ_LENGTH,), dtype=tf.int64, name='token_type_ids_layer')
    attention_mask = tf.keras.layers.Input(shape=(MAX_SEQ_LENGTH,), dtype=tf.int64, name='attention_mask_layer')

    ## Set up the BERT model; this will use the CLS token input
    bert_inputs = {'input_ids': input_ids,
                   'token_type_ids': token_type_ids,
                   'attention_mask': attention_mask}
    bert_out = bert_model(bert_inputs)
    # bert_out[0] is the last hidden state; position 0 is the [CLS] token
    cls_token = bert_out[0][:, 0, :]

    hidden = tf.keras.layers.Dense(hidden_size, activation='relu', name='hidden_layer')(cls_token)
    hidden = tf.keras.layers.Dropout(dropout)(hidden)

    classification = tf.keras.layers.Dense(n_classes, activation='softmax',name='classification')(hidden)
    classification_model = tf.keras.Model(inputs=[input_ids, token_type_ids, attention_mask], outputs=[classification])
    classification_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                                 loss='sparse_categorical_crossentropy',
                                 metrics=['accuracy'])

    return classification_model

In [None]:
# Load pretrained BERT and wrap it in the classifier with every BERT layer frozen.
# NOTE(review): the recorded output of this cell is a ValueError in which
# TFBertModel rejects the KerasTensor inputs. This looks like a Keras /
# transformers version mismatch rather than a logic error; confirm the
# installed versions before re-running.
bert_model = TFBertModel.from_pretrained(checkpoint)
bert_classification_model = create_bert_classification_model(bert_model, num_train_layers=0)



ValueError: Exception encountered when calling layer 'tf_bert_model_3' (type TFBertModel).

Data of type <class 'keras.src.engine.keras_tensor.KerasTensor'> is not allowed only (<class 'tensorflow.python.framework.tensor.Tensor'>, <class 'bool'>, <class 'int'>, <class 'transformers.utils.generic.ModelOutput'>, <class 'tuple'>, <class 'list'>, <class 'dict'>, <class 'numpy.ndarray'>) is accepted for input_ids.

Call arguments received by layer 'tf_bert_model_3' (type TFBertModel):
  • input_ids={'input_ids': "<KerasTensor: shape=(None, 512) dtype=int64 (created by layer 'input_ids_layer')>", 'token_type_ids': "<KerasTensor: shape=(None, 512) dtype=int64 (created by layer 'token_type_ids_layer')>", 'attention_mask': "<KerasTensor: shape=(None, 512) dtype=int64 (created by layer 'attention_mask_layer')>"}
  • attention_mask=None
  • token_type_ids=None
  • position_ids=None
  • head_mask=None
  • inputs_embeds=None
  • encoder_hidden_states=None
  • encoder_attention_mask=None
  • past_key_values=None
  • use_cache=None
  • output_attentions=None
  • output_hidden_states=None
  • return_dict=None
  • training=False