In [1]:
import tensorflow as tf
import wandb
from tensorflow import keras
import os
import numpy as np
from official.nlp import optimization
import tensorflow_hub as hub
import tensorflow_text as text
from asgard.utils.data_loader import load_datasets
from asgard.callbacks.callbacks import EarlyStoppingHammingScore
from asgard.metrics.metrics import HammingScoreMetric

2023-06-28 05:06:07.319089: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-06-28 05:06:07.468011: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-06-28 05:06:08.855139: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-12.1/lib64:/usr/local/cuda-12.1/lib64:/usr/local/cuda-12.1/lib64:
2023-

In [2]:
def get_class_weight(train_set, class_weight_kind="balanced", num_classes=16):
  """Build the class-weight table for the requested weighting scheme.

  Args:
    train_set: Training dataset. Only used by the "balanced" scheme, where it
      is forwarded unchanged to `compute_class_weights`.
    class_weight_kind: Weighting scheme. One of `None` / "None" (no
      weighting), "balanced", or "two-to-one".
    num_classes: Number of rows of the "two-to-one" table. Defaults to 16,
      the value that was previously hard-coded.

  Returns:
    `None` when weighting is disabled; whatever `compute_class_weights`
    returns for "balanced"; for "two-to-one" a float array of shape
    `(num_classes, 2)` whose column 0 is 1.0 and whose column 1 is 2.0.

  Raises:
    ValueError: If `class_weight_kind` is not one of the supported schemes.
      (Previously this surfaced as an UnboundLocalError on the return.)
  """
  if class_weight_kind is None or class_weight_kind == "None":
    return None

  if class_weight_kind == "balanced":
    # NOTE(review): compute_class_weights is neither defined nor imported in
    # the visible notebook cells - confirm it is in scope before using this
    # branch.
    return compute_class_weights(train_set)

  if class_weight_kind == "two-to-one":
    # Column 0 stays at 1.0; column 1 is doubled.
    class_weights = np.ones((num_classes, 2))
    class_weights[:, 1] = 2.0
    return class_weights

  raise ValueError(
    f"Unknown class_weight_kind {class_weight_kind!r}; expected None, "
    "'None', 'balanced' or 'two-to-one'."
  )


def get_weighted_loss(weights):
  """Create a binary cross-entropy loss scaled by per-class weights.

  Args:
    weights: Two-column table indexed as `weights[:, 0]` and `weights[:, 1]`.
      Column 0 is raised to the power `1 - y_train` and column 1 to the power
      `y_train`, so for 0/1 targets column 0 scales the loss where the target
      is 0 and column 1 scales it where the target is 1.

  Returns:
    A function `weighted_loss(y_train, y_pred)` that returns the weighted
    binary cross-entropy averaged over the last axis.
  """
  def weighted_loss(y_train, y_pred):
    # Per-element scaling factor built from the two weight columns.
    scale_when_zero = weights[:, 0] ** (1 - y_train)
    scale_when_one = weights[:, 1] ** y_train
    scaled_crossentropy = (
      scale_when_zero
      * scale_when_one
      * keras.backend.binary_crossentropy(y_train, y_pred)
    )
    return keras.backend.mean(scaled_crossentropy, axis=-1)

  return weighted_loss

In [3]:
# Training length: epoch count and number of batches per epoch.
epochs = 4
steps_per_epoch = 24813 # tf.data.experimental.cardinality(train_set).numpy()

# Total optimizer steps; warm-up covers 5% of them.
num_train_steps = epochs * steps_per_epoch
num_warmup_steps = int(num_train_steps * 0.05)

# AdamW optimizer built from the initial learning rate and the step counts above.
init_lr = 3e-5
optimizer = optimization.create_optimizer(
    init_lr=init_lr,
    num_train_steps=num_train_steps,
    num_warmup_steps=num_warmup_steps,
    optimizer_type='adamw',
)

[2023-06-28 05:06:10] absl - INFO: using Adamw optimizer
[2023-06-28 05:06:10] absl - INFO: AdamWeightDecay gradient_clip_norm=1.000000


In [4]:
# Show the kernel's working directory; the relative paths used in later cells
# ("../storage/datasets/tf_raw", "./artifacts/...") are resolved against it.
os.getcwd()

'/home/ubuntu/git/asgard/notebooks'

In [5]:
# TF Hub URLs for the DistilBERT encoder and its preprocessing model.
# NOTE(review): neither variable is referenced in the visible cells - confirm
# they are still needed.
tfhub_encoder_handler = "https://tfhub.dev/jeongukjae/distilbert_en_uncased_L-6_H-768_A-12/1"
tfhub_preprocess_handler = "https://tfhub.dev/jeongukjae/distilbert_en_uncased_preprocess/2"

In [6]:
train_set, valid_set, test_set = load_datasets("../storage/datasets/tf_raw")

subsample_size = 100
shuffle_seed = 42

# Shuffle the test set ONCE, in a fixed order.
# By default tf.data reshuffles every time the dataset is iterated, so
# `model.predict(test_subsample)` and a later `iter(test_subsample)` would see
# different elements and predictions could not be matched to their labels.
# A fixed seed plus reshuffle_each_iteration=False makes every pass over
# `test_subsample` yield the same elements in the same order.
test_shuffled_dataset = test_set.shuffle(
    buffer_size=100,
    seed=shuffle_seed,
    reshuffle_each_iteration=False,
)

# Take a subsample from the shuffled dataset
test_subsample = test_shuffled_dataset.take(subsample_size)

2023-06-28 05:06:15.492431: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-06-28 05:06:15.582099: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-06-28 05:06:15.584927: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-06-28 05:06:15.587747: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operati

In [7]:
# Start a W&B run, declare the trained model artifact as an input of that run,
# and download its files; `artifact_dir` receives the value returned by
# `download()`.
run = wandb.init()
artifact = run.use_artifact('alexandre-hsd/ASGARD-DistilBERT/model-vibrant-sweep-14:v2', type='model')
artifact_dir = artifact.download()

[2023-06-28 05:06:26] wandb.jupyter - ERROR: Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33malexandre-hsd[0m. Use [1m`wandb login --relogin`[0m to force relogin


[34m[1mwandb[0m: Downloading large artifact model-vibrant-sweep-14:v2, 772.98MB. 6 files... 
[34m[1mwandb[0m:   6 of 6 files downloaded.  
Done. 0:0:4.4


In [8]:
# Display the value returned by artifact.download() in the previous cell.
artifact_dir

'./artifacts/model-vibrant-sweep-14:v2'

In [9]:
# Load from the directory returned by artifact.download() rather than repeating
# its path as a literal, so the two cannot drift apart.
target_folder = artifact_dir

# Same (16, 2) table of 1.0 / 2.0 that the "two-to-one" scheme of
# get_class_weight produces; reuse the helper instead of rebuilding it by hand.
weights = get_class_weight(train_set, class_weight_kind="two-to-one")

# Linear decay (power=1.0) from init_lr to 0 over all training steps, wrapped
# in a warm-up schedule for the first num_warmup_steps steps.
lr_schedule = tf.keras.optimizers.schedules.PolynomialDecay(
    initial_learning_rate=init_lr,
    decay_steps=num_train_steps,
    end_learning_rate=0.0,
    power=1.0)
warmup_schedule = optimization.WarmUp(initial_learning_rate=init_lr,
                                      decay_schedule_fn=lr_schedule,
                                      warmup_steps=num_warmup_steps)

# custom_objects supplies the non-built-in names the saved model refers to.
# `optimizer` is the AdamW instance created in the optimizer cell with the same
# arguments this cell previously passed to a second create_optimizer call.
model = tf.keras.models.load_model(
    target_folder,
    custom_objects={
        "weighted_loss": get_weighted_loss(weights),
        "AdamWeightDecay": optimizer,
        "WarmUp": warmup_schedule,
        "HammingScoreMetric": HammingScoreMetric(),
    },
)

[2023-06-28 05:07:43] absl - INFO: using Adamw optimizer
[2023-06-28 05:07:43] absl - INFO: AdamWeightDecay gradient_clip_norm=1.000000
[2023-06-28 05:07:59] absl - INFO: AdamWeightDecay gradient_clip_norm=1.000000


In [10]:
# Run the loaded model over the test subsample.
# NOTE(review): judging by the displayed `predictions[0]`, each row holds 16
# scores, matching the 16-element label vectors - confirm.
predictions = model.predict(test_subsample)



In [19]:
# Inspect the scores predicted for the first example.
# NOTE(review): this expression is repeated in the following cells with
# different outputs and out-of-order execution counts; keep a single copy.
predictions[0]

array([0.99002004, 0.13448282, 0.568767  , 0.24128814, 0.5312531 ,
       0.13886882, 0.00776579, 0.56381345, 0.0501148 , 0.96482897,
       0.40658736, 0.04535012, 0.08588963, 0.0132065 , 0.04039462,
       0.3102229 ], dtype=float32)

In [15]:
# NOTE(review): duplicate of the preceding `predictions[0]` cell, executed out
# of order (count 15 after 19); candidate for removal.
predictions[0]

array([0.47885454, 0.09220963, 0.28528795, 0.2871287 , 0.10069013,
       0.09461151, 0.06952499, 0.45123702, 0.33866397, 0.4710489 ,
       0.13410343, 0.6900652 , 0.29761913, 0.0959122 , 0.12628393,
       0.90638286], dtype=float32)

In [10]:
# NOTE(review): duplicate `predictions[0]` cell whose execution count (10)
# collides with the model.predict cell; candidate for removal.
predictions[0]

array([0.09356621, 0.08309215, 0.13621739, 0.04193821, 0.05000394,
       0.29958984, 0.03654183, 0.10060693, 0.0481165 , 0.0402372 ,
       0.790424  , 0.18693845, 0.5943945 , 0.930888  , 0.3253251 ,
       0.03340747], dtype=float32)

In [11]:
# NOTE(review): duplicate `predictions[0]` cell; its output differs from the
# other copies, so it is unclear which run it belongs to. Candidate for removal.
predictions[0]

array([0.3458106 , 0.06801132, 0.0663984 , 0.1316111 , 0.12827072,
       0.08205006, 0.16548498, 0.8244478 , 0.89248174, 0.73548937,
       0.05578999, 0.9792474 , 0.27790752, 0.12828353, 0.5777957 ,
       0.33591527], dtype=float32)

In [12]:
# Pull one element from the subsample and print the first item of its first
# two components (per the printed output: a text string and a 16-value label
# vector).
# NOTE(review): this iterates `test_subsample` again, separately from
# model.predict; unless the shuffle order is fixed, this may not be the example
# that `predictions[0]` was computed for - confirm.
first_element = next(iter(test_subsample.take(1)))
print(first_element[0][0], first_element[1][0])

tf.Tensor(b'COVID-19: Lessons for junior doctors redeployed to critical care', shape=(), dtype=string) tf.Tensor([0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.], shape=(16,), dtype=float32)
