# Playground notebook

## Import Packages

In [1]:
import tensorflow as tf
import wandb
from tensorflow import keras
import os
import numpy as np
from official.nlp import optimization
import tensorflow_hub as hub
import tensorflow_text as text
from asgard.utils.data_loader import load_datasets
from asgard.callbacks.callbacks import EarlyStoppingHammingScore
from asgard.metrics.metrics import HammingScoreMetric

2023-07-06 19:36:01.936049: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-07-06 19:36:02.585767: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-07-06 19:36:05.246351: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-12.1/lib64:/usr/local/cuda-12.1/lib64:/usr/local/cuda-12.1/lib64:
2023-

## Load Dataset and Model

In [2]:
def get_class_weight(train_set, class_weight_kind="balanced", num_classes=16):
  """Build the per-label class-weight table consumed by the weighted loss.

  Args:
    train_set: Training dataset. Only forwarded to `compute_class_weights`
      when `class_weight_kind == "balanced"`; ignored otherwise.
    class_weight_kind: `None` or the string `"None"` (no weighting),
      `"balanced"`, or `"two-to-one"`.
    num_classes: Number of rows of the `"two-to-one"` table. Defaults to 16,
      the value that was previously hard-coded.

  Returns:
    `None` when weighting is disabled. For `"two-to-one"`, a float array of
    shape `(num_classes, 2)` whose column 0 is 1.0 and column 1 is 2.0.
    For `"balanced"`, whatever `compute_class_weights(train_set)` returns.

  Raises:
    ValueError: If `class_weight_kind` is not one of the supported values.
      (Previously this fell through to an unbound local variable.)
  """
  if class_weight_kind is None or class_weight_kind == "None":
    return None

  if class_weight_kind == "balanced":
    # NOTE(review): `compute_class_weights` is neither defined nor imported in
    # the visible notebook cells -- confirm where it comes from, otherwise
    # this branch raises NameError.
    return compute_class_weights(train_set)

  if class_weight_kind == "two-to-one":
    class_weights = np.ones((num_classes, 2))
    class_weights[:, 1] = 2.0
    return class_weights

  raise ValueError(
    f"Unknown class_weight_kind: {class_weight_kind!r}; "
    "expected None, 'None', 'balanced' or 'two-to-one'."
  )


def get_weighted_loss(weights):
  """Create a class-weighted binary cross-entropy loss function.

  Args:
    weights: Table indexed as `weights[:, 0]` and `weights[:, 1]`. Column 0
      is raised to the power `1 - y_train` and column 1 to the power
      `y_train`, so for targets that are exactly 0 or 1 the matching column
      becomes the multiplier of that label's loss.

  Returns:
    A function `weighted_loss(y_train, y_pred)` that returns the weighted
    binary cross-entropy averaged over the last axis.
  """
  def weighted_loss(y_train, y_pred):
    # Using the target as an exponent selects one of the two weights per
    # element without branching (w ** 0 == 1 leaves the other factor alone).
    negative_scale = weights[:, 0] ** (1 - y_train)
    positive_scale = weights[:, 1] ** y_train
    elementwise_bce = keras.backend.binary_crossentropy(y_train, y_pred)
    scaled_bce = negative_scale * positive_scale * elementwise_bce
    return keras.backend.mean(scaled_bce, axis=-1)

  return weighted_loss

In [3]:
# --- Training schedule ---
epochs = 4
# Hard-coded step count; original inline note:
# tf.data.experimental.cardinality(train_set).numpy()
steps_per_epoch = 24813

# --- Optimizer ---
# Total number of optimizer steps, with 5% of them passed as warm-up steps.
num_train_steps = epochs * steps_per_epoch
num_warmup_steps = int(num_train_steps * 0.05)

init_lr = 3e-5
optimizer = optimization.create_optimizer(
    init_lr=init_lr,
    num_train_steps=num_train_steps,
    num_warmup_steps=num_warmup_steps,
    optimizer_type='adamw',
)

# --- TF Hub handles (DistilBERT encoder and its preprocessing model) ---
tfhub_encoder_handler = "https://tfhub.dev/jeongukjae/distilbert_en_uncased_L-6_H-768_A-12/1"
tfhub_preprocess_handler = "https://tfhub.dev/jeongukjae/distilbert_en_uncased_preprocess/2"

[2023-07-06 19:36:09] absl - INFO: using Adamw optimizer
[2023-07-06 19:36:09] absl - INFO: AdamWeightDecay gradient_clip_norm=1.000000


In [4]:
# Unpack the three datasets returned by the project's `load_datasets` helper
# for the directory ../storage/datasets/tf_raw into train / valid / test names.
train_set, valid_set, test_set = load_datasets("../storage/datasets/tf_raw")

2023-07-06 19:36:11.858658: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-07-06 19:36:12.368991: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-07-06 19:36:12.370830: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-07-06 19:36:12.374879: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operati

In [5]:
# Directory of the saved Keras model to load.
target_folder = '../api/model/model-avid-sweep-1:v2'

# Same (16, 2) table as before (column 0 = 1.0, column 1 = 2.0), now obtained
# from the helper instead of being rebuilt by hand. The "two-to-one" branch
# does not read `train_set`.
weights = get_class_weight(train_set, class_weight_kind="two-to-one")

# Linear decay (power=1.0) from `init_lr` to 0 over all training steps; used
# as the post-warm-up schedule of the WarmUp object below.
lr_schedule = tf.keras.optimizers.schedules.PolynomialDecay(
    initial_learning_rate=init_lr,
    decay_steps=num_train_steps,
    end_learning_rate=0.0,
    power=1.0)

# Objects Keras needs to resolve the custom names stored in the saved model.
# NOTE(review): Keras documents `custom_objects` values as classes or
# functions; instances are passed here exactly as in the original cell --
# confirm whether the classes themselves should be passed instead.
custom_objects = {
    "weighted_loss": get_weighted_loss(weights),
    # Reuse the optimizer created in the configuration cell (same arguments)
    # rather than constructing a second identical one.
    "AdamWeightDecay": optimizer,
    "WarmUp": optimization.WarmUp(initial_learning_rate=init_lr,
                                  decay_schedule_fn=lr_schedule,
                                  warmup_steps=num_warmup_steps),
    "HammingScoreMetric": HammingScoreMetric(),
}

model = tf.keras.models.load_model(target_folder, custom_objects=custom_objects)

[2023-07-06 19:36:14] absl - INFO: using Adamw optimizer
[2023-07-06 19:36:14] absl - INFO: AdamWeightDecay gradient_clip_norm=1.000000
[2023-07-06 19:36:36] absl - INFO: AdamWeightDecay gradient_clip_norm=1.000000


## Explainable AI

In [6]:
# Example text that the explainer cells below are fitted on.
sample = "Law enforcement effects on marine life preservation in the South Pacific"

In [25]:
# Number of distinct space-separated tokens in `sample` (case-sensitive,
# no punctuation stripping).
len(set(sample.split(" ")))

11

In [7]:
import eli5
import pandas as pd
from eli5.lime import TextExplainer
from sklearn.feature_extraction.text import CountVectorizer

In [8]:
# vec = CountVectorizer(ngram_range=(1, 2), token_pattern='(?u)\\b\\w+\\b')
# vec.fit([sample])

In [9]:
# vec.get_feature_names_out()

In [29]:

# Fit eli5's TextExplainer on the single sample text, using 500 generated
# samples and a fixed random seed.
# NOTE(review): `model.predict` is passed as the prediction callable. eli5
# expects a function that maps a list of strings to class probabilities --
# confirm the raw model output is suitable (the commented-out
# `AsgardNormalized` wrapper at the end of the notebook suggests this was
# being investigated).
te = TextExplainer(random_state=42, n_samples=500)
te.fit(sample, model.predict)





In [11]:
# Top-10 feature weights for each of the 16 targets, labelled "SDG 1".."SDG 16"
# and using the feature names of the explainer's fitted vectorizer.
sdg_names = [f"SDG {i+1}" for i in range(16)]
ngram_names = te.vec_.get_feature_names_out()
explanation = te.explain_weights(
    target_names=sdg_names,
    feature_names=ngram_names,
    top=10,
)

In [27]:
# Render the per-target weight tables ("SDG 1".."SDG 16") as the cell output.
sdg_names = [f"SDG {i+1}" for i in range(16)]
te.show_weights(
    target_names=sdg_names,
    feature_names=te.vec_.get_feature_names_out(),
)

Weight?,Feature,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,Unnamed: 9_level_0,Unnamed: 10_level_0,Unnamed: 11_level_0,Unnamed: 12_level_0,Unnamed: 13_level_0,Unnamed: 14_level_0,Unnamed: 15_level_0
Weight?,Feature,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Weight?,Feature,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
Weight?,Feature,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3
Weight?,Feature,Unnamed: 2_level_4,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4,Unnamed: 7_level_4,Unnamed: 8_level_4,Unnamed: 9_level_4,Unnamed: 10_level_4,Unnamed: 11_level_4,Unnamed: 12_level_4,Unnamed: 13_level_4,Unnamed: 14_level_4,Unnamed: 15_level_4
Weight?,Feature,Unnamed: 2_level_5,Unnamed: 3_level_5,Unnamed: 4_level_5,Unnamed: 5_level_5,Unnamed: 6_level_5,Unnamed: 7_level_5,Unnamed: 8_level_5,Unnamed: 9_level_5,Unnamed: 10_level_5,Unnamed: 11_level_5,Unnamed: 12_level_5,Unnamed: 13_level_5,Unnamed: 14_level_5,Unnamed: 15_level_5
Weight?,Feature,Unnamed: 2_level_6,Unnamed: 3_level_6,Unnamed: 4_level_6,Unnamed: 5_level_6,Unnamed: 6_level_6,Unnamed: 7_level_6,Unnamed: 8_level_6,Unnamed: 9_level_6,Unnamed: 10_level_6,Unnamed: 11_level_6,Unnamed: 12_level_6,Unnamed: 13_level_6,Unnamed: 14_level_6,Unnamed: 15_level_6
Weight?,Feature,Unnamed: 2_level_7,Unnamed: 3_level_7,Unnamed: 4_level_7,Unnamed: 5_level_7,Unnamed: 6_level_7,Unnamed: 7_level_7,Unnamed: 8_level_7,Unnamed: 9_level_7,Unnamed: 10_level_7,Unnamed: 11_level_7,Unnamed: 12_level_7,Unnamed: 13_level_7,Unnamed: 14_level_7,Unnamed: 15_level_7
Weight?,Feature,Unnamed: 2_level_8,Unnamed: 3_level_8,Unnamed: 4_level_8,Unnamed: 5_level_8,Unnamed: 6_level_8,Unnamed: 7_level_8,Unnamed: 8_level_8,Unnamed: 9_level_8,Unnamed: 10_level_8,Unnamed: 11_level_8,Unnamed: 12_level_8,Unnamed: 13_level_8,Unnamed: 14_level_8,Unnamed: 15_level_8
Weight?,Feature,Unnamed: 2_level_9,Unnamed: 3_level_9,Unnamed: 4_level_9,Unnamed: 5_level_9,Unnamed: 6_level_9,Unnamed: 7_level_9,Unnamed: 8_level_9,Unnamed: 9_level_9,Unnamed: 10_level_9,Unnamed: 11_level_9,Unnamed: 12_level_9,Unnamed: 13_level_9,Unnamed: 14_level_9,Unnamed: 15_level_9
Weight?,Feature,Unnamed: 2_level_10,Unnamed: 3_level_10,Unnamed: 4_level_10,Unnamed: 5_level_10,Unnamed: 6_level_10,Unnamed: 7_level_10,Unnamed: 8_level_10,Unnamed: 9_level_10,Unnamed: 10_level_10,Unnamed: 11_level_10,Unnamed: 12_level_10,Unnamed: 13_level_10,Unnamed: 14_level_10,Unnamed: 15_level_10
Weight?,Feature,Unnamed: 2_level_11,Unnamed: 3_level_11,Unnamed: 4_level_11,Unnamed: 5_level_11,Unnamed: 6_level_11,Unnamed: 7_level_11,Unnamed: 8_level_11,Unnamed: 9_level_11,Unnamed: 10_level_11,Unnamed: 11_level_11,Unnamed: 12_level_11,Unnamed: 13_level_11,Unnamed: 14_level_11,Unnamed: 15_level_11
Weight?,Feature,Unnamed: 2_level_12,Unnamed: 3_level_12,Unnamed: 4_level_12,Unnamed: 5_level_12,Unnamed: 6_level_12,Unnamed: 7_level_12,Unnamed: 8_level_12,Unnamed: 9_level_12,Unnamed: 10_level_12,Unnamed: 11_level_12,Unnamed: 12_level_12,Unnamed: 13_level_12,Unnamed: 14_level_12,Unnamed: 15_level_12
Weight?,Feature,Unnamed: 2_level_13,Unnamed: 3_level_13,Unnamed: 4_level_13,Unnamed: 5_level_13,Unnamed: 6_level_13,Unnamed: 7_level_13,Unnamed: 8_level_13,Unnamed: 9_level_13,Unnamed: 10_level_13,Unnamed: 11_level_13,Unnamed: 12_level_13,Unnamed: 13_level_13,Unnamed: 14_level_13,Unnamed: 15_level_13
Weight?,Feature,Unnamed: 2_level_14,Unnamed: 3_level_14,Unnamed: 4_level_14,Unnamed: 5_level_14,Unnamed: 6_level_14,Unnamed: 7_level_14,Unnamed: 8_level_14,Unnamed: 9_level_14,Unnamed: 10_level_14,Unnamed: 11_level_14,Unnamed: 12_level_14,Unnamed: 13_level_14,Unnamed: 14_level_14,Unnamed: 15_level_14
Weight?,Feature,Unnamed: 2_level_15,Unnamed: 3_level_15,Unnamed: 4_level_15,Unnamed: 5_level_15,Unnamed: 6_level_15,Unnamed: 7_level_15,Unnamed: 8_level_15,Unnamed: 9_level_15,Unnamed: 10_level_15,Unnamed: 11_level_15,Unnamed: 12_level_15,Unnamed: 13_level_15,Unnamed: 14_level_15,Unnamed: 15_level_15
+0.767,the south,,,,,,,,,,,,,,
+0.665,life preservation,,,,,,,,,,,,,,
+0.586,in the,,,,,,,,,,,,,,
+0.241,effects on,,,,,,,,,,,,,,
+0.133,preservation in,,,,,,,,,,,,,,
-0.031,on marine,,,,,,,,,,,,,,
-0.159,south,,,,,,,,,,,,,,
-0.289,marine life,,,,,,,,,,,,,,
-0.336,effects,,,,,,,,,,,,,,
-0.445,<BIAS>,,,,,,,,,,,,,,

Weight?,Feature
0.767,the south
0.665,life preservation
0.586,in the
0.241,effects on
0.133,preservation in
-0.031,on marine
-0.159,south
-0.289,marine life
-0.336,effects
-0.445,<BIAS>

Weight?,Feature
+0.768,law enforcement
+0.716,on marine
+0.557,the south
+0.302,effects on
+0.295,enforcement effects
+0.239,in the
+0.153,life preservation
… 1 more positive …,… 1 more positive …
-0.203,south pacific
-0.310,life

Weight?,Feature
0.664,on marine
0.476,south pacific
0.328,law enforcement
0.22,effects on
0.087,life preservation
-0.044,preservation in
-0.344,life
-0.485,<BIAS>
-0.571,effects
-0.665,south

Weight?,Feature
0.288,on marine
0.275,in the
0.15,enforcement effects
0.124,preservation in
0.096,the south
-0.13,law enforcement
-0.236,south pacific
-0.418,<BIAS>
-0.419,in
-0.461,effects

Weight?,Feature
+0.631,in the
+0.576,law enforcement
+0.339,life preservation
+0.239,life
+0.231,south pacific
+0.207,the south
+0.206,enforcement effects
… 1 more positive …,… 1 more positive …
-0.424,on marine
-0.432,<BIAS>

Weight?,Feature
+0.993,on marine
+0.966,the south
+0.765,law enforcement
+0.492,marine life
+0.387,life preservation
+0.328,in the
+0.319,preservation in
+0.289,effects on
+0.274,south pacific
… 2 more negative …,… 2 more negative …

Weight?,Feature
+0.715,the south
+0.534,marine life
+0.510,in the
+0.501,enforcement effects
+0.462,law enforcement
+0.379,on marine
+0.358,south pacific
+0.345,effects on
… 1 more positive …,… 1 more positive …
-0.578,<BIAS>

Weight?,Feature
0.614,law enforcement
0.611,in the
0.483,preservation in
0.418,enforcement effects
0.247,effects on
0.235,south pacific
0.209,on marine
0.172,life preservation
0.149,marine life
-0.184,on

Weight?,Feature
+0.752,on marine
+0.654,effects on
+0.626,preservation in
+0.418,law enforcement
+0.357,life preservation
+0.334,marine life
+0.163,the south
+0.162,enforcement effects
… 2 more positive …,… 2 more positive …
-0.378,pacific

Weight?,Feature
0.398,in the
0.355,life preservation
0.287,law enforcement
0.242,the south
0.139,enforcement effects
0.07,effects on
-0.062,effects
-0.117,life
-0.123,law
-0.127,south

Weight?,Feature
0.97,law enforcement
0.852,preservation in
0.471,life preservation
0.254,in the
0.12,preservation
0.09,the south
0.087,effects on
-0.115,enforcement effects
-0.324,effects
-0.548,on

Weight?,Feature
+0.889,on marine
+0.806,south pacific
+0.790,the south
+0.524,life preservation
+0.518,effects on
+0.472,law enforcement
+0.374,preservation in
+0.319,marine life
+0.162,enforcement effects
… 1 more positive …,… 1 more positive …

Weight?,Feature
+0.758,preservation in
+0.669,law enforcement
+0.667,marine life
+0.598,south pacific
+0.515,the south
+0.424,effects on
+0.356,on marine
+0.179,life preservation
+0.069,marine
… 1 more positive …,… 1 more positive …

Weight?,Feature
+1.168,marine
+0.452,effects on
+0.387,the south
+0.382,law enforcement
+0.370,preservation in
+0.327,marine life
+0.262,enforcement effects
+0.225,on marine
+0.215,pacific
+0.103,south pacific

Weight?,Feature
+0.993,preservation
+0.610,law enforcement
+0.540,in the
+0.487,south pacific
+0.479,enforcement effects
+0.440,effects on
+0.401,marine life
+0.184,life preservation
+0.180,preservation in
… 2 more positive …,… 2 more positive …

Weight?,Feature
+1.009,law
+0.345,preservation in
+0.301,enforcement
+0.248,in the
+0.213,life preservation
+0.203,south pacific
+0.181,effects on
+0.114,the south
… 2 more positive …,… 2 more positive …
-0.074,marine life


In [22]:
from io import StringIO

# 0-based position of the weight table to extract from the rendered HTML.
# NOTE(review): presumably position 14 is the table for "SDG 15" -- confirm
# that eli5 emits one table per target, in target order.
table_index = 14

# HTML markup of the weight tables of the explainer's fitted classifier.
weights_html = eli5.show_weights(
    te.clf_,
    target_names=[f"SDG {i+1}" for i in range(16)],
    feature_names=te.vec_.get_feature_names_out(),
).data

# Wrap the markup in StringIO: passing literal HTML strings to pd.read_html is
# deprecated in recent pandas releases, while file-like input is accepted by
# all versions.
pd.read_html(StringIO(weights_html))[table_index]

Unnamed: 0,Weight?,Feature
0,1.338,marine
1,0.937,law enforcement
2,0.912,life preservation
3,0.744,in the
4,0.7,enforcement effects
5,0.603,marine life
6,0.494,south pacific
7,0.311,effects on
8,0.017,on marine
9,-0.104,preservation


In [8]:
# class AsgardNormalized(tf.keras.Model):
#     def __init__(self, model, **kwargs):
#         super().__init__(**kwargs)
#         self.model = model
        
#     def call(self, inputs):
#         return self.model(inputs)
        
#     def predict(self, inputs):
#         if len(inputs) == 1:
#             inputs = [inputs]
#             inputs = [tf.constant([input], dtype=tf.string) for input in inputs]
#         # else: 
#             # inputs = [tf.constant(input, dtype=tf.string) for input in inputs]

#         probabilities = self.model(inputs)
#         norm = tf.reduce_sum(probabilities)
#         normalized_probabilities = tf.divide(probabilities, norm)
#         return normalized_probabilities
# normalized_model = AsgardNormalized(model)