In [28]:
## importing necessary libraries
import tensorflow as tf
import pandas as pd
import tensorflow_text as text
import wandb
import tensorflow_hub as hub
from wandb.keras import WandbCallback

## Initializing W&B run

In [2]:
PROJECT_NAME = "banking_77"
JOB_TYPE = "baseline"
ENTITY = "basha"
SPLIT_DATA = "preprocess"

In [3]:
run = wandb.init(project=PROJECT_NAME,job_type=JOB_TYPE,entity=ENTITY)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
wandb: Currently logged in as: basha. Use `wandb login --relogin` to force relogin


## Batching data of W&B Artifact
We are downloading preprocesed data from W&B and batching the dataset using tf.data. This will help to use GPU when try to use the dataset.

In [4]:
split = run.use_artifact(f'{SPLIT_DATA}:latest')
split_dir = split.download()

In [6]:
train_df = pd.read_csv(f'{split_dir}//train_split.csv')
valid_df = pd.read_csv(f'{split_dir}//valid_split.csv')
test_df = pd.read_csv(f'{split_dir}//test_split.csv')

In [8]:
AUTOTUNE = tf.data.AUTOTUNE

train_ds = tf.data.Dataset.from_tensor_slices((train_df['text'],train_df['label'])).batch(32).cache().prefetch(buffer_size=AUTOTUNE)
valid_ds = tf.data.Dataset.from_tensor_slices((valid_df['text'],valid_df['label'])).batch(32).cache().prefetch(buffer_size=AUTOTUNE)
test_ds = tf.data.Dataset.from_tensor_slices((test_df['text'],test_df['label'])).batch(32).cache().prefetch(buffer_size=AUTOTUNE)

In [10]:
text_batch_1, text_label_1 = next(iter(train_ds)) # pick first batch from dataset
text , label = text_batch_1[0],text_label_1[0] # taking first text and label from batch
text , label # prints first text and label

(<tf.Tensor: shape=(), dtype=string, numpy=b'Will you send me a new card in China?'>,
 <tf.Tensor: shape=(), dtype=int64, numpy=9>)

### Logging model configuration in W&B

In [23]:
from ml_collections import config_dict

cfg = config_dict.ConfigDict()
cfg.preprocessor = 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3'
cfg.encoder = 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-2_H-128_A-2/2'
cfg.bs = 32
cfg.seed = 42
cfg.arch = 'bert_L-2_H-128_A-2'
cfg.learning_rate = 1e-5

In [24]:
wandb.config.update(cfg.to_dict())

## Model building

In [25]:
def make_model():
    text_input = tf.keras.Input(shape=(),dtype=tf.string,name="input_layer")
    preprocessor = hub.KerasLayer(cfg.preprocessor,name="preprocessor")
    encoder_inputs = preprocessor(text_input)

    encoder = hub.KerasLayer(cfg.encoder,trainable=True,name='BERT_encoder')
    outputs = encoder(encoder_inputs)

    pooled_output = outputs['pooled_output']

    #create Dense layer for classifier
    # net = tf.keras.layers.Dropout(0.1)(pooled_output)
    net = tf.keras.layers.Dense(77,activation='softmax')(pooled_output) #predict 77 classes
    return tf.keras.Model(text_input, net)

###  Creating and compiling a model

In [26]:
model = make_model()
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=cfg.learning_rate),
                         loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                         metrics=['accuracy'])

### Training a model

In [29]:
model.fit(train_ds,validation_data=valid_ds,epochs=5,callbacks=[WandbCallback()])



Instructions for updating:
Use `tf.compat.v1.graph_util.tensor_shape_from_node_def_name`


Instructions for updating:
Use `tf.compat.v1.graph_util.tensor_shape_from_node_def_name`


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x131b8390280>

### Predictions on Test set

In [30]:
preds = model.predict(test_ds)



In [31]:
preds = tf.math.argmax(preds,1)

In [37]:
tf.math.argmax(model.predict(tf.constant(['How do I locate my card?'])),1)



<tf.Tensor: shape=(1,), dtype=int64, numpy=array([39], dtype=int64)>

In [33]:
test_df['predicted'] = preds

### Logging Predictions table

In [40]:
run.log({'predictions_table': wandb.Table(dataframe=test_df)})

### Evaluating train set

In [41]:
model.evaluate(train_ds) # train set predicitions



[3.4553279876708984, 0.27282825112342834]

### Evaluating validation set

In [42]:
model.evaluate(valid_ds) # valid set predicitions



[3.4728615283966064, 0.2687312662601471]

### Shutting down W&B run

In [43]:
run.finish()

VBox(children=(Label(value='51.505 MB of 51.505 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, m…

0,1
accuracy,▁▂▃▅█
epoch,▁▃▅▆█
loss,█▆▅▃▁
val_accuracy,▁▂▄▆█
val_loss,█▇▅▃▁

0,1
GFLOPS,0.00094
accuracy,0.16563
best_epoch,4.0
best_val_loss,3.47286
epoch,4.0
loss,3.77234
val_accuracy,0.26873
val_loss,3.47286
