## Creating a model of text-classification using ag_news_subset dataset

In [1]:
import tensorflow as tf
import tensorflow_datasets as tfds
import tensorflow_hub as hub
from tensorflow.keras.layers import Dense, Lambda
from tensorflow.keras import Sequential




In [3]:
train_data, val_data, test_data = tfds.load(
    name="ag_news_subset",
    split=('train', 'test[:40%]', 'test[40:]'),
    as_supervised=True
)

In [5]:
train_example_batch, train_labels_batch = next(iter(train_data.batch(10)))

In [7]:
train_example_batch

<tf.Tensor: shape=(10,), dtype=string, numpy=
array([b'AMD #39;s new dual-core Opteron chip is designed mainly for corporate computing applications, including databases, Web services, and financial transactions.',
       b'Reuters - Major League Baseball\\Monday announced a decision on the appeal filed by Chicago Cubs\\pitcher Kerry Wood regarding a suspension stemming from an\\incident earlier this season.',
       b'President Bush #39;s  quot;revenue-neutral quot; tax reform needs losers to balance its winners, and people claiming the federal deduction for state and local taxes may be in administration planners #39; sights, news reports say.',
       b'Britain will run out of leading scientists unless science education is improved, says Professor Colin Pillinger.',
       b'London, England (Sports Network) - England midfielder Steven Gerrard injured his groin late in Thursday #39;s training session, but is hopeful he will be ready for Saturday #39;s World Cup qualifier against Austri

In [9]:
train_labels_batch

<tf.Tensor: shape=(10,), dtype=int64, numpy=array([3, 1, 2, 3, 1, 0, 3, 0, 0, 1])>

In [11]:
for example, label in train_data.take(5):
    print("Label:", label.numpy())

Label: 3
Label: 1
Label: 2
Label: 3
Label: 1


In [15]:
hub_layer = hub.KerasLayer("https://tfhub.dev/google/nnlm-en-dim50/2", trainable=True)

In [16]:
print("Hub layer config:", hub_layer.get_config())

Hub layer config: {'name': 'keras_layer_1', 'trainable': True, 'dtype': 'float32', 'handle': 'https://tfhub.dev/google/nnlm-en-dim50/2'}


In [19]:
def embed_text(input_text):
    return hub_layer(input_text)

In [23]:
model = Sequential([
    Lambda(embed_text, input_shape=[], dtype=tf.float32),
    Dense(16, activation='relu'),
    Dense(4, activation='softmax')
])

In [25]:
model.summary()

In [27]:
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy']
)

In [29]:
history = model.fit(
    train_data.shuffle(100).batch(10),
    epochs=25,
    validation_data=val_data.batch(10),
    verbose=1
)

Epoch 1/25
[1m12000/12000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 6ms/step - accuracy: 0.8221 - loss: 0.5186 - val_accuracy: 0.8566 - val_loss: 0.4079
Epoch 2/25
[1m12000/12000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 6ms/step - accuracy: 0.8525 - loss: 0.4112 - val_accuracy: 0.8589 - val_loss: 0.4023
Epoch 3/25
[1m12000/12000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 6ms/step - accuracy: 0.8548 - loss: 0.4040 - val_accuracy: 0.8609 - val_loss: 0.3980
Epoch 4/25
[1m12000/12000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 6ms/step - accuracy: 0.8557 - loss: 0.3993 - val_accuracy: 0.8609 - val_loss: 0.3963
Epoch 5/25
[1m12000/12000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 6ms/step - accuracy: 0.8573 - loss: 0.3961 - val_accuracy: 0.8655 - val_loss: 0.3953
Epoch 6/25
[1m12000/12000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 6ms/step - accuracy: 0.8583 - loss: 0.3938 - val_accuracy: 0.8658 - val_loss: 0.394

In [31]:
model.summary()

In [33]:
results = model.evaluate(test_data.batch(100), verbose=2)

76/76 - 1s - 12ms/step - accuracy: 0.8558 - loss: 0.3991


In [35]:
for name, value in zip(model.metrics_names, results):
    print("%s: %.3f" % (name, value))

loss: 0.399
compile_metrics: 0.856


### Checking the model with example

In [38]:
dummy_input = tf.constant(["This is a dummy text example"])
model(dummy_input)

<tf.Tensor: shape=(1, 4), dtype=float32, numpy=
array([[3.2596132e-03, 2.5743537e-04, 2.2198003e-02, 9.7428495e-01]],
      dtype=float32)>

In [40]:
model.summary()

## Checking lambda layer is even working or not?

In [43]:
sample_text = tf.constant(["This is a test sentence."])

In [45]:
lambda_layer = model.layers[0]
embedding_output = lambda_layer(sample_text)

In [47]:
print("Lambda Layer Output:", embedding_output)

Lambda Layer Output: tf.Tensor(
[[ 0.02556921  0.36595875  0.2743366   0.25243077 -0.07800581 -0.02379442
   0.07319438  0.18354312 -0.31369522 -0.0056898  -0.16465114  0.05714218
   0.12722859 -0.01569714  0.19789337 -0.33734787 -0.15531759  0.09683281
   0.0379135  -0.1120435  -0.14598507  0.0424215   0.01771358  0.06913725
   0.04719152 -0.01121329 -0.2587691  -0.00133109  0.10966404 -0.12126026
  -0.17014721  0.04834424 -0.14242361 -0.05091047  0.10296395  0.01767956
   0.13141309 -0.17820449  0.11892072  0.01697081 -0.13756834  0.05378134
   0.13654892  0.24253245 -0.49445114 -0.2375425   0.03911211  0.01126521
   0.0217822   0.21144976]], shape=(1, 50), dtype=float32)


In [49]:
print("Output dtype:", embedding_output.dtype)

Output dtype: <dtype: 'float32'>


In [51]:
print("Output shape:", embedding_output.shape)

Output shape: (1, 50)


## Checking the model output and data-types

In [54]:
model_output = model(sample_text)
print("Model output (after Dense layers):", model_output)

Model output (after Dense layers): tf.Tensor([[0.01599571 0.01882405 0.04537682 0.9198035 ]], shape=(1, 4), dtype=float32)


In [56]:
hub_output = hub_layer(sample_text)

In [58]:
print("Hub layer output (embeddings):", hub_output)

Hub layer output (embeddings): tf.Tensor(
[[ 0.02556921  0.36595875  0.2743366   0.25243077 -0.07800581 -0.02379442
   0.07319438  0.18354312 -0.31369522 -0.0056898  -0.16465114  0.05714218
   0.12722859 -0.01569714  0.19789337 -0.33734787 -0.15531759  0.09683281
   0.0379135  -0.1120435  -0.14598507  0.0424215   0.01771358  0.06913725
   0.04719152 -0.01121329 -0.2587691  -0.00133109  0.10966404 -0.12126026
  -0.17014721  0.04834424 -0.14242361 -0.05091047  0.10296395  0.01767956
   0.13141309 -0.17820449  0.11892072  0.01697081 -0.13756834  0.05378134
   0.13654892  0.24253245 -0.49445114 -0.2375425   0.03911211  0.01126521
   0.0217822   0.21144976]], shape=(1, 50), dtype=float32)


In [60]:
print("Hub output dtype:", hub_output.dtype)
print("Hub output shape:", hub_output.shape)

Hub output dtype: <dtype: 'float32'>
Hub output shape: (1, 50)
