<a href="https://colab.research.google.com/github/intimanjunath/deep-learning-keras-hub-teachable-machine/blob/main/keras_hub_text.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Easy Level

In [None]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [None]:
# Toy corpus for binary sentiment classification, written as (sentence, label)
# pairs so each text sits next to its label: 1 = positive, 0 = negative.
labeled_examples_easy = [
    ("I love this product!", 1),
    ("This is the worst thing I've ever bought.", 0),
    ("Not bad, could be better.", 1),
    ("Absolutely fantastic service.", 1),
]

# Split the pairs into two parallel lists: sentences and their labels.
texts_easy = [sentence for sentence, _ in labeled_examples_easy]
labels_easy = [label for _, label in labeled_examples_easy]

In [None]:
# Vocabulary cap for the tokenizer and fixed length of every padded sequence.
max_vocab, max_len = 1000, 10

# Fit a word-level tokenizer on the training sentences; "<UNK>" is registered
# as the out-of-vocabulary token.
tokenizer = Tokenizer(oov_token="<UNK>", num_words=max_vocab)
tokenizer.fit_on_texts(texts_easy)

# Convert each sentence to a list of word indices, then pad at the end
# ('post') up to max_len.
encoded = tokenizer.texts_to_sequences(texts_easy)
padded = pad_sequences(sequences=encoded, maxlen=max_len, padding='post')

In [None]:
# Define a simple embedding-based classifier:
#   token ids -> 16-d embeddings -> mean over the sequence -> Dense(16) -> sigmoid.

# Seed the Python, NumPy and TensorFlow RNGs so that weight initialisation
# (and therefore the printed scores) is repeatable across fresh runs.
tf.keras.utils.set_random_seed(42)

model_easy = tf.keras.Sequential([
    # Declare the input shape with an explicit Input layer. The Embedding
    # argument `input_length` is deprecated in current Keras and only
    # triggers a warning there.
    tf.keras.Input(shape=(max_len,), dtype="int32"),
    tf.keras.layers.Embedding(input_dim=max_vocab, output_dim=16),
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(16, activation='relu'),
    # Single sigmoid unit: the output is one score in (0, 1) per example.
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model_easy.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model_easy.summary()



In [None]:
# Train the model for 20 epochs with 2 examples per batch (verbose=0 hides the
# per-epoch progress bars). The returned History object is kept in
# `history_easy` so the loss/accuracy curves stay available via
# `history_easy.history`, and so the cell no longer echoes an uninformative
# `<History at 0x...>` repr whose address differs on every run.
history_easy = model_easy.fit(
    padded,
    np.array(labels_easy),
    epochs=20,
    batch_size=2,
    verbose=0,
)

<keras.src.callbacks.history.History at 0x79f18c765b10>

In [None]:
# Score two sentences that were not part of the training data, using the same
# tokenizer and padding settings as for training.
test_texts = ["I really enjoyed this!", "Terrible experience."]
test_seq = tokenizer.texts_to_sequences(test_texts)
test_pad = pad_sequences(test_seq, maxlen=max_len, padding='post')

# predict() returns one row per input sentence; the score is the first
# (and only) entry of each row.
preds_easy = model_easy.predict(test_pad)
print("\n[Easy] Sentiment Predictions:")
for txt, score_row in zip(test_texts, preds_easy):
    print(f"Text: {txt} → Sentiment score: {score_row[0]:.2f}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 96ms/step

[Easy] Sentiment Predictions:
Text: I really enjoyed this! → Sentiment score: 0.59
Text: Terrible experience. → Sentiment score: 0.61


# Intermediate Level

In [None]:
# Intermediate level: fine-tune BERT to predict sentiment straight from raw text.
# The BERT preprocessor is pinned to the CPU to avoid resource conflicts.

# Toy training set as (sentence, label) pairs.
labeled_examples_intermediate = [
    ("I loved the movie. It was fantastic!", 1),
    ("The film was boring and too long.", 0),
    ("What an excellent performance!", 1),
    ("Terrible movie. I hated it.", 0),
]

# Split the pairs into two parallel lists: sentences and their labels.
intermediate_sentences = [sentence for sentence, _ in labeled_examples_intermediate]
intermediate_labels = [label for _, label in labeled_examples_intermediate]

import tensorflow_hub as hub
import tensorflow_text as text  # Make sure this version is compatible with your TF version

# Keras layer wrapping the TF Hub BERT preprocessor.
class WrappedBERTPreprocessor(tf.keras.layers.Layer):
    """Keras layer that runs the TF Hub BERT preprocessing model on the CPU.

    The hub layer is created once in the constructor and reused on every
    call, always inside a ``/CPU:0`` device scope.
    """

    def __init__(self, **kwargs):
        """Create the wrapped hub layer; ``kwargs`` are passed to ``Layer``."""
        super().__init__(**kwargs)
        self.preprocessor_layer = hub.KerasLayer(
            "https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3",
            name="bert_preprocess",
        )

    def call(self, inputs):
        """Apply the hub preprocessor to ``inputs`` and return its output."""
        # Pin the preprocessing ops to the CPU.
        with tf.device("/CPU:0"):
            preprocessed = self.preprocessor_layer(inputs)
        return preprocessed

# Define the BERT sentiment model with the functional API:
#   raw string -> BERT preprocessing -> BERT encoder -> dropout -> sigmoid.
input_text_bert = tf.keras.Input(shape=(), dtype=tf.string, name="input_text")
wrapped_preprocessor = WrappedBERTPreprocessor()(input_text_bert)
# trainable=True so the encoder weights are fine-tuned along with the classifier head.
encoder = hub.KerasLayer(
    "https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/3",
    trainable=True, name="bert_encoder"
)
# Take the pooled output from the BERT encoder as the sentence representation.
encoder_outputs = encoder(wrapped_preprocessor)
x_bert = encoder_outputs['pooled_output']
x_bert = tf.keras.layers.Dropout(0.1)(x_bert)
output_bert = tf.keras.layers.Dense(1, activation="sigmoid", name="classifier")(x_bert)
model_bert = tf.keras.Model(inputs=input_text_bert, outputs=output_bert, name="BERT_Sentiment_Model")
# No explicit model_bert.build(...) call: a functional model is already built
# once it has been constructed from its input and output tensors.
model_bert.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=2e-5),
                   loss="binary_crossentropy", metrics=["accuracy"])
model_bert.summary()

# Fine-tune the BERT model on the toy sentences: 2 epochs, 2 examples per batch.
train_sentences_bert = tf.constant(intermediate_sentences)
train_labels_bert = np.array(intermediate_labels)
model_bert.fit(train_sentences_bert, train_labels_bert, epochs=2, batch_size=2)

# Score two sentences that are not in the training list and report the result.
unseen_reviews = tf.constant([
    "An amazing experience, I would watch it again.",
    "It was a waste of time."
])
preds_bert = model_bert.predict(unseen_reviews)

print("\nIntermediate Level Model Predictions:")
print("Prediction shape:", preds_bert.shape, "DType:", preds_bert.dtype)
print("Predicted sentiment probabilities (closer to 1 indicates positive sentiment):")
print(preds_bert)


=== Intermediate Level: Fine-Tuning BERT for Sentiment Classification ===
Model: "BERT_Sentiment_Model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_text (InputLayer)        [(None,)]            0           []                               
                                                                                                  
 wrapped_bert_preprocessor_2 (W  {'input_type_ids':   0          ['input_text[0][0]']             
 rappedBERTPreprocessor)        (None, 128),                                                      
                                 'input_mask': (Non                                               
                                e, 128),                                                          
                                 'input_word_ids':                                                
    

# Advanced Level

In [None]:
import nltk

# Fetch the NLTK data packages this cell relies on. 'punkt_tab' is downloaded
# alongside 'punkt' because it was added to resolve an error.
for resource_name in (
    'punkt',
    'punkt_tab',
    'averaged_perceptron_tagger_eng',
    'maxent_ne_chunker_tab',
    'words',
):
    nltk.download(resource_name)

# Sample passage to run named-entity recognition on.
text_for_ner = (
    "Apple Inc. is looking at buying a startup in the U.K. for $1 billion. "
    "Tim Cook, the CEO of Apple, stated that this acquisition will strengthen the company's market position."
)

# Split the passage into sentences.
sentences = nltk.sent_tokenize(text_for_ner)

# Per sentence: word-tokenize -> POS-tag -> NE-chunk, then print every chunk.
print("\nDetected Named Entities:")
for sentence in sentences:
    tokens = nltk.word_tokenize(sentence)
    pos_tags = nltk.pos_tag(tokens)
    tree = nltk.ne_chunk(pos_tags)
    for subtree in tree:
        # Plain (token, tag) tuples are ordinary words; only nested Tree
        # nodes carry an entity label.
        if not isinstance(subtree, nltk.Tree):
            continue
        entity = " ".join(word for word, _tag in subtree.leaves())
        entity_type = subtree.label()
        print(f"{entity}: {entity_type}")

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package maxent_ne_chunker is already up-to-date!
[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Package words is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger_eng is already up-to-
[nltk_data]       date!
[nltk_data] D


=== Advanced Level: Named Entity Recognition (NER) using NLTK ===

Detected Named Entities:
Apple: PERSON
Inc.: ORGANIZATION
Tim: PERSON
Cook: GPE
CEO: ORGANIZATION
Apple: GPE


# Expert Level

In [None]:
# Install transformers if not already installed.
!pip install transformers --quiet
from transformers import pipeline

# Build a summarization pipeline using the "t5-small" model.
summarizer = pipeline(task="summarization", model="t5-small")

# Passage to condense.
text_to_summarize = (
    "Artificial intelligence (AI) is intelligence demonstrated by machines, "
    "in contrast to the natural intelligence displayed by humans and animals. "
    "Leading AI textbooks define the field as the study of 'intelligent agents': any device "
    "that perceives its environment and takes actions that maximize its chance of successfully "
    "achieving its goals. Colloquially, the term 'artificial intelligence' is often used to "
    "describe machines (or computers) that mimic cognitive functions that humans associate with "
    "the human mind, such as learning and problem-solving."
)

# Length bounds passed to the pipeline, with sampling switched off.
generation_options = {"max_length": 60, "min_length": 20, "do_sample": False}
summary = summarizer(text_to_summarize, **generation_options)

print("\nExpert Level Text Summarization Output:")
print("Summarized Text:", summary[0]['summary_text'])


=== Expert Level: Text Summarization with T5 ===


Device set to use cpu



Expert Level Text Summarization Output:
Summarized Text: leading AI textbooks define the field as the study of 'intelligent agents' the term 'artificial intelligence' is often used to describe machines that mimic cognitive functions that humans associate with the human mind .




---

