# TensorFlow Solutions for HuggingFace Exercises

This notebook provides TensorFlow solutions for exercises from the HuggingFace exercises notebook: Exercises 1, 2, 4, and 5 include runnable code, while Exercise 3 (distillation) is described only conceptually.

## Exercise 1: Downloading and Prompting T5 with TensorFlow

In [None]:
!pip install tensorflow transformers

In [None]:

from transformers import TFAutoModelForSeq2SeqLM, AutoTokenizer

# Checkpoint identifier passed to both from_pretrained calls below, so the
# tokenizer and the model are loaded from the same "t5-small" checkpoint.
model_name = "t5-small"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# TensorFlow seq2seq model class (imported above as TFAutoModelForSeq2SeqLM).
model = TFAutoModelForSeq2SeqLM.from_pretrained(model_name)


In [None]:

def translate_with_t5(text, model, tokenizer, source_lang="English", target_lang="French", max_new_tokens=128):
    """Translate ``text`` by prompting a T5-style seq2seq model.

    Args:
        text: The sentence(s) to translate.
        model: Object exposing ``generate(inputs, ...)``.
        tokenizer: Object exposing ``encode(text, return_tensors=...)`` and
            ``decode(ids, skip_special_tokens=...)``.
        source_lang: Source language name as spelled in the prompt.
        target_lang: Target language name as spelled in the prompt.
        max_new_tokens: Upper bound on the number of generated tokens. Passing
            it explicitly avoids relying on the library's short default
            generation length, which cuts off longer translations.

    Returns:
        The decoded translation with special tokens removed.
    """
    # T5 was trained with a lowercase task prefix, e.g.
    # "translate English to French: ..."; a capitalised "Translate" does not
    # match that prefix.
    input_text = f"translate {source_lang} to {target_lang}: {text}"
    inputs = tokenizer.encode(input_text, return_tensors="tf")
    outputs = model.generate(inputs, max_new_tokens=max_new_tokens)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Example usage: only the text, model and tokenizer are passed, so the
# function's default source/target languages (English -> French) apply.
# As the last expression of the cell, the returned string is shown as output.
translate_with_t5("Hello, world!", model, tokenizer)


## Exercise 2: Transfer Learning with BERT in TensorFlow

In [None]:
!pip install tensorflow tensorflow-datasets transformers

In [None]:
import tensorflow as tf
import tensorflow_datasets as tfds
from transformers import TFBertForSequenceClassification, BertTokenizer, glue_convert_examples_to_features

# Fetch the IMDB reviews train and test splits as supervised (review, label) pairs.
data = tfds.load(
    'imdb_reviews',
    split=['train', 'test'],
    as_supervised=True,
)
# The two requested splits come back in the order they were asked for.
train_data, test_data = data


In [None]:
# Module-level tokenizer for the 'bert-base-uncased' checkpoint.
# encode_examples reads this global instead of taking a tokenizer argument.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

def encode_examples(ds, limit=-1, batch_size=32):
    """Tokenize (review, label) pairs into a shuffled, batched ``tf.data.Dataset``.

    Uses the module-level ``tokenizer``.

    Args:
        ds: A ``tf.data.Dataset`` yielding ``(review, label)`` pairs, where
            ``review`` is a UTF-8 encoded byte string.
        limit: Number of examples to take from ``ds``; forwarded to
            ``ds.take`` (the default ``-1`` takes the whole dataset).
        batch_size: Number of examples per batch in the returned dataset.

    Returns:
        A dataset of ``({'input_ids': ..., 'attention_mask': ...}, label)``
        batches with every sequence padded or truncated to 128 tokens.

    Raises:
        ValueError: If no examples were selected. Shuffling needs a positive
            buffer size, so an empty selection is reported here with a clear
            message instead of failing later inside ``shuffle``.
    """
    texts = []
    labels = []
    for review, label in tfds.as_numpy(ds.take(limit)):
        texts.append(review.decode('utf-8'))
        labels.append(label)

    if not texts:
        raise ValueError("encode_examples received no examples to encode")

    # Tokenize the whole selection with a single tokenizer call: this replaces
    # the deprecated per-example encode_plus and yields (N, 128) tensors
    # directly instead of a Python list of N separate tensors.
    encodings = tokenizer(
        texts,
        add_special_tokens=True,
        max_length=128,
        truncation=True,
        padding='max_length',
        return_attention_mask=True,
        return_token_type_ids=False,
        return_tensors='tf',
    )

    features = {
        'input_ids': encodings['input_ids'],
        'attention_mask': encodings['attention_mask'],
    }
    dataset = tf.data.Dataset.from_tensor_slices((features, labels))
    # A buffer as large as the dataset shuffles all selected examples together.
    return dataset.shuffle(len(labels)).batch(batch_size)

# Encode a 10,000-review training subset and a 1,000-review test subset,
# both batched with the same batch size.
batch_size = 32
train_data_encoded = encode_examples(train_data, limit=10000, batch_size=batch_size)
test_data_encoded = encode_examples(test_data, limit=1000, batch_size=batch_size)

In [None]:
# Load the pre-trained 'bert-base-uncased' checkpoint into the
# sequence-classification model class. This rebinds the global name `model`,
# which previously referred to the T5 model from Exercise 1.
model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased')

In [None]:
# Display the Keras summary of the loaded classification model.
model.summary()

In [None]:
# Mark the model's first layer as non-trainable so its weights are not updated
# during fitting.
# NOTE(review): presumably layers[0] is the BERT encoder, which would leave only
# the classification head trainable — confirm against the model.summary() output.
model.layers[0].trainable = False

In [None]:
# Training configuration
epochs = 3  # Adjust as needed
optimizer = tf.keras.optimizers.Adam(learning_rate=2e-5)
# The model outputs raw logits, hence from_logits=True.
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
metrics = tf.metrics.SparseCategoricalAccuracy()

# Model compilation
model.compile(
    optimizer=optimizer,
    loss=loss,
    metrics=[metrics],
)

# Model training, validating on the encoded test subset after each epoch
model.fit(
    train_data_encoded,
    validation_data=test_data_encoded,
    epochs=epochs,
)


## Exercise 3: Distillation of BERT using TensorFlow

In [None]:

# Understanding model distillation
# Distillation involves training a smaller model (student) to mimic a larger model (teacher).
# Here we assume the use of a smaller BERT model as the student.
# The distillation process involves training the student model to replicate the teacher model's output.
# Detailed code for this process is complex and is not provided in this example.



## Exercise 4: Using ROUGE for Evaluation

In [None]:

!pip install tensorflow transformers rouge-score


In [None]:

import tensorflow as tf
from transformers import TFAutoModelForSeq2SeqLM, AutoTokenizer
from rouge_score import rouge_scorer

# Loading the model and tokenizer for summarization.
# These assignments rebind the global `tokenizer` and `model` names, replacing
# the BERT objects bound to them in Exercise 2; summarize() reads both globals.
model_name = 't5-small'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = TFAutoModelForSeq2SeqLM.from_pretrained(model_name)

# Function to perform summarization
def summarize(text, max_length=150, min_length=40):
    """Summarize ``text`` with the module-level T5 ``model`` and ``tokenizer``.

    Args:
        text: The passage to summarize.
        max_length: Maximum length, in tokens, of the generated summary.
        min_length: Minimum length, in tokens, of the generated summary.

    Returns:
        The decoded summary with special tokens removed.
    """
    # max_length only limits the input when truncation is enabled; request it
    # explicitly instead of relying on the tokenizer's implicit fallback, which
    # emits a warning.
    inputs = tokenizer.encode(
        "summarize: " + text,
        return_tensors="tf",
        max_length=512,
        truncation=True,
    )
    outputs = model.generate(
        inputs,
        max_length=max_length,
        min_length=min_length,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Passage to condense
example_text = "The quick brown fox jumps over the lazy dog. This is an example sentence to demonstrate text summarization."

# Generate the summary
summary = summarize(example_text)

# Score the summary with ROUGE-1, ROUGE-2 and ROUGE-L.
# The input passage itself is used as the reference (target) text.
scorer = rouge_scorer.RougeScorer(
    ['rouge1', 'rouge2', 'rougeL'],
    use_stemmer=True,
)
scores = scorer.score(target=example_text, prediction=summary)

# Show the summary together with its scores
(summary, scores)


## Exercise 5: Exploring BLEU for Machine Translation

In [None]:

!pip install tensorflow transformers sacrebleu


In [None]:

from transformers import TFAutoModelForSeq2SeqLM, AutoTokenizer
import sacrebleu

# Loading the model and tokenizer for translation.
# These assignments rebind the global `tokenizer` and `model` names, which
# translate() reads; the same 't5-small' checkpoint is used as in Exercise 4.
model_name = 't5-small'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = TFAutoModelForSeq2SeqLM.from_pretrained(model_name)

# Function to perform translation
def translate(text, target_language="fr", max_length=150, min_length=0):
    """Translate English ``text`` with the module-level T5 ``model`` and ``tokenizer``.

    Args:
        text: The English text to translate.
        target_language: Target language, given either as a language name
            ("French") or as one of the codes "fr", "de", "ro".
        max_length: Maximum length, in tokens, of the generated translation.
        min_length: Minimum length, in tokens, of the generated translation.
            Defaults to 0 so that short sentences are not forced to produce
            extra tokens.

    Returns:
        The decoded translation with special tokens removed.
    """
    # T5's translation prefix spells the language out
    # ("translate English to French: "), so a bare code such as "fr" does not
    # match it. Map the known codes to names; anything else passes through.
    language_names = {"fr": "French", "de": "German", "ro": "Romanian"}
    target_name = language_names.get(target_language.lower(), target_language)

    inputs = tokenizer.encode(
        "translate English to " + target_name + ": " + text,
        return_tensors="tf",
        max_length=512,
        truncation=True,
    )
    outputs = model.generate(
        inputs,
        max_length=max_length,
        min_length=min_length,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Sentence to translate
example_text = "The quick brown fox jumps over the lazy dog."

# Produce the French translation
translation = translate(example_text, target_language="fr")

# Score the translation with corpus-level BLEU against one reference translation.
# corpus_bleu takes a list of hypotheses and a list of reference lists.
reference = ["Le rapide renard brun saute par-dessus le chien paresseux."]
bleu_score = sacrebleu.corpus_bleu(
    [translation],
    [reference],
)

# Show the translation together with its BLEU score
(translation, bleu_score.score)
