In [None]:

import tensorflow as tf
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, TFAutoModelForSeq2SeqLM
!pip install transformers

# Hub identifier of the checkpoint and the local directory used for the round-trip
MODEL_ID = "google-t5/t5-small"
EXPORT_DIR = "t5_small_tf"

# Fetch the tokenizer and the PyTorch seq2seq model
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_ID)

# Persist model and tokenizer to disk (model first, then tokenizer).
# What is written here is the PyTorch checkpoint; the TensorFlow conversion happens below.
for artifact in (model, tokenizer):
  artifact.save_pretrained(EXPORT_DIR)

# Reload the saved PyTorch checkpoint as a TensorFlow/Keras model (from_pt=True)
tf_model = TFAutoModelForSeq2SeqLM.from_pretrained(EXPORT_DIR, from_pt=True)

# Calibration-sample generator handed to the TFLite converter for quantization
def representative_dataset_gen():
  """Yield 100 calibration samples, each a one-element list holding `input_ids`.

  Every sample is the same placeholder tensor ``[[1, 2, 3, 4, 5]]``; swap in
  real tokenized inputs for meaningful calibration.
  """
  sample_count = 100  # Adjust number of samples as needed
  produced = 0
  while produced < sample_count:
    # Example input; replace with actual data
    yield [tf.constant([[1, 2, 3, 4, 5]])]
    produced += 1


# Convert the Keras model to a TFLite flatbuffer with int8 quantization
converter = tf.lite.TFLiteConverter.from_keras_model(tf_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]  # Use DEFAULT for more aggressive quantization
converter.representative_dataset = representative_dataset_gen  # Calibration samples for quantization
converter.target_spec.supported_ops = [
  tf.lite.OpsSet.TFLITE_BUILTINS_INT8  # Restrict the converter to int8 builtin ops
]
converter.inference_input_type = tf.int8  # or tf.uint8
converter.inference_output_type = tf.int8  # or tf.uint8

tflite_model = converter.convert()
tflite_model_name = "t5_small_tflite_quantized" # Give a meaningful name to the quantized model

# Write through a context manager so the file handle is flushed and closed
# deterministically instead of being left to the garbage collector.
with open(tflite_model_name + '.tflite', 'wb') as tflite_file:
  tflite_file.write(tflite_model)

In [None]:

from google.colab import files

# Download the file the conversion cell actually writes
# (tflite_model_name + '.tflite' == 't5_small_tflite_quantized.tflite').
# The previous name 't5_small_tflite.tflite' is not produced by any cell shown here.
files.download('t5_small_tflite_quantized.tflite')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
from transformers import T5ForConditionalGeneration, T5Tokenizer
import torch

# Load pre-trained T5-small model and tokenizer
teacher_model = T5ForConditionalGeneration.from_pretrained("t5-small")
tokenizer = T5Tokenizer.from_pretrained("t5-small")

# Example fine-tuning dataset (a single prompt/target pair, truncated to 32 tokens)
inputs = tokenizer(["What should I do if the temperature is 35°C?"], max_length=32, truncation=True, return_tensors="pt").input_ids
labels = tokenizer(["Reduce heat and drink water."], max_length=32, truncation=True, return_tensors="pt").input_ids

# Fine-tune the teacher model (simple example: one optimization step).
# backward() alone only fills .grad; an optimizer step is required for the
# weights to actually change.
teacher_optimizer = torch.optim.AdamW(teacher_model.parameters(), lr=5e-5)
teacher_model.train()              # from_pretrained returns the model in eval mode
teacher_optimizer.zero_grad()
outputs = teacher_model(input_ids=inputs, labels=labels)
loss = outputs.loss  # Compute loss
loss.backward()      # Backpropagation
teacher_optimizer.step()

# Release the gradients and go back to eval mode so the teacher produces
# deterministic logits when it is queried for soft labels later on.
teacher_optimizer.zero_grad(set_to_none=True)
teacher_model.eval()


config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/242M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.


In [None]:
from transformers import T5Config, T5ForConditionalGeneration

# Hyperparameters of the reduced-size T5 used as the student
tiny_t5_kwargs = {
    "vocab_size": 32128,   # Same vocab as T5-small
    "d_model": 256,        # Reduce hidden size
    "d_ff": 512,           # Reduce feed-forward layer size
    "num_layers": 4,       # Reduce encoder/decoder layers
    "num_heads": 4,        # Reduce attention heads
    "decoder_start_token_id": tokenizer.pad_token_id,  # Decoding starts from the pad token
}
tiny_t5_config = T5Config(**tiny_t5_kwargs)

# Instantiate the student from the config (no pretrained weights are loaded)
student_model = T5ForConditionalGeneration(tiny_t5_config)


In [None]:
import torch.nn.functional as F

# One example prompt, encoded to input ids (truncated to 32 tokens)
input_texts = ["Temperature is 45°C. What should I do?"]
encoded_prompt = tokenizer(input_texts, max_length=32, truncation=True, return_tensors="pt")
inputs = encoded_prompt.input_ids

# Token ids fed to the decoder; later cells pass the same ids to the student
decoder_input_ids = tokenizer(["What should I"], return_tensors="pt").input_ids

# Teacher forward pass without gradient tracking
with torch.no_grad():
    teacher_outputs = teacher_model(input_ids=inputs, decoder_input_ids=decoder_input_ids)
teacher_logits = teacher_outputs.logits  # Logits from the teacher

# Soft labels: softmax over the last dimension of the logits
teacher_probs = teacher_logits.softmax(dim=-1)

In [None]:
from torch.nn import KLDivLoss

# Distillation loss function (KL divergence)
kl_loss = KLDivLoss(reduction="batchmean")

# Build the optimizer up front and clear any stale gradients, so re-running
# this cell performs exactly one clean step instead of accumulating gradients
# from earlier backward passes.
optimizer = torch.optim.Adam(student_model.parameters(), lr=1e-4)
optimizer.zero_grad()

# Generate student outputs
# The student receives the same decoder_input_ids as the teacher so the two
# logit tensors line up position by position.
student_logits = student_model(input_ids=inputs, decoder_input_ids=decoder_input_ids).logits

# Compute distillation loss: KLDivLoss takes log-probabilities as input and
# probabilities as target.
distillation_loss = kl_loss(
    F.log_softmax(student_logits, dim=-1),  # Student predictions (log-probs)
    teacher_probs                          # Teacher's "soft labels"
)

# Backpropagation and optimization
distillation_loss.backward()
optimizer.step()

In [None]:
# Ground-truth labels for fine-tuning
labels = tokenizer(["Stay hydrated and seek shade."], max_length=32, truncation=True, return_tensors="pt").input_ids

# Create the optimizer and reset gradients BEFORE the forward/backward pass.
# Without this, gradients left in .grad by the earlier distillation step are
# added to this step's gradients and the update is wrong.
optimizer = torch.optim.Adam(student_model.parameters(), lr=1e-4)
optimizer.zero_grad()

# Forward pass 1: student logits on the same decoder inputs the teacher saw,
# used for the distillation term.
student_logits = student_model(input_ids=inputs, decoder_input_ids=decoder_input_ids).logits

# Forward pass 2: passing `labels` makes the model return the supervised loss.
outputs = student_model(input_ids=inputs, labels=labels)
supervised_loss = outputs.loss

# Distillation loss function
kl_loss = KLDivLoss(reduction="batchmean")

# Compute distillation loss (KL divergence between teacher and student distributions)
distillation_loss = kl_loss(
    F.log_softmax(student_logits, dim=-1),  # Student predictions (log-probs)
    teacher_probs                          # Teacher's "soft labels"
)

# Total loss = Distillation loss + Supervised loss
total_loss = distillation_loss + supervised_loss

# A single backward pass on the summed loss propagates both terms at once
total_loss.backward()
optimizer.step()

In [None]:
from transformers import T5Tokenizer

input_text = "The temperature is 38°C. What should I do?"

# Reload the tokenizer (the same one the teacher uses)
tokenizer = T5Tokenizer.from_pretrained("t5-small")

# Encode the prompt, truncating to at most 32 tokens
encoded_query = tokenizer(input_text, return_tensors="pt", max_length=32, truncation=True)
inputs = encoded_query.input_ids

# Generate with the student in evaluation mode and without gradient tracking
student_model.eval()
with torch.no_grad():
    student_outputs = student_model.generate(inputs, max_length=32, num_beams=2, early_stopping=True)

# Turn the first returned sequence back into text, dropping special tokens
best_sequence = student_outputs[0]
decoded_output = tokenizer.decode(best_sequence, skip_special_tokens=True)

# Show the result
print("Student Model Output:", decoded_output)


Student Model Output: walking walking walking walking walking walking walking walking walking walking walking walking walking walking walking walking walking walking walking walking walking walking walking walking walking walking walking walking walking walking walking


In [None]:
import csv

# Temperature bands paired with the advice text for each band
training_data = [
    {"temperature": "Below -10°C", "recommendation": "Stay indoors and ensure heating systems are functional."},
    {"temperature": "-10°C to 0°C", "recommendation": "Wear heavy clothing and limit outdoor exposure."},
    {"temperature": "0°C to 10°C", "recommendation": "Wear warm clothing and keep an eye on weather changes."},
    {"temperature": "10°C to 20°C", "recommendation": "Enjoy moderate weather, but carry a light jacket."},
    {"temperature": "20°C to 30°C", "recommendation": "Stay hydrated and wear comfortable clothing."},
    {"temperature": "35°C", "recommendation": "Stay cool and drink plenty of water."},
    {"temperature": "30°C to 40°C", "recommendation": "Avoid outdoor activities during peak hours and drink plenty of water."},
    {"temperature": "Above 40°C", "recommendation": "Stay indoors, use cooling devices, and avoid strenuous activities."},
]

# Build one (prompt, recommendation) row per band, then dump them to a CSV file
output_file = "temperature_training_data.csv"
rows = [
    [f"The temperature is {entry['temperature']}. What should I do?# EN", entry["recommendation"]]
    for entry in training_data
]
with open(output_file, "w", newline="", encoding="utf-8") as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(["input", "output"])  # Headers
    writer.writerows(rows)

print(f"Training data saved to {output_file}")


Training data saved to temperature_training_data.csv


In [None]:
import pandas as pd

# Read the prompt/recommendation pairs back from disk
data = pd.read_csv("temperature_training_data.csv")

# Pull the two columns out as plain Python lists
inputs, outputs = (data[column].tolist() for column in ("input", "output"))

# Show the first pair as a sanity check
print("Example Input:", inputs[0])
print("Example Output:", outputs[0])


Example Input: The temperature is Below -10°C. What should I do?# EN
Example Output: Stay indoors and ensure heating systems are functional.


In [None]:
from transformers import T5Tokenizer

tokenizer = T5Tokenizer.from_pretrained("t5-small")

# Encoding options shared by prompts and targets: pad within the batch,
# truncate to 32 tokens, return PyTorch tensors
encode_kwargs = dict(padding=True, truncation=True, max_length=32, return_tensors="pt")
tokenized_inputs = tokenizer(inputs, **encode_kwargs)
tokenized_outputs = tokenizer(outputs, **encode_kwargs)

# Report the shape of each id tensor
for caption, batch in (
    ("Tokenized Input Shape:", tokenized_inputs),
    ("Tokenized Output Shape:", tokenized_outputs),
):
    print(caption, batch["input_ids"].shape)


Tokenized Input Shape: torch.Size([8, 21])
Tokenized Output Shape: torch.Size([8, 16])


In [None]:
import torch
from transformers import T5ForConditionalGeneration

# Load the student model (pretrained T5-small weights)
student_model = T5ForConditionalGeneration.from_pretrained("t5-small")
# from_pretrained returns the model in eval mode; switch to training mode so
# dropout is active while fine-tuning.
student_model.train()

# Define optimizer. `transformers.AdamW` is deprecated and no longer importable
# in recent transformers releases, so use the PyTorch implementation.
# weight_decay=0.0 keeps the default of the transformers version.
optimizer = torch.optim.AdamW(student_model.parameters(), lr=5e-5, weight_decay=0.0)

# Replace padding ids in the targets with -100 so padded positions are ignored
# by the loss instead of training the model to emit pad tokens.
train_labels = tokenized_outputs["input_ids"].clone()
train_labels[train_labels == tokenizer.pad_token_id] = -100

# Training loop: each iteration is one step over the whole (tiny) dataset
for epoch in range(30):  # Adjust epochs as needed
    optimizer.zero_grad()
    # Named `model_outputs` so the `outputs` list of target strings from the
    # data-loading cell is not overwritten.
    model_outputs = student_model(
        input_ids=tokenized_inputs["input_ids"],
        attention_mask=tokenized_inputs["attention_mask"],
        labels=train_labels,
    )
    loss = model_outputs.loss
    loss.backward()
    optimizer.step()
    print(f"Epoch {epoch + 1}, Loss: {loss.item()}")




Epoch 1, Loss: 8.095419883728027
Epoch 2, Loss: 8.277545928955078
Epoch 3, Loss: 7.822876453399658
Epoch 4, Loss: 7.309116840362549
Epoch 5, Loss: 6.812974452972412
Epoch 6, Loss: 6.327001094818115
Epoch 7, Loss: 5.862964153289795
Epoch 8, Loss: 5.409661769866943
Epoch 9, Loss: 4.964130401611328
Epoch 10, Loss: 4.529732704162598
Epoch 11, Loss: 4.102562427520752
Epoch 12, Loss: 3.6939616203308105
Epoch 13, Loss: 3.332385301589966
Epoch 14, Loss: 3.041287660598755
Epoch 15, Loss: 2.812716007232666
Epoch 16, Loss: 2.630575656890869
Epoch 17, Loss: 2.4850666522979736
Epoch 18, Loss: 2.386723756790161
Epoch 19, Loss: 2.3300607204437256
Epoch 20, Loss: 2.291555404663086
Epoch 21, Loss: 2.26301908493042
Epoch 22, Loss: 2.2292487621307373
Epoch 23, Loss: 2.1898770332336426
Epoch 24, Loss: 2.1479175090789795
Epoch 25, Loss: 2.09694504737854
Epoch 26, Loss: 2.0463197231292725
Epoch 27, Loss: 1.9942480325698853
Epoch 28, Loss: 1.9428694248199463
Epoch 29, Loss: 1.8946683406829834
Epoch 30, Loss:

In [None]:
# Prompt written in the same format as the training inputs (with the "# EN" suffix)
test_input = "The temperature is 30°C. What should I do?# EN"
test_encoding = tokenizer(test_input, return_tensors="pt", max_length=32, truncation=True)
test_tokenized = test_encoding.input_ids

# Generate in evaluation mode, without gradient tracking, using 2 beams
student_model.eval()
with torch.no_grad():
    prediction = student_model.generate(test_tokenized, max_length=32, num_beams=2)

# Decode the first returned sequence, dropping special tokens
top_sequence = prediction[0]
decoded_prediction = tokenizer.decode(top_sequence, skip_special_tokens=True)

print("Model Prediction:", decoded_prediction)


Model Prediction: The temperature is 30°C and the temperature is 30°C.
