In [1]:
import html

import torch
from IPython.display import display, HTML
from transformers import T5Tokenizer, T5ForConditionalGeneration
from transformers.modeling_outputs import BaseModelOutput

# Checkpoint shared by the tokenizer and the network
model_name = "google/flan-t5-base"

# Prefer the GPU whenever CUDA is usable; otherwise stay on the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Fetch the tokenizer, then the seq2seq model, and place the model on `device`
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)
model = model.to(device)

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


In [2]:
def add_noise_with_snr(encoder_output, target_snr_db, generator=None):
    """
    Add white Gaussian noise to the encoder output at a target SNR in dB.

    The signal power is the mean of the squared elements taken over the
    whole tensor (every dimension at once), so a single noise standard
    deviation is applied to all elements.

    Parameters:
    - encoder_output: torch.Tensor, the encoder's output (last_hidden_state).
      Must be a floating-point tensor (torch.mean is applied to it).
    - target_snr_db: float, the desired signal-to-noise ratio in dB.
    - generator: optional torch.Generator used to draw the noise, for
      reproducible perturbations. It must live on the same device as
      `encoder_output`. When None (the default) the noise comes from the
      global RNG, exactly as before this argument existed.

    Returns:
    - noisy_encoder_output: torch.Tensor, encoder output with added noise
      (same shape, dtype and device as `encoder_output`).
    """
    # Convert SNR from dB to linear scale
    target_snr_linear = 10 ** (target_snr_db / 10)

    # Power of the signal, averaged over all elements
    signal_power = torch.mean(encoder_output ** 2)

    # SNR = signal_power / noise_power  =>  noise_power = signal_power / SNR
    noise_power = signal_power / target_snr_linear
    noise_std = torch.sqrt(noise_power)

    if generator is None:
        # Legacy path: global RNG, same draw as the original implementation
        unit_noise = torch.randn_like(encoder_output)
    else:
        # torch.randn accepts an explicit generator; mirror the input's
        # shape, dtype and device so the result matches randn_like's layout
        unit_noise = torch.randn(
            encoder_output.shape,
            generator=generator,
            dtype=encoder_output.dtype,
            device=encoder_output.device,
        )

    # Scale unit-variance noise to the required power and add it
    noisy_encoder_output = encoder_output + unit_noise * noise_std
    return noisy_encoder_output


In [5]:
# Define the input text (it is tokenized in the next cell)
# input_text = "Who is Donald Trump?"
# input_text = """
# Summarize the text: In telecommunications, orthogonal frequency-division multiplexing (OFDM) 
# is a type of digital transmission used in digital modulation for encoding digital (binary) data
# on multiple carrier frequencies. OFDM has developed into a popular scheme for wideband digital 
# communication, used in applications such as digital television and audio broadcasting, DSL internet 
# access, wireless networks, power line networks, and 4G/5G mobile communications.
# """.replace("\n", "")
# input_text = "Translate from English to Deutsche: How are you?"
# input_text = "Translate English to Deutsche: My name is Ziyue and I live in Sweden."
# input_text = "Translate English to Deutsche: Where is the nearest train station?"
# input_text = "Translate English to Deutsche: I would like to order a coffee, please."
# input_text = "Translate English to Deutsche: Can you help me find my hotel?"
# input_text = "Translate English to Deutsche: What time does the museum open?"
# input_text = "Translate English to Deutsche: How much does this ticket cost?"
# input_text = "Translate English to Deutsche: Do you speak English?"
# input_text = "Translate English to Deutsche: My favorite color is blue."
# input_text = "Translate English to Deutsche: I am a student learning German."
# Active prompt. The text keeps the line breaks (and the spaces that precede
# some of them) from the original wrapped paragraph; they are spelled out
# explicitly here so the whitespace is visible.
input_text = (
    "Translate English to Deutsche: In telecommunications, orthogonal frequency-division multiplexing (OFDM) \n"
    "is a type of digital transmission used in digital modulation for encoding digital (binary) data\n"
    "on multiple carrier frequencies. OFDM has developed into a popular scheme for wideband digital \n"
    "communication, used in applications such as digital television and audio broadcasting, DSL internet \n"
    "access, wireless networks, power line networks, and 4G/5G mobile communications."
)
# input_text = "Translate English to Deutsche: I enjoy listening to classical music."
# input_text = "Translate English to Deutsche: I would like to check in to my room."
# input_text = "Translate English to Deutsche: The weather today is sunny and warm."
# input_text = "Translate English to Deutsche: I need to buy a ticket to Munich."
# input_text = "Translate English to Deutsche: Could you recommend a good restaurant nearby?"
# input_text = "Translate English to Deutsche: I will stay here for a week."
# input_text = "Translate Deutsche to English: Wo ist das nächste Restaurant?"
# input_text = "Translate Deutsche to English: Ich bin hier im Urlaub."
# input_text = "Translate Deutsche to English: Können Sie mir bitte helfen?"
# input_text = "Translate Deutsche to English: Ich habe meinen Schlüssel verloren."
# input_text = "Translate Deutsche to English: Die Landschaft hier ist wunderschön."
# input_text = "Translate Deutsche to English: Das Essen hier ist sehr lecker."
# input_text = "Translate Deutsche to English: Wie lange dauert die Fahrt?"
# input_text = "Translate Deutsche to English: Ich möchte gerne ein Zimmer reservieren."
# input_text = "Translate Deutsche to English: Ich komme aus Deutschland."
# input_text = "Translate Deutsche to English: Das ist mein Freund, er wohnt in Hamburg."
# input_text = "Translate Deutsche to English: Welche Sehenswürdigkeiten gibt es hier?"
# input_text = "Translate Deutsche to English: Ich suche einen Supermarkt in der Nähe."
# input_text = "Translate English to French: Can I get directions to the airport?"
# input_text = "Translate English to French: I am traveling with my family."
# input_text = "Translate English to French: I would like to book a table for two."
# input_text = "Translate English to French: How far is the beach from here?"
# input_text = "Translate English to French: I am allergic to peanuts."
# input_text = "Translate English to French: Could I have the bill, please?"
# input_text = "Translate English to French: I am learning French for my studies."
# input_text = "Translate English to French: This is a beautiful place to visit."

In [10]:
# Tokenize the prompt and move the ids to the model's device
input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to(device)

# Run the encoder once; both decodes below start from this output
with torch.no_grad():
    encoder_outputs = model.encoder(input_ids=input_ids)

# Decoding settings shared by the clean and the noisy run. Using one dict
# keeps the two runs comparable: previously the baseline was capped at 500
# tokens and the noisy run at 200, so the outputs were truncated differently.
generation_kwargs = dict(max_length=500, do_sample=True, temperature=0.1)

# Sampling is stochastic. Re-seeding before each decode gives both runs the
# same sampling stream, so differences come from the injected noise rather
# than from sampling luck, and re-running the cell reproduces the result.
generation_seed = 0

# Generate output without noise
torch.manual_seed(generation_seed)
baseline_outputs = model.generate(
    input_ids=None,
    encoder_outputs=encoder_outputs,
    **generation_kwargs,
)
baseline_text = tokenizer.decode(baseline_outputs[0], skip_special_tokens=True)

# Add noise with a target SNR; wrap the tensor so generate() accepts it
target_snr = 3  # Target SNR in dB
noisy_encoder_output = add_noise_with_snr(encoder_outputs.last_hidden_state, target_snr)
modified_encoder_outputs = BaseModelOutput(last_hidden_state=noisy_encoder_output)

# Generate output with the noisy encoder output (same settings, same seed)
torch.manual_seed(generation_seed)
noisy_outputs = model.generate(
    input_ids=None,
    encoder_outputs=modified_encoder_outputs,
    **generation_kwargs,
)
noisy_text = tokenizer.decode(noisy_outputs[0], skip_special_tokens=True)

# Display both outputs. The texts are HTML-escaped first: the prompt and the
# model output are arbitrary strings, and a stray '<' or '&' would otherwise
# be interpreted as markup instead of being shown.
display(HTML(f"<p style='font-size:15px;'>{html.escape(input_text)}</p>"))
print('Without Noise:')
display(HTML(f"<p style='font-size:20px; font-family:\"Comic Sans MS\", cursive;'> {html.escape(baseline_text)}</p>"))
print(f'With Noise (SNR = {target_snr} dB):')
display(HTML(f"<p style='font-size:20px; font-family:\"Comic Sans MS\", cursive;'> {html.escape(noisy_text)}</p>"))

Without Noise:


With Noise (SNR = 3 dB):


In [22]:
# Inspect the encoder hidden states of the first batch item.
# NOTE(review): the recorded output below is a torch.Size, which this
# expression does not produce (indexing a tensor yields a tensor) — the cell
# appears to have been edited after it last ran; re-run to refresh.
encoder_outputs.last_hidden_state[0]

torch.Size([768])

In [23]:
# Token ids generated for the first sequence of the noise-free run
baseline_outputs[0]

tensor([    0,    86, 30068, 11760,   157,  1628,   229,    74, 26429,  5307,
          138,  5532,   835,    29,   172,    18,   308,    23,  6610, 16821,
          226,    53,    41,   667,   371,  7407,    61,   266,     3, 12575,
         2014,  4190, 31830,  8533,     6,    67,    16,  4190, 22763,   257,
          218,   177, 10285,  3194,  1125,    49,    41,  4517,  1208,    61,
         2679,   219,  7461,    29,     3, 17336,    29,     3,  4524, 16864,
           17,     5,  3347,  7407,     3,   547,   289,    16, 11844,    35,
          587,  4190, 19798, 29749,    64,  9607,    18,   279,    60,   155,
        10830,     6,   309,  5629,    18, 22912,    18, 31679,     6, 17202,
           18,  9688,   776,     6,  2621,    18, 21022,    18,  9688,   776,
           64,   314,   517, 16936,   517,  1156,    52,  8685,    76,  4953,
         1628, 13181,  3458,    15, 10322,     5,     1], device='cuda:0')

In [24]:
# Re-encode the decoded baseline text to compare with the generated ids.
# Per the recorded outputs the round trip is not id-for-id identical: the
# leading 0 is absent and a few pieces are segmented differently.
tokenizer(baseline_text, return_tensors="pt", truncation=True, max_length=1024)['input_ids']

tensor([[   86, 30068, 11760,   157,  1628,   229,    74, 26429,  5307,   138,
          5532,   835,    29,   172,    18,   308,    23,  6610, 16821,   226,
            53,    41,   667,   371,  7407,    61,   266,     3, 21725,  4190,
         31830,  8533,     6,    67,    16,  4190, 22763,   257,   218,   177,
         10285,  3194,  1125,    49,    41,  4517,  1208,    61,  2679,   219,
          7461,    29, 20012,     3,  4524, 16864,    17,     5,  3347,  7407,
             3,   547,   289,    16, 11844,    35,   587,  4190, 19798, 29749,
            64,  9607,    18,   279,    60,   155, 10830,     6,   309,  5629,
            18, 22912,    18, 31679,     6, 17202,    18,  9688,   776,     6,
          2621,    18, 21022,    18,  9688,   776,    64,   314,   517, 16936,
           517,  1156,    52,  8685,    76,  4953,  1628, 13181,  3458,    15,
         10322,     5,     1]])

In [27]:
# Decode a hand-picked id sequence WITHOUT skipping special tokens, so the
# special-token strings for ids 0 and 1 stay visible in the result.
tt = torch.tensor([0, 15, 10322,     5,     1,])
t = tokenizer.decode(tt)
t

'<pad> e entwickelt.</s>'

In [21]:
# Print tensors with 10 decimal digits so small differences between the
# noisy and the clean encoder outputs are visible.
# Note: set_printoptions changes a global setting, so every later cell in
# this kernel prints tensors with this precision as well.
torch.set_printoptions(precision=10)
print(modified_encoder_outputs)

BaseModelOutput(last_hidden_state=tensor([[[ 0.0110645378,  0.0446379520, -0.1195041314,  ...,
           0.0239841975,  0.0657943934, -0.1541268975],
         [ 0.0011646717, -0.0006612060,  0.0024203842,  ...,
          -0.0139574334, -0.0003432746, -0.0014761132],
         [-0.0300152730, -0.0099295871,  0.0018708719,  ...,
           0.0180891100,  0.1408756226, -0.0221240111],
         ...,
         [-0.0028463011, -0.0528447069,  0.0717518777,  ...,
           0.0263906494, -0.1086646691, -0.1568258107],
         [ 0.0614422895, -0.0377765149,  0.0980087668,  ...,
          -0.0019372494,  0.0795445740, -0.1129229516],
         [-0.0311634671,  0.0125041567,  0.0268029757,  ...,
           0.0052387128,  0.0760847554, -0.0480498597]]], device='cuda:0'), hidden_states=None, attentions=None)


In [22]:
# Print the clean encoder outputs for comparison with the noisy ones
print(encoder_outputs)

BaseModelOutputWithPastAndCrossAttentions(last_hidden_state=tensor([[[ 0.0110638738,  0.0446386412, -0.1195035130,  ...,
           0.0239830501,  0.0657947734, -0.1541264057],
         [ 0.0011641044, -0.0006623128,  0.0024199435,  ...,
          -0.0139569221, -0.0003425943, -0.0014757399],
         [-0.0300164297, -0.0099294083,  0.0018707113,  ...,
           0.0180890486,  0.1408752650, -0.0221240781],
         ...,
         [-0.0028459774, -0.0528435819,  0.0717529729,  ...,
           0.0263907555, -0.1086634919, -0.1568261683],
         [ 0.0614417158, -0.0377769396,  0.0980087370,  ...,
          -0.0019358803,  0.0795432851, -0.1129235551],
         [-0.0311635714,  0.0125043727,  0.0268025026,  ...,
           0.0052372129,  0.0760850981, -0.0480505303]]], device='cuda:0'), past_key_values=None, hidden_states=None, attentions=None, cross_attentions=None)


torch.Size([1, 11])

In [101]:
# '58' <=> ?
# '0' <=> <start>
# '1' <=> <end>
a = torch.tensor([[58]])
text = tokenizer.decode(a[0], skip_special_tokens=True)
text

'?'

In [89]:
# Token ids of the noise-free generation for whichever prompt was run last
baseline_outputs[0]

tensor([    0, 17913, 30979,  7666,     1], device='cuda:0')

In [90]:
# Token ids of the noisy generation (full batch dimension kept). The
# recorded output repeats a short id pattern until it stops.
noisy_outputs

tensor([[ 0,  3, 76, 89, 32,  3, 89,  9,  3,  7, 17,  3, 63,  3,  7,  3, 17,  3,
          7,  3, 17,  3,  7,  3, 17,  3,  7,  3, 17,  3,  7,  3, 17,  3,  7,  3,
         17,  3,  7,  3, 17,  3,  7,  3, 17,  3,  7,  3, 17,  3,  7,  3, 17,  3,
          7,  3, 17,  3,  7,  3, 17,  3,  7,  3, 17,  3,  7,  3, 17,  3,  7,  3,
         17,  3,  7,  3, 17,  3,  7,  3, 17,  3,  7,  3, 17,  3,  7,  3, 17,  3,
          7,  3, 17,  3,  7,  3, 17,  3,  7,  3]], device='cuda:0')