In [1]:
from transformers import T5Tokenizer, T5ForConditionalGeneration
from transformers.modeling_outputs import BaseModelOutput
import torch
from IPython.display import display, HTML

# Device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load model and tokenizer
model_name = "google/flan-t5-large"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name).to(device)

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


In [1]:
from transformers import BartTokenizer, BartForConditionalGeneration
from transformers.modeling_outputs import BaseModelOutput
from IPython.display import display, HTML
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_name = "facebook/bart-base" # Recommend this one if your computer is okay with larger models
tokenizer = BartTokenizer.from_pretrained(model_name)
model = BartForConditionalGeneration.from_pretrained(model_name).to(device)

In [2]:
def add_noise_with_snr(encoder_output, target_snr_db):
    """
    Add noise to the encoder output based on a target SNR in dB.
    
    Parameters:
    - encoder_output: torch.Tensor, the encoder's output (last_hidden_state).
    - target_snr_db: float, the desired signal-to-noise ratio in dB.
    
    Returns:
    - noisy_encoder_output: torch.Tensor, encoder output with added noise.
    """
    # Convert SNR from dB to linear scale
    target_snr_linear = 10 ** (target_snr_db / 10)
    
    # Calculate power of the signal
    signal_power = torch.mean(encoder_output ** 2)
    
    # Calculate required noise power for the target SNR
    noise_power = signal_power / target_snr_linear
    noise = torch.randn_like(encoder_output) * torch.sqrt(noise_power)
    
    # Add noise to the encoder output
    noisy_encoder_output = encoder_output + noise
    return noisy_encoder_output


In [3]:
# input_text = "Tranlate English to Deutsche: This is an experimental feature and is a subject to change at a moment's notice."
input_text = "Tranlate English to Deutsche: How are you?"

input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to(device)

# Get encoder output without noise
with torch.no_grad():
    encoder_outputs = model.encoder(input_ids=input_ids) # BART
    # encoder_outputs = model.encoder(input_ids=input_ids) # T5

# Generate output without noise
baseline_outputs = model.generate(
    input_ids=None,
    encoder_outputs=encoder_outputs,
    output_hidden_states=True,
    return_dict_in_generate=True,
    max_length=500,
    do_sample=True,
    temperature=0.1
)
baseline_text = tokenizer.decode(baseline_outputs.sequences[0], skip_special_tokens=True)

# Add noise with a target SNR and generate noisy output
target_snr = 3  # Set target SNR
noisy_encoder_output = add_noise_with_snr(encoder_outputs.last_hidden_state, target_snr)
modified_encoder_outputs = BaseModelOutput(last_hidden_state=noisy_encoder_output)

# Generate output with the noisy encoder output
noisy_outputs = model.generate(
    input_ids=None,
    encoder_outputs=modified_encoder_outputs,
    output_hidden_states=True,
    return_dict_in_generate=True,
    max_length=200,
    do_sample=True,
    temperature=0.1
)
noisy_text = tokenizer.decode(noisy_outputs.sequences[0], skip_special_tokens=True)

# Display both outputs
display(HTML(f"<p style='font-size:15px;'>{input_text}</p>"))
print('Without Noise:')
display(HTML(f"<p style='font-size:20px; font-family:\"Comic Sans MS\", cursive;'> {baseline_text}</p>"))
print(f'With Noise (SNR = {target_snr} dB):')
display(HTML(f"<p style='font-size:20px; font-family:\"Comic Sans MS\", cursive;'> {noisy_text}</p>"))

Without Noise:


With Noise (SNR = 3 dB):


In [4]:
# Encoder output size
encoder_outputs.last_hidden_state.shape

torch.Size([1, 12, 1024])

In [5]:
len(baseline_outputs.decoder_hidden_states)

5

In [6]:
len(baseline_outputs.decoder_hidden_states[-1])

25

In [7]:
len(baseline_outputs.decoder_hidden_states[0])

25

In [8]:
len(noisy_outputs.decoder_hidden_states)

4

In [9]:
len(noisy_outputs.decoder_hidden_states[-1])

25

In [10]:
len(noisy_outputs.decoder_hidden_states[0])

25

In [11]:
last_column = [row[-1] for row in baseline_outputs.decoder_hidden_states]
len(last_column)

5

In [12]:
decoder_output = torch.stack(last_column)
decoder_output.shape

torch.Size([5, 1, 1, 1024])

In [13]:
decoder_output

tensor([[[[-1.2577e-01,  7.0460e-02, -6.4927e-02,  ...,  2.2854e-01,
           -3.5367e+00,  2.9031e-02]]],


        [[[-6.1239e-03, -1.8696e-02,  1.0281e-01,  ...,  5.3043e-02,
           -2.7600e+00,  1.2889e-01]]],


        [[[-1.4533e-01,  6.7206e-02,  3.9925e-02,  ...,  9.7330e-03,
           -2.8842e+00,  1.2087e-01]]],


        [[[ 2.1294e-03,  1.3861e-01,  2.7731e-02,  ...,  9.4162e-02,
           -2.8283e+00,  3.9442e-03]]],


        [[[ 6.8444e-02,  1.3694e-01,  3.3259e-03,  ...,  2.4101e-01,
           -2.8714e+00,  9.5911e-03]]]], device='cuda:0')

In [14]:
baseline_outputs.decoder_hidden_states[-1][-1]

tensor([[[ 0.0684,  0.1369,  0.0033,  ...,  0.2410, -2.8714,  0.0096]]],
       device='cuda:0')