## Use pipeline

In [1]:
from transformers import pipeline, T5ForConditionalGeneration, T5Tokenizer
from transformers.modeling_outputs import BaseModelOutput
import torch
from IPython.display import display, HTML

# Run on the GPU when CUDA is available, otherwise stay on the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# English -> German translation pipeline using the FLAN-T5 XL checkpoint
translator = pipeline(
    task="translation_en_to_de",
    model="google/flan-t5-xl",
    device=device,
)

input_text = "My name is Ziyue and I live in Sweden."

# Translate with the default generation settings. Decoding can be tuned by
# passing generation keyword arguments after the text, e.g. max_length,
# min_length, do_sample (needed for temperature to take effect), temperature,
# top_k and top_p.
output = translator(input_text)

# Bare last expression so the notebook renders the result
output

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

[{'translation_text': 'Ich bin Ziyue und ich wohne in Schweden.'}]

## Pipeline + Gaussian noise on the encoder output (fail)

In [1]:
from transformers import pipeline, T5ForConditionalGeneration, T5Tokenizer
from transformers.modeling_outputs import BaseModelOutput
import torch
from IPython.display import display, HTML

# Prefer the GPU when one is visible to PyTorch, else fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# English -> French translation pipeline using the FLAN-T5 base checkpoint
translator = pipeline(
    task="translation_en_to_fr",
    model="google/flan-t5-base",
    device=device,
)

# Pull the tokenizer and the underlying model out of the pipeline so the
# encoder and generate() can be driven directly
tokenizer = translator.tokenizer
model = translator.model.to(device)

def add_noise_with_snr(encoder_output, target_snr_db):
    """
    Corrupt an encoder output with additive Gaussian noise at a chosen SNR.

    Signal power is the mean of the squared entries taken over the whole
    tensor. Standard-normal samples are scaled so that the expected noise
    power equals the signal power divided by the linear SNR.

    Parameters:
    - encoder_output: torch.Tensor, the output of the encoder (last_hidden_state).
    - target_snr_db: float, the desired Signal-to-Noise Ratio in dB.

    Returns:
    - torch.Tensor with the same shape as encoder_output: the input plus noise.
    """
    # dB -> linear power ratio
    snr_ratio = 10 ** (target_snr_db / 10)

    # Mean-square value of the signal over every element
    mean_square = encoder_output.pow(2).mean()

    # Standard deviation that gives the noise the required power
    noise_std = (mean_square / snr_ratio).sqrt()

    # Scale unit-variance Gaussian samples and add them to the signal
    return encoder_output + torch.randn_like(encoder_output) * noise_std

input_text = "This is my first time visiting Berlin."

# T5-family checkpoints select the task from a text prefix. The translation
# pipeline is expected to prepend it from the model config, but a direct
# tokenizer call does not, so it has to be added by hand here; otherwise the
# model is never asked to translate.
task_prefix = "translate English to French: "

# Both the injected noise and the sampling inside generate() are random.
# Re-seeding with the same value before each generate() call gives the clean
# and the noisy run the same sampling draws, so differences in the output
# come from the noise rather than from sampling luck, and the cell is
# reproducible on re-run.
seed = 42

# Decoding arguments shared by both runs so they cannot drift apart
generation_kwargs = dict(max_length=200, do_sample=True, temperature=0.1)

# Encode the prefixed input text as input IDs plus attention mask
encoded_input = tokenizer(task_prefix + input_text, return_tensors="pt").to(device)
input_ids = encoded_input.input_ids
attention_mask = encoded_input.attention_mask

# Get encoder output (without noise)
with torch.no_grad():
    encoder_outputs = model.encoder(input_ids=input_ids, attention_mask=attention_mask)

# Generate translation using the noise-free encoder output
torch.manual_seed(seed)
baseline_outputs = model.generate(
    input_ids=None,
    encoder_outputs=encoder_outputs,
    attention_mask=attention_mask,
    **generation_kwargs,
)
baseline_text = tokenizer.decode(baseline_outputs[0], skip_special_tokens=True)

# Add noise to the encoder output (seeded so the same noise is drawn each run)
target_snr = 100  # Set target SNR (in dB)
torch.manual_seed(seed)
noisy_encoder_output = add_noise_with_snr(encoder_outputs.last_hidden_state, target_snr)
modified_encoder_outputs = BaseModelOutput(last_hidden_state=noisy_encoder_output)

# Generate translation using the encoder output with added noise
torch.manual_seed(seed)
noisy_outputs = model.generate(
    input_ids=None,
    encoder_outputs=modified_encoder_outputs,
    attention_mask=attention_mask,
    **generation_kwargs,
)
noisy_text = tokenizer.decode(noisy_outputs[0], skip_special_tokens=True)

# Display results
display(HTML(f"<p style='font-size:15px;'>Original text: {input_text}</p>"))
print('Translation result without noise:')
display(HTML(f"<p style='font-size:20px; font-family:\"Comic Sans MS\", cursive;'> {baseline_text}</p>"))
print(f'Translation result with added noise (SNR = {target_snr} dB):')
display(HTML(f"<p style='font-size:20px; font-family:\"Comic Sans MS\", cursive;'> {noisy_text}</p>"))


Translation result without noise:


Translation result with added noise (SNR = 100 dB):
