In [1]:
import html

import torch
from IPython.display import display, HTML
from transformers import BartTokenizer, BartForConditionalGeneration
from transformers.modeling_outputs import BaseModelOutput

# Run on the GPU when CUDA is usable; fall back to the CPU otherwise
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Fetch the pretrained checkpoint: tokenizer first, then the seq2seq model,
# which is moved onto the selected device
model_name = "facebook/bart-base"  # any BART checkpoint id can be substituted here
tokenizer = BartTokenizer.from_pretrained(model_name)
model = BartForConditionalGeneration.from_pretrained(model_name)
model = model.to(device)

In [2]:
def add_noise_with_snr(encoder_output, target_snr_db, generator=None):
    """
    Add zero-mean Gaussian noise to the encoder output at a target SNR in dB.

    The signal power is the mean of the squared elements taken over the whole
    tensor (all dimensions pooled), so one noise level is applied to every
    element.

    Parameters:
    - encoder_output: torch.Tensor (floating point), e.g. the encoder's
      last_hidden_state.
    - target_snr_db: float, the desired signal-to-noise ratio in dB.
    - generator: optional torch.Generator used to draw the noise, which makes
      the result reproducible without touching the global RNG. It must live on
      the same device as encoder_output. When None (default) the global RNG is
      used, exactly as before.

    Returns:
    - noisy_encoder_output: torch.Tensor with the same shape and dtype as
      encoder_output, equal to encoder_output plus the scaled noise.
    """
    # Convert SNR from dB to linear scale: SNR_lin = 10^(SNR_dB / 10)
    target_snr_linear = 10 ** (target_snr_db / 10)

    # Power of the signal, pooled over every element of the tensor
    signal_power = torch.mean(encoder_output ** 2)

    # Noise power that yields the requested ratio signal_power / noise_power
    noise_power = signal_power / target_snr_linear

    # Draw unit-variance noise; only take the explicit-generator path when the
    # caller asked for it so the default behaviour stays unchanged
    if generator is None:
        unit_noise = torch.randn_like(encoder_output)
    else:
        unit_noise = torch.randn(
            encoder_output.shape,
            generator=generator,
            dtype=encoder_output.dtype,
            device=encoder_output.device,
        )

    # Scale to the required standard deviation (sqrt of the noise power)
    noise = unit_noise * torch.sqrt(noise_power)

    # Add noise to the encoder output
    noisy_encoder_output = encoder_output + noise
    return noisy_encoder_output


In [16]:
# Define the reference text and its masked counterpart.
# (Tokenization of input_text happens in a later cell, not here.)
#
# Three example passages follow; only the last one is active. Each example is a
# pair: `original_text` (the unmasked passage) and `input_text` (the same
# passage with some spans replaced by <mask>).

# --- Disabled example: BBQ class announcement ---
# original_text = """
# Beginners BBQ Class Taking Place in Missoula! 
# Do you want to get better at making delicious BBQ?
# You will have the opportunity, put this on your calendar now. 
# Thursday, September 22nd join World Class BBQ Champion, Tony Balay from Lonestar Smoke Rangers. 
# He will be teaching a beginner level class for everyone who wants to get better with their culinary skills. 
# He will teach you everything you need to know to compete in a KCBS BBQ competition, including techniques, recipes, timelines, meat selection and trimming, plus smoker and fire information. 
# The cost to be in the class is $35 per person, and for spectators it is free. 
# Included in the cost will be either a t-shirt or apron and you will be tasting samples of each meat that is prepared.
# """
# input_text = """
# Beginners BBQ Class <mask> in Missoula! 
# Do you want to <mask> making delicious BBQ?
# You will have the opportunity, put this on your calendar now. 
# Thursday, September 22nd join World Class BBQ Champion, Tony Balay from Lonestar Smoke Rangers. 
# He will be teaching a beginner level class for everyone who wants to get better with their culinary skills. 
# He will teach you <mask> compete in a KCBS BBQ competition, including techniques, recipes, timelines, meat selection and trimming, plus smoker and fire information. 
# The <mask> the class is $35 per person, and for spectators it is free. 
# Included in the cost will be either a t-shirt or apron and you will be tasting samples of each meat that is prepared.
# """.replace("\n", "")


# --- Disabled example: guided hiking trip ---
# original_text = """
# This November, embark on an exciting hiking adventure! 
# Explore the scenic mountain trails with an experienced guide, who will show you the best routes and hidden viewpoints. 
# This journey is suitable for all levels, from beginners to advanced hikers. 
# The hike covers approximately 10 miles and includes multiple rest stops with breathtaking views. 
# Participants should bring water, snacks, and comfortable hiking shoes. 
# The cost of the trip is $60, which includes a map and a group photo.
# """
# input_text = """
# This November, embark on an exciting <mask> adventure! 
# Explore the scenic mountain trails with an experienced guide, who will show you the best routes and hidden <mask>. 
# This journey is suitable for all levels, from beginners to advanced <mask>. 
# The hike covers approximately 10 miles and includes multiple rest stops with breathtaking <mask>. 
# Participants should bring water, snacks, and comfortable hiking shoes. 
# The <mask> is $60, which includes a map and a group photo.
# """.replace("\n", "")

# --- Active example: online coding bootcamp ---
# Unmasked passage; its newlines are kept as written.
original_text = """
Welcome to our online coding bootcamp program! 
Whether you're a complete beginner or looking to improve your programming skills, this course is designed for you. 
Throughout the course, you will learn essential coding languages such as Python and JavaScript. 
Our instructors will guide you through interactive projects and provide real-time feedback. 
Each student will receive a certificate of completion at the end of the program. 
The total cost for the bootcamp is $150, which includes all learning materials.
"""
# Masked passage: six spans are replaced by <mask>. The newlines are removed so
# the text becomes one line; the trailing space at the end of each line inside
# the literal is what keeps consecutive sentences separated after the removal.
input_text = """
Welcome to our online <mask> bootcamp program! 
Whether you're a complete beginner or looking to <mask> your programming skills, this course is designed for you. 
Throughout the course, you will learn essential <mask> such as Python and JavaScript. 
Our instructors will guide you through interactive projects and provide real-time <mask>. 
Each student will receive a certificate of completion at the end of the <mask>. 
The total cost for the bootcamp is $150, which <mask> all learning materials.
""".replace("\n", "")

In [18]:
# Seed applied before every stochastic step. Re-running the cell then yields the
# same texts, and the baseline and noisy generations share identical sampling
# randomness, so any difference between them comes from the injected noise.
SEED = 0

# Inline CSS shared by every rendered paragraph
TEXT_STYLE = "font-size:15px; font-family:\"Comic Sans MS\", cursive;"


def _sample_from_encoder(encoder_state):
    """Sample one sequence from the decoder given precomputed encoder outputs.

    The global RNG is re-seeded with SEED first so that repeated calls draw the
    same sampling randomness. Returns the tensor produced by model.generate.
    """
    torch.manual_seed(SEED)
    return model.generate(
        input_ids=None,  # the encoder pass is skipped; encoder_outputs is used instead
        encoder_outputs=encoder_state,
        max_length=200,
        do_sample=True,
        temperature=0.5,
    )


def _show_text(label, text):
    """Print a label, then render text as a styled HTML paragraph.

    The text is HTML-escaped because it may contain characters such as
    '<', '>' or '&' (generated text is arbitrary) that would otherwise be
    interpreted as markup and corrupt the rendering.
    """
    print(label)
    display(HTML(f"<p style='{TEXT_STYLE}'>{html.escape(text)}</p>"))


input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to(device)

# Get encoder output without noise
with torch.no_grad():
    encoder_outputs = model.model.encoder(input_ids=input_ids)

# Generate output without noise
baseline_outputs = _sample_from_encoder(encoder_outputs)
baseline_text = tokenizer.decode(baseline_outputs[0], skip_special_tokens=True)

# Add noise with a target SNR and generate noisy output
target_snr = 1  # Target SNR in dB
torch.manual_seed(SEED)  # make the injected noise itself reproducible
noisy_encoder_output = add_noise_with_snr(encoder_outputs.last_hidden_state, target_snr)
modified_encoder_outputs = BaseModelOutput(last_hidden_state=noisy_encoder_output)

# Generate output with the noisy encoder output
noisy_outputs = _sample_from_encoder(modified_encoder_outputs)
noisy_text = tokenizer.decode(noisy_outputs[0], skip_special_tokens=True)

# Display the reference text and both generations
_show_text('Original Text:', original_text)
_show_text('Without Noise:', baseline_text)
_show_text(f'With Noise (SNR = {target_snr} dB):', noisy_text)

Original Text:


Without Noise:


With Noise (SNR = 1 dB):
