In [None]:
# Mount Google Drive at /content/gdrive; the dataset CSV and the saved model
# checkpoint used later in this notebook are read from paths under this mount.
from google.colab import drive

drive.mount("/content/gdrive")


Mounted at /content/gdrive


In [None]:
!pip install transformers
!pip install sentencepiece
!pip install datasets transformers rouge_score nltk
!pip install nlpaug

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.29.2-py3-none-any.whl (7.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.1/7.1 MB[0m [31m40.9 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.14.1 (from transformers)
  Downloading huggingface_hub-0.14.1-py3-none-any.whl (224 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.5/224.5 kB[0m [31m27.3 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m88.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.14.1 tokenizers-0.13.3 transformers-4.29.2
Looking in in

### Imports

In [None]:
import pandas as pd
import numpy as np
import torch
from sklearn import metrics
from sklearn.model_selection import train_test_split
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, Trainer, TrainingArguments, T5Tokenizer, T5ForConditionalGeneration, AdamW, get_linear_schedule_with_warmup
import re
import sys
import nlpaug.augmenter.char as nac
import nlpaug.augmenter.word as naw
import random
import argparse
import textwrap
from tqdm.auto import tqdm
import matplotlib.pyplot as plt

In [None]:
import torch

# Stop right away when the runtime has no CUDA device attached.
assert torch.cuda.is_available()

# Report which accelerator was found and how many of them are visible.
n_gpu = torch.cuda.device_count()
device_name = torch.cuda.get_device_name()
print(f"Found device: {device_name}, n_gpu: {n_gpu}")

# Device handle for the CUDA accelerator.
device = torch.device("cuda")

Found device: Tesla T4, n_gpu: 1


In [None]:
def set_seed(seed):
  """Seed the Python, NumPy and PyTorch random number generators.

  Args:
      seed (int): Value handed to every generator.
  """
  for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    seed_fn(seed)
  # The CUDA generators are only seeded when a GPU is actually present.
  if torch.cuda.is_available():
    torch.cuda.manual_seed_all(seed)

set_seed(42)

In [None]:
import re
import string
import nltk
from nltk.corpus import stopwords
nltk.download('punkt')
nltk.download('stopwords')

# Built once at definition time so repeated calls do not recreate them.
_HTML_BREAK_RE = re.compile(r'<br\s*/?>')
_DIGITS_RE = re.compile(r'\d+')
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)
_STOP_WORDS_CACHE = {}  # filled on first use, after the NLTK corpus is available


def clean_text(text):
    """Normalize a raw review into lowercase, stopword-free, space-separated tokens.

    Steps, in order: lowercase, replace HTML ``<br>`` tags with a space, strip
    punctuation, strip digits, tokenize with NLTK and drop English stopwords.

    Args:
        text (str): Raw review text.

    Returns:
        str: The remaining tokens joined by single spaces.
    """
    # Load the stopword list only once instead of on every call.
    if 'english' not in _STOP_WORDS_CACHE:
        _STOP_WORDS_CACHE['english'] = frozenset(stopwords.words('english'))
    stop_words = _STOP_WORDS_CACHE['english']

    # Convert all text to lowercase
    text = text.lower()

    # Remove HTML line breaks BEFORE punctuation: stripping "<", "/" and ">"
    # first would turn "<br />" into a stray "br" token glued to its neighbours.
    text = _HTML_BREAK_RE.sub(' ', text)

    # Remove punctuation
    text = text.translate(_PUNCTUATION_TABLE)

    # Remove numbers
    text = _DIGITS_RE.sub('', text)

    # Remove stopwords; joining tokens with one space also normalizes whitespace
    tokens = [token for token in nltk.word_tokenize(text) if token not in stop_words]
    return ' '.join(tokens)


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


## Noise Functions

In [None]:
# Actions for char and word level noise functions
# Edit operations each noise family may draw from.
char_action = ['insert', 'substitute', 'delete', 'swap']
word_action = ['substitute', 'delete', 'swap']


def get_action(type):
  """Pick a random edit operation for the requested noise level.

  Args:
      type (str): "char" or "word".

  Returns:
      One entry of the matching action list, or None for any other value.
  """
  candidates = {"char": char_action, "word": word_action}.get(type)
  return None if candidates is None else random.choice(candidates)


def augment_tweet(tweet, p=0.7):
    """
    Augment a tweet with one randomly chosen character-level or word-level noise function.

    Args:
        tweet (str): The original tweet.
        p (float): The probability of applying a char level augmentation;
            a word level augmentation is applied otherwise.

    Returns:
        str: The augmented tweet.
    """
    # Draw both random edit operations up front, in a fixed order, so the
    # sequence of random draws is the same whichever branch is taken below.
    char_op = get_action("char")
    word_op = get_action("word")

    # Factories instead of instances: only the augmenter that is actually
    # selected gets constructed, rather than all seven on every call.
    char_augmenters = [
        lambda: nac.OcrAug(),
        lambda: nac.KeyboardAug(aug_char_p=0.2, aug_word_p=0.2, include_special_char=False),
        lambda: nac.RandomCharAug(action=char_op, aug_char_p=0.1, aug_word_p=0.1),
    ]
    word_augmenters = [
        lambda: naw.SpellingAug(),
        lambda: naw.SplitAug(),
        lambda: naw.SynonymAug(),
        lambda: naw.RandomWordAug(aug_p=0.2, action=word_op),
    ]

    # Randomly apply a character-level or word-level augmentation with probability p
    pool = char_augmenters if random.random() < p else word_augmenters
    aug = random.choice(pool)()
    augmented_tweet = aug.augment(tweet)

    # NOTE(review): augment() appears to return a list in some nlpaug releases
    # and a plain string in others; indexing a string with [0] would return
    # only its first character, so both shapes are handled explicitly.
    if isinstance(augmented_tweet, str):
        return augmented_tweet
    # Fall back to the untouched input if nothing came back.
    return augmented_tweet[0] if augmented_tweet else tweet


In [None]:
def add_noise(df, augmentation_percentage, task):
  """Inject noise into a random fraction of the rows of ``df``.

  Args:
      df (pd.DataFrame): Frame with a 'text' column; for the
          "sentiment_analysis" task that column is modified in place.
      augmentation_percentage (float): Fraction of rows to augment, passed
          to ``DataFrame.sample(frac=...)``.
      task (str): "sentiment_analysis" or "question_answering".

  Returns:
      The same ``df`` object for the two known tasks, None for any other task.
  """
  if task == "question_answering":
    # TODO - noise functions for QA
    return df

  if task != "sentiment_analysis":
    return None

  # Pick the requested fraction of rows, then overwrite each picked text
  # with its augmented version.
  chosen = df.sample(frac=augmentation_percentage).index
  for row_label, original_text in df.loc[chosen, 'text'].items():
    df.loc[row_label, 'text'] = augment_tweet(original_text)

  return df

## Data download and preprocess

In [None]:
# Download data
# Load the IMDB reviews, keeping only the review text and its sentiment label.
df = (
    pd.read_csv('/content/gdrive/MyDrive/NLP Project/data/IMDB Dataset.csv')
    .rename(columns={'review': 'text'})
    [['text', 'sentiment']]
)

df['text'] = df['text'].apply(clean_text)

# Drop reviews that are empty once cleaned (new frame, no in-place mutation).
df = df[df['text'].str.strip() != '']

# How much of the dataset to use
data_size = 0.2
# A fixed random_state makes the subset independent of whatever the global
# NumPy RNG state happens to be, so re-running this cell gives the same rows.
df = df.sample(frac=data_size, random_state=42)

In [None]:
## Add word and char level noise
# augmentation_percentage = 0.1
# df = add_noise(df, augmentation_percentage, task="sentiment_analysis")

# Randomly shuffle all rows
# df = df.sample(frac=1).reset_index(drop=True)

In [None]:

# print("full data: ")
# label_counts = df['sentiment'].value_counts()
# print(label_counts)
# print('\n\n ')


# # Separate the dataset into two subsets based on the sentiment labels
# positive_review = df[df['sentiment'] == 'positive']
# negative_review = df[df['sentiment'] == 'negative']

# # Shuffle each of the two subsets randomly
# positive_review = positive_review.sample(frac=1, random_state=42)
# negative_review = negative_review.sample(frac=1, random_state=42)

# # Divide each subset into training, validation, and test sets with a 70/20/10 ratio
# train_pos, val_pos_test_pos = train_test_split(positive_review, test_size=0.3, random_state=42)
# val_pos, test_pos = train_test_split(val_pos_test_pos, test_size=0.33, random_state=42)

# train_neg, val_neg_test_neg = train_test_split(negative_review, test_size=0.3, random_state=42)
# val_neg, test_neg = train_test_split(val_neg_test_neg, test_size=0.33, random_state=42)

# # Merge the corresponding subsets from each sentiment back together to form the final training, validation, and test sets
# train_set = pd.concat([train_pos, train_neg], ignore_index=True)
# val_set = pd.concat([val_pos, val_neg], ignore_index=True)
# test_set = pd.concat([test_pos, test_neg], ignore_index=True)

# print("train: ")
# label_counts = train_set['sentiment'].value_counts()
# print(label_counts)
# print('\n')

# print("val: ")
# label_counts = val_set['sentiment'].value_counts()
# print(label_counts)
# print('\n')

# print("test: ")
# label_counts = test_set['sentiment'].value_counts()
# print(label_counts)
# print('\n')

In [None]:
# Label balance of the sampled data before splitting.
print("full data: ")
label_counts = df['sentiment'].value_counts()
print(label_counts)
print('\n\n ')

# 70% goes to training; the remaining 30% is divided 67/33 into validation and test.
train_set, holdout_set = train_test_split(df, test_size=0.3, random_state=42)
val_set, test_set = train_test_split(holdout_set, test_size=0.33, random_state=42)

# Label balance of every split.
for split_name, split_frame in (("train", train_set), ("val", val_set), ("test", test_set)):
    print(f"{split_name}: ")
    label_counts = split_frame['sentiment'].value_counts()
    print(label_counts)
    print('\n')

full data: 
positive    5039
negative    4961
Name: sentiment, dtype: int64


 
train: 
positive    3523
negative    3477
Name: sentiment, dtype: int64


val: 
positive    1019
negative     991
Name: sentiment, dtype: int64


test: 
positive    497
negative    493
Name: sentiment, dtype: int64




In [None]:
# Set the device as GPU
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Pretrained t5-small weights, moved onto `device`, plus the matching tokenizer.
model = T5ForConditionalGeneration.from_pretrained('t5-small')
model = model.to(device)
tokenizer = T5Tokenizer.from_pretrained('t5-small')

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/242M [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Downloading (…)ve/main/spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

In [None]:
# Token ids the tokenizer produces for the two label strings, each followed
# by the "</s>" end-of-sequence marker; displayed as the cell output.
ids_neg = tokenizer.encode('negative </s>')
ids_pos = tokenizer.encode('positive </s>')
ids_neg, ids_pos



([2841, 1], [1465, 1])

## Dataset and Dataloaders


In [None]:
# Define custom class for our dataset
class ImdbDataset(torch.utils.data.Dataset):
  """Tokenized (review, sentiment) pairs for text-to-text sentiment classification.

  Every review in ``df['text']`` is tokenized to exactly ``max_len`` tokens and
  paired with its tokenized label string ("positive" / "negative"). Items are
  dicts with the keys ``source_ids``, ``source_mask``, ``target_ids`` and
  ``target_mask``.

  Args:
      df: DataFrame with a 'text' column and a 'sentiment' column whose values
          are 'positive' or 'negative' (rows with other values are ignored).
      tokenizer: Object exposing ``batch_encode_plus`` that returns a mapping
          with "input_ids" and "attention_mask" tensors.
      max_len (int): Length every tokenized review is padded/truncated to.
  """

  # Characters that are deleted outright before tokenizing.
  REPLACE_NO_SPACE = re.compile(r"[.;:!'?,\"()\[\]]")
  # Spans that are replaced by a single space before tokenizing.
  REPLACE_WITH_SPACE = re.compile(r"(<br\s*/><br\s*/>)|(\-)|(\/)")

  def __init__(self, df, tokenizer, max_len=512):
    self.df = df
    self.tokenizer = tokenizer
    self.max_len = max_len

    self.inputs = []
    self.targets = []

    self._build()

  def __len__(self):
    return len(self.inputs)

  def __getitem__(self, index):
    source = self.inputs[index]
    target = self.targets[index]

    # Each encoding carries a leading batch dimension of size 1; drop only
    # that dimension so the sequence dimension always survives.
    return {
        "source_ids": source["input_ids"].squeeze(0),
        "source_mask": source["attention_mask"].squeeze(0),
        "target_ids": target["input_ids"].squeeze(0),
        "target_mask": target["attention_mask"].squeeze(0),
    }

  def _build(self):
    """Tokenize all positive then all negative rows and shuffle the pairs."""
    self._build_examples(self.df[self.df['sentiment'] == 'positive'], 'positive')
    self._build_examples(self.df[self.df['sentiment'] == 'negative'], 'negative')

    # Shuffle inputs and targets with ONE shared permutation. Shuffling the
    # two lists independently would pair every review with a random label.
    order = list(range(len(self.inputs)))
    random.shuffle(order)
    self.inputs = [self.inputs[i] for i in order]
    self.targets = [self.targets[i] for i in order]

  def _build_examples(self, df, sentiment):
    """Tokenize every text in ``df`` and pair it with the ``sentiment`` label."""
    target = sentiment + " "

    for item in df['text']:
      text = item.strip()
      text = self.REPLACE_NO_SPACE.sub("", text)
      # Substitute a space (not an empty string) so the neighbouring words
      # are not glued together.
      text = self.REPLACE_WITH_SPACE.sub(" ", text)
      text = text + ' '

      # tokenize inputs; pad to max_len and truncate explicitly
      tokenized_inputs = self.tokenizer.batch_encode_plus(
          [text], max_length=self.max_len, padding="max_length", truncation=True, return_tensors="pt"
      )
      # tokenize targets
      tokenized_targets = self.tokenizer.batch_encode_plus(
          [target], max_length=2, padding="max_length", truncation=True, return_tensors="pt"
      )

      self.inputs.append(tokenized_inputs)
      self.targets.append(tokenized_targets)

In [None]:
# Build the dataset for each train, val and test data
# Tokenize each split once up front: train first, then validation, then test.
train_dataset, val_dataset, test_dataset = [
    ImdbDataset(split_df, tokenizer) for split_df in (train_set, val_set, test_set)
]

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


In [None]:
# Number of examples in the train, validation and test datasets.
len(train_dataset), len(val_dataset), len(test_dataset)

(7000, 2010, 990)

In [None]:
# Decode one training example back to text to sanity-check the encoding:
# first the review, then its label.
data = train_dataset[34]
for field in ('source_ids', 'target_ids'):
    print(tokenizer.decode(data[field], skip_special_tokens=True))

classed real life london producers must different planetbr br depressing suicidal dark dingy dross tvbr br everyone fighting everything nasty tones running nothing done genuine reasonsbr br want real life picture people london uk programme farthest realitybr br one good word say programme certainty great big fight christmas dinnerbr br even characters totally unbelievable
negative


In [None]:
# Hyperparameters and run settings, gathered in one place.
args_dict = {
    'model_name_or_path': 't5-small',
    'tokenizer_name_or_path': 't5-small',
    'max_seq_length': 512,
    'learning_rate': 1e-5,
    'weight_decay': 0.0,
    'adam_epsilon': 1e-8,
    'warmup_steps': 0,
    'train_batch_size': 32,
    'eval_batch_size': 32,
    'num_train_epochs': 5,
    'gradient_accumulation_steps': 16,
    'n_gpu': 1,
    'early_stop_callback': False,
    # 16-bit training needs apex installed before this is switched on
    'fp_16': False,
    # optimisation levels are described at https://nvidia.github.io/apex/amp.html#opt-levels-and-properties
    'opt_level': 'O1',
    # with 16-bit training enabled pick a sensible value here; 0.5 is a good default
    'max_grad_norm': 1.0,
    'seed': 42,
}

# Attribute-style access (params.learning_rate, ...) to the same settings.
params = argparse.Namespace(**args_dict)

In [None]:
# Define the optimizer and the learning rate scheduler
# NOTE(review): num_training_steps below is derived from the number of training
# *examples* (len(train_dataset) * 10). A scheduler that is stepped once per
# batch would normally be sized as (batches per epoch) * (number of epochs), so
# with this value the linear decay presumably barely progresses during training.
# Confirm the intended horizon before changing it.
optimizer = AdamW(model.parameters(), lr=params.learning_rate)
scheduler = get_linear_schedule_with_warmup(optimizer, 
                                            num_warmup_steps=params.warmup_steps, 
                                            num_training_steps=len(train_dataset)*10)



In [None]:
# One loader per split; all three reshuffle their dataset on every pass.
make_loader = torch.utils.data.DataLoader
train_loader = make_loader(train_dataset, shuffle=True, batch_size=params.train_batch_size)
val_loader = make_loader(val_dataset, shuffle=True, batch_size=params.eval_batch_size)
test_loader = make_loader(test_dataset, shuffle=True, batch_size=params.eval_batch_size)

## Training

In [None]:
# # Define the training loop
# best_val_loss = float("inf")
# best_epoch = 0
# epochs = 20

# for epoch in range(epochs):
#   # Train the model
#   model.train()
#   train_loop = tqdm(train_loader, desc=f'Training Epoch {epoch+1}', leave=True)

#   train_loss = 0.0

#   for batch in train_loop:
#     input_ids = batch["source_ids"].to(device)
#     attention_mask = batch["source_mask"].to(device)
#     lm_labels = batch["target_ids"].to(device)
#     decoder_attention_mask = batch["target_mask"].to(device)

#     outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=lm_labels, decoder_attention_mask=decoder_attention_mask)

#     loss = outputs['loss']

#     optimizer.zero_grad()
#     loss.backward()
#     optimizer.step()
#     scheduler.step()

#     train_loss += loss.item()
#     train_loop.set_postfix(loss=loss.item())

#   train_loss /= len(train_loader)

#   # Evaluate the model on the validation set
#   model.eval()

#   val_loss = 0
#   with torch.no_grad():
#     for batch in val_loader:
#       input_ids = batch["source_ids"].to(device)
#       attention_mask = batch["source_mask"].to(device)
#       lm_labels = batch["target_ids"].to(device)
#       decoder_attention_mask = batch["target_mask"].to(device)

#       outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=lm_labels, decoder_attention_mask=decoder_attention_mask)

#       loss = outputs[0]
#       logits = outputs.logits.detach().cpu().numpy()

#       val_loss += loss.item()

#     val_loss /= len(val_loader)

#   if val_loss < best_val_loss:
#     best_epoch = epoch+1
#     best_val_loss = val_loss
#     # torch.save(model.state_dict(), "t5_sentiment_model.pt")
#     path = '/content/gdrive/MyDrive/NLP Project/models/T5_SA_clean20'

#     torch.save(model.state_dict(), path+'/model_parameters.pth')
  
#   print(f"Epoch {epoch + 1} - Val Loss: {val_loss:.4f}. \t Current best epoch is {best_epoch} with val loss - {best_val_loss:.4f}")

Training Epoch 1:   0%|          | 0/219 [00:00<?, ?it/s]

Epoch 1 - Val Loss: 0.9753. 	 Current best epoch is 1 with val loss - 0.9753


Training Epoch 2:   0%|          | 0/219 [00:00<?, ?it/s]

Epoch 2 - Val Loss: 0.4225. 	 Current best epoch is 2 with val loss - 0.4225


Training Epoch 3:   0%|          | 0/219 [00:00<?, ?it/s]

Epoch 3 - Val Loss: 0.3919. 	 Current best epoch is 3 with val loss - 0.3919


Training Epoch 4:   0%|          | 0/219 [00:00<?, ?it/s]

Epoch 4 - Val Loss: 0.3752. 	 Current best epoch is 4 with val loss - 0.3752


Training Epoch 5:   0%|          | 0/219 [00:00<?, ?it/s]

Epoch 5 - Val Loss: 0.3662. 	 Current best epoch is 5 with val loss - 0.3662


Training Epoch 6:   0%|          | 0/219 [00:00<?, ?it/s]

Epoch 6 - Val Loss: 0.3610. 	 Current best epoch is 6 with val loss - 0.3610


Training Epoch 7:   0%|          | 0/219 [00:00<?, ?it/s]

Epoch 7 - Val Loss: 0.3569. 	 Current best epoch is 7 with val loss - 0.3569


Training Epoch 8:   0%|          | 0/219 [00:00<?, ?it/s]

Epoch 8 - Val Loss: 0.3548. 	 Current best epoch is 8 with val loss - 0.3548


Training Epoch 9:   0%|          | 0/219 [00:00<?, ?it/s]

Epoch 9 - Val Loss: 0.3529. 	 Current best epoch is 9 with val loss - 0.3529


Training Epoch 10:   0%|          | 0/219 [00:00<?, ?it/s]

Epoch 10 - Val Loss: 0.3515. 	 Current best epoch is 10 with val loss - 0.3515


Training Epoch 11:   0%|          | 0/219 [00:00<?, ?it/s]

Epoch 11 - Val Loss: 0.3509. 	 Current best epoch is 11 with val loss - 0.3509


Training Epoch 12:   0%|          | 0/219 [00:00<?, ?it/s]

Epoch 12 - Val Loss: 0.3505. 	 Current best epoch is 12 with val loss - 0.3505


Training Epoch 13:   0%|          | 0/219 [00:00<?, ?it/s]

Epoch 13 - Val Loss: 0.3501. 	 Current best epoch is 13 with val loss - 0.3501


Training Epoch 14:   0%|          | 0/219 [00:00<?, ?it/s]

Epoch 14 - Val Loss: 0.3492. 	 Current best epoch is 14 with val loss - 0.3492


Training Epoch 15:   0%|          | 0/219 [00:00<?, ?it/s]

Epoch 15 - Val Loss: 0.3491. 	 Current best epoch is 15 with val loss - 0.3491


Training Epoch 16:   0%|          | 0/219 [00:00<?, ?it/s]

Epoch 16 - Val Loss: 0.3486. 	 Current best epoch is 16 with val loss - 0.3486


Training Epoch 17:   0%|          | 0/219 [00:00<?, ?it/s]

Epoch 17 - Val Loss: 0.3491. 	 Current best epoch is 16 with val loss - 0.3486


Training Epoch 18:   0%|          | 0/219 [00:00<?, ?it/s]

Epoch 18 - Val Loss: 0.3483. 	 Current best epoch is 18 with val loss - 0.3483


Training Epoch 19:   0%|          | 0/219 [00:00<?, ?it/s]

Epoch 19 - Val Loss: 0.3484. 	 Current best epoch is 18 with val loss - 0.3483


Training Epoch 20:   0%|          | 0/219 [00:00<?, ?it/s]

Epoch 20 - Val Loss: 0.3489. 	 Current best epoch is 18 with val loss - 0.3483


## Clean Testing

In [None]:
# Load model for evaluations

path = '/content/gdrive/MyDrive/NLP Project/models/T5_SA_clean20'
# map_location remaps the stored tensors onto the active device, so the
# checkpoint also loads when `device` is the CPU.
model.load_state_dict(torch.load(path+'/model_parameters.pth', map_location=device))

<All keys matched successfully>

In [None]:
# Evaluate on the held-out test split produced by the train/val/test split
# earlier in this notebook. Re-reading the CSV and drawing a fresh 20% sample
# here would select different rows than the ones that split was built from,
# so reviews used for training could end up in this "test" set.
df_test_clean = test_set.copy()

test_dataset_clean = ImdbDataset(df_test_clean, tokenizer)
# Evaluation does not depend on batch order, so no shuffling is needed.
test_loader_clean = torch.utils.data.DataLoader(test_dataset_clean, batch_size=params.eval_batch_size, shuffle=False)



In [None]:
# Generate a one-token prediction for every clean test review and collect the
# decoded predictions next to the decoded gold labels.
model.eval()
outputs = []
targets = []
with torch.no_grad():  # inference only
  for batch in test_loader_clean:
    # Move the batch to the configured `device` instead of hard-coding CUDA.
    outs = model.generate(input_ids=batch['source_ids'].to(device),
                          attention_mask=batch['source_mask'].to(device),
                          max_length=2)

    dec = [tokenizer.decode(ids, skip_special_tokens=True) for ids in outs]
    target = [tokenizer.decode(ids, skip_special_tokens=True) for ids in batch["target_ids"]]
    outputs.extend(dec)
    targets.extend(target)

In [None]:
# Share of clean test reviews whose decoded prediction equals the decoded label.
print(f"Accuracy on Cleaned Data: {metrics.accuracy_score(targets, outputs)}")

Accuracy on Cleaned Data: 0.5070707070707071


In [None]:
# Per-class precision, recall and F1 for the clean test predictions.
print(f"Classification Report on Cleaned Data: \n\n{metrics.classification_report(targets, outputs)}")

Classification Report on Cleaned Data: 

              precision    recall  f1-score   support

    negative       0.52      0.50      0.51       505
    positive       0.50      0.52      0.51       485

    accuracy                           0.51       990
   macro avg       0.51      0.51      0.51       990
weighted avg       0.51      0.51      0.51       990



## X% NOISY TESTING

In [None]:
# Build the noisy test set from the held-out test split produced by the
# train/val/test split earlier in this notebook. Re-reading the CSV and drawing
# a fresh 20% sample here would select different rows, so reviews used for
# training could end up in this "test" set. Work on a copy because add_noise
# modifies the frame it is given.
df_test_noisy = test_set.copy()

# Add NOISE
random.seed(42)
random_noise = random.uniform(0.05, 0.15)
print(f"Noise to be added: {random_noise}\n\n")

df_test_noisy = add_noise(df_test_noisy, augmentation_percentage=random_noise, task="sentiment_analysis")

test_dataset_noisy = ImdbDataset(df_test_noisy, tokenizer)
# Evaluation does not depend on batch order, so no shuffling is needed.
test_loader_noisy = torch.utils.data.DataLoader(test_dataset_noisy, batch_size=params.eval_batch_size, shuffle=False)

Noise to be added: 0.11394267984578837




[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


In [None]:
# Generate a one-token prediction for every noisy test review and collect the
# decoded predictions next to the decoded gold labels.
model.eval()
outputs = []
targets = []
with torch.no_grad():  # inference only
  for batch in test_loader_noisy:
    # Move the batch to the configured `device` instead of hard-coding CUDA.
    outs = model.generate(input_ids=batch['source_ids'].to(device),
                          attention_mask=batch['source_mask'].to(device),
                          max_length=2)

    dec = [tokenizer.decode(ids, skip_special_tokens=True) for ids in outs]
    target = [tokenizer.decode(ids, skip_special_tokens=True) for ids in batch["target_ids"]]
    outputs.extend(dec)
    targets.extend(target)

In [None]:
# random_noise is a fraction (e.g. 0.11), so format it as a percentage
# rather than printing the raw fraction followed by a "%" sign.
print(f"Accuracy on {random_noise:.2%} Noisy Data: {metrics.accuracy_score(targets, outputs)}")

Accuracy on 0.11% Noisy Data: 0.5161616161616162


In [None]:
# random_noise is a fraction (e.g. 0.11), so format it as a percentage
# rather than printing the raw fraction followed by a "%" sign.
print(f"Classification Report on {random_noise:.2%} Noisy Data: \n\n{metrics.classification_report(targets, outputs)}")

Classification Report on 0.11% Noisy Data: 

              precision    recall  f1-score   support

    negative       0.53      0.49      0.51       505
    positive       0.51      0.54      0.52       485

    accuracy                           0.52       990
   macro avg       0.52      0.52      0.52       990
weighted avg       0.52      0.52      0.52       990



## JUST PREDICTING STUFF

In [None]:
# Iterator over validation batches, used below to pull batches one at a time.
it = iter(val_loader)

In [None]:
# Take the next validation batch and display the shape of its input ids.
batch = next(it)
batch["source_ids"].shape

torch.Size([32, 512])

In [None]:
# Predict the sentiment token for the current batch; tensors are moved to the
# configured `device` instead of hard-coding CUDA.
outs = model.generate(input_ids=batch['source_ids'].to(device),
                      attention_mask=batch['source_mask'].to(device),
                      max_length=2)

# Decode predictions, input reviews and gold labels back to plain text.
dec = [tokenizer.decode(ids, skip_special_tokens=True) for ids in outs]

texts = [tokenizer.decode(ids, skip_special_tokens=True) for ids in batch['source_ids']]
targets = [tokenizer.decode(ids, skip_special_tokens=True) for ids in batch['target_ids']]

In [None]:
# Iterate over the examples actually present in the batch: the last batch of a
# loader can hold fewer than eval_batch_size items, which would make a fixed
# range(params.eval_batch_size) index past the end.
for text, actual, predicted in zip(texts, targets, dec):
    lines = textwrap.wrap("Review:\n%s\n" % text, width=100)
    print("\n".join(lines))
    print("\nActual sentiment: %s" % actual)
    print("Predicted sentiment: %s" % predicted)
    print("=====================================================================\n")

Review: poor film standard story match point certain intrigue direction writing certain fascination
woody allen mixing culture classic english murder exploring done itbr br scoop however none poorly
written two leads hopelessly wooden story interest genre spoofs requires least sort subplot witty
explanations tieups tarot cards keys kept french horns locked roomsbr br allens delightful witty
versions various hollywood genres curse jade scorpionpurple rose cairo etc given us much pleasure
years even hollywood ending great central idea sadly inspiration deserted time

Actual sentiment: negative
Predicted sentiment: negative

Review: finished watching film honestly say work art surprised see overall rating br br guy bring
together b listish movie cast make glorious characters given us movie fantastically diverse story
line much left imaginationbr br far many people wanting movies plot understood handed plateyet films
get poor reviews far predictablebr br film special get

Actual sentiment: