# Text Generation Demo Using the T5 model with encoder-decoder soft prompts

In [3]:
import os
import json
import gzip
import pandas as pd
from urllib.request import urlopen
import string


import numpy as np
import statistics as st
import glob
import sys
import io

import zipfile
import tarfile

import logging
from collections import Counter

import matplotlib
import matplotlib.pyplot as plt
from sklearn.utils import shuffle

from sklearn.model_selection import train_test_split
import torch
from sklearn.metrics import accuracy_score, f1_score
from tqdm.notebook import tqdm

import warnings
warnings.filterwarnings("ignore")
import math

In [4]:
# Expose only one CUDA device to this kernel when multiple GPUs are available
%env CUDA_DEVICE_ORDER=PCI_BUS_ID
%env CUDA_VISIBLE_DEVICES=0

env: CUDA_DEVICE_ORDER=PCI_BUS_ID
env: CUDA_VISIBLE_DEVICES=0


In [5]:
#!pip install transformers

In [7]:
from transformers import (
    T5TokenizerFast,
    get_scheduler
)
import torch

from transformers.optimization import Adafactor, AdafactorSchedule
sys.path.append("..")

from model_classes.model_t5_encoder_decoder_prompt import T5PromptTuningLM


In [8]:
# Fix the seed to be able to get the same randomness across runs and hence reproducible outcomes
def get_device_and_set_seed(seed):
    """Seed the random number generators and return the compute device.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU and every CUDA device), and configures cuDNN for deterministic
    behaviour, so repeated runs draw the same random numbers.

    Args:
        seed: Integer seed applied to every generator.

    Returns:
        torch.device: ``cuda:0`` if CUDA is available, otherwise ``cpu``.
    """
    # Local import: the notebook's import cell does not bring in `random`.
    import random

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Autotuning can select different kernels from run to run, which would
    # defeat the deterministic setting above.
    torch.backends.cudnn.benchmark = False
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda:0" if use_cuda else "cpu")
    return device
    
# Fixed seed handed to get_device_and_set_seed.
SEED = 123
# Seeds the RNGs and picks the device that the model-loading and generation cells use.
device = get_device_and_set_seed(SEED)

In [9]:
class Config:
    """Hyperparameters for soft-prompt tuning, stored as plain class attributes."""

    # --- Optimisation ---
    num_train_epochs = 20
    # NOTE(review): the step budget is set equal to the epoch count — confirm this is intended.
    max_train_steps = num_train_epochs
    learning_rate = 0.15
    warmup_steps = 500
    weight_decay = 0.01
    batch_size = 10

    # --- Prompt tuning ---
    # Number of prompt tokens.
    n_prompt_tokens = 20
    # When True, the soft prompt is initialized from the vocabulary.
    # Otherwise, set `random_range` to initialize it randomly.
    init_from_vocab = True
    # random_range = 0.5


args = Config()

In [17]:
#load models

In [None]:
# Base checkpoint shared by the tokenizer and both prompt-tuned models.
MODEL_NAME = "google/t5-small-lm-adapt"
# Directory with one sub-folder of trained soft prompts per sentiment.
SOFT_PROMPT_DIR = "./trained_models/t5_encoder_decoder"


def load_prompt_model(sentiment, tag):
    """Load the base model together with the soft prompts of one sentiment.

    Args:
        sentiment: Sub-folder of ``SOFT_PROMPT_DIR`` ("positive" or "negative").
        tag: Suffix used in the prompt file names ("pos" or "neg").

    Returns:
        The ``T5PromptTuningLM`` instance, moved to ``device`` and put in
        evaluation mode.
    """
    prompt_dir = f"{SOFT_PROMPT_DIR}/{sentiment}"
    model = T5PromptTuningLM.from_pretrained(
        MODEL_NAME,
        encoder_soft_prompt_path=f"{prompt_dir}/encoder_soft_prompt_T5_{tag}.model",
        decoder_soft_prompt_path=f"{prompt_dir}/decoder_soft_prompt_T5_{tag}.model",
        device=device,
    ).to(device)
    # This notebook only generates text, so switch to evaluation mode.
    model.eval()
    return model


tokenizer = T5TokenizerFast.from_pretrained(MODEL_NAME)
model_neg = load_prompt_model("negative", "neg")
model_pos = load_prompt_model("positive", "pos")
# Informative completion message instead of a bare debug marker.
print(f"Loaded positive and negative prompt-tuned models on {device}")

# Positive review generation

In [13]:
# Prompt text that the positive-sentiment model is asked to continue.
test = "the movie was"
call = tokenizer(test, return_tensors="pt").input_ids

# Keep the first row without its final token id
# (presumably the end-of-sequence id appended by the tokenizer — confirm).
encoder_ids = call[:1, :-1].to(device)
# A single all-zero token id passed as the decoder input.
decoder_start = torch.zeros((1, 1), dtype=torch.long, device=device)

# Decoding settings: beam search combined with nucleus sampling, fixed at 200 tokens.
generation_kwargs = {
    "min_length": 200,
    "max_length": 200,
    "num_beams": 10,
    "do_sample": True,
    "no_repeat_ngram_size": 1,
    "temperature": 1.0,
    "top_k": 0,
    "top_p": 0.8,
    "repetition_penalty": 1.0,
    "use_cache": False,
    "early_stopping": True,
}

beam_outputs = model_pos.generate(
    input_ids=encoder_ids,
    decoder_input_ids=decoder_start,
    **generation_kwargs,
)

In [14]:
# Decode the first returned sequence to text, leaving out special tokens.
print(tokenizer.decode(beam_outputs[0], skip_special_tokens=True))

movie was a lot better than the previous one. I would recommend it to anyone who is interested in learning more about this series of movies and will be watching them again next time they are on DVD or Blu-ray, so please let me know what you think as we haven't seen anything like that yet! This film has been very popular with collectors for many years now but not quite sure how much money can go into making these films even though there were no major changes being made during their release period (or at least never had any problems). It really does give us an idea why people should buy books from other authors too...but only because Harry Potter fans could find some interesting stuff here....it just makes our lives easier when everyone else gets bored by reading every single book up until after Christmas!! Watching “The Star Wars: The End Of World War II” goes back over 2 decades since George Lucas lost his home country last year which gave him two billion dollars off


# Negative review generation

In [15]:
# Prompt text that the negative-sentiment model is asked to continue.
test = "the movie was"
call = tokenizer(test, return_tensors="pt").input_ids

# Keep the first row without its final token id
# (presumably the end-of-sequence id appended by the tokenizer — confirm).
encoder_ids = call[:1, :-1].to(device)
# A single all-zero token id passed as the decoder input.
decoder_start = torch.zeros((1, 1), dtype=torch.long, device=device)

# Decoding settings: beam search combined with nucleus sampling, fixed at 200 tokens.
generation_kwargs = {
    "min_length": 200,
    "max_length": 200,
    "num_beams": 10,
    "do_sample": True,
    "no_repeat_ngram_size": 1,
    "temperature": 1.0,
    "top_k": 0,
    "top_p": 0.8,
    "repetition_penalty": 1.0,
    "use_cache": False,
    "early_stopping": True,
}

beam_outputs = model_neg.generate(
    input_ids=encoder_ids,
    decoder_input_ids=decoder_start,
    **generation_kwargs,
)

In [16]:
# Decode the first returned sequence to text, leaving out special tokens.
print(tokenizer.decode(beam_outputs[0], skip_special_tokens=True))

movie was a waste of time and money in the first place, it would have been much better. It's not good for me but I think there is more to be said about this film than anything else that you can find on Netflix or any other online store like Amazon etc...I had no idea what they were doing at all! There are so many people who don’t watch movies without watching their own films....you really should check out my review here!! You guys did an amazing job with them as well!!! The Movie Was AWESOME OF MY FAVORITE WEEKEND DEFENSE CORRECTLY SHOPPING THE DIFFERENT PEOPLE HAD TO RECEIVE THROUGH THIS MOVIE AND WAS SO GOOD FOR EVERYONE WHO COULD HAVE BEEN OBTAINED BECAUSE IT IS LEGAL THAT WERE EXCITED WITH YOUR LIFE BUT YOU KNOW
