# Text Generator Out of The Box

### Loading Libraries

In [3]:
# Numerical Computing
import numpy as np
import math

# Time
import time

# Data Manipulation
import polars as pl
import pandas as pd

# Data Visualization
import seaborn as sns
import matplotlib.pyplot as plt

# Network
import networkx as nx

# Scikit-Learn
import sklearn
from sklearn.manifold import TSNE

# PyTorch
import torch
import torch.nn as nn
import torch.optim as optim
from torchviz import make_dot
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils.data import dataset
from torchvision import datasets, transforms
from transformers import GPT2LMHeadModel, GPT2Tokenizer
from torch.nn import TransformerEncoder, TransformerEncoderLayer

### Placing HyperParameters

In [4]:
torch.manual_seed(799)
tkz = GPT2Tokenizer.from_pretrained("gpt2")
mdl = GPT2LMHeadModel.from_pretrained('gpt2')
ln = 10
cue = "They"
gen = tkz(cue, return_tensors="pt")
to_ret = gen["input_ids"][0]

In [5]:
prv=None

for i in range(ln):
    outputs = mdl(**gen)
    next_token_logits = torch.argmax(outputs.logits[-1, :])
    to_ret = torch.cat([to_ret, next_token_logits.unsqueeze(0)])
    gen = {"input_ids": to_ret}

seq = tkz.decode(to_ret)

print(seq)

They are not the only ones who are being targeted.


In [6]:
ip_ids = tkz.encode(cue, return_tensors='pt')
op_greedy = mdl.generate(ip_ids, max_length=ln, pad_token_id=tkz.eos_token_id)
seq = tkz.decode(op_greedy[0], skip_special_tokens=True)

print(seq)

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


They are not the only ones who are being targeted


In [7]:
op_beam = mdl.generate(
    ip_ids, 
    max_length=5, 
    num_beams=3, 
    num_return_sequences=3,
    pad_token_id=tkz.eos_token_id
)

for op_beam_cur in op_beam:
    print(tkz.decode(op_beam_cur, skip_special_tokens=True))

They have a lot of
They have a lot to
They are not the only


In [8]:
for i in range(3):
    torch.manual_seed(i+10)
    op = mdl.generate(
        ip_ids, 
        do_sample=True, 
        max_length=5, 
        top_k=2,
        pad_token_id=tkz.eos_token_id
    )

    seq = tkz.decode(op[0], skip_special_tokens=True)
    print(seq)

They are the most important
They have a lot to
They are not going to


In [9]:
for i in range(3):
    torch.manual_seed(i+10)
    op_greedy = mdl.generate(ip_ids, max_length=5, pad_token_id=tkz.eos_token_id)
    seq = tkz.decode(op_greedy[0], skip_special_tokens=True)
    print(seq)

They are not the only
They are not the only
They are not the only


In [10]:
for i in range(3):
    torch.manual_seed(i+10)
    op = mdl.generate(
        ip_ids, 
        do_sample=True, 
        max_length=5, 
        top_p=0.75, 
        top_k=0,
        pad_token_id=tkz.eos_token_id
    )

    seq = tkz.decode(op[0], skip_special_tokens=True)
    print(seq)

They got them here in
They have also challenged foreign
They said it would be
