# Generator File: GPT2-Medium Finetuned

## 1. Importing libraries

In [1]:
# Mount Google Drive at /content/gdrive so files stored there can be read from this Colab runtime.
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [2]:
# Install required libraries

# NOTE(review): tokenizers and transformers are installed without a version pin,
# so a fresh runtime may pull different releases than the ones this notebook was written against.
!pip install tokenizers
!pip install transformers 

# Pinned fastai stack (presumably the versions the saved learner was exported with — TODO confirm).
!pip install fastai==2.0.15
!pip install fastai2==0.0.30
!pip install fastcore==1.0.16

# NOTE(review): pip reports that fastbook requires fastai>=2.1 and that nbdev/fastrelease
# require a newer fastcore, which conflicts with the pins above. Confirm which fastai
# version is actually active after this cell has run.
!pip install -Uqq fastbook

#!pip install torch==1.6.0 torchvision==0.7.0 

Collecting fastai==2.0.15
  Using cached https://files.pythonhosted.org/packages/98/2e/d4dcc69f67b4557c8543a4c65d3e136b1929b01136b227ceb986e2596825/fastai-2.0.15-py3-none-any.whl
[31mERROR: fastbook 0.0.16 has requirement fastai>=2.1, but you'll have fastai 2.0.15 which is incompatible.[0m
Installing collected packages: fastai
  Found existing installation: fastai 2.3.1
    Uninstalling fastai-2.3.1:
      Successfully uninstalled fastai-2.3.1
Successfully installed fastai-2.0.15
Collecting fastcore==1.0.16
  Using cached https://files.pythonhosted.org/packages/99/c9/bd299caa1f1c002495bc9ffb98d31605e78a131a2ba3ba66a2682a7ab245/fastcore-1.0.16-py3-none-any.whl
[31mERROR: nbdev 1.1.14 has requirement fastcore>=1.3.19, but you'll have fastcore 1.0.16 which is incompatible.[0m
[31mERROR: fastrelease 0.1.11 has requirement fastcore>=1.3.13, but you'll have fastcore 1.0.16 which is incompatible.[0m
[31mERROR: fastbook 0.0.16 has requirement fastai>=2.1, but you'll have fastai 2.0.15 w

In [3]:
# Importing required libraries
import pandas as pd

from fastai.text.all import *

import fastbook
from fastbook import *
fastbook.setup_book()

## 2. Importing model

In [4]:
# GPT-2 tokenizer class from Hugging Face transformers
from transformers import GPT2TokenizerFast

# Name of the pre-trained checkpoint whose tokenizer files are loaded
pretrained_weights = 'gpt2-medium'

# Build the tokenizer only; the fine-tuned model itself is loaded further down from a saved learner.
# Tokenizer source for reference: https://huggingface.co/transformers/_modules/transformers/tokenization_gpt2.html
tokenizer = GPT2TokenizerFast.from_pretrained(
    pretrained_weights,
    add_prefix_space=True,
)

In [6]:
# To process this data to train a model, we need to build a Transform that will be applied lazily.

class TransformersTokenizer(Transform):
    "Transform that converts text to token ids (and back) with a wrapped tokenizer."

    def __init__(self, tokenizer):
        # Keep a handle on the tokenizer used by both directions of the transform.
        self.tokenizer = tokenizer

    def encodes(self, x):
        "Tokenize `x` and return the matching token ids as a tensor."
        token_ids = self.tokenizer.convert_tokens_to_ids(self.tokenizer.tokenize(x))
        return tensor(token_ids)

    def decodes(self, x):
        "Decode the ids in `x` back to text, wrapped in a `TitledStr`."
        ids = x.cpu().numpy()  # move to CPU and convert to a numpy array before decoding
        return TitledStr(self.tokenizer.decode(ids))

In [7]:
# We use callbacks in case we want to alter the behavior of the training loop 
class DropOutput(Callback):
    "Callback that keeps only the first element of the prediction."

    def after_pred(self):
        # Overwrite the learner's prediction with its first item.
        first_output = self.pred[0]
        self.learn.pred = first_output

In [8]:
# TODO: decide whether this cell should be removed
#class Status(Enum):
#    STATUS_OK=0
#    STATUS_ERR_NULL_POINTER=1
#    STATUS_ERR_INVALID_PARAMETER=2

#str(Status._value2member_map_[1])

'Status.STATUS_ERR_NULL_POINTER'

In [9]:
# Load model
# Location of the exported learner on the mounted Google Drive.
model_path = "/content/gdrive/MyDrive/NLP/gpt2-finedtuned.pkl"

# NOTE(review): the file is a .pkl; load_learner presumably unpickles it, which can execute
# arbitrary code, so only load files from a trusted source. Unpickling presumably also needs
# the custom classes defined above (TransformersTokenizer, DropOutput) to exist first — confirm.
gpt2_tuned = load_learner(model_path)

## 3. Generating Text

In [10]:
# Generate Output

# Sampling settings that gen_story forwards to `generate`.
TEMP = 0.9     # Temperature is used to control the randomness of predictions by scaling the logits before applying softmax (small (0.2): model is more confident but also more conservative, large( 1.0): more diversity but also more mistakes)
TOP_K = 40     # forwarded as `top_k`: sample only among the 40 most likely next tokens
TOP_P = 0.85   # forwarded as `top_p`: nucleus sampling over the smallest token set with cumulative probability >= 0.85
NUM_SEQ = 1    # forwarded as `num_return_sequences`: how many samples to draw per prompt

def gen_story(my_model, seed, max_len):
  """Sample continuations of `seed` from `my_model` and print each one.

  Args:
    my_model: object exposing a `generate` method that accepts the keyword
      arguments used below (do_sample, max_length, temperature, top_k,
      top_p, num_return_sequences).
    seed: prompt text; it is encoded with the notebook-level `tokenizer`.
    max_len: forwarded to `generate` as `max_length`.

  Returns:
    None. Every generated sequence is printed as "<index>: <text>...".
  """
  # Encode the prompt and add a leading batch dimension with [None].
  prompt_ids = tokenizer.encode(seed)
  inp = tensor(prompt_ids)[None]  # append .cuda() here when the model lives on a GPU

  # Draw NUM_SEQ sampled sequences using the module-level sampling settings.
  sample_outputs = my_model.generate(
      inp,
      do_sample=True,
      max_length=max_len,
      temperature=TEMP,
      top_k=TOP_K,
      top_p=TOP_P,
      num_return_sequences=NUM_SEQ,
  )

  # Print every returned sequence. The loop must run to completion: returning from
  # inside it would silently drop all samples after the first when NUM_SEQ > 1.
  for i, sample_output in enumerate(sample_outputs):
    print("{}: {}...".format(i, tokenizer.decode(sample_output, skip_special_tokens = True)))

In [11]:
# Run this cell when using a GPU and generation fails with "RuntimeError: Input, output and indices must be on the current device"
#device = "cuda:0"
#gpt2 = gpt2.to(device)

In [12]:
# Provided by evaluation/story-generator application
#seed = "Our story begins with an atrocious King, none of his laws ever made any sense. “Tuesday?” he laughed, “There is no such day, and I’ll hear no more of it as long as I am king.”"
#max_len = 350

In [13]:
# Generate output
#my_model = gpt2_tuned                   # insert model for text generation

#gen_story(my_model,seed,max_len)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


0:  Our story begins with an atrocious King, none of his laws ever made any sense. “Tuesday?” he laughed, “There is no such day, and I’ll hear no more of it as long as I am king.” “Now you are King, and I will listen to nothing but what you say.” “So, I say, what do you want me to do?” “Tell me how you want me to kill him, and I will kill him.” “I think you have the right idea,” he said. “Now I want you to take the dagger I have left for you, and make your way to the tower, and kill the king.” “Now, how do you do that?” “I don't know,” he said, shaking his head. “It may be hard to imagine,” he said, “but” “there was a way to do it.” “And it wasn't easy.” “It was difficult, and dangerous, and very risky.” “And, though it was easy,” “it was hard.” “But, eventually,” “I had to do it.” “And it was easy.” “And it was a simple thing,” he said, shaking his head. “But, eventually,” “I had to do it.” “And it was easy.” “And it was a simple thing,” he said. “And it...
