# General Libraries & functions for Generator Files: GPT2...



## 0. Create progress reporting mechanism

In [None]:
from IPython.display import clear_output

# Running progress log for this notebook; starts with the notebook's name.
progress_output = "generator_gpt2_libraries.ipynb:"


def update_report(progress_output, update_text):
    """Append a message to the progress log, redraw it, and return the new log.

    Args:
        progress_output: The log text accumulated so far.
        update_text: The message to add on a new line.

    Returns:
        The log with ``update_text`` appended after a newline.
    """
    updated_log = "\n".join((progress_output, update_text))
    # Clear the cell output first so only the latest full log is displayed.
    clear_output()
    print(updated_log)
    return updated_log

## 1. Importing and including libraries

In [None]:
# Install the packages this notebook depends on (IPython shell magics).
print("Installing required libraries...")

# Hugging Face tokenizer and model libraries (unpinned).
!pip install tokenizers
!pip install transformers 

# fastai stack, pinned to exact versions.
!pip install fastai==2.0.15
!pip install fastai2==0.0.30
!pip install fastcore==1.0.16

# -U upgrades fastbook to the latest release; -qq reduces pip's output.
# NOTE(review): this runs after the pins above with -U; confirm it does not
# pull in different fastai/fastcore versions than the ones pinned.
!pip install -Uqq fastbook

# Update progress
progress_output = update_report(progress_output,"Libraries Installed.")

In [None]:
# Import everything the helper definitions below rely on.
print("Importing required libraries...")
import pandas as pd

# Star import; presumably the source of Transform, Callback, tensor and
# TitledStr used later in this notebook (none are defined here) - confirm.
from fastai.text.all import *

import fastbook
from fastbook import *
fastbook.setup_book()

# Import GPT2 tokenizer
from transformers import GPT2TokenizerFast # for documentation: https://huggingface.co/transformers/_modules/transformers/tokenization_gpt2.html

# Update progress
progress_output = update_report(progress_output,"Libraries Imported.")

## 2. Loading functions

In [None]:
# To process this data to train a model, we need to build a Transform that will be applied lazily.

class TransformersTokenizer(Transform):
    """Transform that converts text to token ids and back using a wrapped tokenizer.

    The wrapped ``tokenizer`` must provide ``tokenize``,
    ``convert_tokens_to_ids`` and ``decode``.
    """

    def __init__(self, tokenizer):
        self.tokenizer = tokenizer

    def encodes(self, x):
        """Tokenize ``x`` and return the token ids as a tensor."""
        tokens = self.tokenizer.tokenize(x)
        token_ids = self.tokenizer.convert_tokens_to_ids(tokens)
        return tensor(token_ids)

    def decodes(self, x):
        """Turn a tensor of token ids back into text wrapped in ``TitledStr``."""
        # Move to CPU and convert to numpy before handing ids to the tokenizer.
        ids = x.cpu().numpy()
        text = self.tokenizer.decode(ids)
        return TitledStr(text)

In [None]:
# We use callbacks in case we want to alter the behavior of the training loop 
class DropOutput(Callback):
    """Callback that keeps only the first element of the model's prediction.

    Presumably the wrapped model returns a tuple/sequence whose first item is
    the output needed downstream (verify against the model in use).
    """

    def after_pred(self):
        """Replace the learner's prediction with its first element."""
        first_output = self.pred[0]
        self.learn.pred = first_output

In [None]:
# Generate Output
def gen_story(my_model, my_tokenizer, seed, max_len,
              TEMP=0.6,
              TOP_K=40,
              TOP_P=0.85):
    """Sample one continuation of ``seed`` from ``my_model`` and return it as text.

    Args:
        my_model: Model exposing ``generate`` that accepts the sampling
            keywords used below (Hugging Face style).
        my_tokenizer: Tokenizer exposing ``encode`` and ``decode``.
        seed: Prompt text the generation starts from.
        max_len: Forwarded as ``max_length`` to ``generate``.
            NOTE(review): in Hugging Face this limit includes the prompt
            tokens - confirm for the model in use.
        TEMP: Sampling temperature. It controls the randomness of predictions
            by scaling the logits before softmax: small values (e.g. 0.2)
            are more confident but more conservative, large values (e.g. 1.0)
            give more diversity but also more mistakes.
        TOP_K: Forwarded as ``top_k`` to ``generate``.
        TOP_P: Forwarded as ``top_p`` to ``generate``.

    Returns:
        The first generated sequence decoded to a string, with special
        tokens skipped.
    """
    # Encode the prompt and add a leading batch dimension.
    prompt_ids = my_tokenizer.encode(seed)
    inp = tensor(prompt_ids)[None]

    # Put the prompt on the same device as the model so the function works on
    # both CPU and GPU without hand-editing a `.cuda()` call. Models that do
    # not expose `device` keep the previous behaviour (input stays where it is).
    model_device = getattr(my_model, "device", None)
    if model_device is not None:
        inp = inp.to(model_device)

    sample_outputs = my_model.generate(
        inp,
        do_sample=True,
        max_length=max_len,
        temperature=TEMP,
        top_k=TOP_K,
        top_p=TOP_P,
        num_return_sequences=1,
    )

    return my_tokenizer.decode(sample_outputs[0], skip_special_tokens=True)

In [None]:
# Path of the "models/" location under data_path.
# NOTE(review): data_path is not defined in this notebook; presumably the
# including notebook sets it (with a trailing separator) before this cell runs.
model_path = data_path + "models/"

In [None]:
# Record that the helper definitions are loaded and show the entry point's signature.
progress_output = update_report(
    progress_output,
    "Functions Loaded and available for use:\n    gen_story(my_model, my_tokenizer, seed, max_len)",
)

## 0. Clean up progress reporting mechanism

In [None]:
# Keep this notebook's log under a notebook-specific name, then remove the
# generic progress names from the namespace.
gpt2_libraries_progress = progress_output
del progress_output, update_report