In [1]:
pip list

Package              Version
-------------------- -------------------
absl-py              0.11.0
appnope              0.1.0
argon2-cffi          20.1.0
astor                0.8.1
async-generator      1.10
attrs                20.2.0
backcall             0.2.0
bleach               3.2.1
certifi              2020.6.20
cffi                 1.14.3
chardet              3.0.4
decorator            4.4.2
defusedxml           0.6.0
entrypoints          0.3
gast                 0.2.2
google-pasta         0.2.0
gpt-2-simple         0.7.1
grpcio               1.31.0
h5py                 2.10.0
idna                 2.10
importlib-metadata   2.0.0
ipykernel            5.3.4
ipython              7.19.0
ipython-genutils     0.2.0
jedi                 0.17.2
Jinja2               2.11.2
jsonschema           3.2.0
jupyter-client       6.1.7
jupyter-core         4.6.3
jupyterlab-pygments  0.1.2
Keras-Applications   1.0.8
Keras-Preprocessing  1.1.0
Markdown             3.3.2
MarkupSafe           1.1.1
mis

## Generate text locally with GPT2 Simple

This tutorial assumes that you have trained/fine-tuned a new language model in our Google Colab example. Additionally, you should have created a new Anaconda environment (in this case called `nlp`) with TensorFlow 1.15 and gpt-2-simple installed.

<br>
You can disregard the TF2.0 warnings that will result

The following cells import the required libraries and set `KMP_DUPLICATE_LIB_OK`, which is required to prevent an annoying duplicate library error.

In [66]:
import gpt_2_simple as gpt2

In [67]:
import os
import time
import glob
import re
from subprocess import Popen, PIPE
# Setting KMP_DUPLICATE_LIB_OK prevents the duplicate-library error that would otherwise occur in this environment.
os.environ['KMP_DUPLICATE_LIB_OK']='True'

The `pip list` cell is not required, but it is a good way to confirm that your Python environment has properly installed `gpt-2-simple 0.7.1` and `tensorflow 1.15.0`.

If this notebook is located in your new project folder `gpt2`, you will not need to change directories. Here we Print Working Directory `pwd` to confirm.

In [68]:
pwd

'/Users/ben/Dev/gpt2'

If you are not in the correct directory that contains your checkpoint folder, you can use `os.chdir()` to navigate to the proper dir

In [69]:
# Absolute path of the project folder that holds the `checkpoint` directory; machine-specific, so edit it for your setup.
workdir = '/Users/ben/Dev/gpt2'

In [70]:
os.chdir(workdir) 

Make sure you have unzipped the .tar archive generated during the fine-tuning process. In this example, the checkpoint directory is `.../gpt2/checkpoint/ghandi`. Regardless, the `run_name` should correspond to the folder name *inside* of `checkpoint`, so you may need to change `run_name` to `'run1'`.

<br> 
Starting this TF session will take a minute or so; then you should get feedback that the checkpoint has been loaded.

In [71]:
def gpt_text(prompt, run, sess):
    """Generate a single short sample from a fine-tuned model in-process.

    Args:
        prompt: Text passed to the generator as the `prefix`.
        run: Value passed as `run_name` to select the fine-tuned model.
        sess: Session object handed straight to `gpt2.generate`.

    Returns:
        Whatever `gpt2.generate` returns when called with
        `return_as_list=True` (presumably a list holding the one generated
        string -- confirm against the gpt-2-simple documentation).
    """
    # Fixed sampling settings: one sample of 30 tokens at temperature 0.7.
    generation_options = dict(
        run_name=run,
        length=30,
        temperature=0.7,
        prefix=prompt,
        nsamples=1,
        batch_size=1,
        return_as_list=True,
    )
    return gpt2.generate(sess, **generation_options)

In [72]:
def split_text(text):
    """Break generated text into fragments.

    The separator is chosen in priority order:

    1. If `text` contains at least two full stops, split on ".".
    2. Otherwise, if it contains at least two commas, split on ",".
    3. Otherwise, split the whitespace-separated words into two halves
       (the second half gets the extra word when the count is odd).

    Fragments produced by rules 1 and 2 are returned exactly as
    `str.split` yields them: surrounding whitespace is kept and a trailing
    separator produces a final empty string.

    Args:
        text: The string to split.

    Returns:
        A list of strings; it always has at least two elements.
    """
    # str.count replaces the per-call compiled regexes; the old pattern "\."
    # was also an invalid escape sequence in a non-raw string literal.
    if text.count(".") >= 2:
        return text.split(".")
    if text.count(",") >= 2:
        return text.split(",")

    words = text.split()
    middle = len(words) // 2
    return [" ".join(words[:middle]), " ".join(words[middle:])]
    

In [73]:
def gpt_popen(prompt, run):
    """Generate text by running the `gpt_2_simple` CLI in a child process.

    The command `<workdir>/gpt_2_simple generate ...` is run with fixed
    sampling settings (length 30, temperature 0.7, one sample). Afterwards
    the most recently changed file under `<workdir>/gen` is read, deleted,
    and its contents are split with `split_text`.

    Args:
        prompt: Text passed to the CLI as `--prefix`.
        run: Value passed to the CLI as `--run_name`.

    Returns:
        The list of fragments produced by `split_text`, or `False` when the
        command exits with a non-zero status or no generated file is found.
    """
    # Pass the arguments as a list and skip the shell: the prompt is
    # model-generated text, and quotes, `$`, backticks or newlines in it
    # would otherwise break the command line or be executed by the shell.
    cmd = [
        os.path.join(workdir, "gpt_2_simple"), "generate",
        "--run_name", run,
        "--length", "30",
        "--temperature", "0.7",
        "--prefix", prompt,
        "--nsamples", "1",
        "--batch_size", "1",
    ]
    p = Popen(cmd, stdout=PIPE, stderr=PIPE)
    (out, err) = p.communicate()
    if p.returncode != 0:
        print(out)
        print(err)
        print("ERROR!")
        return False

    gendir = os.path.join(workdir, "gen")
    list_of_files = glob.glob(os.path.join(gendir, "*"))
    if not list_of_files:
        # max() on an empty list would raise ValueError; report the problem
        # the same way as a failed command instead.
        print("ERROR! no generated file found in " + gendir)
        return False

    latest_file = max(list_of_files, key=os.path.getctime)
    # Context manager guarantees the handle is closed before the unlink.
    with open(latest_file) as gen_file:
        fcontents = gen_file.read()
    os.unlink(latest_file)
    return split_text(fcontents)


In [74]:
def say_it(phrase):
    """Speak `phrase` aloud through the system `say` command.

    Blocks until the command has finished. Nothing is run when `phrase`
    is empty or contains only whitespace.

    Args:
        phrase: The text to speak.

    Returns:
        None.
    """
    if not phrase.strip():
        # With no text argument `say` would have nothing to speak (and may
        # wait for input on stdin), so skip the call entirely.
        return
    # Hand the phrase over as a single argv entry instead of building a
    # shell string: generated text routinely contains apostrophes, quotes
    # or newlines, which the shell would misparse or execute.
    Popen(["say", phrase]).wait()

In [75]:
# sess = gpt2.start_tf_sess()
# gpt2.load_gpt2(sess, run_name='guardian1')
# gpt2.load_gpt2(sess, run_name='goldman1')
# this doesn't work because tensorflow won't allow two thingies at once
# needs a separate interpreter

Define a new string that will be the prompt for GPT2

In [76]:
prompt = 'A protest is a'

Ok now it's just my stuff I guess

In [77]:
# Alternate between the two fine-tuned models for `turns` rounds. Each
# speaker's sentence is spoken and printed, and its first four words become
# the prompt for the next speaker.
turns = 5

# (run name, printed label, index of the fragment used as the sentence)
speakers = (
    ("guardian1", "Guardian", 0),
    ("goldman1", "Goldman", 1),
)

for i in range(turns):
    for run_name, speaker, fragment_index in speakers:
        text = gpt_popen(prompt, run_name)
        if text is False:
            # gpt_popen signals failure with False; stop with a clear error
            # instead of failing on the indexing below.
            raise RuntimeError(f"text generation failed for run {run_name!r}")
        sentence = text[fragment_index]
        say_it(sentence)
        print(f'{speaker}: "{sentence}"')
        four_words = " ".join(sentence.split()[0:4])
        prompt = four_words
    

/Users/ben/Dev/gpt2/gpt_2_simple generate --run_name guardian1 --length 30 --temperature 0.7 --prefix "A protest is a" --nsamples 1 --batch_size 1
/Users/ben/Dev/gpt2/gpt_2_simple generate --run_name goldman1 --length 30 --temperature 0.7 --prefix "A protest is a" --nsamples 1 --batch_size 1
/Users/ben/Dev/gpt2/gpt_2_simple generate --run_name guardian1 --length 30 --temperature 0.7 --prefix "I went to see" --nsamples 1 --batch_size 1
/Users/ben/Dev/gpt2/gpt_2_simple generate --run_name goldman1 --length 30 --temperature 0.7 --prefix "I went to see" --nsamples 1 --batch_size 1
/Users/ben/Dev/gpt2/gpt_2_simple generate --run_name guardian1 --length 30 --temperature 0.7 --prefix "could have foreseen the" --nsamples 1 --batch_size 1
/Users/ben/Dev/gpt2/gpt_2_simple generate --run_name goldman1 --length 30 --temperature 0.7 --prefix "could have foreseen the" --nsamples 1 --batch_size 1
/Users/ben/Dev/gpt2/gpt_2_simple generate --run_name guardian1 --length 30 --temperature 0.7 --prefix "Bu