## 1. Downloading the Pretrained GPT-2 Small Model and Tokenizer from the HuggingFace Transformers Library

In [1]:
from transformers import GPT2Tokenizer, TFGPT2LMHeadModel
import tensorflow as tf
import numpy as np 
# Load the pretrained 'gpt2' checkpoint: the tokenizer and the TensorFlow LM-head model.
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
# use_cache=False: presumably stops the model from also returning cached key/values,
# so its output is just the logits (a 1-tuple in the outputs shown later) — TODO confirm.
model = TFGPT2LMHeadModel.from_pretrained('gpt2',use_cache=False)

All model checkpoint weights were used when initializing TFGPT2LMHeadModel.

All the weights of TFGPT2LMHeadModel were initialized from the model checkpoint at gpt2.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFGPT2LMHeadModel for predictions without further training.


In [2]:
# The tokenizer encodes text into the token ids that are fed to the model.
# NOTE(review): this `ids` value is reassigned by the encoding loop in the next
# code cell before anything reads it, so this cell is only a demonstration.
ids = tokenizer.encode("who is a member of the National Security Council,and is a member of",return_tensors = 'tf')

## 2. To generate three sentences, we pass three prompts at the same time so they are processed in parallel as one batch.

In [4]:
# Encode the same prompt three times to form a batch of three identical rows.
x = []
for i in ["She was really good girl"] * 3:
    ids = tokenizer.encode(i, return_tensors='tf')
    # The encoded tensor carries a leading axis of size 1; keep only the 1-D row of ids.
    x.append(ids[0])

### 2.1. Sentences after Encoding.

In [5]:
np.array(x)

array([[3347,  373, 1107,  922, 2576],
       [3347,  373, 1107,  922, 2576],
       [3347,  373, 1107,  922, 2576]])

### 3.Constructing a TensorFlow Graph from the custom Tensorflow Model Code so we can use it with TF.Serving

In [7]:
# Wrap the model's call in a tf.function and trace it once for a
# (batch, sequence) int32 tensor of token ids; both dimensions are left as None
# so the traced graph accepts any batch size and prompt length.
# The wrapper is deliberately not named `callable`, which would shadow the Python builtin.
model_call_fn = tf.function(model.call)
concrete_function = model_call_fn.get_concrete_function(tf.TensorSpec([None,None], tf.int32, name="input_ids"))

In [8]:
# We are only interested in the prediction for the last position of each row:
# [0] takes the first element of the model output (the logits) and [:, -1]
# keeps the final time step for every sentence in the batch.
logits = concrete_function(np.array(x))[0][:,-1]

In [9]:
# Decode the greedy (argmax) next-token prediction for the first sentence in the batch.
tokenizer.decode([np.argmax(concrete_function(np.array(x))[0][0][-1])])

'.'

<p>The problem with the model above is that for every request it returns an array of 50,257 logits (one per vocabulary token), while we only need the single best next word. A further disadvantage is payload size: as the number of words increases, the payload increases too. To avoid this, we need to select the best word on the server itself and send back only the top sentences for each request. To do that, the post-processing has to happen inside the graph itself, after the prediction; the code below does this for us.</p>

<h2>Function Performs Top_k sampling </h2>

In [10]:
@tf.function
def Top_k(logits,temperature=0.9,k=50):
    """Apply temperature scaling, then mask every logit outside the top k.

    Args:
        logits: 2-D float tensor of next-token scores, one row per batch entry.
        temperature: Divisor applied to the logits before filtering.
        k: Number of highest-scoring entries to keep in each row.

    Returns:
        A tensor shaped like ``logits`` holding the temperature-scaled values,
        where every entry strictly smaller than its row's k-th largest value
        is replaced by -1e10. Entries tied with the k-th value are kept.
    """
    logits = tf.divide(logits, temperature)
    # Honour the caller's k rather than a fixed 50, so callers such as
    # get_output(top_k=45) actually get the cut-off they asked for.
    values, _ = tf.math.top_k(logits, k=k, sorted=True, name="top_k")
    # sorted=True orders each row descending, so the last column is the
    # smallest value that is still kept; reshape it to (batch, 1).
    minimum_values = values[:, -1][:, tf.newaxis]
    # Broadcasting compares every logit against its own row's threshold.
    return tf.where(
        logits < minimum_values,
        tf.ones_like(logits, dtype=logits.dtype) * -1e10,
        logits,
    )

<h2>Function to perform Top_p or Nucleus Sampling After Top_k sampling</h2>

In [11]:
@tf.function
def Top_p(logits, p=0.95):
    """Nucleus (top-p) filtering of a batch of logits.

    Each row is sorted in descending order and converted to cumulative
    softmax probabilities. The sorted entries whose cumulative probability is
    <= p are counted, and the logit at the last such position (or at position
    0 if there is none) becomes that row's threshold.

    Args:
        logits: 2-D float tensor of next-token scores, one row per batch entry.
        p: Cumulative-probability cut-off.

    Returns:
        A tensor shaped like ``logits`` where every entry strictly below its
        row's threshold is replaced by -1e10.
    """
    # Read the batch size from the tensor at run time instead of assuming a
    # fixed number of rows, so any batch size works.
    batch = tf.shape(logits)[0]
    sorted_logits = tf.sort(logits, direction='DESCENDING', axis=-1)
    cumulative_probs = tf.cumsum(tf.nn.softmax(sorted_logits, axis=-1), axis=-1)
    # Index of the last sorted entry to keep in each row; clamped at 0 so the
    # single most likely token always survives.
    last_kept = tf.maximum(
        tf.reduce_sum(tf.cast(cumulative_probs <= p, tf.int32), axis=-1) - 1, 0)
    # (row, column) pairs used to pick one threshold logit per row.
    indices = tf.stack([tf.range(0, batch), last_kept], axis=-1)
    min_values = tf.gather_nd(sorted_logits, indices)[:, tf.newaxis]
    return tf.where(
        logits < min_values,
        tf.ones_like(logits) * -1e10,
        logits,
    )

In [49]:
# tf.random.categorical(Top_p(Top_k(concrete_function(np.array(x))[0][:,-1],.9,50)),num_samples = 1)[:,-1,tf.newaxis]

<tf.Tensor: shape=(3, 1), dtype=int64, numpy=
array([[2474],
       [ 553],
       [ 553]], dtype=int64)>

### This Function Generates New Sentences Based on the Given Input ids

In [2]:
@tf.function
def get_output(input_ids,top_k = 45,len = 7,top_p = .9,temp = .9):
    """Sample a continuation for every prompt in the batch.

    Relies on the module-level ``concrete_function`` (the traced model call)
    and on ``Top_k`` / ``Top_p`` for filtering the logits.

    Args:
        input_ids: int32 tensor of prompt token ids, shape (batch, sequence).
        top_k: Number of highest-scoring tokens kept by ``Top_k``.
        len: Number of tokens to generate (the name shadows the builtin inside
            this function but is kept for interface compatibility).
        top_p: Cumulative-probability cut-off passed to ``Top_p``.
        temp: Temperature passed to ``Top_k``.

    Returns:
        int64 tensor of shape (batch, len) holding only the newly sampled
        token ids; the prompt is not included.
    """
    def sample_next(context):
        # Score only the last position, filter with top-k then top-p, and
        # draw one token id per row.
        next_logits = concrete_function(context)[0][:, -1]
        filtered = Top_p(Top_k(next_logits, temp, top_k), top_p)
        return tf.random.categorical(filtered, num_samples=1)

    tokens = input_ids
    # The first token is sampled outside the loop so `outputs` starts with a
    # concrete (batch, 1) shape; it uses the caller's top_p like every later step.
    outputs = sample_next(tokens)
    tokens = tf.concat([tokens, tf.cast(outputs, dtype=tf.int32)], 1)
    for _ in tf.range(len-1):
        # Both tensors grow along axis 1 on every iteration, so the loop
        # needs relaxed shape invariants.
        tf.autograph.experimental.set_loop_options(
            shape_invariants=[(outputs, tf.TensorShape([None,None])),
                              (tokens, tf.TensorShape([None,None]))])
        x = sample_next(tokens)
        outputs = tf.concat([outputs, x], 1)
        tokens = tf.concat([tokens, tf.cast(x, dtype=tf.int32)], 1)
    return outputs

In [225]:
tokenizer.decode(get_output(np.array(x))[1])

', and the young girl, she'

### Generate a Graph from the get_output Function Above

In [226]:
# Trace get_output into a concrete graph function for an int32 input of
# shape (batch, sequence); the sampling parameters keep their default values.
function = get_output.get_concrete_function(tf.TensorSpec([None,None], tf.int32, name="input_ids"))

In [227]:
# sample output
function(np.array(x))

<tf.Tensor: shape=(3, 7), dtype=int64, numpy=
array([[   11,   257, 16365,   351,   257, 16365,   338],
       [  290,   257,  2933,    11,   475,   673,  1422],
       [   11,   508,    11,  1864,   284,   607,    11]], dtype=int64)>

# Saving the Model into tensorflow SavedModel Format so It can be used In TensorFlow serving

In [228]:
# Export the model with the traced generation function as its signature
# (it is looked up as 'serving_default' after reloading).
# The trailing '001' directory presumably acts as the TF Serving model version — confirm.
tf.saved_model.save(model,'Top_sentences_best/001',signatures = function)

INFO:tensorflow:Assets written to: Top_sentences_best/001\assets


In [229]:
# Reload the export to check that the saved signature can be called on its own.
model_ = tf.saved_model.load('Top_sentences_best/001')

In [230]:
model_.signatures['serving_default'](input_ids = tf.constant(np.array(x)))

{'output_0': <tf.Tensor: shape=(3, 7), dtype=int64, numpy=
 array([[ 508,  373,  531,  284,  423,  587, 2407],
        [  11,  508,  550,  587,  612,  329,  617],
        [ 290,  257, 4957,  508,  547, 3595,   13]], dtype=int64)>}

In [96]:
# Rebuild the batch with a new prompt, repeated three times.
x = []
for i in ["There lived a old witch"] * 3:
    ids = tokenizer.encode(i, return_tensors='tf')
    # Keep only the 1-D row of ids from the encoded tensor.
    x.append(ids[0])

In [91]:
# Python-side generation loop: append ten sampled tokens to every prompt.
tokens = tf.constant(np.array(x), dtype=tf.int32)
for i in range(10):
    # Logits for the last position of each row, filtered with the default
    # Top_k settings and a top-p cut-off of 0.95.
    last_logits = concrete_function(tokens)[0][:, -1]
    outputs = tf.random.categorical(Top_p(Top_k(last_logits), 0.95), num_samples=1)
    tokens = tf.concat([tokens, tf.cast(outputs, dtype=tf.int32)], 1)

In [92]:
# Decode every row (prompt plus sampled continuation) back to text.
for i in tokens:    
    print(tokenizer.decode(i))

There lived a old witch, but it was no longer able to live.
There lived a old witch named P. A. Taney who took over
There lived a old witch who lived with the white man in the castle.


In [650]:
# Re-wrap model.call in a tf.function (same statement as the earlier tracing cell).
# NOTE(review): the name `callable` shadows the Python builtin of the same name.
callable = tf.function(model.call)

In [651]:
# Re-trace the wrapped call for an int32 input of shape (batch, sequence).
concrete_function = callable.get_concrete_function(tf.TensorSpec([None,None], tf.int32, name="input_ids"))

In [887]:
# get_output concatenates along axis 1 and calls a graph traced for rank-2
# input, so it needs the whole (batch, sequence) array rather than one 1-D row.
get_output(np.array(x))

In [888]:
# Trace with a rank-2 (batch, sequence) spec: get_output indexes and
# concatenates along axis 1, and the calls that follow pass a 2-D batch of prompts.
function = get_output.get_concrete_function(tf.TensorSpec([None,None], tf.int32, name="input_ids"))

In [828]:
outputs = function(np.array(x))

In [829]:
print(tokenizer.decode(outputs[0]))

There lived a old witch who lived in a castle, and a man of


In [890]:
tokenizer.decode(outputs[2])

'There lived a old witch, who died of her illness. And what the'

In [831]:
# Export the model again, this time to 'Top_sentences_1', with the re-traced
# generation function as its signature.
tf.saved_model.save(model,'Top_sentences_1',signatures = function)

INFO:tensorflow:Assets written to: Top_sentences_1\assets


In [832]:
model_ = tf.saved_model.load('Top_sentences_1')

In [835]:
# Call the reloaded 'serving_default' signature; the result is a dict and the
# generated token ids are read from its 'output_0' entry.
words=model_.signatures['serving_default'](input_ids=tf.constant(np.array(x)))['output_0']

In [836]:
for i in words:
    print(tokenizer.decode(i))

There lived a old witch with her husband who used to tell her about her
There lived a old witch that lived in the house of the witches and the
There lived a old witch and her husband who would be executed for witchcraft.


In [157]:
import json

In [231]:
import requests
# REST predict endpoint; presumably a locally running TensorFlow Serving
# instance that hosts the exported model under the name 'TypeAhead' — confirm.
server_url = "http://localhost:8501/v1/models/TypeAhead:predict"

In [232]:
import numpy as np
np.array(x).shape

(3, 5)

In [252]:
# Build the request body: a JSON object whose "instances" list holds one row
# of token ids per prompt.
input_data_json = json.dumps({"instances":np.array(x).tolist()})

In [253]:
input_data_json

'{"instances": [[1858, 5615, 257, 1468, 16365], [1858, 5615, 257, 1468, 16365], [1858, 5615, 257, 1468, 16365]]}'

In [266]:
# Bound the wait so an unreachable or stalled server cannot hang the notebook,
# and fail loudly on an HTTP error instead of letting a later cell trip over a
# response body that has no 'predictions' entry.
response = requests.post(server_url, data=input_data_json, timeout=60)
response.raise_for_status()

In [267]:
response.text

'{\n    "predictions": [[508, 550, 1716, 257, 16365, 13, 1375], [508, 5615, 287, 262, 17012, 286, 465], [3706, 9074, 13, 376, 7737, 508, 550]\n    ]\n}'

In [268]:
# tf.constant(5).as_list()

In [269]:
# Decode each returned row of token ids; the rows contain only the generated
# continuation, not the prompt.
for i in np.array(response.json()['predictions']):
    print(tokenizer.decode(i))

 who had become a witch. She
 who lived in the basement of his
 named Mrs. Fanny who had


In [186]:
# Rebuild the batch with a longer prompt, repeated three times.
# NOTE(review): unlike the earlier encoding cells, this appends `ids` rather
# than `ids[0]`, so np.array(x) gains an extra axis of size 1 (see the nested
# array displayed below). concrete_function was traced for rank-2 input —
# confirm whether the extra axis is intended.
x = list()
for i in ["Once upon a time there lived a king without fear whatsoever","Once upon a time there lived a king without fear whatsoever","Once upon a time there lived a king without fear whatsoever"]:
    ids = tokenizer.encode(i,return_tensors = 'tf')
    x.append(ids)

In [188]:
np.array(x)

array([[[ 7454,  2402,   257,   640,   612,  5615,   257,  5822,  1231,
          3252, 16014]],

       [[ 7454,  2402,   257,   640,   612,  5615,   257,  5822,  1231,
          3252, 16014]],

       [[ 7454,  2402,   257,   640,   612,  5615,   257,  5822,  1231,
          3252, 16014]]])

In [109]:
concrete_function(np.array(x))[0][0][-1]

<tf.Tensor: shape=(50257,), dtype=float32, numpy=
array([-74.626976, -75.38203 , -82.51564 , ..., -87.272064, -83.40596 ,
       -77.62991 ], dtype=float32)>

In [80]:
model(np.array(x[0]))

(<tf.Tensor: shape=(5, 50257), dtype=float32, numpy=
 array([[ -37.276787,  -36.41671 ,  -41.166496, ...,  -44.37204 ,
          -43.39425 ,  -37.471313],
        [-113.54544 , -113.27844 , -122.17086 , ..., -118.59855 ,
         -120.95412 , -117.01207 ],
        [-106.12589 , -104.31913 , -108.76358 , ..., -110.14669 ,
         -107.425575, -105.50971 ],
        [ -84.54696 ,  -82.34526 ,  -88.21153 , ...,  -88.45912 ,
          -89.08511 ,  -85.38495 ],
        [ -87.48384 ,  -89.14494 ,  -94.514565, ...,  -98.71225 ,
          -96.36986 ,  -91.682816]], dtype=float32)>,)

In [178]:
tokenizer.decode([np.argmax(model_.signatures['serving_default'](ids[0])['output_0'])])

'!'

In [179]:
# NOTE(review): the signatures saved earlier in this notebook only show an
# 'output_0' entry in their results; 'output_1' presumably comes from a
# different export that is not visible here — confirm before re-running.
tokenizer.decode(model_.signatures['serving_default'](ids[0])['output_1'])

',. of; and for in:." that to who\n but—,"... as the about at ; from or (!… on [ — which-- against.... among except. because upon - a before with" -- – when over? save'

In [69]:
tf.nn.softmax(tf.math.top_k(model(ids)[0][0][-1], k=50, sorted=True,name = "top50")[0])

<tf.Tensor: shape=(), dtype=float32, numpy=0.8558431>