# Overview
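* Load the saved NER model (see the [TensorFlow guide on saving and loading an entire model](https://www.tensorflow.org/tutorials/keras/save_and_load#save_the_entire_model))
* Tag a sample question, extract the PLAYER and STAT entities, and pass them to the NBA API helper function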

# Dependencies

In [1]:
# general
from itertools import groupby
import re

# data
import numpy as np

# ml
import tensorflow as tf
from tensorflow import keras

# Constants

In [2]:
# Relative locations of the saved NER model and of the vocabulary file.
PATH_VOCABULARY = "../nba_api_chatbot/data/vocabulary.txt"
PATH_MODEL = "../nba_api_chatbot/models/ner"

# Tag id -> tag name, used to decode the model's argmax indices
# (via make_tag_lookup_table()).
MAPPING = dict(enumerate(
    ['[PAD]', 'O', 'B-PLAYER', 'I-PLAYER', 'B-STAT', 'I-STAT']
))

# Load Model
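* The model is loaded with `compile=False` (inference only). Background on loading a Keras model that was trained with a custom loss: https://stackoverflow.com/questions/48373845/loading-model-with-custom-loss-keras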

In [3]:
# compile=False loads the model for inference only, without restoring the
# training configuration (loss/optimizer) it was saved with.
ner_model = keras.models.load_model(PATH_MODEL, compile=False)

In [4]:
# Print the layer-by-layer summary of the loaded model as a sanity check.
ner_model.summary()

Model: "ner_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 token_and_position_embeddin  multiple                 644096    
 g (TokenAndPositionEmbeddin                                     
 g)                                                              
                                                                 
 transformer_block (Transfor  multiple                 21120     
 merBlock)                                                       
                                                                 
 dropout_2 (Dropout)         multiple                  0         
                                                                 
 dense_2 (Dense)             multiple                  2112      
                                                                 
 dropout_3 (Dropout)         multiple                  0         
                                                         

In [5]:
# Read the vocabulary file into a list with one entry per line.
# The encoding is pinned to UTF-8 so decoding does not depend on the
# platform's locale default (relevant for names with non-ASCII characters).
with open(PATH_VOCABULARY, "r", encoding="utf-8") as f:
    vocabulary = f.read().splitlines()

In [6]:
# Lookup layer that maps vocabulary strings to integer ids.
lookup_layer = keras.layers.StringLookup(vocabulary=vocabulary)


def tokenize_and_convert_to_ids(text):
    """Convert a sentence into a tensor of vocabulary ids.

    The text is split on single spaces, each piece is lower-cased, and the
    result is passed through ``lookup_layer``.

    Args:
        text: The input sentence as a plain string.

    Returns:
        The output of ``lookup_layer`` for the lower-cased tokens
        (one id per token).
    """
    words = text.split(" ")
    lowered = tf.strings.lower(words)
    return lookup_layer(lowered)

# Predict

In [7]:
# Sample inference using the trained model

text = "How many field goals does Michael Jordan have?"

# Drop the trailing "?" so the last word is looked up without punctuation.
text_scrub = text.rstrip("?")

# Convert the space-separated words to vocabulary ids.
sample_input = tokenize_and_convert_to_ids(text_scrub)

# Add a leading batch dimension of size 1: shape becomes [1, num_tokens].
sample_input = tf.reshape(sample_input, shape=[1, -1])
print(sample_input)

output = ner_model.predict(sample_input)
# argmax over the last axis picks one tag id per token; [0] selects the
# single item in the batch.
prediction = np.argmax(output, axis=-1)[0]
# Translate tag ids into tag names.
prediction = [MAPPING[i] for i in prediction]

# For this sample: "field goals" -> B-STAT I-STAT, "Michael Jordan" -> B-PLAYER I-PLAYER
print(prediction)

tf.Tensor([[  1   2  19  23   5  90 108   3]], shape=(1, 8), dtype=int64)
['O', 'O', 'B-STAT', 'I-STAT', 'O', 'B-PLAYER', 'I-PLAYER', 'O']


# Extract

In [8]:
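# NOTE: earlier draft of the extraction logic, kept for reference only; the
# working version is extract_tagged_values() in the next cell.
# regex = r"(?<=-).*"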
# res = []
# i = 0
# for tag, chunk in groupby(re.findall(regex, tag) for tag in prediction):
    
#     entity = []
#     for j in chunk:
#         # print(i, j)
#         entity.append(tokens[i])
#         i += 1
    
#     if tag != []:
#         res.append((tag[0], " ".join(entity)))

# print(text)
# print(prediction)
# print(res)

In [9]:
def extract_tagged_values(text, prediction):
    """Group BIO-tagged tokens into ``(label, phrase)`` entity tuples.

    ``text`` is split on single spaces and paired position-by-position with
    ``prediction``. Consecutive tokens that carry the same label are joined
    with spaces into one phrase. A ``B-`` tag always opens a new entity, so
    two adjacent entities of the same type are kept separate. Tags without a
    hyphen (e.g. ``"O"`` or ``"[PAD]"``) are treated as "no entity".

    Args:
        text: The sentence that was tagged. It must be tokenised the same way
            as the model input (split on single spaces).
        prediction: Sequence of tag strings such as ``"O"``, ``"B-STAT"`` or
            ``"I-STAT"``, one per token. Tags beyond the number of tokens
            (and tokens beyond the number of tags) are ignored.

    Returns:
        A list of ``(label, phrase)`` tuples in order of appearance, e.g.
        ``[("STAT", "field goals"), ("PLAYER", "Michael Jordan")]``.
    """
    tokens = text.split(" ")

    res = []
    current_label = None  # label of the entity being built, if any
    current_words = []    # tokens collected for that entity

    # zip() stops at the shorter sequence, so surplus tags can never index
    # past the end of ``tokens``.
    for token, tag in zip(tokens, prediction):
        # "B-STAT" -> ("B", "-", "STAT"); "O" -> ("O", "", "")
        prefix, sep, label = tag.partition("-")
        is_entity = bool(sep)

        # A token extends the open entity only when it shares the label and
        # is not an explicit "begin" marker.
        continues = is_entity and prefix != "B" and label == current_label

        if not continues:
            # Close the entity that was open, if any.
            if current_words:
                res.append((current_label, " ".join(current_words)))
            current_label = label if is_entity else None
            current_words = []

        if is_entity:
            current_words.append(token)

    # Flush an entity that runs to the end of the sentence.
    if current_words:
        res.append((current_label, " ".join(current_words)))

    return res

In [10]:
# Note: the original `text` (not `text_scrub`) is passed here. The token count
# still matches `prediction` because the "?" is attached to the last word, but
# a tagged final token would keep its trailing "?".
res = extract_tagged_values(text, prediction)
res

[('STAT', 'field goals'), ('PLAYER', 'Michael Jordan')]

In [15]:
# Collapse the (label, phrase) pairs into a dictionary keyed by label.
# Each label is expected to occur once; if it repeats, the first phrase wins.
d_res = {}
for label, phrase in res:
    d_res.setdefault(label, phrase)

d_res

{'STAT': 'field goals', 'PLAYER': 'Michael Jordan'}

# Get NBA Data

In [16]:
from data import generate_data

In [14]:
# The package name "nba-api-chatbot" contains hyphens, which an `import`
# statement cannot express, so the module is loaded by its dotted string name.
import importlib  
nah = importlib.import_module("nba-api-chatbot.nba_api_helper")

In [18]:
# get stat_col
# Translate the extracted STAT phrase into the column name used for the stats
# lookup. d_res['STAT'] raises KeyError if no STAT entity was extracted;
# presumably STATS_LOOKUP does the same for an unknown phrase - TODO confirm.
stat_col = generate_data.STATS_LOOKUP[d_res['STAT']]
stat_col

'FGM'

In [19]:
# Look up the requested stat for the extracted player.
# This issues a live HTTPS request to stats.nba.com (see the log output
# below), so it needs network access. d_res['PLAYER'] raises KeyError if no
# PLAYER entity was extracted.
nah.get_total_stat_for_one_player(
    player_name=d_res['PLAYER'],
    stat=stat_col
)

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): stats.nba.com:443
DEBUG:urllib3.connectionpool:https://stats.nba.com:443 "GET /stats/playercareerstats?LeagueID=&PerMode=Totals&PlayerID=893 HTTP/1.1" 200 3273


12192