In [None]:
import json
import numpy as np
from tensorflow import keras
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

# Paths of the two saved Keras models. They are relative paths, so they are
# presumably resolved against the current working directory -- confirm for
# the deployment environment. (The .h5 extension suggests the HDF5 format.)
CATEGORICAL_CLASSIFIER_MODEL_PATH = 'categorical_classifier.h5'
INTENSITY_PREDICTOR_MODEL_PATH = 'intensity_predictor.h5'

# Both models are loaded once, when this module/cell is executed, and are then
# used as module-level globals by predict_category_and_intensity().
categorical_classifier = keras.models.load_model(CATEGORICAL_CLASSIFIER_MODEL_PATH)
intensity_predictor = keras.models.load_model(INTENSITY_PREDICTOR_MODEL_PATH)

def preprocess_query(query, tokenizer, max_length=100):
    """Encode a single query string as a padded batch of token ids.

    Args:
        query: The text to encode.
        tokenizer: Object providing ``texts_to_sequences``; it is called
            with a one-element list containing ``query``.
        max_length: Forwarded to ``pad_sequences`` as ``maxlen``.

    Returns:
        Whatever ``pad_sequences`` returns for the one-query batch
        (per the Keras docs, an array of shape ``(1, max_length)``).
    """
    # texts_to_sequences works on a batch of texts, so wrap the lone query.
    token_id_batch = tokenizer.texts_to_sequences([query])
    return pad_sequences(token_id_batch, maxlen=max_length)

def predict_category_and_intensity(json_input, tokenizer=None):
    """Predict the category and intensity class indices for one query.

    Args:
        json_input: JSON text whose decoded object has a ``"query"`` key
            holding the text to classify.
        tokenizer: The tokenizer that was fitted on the training corpus
            (the same one used to train both models). Optional only for
            backward compatibility: when omitted, a fresh *unfitted*
            ``Tokenizer`` is used, exactly as before. An unfitted tokenizer
            has an empty vocabulary, so every query is encoded as an empty
            sequence and the models always see the same all-padding input;
            a ``RuntimeWarning`` is issued in that case.

    Returns:
        A dict with ``"category_prediction"`` and ``"intensity_prediction"``,
        each a plain Python ``int``: the argmax over the last axis of the
        corresponding model's output.

    Raises:
        json.JSONDecodeError: If ``json_input`` is not valid JSON.
        KeyError: If the decoded object has no ``"query"`` key.
    """
    import warnings

    data = json.loads(json_input)
    query = data['query']

    if tokenizer is None:
        warnings.warn(
            "predict_category_and_intensity() was called without a fitted "
            "tokenizer; an unfitted Tokenizer maps every query to an empty "
            "sequence, so the predictions do not depend on the query text. "
            "Pass the tokenizer that was fitted on the training data.",
            RuntimeWarning,
            stacklevel=2,
        )
        # Legacy fallback. The 30000 cap came from the original sizing
        # estimate: roughly 1000 generated queries of about 30 words each,
        # all distinct in the worst case.
        tokenizer = Tokenizer(num_words=30000)

    processed_query = preprocess_query(query, tokenizer)

    category_scores = categorical_classifier.predict(processed_query)
    intensity_scores = intensity_predictor.predict(processed_query)

    # argmax over the class axis yields a one-element array for the single
    # query; .item() extracts the scalar without relying on the implicit
    # array-to-int conversion that NumPy deprecated in 1.25.
    category_pred = np.argmax(category_scores, axis=-1).item()
    intensity_pred = np.argmax(intensity_scores, axis=-1).item()

    return {
        "category_prediction": int(category_pred),
        "intensity_prediction": int(intensity_pred),
    }


