In [43]:
import pickle
import spacy
import numpy as np
import json,re
# Pickled classifier file; opened and unpickled by get_classifier().
MODEL_PATH = "../model/classifier.p"
# JSON file parsed by get_index_label_dict(); predict() looks labels up in it
# by stringified class index.
INDEX_TO_LABEL_FILE_PATH = "../metadata/index_to_label.json"


In [44]:
# Load the spaCy pipeline once at notebook level; lemmatize() and predict()
# both use this shared `spacy_model` object.
print("Loading spacy model")
spacy_model = spacy.load("en_core_web_md")
print("Done loading spacy model")

Loading spacy model
Done loading spacy model


In [45]:
def get_index_label_dict(path=None):
    """Load the class-index -> label mapping from a JSON file.

    Args:
        path: Optional path of the JSON file to read. When omitted, the
            module-level ``INDEX_TO_LABEL_FILE_PATH`` is used.

    Returns:
        The decoded JSON content, or ``None`` (after printing the reason) if
        the file could not be opened or parsed.
    """
    try:
        if path is None:
            path = INDEX_TO_LABEL_FILE_PATH
        # JSON is read as UTF-8 explicitly so the result does not depend on
        # the platform's default encoding.
        with open(path, encoding="utf-8") as json_data:
            return json.load(json_data)
    except Exception as e:
        # `Exception` (not a bare `except:`) keeps the best-effort behaviour
        # without swallowing KeyboardInterrupt / SystemExit, and the cause is
        # reported instead of being hidden.
        print("Unable to read index to label dict: " + str(e))
        return None

In [46]:
def lemmatize(text):
    """Return `text` with each spaCy token replaced by its lemma.

    The text is run through the notebook-level `spacy_model`; the `lemma_`
    of every resulting token is collected and the pieces are joined with
    single spaces.
    """
    lemmas = [token.lemma_ for token in spacy_model(text)]
    return " ".join(lemmas)

In [47]:
def clean_sentence(sent):
    """Normalise raw text before lemmatisation.

    Steps, in order: lower-case; turn newlines into spaces; replace every run
    of characters other than ASCII letters, digits, space and ``?`` with a
    single space; collapse repeated spaces; collapse repeated question marks;
    strip leading/trailing spaces.

    Args:
        sent: Input string.

    Returns:
        The cleaned string.
    """
    sent = sent.lower()
    sent = sent.replace('\n', ' ')
    # Remove all special characters except space and '?'.
    sent = re.sub(r'[^A-Za-z0-9 ?]+', ' ', sent)
    # Collapse adjacent spaces.
    sent = re.sub(r' +', ' ', sent)
    # Collapse adjacent question marks. The pattern must be a raw string:
    # '\?' in a plain literal is an invalid escape sequence and triggers a
    # DeprecationWarning / SyntaxWarning on current Python versions.
    sent = re.sub(r'\?+', '?', sent)
    return sent.strip(" ")


In [48]:
##Not intended for production
##Ideally should be a singleton
def get_classifier(model_path=None):
    """Load the pickled classifier from disk.

    Args:
        model_path: Optional path of the pickle file. When omitted, the
            module-level ``MODEL_PATH`` is used.

    Returns:
        The unpickled object, or ``None`` if loading failed (the exception
        text is printed).
    """
    classifier = None
    print("Loading classifier")
    try:
        if model_path is None:
            model_path = MODEL_PATH
        # SECURITY: pickle.load can execute arbitrary code embedded in the
        # file; only load model files from a trusted source.
        # `with` guarantees the handle is closed even when unpickling raises
        # (the previous explicit close() was skipped on error).
        with open(model_path, 'rb') as f:   # 'rb' for reading binary file
            classifier = pickle.load(f)
        print("Loaded classifier")
    except Exception as e:
        print("Exception in loading classifier:" + str(e))
    return classifier

In [53]:
def predict(query):
    """
    Accepts a query and classifies it using the trained model.

    Applies the same preprocessing pipeline of clean, lemmatize before
    inference, then feeds the `.vector` of the spaCy-processed text to the
    classifier's ``predict_proba``.

    Args:
        query: Text to classify; it is converted with ``str()`` first.

    Returns:
        ``{"label": ..., "probability": ...}`` for the highest-scoring class,
        or ``None`` (after printing a message) when the classifier or the
        index-to-label mapping cannot be loaded.
    """
    model = get_classifier()
    if model is None:
        print("Unable to load classifier")
        return None

    # Without this guard a failed mapping load surfaced later as
    # "TypeError: 'NoneType' object is not subscriptable".
    index_to_label_dict = get_index_label_dict()
    if index_to_label_dict is None:
        print("Unable to load index to label dict")
        return None

    query_cleaned = clean_sentence(str(query))
    query_lemmatized = lemmatize(str(query_cleaned))
    # reshape(1, -1) builds the single-sample 2-D input without hard-coding
    # the embedding width (previously fixed at 300).
    features = np.array(spacy_model(query_lemmatized).vector).reshape(1, -1)
    prediction = model.predict_proba(features)

    label_index = np.argmax(prediction)
    # The mapping is keyed by the stringified class index.
    label = index_to_label_dict[str(label_index)]
    class_score = prediction[0][label_index]
    return {"label": label,
            "probability": class_score}


# Example call: classify a sample query; the returned dict is the cell output.
predict("fake people")

Loading classifier
Loaded classifier


{'label': 'NEGATIVE', 'probability': 0.8922557220999955}