In [27]:
from nltk import tokenize
from joblib import load
# import warnings simplefilter
from warnings import simplefilter
# ignore all future warnings
simplefilter(action='ignore', category=FutureWarning)

In [35]:
# Sample sentences that the cells below pass to each saved model.
texts = [
    "It was a lovely weekend.",
    "if you want a longterm relationship, no way",
    "You love this person in a way your parents dont!",
    "The president frequently injects his own words into statements he claims his supporters said on Fox News or elsewhere.",
    "Mr. T’s habit of putting words in the mouths of others are not just limited to impeachment.",
]

In [None]:
def print_pred(texts, pred):
    """Print every text prefixed with its predicted label.

    A prediction equal to 0 is printed with the prefix "formal: "; any other
    value is printed with the prefix "informal: ". Texts and predictions are
    paired by position, and pairing stops at the shorter of the two inputs.
    """
    for sentence, label in zip(texts, pred):
        prefix = "formal: " if label == 0 else "informal: "
        print(prefix, sentence)

## Naïve Bayes using raw text

In [36]:
# Load the serialized raw-text Naive Bayes model and classify the sample texts.
# NOTE: joblib's load unpickles the file, so only load model files from a trusted source.
# Presumably a fitted pipeline that vectorizes raw strings itself, since `texts`
# is passed in unprocessed — TODO confirm against the training notebook.
pipeline_validated = load("model/raw_nb.jbl")
pred = pipeline_validated.predict(texts)
# Print each sentence next to its predicted label.
print_pred(texts, pred)

formal:  It was a lovely weekend.
informal:  if you want a longterm relationship, no way
informal:  You love this person in a way your parents dont!
formal:  The president frequently injects his own words into statements he claims his supporters said on Fox News or elsewhere.
formal:  Mr. T’s habit of putting words in the mouths of others are not just limited to impeachment.


## Logistic Regression using raw text

In [37]:
# Load the serialized raw-text Logistic Regression model and classify the sample texts.
# NOTE: joblib's load unpickles the file, so only load model files from a trusted source.
# Presumably a fitted pipeline that vectorizes raw strings itself, since `texts`
# is passed in unprocessed — TODO confirm against the training notebook.
pipeline_validated = load("model/raw_lg.jbl")
pred = pipeline_validated.predict(texts)
# Print each sentence next to its predicted label.
print_pred(texts, pred)

formal:  It was a lovely weekend.
informal:  if you want a longterm relationship, no way
informal:  You love this person in a way your parents dont!
formal:  The president frequently injects his own words into statements he claims his supporters said on Fox News or elsewhere.
formal:  Mr. T’s habit of putting words in the mouths of others are not just limited to impeachment.


## Decision Tree using raw text

In [38]:
# Load the serialized raw-text Decision Tree model and classify the sample texts.
# NOTE: joblib's load unpickles the file, so only load model files from a trusted source.
# Presumably a fitted pipeline that vectorizes raw strings itself, since `texts`
# is passed in unprocessed — TODO confirm against the training notebook.
pipeline_validated = load("model/raw_dt.jbl")
pred = pipeline_validated.predict(texts)
# Print each sentence next to its predicted label.
# NOTE(review): the recorded output of this cell labels all five samples
# "informal"; worth checking whether the saved tree is degenerate.
print_pred(texts, pred)

informal:  It was a lovely weekend.
informal:  if you want a longterm relationship, no way
informal:  You love this person in a way your parents dont!
informal:  The president frequently injects his own words into statements he claims his supporters said on Fox News or elsewhere.
informal:  Mr. T’s habit of putting words in the mouths of others are not just limited to impeachment.


## LSTM 

In [39]:
# NOTE(review): these imports sit mid-notebook; consider moving them to the
# import cell at the top so a fresh "Run All" surfaces missing dependencies early.
from keras.models import load_model 
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.utils.np_utils import to_categorical  # NOTE(review): not used in this cell
import numpy as np

# FIXME(review): a brand-new Tokenizer is fitted on the five sample sentences
# themselves, so the word -> integer mapping is derived from this tiny input.
# The saved LSTM was presumably trained with a tokenizer fitted on the training
# corpus; unless that same tokenizer is reloaded here, the ids fed to the model
# likely do not correspond to the words it learned — confirm and load the
# training-time tokenizer instead.
tokenizer = Tokenizer(num_words = 20000)
tokenizer.fit_on_texts(texts)
# Turn each sentence into a list of integer word ids.
sequences_new = tokenizer.texts_to_sequences(texts)
# Pad/truncate every sequence to length 1000 (presumably the sequence length
# used at training time — TODO confirm).
new_texts = pad_sequences(sequences_new, maxlen = 1000) 

# Load the saved Keras model from disk.
model = load_model("model/LSTM.h5") 
# Take the index of the largest score along axis 1 as the predicted class
# (assumes the model outputs one score per class for each sample — TODO confirm).
pred = np.argmax(model.predict(new_texts), axis=1)
# Print each sentence next to its predicted label.
print_pred(texts, pred)

formal:  It was a lovely weekend.
formal:  if you want a longterm relationship, no way
formal:  You love this person in a way your parents dont!
informal:  The president frequently injects his own words into statements he claims his supporters said on Fox News or elsewhere.
formal:  Mr. T’s habit of putting words in the mouths of others are not just limited to impeachment.


# features

In [None]:
# Locations of the serialized vectorizers.
vect_paths = [
    "vectorizer/ent_count_vectorizer.jbl",
    "vectorizer/ngram_count_vectorizer.jbl",
]

In [None]:
# Paths of the saved `*_nb` models (Naive Bayes, judging by the suffix).
# Every entry must end with a comma: Python concatenates adjacent string
# literals, so a missing comma silently merges several paths into one.
nb_paths = ["model/entity_nb.jbl",
            "model/entity_length_nb.jbl",
            "model/fast_number_nb.jbl",
            "model/n_gram_nb.jbl",
            "model/readability_nb.jbl",
            ]

# Paths of the saved `*_lr` models (Logistic Regression, judging by the suffix).
# Every entry must end with a comma: Python concatenates adjacent string
# literals, so a missing comma silently merges several paths into one.
lr_paths = ["model/entity_lr.jbl",
            "model/entity_length_lr.jbl",
            "model/fast_number_lr.jbl",
            "model/n_gram_lr.jbl",
            "model/readability_lr.jbl",
            ]

# Paths of the saved `*_dt` models (Decision Tree, judging by the suffix).
# Every entry must end with a comma: Python concatenates adjacent string
# literals, so a missing comma silently merges several paths into one.
dt_paths = ["model/entity_dt.jbl",
            "model/entity_length_dt.jbl",
            "model/fast_number_dt.jbl",
            "model/n_gram_dt.jbl",
            "model/readability_dt.jbl",
            ]

In [None]:
def predict(clf_path, vect_path, text):
    """Predict the category of a text with a saved classifier and vectorizer.

    Args:
        clf_path: Path of the serialized classifier, read with ``load``.
        vect_path: Path of the serialized vectorizer, read with ``load``.
        text: Either a single string or an iterable of strings. A bare
            string is wrapped in a one-element list before vectorizing,
            because text vectorizers expect an iterable of documents
            rather than one raw string.

    Returns:
        The first element of ``clf.predict(...)``, i.e. the prediction for
        the first (or only) document in ``text``.
    """
    # Load the classifier and the vectorizer. Both are re-read from disk on
    # every call. NOTE: loading unpickles the files, so only use trusted paths.
    clf = load(clf_path)
    loaded_vectorizer = load(vect_path)
    # Normalize a single string to a list of documents.
    documents = [text] if isinstance(text, str) else text
    # Vectorize the documents and return the prediction for the first one.
    X_input = loaded_vectorizer.transform(documents)
    return clf.predict(X_input)[0]