# Carga de modelos entrenados

A continuación procedemos a cargar el modelo entrenado y a realizar una predicción.

In [1]:
# Librerias
from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf

# DataFrame
import pandas as pd

# Matplot
import matplotlib.pyplot as plt
%matplotlib inline

# Scikit-learn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from sklearn.manifold import TSNE
from sklearn.feature_extraction.text import TfidfVectorizer

# Keras
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Activation, Dense, Dropout, Embedding, Flatten, Conv1D, MaxPooling1D, LSTM
from keras import utils
from keras.callbacks import ReduceLROnPlateau, EarlyStopping

# nltk
import nltk
from nltk.corpus import stopwords
from  nltk.stem import SnowballStemmer

# Word2vec
import gensim

# Utility
import re
import numpy as np
import os
from collections import Counter
import logging
import time
import pickle
import itertools

# Configure logging: timestamp, level and message, at INFO verbosity.
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
tf.keras.backend.clear_session()  # Makes it easy to reset the notebook's Keras state.

In [2]:
# Load the pickled tokenizer and label encoder (files are opened in binary mode).
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
ruta_tokenizer = 'model/tokenizer.pkl'
ruta_encoder = 'model/encoder.pkl'

with open(ruta_tokenizer, mode='rb') as tk:
    tokenizer = pickle.load(tk)

with open(ruta_encoder, mode='rb') as ec:
    encoder = pickle.load(ec)

In [3]:
# Load the trained TensorFlow/Keras model from its .h5 file.
# `model` is the global used by predict() further down.
h5_file = 'model/model.h5'
model = tf.keras.models.load_model(h5_file)

In [4]:
# Load the Word2Vec model saved with gensim.
# NOTE(review): `w2v_model` is not referenced by the cells shown here — confirm it is still needed.
w2v_file = 'model/model.w2v'
w2v_model = gensim.models.word2vec.Word2Vec.load(w2v_file)

2020-11-12 00:17:38,473 : INFO : loading Word2Vec object from model/model.w2v
2020-11-12 00:17:38,483 : INFO : loading wv recursively from model/model.w2v.wv.* with mmap=None
2020-11-12 00:17:38,483 : INFO : setting ignored attribute vectors_norm to None
2020-11-12 00:17:38,484 : INFO : loading vocabulary recursively from model/model.w2v.vocabulary.* with mmap=None
2020-11-12 00:17:38,486 : INFO : loading trainables recursively from model/model.w2v.trainables.* with mmap=None
2020-11-12 00:17:38,487 : INFO : setting ignored attribute cum_table to None
2020-11-12 00:17:38,488 : INFO : loaded model/model.w2v


A continuación vamos a hacer unas pruebas rápidas para predecir el sentimiento de un texto.

In [5]:
# DATASET
DATASET_COLUMNS = ["target", "ids", "date", "flag", "user", "text"]
DATASET_ENCODING = "ISO-8859-1"
TRAIN_SIZE = 0.8

# TEXT CLEANING
# Raw string so that the regex escape `\S` is passed to `re` untouched; in a
# normal string literal `\S` is an invalid escape sequence and triggers a
# DeprecationWarning/SyntaxWarning. The pattern value itself is unchanged.
# NOTE(review): the third alternative `http?:\S` makes the final "p" optional
# and consumes a single character — it looks like a typo, but it is kept as-is
# so the cleaning matches whatever was used at training time. TODO confirm.
TEXT_CLEANING_RE = r"@\S+|https?:\S+|http?:\S|[^A-Za-z0-9]+"

# WORD2VEC
W2V_SIZE = 300
W2V_WINDOW = 7
W2V_EPOCH = 32
W2V_MIN_COUNT = 10

# KERAS
SEQUENCE_LENGTH = 300  # maxlen passed to pad_sequences in predict()
EPOCHS = 8
BATCH_SIZE = 1024

# SENTIMENT
POSITIVE = "POSITIVE"
NEGATIVE = "NEGATIVE"
NEUTRAL = "NEUTRAL"
# (lower, upper) cut-offs used by decode_sentiment: score <= lower is NEGATIVE,
# score >= upper is POSITIVE, anything in between is NEUTRAL.
SENTIMENT_THRESHOLDS = (0.4, 0.7)

# EXPORT
# File names of the saved artifacts.
# NOTE(review): the loading cells hard-code 'model/<name>' paths instead of
# using these constants — consider unifying them.
KERAS_MODEL = "model.h5"
WORD2VEC_MODEL = "model.w2v"
TOKENIZER_MODEL = "tokenizer.pkl"
ENCODER_MODEL = "encoder.pkl"

In [6]:
def decode_sentiment(score, include_neutral=True):
    """Translate a numeric model score into a sentiment label.

    Args:
        score: Value compared against the thresholds.
        include_neutral: When True, use the two cut-offs in
            SENTIMENT_THRESHOLDS and allow a NEUTRAL result; when False,
            split at 0.5 into NEGATIVE / POSITIVE only.

    Returns:
        One of the NEGATIVE, NEUTRAL or POSITIVE label constants.
    """
    if not include_neutral:
        # Binary mode: a single cut-off at 0.5.
        if score < 0.5:
            return NEGATIVE
        return POSITIVE

    # Three-way mode: at or below the lower bound is negative, at or above
    # the upper bound is positive, everything else stays neutral.
    if score <= SENTIMENT_THRESHOLDS[0]:
        return NEGATIVE
    if score >= SENTIMENT_THRESHOLDS[1]:
        return POSITIVE
    return NEUTRAL

In [7]:
def predict(text, include_neutral=True):
    """Predict the sentiment of a single text with the loaded model.

    Uses the notebook globals `tokenizer`, `model`, `SEQUENCE_LENGTH` and
    `decode_sentiment`.

    Args:
        text: The text to classify.
        include_neutral: Forwarded to decode_sentiment.

    Returns:
        dict with keys:
            "label": label returned by decode_sentiment,
            "score": the model output as a plain Python float,
            "elapsed_time": seconds spent inside this function.

    Raises:
        ValueError: if the model output for the sample is not a single value
            (raised by ndarray.item()).
    """
    # perf_counter is monotonic, so the duration is not affected by clock changes.
    start_at = time.perf_counter()

    # Tokenize the text and pad it to the length given by SEQUENCE_LENGTH.
    sequences = tokenizer.texts_to_sequences([text])
    x_test = pad_sequences(sequences, maxlen=SEQUENCE_LENGTH)

    # Predict. The first row of the output is an array, not a scalar; calling
    # float() on a non-0-d array is deprecated in recent NumPy, so extract the
    # single element explicitly.
    # Assumes the model emits exactly one value per sample — TODO confirm.
    raw_score = model.predict(x_test)[0]
    score = float(np.asarray(raw_score).item())

    # Decode the scalar score into a label.
    label = decode_sentiment(score, include_neutral=include_neutral)

    return {"label": label, "score": score,
            "elapsed_time": time.perf_counter() - start_at}

In [8]:
# Run a prediction on a sample text; the returned dict is shown as the cell output.
# NOTE(review): the sample is in Spanish — confirm the tokenizer/model were
# trained on Spanish text, otherwise the score may not be meaningful.
predict("Masacre en el cauca deja 10 heridos")

{'label': 'NEUTRAL',
 'score': 0.5317609906196594,
 'elapsed_time': 0.5492067337036133}