In [29]:
import os
import sys
import numpy as np
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical
from keras.layers import Dense, Input, GlobalMaxPooling1D
from keras.layers import Conv1D, MaxPooling1D, Embedding
from keras.models import Model
from keras.initializers import Constant
from keras.models import model_from_json
import tensorflow as tf
from tensorflow import keras
import pickle


# Build the TF1 session configuration in one expression: one visible GPU,
# a single intra-op thread, soft device placement, and GPU memory that grows
# on demand with a per-process fraction setting of 0.6.
config = tf.ConfigProto(
    device_count={'GPU': 1},
    intra_op_parallelism_threads=1,
    allow_soft_placement=True,
    gpu_options=tf.GPUOptions(
        allow_growth=True,
        per_process_gpu_memory_fraction=0.6,
    ),
)

# Create the session and register it with the Keras backend; the same
# `session` object is entered again inside getNewsClassification.
session = tf.Session(config=config)
keras.backend.set_session(session)

# Root that the data directories below are joined onto; the empty string
# makes them relative paths.
BASE_DIR = ''
# GloVe directory (not referenced in the visible cells; presumably used
# when the model was trained - TODO confirm).
GLOVE_DIR = os.path.join(BASE_DIR, 'glove.6B')
# Directory that is scanned for one sub-directory per label.
TEXT_DATA_DIR = os.path.join(BASE_DIR, '20_newsgroup')
# Passed as `maxlen` to pad_sequences in getNewsClassification.
MAX_SEQUENCE_LENGTH = 1000
# Passed as `num_words` to the Tokenizer.
MAX_NUM_WORDS = 20000
# Not referenced in the visible cells (presumably training-time settings -
# TODO confirm).
EMBEDDING_DIM = 100
VALIDATION_SPLIT = 0.2

# Walk the label directories and collect every message body with its label id.
print('Processing text dataset')

texts = []  # message bodies, one entry per file read
labels_index = {}  # label name -> numeric id
index_to_label_dict = {}  # numeric id -> label name
labels = []  # numeric label id for each entry of `texts`

# On Python 3 the files are opened as latin-1; on Python 2 no extra
# arguments are passed to open().
open_kwargs = {} if sys.version_info < (3,) else {'encoding': 'latin-1'}

for category_name in sorted(os.listdir(TEXT_DATA_DIR)):
    category_dir = os.path.join(TEXT_DATA_DIR, category_name)
    if not os.path.isdir(category_dir):
        continue
    # Ids are handed out in sorted directory order, starting at 0.
    label_id = len(labels_index)
    labels_index[category_name] = label_id
    index_to_label_dict[label_id] = category_name
    for file_name in sorted(os.listdir(category_dir)):
        if not file_name.isdigit():
            continue  # only all-digit file names are read
        with open(os.path.join(category_dir, file_name), **open_kwargs) as message_file:
            body = message_file.read()
        header_end = body.find('\n\n')
        if header_end > 0:
            # Drop everything before the first blank line (the header).
            body = body[header_end:]
        texts.append(body)
        labels.append(label_id)

print('Found %s texts.' % len(texts))
print(labels[0])

print("loading model .....")

# Rebuild the network architecture from its JSON description. The context
# manager closes the file even if reading raises.
with open('model.json', 'r') as json_file:
    loaded_model_json = json_file.read()
loaded_model = model_from_json(loaded_model_json)
# Load the saved weights into the freshly built model.
loaded_model.load_weights("model.h5")
print("Loaded model from disk")

# Compile the restored model (nothing is evaluated here).
loaded_model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
# Private Keras API. NOTE(review): presumably called so the predict function
# is built before getNewsClassification is first used - confirm.
loaded_model._make_predict_function()
print("done")

# Finally, fit the tokenizer on the loaded corpus and turn every text into a
# sequence of integer word ids.
# NOTE(review): the tokenizer is refit here rather than restored from disk, so
# its word ids must match the ones used when the model was trained - confirm.
tokenizer = Tokenizer(num_words=MAX_NUM_WORDS)
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)




def getNewsClassification(news_data):
    """Classify a single news text with the loaded model.

    The text is tokenized with the module-level ``tokenizer``, padded to
    ``MAX_SEQUENCE_LENGTH`` and passed to ``loaded_model.predict`` inside the
    module-level ``session`` and its graph. The raw model output and the
    predicted category are printed as a side effect.

    Args:
        news_data: The text to classify.

    Returns:
        A dict mapping the predicted category name to the input text.
    """
    news_batch = [news_data]
    with session.as_default(), session.graph.as_default():
        encoded_batch = tokenizer.texts_to_sequences(news_batch)
        test_data = pad_sequences(encoded_batch, maxlen=MAX_SEQUENCE_LENGTH)
        nn_output = loaded_model.predict(test_data)
        print(nn_output)

        classification = {}
        # argmax over axis 1 picks the highest-scoring label id per row.
        predicted_ids = np.argmax(nn_output, axis=1)
        for news_text, label_id in zip(news_batch, predicted_ids):
            category = index_to_label_dict[label_id]
            print("Category: ", category)
            print("text: " , news_text)
            print("=====================================")
            classification[category] = news_text
    return classification
# NOTE(review): pickle stores a function by reference (its module and
# qualified name), not its code, the model, or the tokenizer. The file can
# only be loaded where getNewsClassification is importable under that name.
# The context manager makes sure the file is flushed and closed.
with open('model_predict.plk', 'wb') as pickle_file:
    pickle.dump(getNewsClassification, pickle_file, protocol=2)

Processing text dataset
Found 19997 texts.
0
loading model .....
Loaded model from disk
done


In [30]:
# Unpickling can execute arbitrary code: only load files you created yourself.
# The context manager closes the file handle once the object is loaded.
with open('model_predict.plk', 'rb') as pickle_file:
    getn = pickle.load(pickle_file)

In [31]:
# Define the sample text in this cell so it runs on a fresh kernel instead of
# relying on `news` having been assigned by a later cell.
news = "Ther was a severe accident leading many people dead"
getn(news)

[[0.02514832 0.02083086 0.04633236 0.02526926 0.05171706 0.06452749
  0.01145351 0.02898723 0.09786203 0.01074342 0.00594416 0.0211697
  0.03547251 0.37474245 0.06428679 0.00505396 0.02395642 0.00696495
  0.02781193 0.05172555]]
Category:  sci.med
text:  Ther was a severe accident leading many people dead


{'sci.med': 'Ther was a severe accident leading many people dead'}

In [32]:
# Classify the same sample text by calling the function directly.
news = "Ther was a severe accident leading many people dead"
getNewsClassification(news)

[[0.02514832 0.02083086 0.04633236 0.02526926 0.05171706 0.06452749
  0.01145351 0.02898723 0.09786203 0.01074342 0.00594416 0.0211697
  0.03547251 0.37474245 0.06428679 0.00505396 0.02395642 0.00696495
  0.02781193 0.05172555]]
Category:  sci.med
text:  Ther was a severe accident leading many people dead


{'sci.med': 'Ther was a severe accident leading many people dead'}