# Train Classifier For News Classification
> ## * fastText

In [None]:
import pickle
import html
import multiprocessing
from collections import namedtuple, OrderedDict
import re
import sys
import os
from glob import glob

from numba import jit

os.environ['KERAS_BACKEND']='tensorflow'

import numpy as np
from tqdm import tqdm
tqdm.pandas(desc="progress-bar")
import pandas as pd

from gensim.models import FastText, KeyedVectors
from gensim.models.doc2vec import TaggedDocument

from konlpy.utils import pprint

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, classification_report, roc_curve,  accuracy_score, auc
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import scale, MinMaxScaler, LabelEncoder
from sklearn.manifold import TSNE
from sklearn.multiclass import OneVsRestClassifier

import keras.backend.tensorflow_backend as K
from keras.preprocessing import sequence
from keras_tqdm import TQDMCallback, TQDMNotebookCallback
from keras.models import Model, Sequential
from keras.regularizers import l2
from keras.optimizers import SGD
from keras.utils import np_utils
from keras.layers import Input, Flatten, Dense, Embedding, embeddings, merge, Dropout, Activation,  LSTM, Bidirectional, SimpleRNN, GRU
from keras.layers.convolutional import Conv1D, Conv2D
from keras.layers.pooling import MaxPooling1D, GlobalMaxPooling1D
from keras.layers.normalization import BatchNormalization
from keras.layers.core import SpatialDropout1D
from keras.utils import np_utils
from tensorflow.python.client import device_lib
from keras.layers.merge import dot

import xgboost as xgb

import matplotlib.pyplot as plt


import bokeh.plotting as bp
from bokeh.models import HoverTool, BoxSelectTool
from bokeh.plotting import figure, show, output_notebook

In [None]:
print (device_lib.list_local_devices())

In [None]:
import Basic_Module as bm

In [None]:
cores = multiprocessing.cpu_count()
print (cores)

## Document Labeling

In [None]:
# Record type for one labelled article with three fields: words, tags and category.
# NOTE(review): this rebinds the name `TaggedDocument` that was imported from
# gensim.models.doc2vec earlier in the notebook; presumably intentional so that the
# pickled train/test records resolve to this namedtuple - confirm.
TaggedDocument = namedtuple('TaggedDocument', 'words tags category')

> * words : 기사에서 나온 단어들 or keywords
> * tags : 문서 tag
> * category : 기사의 분류 (class label)
>> 기사분류가 daum보다 naver에서 더 세분화되어 있기 때문에 네이버의 category 분류를 이용하기로 함

## Category

In [None]:
# Load the fitted category LabelEncoder from its pickle cache, or fit a new one and cache it.
labelEncoderPath = './data/pre_data/news_tagged_data/pre_data_category_label_encoder_by_ct_for_fastText_news_classification.pickled'
if os.path.isfile(labelEncoderPath):
    # NOTE: unpickling executes arbitrary code; only load cache files produced by this project.
    with open(labelEncoderPath, 'rb') as f:
        le = pickle.load(f)
else:
    # `naverData` is not defined in the visible part of this notebook; it must already
    # exist in the session when the cache file is missing.
    le = LabelEncoder()
    le.fit(naverData['category'])
    # Context manager guarantees the cache is flushed and the handle closed.
    with open(labelEncoderPath, 'wb') as f:
        pickle.dump(le, f)
print (le.classes_)

In [None]:
# Directory holding the trained fastText models; its location depends on the host OS.
if sys.platform == 'win32':
    loadModelPath = 'd:/news_model/'
else:
    # macOS ('darwin') and every other platform (e.g. Linux) use the relative directory.
    # Previously non-darwin/non-win32 hosts left loadModelPath unbound, which surfaced
    # later as a NameError when the models were loaded.
    loadModelPath = './news_model/'
# Output directories for cached splits and fitted classifiers.
# NOTE(review): saveTrainPath ends in 'news_train_test_Data2' while the split pickles
# below are read from 'news_train_test_Data' - confirm which directory is intended.
saveTrainPath = './data/pre_data/news_train_test_Data2/'
saveClassifierPath = './data/pre_data/news_classifier/'

## Tagging Twitter

> 전 단계에서 필요한 사전 데이터는 만들어 둠

### Train Data Set & Test Data Set

In [None]:
trainName = './data/pre_data/news_train_test_Data/pre_data_fastText_train_for_news_classification_by_ct.pickled'
testName = './data/pre_data/news_train_test_Data/pre_data_fastText_test_for_news_classification_by_ct.pickled'

In [None]:
# Reuse the cached train/test split when both pickles exist; otherwise create and cache it.
if os.path.isfile(trainName) and os.path.isfile(testName):
    # NOTE: unpickling executes arbitrary code; only load files produced by this project.
    with open(trainName, 'rb') as f:
        train = pickle.load(f)
    with open(testName, 'rb') as f:
        test = pickle.load(f)
else:
    # `w2v_docs` is not defined in the visible part of this notebook; it must already
    # exist in the session when the cached split is missing.
    train, test = train_test_split(w2v_docs, test_size = 0.15)
    # Context managers guarantee the pickles are flushed and the handles closed.
    with open(trainName, 'wb') as f:
        pickle.dump(train, f)
    with open(testName, 'wb') as f:
        pickle.dump(test, f)

### train set을 사용하여 Tf-Idf vectorizer을 만듦

In [None]:
tfidf = bm.Build_tfidf(train)

In [None]:
# Separate every labelled record into its token list (input) and its category (target).
x_train = [doc.words for doc in tqdm(train)]
y_train = np.array([doc.category for doc in tqdm(train)])
x_test = [doc.words for doc in tqdm(test)]
y_test = np.array([doc.category for doc in tqdm(test)])

In [None]:
train_y2, test_y2 = bm.ReMake_Outcome(y_train, y_test)

### Load Model

In [None]:
model1 = FastText.load(loadModelPath+'fastText_size-500_epoch-20_ngrams-3_window-10_negative-7_hs-0_sg-0_cbow_mean-0_min_count-2_by-ct.model')
model2 = FastText.load(loadModelPath+'fastText_size-500_epoch-20_ngrams-3_window-10_negative-7_hs-0_sg-0_cbow_mean-1_min_count-2_by-ct.model')
model3 = FastText.load(loadModelPath+'fastText_size-500_epoch-20_ngrams-3_window-10_negative-7_hs-0_sg-1_cbow_mean-0_min_count-2_by-ct.model')

### model 1

In [None]:
wv1, train_vecs_w2v, test_vecs_w2v = bm.Make_Pre_Data(model1, tfidf, 500, train, test)
modelName = bm.Return_ModelName('fastText', model1,'ct')

#### t-SNE
> * t-분포 확률적 임베딩
> * 데이터의 차원 축소에 사용되는 기계 학습 알고리즘
> * 비선형 차원 축소 기법으로 고차원 데이터를 특히 2, 3차원 등으로 줄여 가시화하는데에 유용하게 사용
> * 비슷한 데이터는 근접한 2, 3차원의 지점으로, 다른 데이터는 멀리 떨어진 지점으로 맵핑
##### word : 10000

In [None]:
%%time
bm.Make_TSNE2(2, model1, wv1, 10000)

#### 분류모델 :  Logistic Regression

In [None]:
%%time
# Fit a logistic-regression classifier on the training vectors, report test-set metrics, then persist it.
classifier = LogisticRegression(max_iter = 250, n_jobs = cores)
classifier.fit(train_vecs_w2v, train_y2)
print (classifier.get_params())
print( 'score : {}'.format(classifier.score(test_vecs_w2v, test_y2)))
print ('classification report')
print (classification_report(test_y2, classifier.predict(test_vecs_w2v)))
# Write through a context manager so the pickle is flushed and the handle is closed.
with open(saveClassifierPath+'LogisticRegression_'+modelName, 'wb') as f:
    pickle.dump(classifier, f)

In [None]:
roc_auc_out1 = bm.Roc_Curve_MultiClass(test_vecs_w2v, test_y2, classifier, le, np.unique(train_y2))

In [None]:
bm.ConfusionMatrix_To_Heatmap(train_vecs_w2v, train_y2, test_vecs_w2v, test_y2, classifier, le)

#### 분류모델 : Random Forest Classifier

In [None]:
%%time
# Fit a 75-tree random forest on the training vectors, report test-set metrics, then persist it.
classifier2 = RandomForestClassifier(n_estimators = 75, n_jobs = cores)
classifier2.fit(train_vecs_w2v, train_y2)
print (classifier2.get_params())
print( 'score : {}'.format(classifier2.score(test_vecs_w2v, test_y2)))
print ('classification report')
print (classification_report(test_y2, classifier2.predict(test_vecs_w2v)))
# Write through a context manager so the pickle is flushed and the handle is closed.
with open(saveClassifierPath+'RandomForestClassifier_'+modelName, 'wb') as f:
    pickle.dump(classifier2, f)

In [None]:
roc_auc_out2 = bm.Roc_Curve_MultiClass(test_vecs_w2v, test_y2, classifier2, le, np.unique(train_y2))

In [None]:
bm.ConfusionMatrix_To_Heatmap(train_vecs_w2v, train_y2, test_vecs_w2v, test_y2, classifier2, le)

#### 분류모델 : C-Support Vector Classification

In [None]:
%%time
# Fit an RBF-kernel SVC on the training vectors, report test-set metrics, then persist it.
# max_iter = 1500 is a hard cap on solver iterations, so fitting may stop before convergence.
classifier3 =  SVC(kernel = 'rbf',
        cache_size= 1024, max_iter = 1500, verbose = True) 
classifier3.fit(train_vecs_w2v, train_y2)
print (classifier3.get_params())
print( 'score : {}'.format(classifier3.score(test_vecs_w2v, test_y2)))
print ('classification report')
print (classification_report(test_y2, classifier3.predict(test_vecs_w2v)))
# Write through a context manager so the pickle is flushed and the handle is closed.
with open(saveClassifierPath+'SVC_'+modelName, 'wb') as f:
    pickle.dump(classifier3, f)

In [None]:
roc_auc_out3 = bm.Roc_Curve_MultiClass(test_vecs_w2v, test_y2, classifier3, le, np.unique(train_y2))

In [None]:
bm.ConfusionMatrix_To_Heatmap(train_vecs_w2v, train_y2, test_vecs_w2v, test_y2, classifier3, le)

#### 분류모델 : XGBoost

In [None]:
%%time
dtrain = xgb.DMatrix(train_vecs_w2v, y_train)
dvalid = xgb.DMatrix(test_vecs_w2v, y_test)

In [None]:
%%time
# Tunable settings for the gradient-boosted tree model.
max_depth = 5
subsample = 0.7
colsample_bytree = 0.7
num_boost_round = 200
early_stopping_rounds = 10
test_size = 0.15

# Multi-class booster configuration; 'multi:softmax' makes predict() return class ids.
params = dict(
    objective = 'multi:softmax',
    booster = 'gbtree',
    max_depth = max_depth,
    subsample = subsample,
    eval_metric = 'mlogloss',
    colsample_bytree = colsample_bytree,
    silent = 1,
    eta = 0.175,
    nthread = cores,
    num_class = 8,
)

# Log-loss is tracked on both splits; the last entry ('eval') drives early stopping.
watchlist = [(dtrain, 'train'), (dvalid, 'eval')]
gbm = xgb.train(
    params,
    dtrain,
    num_boost_round,
    evals = watchlist,
    early_stopping_rounds = early_stopping_rounds,
    verbose_eval = True,
)

# NOTE(review): dvalid is built from the test vectors, so the accuracy below is measured
# on the same data that drove early stopping - consider a separate validation split.
test_prediction = gbm.predict(xgb.DMatrix(test_vecs_w2v))
test_acc = accuracy_score(y_test, test_prediction)
print (test_acc)
gbm.save_model(saveClassifierPath+'XGBoost_'+modelName)

In [None]:
roc_auc_out4 = bm.Roc_Curve_MultiClass(xgb.DMatrix(test_vecs_w2v), test_y2, gbm, le, np.unique(train_y2))

In [None]:
bm.ConfusionMatrix_To_Heatmap(xgb.DMatrix(train_vecs_w2v), train_y2, xgb.DMatrix(test_vecs_w2v), test_y2, gbm, le)

In [None]:
del classifier
del classifier2
del classifier3
del gbm

In [None]:
bm.Plot_Roc_Curver_Micro_Macro(roc_auc_out1, roc_auc_out2, roc_auc_out3, roc_auc_out4)

#### 분류모델  : Neural Network

In [None]:
y_test2 = np_utils.to_categorical(y_test,8)
y_train2 = np_utils.to_categorical(y_train,8)

In [None]:
# Feed-forward classifier #1: 500-d input -> 128 (relu) -> 32 (sigmoid, L2) -> 8-way softmax.
model = Sequential([
    Dense(128, activation='relu', input_dim=500),
    Dropout(0.3),
    Dense(32, activation='sigmoid', kernel_regularizer=l2(0.1)),
    Dropout(0.15),
    Dense(8, activation='softmax'),
])
model.compile(
    loss='categorical_crossentropy',
    optimizer='adadelta',
    metrics=['accuracy'],
)
# Keras holds out the last 20% of the training rows for per-epoch validation.
history = model.fit(
    train_vecs_w2v, y_train2,
    epochs=300,
    validation_split=0.2,
    verbose=0,  # built-in logging off; the TQDM callback reports progress instead
    callbacks=[TQDMNotebookCallback(show_inner=False)],
)
# Report loss and accuracy on the test split, then persist the trained network.
score, acc = model.evaluate(test_vecs_w2v, y_test2, verbose=0)
print('Score: %1.4f' % score)
print('Accuracy: %1.4f' % acc)
model.save(saveClassifierPath+'NeuralNetwork_1_'+modelName)

In [None]:
bm.plot_history(history)

In [None]:
# Feed-forward classifier #2: 500-d input -> 128 (relu) -> 64 (sigmoid, L2) -> 32 (relu, L2) -> 8-way softmax.
model = Sequential([
    Dense(128, activation='relu', input_dim=500),
    Dense(64, activation='sigmoid', kernel_regularizer=l2(0.01)),
    Dropout(0.15),
    Dense(32, activation='relu', kernel_regularizer=l2(0.01)),
    Dropout(0.15),
    Dense(8, activation='softmax'),
])
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)
# Keras holds out the last 20% of the training rows for per-epoch validation.
history = model.fit(
    train_vecs_w2v, y_train2,
    epochs=300,
    validation_split=0.2,
    verbose=0,  # built-in logging off; the TQDM callback reports progress instead
    callbacks=[TQDMNotebookCallback(show_inner=False)],
)
# Report loss and accuracy on the test split, then persist the trained network.
score, acc = model.evaluate(test_vecs_w2v, y_test2, verbose=0)
print('Score: %1.4f' % score)
print('Accuracy: %1.4f' % acc)
model.save(saveClassifierPath+'NeuralNetwork_2_'+modelName)

In [None]:
bm.plot_history(history)

### model 2

In [None]:
wv1, train_vecs_w2v, test_vecs_w2v = bm.Make_Pre_Data(model2, tfidf, 500, train, test)
modelName = bm.Return_ModelName('fastText', model2,'ct')

#### t-SNE
> * t-분포 확률적 임베딩
> * 데이터의 차원 축소에 사용되는 기계 학습 알고리즘
> * 비선형 차원 축소 기법으로 고차원 데이터를 특히 2, 3차원 등으로 줄여 가시화하는데에 유용하게 사용
> * 비슷한 데이터는 근접한 2, 3차원의 지점으로, 다른 데이터는 멀리 떨어진 지점으로 맵핑
##### word : 10000

In [None]:
%%time
bm.Make_TSNE2(2, model2, wv1, 10000)

#### 분류모델 :  Logistic Regression

In [None]:
%%time
# Fit a logistic-regression classifier on the training vectors, report test-set metrics, then persist it.
classifier = LogisticRegression(max_iter = 250, n_jobs = cores)
classifier.fit(train_vecs_w2v, train_y2)
print (classifier.get_params())
print( 'score : {}'.format(classifier.score(test_vecs_w2v, test_y2)))
print ('classification report')
print (classification_report(test_y2, classifier.predict(test_vecs_w2v)))
# Write through a context manager so the pickle is flushed and the handle is closed.
with open(saveClassifierPath+'LogisticRegression_'+modelName, 'wb') as f:
    pickle.dump(classifier, f)

In [None]:
roc_auc_out1 = bm.Roc_Curve_MultiClass(test_vecs_w2v, test_y2, classifier, le, np.unique(train_y2))

In [None]:
bm.ConfusionMatrix_To_Heatmap(train_vecs_w2v, train_y2, test_vecs_w2v, test_y2, classifier, le)

#### 분류모델 : Random Forest Classifier

In [None]:
%%time
# Fit a 75-tree random forest on the training vectors, report test-set metrics, then persist it.
classifier2 = RandomForestClassifier(n_estimators = 75, n_jobs = cores)
classifier2.fit(train_vecs_w2v, train_y2)
print (classifier2.get_params())
print( 'score : {}'.format(classifier2.score(test_vecs_w2v, test_y2)))
print ('classification report')
print (classification_report(test_y2, classifier2.predict(test_vecs_w2v)))
# Write through a context manager so the pickle is flushed and the handle is closed.
with open(saveClassifierPath+'RandomForestClassifier_'+modelName, 'wb') as f:
    pickle.dump(classifier2, f)

In [None]:
roc_auc_out2 = bm.Roc_Curve_MultiClass(test_vecs_w2v, test_y2, classifier2, le, np.unique(train_y2))

In [None]:
bm.ConfusionMatrix_To_Heatmap(train_vecs_w2v, train_y2, test_vecs_w2v, test_y2, classifier2, le)

#### 분류모델 : C-Support Vector Classification

In [None]:
%%time
# Fit an RBF-kernel SVC on the training vectors, report test-set metrics, then persist it.
# max_iter = 1500 is a hard cap on solver iterations, so fitting may stop before convergence.
classifier3 =  SVC(kernel = 'rbf',
        cache_size= 1024, max_iter = 1500, verbose = True) 
classifier3.fit(train_vecs_w2v, train_y2)
print (classifier3.get_params())
print( 'score : {}'.format(classifier3.score(test_vecs_w2v, test_y2)))
print ('classification report')
print (classification_report(test_y2, classifier3.predict(test_vecs_w2v)))
# Write through a context manager so the pickle is flushed and the handle is closed.
with open(saveClassifierPath+'SVC_'+modelName, 'wb') as f:
    pickle.dump(classifier3, f)

In [None]:
roc_auc_out3 = bm.Roc_Curve_MultiClass(test_vecs_w2v, test_y2, classifier3, le, np.unique(train_y2))

In [None]:
bm.ConfusionMatrix_To_Heatmap(train_vecs_w2v, train_y2, test_vecs_w2v, test_y2, classifier3, le)

#### 분류모델 : XGBoost

In [None]:
%%time
dtrain = xgb.DMatrix(train_vecs_w2v, y_train)
dvalid = xgb.DMatrix(test_vecs_w2v, y_test)

In [None]:
%%time
# Tunable settings for the gradient-boosted tree model.
max_depth = 5
subsample = 0.7
colsample_bytree = 0.7
num_boost_round = 200
early_stopping_rounds = 10
test_size = 0.15

# Multi-class booster configuration; 'multi:softmax' makes predict() return class ids.
params = dict(
    objective = 'multi:softmax',
    booster = 'gbtree',
    max_depth = max_depth,
    subsample = subsample,
    eval_metric = 'mlogloss',
    colsample_bytree = colsample_bytree,
    silent = 1,
    eta = 0.175,
    nthread = cores,
    num_class = 8,
)

# Log-loss is tracked on both splits; the last entry ('eval') drives early stopping.
watchlist = [(dtrain, 'train'), (dvalid, 'eval')]
gbm = xgb.train(
    params,
    dtrain,
    num_boost_round,
    evals = watchlist,
    early_stopping_rounds = early_stopping_rounds,
    verbose_eval = True,
)

# NOTE(review): dvalid is built from the test vectors, so the accuracy below is measured
# on the same data that drove early stopping - consider a separate validation split.
test_prediction = gbm.predict(xgb.DMatrix(test_vecs_w2v))
test_acc = accuracy_score(y_test, test_prediction)
print (test_acc)
gbm.save_model(saveClassifierPath+'XGBoost_'+modelName)

In [None]:
roc_auc_out4 = bm.Roc_Curve_MultiClass(xgb.DMatrix(test_vecs_w2v), test_y2, gbm, le, np.unique(train_y2))

In [None]:
bm.ConfusionMatrix_To_Heatmap(xgb.DMatrix(train_vecs_w2v), train_y2, xgb.DMatrix(test_vecs_w2v), test_y2, gbm, le)

In [None]:
del classifier
del classifier2
del classifier3
del gbm

In [None]:
bm.Plot_Roc_Curver_Micro_Macro(roc_auc_out1, roc_auc_out2, roc_auc_out3, roc_auc_out4)

#### 분류모델  : Neural Network

In [None]:
y_test2 = np_utils.to_categorical(y_test,8)
y_train2 = np_utils.to_categorical(y_train,8)

In [None]:
# Feed-forward classifier #1: 500-d input -> 128 (relu) -> 32 (sigmoid, L2) -> 8-way softmax.
model = Sequential([
    Dense(128, activation='relu', input_dim=500),
    Dropout(0.3),
    Dense(32, activation='sigmoid', kernel_regularizer=l2(0.1)),
    Dropout(0.15),
    Dense(8, activation='softmax'),
])
model.compile(
    loss='categorical_crossentropy',
    optimizer='adadelta',
    metrics=['accuracy'],
)
# Keras holds out the last 20% of the training rows for per-epoch validation.
history = model.fit(
    train_vecs_w2v, y_train2,
    epochs=300,
    validation_split=0.2,
    verbose=0,  # built-in logging off; the TQDM callback reports progress instead
    callbacks=[TQDMNotebookCallback(show_inner=False)],
)
# Report loss and accuracy on the test split, then persist the trained network.
score, acc = model.evaluate(test_vecs_w2v, y_test2, verbose=0)
print('Score: %1.4f' % score)
print('Accuracy: %1.4f' % acc)
model.save(saveClassifierPath+'NeuralNetwork_1_'+modelName)

In [None]:
bm.plot_history(history)

In [None]:
# Feed-forward classifier #2: 500-d input -> 128 (relu) -> 64 (sigmoid, L2) -> 32 (relu, L2) -> 8-way softmax.
model = Sequential([
    Dense(128, activation='relu', input_dim=500),
    Dense(64, activation='sigmoid', kernel_regularizer=l2(0.01)),
    Dropout(0.15),
    Dense(32, activation='relu', kernel_regularizer=l2(0.01)),
    Dropout(0.15),
    Dense(8, activation='softmax'),
])
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)
# Keras holds out the last 20% of the training rows for per-epoch validation.
history = model.fit(
    train_vecs_w2v, y_train2,
    epochs=300,
    validation_split=0.2,
    verbose=0,  # built-in logging off; the TQDM callback reports progress instead
    callbacks=[TQDMNotebookCallback(show_inner=False)],
)
# Report loss and accuracy on the test split, then persist the trained network.
score, acc = model.evaluate(test_vecs_w2v, y_test2, verbose=0)
print('Score: %1.4f' % score)
print('Accuracy: %1.4f' % acc)
model.save(saveClassifierPath+'NeuralNetwork_2_'+modelName)

In [None]:
bm.plot_history(history)

### model 3

In [None]:
wv1, train_vecs_w2v, test_vecs_w2v = bm.Make_Pre_Data(model3, tfidf, 500, train, test)
modelName = bm.Return_ModelName('fastText', model3,'ct')

#### t-SNE
> * t-분포 확률적 임베딩
> * 데이터의 차원 축소에 사용되는 기계 학습 알고리즘
> * 비선형 차원 축소 기법으로 고차원 데이터를 특히 2, 3차원 등으로 줄여 가시화하는데에 유용하게 사용
> * 비슷한 데이터는 근접한 2, 3차원의 지점으로, 다른 데이터는 멀리 떨어진 지점으로 맵핑
##### word : 10000

In [None]:
%%time
bm.Make_TSNE2(2, model3, wv1, 10000)

#### 분류모델 :  Logistic Regression

In [None]:
%%time
# Fit a logistic-regression classifier on the training vectors, report test-set metrics, then persist it.
classifier = LogisticRegression(max_iter = 250, n_jobs = cores)
classifier.fit(train_vecs_w2v, train_y2)
print (classifier.get_params())
print( 'score : {}'.format(classifier.score(test_vecs_w2v, test_y2)))
print ('classification report')
print (classification_report(test_y2, classifier.predict(test_vecs_w2v)))
# Write through a context manager so the pickle is flushed and the handle is closed.
with open(saveClassifierPath+'LogisticRegression_'+modelName, 'wb') as f:
    pickle.dump(classifier, f)

In [None]:
roc_auc_out1 = bm.Roc_Curve_MultiClass(test_vecs_w2v, test_y2, classifier, le, np.unique(train_y2))

In [None]:
bm.ConfusionMatrix_To_Heatmap(train_vecs_w2v, train_y2, test_vecs_w2v, test_y2, classifier, le)

#### 분류모델 : Random Forest Classifier

In [None]:
%%time
# Fit a 75-tree random forest on the training vectors, report test-set metrics, then persist it.
classifier2 = RandomForestClassifier(n_estimators = 75, n_jobs = cores)
classifier2.fit(train_vecs_w2v, train_y2)
print (classifier2.get_params())
print( 'score : {}'.format(classifier2.score(test_vecs_w2v, test_y2)))
print ('classification report')
print (classification_report(test_y2, classifier2.predict(test_vecs_w2v)))
# Write through a context manager so the pickle is flushed and the handle is closed.
with open(saveClassifierPath+'RandomForestClassifier_'+modelName, 'wb') as f:
    pickle.dump(classifier2, f)

In [None]:
roc_auc_out2 = bm.Roc_Curve_MultiClass(test_vecs_w2v, test_y2, classifier2, le, np.unique(train_y2))

In [None]:
bm.ConfusionMatrix_To_Heatmap(train_vecs_w2v, train_y2, test_vecs_w2v, test_y2, classifier2, le)

#### 분류모델 : C-Support Vector Classification

In [None]:
%%time
# Fit an RBF-kernel SVC on the training vectors, report test-set metrics, then persist it.
# max_iter = 1500 is a hard cap on solver iterations, so fitting may stop before convergence.
classifier3 =  SVC(kernel = 'rbf',
        cache_size= 1024, max_iter = 1500, verbose = True) 
classifier3.fit(train_vecs_w2v, train_y2)
print (classifier3.get_params())
print( 'score : {}'.format(classifier3.score(test_vecs_w2v, test_y2)))
print ('classification report')
print (classification_report(test_y2, classifier3.predict(test_vecs_w2v)))
# Write through a context manager so the pickle is flushed and the handle is closed.
with open(saveClassifierPath+'SVC_'+modelName, 'wb') as f:
    pickle.dump(classifier3, f)

In [None]:
roc_auc_out3 = bm.Roc_Curve_MultiClass(test_vecs_w2v, test_y2, classifier3, le, np.unique(train_y2))

In [None]:
bm.ConfusionMatrix_To_Heatmap(train_vecs_w2v, train_y2, test_vecs_w2v, test_y2, classifier3, le)

#### 분류모델 : XGBoost

In [None]:
%%time
dtrain = xgb.DMatrix(train_vecs_w2v, y_train)
dvalid = xgb.DMatrix(test_vecs_w2v, y_test)

In [None]:
%%time
# Tunable settings for the gradient-boosted tree model.
max_depth = 5
subsample = 0.7
colsample_bytree = 0.7
num_boost_round = 200
early_stopping_rounds = 10
test_size = 0.15

# Multi-class booster configuration; 'multi:softmax' makes predict() return class ids.
params = dict(
    objective = 'multi:softmax',
    booster = 'gbtree',
    max_depth = max_depth,
    subsample = subsample,
    eval_metric = 'mlogloss',
    colsample_bytree = colsample_bytree,
    silent = 1,
    eta = 0.175,
    nthread = cores,
    num_class = 8,
)

# Log-loss is tracked on both splits; the last entry ('eval') drives early stopping.
watchlist = [(dtrain, 'train'), (dvalid, 'eval')]
gbm = xgb.train(
    params,
    dtrain,
    num_boost_round,
    evals = watchlist,
    early_stopping_rounds = early_stopping_rounds,
    verbose_eval = True,
)

# NOTE(review): dvalid is built from the test vectors, so the accuracy below is measured
# on the same data that drove early stopping - consider a separate validation split.
test_prediction = gbm.predict(xgb.DMatrix(test_vecs_w2v))
test_acc = accuracy_score(y_test, test_prediction)
print (test_acc)
gbm.save_model(saveClassifierPath+'XGBoost_'+modelName)

In [None]:
roc_auc_out4 = bm.Roc_Curve_MultiClass(xgb.DMatrix(test_vecs_w2v), test_y2, gbm, le, np.unique(train_y2))

In [None]:
bm.ConfusionMatrix_To_Heatmap(xgb.DMatrix(train_vecs_w2v), train_y2, xgb.DMatrix(test_vecs_w2v), test_y2, gbm, le)

In [None]:
del classifier
del classifier2
del classifier3
del gbm

In [None]:
bm.Plot_Roc_Curver_Micro_Macro(roc_auc_out1, roc_auc_out2, roc_auc_out3, roc_auc_out4)

#### 분류모델  : Neural Network

In [None]:
y_test2 = np_utils.to_categorical(y_test,8)
y_train2 = np_utils.to_categorical(y_train,8)

In [None]:
# Feed-forward classifier #1: 500-d input -> 128 (relu) -> 32 (sigmoid, L2) -> 8-way softmax.
model = Sequential([
    Dense(128, activation='relu', input_dim=500),
    Dropout(0.3),
    Dense(32, activation='sigmoid', kernel_regularizer=l2(0.1)),
    Dropout(0.15),
    Dense(8, activation='softmax'),
])
model.compile(
    loss='categorical_crossentropy',
    optimizer='adadelta',
    metrics=['accuracy'],
)
# Keras holds out the last 20% of the training rows for per-epoch validation.
history = model.fit(
    train_vecs_w2v, y_train2,
    epochs=300,
    validation_split=0.2,
    verbose=0,  # built-in logging off; the TQDM callback reports progress instead
    callbacks=[TQDMNotebookCallback(show_inner=False)],
)
# Report loss and accuracy on the test split, then persist the trained network.
score, acc = model.evaluate(test_vecs_w2v, y_test2, verbose=0)
print('Score: %1.4f' % score)
print('Accuracy: %1.4f' % acc)
model.save(saveClassifierPath+'NeuralNetwork_1_'+modelName)

In [None]:
bm.plot_history(history)

In [None]:
# Feed-forward classifier #2: 500-d input -> 128 (relu) -> 64 (sigmoid, L2) -> 32 (relu, L2) -> 8-way softmax.
model = Sequential([
    Dense(128, activation='relu', input_dim=500),
    Dense(64, activation='sigmoid', kernel_regularizer=l2(0.01)),
    Dropout(0.15),
    Dense(32, activation='relu', kernel_regularizer=l2(0.01)),
    Dropout(0.15),
    Dense(8, activation='softmax'),
])
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)
# Keras holds out the last 20% of the training rows for per-epoch validation.
history = model.fit(
    train_vecs_w2v, y_train2,
    epochs=300,
    validation_split=0.2,
    verbose=0,  # built-in logging off; the TQDM callback reports progress instead
    callbacks=[TQDMNotebookCallback(show_inner=False)],
)
# Report loss and accuracy on the test split, then persist the trained network.
score, acc = model.evaluate(test_vecs_w2v, y_test2, verbose=0)
print('Score: %1.4f' % score)
print('Accuracy: %1.4f' % acc)
model.save(saveClassifierPath+'NeuralNetwork_2_'+modelName)

In [None]:
bm.plot_history(history)

## Tagging Mecab

> 전 단계에서 필요한 사전 데이터는 만들어 둠

### Train Data Set & Test Data Set

In [None]:
trainName = './data/pre_data/news_train_test_Data/pre_data_fastText_train_for_news_classification_by_mecab.pickled'
testName = './data/pre_data/news_train_test_Data/pre_data_fastText_test_for_news_classification_by_mecab.pickled'

In [None]:
# Reuse the cached train/test split when both pickles exist; otherwise create and cache it.
if os.path.isfile(trainName) and os.path.isfile(testName):
    # NOTE: unpickling executes arbitrary code; only load files produced by this project.
    with open(trainName, 'rb') as f:
        train = pickle.load(f)
    with open(testName, 'rb') as f:
        test = pickle.load(f)
else:
    # `w2v_docs` is not defined in the visible part of this notebook; it must already
    # exist in the session when the cached split is missing.
    train, test = train_test_split(w2v_docs, test_size = 0.15)
    # Context managers guarantee the pickles are flushed and the handles closed.
    with open(trainName, 'wb') as f:
        pickle.dump(train, f)
    with open(testName, 'wb') as f:
        pickle.dump(test, f)

### train set을 사용하여 Tf-Idf vectorizer을 만듦

In [None]:
tfidf = bm.Build_tfidf(train)

In [None]:
# Separate every labelled record into its token list (input) and its category (target).
x_train = [doc.words for doc in tqdm(train)]
y_train = np.array([doc.category for doc in tqdm(train)])
x_test = [doc.words for doc in tqdm(test)]
y_test = np.array([doc.category for doc in tqdm(test)])

In [None]:
train_y2, test_y2 = bm.ReMake_Outcome(y_train, y_test)

### Load Model

In [None]:
model1 = FastText.load(loadModelPath+'fastText_size-500_epoch-20_ngrams-3_window-10_negative-7_hs-0_sg-0_cbow_mean-0_min_count-2_by-mecab.model')
model2 = FastText.load(loadModelPath+'fastText_size-500_epoch-20_ngrams-3_window-10_negative-7_hs-0_sg-0_cbow_mean-1_min_count-2_by-mecab.model')
model3 = FastText.load(loadModelPath+'fastText_size-500_epoch-20_ngrams-3_window-10_negative-7_hs-0_sg-1_cbow_mean-0_min_count-2_by-mecab.model')

### model 1

In [None]:
wv1, train_vecs_w2v, test_vecs_w2v = bm.Make_Pre_Data(model1, tfidf, 500, train, test)
modelName = bm.Return_ModelName('fastText', model1,'mecab')

#### t-SNE
> * t-분포 확률적 임베딩
> * 데이터의 차원 축소에 사용되는 기계 학습 알고리즘
> * 비선형 차원 축소 기법으로 고차원 데이터를 특히 2, 3차원 등으로 줄여 가시화하는데에 유용하게 사용
> * 비슷한 데이터는 근접한 2, 3차원의 지점으로, 다른 데이터는 멀리 떨어진 지점으로 맵핑
##### word : 10000

In [None]:
%%time
bm.Make_TSNE2(2, model1, wv1, 10000)

#### 분류모델 :  Logistic Regression

In [None]:
%%time
# Fit a logistic-regression classifier on the training vectors, report test-set metrics, then persist it.
classifier = LogisticRegression(max_iter = 250, n_jobs = cores)
classifier.fit(train_vecs_w2v, train_y2)
print (classifier.get_params())
print( 'score : {}'.format(classifier.score(test_vecs_w2v, test_y2)))
print ('classification report')
print (classification_report(test_y2, classifier.predict(test_vecs_w2v)))
# Write through a context manager so the pickle is flushed and the handle is closed.
with open(saveClassifierPath+'LogisticRegression_'+modelName, 'wb') as f:
    pickle.dump(classifier, f)

In [None]:
roc_auc_out1 = bm.Roc_Curve_MultiClass(test_vecs_w2v, test_y2, classifier, le, np.unique(train_y2))

In [None]:
bm.ConfusionMatrix_To_Heatmap(train_vecs_w2v, train_y2, test_vecs_w2v, test_y2, classifier, le)

#### 분류모델 : Random Forest Classifier

In [None]:
%%time
# Fit a 75-tree random forest on the training vectors, report test-set metrics, then persist it.
classifier2 = RandomForestClassifier(n_estimators = 75, n_jobs = cores)
classifier2.fit(train_vecs_w2v, train_y2)
print (classifier2.get_params())
print( 'score : {}'.format(classifier2.score(test_vecs_w2v, test_y2)))
print ('classification report')
print (classification_report(test_y2, classifier2.predict(test_vecs_w2v)))
# Write through a context manager so the pickle is flushed and the handle is closed.
with open(saveClassifierPath+'RandomForestClassifier_'+modelName, 'wb') as f:
    pickle.dump(classifier2, f)

In [None]:
roc_auc_out2 = bm.Roc_Curve_MultiClass(test_vecs_w2v, test_y2, classifier2, le, np.unique(train_y2))

In [None]:
bm.ConfusionMatrix_To_Heatmap(train_vecs_w2v, train_y2, test_vecs_w2v, test_y2, classifier2, le)

#### 분류모델 : C-Support Vector Classification

In [None]:
%%time
# Fit an RBF-kernel SVC on the training vectors, report test-set metrics, then persist it.
# max_iter = 1500 is a hard cap on solver iterations, so fitting may stop before convergence.
classifier3 =  SVC(kernel = 'rbf',
        cache_size= 1024, max_iter = 1500, verbose = True) 
classifier3.fit(train_vecs_w2v, train_y2)
print (classifier3.get_params())
print( 'score : {}'.format(classifier3.score(test_vecs_w2v, test_y2)))
print ('classification report')
print (classification_report(test_y2, classifier3.predict(test_vecs_w2v)))
# Write through a context manager so the pickle is flushed and the handle is closed.
with open(saveClassifierPath+'SVC_'+modelName, 'wb') as f:
    pickle.dump(classifier3, f)

In [None]:
roc_auc_out3 = bm.Roc_Curve_MultiClass(test_vecs_w2v, test_y2, classifier3, le, np.unique(train_y2))

In [None]:
bm.ConfusionMatrix_To_Heatmap(train_vecs_w2v, train_y2, test_vecs_w2v, test_y2, classifier3, le)

#### 분류모델 : XGBoost

In [None]:
%%time
dtrain = xgb.DMatrix(train_vecs_w2v, y_train)
dvalid = xgb.DMatrix(test_vecs_w2v, y_test)

In [None]:
%%time
# Tunable settings for the gradient-boosted tree model.
max_depth = 5
subsample = 0.7
colsample_bytree = 0.7
num_boost_round = 200
early_stopping_rounds = 10
test_size = 0.15

# Multi-class booster configuration; 'multi:softmax' makes predict() return class ids.
params = dict(
    objective = 'multi:softmax',
    booster = 'gbtree',
    max_depth = max_depth,
    subsample = subsample,
    eval_metric = 'mlogloss',
    colsample_bytree = colsample_bytree,
    silent = 1,
    eta = 0.175,
    nthread = cores,
    num_class = 8,
)

# Log-loss is tracked on both splits; the last entry ('eval') drives early stopping.
watchlist = [(dtrain, 'train'), (dvalid, 'eval')]
gbm = xgb.train(
    params,
    dtrain,
    num_boost_round,
    evals = watchlist,
    early_stopping_rounds = early_stopping_rounds,
    verbose_eval = True,
)

# NOTE(review): dvalid is built from the test vectors, so the accuracy below is measured
# on the same data that drove early stopping - consider a separate validation split.
test_prediction = gbm.predict(xgb.DMatrix(test_vecs_w2v))
test_acc = accuracy_score(y_test, test_prediction)
print (test_acc)
gbm.save_model(saveClassifierPath+'XGBoost_'+modelName)

In [None]:
roc_auc_out4 = bm.Roc_Curve_MultiClass(xgb.DMatrix(test_vecs_w2v), test_y2, gbm, le, np.unique(train_y2))

In [None]:
bm.ConfusionMatrix_To_Heatmap(xgb.DMatrix(train_vecs_w2v), train_y2, xgb.DMatrix(test_vecs_w2v), test_y2, gbm, le)

In [None]:
del classifier
del classifier2
del classifier3
del gbm

In [None]:
bm.Plot_Roc_Curver_Micro_Macro(roc_auc_out1, roc_auc_out2, roc_auc_out3, roc_auc_out4)

#### 분류모델  : Neural Network

In [None]:
y_test2 = np_utils.to_categorical(y_test,8)
y_train2 = np_utils.to_categorical(y_train,8)

In [None]:
# Feed-forward classifier #1: 500-d input -> 128 (relu) -> 32 (sigmoid, L2) -> 8-way softmax.
model = Sequential([
    Dense(128, activation='relu', input_dim=500),
    Dropout(0.3),
    Dense(32, activation='sigmoid', kernel_regularizer=l2(0.1)),
    Dropout(0.15),
    Dense(8, activation='softmax'),
])
model.compile(
    loss='categorical_crossentropy',
    optimizer='adadelta',
    metrics=['accuracy'],
)
# Keras holds out the last 20% of the training rows for per-epoch validation.
history = model.fit(
    train_vecs_w2v, y_train2,
    epochs=300,
    validation_split=0.2,
    verbose=0,  # built-in logging off; the TQDM callback reports progress instead
    callbacks=[TQDMNotebookCallback(show_inner=False)],
)
# Report loss and accuracy on the test split, then persist the trained network.
score, acc = model.evaluate(test_vecs_w2v, y_test2, verbose=0)
print('Score: %1.4f' % score)
print('Accuracy: %1.4f' % acc)
model.save(saveClassifierPath+'NeuralNetwork_1_'+modelName)

In [None]:
bm.plot_history(history)

In [None]:
# Feed-forward classifier #2: 500-d input -> 128 (relu) -> 64 (sigmoid, L2) -> 32 (relu, L2) -> 8-way softmax.
model = Sequential([
    Dense(128, activation='relu', input_dim=500),
    Dense(64, activation='sigmoid', kernel_regularizer=l2(0.01)),
    Dropout(0.15),
    Dense(32, activation='relu', kernel_regularizer=l2(0.01)),
    Dropout(0.15),
    Dense(8, activation='softmax'),
])
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)
# Keras holds out the last 20% of the training rows for per-epoch validation.
history = model.fit(
    train_vecs_w2v, y_train2,
    epochs=300,
    validation_split=0.2,
    verbose=0,  # built-in logging off; the TQDM callback reports progress instead
    callbacks=[TQDMNotebookCallback(show_inner=False)],
)
# Report loss and accuracy on the test split, then persist the trained network.
score, acc = model.evaluate(test_vecs_w2v, y_test2, verbose=0)
print('Score: %1.4f' % score)
print('Accuracy: %1.4f' % acc)
model.save(saveClassifierPath+'NeuralNetwork_2_'+modelName)

In [None]:
bm.plot_history(history)

### model 2

In [None]:
wv1, train_vecs_w2v, test_vecs_w2v = bm.Make_Pre_Data(model2, tfidf, 500, train, test)
modelName = bm.Return_ModelName('fastText', model2,'mecab')

#### t-SNE
> * t-분포 확률적 임베딩
> * 데이터의 차원 축소에 사용되는 기계 학습 알고리즘
> * 비선형 차원 축소 기법으로 고차원 데이터를 특히 2, 3차원 등으로 줄여 가시화하는데에 유용하게 사용
> * 비슷한 데이터는 근접한 2, 3차원의 지점으로, 다른 데이터는 멀리 떨어진 지점으로 맵핑
##### word : 10000

In [None]:
%%time
bm.Make_TSNE2(2, model2, wv1, 10000)

#### 분류모델 :  Logistic Regression

In [None]:
%%time
# Fit a logistic-regression classifier on the training vectors, report test-set metrics, then persist it.
classifier = LogisticRegression(max_iter = 250, n_jobs = cores)
classifier.fit(train_vecs_w2v, train_y2)
print (classifier.get_params())
print( 'score : {}'.format(classifier.score(test_vecs_w2v, test_y2)))
print ('classification report')
print (classification_report(test_y2, classifier.predict(test_vecs_w2v)))
# Write through a context manager so the pickle is flushed and the handle is closed.
with open(saveClassifierPath+'LogisticRegression_'+modelName, 'wb') as f:
    pickle.dump(classifier, f)

In [None]:
roc_auc_out1 = bm.Roc_Curve_MultiClass(test_vecs_w2v, test_y2, classifier, le, np.unique(train_y2))

In [None]:
bm.ConfusionMatrix_To_Heatmap(train_vecs_w2v, train_y2, test_vecs_w2v, test_y2, classifier, le)

#### 분류모델 : Random Forest Classifier

In [None]:
%%time
# Fit a 75-tree random forest on the training vectors, report test-set metrics, then persist it.
classifier2 = RandomForestClassifier(n_estimators = 75, n_jobs = cores)
classifier2.fit(train_vecs_w2v, train_y2)
print (classifier2.get_params())
print( 'score : {}'.format(classifier2.score(test_vecs_w2v, test_y2)))
print ('classification report')
print (classification_report(test_y2, classifier2.predict(test_vecs_w2v)))
# Write through a context manager so the pickle is flushed and the handle is closed.
with open(saveClassifierPath+'RandomForestClassifier_'+modelName, 'wb') as f:
    pickle.dump(classifier2, f)

In [None]:
roc_auc_out2 = bm.Roc_Curve_MultiClass(test_vecs_w2v, test_y2, classifier2, le, np.unique(train_y2))

In [None]:
bm.ConfusionMatrix_To_Heatmap(train_vecs_w2v, train_y2, test_vecs_w2v, test_y2, classifier2, le)

#### 분류모델 : C-Support Vector Classification

In [None]:
%%time
# Fit an RBF-kernel SVC on the training vectors, report test-set metrics, then persist it.
# max_iter = 1500 is a hard cap on solver iterations, so fitting may stop before convergence.
classifier3 =  SVC(kernel = 'rbf',
        cache_size= 1024, max_iter = 1500, verbose = True) 
classifier3.fit(train_vecs_w2v, train_y2)
print (classifier3.get_params())
print( 'score : {}'.format(classifier3.score(test_vecs_w2v, test_y2)))
print ('classification report')
print (classification_report(test_y2, classifier3.predict(test_vecs_w2v)))
# Write through a context manager so the pickle is flushed and the handle is closed.
with open(saveClassifierPath+'SVC_'+modelName, 'wb') as f:
    pickle.dump(classifier3, f)

In [None]:
roc_auc_out3 = bm.Roc_Curve_MultiClass(test_vecs_w2v, test_y2, classifier3, le, np.unique(train_y2))

In [None]:
bm.ConfusionMatrix_To_Heatmap(train_vecs_w2v, train_y2, test_vecs_w2v, test_y2, classifier3, le)

#### 분류모델 : XGBoost

In [None]:
%%time
dtrain = xgb.DMatrix(train_vecs_w2v, y_train)
dvalid = xgb.DMatrix(test_vecs_w2v, y_test)

In [None]:
%%time
# Tunable settings for the gradient-boosted tree model.
max_depth = 5
subsample = 0.7
colsample_bytree = 0.7
num_boost_round = 200
early_stopping_rounds = 10
test_size = 0.15

# Multi-class booster configuration; 'multi:softmax' makes predict() return class ids.
params = dict(
    objective = 'multi:softmax',
    booster = 'gbtree',
    max_depth = max_depth,
    subsample = subsample,
    eval_metric = 'mlogloss',
    colsample_bytree = colsample_bytree,
    silent = 1,
    eta = 0.175,
    nthread = cores,
    num_class = 8,
)

# Log-loss is tracked on both splits; the last entry ('eval') drives early stopping.
watchlist = [(dtrain, 'train'), (dvalid, 'eval')]
gbm = xgb.train(
    params,
    dtrain,
    num_boost_round,
    evals = watchlist,
    early_stopping_rounds = early_stopping_rounds,
    verbose_eval = True,
)

# NOTE(review): dvalid is built from the test vectors, so the accuracy below is measured
# on the same data that drove early stopping - consider a separate validation split.
test_prediction = gbm.predict(xgb.DMatrix(test_vecs_w2v))
test_acc = accuracy_score(y_test, test_prediction)
print (test_acc)
gbm.save_model(saveClassifierPath+'XGBoost_'+modelName)

In [None]:
roc_auc_out4 = bm.Roc_Curve_MultiClass(xgb.DMatrix(test_vecs_w2v), test_y2, gbm, le, np.unique(train_y2))

In [None]:
# Confusion-matrix heatmap for the XGBoost booster; both splits are passed as
# freshly built DMatrix objects.
bm.ConfusionMatrix_To_Heatmap(xgb.DMatrix(train_vecs_w2v), train_y2, xgb.DMatrix(test_vecs_w2v), test_y2, gbm, le)

In [None]:
# Drop the notebook's references to the four fitted models (presumably to
# release their memory before the neural-network cells -- confirm).
del classifier, classifier2, classifier3, gbm

In [None]:
# Combined ROC plot over the four ROC results collected above
# (outputs 1-4; 2, 3 and 4 come from classifier2, the SVC and XGBoost).
# Helper is project code; micro/macro averaging is inferred from its name -- confirm.
bm.Plot_Roc_Curver_Micro_Macro(roc_auc_out1, roc_auc_out2, roc_auc_out3, roc_auc_out4)

#### 분류모델 : Neural Network

In [None]:
# One-hot encode the labels (8 classes) for the softmax networks below.
y_test2, y_train2 = (
    np_utils.to_categorical(labels, 8) for labels in (y_test, y_train)
)

In [None]:
# Network 1: 500-d input -> 128 ReLU -> 32 sigmoid (L2-regularised) -> 8-way
# softmax, with dropout after each hidden layer.
model = Sequential([
    Dense(128, activation='relu', input_dim=500),
    Dropout(0.3),
    Dense(32, activation='sigmoid', kernel_regularizer=l2(0.1)),
    Dropout(0.15),
    Dense(8, activation='softmax'),
])
model.compile(
    loss='categorical_crossentropy',
    optimizer='adadelta',
    metrics=['accuracy'],
)

# Train silently (progress comes from the TQDM callback); 20% of the training
# data is held out for validation.
history = model.fit(
    train_vecs_w2v,
    y_train2,
    epochs=300,
    verbose=0,
    validation_split=0.2,
    callbacks=[TQDMNotebookCallback(show_inner=False)],
)

# evaluate() returns the loss followed by the compiled accuracy metric.
score, acc = model.evaluate(test_vecs_w2v, y_test2, verbose=0)
print('Score: %1.4f' % score)
print('Accuracy: %1.4f' % acc)
model.save(saveClassifierPath + 'NeuralNetwork_1_' + modelName)

In [None]:
# Plot the Keras History returned by the preceding model.fit call
# (project helper; what it draws is not visible here).
bm.plot_history(history)

In [None]:
# Network 2: 500-d input -> 128 ReLU -> 64 sigmoid -> 32 ReLU -> 8-way softmax;
# the 64- and 32-unit layers are L2-regularised and followed by dropout.
model = Sequential([
    Dense(128, activation='relu', input_dim=500),
    Dense(64, activation='sigmoid', kernel_regularizer=l2(0.01)),
    Dropout(0.15),
    Dense(32, activation='relu', kernel_regularizer=l2(0.01)),
    Dropout(0.15),
    Dense(8, activation='softmax'),
])
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

# Train silently (progress comes from the TQDM callback); 20% of the training
# data is held out for validation.
history = model.fit(
    train_vecs_w2v,
    y_train2,
    epochs=300,
    verbose=0,
    validation_split=0.2,
    callbacks=[TQDMNotebookCallback(show_inner=False)],
)

# evaluate() returns the loss followed by the compiled accuracy metric.
score, acc = model.evaluate(test_vecs_w2v, y_test2, verbose=0)
print('Score: %1.4f' % score)
print('Accuracy: %1.4f' % acc)
model.save(saveClassifierPath + 'NeuralNetwork_2_' + modelName)

In [None]:
# Plot the Keras History returned by the preceding model.fit call
# (project helper; what it draws is not visible here).
bm.plot_history(history)

#### model 3

In [None]:
# Rebuild the train/test feature matrices from model3, overwriting the
# variables used by the previous model's section.
# NOTE(review): 500 matches input_dim=500 in the neural-network cells below, so
# it is presumably the vector dimensionality -- confirm in bm.Make_Pre_Data.
wv1, train_vecs_w2v, test_vecs_w2v = bm.Make_Pre_Data(model3, tfidf, 500, train, test)
# modelName is appended to every saved-classifier filename in the cells below.
modelName = bm.Return_ModelName('fastText', model3,'mecab')

#### t-SNE
> * t-분포 확률적 임베딩
> * 데이터의 차원 축소에 사용되는 기계 학습 알고리즘
> * 비선형 차원 축소 기법으로 고차원 데이터를 특히 2, 3차원 등으로 줄여 가시화하는 데에 유용하게 사용
> * 비슷한 데이터는 근접한 2, 3차원의 지점으로, 다른 데이터는 멀리 떨어진 지점으로 맵핑
##### word : 10000

In [None]:
%%time
# t-SNE visualisation of model3's word vectors via the project helper.
# NOTE(review): per the heading above, 10000 is the word count; the leading 2
# is presumably the number of output dimensions -- confirm in bm.Make_TSNE2.
bm.Make_TSNE2(2, model3, wv1, 10000)

#### 분류모델 : Logistic Regression

In [None]:
%%time
# Fit a logistic-regression classifier on the training vectors, print its
# parameters, test accuracy and per-class report, then pickle the model.
classifier = LogisticRegression(max_iter=250, n_jobs=cores)
classifier.fit(train_vecs_w2v, train_y2)
print(classifier.get_params())
print('score : {}'.format(classifier.score(test_vecs_w2v, test_y2)))
print('classification report')
print(classification_report(test_y2, classifier.predict(test_vecs_w2v)))
# Write through a context manager so the file is flushed and closed even if
# pickling fails; the bare open() used before leaked the handle.
with open(saveClassifierPath + 'LogisticRegression_' + modelName, 'wb') as model_file:
    pickle.dump(classifier, model_file)

In [None]:
# ROC evaluation of the logistic-regression model on the test split via the
# project helper (behaviour inferred from its name -- confirm). The result is
# passed to the combined ROC plot further down.
roc_auc_out1 = bm.Roc_Curve_MultiClass(test_vecs_w2v, test_y2, classifier, le, np.unique(train_y2))

In [None]:
# Confusion-matrix heatmap for the logistic-regression model on the train and
# test splits.
bm.ConfusionMatrix_To_Heatmap(train_vecs_w2v, train_y2, test_vecs_w2v, test_y2, classifier, le)

#### 분류모델 : Random Forest Classifier

In [None]:
%%time
# Fit a 75-tree random forest on the training vectors, print its parameters,
# test accuracy and per-class report, then pickle the model.
classifier2 = RandomForestClassifier(n_estimators=75, n_jobs=cores)
classifier2.fit(train_vecs_w2v, train_y2)
print(classifier2.get_params())
print('score : {}'.format(classifier2.score(test_vecs_w2v, test_y2)))
print('classification report')
print(classification_report(test_y2, classifier2.predict(test_vecs_w2v)))
# Write through a context manager so the file is flushed and closed even if
# pickling fails; the bare open() used before leaked the handle.
with open(saveClassifierPath + 'RandomForestClassifier_' + modelName, 'wb') as model_file:
    pickle.dump(classifier2, model_file)

In [None]:
# ROC evaluation of the random forest (classifier2) on the test split via the
# project helper (behaviour inferred from its name -- confirm). The result is
# passed to the combined ROC plot further down.
roc_auc_out2 = bm.Roc_Curve_MultiClass(test_vecs_w2v, test_y2, classifier2, le, np.unique(train_y2))

In [None]:
# Confusion-matrix heatmap for the random forest (classifier2) on the train
# and test splits.
bm.ConfusionMatrix_To_Heatmap(train_vecs_w2v, train_y2, test_vecs_w2v, test_y2, classifier2, le)

#### 분류모델 : C-Support Vector Classification

In [None]:
%%time
# Fit an RBF-kernel support vector classifier on the training vectors, print
# its parameters, test accuracy and per-class report, then pickle the model.
classifier3 = SVC(kernel='rbf', cache_size=1024, max_iter=1500, verbose=True)
classifier3.fit(train_vecs_w2v, train_y2)
print(classifier3.get_params())
print('score : {}'.format(classifier3.score(test_vecs_w2v, test_y2)))
print('classification report')
print(classification_report(test_y2, classifier3.predict(test_vecs_w2v)))
# Write through a context manager so the file is flushed and closed even if
# pickling fails; the bare open() used before leaked the handle.
with open(saveClassifierPath + 'SVC_' + modelName, 'wb') as model_file:
    pickle.dump(classifier3, model_file)

In [None]:
# ROC evaluation of the SVC (classifier3) on the test split via the project
# helper (behaviour inferred from its name -- confirm). The result is passed to
# the combined ROC plot further down.
roc_auc_out3 = bm.Roc_Curve_MultiClass(test_vecs_w2v, test_y2, classifier3, le, np.unique(train_y2))

In [None]:
# Confusion-matrix heatmap for the SVC (classifier3) on the train and test splits.
bm.ConfusionMatrix_To_Heatmap(train_vecs_w2v, train_y2, test_vecs_w2v, test_y2, classifier3, le)

#### 분류모델 : XGBoost

In [None]:
%%time
# Wrap the train / test feature matrices and their labels in XGBoost's
# DMatrix container; both are used by the training cell that follows.
dtrain = xgb.DMatrix(train_vecs_w2v, label=y_train)
dvalid = xgb.DMatrix(test_vecs_w2v, label=y_test)

In [None]:
%%time
# Tree-shape and sampling hyper-parameters (kept as separate names because
# they are notebook-level globals).
max_depth = 5
subsample = 0.7
colsample_bytree = 0.7

# Booster configuration: 8-class softmax objective scored with multi-class
# log-loss ('auc' is left disabled as an eval_metric).
params = dict(
    objective='multi:softmax',
    booster='gbtree',
    max_depth=max_depth,
    subsample=subsample,
    eval_metric='mlogloss',
    colsample_bytree=colsample_bytree,
    silent=1,
    eta=0.175,
    nthread=cores,
    num_class=8,
)

num_boost_round = 200
early_stopping_rounds = 10
test_size = 0.15

# Both sets are scored every round; early stopping watches the last entry.
watchlist = [(dtrain, 'train'), (dvalid, 'eval')]
gbm = xgb.train(
    params,
    dtrain,
    num_boost_round=num_boost_round,
    evals=watchlist,
    early_stopping_rounds=early_stopping_rounds,
    verbose_eval=True,
)

# Test accuracy of the trained booster, then save it to disk.
test_prediction = gbm.predict(xgb.DMatrix(test_vecs_w2v))
test_acc = accuracy_score(y_test, test_prediction)
print(test_acc)
gbm.save_model(saveClassifierPath + 'XGBoost_' + modelName)

In [None]:
# ROC evaluation of the XGBoost booster; the features are wrapped in a DMatrix
# here, unlike the sklearn classifiers which receive the raw matrix.
# The result is passed to the combined ROC plot further down.
roc_auc_out4 = bm.Roc_Curve_MultiClass(xgb.DMatrix(test_vecs_w2v), test_y2, gbm, le, np.unique(train_y2))

In [None]:
# Confusion-matrix heatmap for the XGBoost booster; both splits are passed as
# freshly built DMatrix objects.
bm.ConfusionMatrix_To_Heatmap(xgb.DMatrix(train_vecs_w2v), train_y2, xgb.DMatrix(test_vecs_w2v), test_y2, gbm, le)

In [None]:
# Drop the notebook's references to the four fitted models (presumably to
# release their memory before the neural-network cells -- confirm).
del classifier, classifier2, classifier3, gbm

In [None]:
# Combined ROC plot over the four ROC results collected above
# (logistic regression, random forest, SVC, XGBoost).
# Helper is project code; micro/macro averaging is inferred from its name -- confirm.
bm.Plot_Roc_Curver_Micro_Macro(roc_auc_out1, roc_auc_out2, roc_auc_out3, roc_auc_out4)

#### 분류모델 : Neural Network

In [None]:
# One-hot encode the labels (8 classes) for the softmax networks below.
y_test2, y_train2 = (
    np_utils.to_categorical(labels, 8) for labels in (y_test, y_train)
)

In [None]:
# Network 1: 500-d input -> 128 ReLU -> 32 sigmoid (L2-regularised) -> 8-way
# softmax, with dropout after each hidden layer.
model = Sequential([
    Dense(128, activation='relu', input_dim=500),
    Dropout(0.3),
    Dense(32, activation='sigmoid', kernel_regularizer=l2(0.1)),
    Dropout(0.15),
    Dense(8, activation='softmax'),
])
model.compile(
    loss='categorical_crossentropy',
    optimizer='adadelta',
    metrics=['accuracy'],
)

# Train silently (progress comes from the TQDM callback); 20% of the training
# data is held out for validation.
history = model.fit(
    train_vecs_w2v,
    y_train2,
    epochs=300,
    verbose=0,
    validation_split=0.2,
    callbacks=[TQDMNotebookCallback(show_inner=False)],
)

# evaluate() returns the loss followed by the compiled accuracy metric.
score, acc = model.evaluate(test_vecs_w2v, y_test2, verbose=0)
print('Score: %1.4f' % score)
print('Accuracy: %1.4f' % acc)
model.save(saveClassifierPath + 'NeuralNetwork_1_' + modelName)

In [None]:
# Plot the Keras History returned by the preceding model.fit call
# (project helper; what it draws is not visible here).
bm.plot_history(history)

In [None]:
# Network 2: 500-d input -> 128 ReLU -> 64 sigmoid -> 32 ReLU -> 8-way softmax;
# the 64- and 32-unit layers are L2-regularised and followed by dropout.
model = Sequential([
    Dense(128, activation='relu', input_dim=500),
    Dense(64, activation='sigmoid', kernel_regularizer=l2(0.01)),
    Dropout(0.15),
    Dense(32, activation='relu', kernel_regularizer=l2(0.01)),
    Dropout(0.15),
    Dense(8, activation='softmax'),
])
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

# Train silently (progress comes from the TQDM callback); 20% of the training
# data is held out for validation.
history = model.fit(
    train_vecs_w2v,
    y_train2,
    epochs=300,
    verbose=0,
    validation_split=0.2,
    callbacks=[TQDMNotebookCallback(show_inner=False)],
)

# evaluate() returns the loss followed by the compiled accuracy metric.
score, acc = model.evaluate(test_vecs_w2v, y_test2, verbose=0)
print('Score: %1.4f' % score)
print('Accuracy: %1.4f' % acc)
model.save(saveClassifierPath + 'NeuralNetwork_2_' + modelName)

In [None]:
# Plot the Keras History returned by the preceding model.fit call
# (project helper; what it draws is not visible here).
bm.plot_history(history)