# 수집된 뉴스 기사 및 댓글에 대한 감정 분석
## Word2Vec
* 데이터 
> 2017년 12월 1일부터 2018년 2월 1일까지 63일간 [네이버](http://www.naver.com)와 [다음](http://www.daum.net)의 랭킹뉴스와 뉴스의 댓글을 크롤링함.

In [None]:
import pickle
import html
import multiprocessing
from collections import namedtuple, OrderedDict
import re
import sys
import os
from glob import glob
from numba import jit
import warnings

os.environ['KERAS_BACKEND']='tensorflow'

import numpy as np
from tqdm import tqdm
tqdm.pandas(desc="progress-bar")
import pandas as pd

from gensim.models import Word2Vec, KeyedVectors
from gensim.models.doc2vec import TaggedDocument

from konlpy.utils import pprint

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, classification_report, roc_curve,  accuracy_score
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import scale, MinMaxScaler, LabelEncoder
from sklearn.manifold import TSNE

import keras.backend.tensorflow_backend as K
from keras.preprocessing import sequence
from keras_tqdm import TQDMCallback, TQDMNotebookCallback
from keras.models import Model, Sequential
from keras.regularizers import l2
from keras.optimizers import SGD
from keras.utils import np_utils
from keras.layers import Input, Flatten, Dense, Embedding, embeddings, merge, Dropout, Activation,  LSTM, Bidirectional, SimpleRNN, GRU
from keras.layers.convolutional import Conv1D, Conv2D
from keras.layers.pooling import MaxPooling1D, GlobalMaxPooling1D
from keras.layers.normalization import BatchNormalization
from keras.layers.core import SpatialDropout1D
from keras.utils import np_utils
from tensorflow.python.client import device_lib
from keras.layers.merge import dot

import xgboost as xgb

import matplotlib.pyplot as plt

import bokeh.plotting as bp
from bokeh.models import HoverTool, BoxSelectTool
from bokeh.plotting import figure, show, output_notebook

In [None]:
import Database_Handler as dh
import Basic_Module as bm

In [None]:
from ckonlpy.tag import Twitter
from konlpy.tag import Mecab
# Instantiate both taggers once; they are handed to
# bm.MakeTaggedData_For_Comments in the tagging cells further down.
ct = Twitter()  # Twitter tagger imported from ckonlpy
mecab = Mecab()  # Mecab tagger imported from konlpy

## Stopwords

In [None]:
# Load the stopword list: one entry per line, surrounding whitespace stripped.
# The file is opened in a `with` block so the handle is closed deterministically.
with open('./data/stopwordsList.txt', encoding='utf-8') as stopword_file:
    stopwords = [line.strip() for line in stopword_file]

## TaggedDocument

In [None]:
# Lightweight record for one tagged document: its words, its tags and its
# sentiment. NOTE: this rebinds the TaggedDocument name imported from
# gensim.models.doc2vec at the top of the notebook.
TaggedDocument = namedtuple('TaggedDocument', ['words', 'tags', 'sentiment'])

## Load Data

#### Path

In [None]:
# Platform-specific locations of models, classifiers, inputs and outputs.
# NOTE(review): newsPath is only assigned in the win32 branch, and no path is
# assigned at all on other platforms (e.g. linux), so later cells would fail
# with NameError there.
if sys.platform == 'darwin':
    _disk = '/Volumes/disk1/'
    loadModelPath = _disk + 'model/'
    classifierPath = _disk + 'data/pre_data/classifier/'
    news_senti_outcome = _disk + 'outcome_for_News_sentiment_analysis/'
    daumCommentsPath = _disk + 'data/daum_Comments/'
    naverCommentsPath = _disk + 'data/naver_Comments/'
    outcomeDaumCommentsPath = _disk + 'outcome_comments_for_daum/'
    outcomeNaverCommentsPath = _disk + 'outcome_comments_for_naver/'
    outcome_predata = _disk + 'pre_data_for_comments/'
    outcome_tagged_data = _disk + 'pre_data_for_comments2/'
    outcome_vectorized_data = _disk + 'pre_data_for_comments3/'
elif sys.platform == 'win32':
    _drive = 'd:/'
    loadModelPath = _drive + 'model/'
    classifierPath = _drive + 'data/pre_data/classifier/'
    # The two news locations are relative to the working directory on win32.
    newsPath = './data/pre_data/news_sentiment/'
    news_senti_outcome = './outcome_for_News_sentiment_analysis/'
    daumCommentsPath = _drive + 'data/daum_Comments/'
    naverCommentsPath = _drive + 'data/naver_Comments/'
    outcomeDaumCommentsPath = _drive + 'outcome_comments_for_daum/'
    outcomeNaverCommentsPath = _drive + 'outcome_comments_for_naver/'
    outcome_predata = _drive + 'pre_data_for_comments/'
    outcome_tagged_data = _drive + 'pre_data_for_comments2/'
    outcome_vectorized_data = _drive + 'pre_data_for_comments3/'

#### News

In [None]:
# Show the entries of the news sentiment outcome directory (displayed as the cell output).
os.listdir(news_senti_outcome)

In [None]:
# Naver: load the news sentiment results; the first CSV column becomes the index.
naver_news_csv = os.path.join(news_senti_outcome, 'naver_news_sentiment_analysis.csv')
naverData = pd.read_csv(naver_news_csv, encoding='utf-8', header=0, index_col=0)
# Label every row with its source site.
naverData['site'] = ['Naver'] * len(naverData)
# Keep only the rows whose number_of_crawled_comment is non-zero.
naver_has_comments = naverData['number_of_crawled_comment'] != 0
reNaverData = naverData[naver_has_comments]
print(reNaverData.shape)
reNaverData.head()

In [None]:
# Daum: load the news sentiment results; the first CSV column becomes the index.
daum_news_csv = os.path.join(news_senti_outcome, 'daum_news_sentiment_analysis.csv')
daumData = pd.read_csv(daum_news_csv, encoding='utf-8', header=0, index_col=0)
# Label every row with its source site.
daumData['site'] = ['daum'] * len(daumData)
# Keep only the rows whose number_of_crawled_comment is non-zero.
daum_has_comments = daumData['number_of_crawled_comment'] != 0
reDaumData = daumData[daum_has_comments]
print(reDaumData.shape)
reDaumData.head()

### 댓글

In [None]:
# Show the entries of the pre-processed comment data directory (displayed as the cell output).
os.listdir(outcome_predata)

In [None]:
%%time
# Filtered Naver comment data; the first CSV column is used as the index.
predata_naver = outcome_predata + 'filtered_predata_for_naver_news_comment.csv'
dfNaver = pd.read_csv(predata_naver, encoding='utf-8', index_col=0, header=0)
print(dfNaver.shape)

In [None]:
#dfNaver = dfNaver[dfNaver._id == '5a29c445588c132954d1973a']

In [None]:
# Columns of the Naver comment frame that are later merged with the predictions
# ('공감' / '비공감' are the Korean labels for agree / disagree).
naver_meta_columns = ['_id', 'category', 'date', 'rank', 'site', '공감', '비공감']
extDfNaver = dfNaver.loc[:, naver_meta_columns]

In [None]:
%%time
# Filtered Daum comment data; the first CSV column is used as the index.
predata_daum = outcome_predata + 'filtered_predata_for_daum_news_comment.csv'
dfDaum = pd.read_csv(predata_daum, encoding='utf-8', index_col=0, header=0)
print(dfDaum.shape)

In [None]:
#dfDaum = dfDaum[dfDaum._id =='5a2a61bf588c13481c229d1e']

In [None]:
# Columns of the Daum comment frame that are later merged with the predictions
# ('공감' / '비공감' are the Korean labels for agree / disagree).
daum_meta_columns = ['_id', 'category', 'date', 'rank', 'site', '공감', '비공감']
extDfDaum = dfDaum.loc[:, daum_meta_columns]

## Word2Vec Model

### Twitter

#### Comments to tagged Document

In [None]:
%%time
# Tagged Daum comments (Twitter tagger): reuse the pickled cache when it
# exists, otherwise tag the comments and write the cache.
# NOTE(review): pickle.load can execute arbitrary code; only load cache files
# this notebook produced.
tagged_by_ct_daum_file = outcome_tagged_data+'word2vec_tagged_data_by_ct_for_daum_news_comment.pickled'
if os.path.isfile(tagged_by_ct_daum_file):
    with open(tagged_by_ct_daum_file, 'rb') as cache_file:
        tagged_daum_ct = pickle.load(cache_file)
else:
    tagged_daum_ct = bm.MakeTaggedData_For_Comments(dfDaum, TaggedDocument, ct, stopwords)
    # `with` guarantees the cache is flushed and closed before it is reused.
    with open(tagged_by_ct_daum_file, 'wb') as cache_file:
        pickle.dump(tagged_daum_ct, cache_file)

# Same procedure for the Naver comments.
tagged_by_ct_naver_file = outcome_tagged_data+'word2vec_tagged_data_by_ct_for_naver_news_comment.pickled'
if os.path.isfile(tagged_by_ct_naver_file):
    with open(tagged_by_ct_naver_file, 'rb') as cache_file:
        tagged_naver_ct = pickle.load(cache_file)
else:
    tagged_naver_ct = bm.MakeTaggedData_For_Comments(dfNaver, TaggedDocument, ct, stopwords)
    with open(tagged_by_ct_naver_file, 'wb') as cache_file:
        pickle.dump(tagged_naver_ct, cache_file)

#### Train data set으로부터 TF-IDF Vectorizer를 만듦

In [None]:
# Build the TF-IDF vectorizer from the pickled training set tagged with ct.
trainName_ct = './data/pre_data/train_test_Data/pre_data_train_for_word2vec_sentiment_by_ct.pickled'
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open(trainName_ct, 'rb') as train_file:
    train_ct = pickle.load(train_file)
tfidf_ct = bm.Build_tfidf(train_ct)
# The training data is not used again in this notebook section, so release it.
del train_ct

#### Model 1

##### Load Model

In [None]:
# Tagger label; it is printed and passed to bm.Return_ModelName below.
taggerName_ct = 'ct'
print(taggerName_ct)
# Word2Vec model whose file name encodes sg-0 / cbow_mean-0, trained on ct-tagged text.
model1_ct = Word2Vec.load(
    loadModelPath
    + 'word2vec_size-1000_epoch-20_window-10_negative-7_hs-0_sg-0_cbow_mean-0_min_count-2_by-ct.model'
)
# This name is reused for the vector cache files, the classifier glob and the output CSV.
model1_ct_Name = bm.Return_ModelName('word2vec', model1_ct, taggerName_ct)

##### Vectorization

In [None]:
# Document vectors of the Daum comments for ct model 1, cached as a pickle.
daum_vecs_file = outcome_vectorized_data+model1_ct_Name+'-daum'
if os.path.isfile(daum_vecs_file):
    # NOTE(review): pickle.load can execute arbitrary code; only load trusted caches.
    with open(daum_vecs_file, 'rb') as cache_file:
        daum_vecs_by_model1 = pickle.load(cache_file)
else:
    # 1000 presumably matches the model's vector size (size-1000 in its file name) -- TODO confirm.
    wv1, daum_vecs_by_model1 = bm.Make_Pre_Data_For_DAUM(model1_ct, tfidf_ct, 1000, tagged_daum_ct)
    # Only the second returned value is kept; free the first before writing the cache.
    del wv1
    with open(daum_vecs_file, 'wb') as cache_file:
        pickle.dump(daum_vecs_by_model1, cache_file)

In [None]:
# Document vectors of the Naver comments for ct model 1, cached as a pickle.
naver_vecs_file = outcome_vectorized_data+model1_ct_Name+'-naver'
if os.path.isfile(naver_vecs_file):
    # NOTE(review): pickle.load can execute arbitrary code; only load trusted caches.
    with open(naver_vecs_file, 'rb') as cache_file:
        naver_vecs_by_model1 = pickle.load(cache_file)
else:
    # Make_Pre_Data_For_DAUM is applied to the Naver data as well, despite its name.
    wv1, naver_vecs_by_model1 = bm.Make_Pre_Data_For_DAUM(model1_ct, tfidf_ct, 1000, tagged_naver_ct)
    # Only the second returned value is kept; free the first before writing the cache.
    del wv1
    with open(naver_vecs_file, 'wb') as cache_file:
        pickle.dump(naver_vecs_by_model1, cache_file)

##### Load Classifier

In [None]:
# Classifier files in classifierPath whose names end with this model's name.
classifier_by_model1 = glob(classifierPath + '*' + model1_ct_Name)
# bm.LoadClassifier is expected to return a (key, classifier) pair per path -- TODO confirm.
load_Classifier_by_model1_Dict = dict(
    bm.LoadClassifier(classifier_file) for classifier_file in classifier_by_model1
)

In [None]:
%%time
# Install a global filter that silences all warnings from here on.
warnings.filterwarnings('ignore')
# Apply every loaded classifier to the Daum vectors; bm.PredictSentiment is
# expected to return (column name, values) pairs -- TODO confirm.
predict_Outcome_daum = pd.DataFrame.from_dict(
    dict(
        bm.PredictSentiment(daum_vecs_by_model1, clf_key, clf)
        for clf_key, clf in load_Classifier_by_model1_Dict.items()
    )
)
# Attach the predictions to the comment metadata on index labels.
# NOTE(review): rows only line up if both frames share the same index -- confirm.
predict_Outcome_daum = extDfDaum.merge(predict_Outcome_daum, left_index=True, right_index=True)
predict_Outcome_daum.to_csv(
    outcomeDaumCommentsPath + 'outcome_comments_sentiment_daum_' + model1_ct_Name,
    index=None,
    encoding='utf-8',
)

In [None]:
%%time
# Install a global filter that silences all warnings from here on.
warnings.filterwarnings('ignore')
# Apply every loaded classifier to the Naver vectors; bm.PredictSentiment is
# expected to return (column name, values) pairs -- TODO confirm.
predict_Outcome_naver = pd.DataFrame.from_dict(
    dict(
        bm.PredictSentiment(naver_vecs_by_model1, clf_key, clf)
        for clf_key, clf in load_Classifier_by_model1_Dict.items()
    )
)
# Attach the predictions to the comment metadata on index labels.
# NOTE(review): rows only line up if both frames share the same index -- confirm.
predict_Outcome_naver = extDfNaver.merge(predict_Outcome_naver, left_index=True, right_index=True)
predict_Outcome_naver.to_csv(
    outcomeNaverCommentsPath + 'outcome_comments_sentiment_naver_' + model1_ct_Name,
    index=None,
    encoding='utf-8',
)

In [None]:
# Unbind everything created for ct model 1 before the next model is loaded.
del model1_ct, model1_ct_Name
del classifier_by_model1, load_Classifier_by_model1_Dict
del daum_vecs_by_model1, naver_vecs_by_model1
del predict_Outcome_naver, predict_Outcome_daum

#### Model 2

##### Load Model

In [None]:
# Tagger label; it is printed and passed to bm.Return_ModelName below.
taggerName_ct = 'ct'
print(taggerName_ct)
# Word2Vec model whose file name encodes sg-0 / cbow_mean-1, trained on ct-tagged text.
model2_ct = Word2Vec.load(
    loadModelPath
    + 'word2vec_size-1000_epoch-20_window-10_negative-7_hs-0_sg-0_cbow_mean-1_min_count-2_by-ct.model'
)
# This name is reused for the vector cache files, the classifier glob and the output CSV.
model2_ct_Name = bm.Return_ModelName('word2vec', model2_ct, taggerName_ct)

##### Vectorization

In [None]:
# Document vectors of the Daum comments for ct model 2, cached as a pickle.
daum_vecs_file = outcome_vectorized_data+model2_ct_Name+'-daum'
if os.path.isfile(daum_vecs_file):
    # NOTE(review): pickle.load can execute arbitrary code; only load trusted caches.
    with open(daum_vecs_file, 'rb') as cache_file:
        daum_vecs_by_model2 = pickle.load(cache_file)
else:
    # 1000 presumably matches the model's vector size (size-1000 in its file name) -- TODO confirm.
    wv1, daum_vecs_by_model2 = bm.Make_Pre_Data_For_DAUM(model2_ct, tfidf_ct, 1000, tagged_daum_ct)
    # Only the second returned value is kept; free the first before writing the cache.
    del wv1
    with open(daum_vecs_file, 'wb') as cache_file:
        pickle.dump(daum_vecs_by_model2, cache_file)

In [None]:
# Document vectors of the Naver comments for ct model 2, cached as a pickle.
naver_vecs_file = outcome_vectorized_data+model2_ct_Name+'-naver'
if os.path.isfile(naver_vecs_file):
    # NOTE(review): pickle.load can execute arbitrary code; only load trusted caches.
    with open(naver_vecs_file, 'rb') as cache_file:
        naver_vecs_by_model2 = pickle.load(cache_file)
else:
    # Make_Pre_Data_For_DAUM is applied to the Naver data as well, despite its name.
    wv1, naver_vecs_by_model2 = bm.Make_Pre_Data_For_DAUM(model2_ct, tfidf_ct, 1000, tagged_naver_ct)
    # Only the second returned value is kept; free the first before writing the cache.
    del wv1
    with open(naver_vecs_file, 'wb') as cache_file:
        pickle.dump(naver_vecs_by_model2, cache_file)

##### Load Classifier

In [None]:
# Classifier files in classifierPath whose names end with this model's name.
classifier_by_model2 = glob(classifierPath + '*' + model2_ct_Name)
# bm.LoadClassifier is expected to return a (key, classifier) pair per path -- TODO confirm.
load_Classifier_by_model2_Dict = dict(
    bm.LoadClassifier(classifier_file) for classifier_file in classifier_by_model2
)

In [None]:
%%time
# Install a global filter that silences all warnings from here on.
warnings.filterwarnings('ignore')
# Apply every loaded classifier to the Daum vectors; bm.PredictSentiment is
# expected to return (column name, values) pairs -- TODO confirm.
predict_Outcome_daum = pd.DataFrame.from_dict(
    dict(
        bm.PredictSentiment(daum_vecs_by_model2, clf_key, clf)
        for clf_key, clf in load_Classifier_by_model2_Dict.items()
    )
)
# Attach the predictions to the comment metadata on index labels.
# NOTE(review): rows only line up if both frames share the same index -- confirm.
predict_Outcome_daum = extDfDaum.merge(predict_Outcome_daum, left_index=True, right_index=True)
predict_Outcome_daum.to_csv(
    outcomeDaumCommentsPath + 'outcome_comments_sentiment_daum_' + model2_ct_Name,
    index=None,
    encoding='utf-8',
)

In [None]:
%%time
# Install a global filter that silences all warnings from here on.
warnings.filterwarnings('ignore')
# Apply every loaded classifier to the Naver vectors; bm.PredictSentiment is
# expected to return (column name, values) pairs -- TODO confirm.
predict_Outcome_naver = pd.DataFrame.from_dict(
    dict(
        bm.PredictSentiment(naver_vecs_by_model2, clf_key, clf)
        for clf_key, clf in load_Classifier_by_model2_Dict.items()
    )
)
# Attach the predictions to the comment metadata on index labels.
# NOTE(review): rows only line up if both frames share the same index -- confirm.
predict_Outcome_naver = extDfNaver.merge(predict_Outcome_naver, left_index=True, right_index=True)
predict_Outcome_naver.to_csv(
    outcomeNaverCommentsPath + 'outcome_comments_sentiment_naver_' + model2_ct_Name,
    index=None,
    encoding='utf-8',
)

In [None]:
# Unbind everything created for ct model 2 before the next model is loaded.
del model2_ct, model2_ct_Name
del classifier_by_model2, load_Classifier_by_model2_Dict
del daum_vecs_by_model2, naver_vecs_by_model2
del predict_Outcome_naver, predict_Outcome_daum

#### Model 3

##### Load Model

In [None]:
# Tagger label; it is printed and passed to bm.Return_ModelName below.
taggerName_ct = 'ct'
print(taggerName_ct)
# Word2Vec model whose file name encodes sg-1 / cbow_mean-0, trained on ct-tagged text.
model3_ct = Word2Vec.load(
    loadModelPath
    + 'word2vec_size-1000_epoch-20_window-10_negative-7_hs-0_sg-1_cbow_mean-0_min_count-2_by-ct.model'
)
# This name is reused for the vector cache files, the classifier glob and the output CSV.
model3_ct_Name = bm.Return_ModelName('word2vec', model3_ct, taggerName_ct)

##### Vectorization

In [None]:
# Document vectors of the Daum comments for ct model 3, cached as a pickle.
daum_vecs_file = outcome_vectorized_data+model3_ct_Name+'-daum'
if os.path.isfile(daum_vecs_file):
    # NOTE(review): pickle.load can execute arbitrary code; only load trusted caches.
    with open(daum_vecs_file, 'rb') as cache_file:
        daum_vecs_by_model3 = pickle.load(cache_file)
else:
    # 1000 presumably matches the model's vector size (size-1000 in its file name) -- TODO confirm.
    wv1, daum_vecs_by_model3 = bm.Make_Pre_Data_For_DAUM(model3_ct, tfidf_ct, 1000, tagged_daum_ct)
    # Only the second returned value is kept; free the first before writing the cache.
    del wv1
    with open(daum_vecs_file, 'wb') as cache_file:
        pickle.dump(daum_vecs_by_model3, cache_file)

In [None]:
# Document vectors of the Naver comments for ct model 3, cached as a pickle.
naver_vecs_file = outcome_vectorized_data+model3_ct_Name+'-naver'
if os.path.isfile(naver_vecs_file):
    # NOTE(review): pickle.load can execute arbitrary code; only load trusted caches.
    with open(naver_vecs_file, 'rb') as cache_file:
        naver_vecs_by_model3 = pickle.load(cache_file)
else:
    # Make_Pre_Data_For_DAUM is applied to the Naver data as well, despite its name.
    wv1, naver_vecs_by_model3 = bm.Make_Pre_Data_For_DAUM(model3_ct, tfidf_ct, 1000, tagged_naver_ct)
    # Only the second returned value is kept; free the first before writing the cache.
    del wv1
    with open(naver_vecs_file, 'wb') as cache_file:
        pickle.dump(naver_vecs_by_model3, cache_file)

##### Load Classifier

In [None]:
# Classifier files in classifierPath whose names end with this model's name.
classifier_by_model3 = glob(classifierPath + '*' + model3_ct_Name)
# bm.LoadClassifier is expected to return a (key, classifier) pair per path -- TODO confirm.
load_Classifier_by_model3_Dict = dict(
    bm.LoadClassifier(classifier_file) for classifier_file in classifier_by_model3
)

In [None]:
%%time
# Install a global filter that silences all warnings from here on.
warnings.filterwarnings('ignore')
# Apply every loaded classifier to the Daum vectors; bm.PredictSentiment is
# expected to return (column name, values) pairs -- TODO confirm.
predict_Outcome_daum = pd.DataFrame.from_dict(
    dict(
        bm.PredictSentiment(daum_vecs_by_model3, clf_key, clf)
        for clf_key, clf in load_Classifier_by_model3_Dict.items()
    )
)
# Attach the predictions to the comment metadata on index labels.
# NOTE(review): rows only line up if both frames share the same index -- confirm.
predict_Outcome_daum = extDfDaum.merge(predict_Outcome_daum, left_index=True, right_index=True)
predict_Outcome_daum.to_csv(
    outcomeDaumCommentsPath + 'outcome_comments_sentiment_daum_' + model3_ct_Name,
    index=None,
    encoding='utf-8',
)

In [None]:
%%time
# Install a global filter that silences all warnings from here on.
warnings.filterwarnings('ignore')
# Apply every loaded classifier to the Naver vectors; bm.PredictSentiment is
# expected to return (column name, values) pairs -- TODO confirm.
predict_Outcome_naver = pd.DataFrame.from_dict(
    dict(
        bm.PredictSentiment(naver_vecs_by_model3, clf_key, clf)
        for clf_key, clf in load_Classifier_by_model3_Dict.items()
    )
)
# Attach the predictions to the comment metadata on index labels.
# NOTE(review): rows only line up if both frames share the same index -- confirm.
predict_Outcome_naver = extDfNaver.merge(predict_Outcome_naver, left_index=True, right_index=True)
predict_Outcome_naver.to_csv(
    outcomeNaverCommentsPath + 'outcome_comments_sentiment_naver_' + model3_ct_Name,
    index=None,
    encoding='utf-8',
)

In [None]:
# Unbind everything created for ct model 3 before the Mecab section starts.
del model3_ct, model3_ct_Name
del classifier_by_model3, load_Classifier_by_model3_Dict
del daum_vecs_by_model3, naver_vecs_by_model3
del predict_Outcome_naver, predict_Outcome_daum

### Mecab

#### Comments to tagged Document

In [None]:
%%time
# Tagged Daum comments (Mecab tagger): reuse the pickled cache when it exists,
# otherwise tag the comments and write the cache.
# NOTE(review): pickle.load can execute arbitrary code; only load cache files
# this notebook produced.
tagged_by_mecab_daum_file = outcome_tagged_data+'word2vec_tagged_data_by_mecab_for_daum_news_comment.pickled'
if os.path.isfile(tagged_by_mecab_daum_file):
    with open(tagged_by_mecab_daum_file, 'rb') as cache_file:
        tagged_daum_mecab = pickle.load(cache_file)
else:
    tagged_daum_mecab = bm.MakeTaggedData_For_Comments(dfDaum, TaggedDocument, mecab, stopwords)
    # `with` guarantees the cache is flushed and closed before it is reused.
    with open(tagged_by_mecab_daum_file, 'wb') as cache_file:
        pickle.dump(tagged_daum_mecab, cache_file)

# Same procedure for the Naver comments.
tagged_by_mecab_naver_file = outcome_tagged_data+'word2vec_tagged_data_by_mecab_for_naver_news_comment.pickled'
if os.path.isfile(tagged_by_mecab_naver_file):
    with open(tagged_by_mecab_naver_file, 'rb') as cache_file:
        tagged_naver_mecab = pickle.load(cache_file)
else:
    tagged_naver_mecab = bm.MakeTaggedData_For_Comments(dfNaver, TaggedDocument, mecab, stopwords)
    with open(tagged_by_mecab_naver_file, 'wb') as cache_file:
        pickle.dump(tagged_naver_mecab, cache_file)

#### Train data set으로부터 TF-IDF Vectorizer를 만듦

In [None]:
# Build the TF-IDF vectorizer from the pickled training set tagged with Mecab.
trainName = './data/pre_data/train_test_Data/pre_data_train_for_word2vec_sentiment_by_mecab.pickled'
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open(trainName, 'rb') as train_file:
    train_mecab = pickle.load(train_file)
tfidf_mecab = bm.Build_tfidf(train_mecab)
# The training data is not used again in this notebook section, so release it.
del train_mecab

#### Model 1

##### Load Model

In [None]:
# Tagger label; it is printed and passed to bm.Return_ModelName below.
taggerName_mecab = 'mecab'
print(taggerName_mecab)
# Word2Vec model whose file name encodes sg-0 / cbow_mean-0, trained on Mecab-tagged text.
model1_mecab = Word2Vec.load(
    loadModelPath
    + 'word2vec_size-1000_epoch-20_window-10_negative-7_hs-0_sg-0_cbow_mean-0_min_count-2_by-mecab.model'
)
# This name is reused for the vector cache files, the classifier glob and the output CSV.
model1_mecab_Name = bm.Return_ModelName('word2vec', model1_mecab, taggerName_mecab)

##### Vectorization

In [None]:
# Document vectors of the Daum comments for Mecab model 1, cached as a pickle.
daum_vecs_file = outcome_vectorized_data+model1_mecab_Name+'-daum'
if os.path.isfile(daum_vecs_file):
    # NOTE(review): pickle.load can execute arbitrary code; only load trusted caches.
    with open(daum_vecs_file, 'rb') as cache_file:
        daum_vecs_by_model1 = pickle.load(cache_file)
else:
    # 1000 presumably matches the model's vector size (size-1000 in its file name) -- TODO confirm.
    wv1, daum_vecs_by_model1 = bm.Make_Pre_Data_For_DAUM(model1_mecab, tfidf_mecab, 1000, tagged_daum_mecab)
    # Only the second returned value is kept; free the first before writing the cache.
    del wv1
    with open(daum_vecs_file, 'wb') as cache_file:
        pickle.dump(daum_vecs_by_model1, cache_file)

In [None]:
# Document vectors of the Naver comments for Mecab model 1, cached as a pickle.
naver_vecs_file = outcome_vectorized_data+model1_mecab_Name+'-naver'
if os.path.isfile(naver_vecs_file):
    # NOTE(review): pickle.load can execute arbitrary code; only load trusted caches.
    with open(naver_vecs_file, 'rb') as cache_file:
        naver_vecs_by_model1 = pickle.load(cache_file)
else:
    # Make_Pre_Data_For_DAUM is applied to the Naver data as well, despite its name.
    wv1, naver_vecs_by_model1 = bm.Make_Pre_Data_For_DAUM(model1_mecab, tfidf_mecab, 1000, tagged_naver_mecab)
    # Only the second returned value is kept; free the first before writing the cache.
    del wv1
    with open(naver_vecs_file, 'wb') as cache_file:
        pickle.dump(naver_vecs_by_model1, cache_file)

##### Load Classifier

In [None]:
# Classifier files in classifierPath whose names end with this model's name.
classifier_by_model1 = glob(classifierPath + '*' + model1_mecab_Name)
# bm.LoadClassifier is expected to return a (key, classifier) pair per path -- TODO confirm.
load_Classifier_by_model1_Dict = dict(
    bm.LoadClassifier(classifier_file) for classifier_file in classifier_by_model1
)

In [None]:
%%time
# Install a global filter that silences all warnings from here on.
warnings.filterwarnings('ignore')
# Apply every loaded classifier to the Daum vectors; bm.PredictSentiment is
# expected to return (column name, values) pairs -- TODO confirm.
predict_Outcome_daum = pd.DataFrame.from_dict(
    dict(
        bm.PredictSentiment(daum_vecs_by_model1, clf_key, clf)
        for clf_key, clf in load_Classifier_by_model1_Dict.items()
    )
)
# Attach the predictions to the comment metadata on index labels.
# NOTE(review): rows only line up if both frames share the same index -- confirm.
predict_Outcome_daum = extDfDaum.merge(predict_Outcome_daum, left_index=True, right_index=True)
predict_Outcome_daum.to_csv(
    outcomeDaumCommentsPath + 'outcome_comments_sentiment_daum_' + model1_mecab_Name,
    index=None,
    encoding='utf-8',
)

In [None]:
%%time
# Install a global filter that silences all warnings from here on.
warnings.filterwarnings('ignore')
# Apply every loaded classifier to the Naver vectors; bm.PredictSentiment is
# expected to return (column name, values) pairs -- TODO confirm.
predict_Outcome_naver = pd.DataFrame.from_dict(
    dict(
        bm.PredictSentiment(naver_vecs_by_model1, clf_key, clf)
        for clf_key, clf in load_Classifier_by_model1_Dict.items()
    )
)
# Attach the predictions to the comment metadata on index labels.
# NOTE(review): rows only line up if both frames share the same index -- confirm.
predict_Outcome_naver = extDfNaver.merge(predict_Outcome_naver, left_index=True, right_index=True)
predict_Outcome_naver.to_csv(
    outcomeNaverCommentsPath + 'outcome_comments_sentiment_naver_' + model1_mecab_Name,
    index=None,
    encoding='utf-8',
)

In [None]:
# Unbind everything created for Mecab model 1 before the next model is loaded.
del model1_mecab, model1_mecab_Name
del classifier_by_model1, load_Classifier_by_model1_Dict
del daum_vecs_by_model1, naver_vecs_by_model1
del predict_Outcome_naver, predict_Outcome_daum

#### Model 2

##### Load Model

In [None]:
# Tagger label; it is printed and passed to bm.Return_ModelName below.
taggerName_mecab = 'mecab'
print(taggerName_mecab)
# Word2Vec model whose file name encodes sg-0 / cbow_mean-1, trained on Mecab-tagged text.
model2_mecab = Word2Vec.load(
    loadModelPath
    + 'word2vec_size-1000_epoch-20_window-10_negative-7_hs-0_sg-0_cbow_mean-1_min_count-2_by-mecab.model'
)
# This name is reused for the vector cache files, the classifier glob and the output CSV.
model2_mecab_Name = bm.Return_ModelName('word2vec', model2_mecab, taggerName_mecab)

##### Vectorization

In [None]:
# Document vectors of the Daum comments for Mecab model 2, cached as a pickle.
daum_vecs_file = outcome_vectorized_data+model2_mecab_Name+'-daum'
if os.path.isfile(daum_vecs_file):
    # NOTE(review): pickle.load can execute arbitrary code; only load trusted caches.
    with open(daum_vecs_file, 'rb') as cache_file:
        daum_vecs_by_model2 = pickle.load(cache_file)
else:
    # 1000 presumably matches the model's vector size (size-1000 in its file name) -- TODO confirm.
    wv1, daum_vecs_by_model2 = bm.Make_Pre_Data_For_DAUM(model2_mecab, tfidf_mecab, 1000, tagged_daum_mecab)
    # Only the second returned value is kept; free the first before writing the cache.
    del wv1
    with open(daum_vecs_file, 'wb') as cache_file:
        pickle.dump(daum_vecs_by_model2, cache_file)

In [None]:
# Document vectors of the Naver comments for Mecab model 2, cached as a pickle.
naver_vecs_file = outcome_vectorized_data+model2_mecab_Name+'-naver'
if os.path.isfile(naver_vecs_file):
    # NOTE(review): pickle.load can execute arbitrary code; only load trusted caches.
    with open(naver_vecs_file, 'rb') as cache_file:
        naver_vecs_by_model2 = pickle.load(cache_file)
else:
    # Make_Pre_Data_For_DAUM is applied to the Naver data as well, despite its name.
    wv1, naver_vecs_by_model2 = bm.Make_Pre_Data_For_DAUM(model2_mecab, tfidf_mecab, 1000, tagged_naver_mecab)
    # Only the second returned value is kept; free the first before writing the cache.
    del wv1
    with open(naver_vecs_file, 'wb') as cache_file:
        pickle.dump(naver_vecs_by_model2, cache_file)

##### Load Classifier

In [None]:
# Classifier files in classifierPath whose names end with this model's name.
classifier_by_model2 = glob(classifierPath + '*' + model2_mecab_Name)
# bm.LoadClassifier is expected to return a (key, classifier) pair per path -- TODO confirm.
load_Classifier_by_model2_Dict = dict(
    bm.LoadClassifier(classifier_file) for classifier_file in classifier_by_model2
)

In [None]:
%%time
# Install a global filter that silences all warnings from here on.
warnings.filterwarnings('ignore')
# Apply every loaded classifier to the Daum vectors; bm.PredictSentiment is
# expected to return (column name, values) pairs -- TODO confirm.
predict_Outcome_daum = pd.DataFrame.from_dict(
    dict(
        bm.PredictSentiment(daum_vecs_by_model2, clf_key, clf)
        for clf_key, clf in load_Classifier_by_model2_Dict.items()
    )
)
# Attach the predictions to the comment metadata on index labels.
# NOTE(review): rows only line up if both frames share the same index -- confirm.
predict_Outcome_daum = extDfDaum.merge(predict_Outcome_daum, left_index=True, right_index=True)
predict_Outcome_daum.to_csv(
    outcomeDaumCommentsPath + 'outcome_comments_sentiment_daum_' + model2_mecab_Name,
    index=None,
    encoding='utf-8',
)

In [None]:
%%time
# Install a global filter that silences all warnings from here on.
warnings.filterwarnings('ignore')
# Apply every loaded classifier to the Naver vectors; bm.PredictSentiment is
# expected to return (column name, values) pairs -- TODO confirm.
predict_Outcome_naver = pd.DataFrame.from_dict(
    dict(
        bm.PredictSentiment(naver_vecs_by_model2, clf_key, clf)
        for clf_key, clf in load_Classifier_by_model2_Dict.items()
    )
)
# Attach the predictions to the comment metadata on index labels.
# NOTE(review): rows only line up if both frames share the same index -- confirm.
predict_Outcome_naver = extDfNaver.merge(predict_Outcome_naver, left_index=True, right_index=True)
predict_Outcome_naver.to_csv(
    outcomeNaverCommentsPath + 'outcome_comments_sentiment_naver_' + model2_mecab_Name,
    index=None,
    encoding='utf-8',
)

In [None]:
# Unbind everything created for Mecab model 2 before the next model is loaded.
del model2_mecab, model2_mecab_Name
del classifier_by_model2, load_Classifier_by_model2_Dict
del daum_vecs_by_model2, naver_vecs_by_model2
del predict_Outcome_naver, predict_Outcome_daum

#### Model 3

##### Load Model

In [None]:
# Tagger label; it is printed and passed to bm.Return_ModelName below.
taggerName_mecab = 'mecab'
print(taggerName_mecab)
# Word2Vec model whose file name encodes sg-1 / cbow_mean-0, trained on Mecab-tagged text.
model3_mecab = Word2Vec.load(
    loadModelPath
    + 'word2vec_size-1000_epoch-20_window-10_negative-7_hs-0_sg-1_cbow_mean-0_min_count-2_by-mecab.model'
)
# This name is reused for the vector cache files, the classifier glob and the output CSV.
model3_mecab_Name = bm.Return_ModelName('word2vec', model3_mecab, taggerName_mecab)

##### Vectorization

In [None]:
# Document vectors of the Daum comments for Mecab model 3, cached as a pickle.
daum_vecs_file = outcome_vectorized_data+model3_mecab_Name+'-daum'
if os.path.isfile(daum_vecs_file):
    # NOTE(review): pickle.load can execute arbitrary code; only load trusted caches.
    with open(daum_vecs_file, 'rb') as cache_file:
        daum_vecs_by_model3 = pickle.load(cache_file)
else:
    # 1000 presumably matches the model's vector size (size-1000 in its file name) -- TODO confirm.
    wv1, daum_vecs_by_model3 = bm.Make_Pre_Data_For_DAUM(model3_mecab, tfidf_mecab, 1000, tagged_daum_mecab)
    # Only the second returned value is kept; free the first before writing the cache.
    del wv1
    with open(daum_vecs_file, 'wb') as cache_file:
        pickle.dump(daum_vecs_by_model3, cache_file)

In [None]:
# Document vectors of the Naver comments for Mecab model 3, cached as a pickle.
naver_vecs_file = outcome_vectorized_data+model3_mecab_Name+'-naver'
if os.path.isfile(naver_vecs_file):
    # NOTE(review): pickle.load can execute arbitrary code; only load trusted caches.
    with open(naver_vecs_file, 'rb') as cache_file:
        naver_vecs_by_model3 = pickle.load(cache_file)
else:
    # Make_Pre_Data_For_DAUM is applied to the Naver data as well, despite its name.
    wv1, naver_vecs_by_model3 = bm.Make_Pre_Data_For_DAUM(model3_mecab, tfidf_mecab, 1000, tagged_naver_mecab)
    # Only the second returned value is kept; free the first before writing the cache.
    del wv1
    with open(naver_vecs_file, 'wb') as cache_file:
        pickle.dump(naver_vecs_by_model3, cache_file)

##### Load Classifier

In [None]:
# Classifier files in classifierPath whose names end with this model's name.
classifier_by_model3 = glob(classifierPath + '*' + model3_mecab_Name)
# bm.LoadClassifier is expected to return a (key, classifier) pair per path -- TODO confirm.
load_Classifier_by_model3_Dict = dict(
    bm.LoadClassifier(classifier_file) for classifier_file in classifier_by_model3
)

In [None]:
%%time
# Install a global filter that silences all warnings from here on.
warnings.filterwarnings('ignore')
# Apply every loaded classifier to the Daum vectors; bm.PredictSentiment is
# expected to return (column name, values) pairs -- TODO confirm.
predict_Outcome_daum = pd.DataFrame.from_dict(
    dict(
        bm.PredictSentiment(daum_vecs_by_model3, clf_key, clf)
        for clf_key, clf in load_Classifier_by_model3_Dict.items()
    )
)
# Attach the predictions to the comment metadata on index labels.
# NOTE(review): rows only line up if both frames share the same index -- confirm.
predict_Outcome_daum = extDfDaum.merge(predict_Outcome_daum, left_index=True, right_index=True)
predict_Outcome_daum.to_csv(
    outcomeDaumCommentsPath + 'outcome_comments_sentiment_daum_' + model3_mecab_Name,
    index=None,
    encoding='utf-8',
)

In [None]:
%%time
# Install a global filter that silences all warnings from here on.
warnings.filterwarnings('ignore')
# Apply every loaded classifier to the Naver vectors; bm.PredictSentiment is
# expected to return (column name, values) pairs -- TODO confirm.
predict_Outcome_naver = pd.DataFrame.from_dict(
    dict(
        bm.PredictSentiment(naver_vecs_by_model3, clf_key, clf)
        for clf_key, clf in load_Classifier_by_model3_Dict.items()
    )
)
# Attach the predictions to the comment metadata on index labels.
# NOTE(review): rows only line up if both frames share the same index -- confirm.
predict_Outcome_naver = extDfNaver.merge(predict_Outcome_naver, left_index=True, right_index=True)
predict_Outcome_naver.to_csv(
    outcomeNaverCommentsPath + 'outcome_comments_sentiment_naver_' + model3_mecab_Name,
    index=None,
    encoding='utf-8',
)

In [None]:
# Unbind everything created for Mecab model 3 now that its results are written.
del model3_mecab, model3_mecab_Name
del classifier_by_model3, load_Classifier_by_model3_Dict
del daum_vecs_by_model3, naver_vecs_by_model3
del predict_Outcome_naver, predict_Outcome_daum