# News Classification Using Doc2Vec
> * 네이버(Naver) 뉴스 기사로 모델을 만들고 평가한 뒤, 다음(Daum) 뉴스 기사를 분류해 본다.

In [13]:
import pickle
import html
import multiprocessing
from collections import namedtuple, OrderedDict
import re
import sys
import os

os.environ['KERAS_BACKEND']='tensorflow'

import numpy as np
from tqdm import tqdm
tqdm.pandas(desc="progress-bar")
import pandas as pd

from gensim.models import doc2vec, KeyedVectors
from gensim.models.doc2vec import TaggedDocument

from konlpy.utils import pprint

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, classification_report, roc_curve,  accuracy_score
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import scale, MinMaxScaler, LabelEncoder
from sklearn.manifold import TSNE

import keras.backend.tensorflow_backend as K
from keras.preprocessing import sequence
from keras_tqdm import TQDMCallback, TQDMNotebookCallback
from keras.models import Model, Sequential
from keras.regularizers import l2
from keras.optimizers import SGD
from keras.utils import np_utils
from keras.layers import Input, Flatten, Dense, Embedding, embeddings, merge, Dropout, Activation,  LSTM, Bidirectional, SimpleRNN, GRU
from keras.layers.convolutional import Conv1D, Conv2D
from keras.layers.pooling import MaxPooling1D, GlobalMaxPooling1D
from keras.layers.normalization import BatchNormalization
from keras.layers.core import SpatialDropout1D
from keras.utils import np_utils
from tensorflow.python.client import device_lib
from keras.layers.merge import dot

import xgboost as xgb

import matplotlib.pyplot as plt

import bokeh.plotting as bp
from bokeh.models import HoverTool, BoxSelectTool
from bokeh.plotting import figure, show, output_notebook

In [14]:
# Show the compute devices TensorFlow can see, to confirm the runtime setup.
print (device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 15610050390881519760
]


In [15]:
def Make_Roc_Curve(x, y, model1, model2, model3, model4):
    """Plot the ROC curves of four fitted classifiers on a single figure.

    Args:
        x: Features passed to each model's ``predict``. For ``model4`` they
            are wrapped in an ``xgboost.DMatrix`` first.
        y: True labels, forwarded to ``sklearn.metrics.roc_curve``.
        model1: Fitted model drawn with the label "Logistic Regression".
        model2: Fitted model drawn with the label "RandomForest".
        model3: Fitted model drawn with the label "Kernel SVM".
        model4: Fitted XGBoost model drawn with the label "XGBoost".

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # NOTE(review): roc_curve receives the output of `predict`, not a
    # continuous score, and it only handles binary targets -- confirm that
    # both match how this helper is called.
    print ('Logistic Regression')
    fpr1, tpr1, _ = roc_curve(y, model1.predict(x))
    print ('Random Forest')
    fpr2, tpr2, _ = roc_curve(y, model2.predict(x))
    print ('Kernel SVM')
    fpr3, tpr3, _ = roc_curve(y, model3.predict(x))
    print ('XGBoost')
    import xgboost as xgb
    fpr4, tpr4, _ = roc_curve(y, model4.predict(xgb.DMatrix(x)))

    plt.plot(fpr1, tpr1, label="Logistic Regression")
    plt.plot(fpr2, tpr2, label="RandomForest")
    plt.plot(fpr3, tpr3, label="Kernel SVM")
    plt.plot(fpr4, tpr4, label='XGBoost')
    plt.plot([0, 1], [0, 1], 'k--', label="random guess")
    # The legend is built from the artists that exist when it is called, so it
    # has to come after the diagonal for "random guess" to be listed.
    plt.legend()
    plt.xlabel('False Positive Rate (Fall-Out)')
    plt.ylabel('True Positive Rate (Recall)')
    plt.title('Receiver operating characteristic example')
    plt.show()

In [16]:
def plot_history(history):
    """Plot the accuracy and loss curves recorded by ``fit()``.

    Two figures are shown in turn: accuracy, then loss. Each has a 'train'
    and a 'valid' curve.

    Args:
        history: Object whose ``history`` attribute is a mapping containing
            ``loss`` and ``val_loss``, plus the accuracy series stored under
            either ``acc``/``val_acc`` or ``accuracy``/``val_accuracy``.

    Raises:
        KeyError: If an expected series is missing from ``history.history``.
    """
    logs = history.history
    # The accuracy series is logged as 'acc' or 'accuracy' depending on the
    # Keras release and the metric name given to compile(); accept either.
    acc_key = 'acc' if 'acc' in logs else 'accuracy'

    for metric, key in (('accuracy', acc_key), ('loss', 'loss')):
        plt.plot(logs[key])
        plt.plot(logs['val_' + key])
        plt.title('model ' + metric)
        plt.ylabel(metric)
        plt.xlabel('epoch')
        plt.legend(['train', 'valid'], loc='upper left')
        plt.show()

In [17]:
from ckonlpy.tag import Twitter
from konlpy.tag import Mecab
# Tagger instances used further down: `ct` feeds the Twitter-tagged pipeline,
# `mecab` feeds the Mecab-tagged one.
ct = Twitter()
mecab = Mecab()
def nav_tokenizer(tagger, corpus, stopwords):
    """Tokenize ``corpus`` and return its tokens as joined strings.

    Args:
        tagger: Object exposing ``pos(text)``, which yields sequences of
            strings whose first element is the token text.
        corpus: Text to tokenize.
        stopwords: Container of token texts to drop.

    Returns:
        list[str]: One ``'/'``-joined string per kept token, in the order
        returned by ``tagger.pos``.
    """
    tokens = []
    for morph in tagger.pos(corpus):
        if morph[0] in stopwords:
            continue
        tokens.append('/'.join(morph))
    return tokens

In [18]:
def MakeTaggedData(df, taggedDoc, tagger, stopwords, labelEncoder):
    """Turn every row of ``df`` into a tagged document record.

    Args:
        df: DataFrame with 'title', 'mainText' and 'category' columns.
        taggedDoc: Callable invoked as ``taggedDoc(words, tags, category)``
            to build each record. When ``None``, the module-level
            ``TaggedDocument`` is used instead.
        tagger: POS tagger forwarded to ``nav_tokenizer``.
        stopwords: Token texts that ``nav_tokenizer`` drops.
        labelEncoder: Fitted encoder; its ``transform`` is applied to a
            one-element list holding the row's category.

    Returns:
        list: One record per row, in ``df.index`` order. Each record holds
        the tokens, the tag list ``['news_<index>']`` and the encoded
        category.
    """
    # Use the factory the caller passed in rather than silently relying on
    # whatever the global name `TaggedDocument` is bound to at call time.
    make_doc = TaggedDocument if taggedDoc is None else taggedDoc
    w2v_docs = []
    for idx in tqdm(df.index):
        # Title and body are tokenized together as one text.
        text = df.loc[idx, 'title'] + '.\n' + df.loc[idx, 'mainText']
        pos = nav_tokenizer(tagger, text, stopwords)
        encodeCategory = labelEncoder.transform([df.loc[idx, 'category']])
        w2v_docs.append(make_doc(pos, ['news_' + str(idx)], encodeCategory))
    return w2v_docs

In [19]:
import multiprocessing

# Number of CPUs reported by the OS; passed as `workers` when training below.
cores = multiprocessing.cpu_count()


def Make_Doc2Vec_Model(modelPath, data, size, dm, dm_concat, dm_mean, hs, negative, epoch, window, alpha, min_alpha, workers, tagger):
    """Train a gensim Doc2Vec model, save it, and return it.

    The file name is derived from the hyperparameters and appended directly
    to ``modelPath`` (plain string concatenation), then printed.

    Args:
        modelPath: Prefix prepended to the generated file name.
        data: Corpus of tagged documents. It is iterated once to build the
            vocabulary and again during training.
        size: Forwarded as ``vector_size``.
        dm, dm_concat, dm_mean, hs, negative: Forwarded to ``Doc2Vec``.
        epoch: Forwarded as ``epochs``.
        window: Forwarded as ``window``; when ``None`` the argument is left
            out so that gensim's default applies.
        alpha, min_alpha: Forwarded to ``Doc2Vec``.
        workers: Forwarded to ``Doc2Vec``.
        tagger: Label that is only embedded in the file name.

    Returns:
        The trained ``Doc2Vec`` model.
    """
    from tqdm import tqdm
    tqdm.pandas(desc="progress-bar")
    from datetime import datetime
    from gensim.models import doc2vec
    start = datetime.now()
    modelName = 'doc2vec_size-{}_epoch-{}_window-{}_negative-{}_hs-{}_dm-{}_dm_concat-{}_dm_mean-{}_by-{}.model'.format(
        size, epoch, window, negative, hs, dm, dm_concat, dm_mean, tagger)
    modelName = modelPath+modelName
    print (modelName)

    params = dict(vector_size=size, dm=dm, dm_concat=dm_concat,
                  dm_mean=dm_mean, negative=negative, hs=hs,
                  alpha=alpha, min_alpha=min_alpha, workers=workers,
                  epochs=epoch)
    if window is not None:
        params['window'] = window
    d2v_model = doc2vec.Doc2Vec(**params)

    d2v_model.build_vocab(tqdm(data))
    # `epochs` is the attribute that matches the `epochs=` constructor
    # argument; `iter` is only a deprecated alias for it.
    d2v_model.train(tqdm(data), total_examples=d2v_model.corpus_count, epochs=d2v_model.epochs)

    # The elapsed time is taken before saving, so it excludes the save itself.
    end = datetime.now()
    d2v_model.save(modelName)
    print ("Total running time: ", end-start)
    return d2v_model


print (cores)

4


## Load Data

In [20]:
def _load_news_frame(path):
    """Load a pickled dict and return it as a DataFrame with an 'id' column.

    The dict keys become the rows' 'id' values.
    """
    # NOTE: pickle.load can execute arbitrary code; only open trusted files.
    with open(path, 'rb') as fp:
        records = pickle.load(fp)
    frame = pd.DataFrame.from_dict(records, orient = 'index')
    frame.reset_index(inplace = True)
    frame.rename(columns = {'index' : 'id'}, inplace = True)
    return frame


#Naver
naverData = _load_news_frame('./data/pre_data/stastics/for_statistics_Naver_from_mongodb.pickled')
#Daum
daumData = _load_news_frame('./data/pre_data/stastics/for_statistics_daum_from_mongodb.pickled')

print ('Naver : {}'.format(naverData.shape))
print ('Daum : {}'.format(daumData.shape))

Naver : (15120, 11)
Daum : (9372, 11)


## Stopwords

In [21]:
# One stopword per line; the context manager closes the file handle.
with open('./data/stopwordsList.txt', encoding='utf-8') as stopword_file:
    stopwords = [line.strip() for line in stopword_file]

## document Labeling

In [22]:
# Record type for one tokenized article: (words, tags, category).
# NOTE: this rebinds the name `TaggedDocument` that was imported from
# gensim.models.doc2vec at the top of the notebook.
TaggedDocument = namedtuple('TaggedDocument', 'words tags category')

> * words : 기사에서 나온 단어들 or keywords
> * tags : 문서 tag
> * category : 기사 category (LabelEncoder로 인코딩한 값)
>> 기사분류가 daum보다 naver에서 더 세분화되어 있기 때문에 네이버의 category 분류를 이용하기로 함

## Category 

In [23]:
# Reuse the cached LabelEncoder when it exists; otherwise fit one on the
# Naver categories and cache it.
# NOTE: pickle.load can execute arbitrary code; only open trusted files.
labelEncoderPath = './data/pre_data/news_tagged_data/pre_data_category_label_encoder_by_ct_for_doc2vec_news_classification.pickled'
if os.path.isfile(labelEncoderPath):
    with open(labelEncoderPath, 'rb') as fp:
        le = pickle.load(fp)
else:
    le = LabelEncoder()
    le.fit(naverData['category'])
    with open(labelEncoderPath, 'wb') as fp:
        pickle.dump(le, fp)
print (le.classes_)

['IT/과학' '경제' '사회' '생활/문화' '세계' '스포츠' '연예' '정치']


## Twitter

### Doc2Vec 기본 포맷으로 변경

In [24]:
# Reuse the cached Twitter-tagged documents when they exist; otherwise tag
# the Naver articles and cache the result.
# NOTE: pickle.load can execute arbitrary code; only open trusted files.
taggedDataPath = './data/pre_data/news_tagged_data/pre_data_by_ct_for_doc2vec_news_classification.pickled'
if os.path.isfile(taggedDataPath):
    with open(taggedDataPath, 'rb') as fp:
        w2v_docs = pickle.load(fp)
else:
    w2v_docs = MakeTaggedData(naverData, TaggedDocument, ct, stopwords, le)
    with open(taggedDataPath, 'wb') as fp:
        pickle.dump(w2v_docs, fp)

In [25]:
# The Twitter tagger is not used after this point; drop the reference if it
# is still defined.
if 'ct' in locals():
    del ct

### train dataset & test dataset

In [26]:
# Cache files for the train/test split of the Twitter-tagged documents.
trainName = './data/pre_data/news_train_test_Data/pre_data_doc2vec_train_for_news_classification_by_ct.pickled'
testName = './data/pre_data/news_train_test_Data/pre_data_doc2vec_test_for_news_classification_by_ct.pickled'

In [27]:
# Reuse the cached split only when both halves exist; otherwise make a new
# 85/15 split and cache it.
# NOTE: pickle.load can execute arbitrary code; only open trusted files.
if os.path.isfile(trainName) and os.path.isfile(testName):
    with open(trainName, 'rb') as fp:
        train = pickle.load(fp)
    with open(testName, 'rb') as fp:
        test = pickle.load(fp)
else:
    train, test = train_test_split(w2v_docs, test_size = 0.15)
    with open(trainName, 'wb') as fp:
        pickle.dump(train, fp)
    with open(testName, 'wb') as fp:
        pickle.dump(test, fp)

In [28]:
# Only `train` and `test` are used from here on; drop the full document list
# if it was loaded.
if 'w2v_docs' in locals():
    del w2v_docs

### model 1

In [29]:
# Prefix that Make_Doc2Vec_Model joins to the generated file name by plain
# string concatenation, so the trailing '/' is required.
modelPath = './news_model/'

In [30]:
from konlpy.utils import pprint

In [34]:
%%time
# PV-DM with concatenated context vectors (dm=1, dm_concat=1), window 5,
# trained on the Twitter-tagged documents.
# NOTE(review): alpha == min_alpha, so the learning rate presumably stays
# constant instead of decaying -- confirm this is intended.
d2v_model = Make_Doc2Vec_Model(modelPath=modelPath, data=train, size = 500, dm = 1, dm_concat = 1,
                   dm_mean = 0, negative = 7, hs = 0, epoch = 20, window = 5,
                   alpha = 0.025, min_alpha = 0.025, workers = cores, tagger = 'ct')



  0%|          | 0/12852 [00:00<?, ?it/s][A[A

  3%|▎         | 407/12852 [00:00<00:03, 4009.54it/s][A[A

./news_model/doc2vec_size-500_epoch-20_window-5_negative-7_hs-0_dm-1_dm_concat-1_dm_mean-0_by-ct.model




  4%|▍         | 530/12852 [00:00<00:08, 1470.52it/s][A[A

  5%|▍         | 630/12852 [00:00<00:15, 783.27it/s] [A[A

  6%|▌         | 707/12852 [00:01<00:20, 604.58it/s][A[A

  6%|▌         | 767/12852 [00:01<00:21, 554.61it/s][A[A

  6%|▋         | 818/12852 [00:01<00:23, 520.54it/s][A[A

  7%|▋         | 862/12852 [00:01<00:24, 498.57it/s][A[A

  7%|▋         | 901/12852 [00:01<00:25, 464.88it/s][A[A

  7%|▋         | 934/12852 [00:02<00:26, 451.27it/s][A[A

  8%|▊         | 965/12852 [00:02<00:26, 441.48it/s][A[A

  8%|▊         | 995/12852 [00:02<00:27, 434.97it/s][A[A

  8%|▊         | 1043/12852 [00:02<00:27, 436.14it/s][A[A

  8%|▊         | 1078/12852 [00:02<00:27, 429.91it/s][A[A

  9%|▊         | 1112/12852 [00:02<00:27, 426.22it/s][A[A

  9%|▉         | 1146/12852 [00:02<00:27, 422.12it/s][A[A

  9%|▉         | 1179/12852 [00:02<00:27, 417.57it/s][A[A

 10%|▉         | 1225/12852 [00:02<00:27, 418.73it/s][A[A

 10%|▉         | 1262/12852 [0

 70%|██████▉   | 8985/12852 [00:17<00:07, 501.58it/s][A[A

 70%|███████   | 9035/12852 [00:18<00:07, 500.10it/s][A[A

 71%|███████   | 9080/12852 [00:18<00:07, 498.13it/s][A[A

 71%|███████▏  | 9158/12852 [00:18<00:07, 499.54it/s][A[A

 72%|███████▏  | 9208/12852 [00:18<00:07, 499.19it/s][A[A

 72%|███████▏  | 9257/12852 [00:18<00:07, 497.86it/s][A[A

 73%|███████▎  | 9322/12852 [00:18<00:07, 498.64it/s][A[A

 73%|███████▎  | 9393/12852 [00:18<00:06, 499.71it/s][A[A

 74%|███████▎  | 9472/12852 [00:18<00:06, 501.17it/s][A[A

 74%|███████▍  | 9535/12852 [00:19<00:06, 500.62it/s][A[A

 75%|███████▍  | 9605/12852 [00:19<00:06, 499.27it/s][A[A

 75%|███████▌  | 9657/12852 [00:19<00:06, 498.92it/s][A[A

 76%|███████▌  | 9722/12852 [00:19<00:06, 499.53it/s][A[A

 76%|███████▌  | 9775/12852 [00:19<00:06, 497.51it/s][A[A

 77%|███████▋  | 9854/12852 [00:19<00:06, 498.98it/s][A[A

 77%|███████▋  | 9909/12852 [00:19<00:05, 498.63it/s][A[A

 78%|███████▊  | 9961/12

 14%|█▍        | 1813/12852 [00:43<04:24, 41.78it/s][A[A

 14%|█▍        | 1847/12852 [00:44<04:23, 41.79it/s][A[A

 15%|█▍        | 1868/12852 [00:45<04:24, 41.45it/s][A[A

 15%|█▍        | 1883/12852 [00:45<04:23, 41.65it/s][A[A

 15%|█▌        | 1928/12852 [00:46<04:20, 41.91it/s][A[A

 15%|█▌        | 1949/12852 [00:46<04:22, 41.59it/s][A[A

 15%|█▌        | 1969/12852 [00:47<04:20, 41.74it/s][A[A

 16%|█▌        | 2011/12852 [00:47<04:18, 42.01it/s][A[A

 16%|█▌        | 2030/12852 [00:48<04:18, 41.83it/s][A[A

 16%|█▌        | 2053/12852 [00:48<04:17, 41.97it/s][A[A

 16%|█▌        | 2070/12852 [00:49<04:16, 42.11it/s][A[A

 16%|█▌        | 2085/12852 [00:49<04:16, 41.97it/s][A[A

 16%|█▋        | 2103/12852 [00:50<04:17, 41.67it/s][A[A

 17%|█▋        | 2125/12852 [00:50<04:16, 41.85it/s][A[A

 17%|█▋        | 2145/12852 [00:51<04:15, 41.90it/s][A[A

 17%|█▋        | 2168/12852 [00:51<04:14, 42.06it/s][A[A

 17%|█▋        | 2183/12852 [00:52<04:16

 37%|███▋      | 4814/12852 [01:58<03:18, 40.47it/s][A[A

 38%|███▊      | 4863/12852 [01:59<03:16, 40.60it/s][A[A

 38%|███▊      | 4886/12852 [02:00<03:16, 40.54it/s][A[A

 38%|███▊      | 4925/12852 [02:00<03:14, 40.83it/s][A[A

 39%|███▊      | 4951/12852 [02:01<03:13, 40.75it/s][A[A

 39%|███▊      | 4969/12852 [02:02<03:13, 40.71it/s][A[A

 39%|███▉      | 5008/12852 [02:02<03:11, 40.95it/s][A[A

 39%|███▉      | 5024/12852 [02:03<03:11, 40.79it/s][A[A

 39%|███▉      | 5043/12852 [02:03<03:11, 40.74it/s][A[A

 40%|███▉      | 5080/12852 [02:04<03:09, 40.96it/s][A[A

 40%|███▉      | 5100/12852 [02:04<03:09, 40.82it/s][A[A

 40%|███▉      | 5113/12852 [02:05<03:09, 40.74it/s][A[A

 40%|████      | 5154/12852 [02:05<03:07, 40.96it/s][A[A

 40%|████      | 5179/12852 [02:06<03:07, 40.91it/s][A[A

 40%|████      | 5198/12852 [02:07<03:07, 40.84it/s][A[A

 41%|████      | 5233/12852 [02:07<03:05, 41.00it/s][A[A

 41%|████      | 5251/12852 [02:08<03:05

 63%|██████▎   | 8033/12852 [03:15<01:57, 41.13it/s][A[A

 63%|██████▎   | 8052/12852 [03:16<01:57, 40.96it/s][A[A

 63%|██████▎   | 8073/12852 [03:16<01:56, 41.04it/s][A[A

 63%|██████▎   | 8111/12852 [03:16<01:55, 41.18it/s][A[A

 63%|██████▎   | 8129/12852 [03:18<01:55, 40.98it/s][A[A

 63%|██████▎   | 8154/12852 [03:18<01:54, 41.08it/s][A[A

 64%|██████▎   | 8165/12852 [03:18<01:54, 41.11it/s][A[A

 64%|██████▎   | 8178/12852 [03:18<01:53, 41.12it/s][A[A

 64%|██████▍   | 8202/12852 [03:20<01:53, 40.93it/s][A[A

 64%|██████▍   | 8242/12852 [03:20<01:52, 41.09it/s][A[A

 64%|██████▍   | 8261/12852 [03:20<01:51, 41.15it/s][A[A

 64%|██████▍   | 8280/12852 [03:22<01:51, 40.93it/s][A[A

 65%|██████▍   | 8299/12852 [03:22<01:51, 40.98it/s][A[A

 65%|██████▍   | 8312/12852 [03:22<01:50, 41.02it/s][A[A

 65%|██████▍   | 8331/12852 [03:22<01:50, 41.07it/s][A[A

 65%|██████▍   | 8351/12852 [03:25<01:50, 40.69it/s][A[A

 65%|██████▌   | 8367/12852 [03:25<01:50

 90%|████████▉ | 11524/12852 [04:39<00:32, 41.20it/s][A[A

 90%|████████▉ | 11542/12852 [04:40<00:31, 41.21it/s][A[A

 90%|█████████ | 11578/12852 [04:40<00:30, 41.30it/s][A[A

 90%|█████████ | 11602/12852 [04:41<00:30, 41.19it/s][A[A

 90%|█████████ | 11620/12852 [04:41<00:29, 41.21it/s][A[A

 91%|█████████ | 11633/12852 [04:42<00:29, 41.24it/s][A[A

 91%|█████████ | 11648/12852 [04:42<00:29, 41.26it/s][A[A

 91%|█████████ | 11666/12852 [04:43<00:28, 41.12it/s][A[A

 91%|█████████ | 11686/12852 [04:44<00:28, 41.14it/s][A[A

 91%|█████████ | 11710/12852 [04:44<00:27, 41.20it/s][A[A

 91%|█████████▏| 11730/12852 [04:44<00:27, 41.23it/s][A[A

 91%|█████████▏| 11752/12852 [04:45<00:26, 41.09it/s][A[A

 92%|█████████▏| 11773/12852 [04:46<00:26, 41.10it/s][A[A

 92%|█████████▏| 11790/12852 [04:46<00:25, 41.12it/s][A[A

 92%|█████████▏| 11797/12852 [04:46<00:25, 41.13it/s][A[A

 92%|█████████▏| 11814/12852 [04:48<00:25, 40.95it/s][A[A

 92%|█████████▏| 11856/1

Total running time:  1:10:18.610310
CPU times: user 1h 17min 13s, sys: 1min 45s, total: 1h 18min 59s
Wall time: 1h 10min 25s


In [35]:
# Make_Doc2Vec_Model already saved the model to disk; drop the in-memory copy.
del d2v_model

### model 2

In [36]:
%%time
# PV-DM with averaged context vectors (dm=1, dm_mean=1, dm_concat=0),
# window 10, trained on the Twitter-tagged documents.
# NOTE(review): alpha == min_alpha, so the learning rate presumably stays
# constant instead of decaying -- confirm this is intended.
d2v_model = Make_Doc2Vec_Model(modelPath=modelPath, data=train, size = 500, dm = 1, dm_concat = 0,
                   dm_mean = 1, negative = 7, hs = 0, epoch = 20, window = 10,
                   alpha = 0.025, min_alpha = 0.025, workers = cores, tagger = 'ct')



  0%|          | 0/12852 [00:00<?, ?it/s][A[A

  0%|          | 2/12852 [00:00<11:30, 18.61it/s][A[A

./news_model/doc2vec_size-500_epoch-20_window-10_negative-7_hs-0_dm-1_dm_concat-0_dm_mean-1_by-ct.model




  0%|          | 45/12852 [00:00<00:59, 214.94it/s][A[A

  1%|          | 81/12852 [00:00<00:48, 263.00it/s][A[A

  1%|          | 102/12852 [00:00<00:55, 229.25it/s][A[A

  1%|          | 122/12852 [00:00<00:57, 222.23it/s][A[A

  1%|▏         | 179/12852 [00:00<00:46, 272.55it/s][A[A

  2%|▏         | 223/12852 [00:00<00:43, 287.37it/s][A[A

  2%|▏         | 300/12852 [00:00<00:38, 325.52it/s][A[A

  3%|▎         | 338/12852 [00:01<00:39, 318.78it/s][A[A

  3%|▎         | 397/12852 [00:01<00:36, 341.28it/s][A[A

  3%|▎         | 439/12852 [00:01<00:36, 337.97it/s][A[A

  4%|▎         | 478/12852 [00:01<00:37, 333.90it/s][A[A

  4%|▍         | 514/12852 [00:01<00:37, 329.86it/s][A[A

  5%|▍         | 594/12852 [00:01<00:34, 357.83it/s][A[A

  5%|▌         | 655/12852 [00:01<00:33, 364.59it/s][A[A

  6%|▌         | 731/12852 [00:01<00:31, 385.38it/s][A[A

  6%|▌         | 786/12852 [00:02<00:31, 385.32it/s][A[A

  7%|▋         | 866/12852 [00:02<00:31,

100%|█████████▉| 12829/12852 [00:16<00:00, 797.60it/s][A[A

100%|██████████| 12852/12852 [00:16<00:00, 797.30it/s][A[A



  0%|          | 1/12852 [00:00<38:41,  5.54it/s][A[A

  2%|▏         | 250/12852 [00:00<00:29, 433.77it/s][A[A

  3%|▎         | 323/12852 [00:00<00:33, 375.83it/s][A[A

  3%|▎         | 398/12852 [00:01<00:36, 340.65it/s][A[A

  4%|▎         | 476/12852 [00:01<00:36, 335.06it/s][A[A

  4%|▍         | 556/12852 [00:01<00:38, 315.62it/s][A[A

  5%|▍         | 636/12852 [00:01<00:38, 319.14it/s][A[A

  5%|▌         | 701/12852 [00:02<00:39, 305.24it/s][A[A

  6%|▌         | 781/12852 [00:02<00:40, 299.64it/s][A[A

  7%|▋         | 860/12852 [00:02<00:40, 297.64it/s][A[A

  7%|▋         | 922/12852 [00:03<00:40, 292.20it/s][A[A

  8%|▊         | 979/12852 [00:03<00:39, 300.49it/s][A[A

  8%|▊         | 1014/12852 [00:03<00:39, 296.23it/s][A[A

  8%|▊         | 1056/12852 [00:03<00:39, 296.42it/s][A[A

  8%|▊         | 1087/12852 [00:03<

 44%|████▎     | 5612/12852 [00:22<00:28, 254.70it/s][A[A

 44%|████▍     | 5640/12852 [00:22<00:28, 254.67it/s][A[A

 44%|████▍     | 5680/12852 [00:22<00:28, 254.39it/s][A[A

 44%|████▍     | 5718/12852 [00:22<00:27, 254.91it/s][A[A

 45%|████▍     | 5747/12852 [00:22<00:27, 254.53it/s][A[A

 45%|████▌     | 5788/12852 [00:22<00:27, 255.20it/s][A[A

 45%|████▌     | 5819/12852 [00:22<00:27, 253.20it/s][A[A

 46%|████▌     | 5879/12852 [00:23<00:27, 254.20it/s][A[A

 46%|████▌     | 5909/12852 [00:23<00:27, 253.68it/s][A[A

 47%|████▋     | 5978/12852 [00:23<00:27, 253.93it/s][A[A

 47%|████▋     | 6045/12852 [00:23<00:26, 254.61it/s][A[A

 47%|████▋     | 6074/12852 [00:23<00:26, 254.75it/s][A[A

 48%|████▊     | 6108/12852 [00:23<00:26, 254.69it/s][A[A

 48%|████▊     | 6159/12852 [00:24<00:26, 255.57it/s][A[A

 48%|████▊     | 6194/12852 [00:24<00:26, 255.06it/s][A[A

 49%|████▉     | 6282/12852 [00:24<00:25, 256.36it/s][A[A

 50%|████▉     | 6366/12

 97%|█████████▋| 12450/12852 [00:43<00:01, 287.77it/s][A[A

 97%|█████████▋| 12490/12852 [00:43<00:01, 287.44it/s][A[A

 98%|█████████▊| 12535/12852 [00:43<00:01, 287.06it/s][A[A

 98%|█████████▊| 12602/12852 [00:43<00:00, 287.14it/s][A[A

 98%|█████████▊| 12633/12852 [00:43<00:00, 287.18it/s][A[A

 99%|█████████▊| 12675/12852 [00:44<00:00, 287.24it/s][A[A

 99%|█████████▉| 12732/12852 [00:44<00:00, 287.86it/s][A[A

 99%|█████████▉| 12770/12852 [00:44<00:00, 287.94it/s][A[A

100%|█████████▉| 12806/12852 [00:44<00:00, 288.03it/s][A[A

100%|█████████▉| 12842/12852 [00:44<00:00, 287.97it/s][A[A

100%|██████████| 12852/12852 [00:44<00:00, 287.65it/s][A[A

Total running time:  0:12:55.135201
CPU times: user 22min 44s, sys: 15.3 s, total: 22min 59s
Wall time: 12min 56s


In [37]:
# Make_Doc2Vec_Model already saved the model to disk; drop the in-memory copy.
del d2v_model

### model 3

In [38]:
%%time
# PV-DBOW (dm=0), trained on the Twitter-tagged documents. window=None makes
# Make_Doc2Vec_Model leave the window argument out, so gensim's default applies.
# NOTE(review): alpha == min_alpha, so the learning rate presumably stays
# constant instead of decaying -- confirm this is intended.
d2v_model = Make_Doc2Vec_Model(modelPath=modelPath, data=train, size = 500, dm = 0, dm_concat = 0,
                   dm_mean = 0, negative = 7, hs = 0, epoch = 20, window = None,
                   alpha = 0.025, min_alpha = 0.025, workers = cores, tagger = 'ct')

./news_model/doc2vec_size-500_epoch-20_window-None_negative-7_hs-0_dm-0_dm_concat-0_dm_mean-0_by-ct.model




  0%|          | 0/12852 [00:00<?, ?it/s][A[A

  2%|▏         | 230/12852 [00:00<00:05, 2246.97it/s][A[A

  3%|▎         | 382/12852 [00:00<00:06, 1886.68it/s][A[A

  4%|▍         | 527/12852 [00:00<00:07, 1733.27it/s][A[A

  6%|▌         | 728/12852 [00:00<00:06, 1802.69it/s][A[A

  7%|▋         | 871/12852 [00:00<00:06, 1712.21it/s][A[A

  8%|▊         | 1063/12852 [00:00<00:06, 1748.99it/s][A[A

  9%|▉         | 1217/12852 [00:00<00:06, 1714.22it/s][A[A

 11%|█         | 1419/12852 [00:00<00:06, 1749.96it/s][A[A

 12%|█▏        | 1586/12852 [00:00<00:06, 1697.67it/s][A[A

 14%|█▎        | 1744/12852 [00:01<00:06, 1684.46it/s][A[A

 15%|█▍        | 1921/12852 [00:01<00:06, 1685.56it/s][A[A

 16%|█▋        | 2108/12852 [00:01<00:06, 1700.74it/s][A[A

 18%|█▊        | 2338/12852 [00:01<00:06, 1746.22it/s][A[A

 20%|█▉        | 2526/12852 [00:01<00:06, 1720.52it/s][A[A

 21%|██▏       | 2736/12852 [00:01<00:05, 1744.24it/s][A[A

 23%|██▎       | 2922/12

 31%|███       | 3995/12852 [00:09<00:20, 436.22it/s][A[A

 32%|███▏      | 4054/12852 [00:09<00:20, 437.05it/s][A[A

 32%|███▏      | 4099/12852 [00:09<00:20, 435.82it/s][A[A

 32%|███▏      | 4170/12852 [00:09<00:19, 434.90it/s][A[A

 33%|███▎      | 4224/12852 [00:09<00:19, 435.31it/s][A[A

 33%|███▎      | 4267/12852 [00:09<00:19, 434.55it/s][A[A

 34%|███▎      | 4320/12852 [00:09<00:19, 434.51it/s][A[A

 34%|███▍      | 4379/12852 [00:10<00:19, 434.67it/s][A[A

 34%|███▍      | 4422/12852 [00:10<00:19, 432.26it/s][A[A

 35%|███▍      | 4480/12852 [00:10<00:19, 433.73it/s][A[A

 35%|███▌      | 4537/12852 [00:10<00:19, 433.54it/s][A[A

 36%|███▌      | 4581/12852 [00:10<00:19, 431.42it/s][A[A

 36%|███▌      | 4632/12852 [00:10<00:19, 431.47it/s][A[A

 37%|███▋      | 4718/12852 [00:10<00:18, 431.39it/s][A[A

 37%|███▋      | 4797/12852 [00:11<00:18, 431.36it/s][A[A

 38%|███▊      | 4847/12852 [00:11<00:18, 431.57it/s][A[A

 38%|███▊      | 4890/12

 94%|█████████▍| 12143/12852 [00:29<00:01, 415.45it/s][A[A

 95%|█████████▍| 12187/12852 [00:29<00:01, 414.97it/s][A[A

 95%|█████████▌| 12227/12852 [00:29<00:01, 414.25it/s][A[A

 96%|█████████▌| 12281/12852 [00:29<00:01, 414.51it/s][A[A

 96%|█████████▌| 12336/12852 [00:29<00:01, 414.78it/s][A[A

 96%|█████████▋| 12379/12852 [00:29<00:01, 414.35it/s][A[A

 97%|█████████▋| 12430/12852 [00:30<00:01, 414.30it/s][A[A

 97%|█████████▋| 12474/12852 [00:30<00:00, 414.41it/s][A[A

 97%|█████████▋| 12518/12852 [00:30<00:00, 413.94it/s][A[A

 98%|█████████▊| 12557/12852 [00:30<00:00, 413.45it/s][A[A

 98%|█████████▊| 12602/12852 [00:30<00:00, 413.18it/s][A[A

 98%|█████████▊| 12656/12852 [00:30<00:00, 413.19it/s][A[A

 99%|█████████▉| 12693/12852 [00:30<00:00, 413.02it/s][A[A

 99%|█████████▉| 12751/12852 [00:30<00:00, 413.37it/s][A[A

100%|█████████▉| 12793/12852 [00:30<00:00, 413.40it/s][A[A

100%|█████████▉| 12835/12852 [00:31<00:00, 413.18it/s][A[A

100%|███

Total running time:  0:08:57.162400
CPU times: user 15min 19s, sys: 10.6 s, total: 15min 30s
Wall time: 8min 58s


In [39]:
# Make_Doc2Vec_Model already saved the model to disk; drop the in-memory copy.
del d2v_model

## Mecab

### Doc2Vec 기본 포맷으로 변경

In [40]:
# Reuse the cached Mecab-tagged documents when they exist; otherwise tag the
# Naver articles and cache the result.
# NOTE: pickle.load can execute arbitrary code; only open trusted files.
taggedDataPath = './data/pre_data/news_tagged_data/pre_data_by_mecab_for_doc2vec_news_classification.pickled'
if os.path.isfile(taggedDataPath):
    with open(taggedDataPath, 'rb') as fp:
        w2v_docs = pickle.load(fp)
else:
    w2v_docs = MakeTaggedData(naverData, TaggedDocument, mecab, stopwords, le)
    with open(taggedDataPath, 'wb') as fp:
        pickle.dump(w2v_docs, fp)

In [41]:
# The Mecab tagger is not used after this point; drop the reference if it is
# still defined.
if 'mecab' in locals():
    del mecab

### train dataset & test dataset

In [42]:
# Cache files for the train/test split of the Mecab-tagged documents.
trainName = './data/pre_data/news_train_test_Data/pre_data_doc2vec_train_for_news_classification_by_mecab.pickled'
testName = './data/pre_data/news_train_test_Data/pre_data_doc2vec_test_for_news_classification_by_mecab.pickled'

In [43]:
# Reuse the cached split only when both halves exist; otherwise make a new
# 85/15 split and cache it.
# NOTE: pickle.load can execute arbitrary code; only open trusted files.
if os.path.isfile(trainName) and os.path.isfile(testName):
    with open(trainName, 'rb') as fp:
        train = pickle.load(fp)
    with open(testName, 'rb') as fp:
        test = pickle.load(fp)
else:
    train, test = train_test_split(w2v_docs, test_size = 0.15)
    with open(trainName, 'wb') as fp:
        pickle.dump(train, fp)
    with open(testName, 'wb') as fp:
        pickle.dump(test, fp)

In [44]:
# Only `train` and `test` are used from here on; drop the full document list
# if it was loaded.
if 'w2v_docs' in locals():
    del w2v_docs

### model 1

In [45]:
# Prefix that Make_Doc2Vec_Model joins to the generated file name by plain
# string concatenation, so the trailing '/' is required.
modelPath = './news_model/'

In [46]:
from konlpy.utils import pprint

In [47]:
%%time
# PV-DM with concatenated context vectors (dm=1, dm_concat=1), window 5,
# trained on the Mecab-tagged documents.
# NOTE(review): alpha == min_alpha, so the learning rate presumably stays
# constant instead of decaying -- confirm this is intended.
d2v_model = Make_Doc2Vec_Model(modelPath=modelPath, data=train, size = 500, dm = 1, dm_concat = 1,
                   dm_mean = 0, negative = 7, hs = 0, epoch = 20, window = 5,
                   alpha = 0.025, min_alpha = 0.025, workers = cores, tagger = 'mecab')



  0%|          | 0/12852 [00:00<?, ?it/s][A[A

  1%|          | 130/12852 [00:00<00:10, 1183.57it/s][A[A

./news_model/doc2vec_size-500_epoch-20_window-5_negative-7_hs-0_dm-1_dm_concat-1_dm_mean-0_by-mecab.model




  2%|▏         | 304/12852 [00:00<00:08, 1442.06it/s][A[A

  4%|▎         | 466/12852 [00:00<00:08, 1472.56it/s][A[A

  4%|▍         | 567/12852 [00:00<00:10, 1195.22it/s][A[A

  6%|▌         | 768/12852 [00:00<00:09, 1335.82it/s][A[A

  7%|▋         | 891/12852 [00:00<00:09, 1281.79it/s][A[A

  9%|▉         | 1144/12852 [00:00<00:08, 1440.14it/s][A[A

 10%|█         | 1302/12852 [00:00<00:08, 1368.78it/s][A[A

 12%|█▏        | 1546/12852 [00:01<00:07, 1470.80it/s][A[A

 13%|█▎        | 1718/12852 [00:01<00:07, 1413.24it/s][A[A

 15%|█▍        | 1879/12852 [00:01<00:07, 1424.89it/s][A[A

 16%|█▌        | 2032/12852 [00:01<00:07, 1407.13it/s][A[A

 17%|█▋        | 2231/12852 [00:01<00:07, 1443.47it/s][A[A

 19%|█▊        | 2391/12852 [00:01<00:07, 1447.94it/s][A[A

 20%|█▉        | 2548/12852 [00:01<00:07, 1436.71it/s][A[A

 21%|██        | 2697/12852 [00:01<00:07, 1429.22it/s][A[A

 22%|██▏       | 2842/12852 [00:02<00:07, 1415.81it/s][A[A

 24%|██▎   

 13%|█▎        | 1729/12852 [00:31<03:20, 55.61it/s][A[A

 14%|█▎        | 1747/12852 [00:31<03:18, 55.91it/s][A[A

 14%|█▎        | 1766/12852 [00:31<03:17, 56.03it/s][A[A

 14%|█▍        | 1785/12852 [00:31<03:16, 56.31it/s][A[A

 14%|█▍        | 1806/12852 [00:32<03:17, 55.84it/s][A[A

 14%|█▍        | 1821/12852 [00:32<03:17, 55.89it/s][A[A

 14%|█▍        | 1837/12852 [00:32<03:17, 55.91it/s][A[A

 14%|█▍        | 1858/12852 [00:33<03:15, 56.20it/s][A[A

 15%|█▍        | 1874/12852 [00:33<03:17, 55.67it/s][A[A

 15%|█▍        | 1906/12852 [00:34<03:16, 55.79it/s][A[A

 15%|█▍        | 1927/12852 [00:34<03:14, 56.04it/s][A[A

 15%|█▌        | 1942/12852 [00:34<03:16, 55.57it/s][A[A

 15%|█▌        | 1961/12852 [00:35<03:14, 55.92it/s][A[A

 15%|█▌        | 1977/12852 [00:35<03:15, 55.72it/s][A[A

 16%|█▌        | 1993/12852 [00:35<03:14, 55.74it/s][A[A

 16%|█▌        | 2012/12852 [00:36<03:15, 55.42it/s][A[A

 16%|█▌        | 2049/12852 [00:36<03:13

 35%|███▍      | 4448/12852 [01:20<02:32, 55.12it/s][A[A

 35%|███▍      | 4466/12852 [01:20<02:31, 55.24it/s][A[A

 35%|███▍      | 4482/12852 [01:21<02:31, 55.25it/s][A[A

 35%|███▌      | 4501/12852 [01:21<02:30, 55.30it/s][A[A

 35%|███▌      | 4522/12852 [01:21<02:31, 55.17it/s][A[A

 35%|███▌      | 4542/12852 [01:22<02:30, 55.30it/s][A[A

 35%|███▌      | 4562/12852 [01:22<02:29, 55.34it/s][A[A

 36%|███▌      | 4578/12852 [01:22<02:29, 55.34it/s][A[A

 36%|███▌      | 4601/12852 [01:23<02:29, 55.25it/s][A[A

 36%|███▌      | 4633/12852 [01:23<02:28, 55.35it/s][A[A

 36%|███▌      | 4651/12852 [01:24<02:28, 55.35it/s][A[A

 36%|███▋      | 4667/12852 [01:24<02:28, 55.22it/s][A[A

 37%|███▋      | 4702/12852 [01:25<02:27, 55.28it/s][A[A

 37%|███▋      | 4711/12852 [01:25<02:27, 55.25it/s][A[A

 37%|███▋      | 4726/12852 [01:25<02:27, 55.07it/s][A[A

 37%|███▋      | 4758/12852 [01:26<02:26, 55.11it/s][A[A

 37%|███▋      | 4775/12852 [01:26<02:26

 57%|█████▋    | 7376/12852 [02:13<01:38, 55.40it/s][A[A

 58%|█████▊    | 7397/12852 [02:13<01:38, 55.42it/s][A[A

 58%|█████▊    | 7414/12852 [02:13<01:38, 55.36it/s][A[A

 58%|█████▊    | 7450/12852 [02:14<01:37, 55.41it/s][A[A

 58%|█████▊    | 7469/12852 [02:14<01:37, 55.44it/s][A[A

 58%|█████▊    | 7486/12852 [02:15<01:36, 55.41it/s][A[A

 58%|█████▊    | 7503/12852 [02:15<01:36, 55.49it/s][A[A

 59%|█████▊    | 7522/12852 [02:15<01:35, 55.55it/s][A[A

 59%|█████▊    | 7539/12852 [02:15<01:35, 55.46it/s][A[A

 59%|█████▉    | 7553/12852 [02:16<01:35, 55.38it/s][A[A

 59%|█████▉    | 7588/12852 [02:16<01:34, 55.53it/s][A[A

 59%|█████▉    | 7609/12852 [02:17<01:34, 55.47it/s][A[A

 59%|█████▉    | 7627/12852 [02:17<01:34, 55.43it/s][A[A

 60%|█████▉    | 7657/12852 [02:17<01:33, 55.54it/s][A[A

 60%|█████▉    | 7674/12852 [02:18<01:33, 55.44it/s][A[A

 60%|█████▉    | 7691/12852 [02:18<01:33, 55.37it/s][A[A

 60%|██████    | 7729/12852 [02:19<01:32

 82%|████████▏ | 10532/12852 [03:10<00:41, 55.33it/s][A[A

 82%|████████▏ | 10547/12852 [03:10<00:41, 55.28it/s][A[A

 82%|████████▏ | 10560/12852 [03:10<00:41, 55.29it/s][A[A

 82%|████████▏ | 10571/12852 [03:11<00:41, 55.32it/s][A[A

 82%|████████▏ | 10588/12852 [03:11<00:40, 55.26it/s][A[A

 83%|████████▎ | 10609/12852 [03:11<00:40, 55.28it/s][A[A

 83%|████████▎ | 10625/12852 [03:12<00:40, 55.30it/s][A[A

 83%|████████▎ | 10644/12852 [03:12<00:39, 55.34it/s][A[A

 83%|████████▎ | 10659/12852 [03:12<00:39, 55.26it/s][A[A

 83%|████████▎ | 10677/12852 [03:13<00:39, 55.30it/s][A[A

 83%|████████▎ | 10693/12852 [03:13<00:39, 55.29it/s][A[A

 83%|████████▎ | 10708/12852 [03:13<00:38, 55.32it/s][A[A

 83%|████████▎ | 10726/12852 [03:14<00:38, 55.20it/s][A[A

 84%|████████▎ | 10743/12852 [03:14<00:38, 55.22it/s][A[A

 84%|████████▎ | 10758/12852 [03:14<00:37, 55.22it/s][A[A

 84%|████████▍ | 10780/12852 [03:15<00:37, 55.15it/s][A[A

 84%|████████▍ | 10799/1

Total running time:  1:11:08.990168
CPU times: user 1h 28min 1s, sys: 35.7 s, total: 1h 28min 37s
Wall time: 1h 11min 14s


In [48]:
# Make_Doc2Vec_Model already saved the model to disk; drop the in-memory copy.
del d2v_model

### model 2

In [49]:
%%time
#PV-DM w/
d2v_model = Make_Doc2Vec_Model(modelPath=modelPath, data=train, size = 500, dm = 1, dm_concat = 0,
                   dm_mean = 1, negative = 7, hs = 0, epoch = 20, window = 10,
                   alpha = 0.025, min_alpha = 0.025, workers = cores, tagger = 'mecab')



  0%|          | 0/12852 [00:00<?, ?it/s][A[A

  2%|▏         | 231/12852 [00:00<00:05, 2257.40it/s][A[A

./news_model/doc2vec_size-500_epoch-20_window-10_negative-7_hs-0_dm-1_dm_concat-0_dm_mean-1_by-mecab.model




  3%|▎         | 419/12852 [00:00<00:06, 2056.14it/s][A[A

  5%|▌         | 661/12852 [00:00<00:05, 2175.99it/s][A[A

  7%|▋         | 899/12852 [00:00<00:05, 2226.23it/s][A[A

  9%|▉         | 1134/12852 [00:00<00:05, 2254.75it/s][A[A

 11%|█         | 1361/12852 [00:00<00:05, 2256.72it/s][A[A

 12%|█▏        | 1587/12852 [00:00<00:05, 2241.06it/s][A[A

 14%|█▍        | 1795/12852 [00:00<00:04, 2220.92it/s][A[A

 16%|█▌        | 2002/12852 [00:00<00:04, 2186.85it/s][A[A

 18%|█▊        | 2262/12852 [00:01<00:04, 2224.27it/s][A[A

 19%|█▉        | 2482/12852 [00:01<00:04, 2189.60it/s][A[A

 21%|██        | 2724/12852 [00:01<00:04, 2209.61it/s][A[A

 23%|██▎       | 2951/12852 [00:01<00:04, 2214.19it/s][A[A

 25%|██▍       | 3173/12852 [00:01<00:04, 2212.81it/s][A[A

 26%|██▋       | 3395/12852 [00:01<00:04, 2209.86it/s][A[A

 28%|██▊       | 3616/12852 [00:01<00:04, 2112.05it/s][A[A

 30%|██▉       | 3812/12852 [00:01<00:04, 1978.12it/s][A[A

 31%|███ 

 29%|██▉       | 3706/12852 [00:10<00:26, 339.76it/s][A[A

 29%|██▉       | 3740/12852 [00:11<00:26, 339.06it/s][A[A

 29%|██▉       | 3791/12852 [00:11<00:26, 337.75it/s][A[A

 30%|███       | 3864/12852 [00:11<00:26, 337.69it/s][A[A

 31%|███       | 3921/12852 [00:11<00:26, 339.39it/s][A[A

 31%|███       | 3959/12852 [00:11<00:26, 339.32it/s][A[A

 31%|███       | 3996/12852 [00:11<00:26, 339.49it/s][A[A

 31%|███▏      | 4032/12852 [00:11<00:26, 339.14it/s][A[A

 32%|███▏      | 4079/12852 [00:12<00:25, 337.57it/s][A[A

 32%|███▏      | 4138/12852 [00:12<00:25, 338.98it/s][A[A

 32%|███▏      | 4175/12852 [00:12<00:25, 337.87it/s][A[A

 33%|███▎      | 4216/12852 [00:12<00:25, 336.46it/s][A[A

 33%|███▎      | 4274/12852 [00:12<00:25, 338.22it/s][A[A

 34%|███▎      | 4312/12852 [00:12<00:25, 337.85it/s][A[A

 34%|███▍      | 4355/12852 [00:12<00:25, 337.98it/s][A[A

 34%|███▍      | 4391/12852 [00:13<00:25, 336.94it/s][A[A

 34%|███▍      | 4428/12

 80%|███████▉  | 10248/12852 [00:31<00:07, 329.16it/s][A[A

 80%|████████  | 10282/12852 [00:31<00:07, 328.98it/s][A[A

 80%|████████  | 10315/12852 [00:31<00:07, 328.80it/s][A[A

 81%|████████  | 10351/12852 [00:31<00:07, 328.62it/s][A[A

 81%|████████  | 10403/12852 [00:31<00:07, 328.33it/s][A[A

 81%|████████▏ | 10472/12852 [00:31<00:07, 328.47it/s][A[A

 82%|████████▏ | 10532/12852 [00:32<00:07, 329.08it/s][A[A

 82%|████████▏ | 10569/12852 [00:32<00:06, 329.10it/s][A[A

 83%|████████▎ | 10609/12852 [00:32<00:06, 328.69it/s][A[A

 83%|████████▎ | 10644/12852 [00:32<00:06, 328.75it/s][A[A

 83%|████████▎ | 10677/12852 [00:32<00:06, 328.51it/s][A[A

 83%|████████▎ | 10709/12852 [00:32<00:06, 328.41it/s][A[A

 84%|████████▎ | 10758/12852 [00:32<00:06, 328.08it/s][A[A

 84%|████████▍ | 10799/12852 [00:32<00:06, 327.99it/s][A[A

 84%|████████▍ | 10834/12852 [00:33<00:06, 327.77it/s][A[A

 85%|████████▍ | 10878/12852 [00:33<00:06, 327.73it/s][A[A

 85%|███

Total running time:  0:12:21.582926
CPU times: user 23min 34s, sys: 12.6 s, total: 23min 46s
Wall time: 12min 23s


In [50]:
# Drop the reference to the model built in the previous cell before the next
# configuration is trained (presumably to release its memory).
del d2v_model

### model 3

In [51]:
%%time
# PV - DBOW
d2v_model = Make_Doc2Vec_Model(modelPath=modelPath, data=train, size = 500, dm = 0, dm_concat = 0,
                   dm_mean = 0, negative = 7, hs = 0, epoch = 20, window = None,
                   alpha = 0.025, min_alpha = 0.025, workers = cores, tagger = 'mecab')





./news_model/doc2vec_size-500_epoch-20_window-None_negative-7_hs-0_dm-0_dm_concat-0_dm_mean-0_by-mecab.model


  0%|          | 0/12852 [00:00<?, ?it/s][A[A

  1%|          | 134/12852 [00:00<00:09, 1312.96it/s][A[A

  3%|▎         | 338/12852 [00:00<00:07, 1667.51it/s][A[A

  5%|▍         | 582/12852 [00:00<00:06, 1922.16it/s][A[A

  6%|▋         | 833/12852 [00:00<00:05, 2068.84it/s][A[A

  8%|▊         | 1042/12852 [00:00<00:05, 2060.77it/s][A[A

 10%|▉         | 1223/12852 [00:00<00:05, 2015.29it/s][A[A

 11%|█         | 1426/12852 [00:00<00:05, 2010.95it/s][A[A

 13%|█▎        | 1673/12852 [00:00<00:05, 2055.37it/s][A[A

 15%|█▍        | 1900/12852 [00:00<00:05, 2069.67it/s][A[A

 16%|█▋        | 2114/12852 [00:01<00:05, 2080.80it/s][A[A

 18%|█▊        | 2360/12852 [00:01<00:04, 2113.60it/s][A[A

 20%|██        | 2580/12852 [00:01<00:04, 2116.36it/s][A[A

 22%|██▏       | 2799/12852 [00:01<00:04, 2121.47it/s][A[A

 23%|██▎       | 3017/12852 [00:01<00:04, 2105.21it/s][A[A

 25%|██▌       | 3236/12852 [00:01<00:04, 2109.23it/s][A[A

 27%|██▋       | 3449/128

 37%|███▋      | 4758/12852 [00:09<00:16, 496.68it/s][A[A

 38%|███▊      | 4828/12852 [00:09<00:16, 496.80it/s][A[A

 38%|███▊      | 4897/12852 [00:09<00:16, 496.53it/s][A[A

 39%|███▊      | 4962/12852 [00:09<00:15, 496.97it/s][A[A

 39%|███▉      | 5036/12852 [00:10<00:15, 496.45it/s][A[A

 40%|███▉      | 5105/12852 [00:10<00:15, 497.02it/s][A[A

 40%|████      | 5170/12852 [00:10<00:15, 498.61it/s][A[A

 41%|████      | 5226/12852 [00:10<00:15, 498.69it/s][A[A

 41%|████      | 5280/12852 [00:10<00:15, 498.55it/s][A[A

 42%|████▏     | 5344/12852 [00:10<00:15, 498.19it/s][A[A

 42%|████▏     | 5423/12852 [00:10<00:14, 497.50it/s][A[A

 43%|████▎     | 5492/12852 [00:11<00:14, 497.57it/s][A[A

 43%|████▎     | 5553/12852 [00:11<00:14, 497.12it/s][A[A

 44%|████▍     | 5627/12852 [00:11<00:14, 497.86it/s][A[A

 44%|████▍     | 5703/12852 [00:11<00:14, 498.12it/s][A[A

 45%|████▍     | 5764/12852 [00:11<00:14, 496.67it/s][A[A

 45%|████▌     | 5834/12

Total running time:  0:08:29.196870
CPU times: user 15min 19s, sys: 13.6 s, total: 15min 32s
Wall time: 8min 31s


In [52]:
# Drop the reference to the model built in the previous cell
# (presumably to release its memory).
del d2v_model