# News Clustering

In [1]:
import pickle
import itertools
import warnings
import sys 
import os
import logging
from multiprocessing import cpu_count

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from ckonlpy.tag import Twitter
from konlpy.tag import Mecab

from gensim.models import CoherenceModel, LdaModel, LsiModel, HdpModel, ldaseqmodel, LdaMulticore, lda_dispatcher
from gensim.models.wrappers import LdaMallet, DtmModel
from gensim.corpora import Dictionary, bleicorpus
from gensim.matutils import hellinger
from gensim import corpora, models, similarities
from gensim.models.callbacks import CoherenceMetric, DiffMetric, PerplexityMetric, ConvergenceMetric

import pyLDAvis.gensim

from tqdm import tqdm
tqdm.pandas(desc="progress-bar")

warnings.filterwarnings('ignore')

In [2]:
def Nav_tokenizer(doc, tagger, stopwords):
    """Tokenize ``doc`` with ``tagger.pos`` and keep the informative tokens.

    Parameters
    ----------
    doc : text handed unchanged to ``tagger.pos``.
    tagger : object exposing ``pos(doc)``; every returned item is indexed
        with ``[0]`` to get the token text (presumably a ``(word, tag)``
        pair -- confirm for custom taggers).
    stopwords : container supporting ``in``; tokens found in it are dropped.

    Returns
    -------
    list
        Token texts, in tagger order, that are longer than one character
        and are not stopwords.
    """
    # Logical `and` / `not in` replace the original bitwise `&` and
    # `not ... in`: same result, idiomatic, and the stopword lookup is
    # skipped for one-character tokens.
    return [
        token[0]
        for token in tagger.pos(doc)
        if len(token[0]) > 1 and token[0] not in stopwords
    ]

In [3]:
def Nav_tokenizer_noun(doc, tagger, stopwords):
    """Extract nouns from ``doc`` with ``tagger.nouns`` and filter them.

    Parameters
    ----------
    doc : text handed unchanged to ``tagger.nouns``.
    tagger : object exposing ``nouns(doc)`` that yields the noun strings.
    stopwords : container supporting ``in``; nouns found in it are dropped.

    Returns
    -------
    list
        Nouns, in tagger order, that are longer than one character and are
        not stopwords.
    """
    # Logical `and` / `not in` replace the original bitwise `&` and
    # `not ... in`: same result, idiomatic, and short-circuiting.
    return [
        noun
        for noun in tagger.nouns(doc)
        if len(noun) > 1 and noun not in stopwords
    ]

In [4]:
def evaluate_graph(dictionary, corpus, texts, limit, tagger, modelpath, random_state=None):
    """
    Train (or load cached) LDA models for 1 .. limit-1 topics and plot c_v coherence.

    For every topic count the LDA model and its CoherenceModel are looked up
    under ``modelpath``; a missing file is built and saved, an existing file is
    loaded instead of being recomputed.

    Parameters:
    ----------
    dictionary : Gensim dictionary
    corpus : Gensim corpus
    texts : tokenized documents passed to CoherenceModel as ``texts``
    limit : topic limit (exclusive); models are built for range(1, limit)
    tagger : label interpolated into the cache file names
    modelpath : directory holding the cached model files
    random_state : forwarded to LdaModel when a model has to be trained;
        the default ``None`` keeps the previous unseeded behaviour. Models
        loaded from cache are not affected by it.

    Returns:
    -------
    lm_list : List of LDA topic models
    c_v : Coherence values corresponding to the LDA model with respective number of topics
    """
    c_v = []
    lm_list = []
    topic_counts = range(1, limit)
    for num_topics in topic_counts:
        ldaFileName = os.path.join(modelpath, 'ldamodel_{}_topic-{}'.format(tagger, num_topics))
        cohFileName = os.path.join(modelpath, 'coherencemodel_{}_topic-{}'.format(tagger, num_topics))

        if os.path.isfile(ldaFileName):
            lm = LdaModel.load(ldaFileName)
        else:
            lm = LdaModel(corpus=corpus, num_topics=num_topics, id2word=dictionary,
                          random_state=random_state)
            lm.save(ldaFileName)
        lm_list.append(lm)

        if os.path.isfile(cohFileName):
            cm = CoherenceModel.load(cohFileName)
        else:
            cm = CoherenceModel(model=lm, texts=texts, dictionary=dictionary, coherence='c_v')
            cm.save(cohFileName)
        c_v.append(cm.get_coherence())

    # Show graph
    # The label is attached to the line itself: the original
    # plt.legend(("c_v")) passed a plain string (not a 1-tuple), which
    # matplotlib iterates character by character, labelling the curve "c".
    plt.plot(topic_counts, c_v, label="c_v")
    plt.xlabel("num_topics")
    plt.ylabel("Coherence score")
    plt.legend(loc='best')
    plt.show()

    return lm_list, c_v

### Stopwords

In [5]:
# Load the stopword list, one entry per line, stripped of surrounding
# whitespace. The `with` block closes the file handle, which the original
# bare open() call never did.
with open('./data/stopwordsList.txt', encoding='utf-8') as stopwords_file:
    stopwords = [line.strip() for line in stopwords_file]

## News

### Naver

In [6]:
# Load the pickled Naver article dump and turn it into a DataFrame with one
# row per dictionary key. The `with` block closes the file handle that the
# original inline open() leaked.
# NOTE: pickle.load can execute arbitrary code -- only load trusted files.
with open('./data/pre_data/stastics/for_statistics_Naver_from_mongodb.pickled', 'rb') as naver_file:
    dictNaver = pickle.load(naver_file)
dfNaver = pd.DataFrame.from_dict(dictNaver, orient='index')
print (dfNaver.shape)

(15120, 10)


### Daum

In [7]:
# Load the pickled Daum article dump and turn it into a DataFrame with one
# row per dictionary key. The `with` block closes the file handle that the
# original inline open() leaked.
# NOTE: pickle.load can execute arbitrary code -- only load trusted files.
with open('./data/pre_data/stastics/for_statistics_daum_from_mongodb.pickled', 'rb') as daum_file:
    dictDaum = pickle.load(daum_file)
dfDaum = pd.DataFrame.from_dict(dictDaum, orient='index')
print (dfDaum.shape)

(9372, 10)


## Daum

### 뉴스 기사 통합

In [8]:
# Stack the Naver and Daum articles row-wise into one frame, then preview
# the first five rows.
combinedDf = pd.concat(objs=[dfNaver, dfDaum], axis=0)
combinedDf.head(5)

Unnamed: 0,category,date,press,number_of_comment,number_of_crawled_comment,rank,title,mainText,keywords,extracted_keywords
5a29c445588c132954d1973a,정치,2017.12.07,연합뉴스,1713,1465,1,"北외무성 ""전쟁 바라지 않지만 결코 피하지 않을 것""","美고위인사 대북언급 비난하며 ""전쟁 기정사실화"" 위협 며칠 새 이어지는 북한 군민...","[외무성, 핵전쟁, 대변인]","{조선반도, 중앙, 북한, 도화선, 고위, 대변인, 미국, 핵전쟁}"
5a29c445588c132954d1973b,정치,2017.12.07,한국일보,2551,2062,2,"예산전쟁, 예결위 간사ㆍ호남이 웃었다",예결위 간사들이 최대 수혜자..당 지도부 내 몫 챙기기도 여전 황주홍ㆍ김도읍 등...,"[예산, 예결위, soc]","{증액, 국민의당, 예산안, 지역구, 의원, 정부안, 호남}"
5a29c445588c132954d1973c,정치,2017.12.07,뉴시스,610,536,3,"혐의 부인에 20시간 조사…檢, 최경환 구속 카드 꺼내나",【서울=뉴시스】 최진석 기자 = 박근혜 정부 시절 국가정보원 특수활동비 수수 의혹 ...,"[최경환, 구속영장, 국가정보원]","{국정원장, 조사, 혐의, 의원, 정기국회, 검찰, 구속영장 청구}"
5a29c445588c132954d1973d,정치,2017.12.07,연합뉴스,145,133,4,"최재형 감사원장 후보자 ""독립성 강화는 임명권자의 뜻""",감사원장에 내정된 최재형 사법연수원장(고양=연합뉴스) 이희열 기자 = 7일 감사원장...,"[이슈 · 최재형 감사원장 내정, 감사원장, 최재형, 감사원]","{법관, 지명, 후보자, 공직 사회, 생활, 감사원장}"
5a29c445588c132954d1973e,정치,2017.12.07,동아일보,1074,932,5,"B-1B 한반도에 뜨자, 평양 비운 김정은",[동아일보] 북중 접경지 양강도 삼지연 시찰… 방북 유엔 사무차장 면담 안할듯 B-...,"[김정은, b-1b, 한반도]","{사무차장, 양강도, 시찰, 공장, 접경, 펠트먼, 훈련, 김정은, 삼지연}"


In [9]:
# One keyword collection per article, in the row order of combinedDf.
extKeywords = combinedDf['extracted_keywords'].tolist()

In [10]:
# Platform-specific locations for the clustering data and the cached models.
# Both values end with '/' because later cells build file names by plain
# string concatenation.
if sys.platform == 'darwin':
    clusteringPath = '/Volumes/disk1/Clustering/'
    clusteringModelPath = '/Volumes/disk1/Clustering_model/'
elif sys.platform == 'win32':
    clusteringPath = 'd:/Clustering/'
    clusteringModelPath = 'd:/Clustering_model/'
else:
    # Any other platform (e.g. Linux) previously left both names undefined,
    # so the next cell failed with NameError. Fall back to directories next
    # to the notebook and make sure they exist.
    clusteringPath = './Clustering/'
    clusteringModelPath = './Clustering_model/'
    for _fallback_dir in (clusteringPath, clusteringModelPath):
        os.makedirs(_fallback_dir, exist_ok=True)

### 사전 데이터 제작

In [11]:
%%time
# Reuse the cached gensim Dictionary when the file exists; otherwise build it
# from the extracted keywords and save it for the next run.
dict_keywords_name = clusteringModelPath + 'dictionary_keywords'
if os.path.isfile(dict_keywords_name):
    dict_keywords = Dictionary.load(dict_keywords_name)
else:
    dict_keywords = Dictionary(extKeywords)
    dict_keywords.save(dict_keywords_name)

CPU times: user 23.1 ms, sys: 5.5 ms, total: 28.6 ms
Wall time: 412 ms


In [12]:
%%time
# Bag-of-words corpus: one doc2bow vector per article, cached as a pickle.
# Both the read and the write go through `with` so the handles are closed
# (and the written data flushed) deterministically; the original inline
# open() calls never closed them.
corpus_keywords_name = clusteringModelPath + 'corpus_keywords.pickled'
if os.path.isfile(corpus_keywords_name):
    # NOTE: pickle.load can execute arbitrary code -- only load trusted files.
    with open(corpus_keywords_name, 'rb') as corpus_file:
        corpus_keywords = pickle.load(corpus_file)
else:
    corpus_keywords = [dict_keywords.doc2bow(text) for text in tqdm(extKeywords)]
    with open(corpus_keywords_name, 'wb') as corpus_file:
        pickle.dump(corpus_keywords, corpus_file)

CPU times: user 176 ms, sys: 12.7 ms, total: 189 ms
Wall time: 213 ms


In [13]:
# Report the vocabulary size and the number of bag-of-words documents,
# one per line.
print(
    'Number of unique tokens: %d' % len(dict_keywords),
    'Number of documents: %d' % len(corpus_keywords),
    sep='\n',
)

Number of unique tokens: 32800
Number of documents: 24492


### LSI (Latent Semantic Indexing)
* an indexing and retrieval method that uses a mathematical technique called singular value decomposition (SVD) to identify patterns in the relationships between the terms and concepts contained in an unstructured collection of text  

In [14]:
%%time
# Reuse the cached 20-topic LSI model when the file exists; otherwise fit it
# on the keyword corpus and save it.
lsimodel_keywords_name = clusteringModelPath + 'lsimodel_keywords'
if os.path.isfile(lsimodel_keywords_name):
    lsimodel_keywords = LsiModel.load(lsimodel_keywords_name)
else:
    lsimodel_keywords = LsiModel(
        corpus=corpus_keywords,
        id2word=dict_keywords,
        num_topics=20,
    )
    lsimodel_keywords.save(lsimodel_keywords_name)

CPU times: user 55.1 ms, sys: 18.4 ms, total: 73.5 ms
Wall time: 156 ms


In [15]:
# Display 20 LSI topics -- the same count the model above is built with.
lsimodel_keywords.show_topics(num_topics = 20)

[(0,
  '0.520*"선수" + 0.516*"경기" + 0.329*"한국" + 0.174*"미국" + 0.151*"계약" + 0.142*"감독" + 0.127*"중국" + 0.124*"일본" + 0.122*"기록" + 0.111*"손흥민"'),
 (1,
  '0.400*"대통령" + 0.327*"미국" + 0.319*"정부" + -0.298*"선수" + 0.256*"한국" + 0.253*"서울" + 0.225*"중국" + -0.223*"경기" + 0.198*"북한" + 0.193*"검찰"'),
 (2,
  '-0.690*"서울" + 0.295*"한국" + 0.281*"미국" + -0.257*"기온" + -0.177*"한파" + -0.162*"날씨" + 0.154*"중국" + -0.142*"추위" + -0.133*"지역" + 0.127*"북한"'),
 (3,
  '0.455*"선수" + -0.417*"경기" + -0.330*"한국" + 0.283*"대통령" + 0.248*"계약" + 0.212*"검찰" + 0.176*"구단" + 0.170*"FA" + -0.148*"서울" + 0.134*"혐의"'),
 (4,
  '0.416*"경기" + 0.364*"대통령" + -0.355*"미국" + 0.308*"검찰" + -0.233*"서울" + -0.198*"선수" + 0.182*"혐의" + 0.151*"수사" + -0.150*"계약" + 0.148*"청와대"'),
 (5,
  '-0.694*"방송" + -0.450*"모습" + -0.209*"한국" + 0.185*"미국" + 0.167*"경기" + -0.157*"사진" + 0.109*"정부" + -0.108*"공개" + -0.107*"드라마" + -0.105*"경찰"'),
 (6,
  '-0.613*"한국" + 0.533*"미국" + 0.259*"방송" + 0.238*"경기" + 0.201*"북한" + -0.161*"일본" + 0.159*"모습" + -0.085*"정부" + 0.085*"토트넘" + -0.084*"선

In [16]:
# Store the LSI topics; formatted = False asks gensim for the raw topic
# structure instead of the display strings shown above.
lsitopics_keywords = lsimodel_keywords.show_topics(formatted = False)

### HDP (Hierarchical Dirichlet Process)
* a non-parametric Bayesian method (note that no number of topics is requested)

In [17]:
%%time
# Reuse the cached HDP model when the file exists; otherwise fit it on the
# keyword corpus and save it.
hdpmodel_keywords_name = clusteringModelPath + 'hdpmodel_keywords'
if os.path.isfile(hdpmodel_keywords_name):
    hdpmodel_keywords = HdpModel.load(hdpmodel_keywords_name)
else:
    hdpmodel_keywords = HdpModel(corpus=corpus_keywords, id2word=dict_keywords)
    # Save under the same name that the isfile() check and load() use; the
    # original rebuilt the path from a second copy of the literal, so the
    # two could silently drift apart.
    hdpmodel_keywords.save(hdpmodel_keywords_name)

CPU times: user 845 ms, sys: 266 ms, total: 1.11 s
Wall time: 2.75 s


In [18]:
# Display the HDP topics using show_topics' default arguments.
hdpmodel_keywords.show_topics()

[(0,
  '0.004*선수 + 0.002*계약 + 0.002*경기 + 0.002*FA + 0.001*롯데 + 0.001*한국 + 0.001*구단 + 0.001*투수 + 0.001*정부 + 0.001*KIA + 0.001*두산 + 0.001*일본 + 0.001*감독 + 0.001*연봉 + 0.001*북한 + 0.001*LG + 0.001*영입 + 0.001*리그 + 0.001*미국 + 0.001*대통령'),
 (1,
  '0.003*경기 + 0.002*한국 + 0.002*선수 + 0.001*축구 + 0.001*대회 + 0.001*감독 + 0.001*토트넘 + 0.001*기록 + 0.001*산체스 + 0.001*서울 + 0.001*맨유 + 0.001*월드컵 + 0.001*손흥민 + 0.001*일본 + 0.001*중국 + 0.001*계약 + 0.001*이적 + 0.001*리그 + 0.001*관심 + 0.001*대표팀'),
 (2,
  '0.001*경기 + 0.001*선수 + 0.001*서울 + 0.001*대통령 + 0.001*한국 + 0.001*정부 + 0.001*미국 + 0.001*대표 + 0.001*의원 + 0.001*검찰 + 0.001*사고 + 0.001*방송 + 0.001*중국 + 0.001*일본 + 0.001*청와대 + 0.001*기록 + 0.001*경찰 + 0.001*의혹 + 0.001*모습 + 0.000*혐의'),
 (3,
  '0.004*서울 + 0.003*기온 + 0.002*전국 + 0.002*날씨 + 0.002*한파 + 0.002*추위 + 0.001*영하 + 0.001*지역 + 0.001*아침 + 0.001*경기 + 0.001*선수 + 0.001*지방 + 0.001*중국 + 0.001*바람 + 0.001*대통령 + 0.001*남부 + 0.001*미세먼지 + 0.001*모습 + 0.001*공기 + 0.001*경찰'),
 (4,
  '0.002*선수 + 0.001*경기 + 0.001*서울 + 0.001*대통령 + 0.001*혐의 + 0.001*북한

In [19]:
# Store the HDP topics; formatted = False asks gensim for the raw topic
# structure instead of the display strings shown above.
hdptopics_keywords = hdpmodel_keywords.show_topics(formatted = False)

### LDA (Latent Dirichlet Allocation)
* a generative statistical model that allows sets of observations to be explained by unobserved groups that explain why some parts of the data are similar  

#### LDA model1
* basic

In [20]:
# Put pyLDAvis into notebook mode before any visualisation is prepared.
pyLDAvis.enable_notebook()

In [21]:
# Training-time metrics handed to the LdaModel call below through
# `callbacks`; every metric is configured with logger = 'shell'.
pl_keywords = PerplexityMetric(
    corpus=corpus_keywords,
    logger='shell',
    title='Perplexity (twitter)',
)
ch_umass_keywords = CoherenceMetric(
    corpus=corpus_keywords,
    coherence='u_mass',
    logger='shell',
    title=' Coherence (u_mass)',
)
ch_cv_keywords = CoherenceMetric(
    corpus=corpus_keywords,
    logger='shell',
    texts=extKeywords,
    coherence='c_v',
    title='Coherence (c_v)',
)
diff_kl_keywords = DiffMetric(
    distance='kullback_leibler',
    logger='shell',
    title='Diff (kullback_leibler)',
)
# NOTE: despite the `_kl_` in its name, this metric is configured with the
# Jaccard distance.
convergence_kl_keywords = ConvergenceMetric(
    distance='jaccard',
    logger='shell',
    title='Convergence (jaccard)',
)
callbacks_keywords = [
    pl_keywords,
    ch_umass_keywords,
    ch_cv_keywords,
    diff_kl_keywords,
    convergence_kl_keywords,
]

In [22]:
%%time
# Configure root logging at INFO level and create a DEBUG-level logger for
# this notebook.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

# Reuse the cached 20-topic LDA model when the file exists; otherwise train
# it with the metric callbacks attached and save it.
ldamodel_keywords_name = clusteringModelPath + 'ldamodel_keywords'
if os.path.isfile(ldamodel_keywords_name):
    ldamodel_keywords = LdaModel.load(ldamodel_keywords_name)
else:
    ldamodel_keywords = LdaModel(
        corpus=corpus_keywords,
        num_topics=20,
        id2word=dict_keywords,
        passes=50,
        chunksize=6123,  # 4 * 6123 == 24492, the document count printed earlier
        iterations=250,
        alpha='auto',
        callbacks=callbacks_keywords,
    )
    ldamodel_keywords.save(ldamodel_keywords_name)

INFO:gensim.utils:loading LdaModel object from /Volumes/disk1/Clustering_model/ldamodel_keywords
INFO:gensim.utils:loading expElogbeta from /Volumes/disk1/Clustering_model/ldamodel_keywords.expElogbeta.npy with mmap=None
INFO:gensim.utils:setting ignored attribute dispatcher to None
INFO:gensim.utils:setting ignored attribute id2word to None
INFO:gensim.utils:setting ignored attribute state to None
INFO:gensim.utils:loaded /Volumes/disk1/Clustering_model/ldamodel_keywords
INFO:gensim.utils:loading LdaState object from /Volumes/disk1/Clustering_model/ldamodel_keywords.state
INFO:gensim.utils:loaded /Volumes/disk1/Clustering_model/ldamodel_keywords.state


CPU times: user 12.7 s, sys: 1.46 s, total: 14.2 s
Wall time: 16.6 s


In [23]:
%%time
# Reuse the cached u_mass CoherenceModel for the LDA model when the file
# exists; otherwise build it from the bag-of-words corpus and save it.
coherence1_um_keywords_name = clusteringModelPath + 'coherence1_keywords_u_mass'
if os.path.isfile(coherence1_um_keywords_name):
    cm_keywords = CoherenceModel.load(coherence1_um_keywords_name)
else:
    cm_keywords = CoherenceModel(
        model=ldamodel_keywords,
        corpus=corpus_keywords,
        dictionary=dict_keywords,
        coherence='u_mass',
    )
    cm_keywords.save(coherence1_um_keywords_name)

INFO:gensim.utils:loading CoherenceModel object from /Volumes/disk1/Clustering_model/coherence1_keywords_u_mass
INFO:gensim.utils:loading dictionary recursively from /Volumes/disk1/Clustering_model/coherence1_keywords_u_mass.dictionary.* with mmap=None
INFO:gensim.utils:loading _model recursively from /Volumes/disk1/Clustering_model/coherence1_keywords_u_mass._model.* with mmap=None
INFO:gensim.utils:loading id2word recursively from /Volumes/disk1/Clustering_model/coherence1_keywords_u_mass._model.id2word.* with mmap=None
INFO:gensim.utils:loading state recursively from /Volumes/disk1/Clustering_model/coherence1_keywords_u_mass._model.state.* with mmap=None
INFO:gensim.utils:loaded /Volumes/disk1/Clustering_model/coherence1_keywords_u_mass


CPU times: user 15.1 s, sys: 3.29 s, total: 18.4 s
Wall time: 21.3 s


In [24]:
# Coherence of the LDA model from cm_keywords, which the cell above
# configures with coherence = 'u_mass'.
print ('Coherence : {}'.format(cm_keywords.get_coherence()))

INFO:gensim.topic_coherence.text_analysis:CorpusAccumulator accumulated stats from 1000 documents
INFO:gensim.topic_coherence.text_analysis:CorpusAccumulator accumulated stats from 2000 documents
INFO:gensim.topic_coherence.text_analysis:CorpusAccumulator accumulated stats from 3000 documents
INFO:gensim.topic_coherence.text_analysis:CorpusAccumulator accumulated stats from 4000 documents
INFO:gensim.topic_coherence.text_analysis:CorpusAccumulator accumulated stats from 5000 documents
INFO:gensim.topic_coherence.text_analysis:CorpusAccumulator accumulated stats from 6000 documents
INFO:gensim.topic_coherence.text_analysis:CorpusAccumulator accumulated stats from 7000 documents
INFO:gensim.topic_coherence.text_analysis:CorpusAccumulator accumulated stats from 8000 documents
INFO:gensim.topic_coherence.text_analysis:CorpusAccumulator accumulated stats from 9000 documents
INFO:gensim.topic_coherence.text_analysis:CorpusAccumulator accumulated stats from 10000 documents
INFO:gensim.topic_c

Coherence : -10.899798791331238


In [25]:
%%time
# Reuse the cached c_v CoherenceModel for the LDA model when the file exists;
# otherwise build it from the keyword texts and save it.
coherence1_cv_keywords_name = clusteringModelPath + 'coherence1_keywords_c_v'
if os.path.isfile(coherence1_cv_keywords_name):
    cm_keywords_cv = CoherenceModel.load(coherence1_cv_keywords_name)
else:
    cm_keywords_cv = CoherenceModel(
        model=ldamodel_keywords,
        texts=extKeywords,
        dictionary=dict_keywords,
        coherence='c_v',
    )
    cm_keywords_cv.save(coherence1_cv_keywords_name)

INFO:gensim.utils:loading CoherenceModel object from /Volumes/disk1/Clustering_model/coherence1_keywords_c_v
INFO:gensim.utils:loading dictionary recursively from /Volumes/disk1/Clustering_model/coherence1_keywords_c_v.dictionary.* with mmap=None
INFO:gensim.utils:loading _model recursively from /Volumes/disk1/Clustering_model/coherence1_keywords_c_v._model.* with mmap=None
INFO:gensim.utils:loading id2word recursively from /Volumes/disk1/Clustering_model/coherence1_keywords_c_v._model.id2word.* with mmap=None
INFO:gensim.utils:loading state recursively from /Volumes/disk1/Clustering_model/coherence1_keywords_c_v._model.state.* with mmap=None
INFO:gensim.utils:loaded /Volumes/disk1/Clustering_model/coherence1_keywords_c_v


CPU times: user 13.7 s, sys: 5.2 s, total: 18.9 s
Wall time: 22.1 s


In [26]:
# Coherence of the LDA model from cm_keywords_cv, which the cell above
# configures with coherence = 'c_v'.
print ('Coherence : {}'.format(cm_keywords_cv.get_coherence()))

INFO:gensim.topic_coherence.probability_estimation:using ParallelWordOccurrenceAccumulator(processes=3, batch_size=64) to estimate probabilities from sliding windows
INFO:gensim.topic_coherence.text_analysis:serializing accumulator to return to master...
INFO:gensim.topic_coherence.text_analysis:serializing accumulator to return to master...
INFO:gensim.topic_coherence.text_analysis:serializing accumulator to return to master...
INFO:gensim.topic_coherence.text_analysis:accumulator serialized
INFO:gensim.topic_coherence.text_analysis:accumulator serialized
INFO:gensim.topic_coherence.text_analysis:accumulator serialized
INFO:gensim.topic_coherence.text_analysis:3 accumulators retrieved from output queue
INFO:gensim.topic_coherence.text_analysis:accumulated word occurrence stats for 21234 virtual documents


Coherence : 0.4587076374923855


In [27]:
%%time
# Prepare the pyLDAvis view of the LDA model; as the cell's last expression,
# the result is what the notebook displays.
pyLDAvis.gensim.prepare(ldamodel_keywords, corpus_keywords, dict_keywords)

CPU times: user 24.1 s, sys: 7.04 s, total: 31.2 s
Wall time: 40.3 s


In [28]:
# Store the LDA topics; formatted = False asks gensim for the raw topic
# structure instead of display strings.
ldatopics_keywords = ldamodel_keywords.show_topics(formatted = False)

##### display num_topics - LDA graph using c_v coherence

In [None]:
%%time
# Sweep num_topics over range(1, 100): evaluate_graph trains or loads one
# cached LDA + coherence model per value (files tagged 'mixed') and plots c_v.
lmlist_keywords, c_v_keywords = evaluate_graph(dictionary = dict_keywords, corpus = corpus_keywords, texts = extKeywords, limit = 100, tagger = 'mixed', modelpath=clusteringModelPath)

INFO:gensim.utils:loading LdaModel object from /Volumes/disk1/Clustering_model/ldamodel_mixed_topic-1
INFO:gensim.utils:loading expElogbeta from /Volumes/disk1/Clustering_model/ldamodel_mixed_topic-1.expElogbeta.npy with mmap=None
INFO:gensim.utils:setting ignored attribute id2word to None
INFO:gensim.utils:setting ignored attribute dispatcher to None
INFO:gensim.utils:setting ignored attribute state to None
INFO:gensim.utils:loaded /Volumes/disk1/Clustering_model/ldamodel_mixed_topic-1
INFO:gensim.utils:loading LdaState object from /Volumes/disk1/Clustering_model/ldamodel_mixed_topic-1.state
INFO:gensim.utils:loaded /Volumes/disk1/Clustering_model/ldamodel_mixed_topic-1.state
INFO:gensim.utils:loading CoherenceModel object from /Volumes/disk1/Clustering_model/coherencemodel_mixed_topic-1
INFO:gensim.utils:loading dictionary recursively from /Volumes/disk1/Clustering_model/coherencemodel_mixed_topic-1.dictionary.* with mmap=None
INFO:gensim.utils:loading _model recursively from /Volume

INFO:gensim.topic_coherence.probability_estimation:using ParallelWordOccurrenceAccumulator(processes=3, batch_size=64) to estimate probabilities from sliding windows
INFO:gensim.topic_coherence.text_analysis:serializing accumulator to return to master...
INFO:gensim.topic_coherence.text_analysis:serializing accumulator to return to master...
INFO:gensim.topic_coherence.text_analysis:serializing accumulator to return to master...
INFO:gensim.topic_coherence.text_analysis:accumulator serialized
INFO:gensim.topic_coherence.text_analysis:accumulator serialized
INFO:gensim.topic_coherence.text_analysis:accumulator serialized
INFO:gensim.topic_coherence.text_analysis:3 accumulators retrieved from output queue
INFO:gensim.topic_coherence.text_analysis:accumulated word occurrence stats for 15319 virtual documents
INFO:gensim.utils:loading LdaModel object from /Volumes/disk1/Clustering_model/ldamodel_mixed_topic-5
INFO:gensim.utils:loading expElogbeta from /Volumes/disk1/Clustering_model/ldamod

INFO:gensim.utils:loaded /Volumes/disk1/Clustering_model/ldamodel_mixed_topic-8.state
INFO:gensim.utils:loading CoherenceModel object from /Volumes/disk1/Clustering_model/coherencemodel_mixed_topic-8
INFO:gensim.utils:loading dictionary recursively from /Volumes/disk1/Clustering_model/coherencemodel_mixed_topic-8.dictionary.* with mmap=None
INFO:gensim.utils:loading _model recursively from /Volumes/disk1/Clustering_model/coherencemodel_mixed_topic-8._model.* with mmap=None
INFO:gensim.utils:loading id2word recursively from /Volumes/disk1/Clustering_model/coherencemodel_mixed_topic-8._model.id2word.* with mmap=None
INFO:gensim.utils:loading state recursively from /Volumes/disk1/Clustering_model/coherencemodel_mixed_topic-8._model.state.* with mmap=None
INFO:gensim.utils:loaded /Volumes/disk1/Clustering_model/coherencemodel_mixed_topic-8
INFO:gensim.topic_coherence.probability_estimation:using ParallelWordOccurrenceAccumulator(processes=3, batch_size=64) to estimate probabilities from sl

INFO:gensim.models.ldamodel:topic #1 (0.100): 0.028*"서울" + 0.015*"기온" + 0.014*"전국" + 0.011*"추위" + 0.010*"날씨" + 0.009*"한파" + 0.009*"아침" + 0.008*"지역" + 0.008*"거래소" + 0.007*"영하"
INFO:gensim.models.ldamodel:topic #3 (0.100): 0.012*"경기" + 0.007*"서비스" + 0.005*"선수" + 0.005*"기술" + 0.005*"감독" + 0.005*"사용" + 0.005*"2017" + 0.004*"대출" + 0.004*"학교" + 0.004*"개봉"
INFO:gensim.models.ldamodel:topic #7 (0.100): 0.023*"북한" + 0.012*"미국" + 0.009*"손흥민" + 0.008*"영화" + 0.008*"배우" + 0.007*"중국" + 0.007*"드라마" + 0.007*"제재" + 0.007*"토트넘" + 0.007*"화재"
INFO:gensim.models.ldamodel:topic diff=0.347252, rho=0.500000
INFO:gensim.models.ldamodel:PROGRESS: pass 0, at document #10000/24492
INFO:gensim.models.ldamodel:merging changes from 2000 documents into a model of 24492 documents
INFO:gensim.models.ldamodel:topic #2 (0.100): 0.014*"경찰" + 0.013*"정부" + 0.010*"혐의" + 0.010*"화재" + 0.007*"아파트" + 0.007*"대통령" + 0.006*"청와대" + 0.005*"중국" + 0.005*"건물" + 0.005*"사건"
INFO:gensim.models.ldamodel:topic #8 (0.100): 0.020*"선수" + 0.014*

INFO:gensim.models.ldamodel:topic #2 (0.100): 0.015*"경찰" + 0.013*"화재" + 0.010*"혐의" + 0.009*"정부" + 0.007*"사건" + 0.007*"아파트" + 0.006*"건물" + 0.006*"평균자책점" + 0.006*"멤버" + 0.006*"소속사"
INFO:gensim.models.ldamodel:topic diff=0.332403, rho=0.301511
INFO:gensim.models.ldamodel:PROGRESS: pass 0, at document #24000/24492
INFO:gensim.models.ldamodel:merging changes from 2000 documents into a model of 24492 documents
INFO:gensim.models.ldamodel:topic #6 (0.100): 0.037*"방송" + 0.023*"모습" + 0.010*"공개" + 0.010*"사랑" + 0.010*"사진" + 0.009*"예능" + 0.009*"프로그램" + 0.007*"생활" + 0.007*"이야기" + 0.007*"엄마"
INFO:gensim.models.ldamodel:topic #4 (0.100): 0.012*"한국" + 0.009*"평창" + 0.008*"중국" + 0.008*"올림픽" + 0.008*"시장" + 0.007*"미국" + 0.007*"세트" + 0.006*"방송" + 0.006*"국내" + 0.006*"호주"
INFO:gensim.models.ldamodel:topic #0 (0.100): 0.015*"검찰" + 0.013*"경기" + 0.012*"대회" + 0.012*"수사" + 0.010*"상황" + 0.009*"이적" + 0.008*"아내" + 0.008*"23" + 0.008*"tvN" + 0.007*"맨유"
INFO:gensim.models.ldamodel:topic #2 (0.100): 0.015*"화재" + 0.015*

INFO:gensim.models.ldamodel:merging changes from 2000 documents into a model of 24492 documents
INFO:gensim.models.ldamodel:topic #2 (0.091): 0.018*"정부" + 0.010*"북한" + 0.009*"미국" + 0.009*"중국" + 0.008*"대통령" + 0.007*"신생아" + 0.007*"가능성" + 0.006*"2018" + 0.006*"서비스" + 0.006*"세계"
INFO:gensim.models.ldamodel:topic #9 (0.091): 0.015*"혐의" + 0.010*"의원" + 0.010*"검찰" + 0.009*"대표" + 0.008*"사진" + 0.007*"대통령" + 0.007*"모습" + 0.006*"조사" + 0.006*"일본" + 0.006*"자금"
INFO:gensim.models.ldamodel:topic #5 (0.091): 0.009*"리그" + 0.008*"경찰" + 0.008*"실종" + 0.008*"선수" + 0.007*"경기" + 0.006*"한국" + 0.006*"중국" + 0.006*"준희양" + 0.006*"고준희" + 0.005*"피해자"
INFO:gensim.models.ldamodel:topic #8 (0.091): 0.030*"애플" + 0.018*"아이폰" + 0.015*"배터리" + 0.010*"대표" + 0.010*"시장" + 0.009*"삼성전자" + 0.008*"스마트폰" + 0.007*"소송" + 0.006*"EPL" + 0.006*"미국"
INFO:gensim.models.ldamodel:topic #7 (0.091): 0.013*"성능" + 0.008*"전망" + 0.007*"가족" + 0.007*"소비자" + 0.007*"서울" + 0.006*"눈물" + 0.005*"결혼" + 0.005*"대상" + 0.005*"영화" + 0.005*"모습"
INFO:gensim.mode

INFO:gensim.models.ldamodel:topic #7 (0.091): 0.016*"영화" + 0.013*"사랑" + 0.012*"결혼" + 0.012*"모습" + 0.010*"이야기" + 0.008*"엄마" + 0.008*"아들" + 0.008*"배우" + 0.008*"여성" + 0.008*"눈물"
INFO:gensim.models.ldamodel:topic #8 (0.091): 0.008*"삼성전자" + 0.008*"애플" + 0.008*"EPL" + 0.007*"방송" + 0.007*"시장" + 0.006*"스마트폰" + 0.005*"황금빛" + 0.005*"기술" + 0.005*"업무" + 0.005*"음악"
INFO:gensim.models.ldamodel:topic #0 (0.091): 0.026*"서울" + 0.015*"한파" + 0.013*"지역" + 0.013*"기온" + 0.012*"국민의당" + 0.010*"정당" + 0.010*"의원" + 0.009*"통합" + 0.009*"날씨" + 0.009*"추위"
INFO:gensim.models.ldamodel:topic #2 (0.091): 0.020*"정부" + 0.011*"북한" + 0.010*"2018" + 0.008*"홈런" + 0.008*"대통령" + 0.008*"넥센" + 0.008*"단장" + 0.007*"인스타그램" + 0.007*"가상 화폐" + 0.006*"기록"
INFO:gensim.models.ldamodel:topic diff=0.335241, rho=0.301511
INFO:gensim.models.ldamodel:PROGRESS: pass 0, at document #24000/24492
INFO:gensim.models.ldamodel:merging changes from 2000 documents into a model of 24492 documents
INFO:gensim.models.ldamodel:topic #1 (0.091): 0.038*"방송" 

INFO:gensim.models.ldamodel:topic #6 (0.083): 0.016*"경기" + 0.015*"신생아" + 0.014*"선수" + 0.008*"맨시티" + 0.007*"중국" + 0.007*"한국" + 0.007*"달러" + 0.007*"아이폰" + 0.007*"애플" + 0.006*"영국"
INFO:gensim.models.ldamodel:topic #7 (0.083): 0.028*"화재" + 0.014*"비트코인" + 0.013*"가상 화폐" + 0.012*"서울" + 0.008*"시장" + 0.008*"사고" + 0.008*"거래" + 0.007*"미국" + 0.007*"구조" + 0.006*"투자"
INFO:gensim.models.ldamodel:topic diff=0.362618, rho=0.577350
INFO:gensim.models.ldamodel:PROGRESS: pass 0, at document #8000/24492
INFO:gensim.models.ldamodel:merging changes from 2000 documents into a model of 24492 documents
INFO:gensim.models.ldamodel:topic #8 (0.083): 0.024*"애플" + 0.017*"배터리" + 0.013*"성능" + 0.013*"아이폰" + 0.009*"스마트폰" + 0.008*"삼성전자" + 0.007*"EPL" + 0.005*"사진" + 0.005*"성능 저하" + 0.005*"사랑"
INFO:gensim.models.ldamodel:topic #3 (0.083): 0.017*"대표" + 0.017*"통합" + 0.016*"국민의당" + 0.010*"정당" + 0.009*"소송" + 0.009*"전망" + 0.007*"케인" + 0.007*"애플" + 0.007*"방송" + 0.007*"의원"
INFO:gensim.models.ldamodel:topic #0 (0.083): 0.014*"전국"

INFO:gensim.models.ldamodel:topic diff=0.335198, rho=0.316228
INFO:gensim.models.ldamodel:PROGRESS: pass 0, at document #22000/24492
INFO:gensim.models.ldamodel:merging changes from 2000 documents into a model of 24492 documents
INFO:gensim.models.ldamodel:topic #5 (0.083): 0.036*"선수" + 0.028*"계약" + 0.017*"FA" + 0.016*"구단" + 0.013*"롯데" + 0.012*"투수" + 0.012*"경기" + 0.011*"두산" + 0.011*"메이저리그" + 0.011*"LG"
INFO:gensim.models.ldamodel:topic #2 (0.083): 0.017*"방송" + 0.013*"한국" + 0.011*"모습" + 0.011*"시즌" + 0.010*"병원" + 0.010*"프로그램" + 0.010*"예능" + 0.008*"대표팀" + 0.007*"외국인" + 0.006*"이유"
INFO:gensim.models.ldamodel:topic #7 (0.083): 0.020*"화재" + 0.014*"정부" + 0.012*"가상 화폐" + 0.010*"사고" + 0.008*"UFC" + 0.008*"현장" + 0.008*"비트코인" + 0.007*"슬기" + 0.007*"거래소" + 0.007*"감빵 생활"
INFO:gensim.models.ldamodel:topic #9 (0.083): 0.019*"영화" + 0.017*"배우" + 0.013*"방송" + 0.012*"드라마" + 0.011*"일본" + 0.010*"연기" + 0.010*"한국" + 0.007*"모습" + 0.007*"촬영" + 0.006*"작품"
INFO:gensim.models.ldamodel:topic #6 (0.083): 0.044*"경기" 

INFO:gensim.models.ldamodel:topic #2 (0.077): 0.020*"사진" + 0.015*"경찰" + 0.011*"모습" + 0.009*"방송" + 0.008*"혐의" + 0.008*"검찰" + 0.008*"영화" + 0.007*"배우" + 0.007*"공개" + 0.006*"눈물"
INFO:gensim.models.ldamodel:topic #3 (0.077): 0.016*"미국" + 0.012*"북한" + 0.009*"중국" + 0.007*"인터뷰" + 0.006*"상황" + 0.005*"검찰" + 0.005*"연구" + 0.005*"김정은" + 0.005*"웃음" + 0.005*"전쟁"
INFO:gensim.models.ldamodel:topic #0 (0.077): 0.015*"한국" + 0.012*"대통령" + 0.010*"혐의" + 0.008*"사드" + 0.007*"조사" + 0.007*"북한" + 0.006*"서울" + 0.006*"무죄" + 0.006*"경기" + 0.006*"가구"
INFO:gensim.models.ldamodel:topic #5 (0.077): 0.013*"달러" + 0.011*"스마트폰" + 0.010*"시장" + 0.010*"삼성전자" + 0.009*"제품" + 0.009*"출시" + 0.009*"도쿄" + 0.009*"중국" + 0.007*"마음" + 0.007*"EPL"
INFO:gensim.models.ldamodel:topic diff=0.365077, rho=0.577350
INFO:gensim.models.ldamodel:PROGRESS: pass 0, at document #8000/24492
INFO:gensim.models.ldamodel:merging changes from 2000 documents into a model of 24492 documents
INFO:gensim.models.ldamodel:topic #5 (0.077): 0.014*"달러" + 0.013*"시장

INFO:gensim.models.ldamodel:topic #8 (0.077): 0.018*"한파" + 0.018*"선수" + 0.016*"계약" + 0.014*"한화" + 0.010*"이적" + 0.010*"기록" + 0.010*"구단" + 0.010*"우승" + 0.008*"리그" + 0.007*"롯데"
INFO:gensim.models.ldamodel:topic #7 (0.077): 0.022*"화재" + 0.010*"사고" + 0.009*"병원" + 0.008*"현장" + 0.008*"정부" + 0.008*"건물" + 0.008*"서울" + 0.008*"화유기" + 0.007*"중국" + 0.007*"인스타그램"
INFO:gensim.models.ldamodel:topic diff=0.333749, rho=0.316228
INFO:gensim.models.ldamodel:PROGRESS: pass 0, at document #22000/24492
INFO:gensim.models.ldamodel:merging changes from 2000 documents into a model of 24492 documents
INFO:gensim.models.ldamodel:topic #0 (0.077): 0.014*"한국" + 0.012*"대통령" + 0.011*"혐의" + 0.011*"홈런" + 0.010*"여성" + 0.009*"선고" + 0.009*"재판" + 0.007*"정부" + 0.007*"남성" + 0.007*"니퍼트"
INFO:gensim.models.ldamodel:topic #3 (0.077): 0.017*"KIA" + 0.015*"북한" + 0.010*"웃음" + 0.009*"인터뷰" + 0.007*"생활" + 0.007*"슬기" + 0.007*"방송" + 0.007*"단장" + 0.007*"kt" + 0.007*"미국"
INFO:gensim.models.ldamodel:topic #8 (0.077): 0.018*"선수" + 0.017*"한

INFO:gensim.models.ldamodel:topic diff=0.426329, rho=0.707107
INFO:gensim.models.ldamodel:PROGRESS: pass 0, at document #6000/24492
INFO:gensim.models.ldamodel:merging changes from 2000 documents into a model of 24492 documents
INFO:gensim.models.ldamodel:topic #7 (0.071): 0.016*"사진" + 0.011*"애플" + 0.011*"스마트폰" + 0.010*"중국" + 0.009*"한파" + 0.009*"제품" + 0.009*"미국" + 0.009*"조치" + 0.008*"소송" + 0.007*"삼성전자"
INFO:gensim.models.ldamodel:topic #4 (0.071): 0.022*"대통령" + 0.021*"대표" + 0.014*"통합" + 0.014*"의원" + 0.014*"국민의당" + 0.013*"정당" + 0.011*"중국" + 0.011*"정부" + 0.009*"양국" + 0.008*"청와대"
INFO:gensim.models.ldamodel:topic #2 (0.071): 0.016*"서울" + 0.011*"아파트" + 0.008*"결의안" + 0.007*"조사" + 0.006*"정부" + 0.005*"한국" + 0.005*"맨체스터" + 0.005*"도로" + 0.005*"가구" + 0.005*"폭행"
INFO:gensim.models.ldamodel:topic #1 (0.071): 0.036*"미국" + 0.014*"트럼프" + 0.013*"북한" + 0.011*"예루살렘" + 0.011*"중국" + 0.011*"이스라엘" + 0.010*"대통령" + 0.009*"트럼프 대통령" + 0.007*"팔레스타인" + 0.007*"유엔"
INFO:gensim.models.ldamodel:topic #11 (0.071): 0.0

INFO:gensim.models.ldamodel:topic #2 (0.071): 0.016*"케인" + 0.014*"아파트" + 0.013*"아내" + 0.011*"대표팀" + 0.009*"서울" + 0.008*"언론" + 0.008*"보도" + 0.007*"역할" + 0.006*"음식" + 0.006*"성추행"
INFO:gensim.models.ldamodel:topic #9 (0.071): 0.040*"경기" + 0.036*"선수" + 0.023*"계약" + 0.015*"감독" + 0.015*"리그" + 0.014*"드라마" + 0.014*"기록" + 0.012*"구단" + 0.012*"KIA" + 0.011*"FA"
INFO:gensim.models.ldamodel:topic #5 (0.071): 0.025*"선수" + 0.022*"토트넘" + 0.017*"손흥민" + 0.016*"롯데" + 0.012*"2017" + 0.011*"경기" + 0.011*"두산" + 0.011*"야구" + 0.009*"LG" + 0.009*"영국"
INFO:gensim.models.ldamodel:topic #12 (0.071): 0.020*"일본" + 0.013*"북한" + 0.011*"남편" + 0.009*"한국" + 0.009*"모습" + 0.008*"슬기" + 0.008*"훈련" + 0.008*"도쿄" + 0.007*"사진" + 0.007*"감빵 생활"
INFO:gensim.models.ldamodel:topic #6 (0.071): 0.018*"한국" + 0.013*"김현수" + 0.010*"월드컵" + 0.010*"무대" + 0.009*"관심" + 0.008*"일본" + 0.008*"박나래" + 0.008*"경기" + 0.008*"프랑스" + 0.007*"중국"
INFO:gensim.models.ldamodel:topic diff=0.357326, rho=0.316228
INFO:gensim.models.ldamodel:PROGRESS: pass 0, at do

INFO:gensim.models.ldamodel:topic #5 (0.067): 0.013*"사진" + 0.011*"계약" + 0.008*"두산" + 0.008*"구단" + 0.007*"협상" + 0.007*"니퍼트" + 0.007*"도쿄" + 0.007*"롯데" + 0.007*"정부" + 0.007*"재계약"
INFO:gensim.models.ldamodel:topic #8 (0.067): 0.009*"조사" + 0.008*"정부" + 0.008*"연구" + 0.007*"문재인 대통령" + 0.006*"학생" + 0.006*"서울" + 0.006*"드라마" + 0.005*"개발" + 0.005*"대책" + 0.005*"교수"
INFO:gensim.models.ldamodel:topic #4 (0.067): 0.009*"수능" + 0.008*"대통령" + 0.008*"모습" + 0.008*"서울" + 0.007*"정부" + 0.006*"차량" + 0.006*"북한" + 0.006*"틸러슨 장관" + 0.006*"단말기" + 0.005*"위원장"
INFO:gensim.models.ldamodel:topic diff=0.426992, rho=0.707107
INFO:gensim.models.ldamodel:PROGRESS: pass 0, at document #6000/24492
INFO:gensim.models.ldamodel:merging changes from 2000 documents into a model of 24492 documents
INFO:gensim.models.ldamodel:topic #5 (0.067): 0.019*"계약" + 0.014*"사진" + 0.012*"선수" + 0.010*"두산" + 0.010*"도쿄" + 0.009*"구단" + 0.008*"달러" + 0.007*"투수" + 0.007*"미국" + 0.007*"속도"
INFO:gensim.models.ldamodel:topic #1 (0.067): 0.024*"한국" + 0.

INFO:gensim.models.ldamodel:topic #4 (0.067): 0.015*"결혼" + 0.013*"병원" + 0.011*"차량" + 0.010*"모습" + 0.010*"방송" + 0.007*"서울" + 0.007*"프랑스" + 0.007*"신생아" + 0.006*"가능성" + 0.006*"위원장"
INFO:gensim.models.ldamodel:topic diff=0.374013, rho=0.333333
INFO:gensim.models.ldamodel:-11.662 per-word bound, 3239.6 perplexity estimate based on a held-out corpus of 2000 documents with 15133 words
INFO:gensim.models.ldamodel:PROGRESS: pass 0, at document #20000/24492
INFO:gensim.models.ldamodel:merging changes from 2000 documents into a model of 24492 documents
INFO:gensim.models.ldamodel:topic #13 (0.067): 0.021*"서울" + 0.016*"한파" + 0.012*"대표팀" + 0.012*"기온" + 0.012*"평창" + 0.010*"지역" + 0.010*"비트코인" + 0.010*"슬기" + 0.009*"가상 화폐" + 0.009*"영하"
INFO:gensim.models.ldamodel:topic #10 (0.067): 0.020*"미국" + 0.013*"시장" + 0.011*"한국" + 0.010*"중국" + 0.009*"인스타그램" + 0.009*"회장" + 0.009*"정부" + 0.009*"사진" + 0.008*"공개" + 0.008*"시상식"
INFO:gensim.models.ldamodel:topic #8 (0.067): 0.013*"드라마" + 0.010*"검사" + 0.008*"연구" + 0.008*

INFO:gensim.models.ldamodel:topic diff=14.750855, rho=1.000000
INFO:gensim.models.ldamodel:PROGRESS: pass 0, at document #4000/24492
INFO:gensim.models.ldamodel:merging changes from 2000 documents into a model of 24492 documents
INFO:gensim.models.ldamodel:topic #2 (0.062): 0.013*"계약" + 0.012*"선수" + 0.011*"거래소" + 0.011*"기록" + 0.011*"시장" + 0.010*"경기" + 0.009*"정부" + 0.008*"한국" + 0.006*"영입" + 0.006*"가상 화폐"
INFO:gensim.models.ldamodel:topic #0 (0.062): 0.008*"미국" + 0.008*"협력" + 0.007*"중국" + 0.006*"조치" + 0.006*"SK" + 0.006*"혜택" + 0.006*"EPL" + 0.006*"경기" + 0.006*"아이폰" + 0.005*"애플"
INFO:gensim.models.ldamodel:topic #1 (0.062): 0.035*"중국" + 0.030*"대통령" + 0.014*"미국" + 0.013*"양국" + 0.012*"북한" + 0.011*"한중" + 0.010*"주석" + 0.009*"정상 회담" + 0.009*"한국" + 0.009*"정부"
INFO:gensim.models.ldamodel:topic #6 (0.062): 0.013*"중국" + 0.011*"북한" + 0.010*"사드" + 0.010*"정부" + 0.009*"수능" + 0.007*"달러" + 0.007*"서울" + 0.007*"속도" + 0.007*"관계" + 0.006*"지원"
INFO:gensim.models.ldamodel:topic #7 (0.062): 0.017*"서울" + 0.009*

INFO:gensim.models.ldamodel:topic #0 (0.062): 0.010*"EPL" + 0.010*"치료" + 0.009*"맨시티" + 0.009*"조치" + 0.008*"삼성전자" + 0.008*"박나래" + 0.008*"대책" + 0.007*"미국" + 0.007*"감빵 생활" + 0.007*"경기"
INFO:gensim.models.ldamodel:topic #6 (0.062): 0.015*"정부" + 0.010*"인스타그램" + 0.010*"한화" + 0.008*"서울" + 0.008*"업체" + 0.008*"달러" + 0.007*"중국" + 0.007*"시장" + 0.007*"재건축" + 0.007*"공연"
INFO:gensim.models.ldamodel:topic #14 (0.062): 0.025*"토트넘" + 0.023*"대표" + 0.021*"손흥민" + 0.016*"경기" + 0.012*"의원" + 0.012*"국민의당" + 0.011*"영국" + 0.010*"승리" + 0.010*"정당" + 0.008*"전반"
INFO:gensim.models.ldamodel:topic #5 (0.062): 0.047*"서울" + 0.027*"한파" + 0.024*"기온" + 0.018*"날씨" + 0.015*"전국" + 0.015*"영하" + 0.015*"추위" + 0.014*"지역" + 0.014*"회사" + 0.013*"아침"
INFO:gensim.models.ldamodel:topic diff=0.388755, rho=0.333333
INFO:gensim.models.ldamodel:-11.850 per-word bound, 3692.2 perplexity estimate based on a held-out corpus of 2000 documents with 15133 words
INFO:gensim.models.ldamodel:PROGRESS: pass 0, at document #20000/24492
INFO:gensim.m

INFO:gensim.models.ldamodel:topic #5 (0.059): 0.012*"서울" + 0.010*"기온" + 0.009*"날씨" + 0.008*"추위" + 0.007*"선수" + 0.006*"한국" + 0.005*"영입" + 0.004*"바람" + 0.004*"아침" + 0.004*"영하"
INFO:gensim.models.ldamodel:topic #14 (0.059): 0.010*"정부" + 0.006*"모습" + 0.006*"사고" + 0.005*"북한" + 0.004*"화성" + 0.004*"발사" + 0.004*"서울" + 0.004*"한국당" + 0.004*"국민의당" + 0.004*"기록"
INFO:gensim.models.ldamodel:topic #9 (0.059): 0.008*"북한" + 0.007*"감독" + 0.006*"미국" + 0.006*"선수" + 0.005*"시장" + 0.005*"투자" + 0.005*"경기" + 0.004*"환자" + 0.004*"보상" + 0.004*"한국"
INFO:gensim.models.ldamodel:topic diff=15.684449, rho=1.000000
INFO:gensim.models.ldamodel:PROGRESS: pass 0, at document #4000/24492
INFO:gensim.models.ldamodel:merging changes from 2000 documents into a model of 24492 documents
INFO:gensim.models.ldamodel:topic #9 (0.059): 0.008*"감독" + 0.007*"틸러슨 장관" + 0.007*"시장" + 0.007*"사진" + 0.007*"북한" + 0.006*"사랑" + 0.006*"의사" + 0.006*"사드" + 0.006*"중소기업" + 0.005*"16"
INFO:gensim.models.ldamodel:topic #10 (0.059): 0.043*"중국" + 0.034

INFO:gensim.models.ldamodel:topic diff=0.375739, rho=0.353553
INFO:gensim.models.ldamodel:PROGRESS: pass 0, at document #18000/24492
INFO:gensim.models.ldamodel:merging changes from 2000 documents into a model of 24492 documents
INFO:gensim.models.ldamodel:topic #11 (0.059): 0.027*"방송" + 0.024*"사고" + 0.012*"20" + 0.009*"범행" + 0.009*"안전" + 0.008*"다스" + 0.008*"건물" + 0.008*"해경" + 0.008*"성추행" + 0.008*"화재"
INFO:gensim.models.ldamodel:topic #12 (0.059): 0.009*"방송" + 0.008*"문화" + 0.007*"공연" + 0.007*"문재인" + 0.007*"무한도전" + 0.007*"멤버들" + 0.007*"SNS" + 0.006*"정부" + 0.006*"질문" + 0.006*"베트남"
INFO:gensim.models.ldamodel:topic #4 (0.059): 0.045*"경기" + 0.043*"선수" + 0.030*"계약" + 0.017*"구단" + 0.016*"감독" + 0.014*"일본" + 0.014*"기록" + 0.013*"FA" + 0.012*"리그" + 0.012*"영입"
INFO:gensim.models.ldamodel:topic #13 (0.059): 0.021*"경기" + 0.019*"대회" + 0.018*"한국" + 0.015*"월드컵" + 0.012*"선수" + 0.012*"영국" + 0.011*"독일" + 0.011*"토트넘" + 0.010*"손흥민" + 0.010*"케인"
INFO:gensim.models.ldamodel:topic #3 (0.059): 0.012*"친구" + 0.0

INFO:gensim.models.ldamodel:merging changes from 2000 documents into a model of 24492 documents
INFO:gensim.models.ldamodel:topic #15 (0.056): 0.007*"사고" + 0.007*"해경" + 0.006*"한국" + 0.006*"애플" + 0.005*"교수" + 0.005*"경기" + 0.005*"20" + 0.005*"서울" + 0.004*"급유선" + 0.004*"이적"
INFO:gensim.models.ldamodel:topic #4 (0.056): 0.011*"시장" + 0.010*"경기" + 0.008*"중국" + 0.008*"미국" + 0.007*"일본" + 0.007*"한국" + 0.006*"북한" + 0.006*"롯데" + 0.006*"구단" + 0.006*"스마트폰"
INFO:gensim.models.ldamodel:topic #1 (0.056): 0.008*"정부" + 0.006*"지역" + 0.005*"국회" + 0.005*"본회의" + 0.005*"국내" + 0.005*"북한" + 0.005*"예산안" + 0.004*"원내대표" + 0.004*"중국" + 0.004*"협상"
INFO:gensim.models.ldamodel:topic #12 (0.056): 0.016*"서울" + 0.010*"경기" + 0.008*"한국" + 0.008*"월드컵" + 0.006*"미국" + 0.006*"독일" + 0.006*"방송" + 0.005*"러시아" + 0.005*"예산안" + 0.005*"기록"
INFO:gensim.models.ldamodel:topic #2 (0.056): 0.011*"한국" + 0.009*"서울" + 0.008*"예산안" + 0.007*"미국" + 0.007*"경기" + 0.007*"기온" + 0.005*"선수" + 0.005*"중국" + 0.005*"협상" + 0.005*"기술"
INFO:gensim.models.ld

INFO:gensim.models.ldamodel:topic #12 (0.056): 0.029*"화재" + 0.029*"경기" + 0.016*"선수" + 0.015*"한국" + 0.014*"감독" + 0.014*"대회" + 0.014*"월드컵" + 0.014*"평창" + 0.012*"토트넘" + 0.012*"독일"
INFO:gensim.models.ldamodel:topic #10 (0.056): 0.031*"대통령" + 0.015*"청와대" + 0.013*"북한" + 0.012*"정부" + 0.012*"일본" + 0.012*"중국" + 0.011*"논란" + 0.010*"사고" + 0.009*"트럼프 대통령" + 0.009*"한국"
INFO:gensim.models.ldamodel:topic #0 (0.056): 0.012*"모습" + 0.012*"방송" + 0.011*"가족" + 0.011*"남편" + 0.010*"30" + 0.010*"친구" + 0.009*"평창올림픽" + 0.009*"동영상" + 0.009*"프로그램" + 0.009*"대표팀"
INFO:gensim.models.ldamodel:topic diff=0.368940, rho=0.353553
INFO:gensim.models.ldamodel:PROGRESS: pass 0, at document #18000/24492
INFO:gensim.models.ldamodel:merging changes from 2000 documents into a model of 24492 documents
INFO:gensim.models.ldamodel:topic #5 (0.056): 0.017*"장관" + 0.011*"복귀" + 0.010*"부모" + 0.010*"조치" + 0.010*"음악" + 0.009*"방송" + 0.009*"업무" + 0.008*"마음" + 0.008*"수술" + 0.007*"대책"
INFO:gensim.models.ldamodel:topic #7 (0.056): 0.026*"선수" 

INFO:gensim.models.ldamodel:using symmetric eta at 0.05263157894736842
INFO:gensim.models.ldamodel:using serial LDA version on this node
INFO:gensim.models.ldamodel:running online (single-pass) LDA training, 19 topics, 1 passes over the supplied corpus of 24492 documents, updating model once every 2000 documents, evaluating perplexity every 20000 documents, iterating 50x with a convergence threshold of 0.001000
INFO:gensim.models.ldamodel:PROGRESS: pass 0, at document #2000/24492
INFO:gensim.models.ldamodel:merging changes from 2000 documents into a model of 24492 documents
INFO:gensim.models.ldamodel:topic #2 (0.053): 0.009*"투자" + 0.008*"비트코인" + 0.008*"가격" + 0.008*"가상 화폐" + 0.007*"일본" + 0.007*"발견" + 0.007*"국회" + 0.006*"미국" + 0.006*"대통령" + 0.006*"모습"
INFO:gensim.models.ldamodel:topic #16 (0.053): 0.008*"미국" + 0.007*"추위" + 0.006*"정부" + 0.005*"혐의" + 0.005*"사고" + 0.005*"위원회" + 0.004*"모습" + 0.004*"선수" + 0.003*"인천" + 0.003*"그림"
INFO:gensim.models.ldamodel:topic #8 (0.053): 0.010*"서울" + 0.00

INFO:gensim.models.ldamodel:topic diff=0.331801, rho=0.377964
INFO:gensim.models.ldamodel:PROGRESS: pass 0, at document #16000/24492
INFO:gensim.models.ldamodel:merging changes from 2000 documents into a model of 24492 documents
INFO:gensim.models.ldamodel:topic #12 (0.053): 0.041*"서울" + 0.037*"한파" + 0.025*"기온" + 0.020*"토트넘" + 0.018*"경기" + 0.016*"영하" + 0.016*"이적" + 0.016*"지역" + 0.015*"손흥민" + 0.015*"추위"
INFO:gensim.models.ldamodel:topic #2 (0.053): 0.038*"가상 화폐" + 0.026*"정부" + 0.024*"거래소" + 0.020*"가격" + 0.017*"시장" + 0.017*"규제" + 0.017*"거래" + 0.017*"비트코인" + 0.016*"아파트" + 0.015*"투자"
INFO:gensim.models.ldamodel:topic #4 (0.053): 0.039*"미국" + 0.025*"트럼프 대통령" + 0.018*"트럼프" + 0.011*"대화" + 0.010*"대통령" + 0.009*"상승" + 0.008*"백악관" + 0.008*"업무" + 0.007*"파운드" + 0.007*"주택"
INFO:gensim.models.ldamodel:topic #16 (0.053): 0.013*"친구" + 0.011*"사고" + 0.010*"엄마" + 0.009*"정책" + 0.009*"어머니" + 0.008*"부모" + 0.008*"문화" + 0.008*"위원회" + 0.008*"언론" + 0.007*"안전"
INFO:gensim.models.ldamodel:topic #13 (0.053): 0.031*

INFO:gensim.topic_coherence.text_analysis:serializing accumulator to return to master...
INFO:gensim.topic_coherence.text_analysis:accumulator serialized
INFO:gensim.topic_coherence.text_analysis:accumulator serialized
INFO:gensim.topic_coherence.text_analysis:serializing accumulator to return to master...
INFO:gensim.topic_coherence.text_analysis:accumulator serialized
INFO:gensim.topic_coherence.text_analysis:3 accumulators retrieved from output queue
INFO:gensim.topic_coherence.text_analysis:accumulated word occurrence stats for 20590 virtual documents
INFO:gensim.models.ldamodel:using symmetric alpha at 0.05
INFO:gensim.models.ldamodel:using symmetric eta at 0.05
INFO:gensim.models.ldamodel:using serial LDA version on this node
INFO:gensim.models.ldamodel:running online (single-pass) LDA training, 20 topics, 1 passes over the supplied corpus of 24492 documents, updating model once every 2000 documents, evaluating perplexity every 20000 documents, iterating 50x with a convergence th

INFO:gensim.models.ldamodel:topic #8 (0.050): 0.038*"미국" + 0.018*"평창" + 0.016*"중국" + 0.015*"조치" + 0.014*"북한" + 0.012*"트럼프" + 0.010*"정부" + 0.010*"업계" + 0.010*"세계" + 0.010*"삼성전자"
INFO:gensim.models.ldamodel:topic #10 (0.050): 0.014*"방송" + 0.012*"논란" + 0.010*"영화" + 0.010*"인터뷰" + 0.008*"작품" + 0.008*"시행" + 0.008*"세상" + 0.008*"검사" + 0.007*"촬영" + 0.007*"음악"
INFO:gensim.models.ldamodel:topic #12 (0.050): 0.029*"한파" + 0.022*"경기" + 0.012*"평창올림픽" + 0.012*"한국" + 0.012*"맨유" + 0.011*"대표팀" + 0.011*"상승" + 0.011*"영국" + 0.009*"중국" + 0.009*"베트남"
INFO:gensim.models.ldamodel:topic diff=0.331380, rho=0.377964
INFO:gensim.models.ldamodel:PROGRESS: pass 0, at document #16000/24492
INFO:gensim.models.ldamodel:merging changes from 2000 documents into a model of 24492 documents
INFO:gensim.models.ldamodel:topic #7 (0.050): 0.027*"트럼프 대통령" + 0.026*"미국" + 0.017*"북한" + 0.016*"검찰" + 0.014*"대통령" + 0.012*"조사" + 0.008*"백악관" + 0.007*"트럼프" + 0.007*"진술" + 0.007*"대통령의"
INFO:gensim.models.ldamodel:topic #4 (0.050): 0.031*"한

INFO:gensim.utils:saved /Volumes/disk1/Clustering_model/ldamodel_mixed_topic-20
INFO:gensim.utils:saving CoherenceModel object under /Volumes/disk1/Clustering_model/coherencemodel_mixed_topic-20, separately None
INFO:gensim.utils:saved /Volumes/disk1/Clustering_model/coherencemodel_mixed_topic-20
INFO:gensim.topic_coherence.probability_estimation:using ParallelWordOccurrenceAccumulator(processes=3, batch_size=64) to estimate probabilities from sliding windows
INFO:gensim.topic_coherence.text_analysis:serializing accumulator to return to master...
INFO:gensim.topic_coherence.text_analysis:serializing accumulator to return to master...
INFO:gensim.topic_coherence.text_analysis:serializing accumulator to return to master...
INFO:gensim.topic_coherence.text_analysis:accumulator serialized
INFO:gensim.topic_coherence.text_analysis:accumulator serialized
INFO:gensim.topic_coherence.text_analysis:accumulator serialized
INFO:gensim.topic_coherence.text_analysis:3 accumulators retrieved from ou

INFO:gensim.models.ldamodel:topic diff=0.328854, rho=0.408248
INFO:gensim.models.ldamodel:PROGRESS: pass 0, at document #14000/24492
INFO:gensim.models.ldamodel:merging changes from 2000 documents into a model of 24492 documents
INFO:gensim.models.ldamodel:topic #15 (0.048): 0.025*"2018" + 0.017*"재판" + 0.014*"업체" + 0.014*"회장" + 0.013*"선고" + 0.013*"혐의" + 0.012*"업무" + 0.011*"UAE" + 0.009*"파견" + 0.009*"15"
INFO:gensim.models.ldamodel:topic #14 (0.048): 0.015*"직원" + 0.014*"기술" + 0.011*"논란" + 0.011*"소비자" + 0.010*"내용" + 0.009*"복귀" + 0.008*"아버지" + 0.008*"징계" + 0.008*"21" + 0.008*"삼성전자"
INFO:gensim.models.ldamodel:topic #16 (0.048): 0.053*"미국" + 0.023*"정부" + 0.023*"중국" + 0.018*"트럼프" + 0.016*"세계" + 0.015*"트럼프 대통령" + 0.014*"북한" + 0.010*"기업" + 0.010*"인텔" + 0.009*"대통령"
INFO:gensim.models.ldamodel:topic #1 (0.048): 0.026*"아파트" + 0.012*"인스타그램" + 0.012*"경찰" + 0.012*"서울" + 0.010*"공급" + 0.010*"농도" + 0.009*"주민" + 0.009*"사망" + 0.009*"상태" + 0.009*"기간"
INFO:gensim.models.ldamodel:topic #3 (0.048): 0.042*"화

INFO:gensim.utils:saved /Volumes/disk1/Clustering_model/ldamodel_mixed_topic-21.state
INFO:gensim.utils:saving LdaModel object under /Volumes/disk1/Clustering_model/ldamodel_mixed_topic-21, separately ['expElogbeta', 'sstats']
INFO:gensim.utils:storing np array 'expElogbeta' to /Volumes/disk1/Clustering_model/ldamodel_mixed_topic-21.expElogbeta.npy
INFO:gensim.utils:not storing attribute id2word
INFO:gensim.utils:not storing attribute dispatcher
INFO:gensim.utils:not storing attribute state
INFO:gensim.utils:saved /Volumes/disk1/Clustering_model/ldamodel_mixed_topic-21
INFO:gensim.utils:saving CoherenceModel object under /Volumes/disk1/Clustering_model/coherencemodel_mixed_topic-21, separately None
INFO:gensim.utils:saved /Volumes/disk1/Clustering_model/coherencemodel_mixed_topic-21
INFO:gensim.topic_coherence.probability_estimation:using ParallelWordOccurrenceAccumulator(processes=3, batch_size=64) to estimate probabilities from sliding windows
INFO:gensim.topic_coherence.text_analysi

INFO:gensim.models.ldamodel:topic #20 (0.045): 0.017*"배터리" + 0.015*"방송" + 0.011*"이야기" + 0.010*"미국" + 0.010*"인천공항" + 0.009*"관계" + 0.009*"tvN" + 0.009*"아이폰" + 0.009*"웃음" + 0.009*"건강"
INFO:gensim.models.ldamodel:topic #2 (0.045): 0.037*"청와대" + 0.036*"검찰" + 0.036*"대통령" + 0.020*"혐의" + 0.016*"국정원" + 0.016*"정부" + 0.014*"수사" + 0.009*"북한" + 0.009*"한국" + 0.009*"실장"
INFO:gensim.models.ldamodel:topic #17 (0.045): 0.031*"전국" + 0.023*"장관" + 0.020*"정부" + 0.013*"정치" + 0.012*"지방" + 0.011*"위안부" + 0.009*"새해" + 0.008*"아베 총리" + 0.008*"대통령" + 0.008*"서울"
INFO:gensim.models.ldamodel:topic diff=0.334442, rho=0.408248
INFO:gensim.models.ldamodel:PROGRESS: pass 0, at document #14000/24492
INFO:gensim.models.ldamodel:merging changes from 2000 documents into a model of 24492 documents
INFO:gensim.models.ldamodel:topic #13 (0.045): 0.056*"경기" + 0.029*"선수" + 0.020*"감독" + 0.016*"맨유" + 0.012*"전반" + 0.012*"토트넘" + 0.012*"기록" + 0.011*"리그" + 0.011*"공격" + 0.011*"축구"
INFO:gensim.models.ldamodel:topic #18 (0.045): 0.048*"가상 

INFO:gensim.models.ldamodel:topic #8 (0.045): 0.028*"영국" + 0.024*"손흥민" + 0.023*"출전" + 0.020*"대표팀" + 0.019*"아내" + 0.016*"사랑" + 0.016*"부부" + 0.016*"모습" + 0.015*"병원" + 0.014*"부상"
INFO:gensim.models.ldamodel:topic #15 (0.045): 0.025*"엄마" + 0.016*"행사" + 0.015*"가족" + 0.015*"연기" + 0.014*"한국" + 0.013*"남북" + 0.013*"금메달" + 0.012*"자메이카" + 0.011*"친구들" + 0.011*"만원"
INFO:gensim.models.ldamodel:topic diff=0.235361, rho=0.277350
INFO:gensim.utils:saving LdaState object under /Volumes/disk1/Clustering_model/ldamodel_mixed_topic-22.state, separately None
INFO:gensim.utils:saved /Volumes/disk1/Clustering_model/ldamodel_mixed_topic-22.state
INFO:gensim.utils:saving LdaModel object under /Volumes/disk1/Clustering_model/ldamodel_mixed_topic-22, separately ['expElogbeta', 'sstats']
INFO:gensim.utils:storing np array 'expElogbeta' to /Volumes/disk1/Clustering_model/ldamodel_mixed_topic-22.expElogbeta.npy
INFO:gensim.utils:not storing attribute id2word
INFO:gensim.utils:not storing attribute dispatcher
INFO:ge

INFO:gensim.models.ldamodel:topic diff=0.348525, rho=0.447214
INFO:gensim.models.ldamodel:PROGRESS: pass 0, at document #12000/24492
INFO:gensim.models.ldamodel:merging changes from 2000 documents into a model of 24492 documents
INFO:gensim.models.ldamodel:topic #13 (0.043): 0.018*"지역" + 0.017*"가족" + 0.017*"작품" + 0.016*"드라마" + 0.012*"방송" + 0.011*"모습" + 0.010*"트레이드" + 0.010*"제주" + 0.009*"통화" + 0.009*"아내"
INFO:gensim.models.ldamodel:topic #9 (0.043): 0.030*"한파" + 0.015*"방송" + 0.013*"12" + 0.013*"공개" + 0.013*"실장" + 0.011*"강남" + 0.009*"1987" + 0.009*"영화" + 0.009*"겨울" + 0.008*"국방부"
INFO:gensim.models.ldamodel:topic #15 (0.043): 0.022*"중국" + 0.020*"미국" + 0.012*"이란" + 0.011*"CES" + 0.011*"복귀" + 0.010*"위원장" + 0.009*"부처" + 0.009*"시장" + 0.008*"고객" + 0.008*"업체"
INFO:gensim.models.ldamodel:topic #0 (0.043): 0.030*"토트넘" + 0.018*"경기" + 0.018*"영국" + 0.018*"손흥민" + 0.017*"맨유" + 0.014*"기술" + 0.013*"슈팅" + 0.012*"다스" + 0.012*"이적료" + 0.012*"AI"
INFO:gensim.models.ldamodel:topic #7 (0.043): 0.032*"경찰" + 0.0

INFO:gensim.models.ldamodel:merging changes from 492 documents into a model of 24492 documents
INFO:gensim.models.ldamodel:topic #3 (0.043): 0.019*"질문" + 0.014*"교수" + 0.013*"사회" + 0.012*"식사" + 0.012*"사진" + 0.011*"치료" + 0.010*"판사" + 0.009*"학생" + 0.009*"네이마르" + 0.009*"메시"
INFO:gensim.models.ldamodel:topic #22 (0.043): 0.033*"리그" + 0.020*"타율" + 0.020*"홈런" + 0.014*"역할" + 0.013*"음악" + 0.012*"거래소" + 0.011*"25" + 0.010*"목표" + 0.009*"타점" + 0.009*"한국"
INFO:gensim.models.ldamodel:topic #15 (0.043): 0.028*"복귀" + 0.019*"혼자" + 0.017*"회장" + 0.016*"세트" + 0.014*"중국" + 0.012*"나이" + 0.012*"미국" + 0.011*"내용" + 0.011*"kt" + 0.009*"교육"
INFO:gensim.models.ldamodel:topic #0 (0.043): 0.047*"토트넘" + 0.035*"손흥민" + 0.031*"영국" + 0.028*"경기" + 0.026*"베트남" + 0.023*"전반" + 0.018*"맨유" + 0.018*"다스" + 0.016*"이적" + 0.014*"슈팅"
INFO:gensim.models.ldamodel:topic #4 (0.043): 0.018*"팬들" + 0.016*"공연" + 0.016*"인상" + 0.015*"무한도전" + 0.015*"EPL" + 0.013*"호주" + 0.013*"방송" + 0.009*"최저 임금" + 0.009*"특집" + 0.009*"게임"
INFO:gensim.models.ld

INFO:gensim.models.ldamodel:topic #23 (0.042): 0.016*"혐의" + 0.014*"여성" + 0.013*"제품" + 0.011*"세계" + 0.010*"인터넷" + 0.010*"미국" + 0.008*"기업" + 0.008*"불법" + 0.008*"얼굴" + 0.008*"소비자"
INFO:gensim.models.ldamodel:topic #12 (0.042): 0.020*"인텔" + 0.015*"달러" + 0.014*"조치" + 0.013*"실장" + 0.012*"가격" + 0.011*"상태" + 0.011*"거래" + 0.011*"부동산" + 0.011*"경찰" + 0.009*"조사"
INFO:gensim.models.ldamodel:topic #2 (0.042): 0.046*"경기" + 0.023*"선수" + 0.023*"기록" + 0.020*"리그" + 0.019*"평창" + 0.017*"성능" + 0.014*"감독" + 0.011*"전망" + 0.009*"바르셀로나" + 0.009*"지난"
INFO:gensim.models.ldamodel:topic diff=0.343797, rho=0.447214
INFO:gensim.models.ldamodel:PROGRESS: pass 0, at document #12000/24492
INFO:gensim.models.ldamodel:merging changes from 2000 documents into a model of 24492 documents
INFO:gensim.models.ldamodel:topic #15 (0.042): 0.020*"남북" + 0.015*"건물" + 0.014*"경찰" + 0.009*"도로" + 0.009*"오늘" + 0.009*"울산" + 0.008*"의견" + 0.007*"시신" + 0.007*"새마을금고" + 0.007*"서울"
INFO:gensim.models.ldamodel:topic #3 (0.042): 0.015*"맨시티" + 0.0

INFO:gensim.models.ldamodel:topic #19 (0.042): 0.028*"아내" + 0.025*"아들" + 0.025*"베트남" + 0.019*"이야기" + 0.018*"친구" + 0.016*"어머니" + 0.012*"직원" + 0.010*"고백" + 0.009*"관심" + 0.009*"모습"
INFO:gensim.models.ldamodel:topic diff=0.347655, rho=0.288675
INFO:gensim.models.ldamodel:-12.853 per-word bound, 7397.6 perplexity estimate based on a held-out corpus of 492 documents with 3702 words
INFO:gensim.models.ldamodel:PROGRESS: pass 0, at document #24492/24492
INFO:gensim.models.ldamodel:merging changes from 492 documents into a model of 24492 documents
INFO:gensim.models.ldamodel:topic #6 (0.042): 0.056*"대표" + 0.029*"의원" + 0.027*"대표팀" + 0.025*"복귀" + 0.022*"국민의당" + 0.017*"통합" + 0.017*"정당" + 0.015*"눈물" + 0.013*"사연" + 0.013*"외국인"
INFO:gensim.models.ldamodel:topic #3 (0.042): 0.024*"20" + 0.020*"질문" + 0.019*"피해" + 0.018*"파운드" + 0.017*"행사" + 0.016*"맨유" + 0.014*"EPL" + 0.013*"맨시티" + 0.011*"인천" + 0.011*"이청용"
INFO:gensim.models.ldamodel:topic #20 (0.042): 0.016*"신고" + 0.013*"2017" + 0.013*"MC" + 0.012*"역할" 

INFO:gensim.models.ldamodel:merging changes from 2000 documents into a model of 24492 documents
INFO:gensim.models.ldamodel:topic #2 (0.040): 0.068*"서울" + 0.039*"기온" + 0.034*"전국" + 0.033*"지역" + 0.031*"날씨" + 0.028*"추위" + 0.021*"아침" + 0.016*"영하" + 0.013*"도로" + 0.011*"부산"
INFO:gensim.models.ldamodel:topic #23 (0.040): 0.032*"현장" + 0.014*"외교부" + 0.013*"작업" + 0.012*"사고" + 0.010*"인천공항" + 0.010*"경찰" + 0.010*"웃음" + 0.009*"14" + 0.009*"신고" + 0.008*"프랑스"
INFO:gensim.models.ldamodel:topic #4 (0.040): 0.019*"전망" + 0.018*"서비스" + 0.013*"보안" + 0.012*"시장" + 0.012*"인상" + 0.011*"대책" + 0.010*"영향" + 0.010*"정보" + 0.010*"행사" + 0.009*"가상 통화"
INFO:gensim.models.ldamodel:topic #14 (0.040): 0.018*"피해" + 0.012*"승객" + 0.011*"CES" + 0.011*"서울시" + 0.011*"경찰" + 0.010*"엄마" + 0.010*"교체" + 0.009*"버스" + 0.009*"부모" + 0.008*"미국"
INFO:gensim.models.ldamodel:topic #1 (0.040): 0.030*"남북" + 0.022*"선수" + 0.019*"연구" + 0.019*"성능" + 0.017*"영국" + 0.016*"경기" + 0.012*"통일부" + 0.010*"북측" + 0.010*"상대" + 0.009*"호주"
INFO:gensim.models.ld

INFO:gensim.models.ldamodel:topic #15 (0.040): 0.029*"연기" + 0.022*"23" + 0.017*"작품" + 0.012*"이야기" + 0.012*"회사" + 0.012*"UFC" + 0.012*"배우" + 0.011*"드라마" + 0.010*"과정" + 0.010*"한국"
INFO:gensim.models.ldamodel:topic #1 (0.040): 0.050*"선수" + 0.046*"경기" + 0.037*"정현" + 0.023*"호주오픈" + 0.022*"한국" + 0.021*"테니스" + 0.018*"영국" + 0.016*"상대" + 0.013*"감독" + 0.012*"조코비치"
INFO:gensim.models.ldamodel:topic #19 (0.040): 0.048*"검찰" + 0.046*"대통령" + 0.035*"혐의" + 0.027*"청와대" + 0.024*"수사" + 0.020*"사건" + 0.015*"국정원" + 0.013*"재판" + 0.012*"의혹" + 0.012*"선고"
INFO:gensim.models.ldamodel:topic #11 (0.040): 0.062*"대회" + 0.027*"대표팀" + 0.017*"다스" + 0.012*"한국" + 0.012*"전화" + 0.011*"측면" + 0.010*"UAE" + 0.010*"아빠" + 0.010*"감독" + 0.010*"월드컵"
INFO:gensim.models.ldamodel:topic diff=0.349314, rho=0.288675
INFO:gensim.models.ldamodel:-13.037 per-word bound, 8405.6 perplexity estimate based on a held-out corpus of 492 documents with 3702 words
INFO:gensim.models.ldamodel:PROGRESS: pass 0, at document #24492/24492
INFO:gensim.mod

INFO:gensim.models.ldamodel:topic #7 (0.038): 0.029*"배터리" + 0.020*"위안부" + 0.017*"애플" + 0.016*"신생아" + 0.015*"아이폰" + 0.014*"감독" + 0.010*"서울" + 0.009*"날씨" + 0.008*"추위" + 0.008*"성능 저하"
INFO:gensim.models.ldamodel:topic #8 (0.038): 0.031*"성능" + 0.023*"애플" + 0.016*"케인" + 0.013*"아이폰" + 0.013*"거래소" + 0.010*"눈물" + 0.010*"지난" + 0.010*"가상 화폐" + 0.009*"정부" + 0.009*"규제"
INFO:gensim.models.ldamodel:topic diff=0.373298, rho=0.500000
INFO:gensim.models.ldamodel:PROGRESS: pass 0, at document #10000/24492
INFO:gensim.models.ldamodel:merging changes from 2000 documents into a model of 24492 documents
INFO:gensim.models.ldamodel:topic #9 (0.038): 0.015*"실장" + 0.014*"전북" + 0.013*"어머니" + 0.012*"방송" + 0.009*"바르사" + 0.008*"새벽" + 0.008*"선수들" + 0.007*"다저스" + 0.007*"이야기" + 0.007*"미국"
INFO:gensim.models.ldamodel:topic #1 (0.038): 0.017*"기업" + 0.015*"LG" + 0.013*"서울시" + 0.012*"메이저리그" + 0.011*"25" + 0.010*"건물" + 0.010*"김현수" + 0.010*"인상" + 0.010*"이름" + 0.009*"18"
INFO:gensim.models.ldamodel:topic #7 (0.038): 0.022*"

INFO:gensim.models.ldamodel:topic diff=0.362377, rho=0.301511
INFO:gensim.models.ldamodel:PROGRESS: pass 0, at document #24000/24492
INFO:gensim.models.ldamodel:merging changes from 2000 documents into a model of 24492 documents
INFO:gensim.models.ldamodel:topic #4 (0.038): 0.047*"북한" + 0.014*"남북" + 0.014*"대통령" + 0.013*"중국" + 0.013*"미국" + 0.012*"장면" + 0.012*"행사" + 0.011*"평창올림픽" + 0.010*"정부" + 0.010*"사진"
INFO:gensim.models.ldamodel:topic #17 (0.038): 0.062*"화재" + 0.028*"통합" + 0.015*"공식" + 0.014*"사업" + 0.013*"투어" + 0.011*"제천" + 0.009*"뉴욕" + 0.008*"미국" + 0.008*"전쟁" + 0.008*"참사"
INFO:gensim.models.ldamodel:topic #3 (0.038): 0.037*"일본" + 0.025*"서울" + 0.019*"지역" + 0.017*"한국" + 0.014*"SNS" + 0.014*"1987" + 0.012*"손님" + 0.012*"전국" + 0.011*"영화" + 0.011*"UAE"
INFO:gensim.models.ldamodel:topic #15 (0.038): 0.024*"활약" + 0.020*"영화" + 0.018*"민주당" + 0.017*"도움" + 0.012*"후보" + 0.012*"사망" + 0.010*"성폭행" + 0.010*"대학" + 0.009*"유로" + 0.009*"기간"
INFO:gensim.models.ldamodel:topic #5 (0.038): 0.040*"대회" + 0.01

INFO:gensim.models.ldamodel:topic #24 (0.037): 0.022*"통합" + 0.017*"미국" + 0.015*"정부" + 0.010*"대통령" + 0.009*"열차" + 0.009*"서비스" + 0.008*"검토" + 0.008*"제도" + 0.007*"중단" + 0.007*"외교"
INFO:gensim.models.ldamodel:topic #11 (0.037): 0.027*"아파트" + 0.021*"크리스마스" + 0.015*"경찰" + 0.015*"병원" + 0.015*"건물" + 0.012*"화유기" + 0.011*"서울" + 0.011*"사고" + 0.011*"도로" + 0.011*"양현종"
INFO:gensim.models.ldamodel:topic #3 (0.037): 0.047*"경기" + 0.032*"기록" + 0.029*"선수" + 0.023*"비트코인" + 0.021*"가상 화폐" + 0.016*"가격" + 0.015*"감독" + 0.014*"맨시티" + 0.013*"한국" + 0.013*"LG"
INFO:gensim.models.ldamodel:topic #1 (0.037): 0.035*"검찰" + 0.031*"수사" + 0.027*"대통령" + 0.024*"혐의" + 0.021*"북한" + 0.019*"청와대" + 0.016*"의혹" + 0.013*"광주" + 0.010*"서울" + 0.010*"김정은"
INFO:gensim.models.ldamodel:topic diff=0.366924, rho=0.500000
INFO:gensim.models.ldamodel:PROGRESS: pass 0, at document #10000/24492
INFO:gensim.models.ldamodel:merging changes from 2000 documents into a model of 24492 documents
INFO:gensim.models.ldamodel:topic #7 (0.037): 0.017*"경찰"

INFO:gensim.models.ldamodel:topic #11 (0.037): 0.028*"경찰" + 0.026*"사고" + 0.023*"병원" + 0.023*"아파트" + 0.019*"작품" + 0.018*"20" + 0.016*"넥센" + 0.015*"세상" + 0.015*"건물" + 0.014*"화유기"
INFO:gensim.models.ldamodel:topic #1 (0.037): 0.065*"검찰" + 0.046*"대통령" + 0.041*"수사" + 0.039*"혐의" + 0.031*"청와대" + 0.022*"사건" + 0.021*"경찰" + 0.018*"국정원" + 0.017*"의혹" + 0.011*"의원"
INFO:gensim.models.ldamodel:topic diff=0.361531, rho=0.301511
INFO:gensim.models.ldamodel:PROGRESS: pass 0, at document #24000/24492
INFO:gensim.models.ldamodel:merging changes from 2000 documents into a model of 24492 documents
INFO:gensim.models.ldamodel:topic #6 (0.037): 0.017*"홈런" + 0.017*"성적" + 0.016*"호주" + 0.013*"음식" + 0.012*"황금빛" + 0.012*"타율" + 0.011*"발언" + 0.011*"타자" + 0.011*"건강" + 0.009*"모습"
INFO:gensim.models.ldamodel:topic #4 (0.037): 0.029*"방송" + 0.022*"베트남" + 0.017*"촬영" + 0.017*"무한도전" + 0.015*"출연" + 0.014*"만원" + 0.014*"예능" + 0.014*"언론" + 0.014*"모습" + 0.012*"프로그램"
INFO:gensim.models.ldamodel:topic #5 (0.037): 0.043*"토트넘" + 0.0

INFO:gensim.models.ldamodel:topic diff=0.417373, rho=0.577350
INFO:gensim.models.ldamodel:PROGRESS: pass 0, at document #8000/24492
INFO:gensim.models.ldamodel:merging changes from 2000 documents into a model of 24492 documents
INFO:gensim.models.ldamodel:topic #10 (0.036): 0.011*"승객" + 0.010*"미국" + 0.009*"통신" + 0.009*"관광객" + 0.008*"부부" + 0.008*"KT" + 0.008*"부모" + 0.007*"수입" + 0.007*"대통령" + 0.007*"정규직 전환"
INFO:gensim.models.ldamodel:topic #15 (0.036): 0.017*"한국당" + 0.014*"눈물" + 0.011*"직원" + 0.011*"결혼" + 0.010*"고객" + 0.008*"세대" + 0.008*"무한도전" + 0.007*"진심" + 0.007*"의원" + 0.007*"멤버들"
INFO:gensim.models.ldamodel:topic #2 (0.036): 0.017*"영화" + 0.015*"고준희" + 0.012*"원전" + 0.011*"계약" + 0.011*"사진" + 0.010*"이름" + 0.010*"지원" + 0.010*"KIA" + 0.009*"개봉" + 0.008*"배우"
INFO:gensim.models.ldamodel:topic #19 (0.036): 0.017*"30" + 0.014*"광주" + 0.014*"세계" + 0.013*"18" + 0.013*"서비스" + 0.011*"주변" + 0.011*"개통" + 0.010*"의견" + 0.010*"기술" + 0.010*"레알"
INFO:gensim.models.ldamodel:topic #5 (0.036): 0.027*"투표" + 0

INFO:gensim.models.ldamodel:topic #11 (0.036): 0.021*"인상" + 0.019*"최저 임금" + 0.018*"기온" + 0.015*"날씨" + 0.013*"서울" + 0.013*"아침" + 0.013*"추위" + 0.012*"겨울" + 0.012*"대구" + 0.011*"최고"
INFO:gensim.models.ldamodel:topic #24 (0.036): 0.036*"경기" + 0.028*"맨유" + 0.019*"맨시티" + 0.019*"부상" + 0.016*"kt" + 0.014*"아스널" + 0.013*"남성" + 0.013*"UAE" + 0.011*"감독" + 0.010*"여성"
INFO:gensim.models.ldamodel:topic #15 (0.036): 0.052*"결혼" + 0.023*"눈물" + 0.021*"한국당" + 0.015*"직원" + 0.014*"방송" + 0.013*"멤버들" + 0.010*"출연" + 0.009*"무한도전" + 0.008*"사랑" + 0.008*"고객"
INFO:gensim.models.ldamodel:topic #25 (0.036): 0.053*"북한" + 0.017*"남북" + 0.017*"미국" + 0.015*"니퍼트" + 0.015*"중국" + 0.012*"신고" + 0.012*"법원" + 0.012*"얼굴" + 0.011*"행사" + 0.010*"나이"
INFO:gensim.models.ldamodel:topic #19 (0.036): 0.021*"레알" + 0.019*"30" + 0.018*"11" + 0.016*"손님" + 0.015*"광주" + 0.013*"과거" + 0.013*"평가" + 0.012*"기술" + 0.012*"24" + 0.011*"호날두"
INFO:gensim.models.ldamodel:topic diff=0.359270, rho=0.301511
INFO:gensim.models.ldamodel:PROGRESS: pass 0, at do