# topic classification on wikipedia based on outgoing links

In [1]:
# to support both python 2 and python 3
from __future__ import division, print_function, unicode_literals

# magics and warnings
%load_ext autoreload
%autoreload 2
import warnings; warnings.simplefilter('ignore')

# machinery
import os, codecs, string, random
from numpy.random import seed as random_seed
from numpy.random import shuffle as random_shuffle
import numpy as np
seed = 42 # reproducibility!
random.seed(seed)
random_seed(seed)
np.random.seed(seed)
import nltk, gensim, sklearn, spacy # the armory!
import matplotlib.pyplot as plt
import pyLDAvis.gensim

In [2]:
import pandas as pd

First, using the <a href=https://docs.python.org/3/library/xml.etree.elementtree.html>xml element tree</a> library we parse the xml dump of the simple english wikipedia.

In [3]:
import xml.etree.ElementTree as ET
tree = ET.parse('simplewiki-20171201-pages-articles-multistream.xml')

In [4]:
root = tree.getroot()
root.tag

'{http://www.mediawiki.org/xml/export-0.10/}mediawiki'

Each children of the root element are actually different pages in wikipedia. As we can see below each page has also 4 children which are `title`, `ns`, `id` and `revision`. We mainly want to do topic classification on articles which belong to name space 0 (`ns=0`) which is the main name space of normal wikipedia articles.

In [5]:
root[3].getchildren()

[<Element '{http://www.mediawiki.org/xml/export-0.10/}title' at 0x11ea047c8>,
 <Element '{http://www.mediawiki.org/xml/export-0.10/}ns' at 0x11ea04818>,
 <Element '{http://www.mediawiki.org/xml/export-0.10/}id' at 0x11ea04868>,
 <Element '{http://www.mediawiki.org/xml/export-0.10/}revision' at 0x11ea048b8>]

In [6]:
page = root.find('{http://www.mediawiki.org/xml/export-0.10/}page')
revisions = page.find('{http://www.mediawiki.org/xml/export-0.10/}revision')
for node in revisions.getiterator():
    if node.tag == '{http://www.mediawiki.org/xml/export-0.10/}text':
        print(node.text[:10])

{{monththi


We can see that the main text of wikipedia articles is in the `text` node which is a child of `revision`. So we put document id, title, namespace and the text of each article in a list of dictionaries to make a pandas data frame out of it.

Also we map each article title to its namespace in `article2ns`. we want to later use this dictionary to see which outgoing links in an article are actually referring to an article in namespace 0.

In [7]:
documents = list()
article2ns = dict()
for child in root:
    title = child.find('{http://www.mediawiki.org/xml/export-0.10/}title')
    docID = child.find('{http://www.mediawiki.org/xml/export-0.10/}id')
    ns = child.find('{http://www.mediawiki.org/xml/export-0.10/}ns')
    revisions = child.find('{http://www.mediawiki.org/xml/export-0.10/}revision')
    if revisions != None:
        text = revisions.find('{http://www.mediawiki.org/xml/export-0.10/}text')
        if text.text != None:
            documents.append({'id': docID.text , 'title': title.text, 'namespace': ns.text, 'text': text.text})
            article2ns[title.text] = ns.text
    

In [8]:
wikiDF = pd.DataFrame(documents)

In [9]:
wikiDF.head()

Unnamed: 0,id,namespace,text,title
0,1,0,{{monththisyear|4}}\n'''April''' is the 4th [[...,April
1,2,0,{{monththisyear|8}}\n'''August''' (Aug.) is th...,August
2,6,0,[[File:Chemin montant dans les hautes herbes -...,Art
3,8,0,{{more sources|date=February 2012}}\n\n: ''Thi...,A
4,9,0,{{dablink|Air is one of the four [[classical e...,Air


In total we have 239639 articles in this corpus.

In [10]:
wikiDF.shape

(239639, 4)

In [11]:
wikiDF.groupby('namespace').id.count()

namespace
0      182776
10      17888
12        123
14      32289
4        5460
6          36
8         537
828       530
Name: id, dtype: int64

By filtering out the documents which are not in namespace 0 we can see that we have 182776 documents in namespace 0.

In [12]:
documentsDF = wikiDF[wikiDF['namespace']=='0']

In [13]:
documentsDF.shape

(182776, 4)

First of all, we should extract the outgoing links in each article. Links are inside double brackets [[...]]. For extracting them we use the regular expression bellow.

In [14]:
import re
def get_out_links(s):
    pat = r'(?<=\[\[).+?(?=\]\])'
    return re.findall(pat,s)

For the links with two names seperated by `|`, it seems that the first name is the actual wiki page name which is being refrenced and the second name is the text of the hyperlink in the text. for now we keep the first name i.e. the actual wiki page name.
Also for the links which are for the captions of the pictures (the ones starting with File) we skip them for now.

In [15]:
def get_main_linkname(l):
    return list(map(lambda x: x.split('|')[0], l))

In [16]:
def remove_file(l):
    return [s for s in l if not s.startswith('File:')]

Finally, we only keep the links which are pointing to another article in namespace 0. Also we keep the links pointing to an article which is not in our dataset.

In [17]:
def article_only(l):
    out=[]
    for s in l:
        try:
            if article2ns[s]=='0':
                out.append(s)
        except:
            KeyError
            out.append(s)
    return out

In [18]:
documentsDF['out_links'] = documentsDF['text'].map(get_out_links).map(get_main_linkname).map(remove_file).map(article_only)

In [19]:
documentsDF['num_out_links'] = documentsDF['out_links'].map(len)

In [20]:
documentsDF.head(10)

Unnamed: 0,id,namespace,text,title,out_links,num_out_links
0,1,0,{{monththisyear|4}}\n'''April''' is the 4th [[...,April,"[month, year, March, May, day, July, January, ...",731
1,2,0,{{monththisyear|8}}\n'''August''' (Aug.) is th...,August,"[month, year, Gregorian calendar, July, Septem...",382
2,6,0,[[File:Chemin montant dans les hautes herbes -...,Art,"[artist, creativity, drawing, painting, sculpt...",94
3,8,0,{{more sources|date=February 2012}}\n\n: ''Thi...,A,"[letter, alphabet, Article (grammar), A (disam...",27
4,9,0,{{dablink|Air is one of the four [[classical e...,Air,"[classical element, water, earth, fire, Image:...",43
6,12,0,[[Spain]] is divided in 17 parts called '''aut...,Autonomous communities of Spain,"[Spain, United States, 1978, Catalonia, Catala...",54
7,13,0,[[File:Alan Turing Memorial Closer.jpg|right|t...,Alan Turing,"[Order of the British Empire, FRS, London, Wil...",48
8,14,0,{{Infobox musical artist\n| Name = Ala...,Alanis Morissette,"[Ottawa, Ontario, Canada, Guitar, flute, harmo...",77
9,17,0,{{Infobox software\n| name =...,Adobe Illustrator,"[Adobe Systems, Microsoft Windows, macOS, Vect...",34
10,18,0,[[File:Andouille.jpg|right|thumb|160px]]\n[[Fi...,Andouille,"[pork, sausage, meat, fat, intestines, stomach...",13


In total we have 3649587 outgoing links in all of the articles in our corpus.

In [21]:
sum(list(documentsDF['num_out_links']))

3649587

### LDA

We represent our articles as a list of their outgoing links. Therefore we can represent the whole corpus as a list of lists.

In [22]:
docs = list(documentsDF['out_links'])

We first make a dictionary representation of the links in the documents. Each link in an article is represented by the title of the article it is reffering to. Having this dictionary we can represent each document as a bag of links.

As we can see below, we have 818338 unique links in our whole corpus. This means that there are actually a lot of outgoing links in Simple English Wikipedia which are pointing to an article in the big (actual) Wikipedia.

In [71]:
# Create a dictionary representation of the documents
from gensim.corpora import Dictionary
dictionary = Dictionary(docs)

# Bag-of-words(links) representation of the documents.
corpus = [dictionary.doc2bow(doc) for doc in docs]


print('Number of unique tokens(links): %d' % len(dictionary))
print('Number of documents: %d' % len(corpus))

Number of unique tokens(links): 818338
Number of documents: 182776


As training LDA on a big corpus like this may take a conciderable amount of time we use logging library in python in order to see the progress of LDA training algorithm. 

In [27]:
import logging
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

Finally we train a <a href=https://radimrehurek.com/gensim/models/ldamodel.html>LDA model from gensim</a> library on our corpus. We set the number of topics to be 10 and do 5 passes over our data. The log of the trainig can be seen at the end of the notebook.

In [78]:
# models
from gensim.models import LdaMulticore,ldamodel
params = {'passes': 5, 'random_state': seed}
base_models = dict()

# model = LdaMulticore(corpus=corpus, num_topics=4, id2word=dictionary, workers=6,
#                 passes=params['passes'], random_state=params['random_state'])

model = ldamodel.LdaModel(corpus=corpus, num_topics=10, id2word=dictionary,
                passes=params['passes'], random_state=params['random_state'])




The top 5 terms in the resulting topics is shown below:

In [67]:
model.show_topics(num_words=5)

[(0,
  '0.005*"United States" + 0.003*"Contemporary R&B" + 0.003*"stroke" + 0.002*"New York (state)" + 0.002*"law"'),
 (1,
  '0.019*"Munch Museum" + 0.001*"Rome" + 0.001*"National Gallery (Norway)" + 0.001*"Downtown MRT Line" + 0.001*"Southeastern (train operating company)"'),
 (2,
  '0.014*"municipality" + 0.014*"Switzerland" + 0.014*"France" + 0.013*"Cantons of Switzerland" + 0.011*"United States"'),
 (3,
  '0.005*"Japan" + 0.005*"Association football" + 0.005*"Brazil" + 0.005*"association football" + 0.004*"Bachelor of Arts"'),
 (4,
  '0.003*"Animal" + 0.003*"UNESCO" + 0.002*"species" + 0.002*"genus" + 0.002*"Bachelor of Science"'),
 (5,
  '0.008*"India" + 0.005*"Tamil language" + 0.004*"Russia" + 0.004*"Australia" + 0.003*"Netherlands"'),
 (6,
  '0.011*"Republican Party (United States)" + 0.011*"Democratic Party (United States)" + 0.010*"Americans" + 0.006*"United States" + 0.006*"United States House of Representatives"'),
 (7,
  '0.008*"Prefectures in France" + 0.008*"Germany" + 0

We use the LDA visualisation from gensim to visualize and interpret the resulting 10 topics.

In [69]:
data =  pyLDAvis.gensim.prepare(model, corpus, dictionary)
pyLDAvis.display(data)

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate_ix
  topic_term_dists = topic_term_dists.ix[topic_order]


As it can be seen in this visualisation, it is not easy to make a clear interpretation for all of the resulting topics. Especially for some the topics which have low term frequencies, it is really hard to interpret them. But there still some topics which can be interpreted clearly. For Example by looking at the term distributions for topic 1, we can see that it contains the party names in the united states and also the names of the former presidents of the United States. Therefore we can conclude that this topic is about (U.S.) politics. 

Here, I list my interpretations for other topics:

1. United States politics
2. It seems that it is about movie industry, actors and artist. But there are also terms about sweden which were represented only in this topic, so we cannot be sure.
3. different countries.
4. countries, states, municipalities, city, etc.
5. mostly about animals and Environment 
6. mixture of hockey and some tv programms
7. not clear, terms have really low frequency in this topic
8. mostly football (the name of different cities and football clubs of Brazil in this topic strengthen this assumption) 
9. mixture of stuff related to museums and churchrs and subway lines. still not really clear.
10. Mostly about wrestling and rugby. (Here the high ranks of Knesset as the Jews prayer place is strange)


### conclosion
 As it can be seen from above interpreting these topics is not easy and it seems that this topic classification is not perfect and can be improved. One important point is that we have some topics that are mixture of unrelated things. This shows that maybe the selection of 10 topics for the whole Wikipedia is not enough. Also for interpreting a topic, the terms which have small differenc between their frequency within the selected topic and theri overall frequency are more important (in the visualisation, when the differens of the height of the red bar and the blue bar for a term is small.). For example in most of the topics we have the name of the countries, but this terms are not really informative because in most of the Wikipedia articles no matter what the title of the article is, there is usually a sentence that say to which country this title is belonging to and therefore there is a link to the Wikipedia page of that country.

There are also some category pages in Wikipedia (namespace 14) which define a category and give the link to the articles which are within that category. For example <a href=https://simple.wikipedia.org/wiki/Category:Living_people>this</a> page gives the links to the pages which are about famous people who are alive now. One idea is that for all links in an article we try to find the category that each link belongs to and then represent that link by its category rather than its actual title. This may make the interpretation easier. However, as it can be seen in the data frame below not all of these categories are really "categories". Some of them are too specific to be called as a category. For example there is a category called "Category:Bern (canton)" and another category called "Bern" which doesn't make sense.

In [77]:
pd.DataFrame(wikiDF[wikiDF['namespace']=='14'].groupby('title').id.count()).sort_values('id')

Unnamed: 0_level_0,id
title,Unnamed: 1_level_1
"Category:""Part of a series on"" templates",1
Category:Musical entertainers navigational templates,1
Category:Musical entertainers navigational boxes,1
Category:Musical entertainers from Toronto,1
"Category:Musical entertainers from Los Angeles, California",1
Category:Musical entertainers from London,1
Category:Musical entertainers by record label,1
Category:Musical entertainers by nationality,1
Category:Musical entertainers,1
Category:Musical duos,1


In [66]:
# models
from gensim.models import LdaMulticore,ldamodel
params = {'passes': 5, 'random_state': seed}
base_models = dict()

# model = LdaMulticore(corpus=corpus, num_topics=4, id2word=dictionary, workers=6,
#                 passes=params['passes'], random_state=params['random_state'])

model = ldamodel.LdaModel(corpus=corpus, num_topics=10, id2word=dictionary,
                passes=params['passes'], random_state=params['random_state'])





2018-01-01 02:06:44,709 : INFO : using symmetric alpha at 0.1
2018-01-01 02:06:44,710 : INFO : using symmetric eta at 1.2219889581077744e-06
2018-01-01 02:06:44,863 : INFO : using serial LDA version on this node
2018-01-01 02:07:22,886 : INFO : running online (multi-pass) LDA training, 10 topics, 5 passes over the supplied corpus of 182776 documents, updating model once every 2000 documents, evaluating perplexity every 20000 documents, iterating 50x with a convergence threshold of 0.001000
2018-01-01 02:07:22,900 : INFO : PROGRESS: pass 0, at document #2000/182776
2018-01-01 02:07:25,572 : INFO : merging changes from 2000 documents into a model of 182776 documents
2018-01-01 02:07:26,480 : INFO : topic #5 (0.100): 0.008*"United States" + 0.005*"United Kingdom" + 0.003*"France" + 0.002*"Japan" + 0.002*"London" + 0.002*"Italy" + 0.002*"Germany" + 0.002*"Russia" + 0.002*"Canada" + 0.002*"Australia"
2018-01-01 02:07:26,492 : INFO : topic #3 (0.100): 0.059*"Football League Championship" + 0

2018-01-01 02:07:47,245 : INFO : topic #6 (0.100): 0.007*"footballer" + 0.006*"2014" + 0.005*"United States" + 0.005*"2002" + 0.004*"2015" + 0.004*"1997" + 0.003*"1980" + 0.003*"1983" + 0.003*"2017" + 0.003*"1982"
2018-01-01 02:07:47,259 : INFO : topic #4 (0.100): 0.006*"Greek mythology" + 0.003*"Animal" + 0.003*"species" + 0.003*"English language" + 0.003*"WP:MOSNUM" + 0.002*"Asia" + 0.002*"Africa" + 0.002*"Greek language" + 0.002*"Jesus" + 0.002*"Islam"
2018-01-01 02:07:47,275 : INFO : topic #8 (0.100): 0.008*"footballer" + 0.007*"2014" + 0.007*"2015" + 0.005*"2016" + 0.005*"1999" + 0.005*"2017" + 0.004*"2012" + 0.004*"actress" + 0.004*"2013" + 0.004*"actor"
2018-01-01 02:07:47,289 : INFO : topic #0 (0.100): 0.005*"United States" + 0.003*"England" + 0.003*"music" + 0.002*"computer" + 0.002*"United Kingdom" + 0.002*"piano" + 0.002*"metal" + 0.002*"Zeus" + 0.002*"Americans" + 0.002*"Europe"
2018-01-01 02:07:47,318 : INFO : topic diff=0.313187, rho=0.408248
2018-01-01 02:07:47,340 : INF

2018-01-01 02:08:08,998 : INFO : topic diff=0.252900, rho=0.301511
2018-01-01 02:08:09,022 : INFO : PROGRESS: pass 0, at document #24000/182776
2018-01-01 02:08:10,238 : INFO : merging changes from 2000 documents into a model of 182776 documents
2018-01-01 02:08:12,184 : INFO : topic #1 (0.100): 0.002*"blood" + 0.002*"disease" + 0.002*"F.C. Internazionale Milano" + 0.002*"Rome" + 0.002*"Litre" + 0.001*"bacteria" + 0.001*"Osiris" + 0.001*"philosophy" + 0.001*"medicine" + 0.001*"yellow fever"
2018-01-01 02:08:12,199 : INFO : topic #3 (0.100): 0.017*"Serie A" + 0.005*"Eredivisie" + 0.004*"La Liga" + 0.003*"Brazil" + 0.003*"Association football" + 0.002*"Real Madrid C.F." + 0.002*"Juventus F.C." + 0.002*"Germany national football team" + 0.002*"United States Republican Party" + 0.002*"Serie B"
2018-01-01 02:08:12,212 : INFO : topic #0 (0.100): 0.004*"United States" + 0.004*"music" + 0.002*"computer" + 0.002*"piano" + 0.002*"mathematics" + 0.002*"England" + 0.002*"money" + 0.002*"Microsoft 

2018-01-01 02:08:28,659 : INFO : topic #9 (0.100): 0.011*"United States" + 0.010*"2007" + 0.007*"Americans" + 0.005*"singer" + 0.004*"band" + 0.003*"actor" + 0.003*"2006" + 0.003*"guitar" + 0.003*"Japan" + 0.003*"Beyoncé Knowles"
2018-01-01 02:08:28,674 : INFO : topic #5 (0.100): 0.006*"Italy" + 0.006*"France" + 0.005*"Australia" + 0.005*"United Kingdom" + 0.005*"China" + 0.004*"India" + 0.004*"Japan" + 0.004*"United States" + 0.004*"Russia" + 0.004*"Germany"
2018-01-01 02:08:28,702 : INFO : topic diff=0.244410, rho=0.242536
2018-01-01 02:08:28,723 : INFO : PROGRESS: pass 0, at document #36000/182776
2018-01-01 02:08:29,938 : INFO : merging changes from 2000 documents into a model of 182776 documents
2018-01-01 02:08:32,146 : INFO : topic #7 (0.100): 0.013*"Germany" + 0.007*"Scuderia Ferrari" + 0.006*"Ford Motor Company" + 0.004*"England" + 0.004*"North Rhine-Westphalia" + 0.004*"Bavaria" + 0.004*"district" + 0.003*"Baden-Württemberg" + 0.003*"Michael Schumacher" + 0.002*"London"
2018-

2018-01-01 02:08:48,930 : INFO : topic #2 (0.100): 0.028*"municipality" + 0.015*"Belgium" + 0.010*"Switzerland" + 0.010*"United States" + 0.009*"province" + 0.008*"Cantons of Switzerland" + 0.006*"district" + 0.005*"France" + 0.005*"Delaware" + 0.005*"Nintendo"
2018-01-01 02:08:48,959 : INFO : topic diff=0.141148, rho=0.213201
2018-01-01 02:08:48,984 : INFO : PROGRESS: pass 0, at document #46000/182776
2018-01-01 02:08:49,861 : INFO : merging changes from 2000 documents into a model of 182776 documents
2018-01-01 02:08:51,983 : INFO : topic #0 (0.100): 0.003*"Provinces of Italy" + 0.003*"United States" + 0.002*"Regions of Italy" + 0.002*"Microsoft Windows" + 0.002*"music" + 0.002*"Linux" + 0.002*"Comune" + 0.002*"computer" + 0.002*"television" + 0.002*"Debian"
2018-01-01 02:08:52,001 : INFO : topic #1 (0.100): 0.002*"East Flanders" + 0.001*"Litre" + 0.001*"Gothic architecture" + 0.001*"Christianity" + 0.001*"Hockey Hall of Fame" + 0.001*"philosophy" + 0.001*"church" + 0.001*"DNA" + 0.0

2018-01-01 02:09:04,538 : INFO : topic diff=0.131276, rho=0.192450
2018-01-01 02:09:04,561 : INFO : PROGRESS: pass 0, at document #56000/182776
2018-01-01 02:09:05,115 : INFO : merging changes from 2000 documents into a model of 182776 documents
2018-01-01 02:09:06,991 : INFO : topic #6 (0.100): 0.008*"North American Central Time Zone" + 0.004*"Île-de-France" + 0.004*"2008" + 0.004*"2004" + 0.003*"2001" + 0.003*"2005" + 0.003*"2002" + 0.003*"1998" + 0.003*"2006" + 0.003*"United States"
2018-01-01 02:09:07,006 : INFO : topic #5 (0.100): 0.016*"Pakistan" + 0.012*"Romania" + 0.009*"Commune in Romania" + 0.006*"Scotland" + 0.006*"Australia" + 0.006*"India" + 0.005*"United Kingdom" + 0.005*"Dolj County" + 0.005*"Suceava County" + 0.005*"Sindh"
2018-01-01 02:09:07,020 : INFO : topic #3 (0.100): 0.024*"Football League One" + 0.018*"Football League Two" + 0.018*"Football League Championship" + 0.016*"Japan Soccer League" + 0.014*"J. League Division 1" + 0.009*"Ligue 1" + 0.007*"Association foo

2018-01-01 02:09:18,988 : INFO : topic #6 (0.100): 0.018*"North American Central Time Zone" + 0.013*"Japan national football team" + 0.006*"Campeonato Brasileiro Série A" + 0.005*"British Railways" + 0.003*"2008" + 0.003*"2001" + 0.003*"Spain national football team" + 0.003*"United States" + 0.003*"Île-de-France" + 0.003*"2005"
2018-01-01 02:09:19,019 : INFO : topic diff=0.178035, rho=0.176777
2018-01-01 02:09:19,039 : INFO : PROGRESS: pass 0, at document #66000/182776
2018-01-01 02:09:20,009 : INFO : merging changes from 2000 documents into a model of 182776 documents
2018-01-01 02:09:21,588 : INFO : topic #1 (0.100): 0.021*"Aquitaine" + 0.007*"British Rail" + 0.005*"F.C. Internazionale Milano" + 0.003*"Mayenne" + 0.003*"A.F.C. Ajax" + 0.002*"S.S. Lazio" + 0.002*"TOPS" + 0.002*"P.S.V. Eindhoven" + 0.002*"Nobel Prize in Physiology or Medicine" + 0.002*"Feyenoord Rotterdam"
2018-01-01 02:09:21,607 : INFO : topic #9 (0.100): 0.016*"United States" + 0.007*"United States Census, 2000" + 0.

2018-01-01 02:09:31,538 : INFO : topic #0 (0.100): 0.004*"F.C. Bayern Munich" + 0.002*"Internet" + 0.002*"Microsoft Windows" + 0.002*"United States" + 0.002*"music" + 0.002*"computer" + 0.001*"United Kingdom" + 0.001*"Microsoft" + 0.001*"Image:Green check.png" + 0.001*"community"
2018-01-01 02:09:31,546 : INFO : topic #3 (0.100): 0.030*"Association football" + 0.026*"J. League Division 1" + 0.023*"Japan" + 0.022*"Serie A" + 0.022*"La Liga" + 0.019*"Segunda División" + 0.015*"J. League Division 2" + 0.013*"Brazil" + 0.012*"Football League Two" + 0.011*"Midfielder"
2018-01-01 02:09:31,574 : INFO : topic diff=0.108426, rho=0.164399
2018-01-01 02:09:31,594 : INFO : PROGRESS: pass 0, at document #76000/182776
2018-01-01 02:09:32,288 : INFO : merging changes from 2000 documents into a model of 182776 documents
2018-01-01 02:09:33,834 : INFO : topic #9 (0.100): 0.015*"United States" + 0.006*"Americans" + 0.005*"California" + 0.004*"New York City" + 0.004*"United States Census, 2000" + 0.003*"

2018-01-01 02:09:46,684 : INFO : topic #1 (0.100): 0.006*"F.C. Internazionale Milano" + 0.004*"A.F.C. Ajax" + 0.004*"Emperor of Japan" + 0.004*"Aquitaine" + 0.003*"A.C.F. Fiorentina" + 0.003*"S.S. Lazio" + 0.003*"Parma F.C." + 0.003*"Imperial Household Agency" + 0.003*"List of Emperors of Japan" + 0.003*"Italy"
2018-01-01 02:09:46,693 : INFO : topic #5 (0.100): 0.005*"Australia" + 0.005*"Chile" + 0.005*"Avispa Fukuoka" + 0.005*"South Korea" + 0.004*"Russia" + 0.004*"India" + 0.004*"Pakistan" + 0.004*"China" + 0.004*"Romania" + 0.004*"United Kingdom"
2018-01-01 02:09:46,727 : INFO : topic diff=0.128342, rho=0.154303
2018-01-01 02:09:46,747 : INFO : PROGRESS: pass 0, at document #86000/182776
2018-01-01 02:09:47,480 : INFO : merging changes from 2000 documents into a model of 182776 documents
2018-01-01 02:09:49,096 : INFO : topic #7 (0.100): 0.019*"Striker" + 0.009*"Germany" + 0.008*"Yokohama F. Marinos" + 0.006*"England" + 0.006*"Chicago Blackhawks" + 0.006*"National Hockey League" + 0

2018-01-01 02:09:59,868 : INFO : topic #3 (0.100): 0.041*"Japan" + 0.037*"Association football" + 0.026*"J. League Division 1" + 0.020*"J. League Division 2" + 0.013*"Midfielder" + 0.012*"La Liga" + 0.011*"Brazil" + 0.010*"Defender (football)" + 0.010*"Japan Football League" + 0.010*"Serie A"
2018-01-01 02:09:59,908 : INFO : topic diff=0.132566, rho=0.145865
2018-01-01 02:09:59,929 : INFO : PROGRESS: pass 0, at document #96000/182776
2018-01-01 02:10:00,832 : INFO : merging changes from 2000 documents into a model of 182776 documents
2018-01-01 02:10:02,696 : INFO : topic #2 (0.100): 0.027*"United States" + 0.023*"France" + 0.009*"Communes of France" + 0.008*"Departments of France" + 0.008*"Regions of France" + 0.007*"city" + 0.007*"World Wrestling Entertainment" + 0.004*"university" + 0.004*"Florida" + 0.004*"Goalkeeper"
2018-01-01 02:10:02,705 : INFO : topic #7 (0.100): 0.009*"National Hockey League" + 0.007*"Italian Grand Prix" + 0.007*"British Grand Prix" + 0.007*"Litre" + 0.007*"M

2018-01-01 02:10:18,526 : INFO : topic #2 (0.100): 0.018*"France" + 0.017*"United States" + 0.009*"Communes of France" + 0.007*"Departments of France" + 0.005*"Regions of France" + 0.004*"city" + 0.003*"List of popes" + 0.003*"World Wrestling Entertainment" + 0.003*"Postal code" + 0.003*"Florida"
2018-01-01 02:10:18,535 : INFO : topic #1 (0.100): 0.003*"Nobel Prize in Physiology or Medicine" + 0.002*"INSEE code" + 0.002*"Communauté d'agglomération" + 0.002*"Rome" + 0.002*"Emperor of Japan" + 0.002*"Italy" + 0.002*"protein" + 0.002*"DNA" + 0.002*"Imperial Household Agency" + 0.001*"Pope"
2018-01-01 02:10:18,545 : INFO : topic #5 (0.100): 0.006*"United Kingdom" + 0.005*"Australia" + 0.004*"Italy" + 0.004*"India" + 0.004*"Germany" + 0.004*"2008 Summer Olympics" + 0.004*"List of IOC country codes" + 0.004*"Russia" + 0.004*"2000 Summer Olympics" + 0.004*"1996 Summer Olympics"
2018-01-01 02:10:18,575 : INFO : topic diff=0.118311, rho=0.138675
2018-01-01 02:10:18,593 : INFO : PROGRESS: pass 0

2018-01-01 02:10:36,273 : INFO : topic #4 (0.100): 0.008*"UNESCO" + 0.005*"Animal" + 0.004*"species" + 0.003*"Capital (political)" + 0.002*"Chordate" + 0.002*"Honshū" + 0.002*"television series" + 0.002*"bird" + 0.002*"genus" + 0.002*"World Heritage Site"
2018-01-01 02:10:36,280 : INFO : topic #9 (0.100): 0.019*"United States" + 0.009*"Americans" + 0.006*"movie" + 0.005*"California" + 0.004*"actor" + 0.004*"New York City" + 0.004*"actress" + 0.004*"singer" + 0.003*"television" + 0.003*"Los Angeles"
2018-01-01 02:10:36,289 : INFO : topic #8 (0.100): 0.003*"Wrestling" + 0.002*"Emperor Go-Murakami" + 0.002*"Boxing" + 0.002*"Weightlifting" + 0.002*"Shinto shrine" + 0.002*"Judo" + 0.002*"Emperor Tenji" + 0.002*"WWE" + 0.002*"Emperor Fushimi" + 0.001*"Drama"
2018-01-01 02:10:36,299 : INFO : topic #3 (0.100): 0.032*"Japan" + 0.018*"Association football" + 0.012*"J. League Division 1" + 0.008*"Brazil" + 0.008*"J. League Division 2" + 0.007*"Midfielder" + 0.005*"Westchester County, New York" + 

2018-01-01 02:10:54,842 : INFO : PROGRESS: pass 0, at document #124000/182776
2018-01-01 02:10:55,622 : INFO : merging changes from 2000 documents into a model of 182776 documents
2018-01-01 02:10:58,565 : INFO : topic #8 (0.100): 0.003*"Boyacá Department" + 0.002*"Sergeant" + 0.002*"Private First Class" + 0.002*"Columbia Pictures" + 0.002*"Order of the British Empire" + 0.002*"WWE" + 0.002*"Wrestling" + 0.001*"Shinto shrine" + 0.001*"Paramount Pictures" + 0.001*"Japan women's national football team"
2018-01-01 02:10:58,574 : INFO : topic #7 (0.100): 0.006*"England" + 0.006*"civil parish" + 0.005*"Canada" + 0.005*"National Hockey League" + 0.004*"Cumbria" + 0.004*"Ontario" + 0.003*"Suffolk" + 0.003*"ice hockey" + 0.003*"Germany" + 0.003*"Chicago Blackhawks"
2018-01-01 02:10:58,587 : INFO : topic #6 (0.100): 0.036*"United States Army" + 0.027*"Union Army" + 0.017*"United States Navy" + 0.009*"United States Marine Corps" + 0.007*"Union Navy" + 0.005*"Corporal#United States" + 0.004*"Yonn

2018-01-01 02:11:14,010 : INFO : topic diff=0.097533, rho=0.123091
2018-01-01 02:11:14,030 : INFO : PROGRESS: pass 0, at document #134000/182776
2018-01-01 02:11:14,878 : INFO : merging changes from 2000 documents into a model of 182776 documents
2018-01-01 02:11:17,784 : INFO : topic #7 (0.100): 0.009*"England" + 0.006*"Canada" + 0.005*"civil parish" + 0.004*"National Hockey League" + 0.004*"Ontario" + 0.004*"London" + 0.004*"ice hockey" + 0.004*"Chicago Blackhawks" + 0.003*"Germany" + 0.003*"Cumbria"
2018-01-01 02:11:17,796 : INFO : topic #4 (0.100): 0.006*"Animal" + 0.004*"species" + 0.004*"UNESCO" + 0.003*"genus" + 0.003*"Chordate" + 0.002*"television series" + 0.002*"bird" + 0.002*"Africa" + 0.002*"Chordata" + 0.002*"Australia"
2018-01-01 02:11:17,811 : INFO : topic #6 (0.100): 0.025*"United States Army" + 0.018*"Union Army" + 0.012*"United States Navy" + 0.007*"Yonne" + 0.006*"United States Marine Corps" + 0.005*"Democratic Party (United States)" + 0.005*"Australian Labor Party" 

2018-01-01 02:11:36,128 : INFO : topic #5 (0.100): 0.006*"Sweden" + 0.006*"Spain" + 0.005*"India" + 0.004*"Australia" + 0.004*"Italy" + 0.004*"Germany" + 0.004*"United Kingdom" + 0.004*"World War II" + 0.003*"Finland" + 0.003*"China"
2018-01-01 02:11:36,161 : INFO : topic diff=0.087749, rho=0.118678
2018-01-01 02:11:36,183 : INFO : PROGRESS: pass 0, at document #144000/182776
2018-01-01 02:11:37,188 : INFO : merging changes from 2000 documents into a model of 182776 documents
2018-01-01 02:11:39,722 : INFO : topic #6 (0.100): 0.016*"United States Army" + 0.011*"Union Army" + 0.009*"Australian Labor Party" + 0.008*"United States Navy" + 0.006*"Liberal Party of Australia" + 0.005*"Democratic Party (United States)" + 0.005*"Americans" + 0.004*"United States Marine Corps" + 0.004*"Yonne" + 0.004*"President of the United States"
2018-01-01 02:11:39,735 : INFO : topic #1 (0.100): 0.004*"aircraft" + 0.003*"Rome" + 0.003*"Julian calendar" + 0.002*"engine" + 0.002*"Japanese era name" + 0.002*"2

2018-01-01 02:11:53,406 : INFO : topic #7 (0.100): 0.010*"England" + 0.008*"Canada" + 0.007*"National Hockey League" + 0.006*"Chicago Blackhawks" + 0.006*"London" + 0.005*"Ontario" + 0.005*"ice hockey" + 0.005*"Germany" + 0.004*"English people" + 0.003*"Quebec"
2018-01-01 02:11:53,422 : INFO : topic #8 (0.100): 0.008*"WWE" + 0.004*"professional wrestling" + 0.004*"English Premiership (rugby union)" + 0.003*"Order of the British Empire" + 0.002*"pay-per-view" + 0.002*"WWE Championship" + 0.002*"Tag team" + 0.002*"Professional wrestling match types#Variations of singles matches" + 0.002*"Pro Wrestling Illustrated" + 0.001*"Triple H"
2018-01-01 02:11:53,469 : INFO : topic diff=0.103257, rho=0.114708
2018-01-01 02:11:53,505 : INFO : PROGRESS: pass 0, at document #154000/182776
2018-01-01 02:11:54,846 : INFO : merging changes from 2000 documents into a model of 182776 documents
2018-01-01 02:11:57,086 : INFO : topic #0 (0.100): 0.003*"United States" + 0.002*"stroke" + 0.002*"natural causes"

2018-01-01 02:12:13,044 : INFO : topic #6 (0.100): 0.010*"Americans" + 0.008*"Republican Party (United States)" + 0.006*"United States Army" + 0.006*"Democratic Party (United States)" + 0.005*"Australian Labor Party" + 0.004*"President of the United States" + 0.004*"Ronald Reagan" + 0.004*"United States Senate" + 0.004*"Liberal Party of Australia" + 0.004*"2015"
2018-01-01 02:12:13,056 : INFO : topic #9 (0.100): 0.031*"Americans" + 0.023*"Sweden" + 0.015*"United States" + 0.008*"Counties of Sweden" + 0.008*"Municipalities of Sweden" + 0.008*"Provinces of Sweden" + 0.007*"Statistics Sweden" + 0.007*"California" + 0.006*"movie" + 0.006*"New York City"
2018-01-01 02:12:13,085 : INFO : topic diff=0.251133, rho=0.111111
2018-01-01 02:12:13,106 : INFO : PROGRESS: pass 0, at document #164000/182776
2018-01-01 02:12:14,001 : INFO : merging changes from 2000 documents into a model of 182776 documents
2018-01-01 02:12:16,215 : INFO : topic #6 (0.100): 0.011*"Americans" + 0.009*"Republican Party 

2018-01-01 02:12:28,780 : INFO : topic #7 (0.100): 0.009*"England" + 0.007*"Canada" + 0.005*"London" + 0.005*"Prefectures in France" + 0.005*"Germany" + 0.005*"National Hockey League" + 0.004*"English people" + 0.004*"Ontario" + 0.004*"British people" + 0.004*"Germans"
2018-01-01 02:12:28,790 : INFO : topic #8 (0.100): 0.004*"WWE" + 0.002*"Order of the British Empire" + 0.002*"professional wrestling" + 0.002*"WWE Championship" + 0.001*"pay-per-view" + 0.001*"Belgians" + 0.001*"English Premiership (rugby union)" + 0.001*"Tag team" + 0.001*"Kane (wrestler)" + 0.001*"WWE Intercontinental Championship"
2018-01-01 02:12:28,804 : INFO : topic #6 (0.100): 0.012*"Americans" + 0.009*"Republican Party (United States)" + 0.007*"Democratic Party (United States)" + 0.005*"President of the United States" + 0.005*"United States Senate" + 0.005*"United States House of Representatives" + 0.005*"United States Army" + 0.003*"Ronald Reagan" + 0.003*"United States" + 0.003*"Union Army"
2018-01-01 02:12:28,

2018-01-01 02:12:48,114 : INFO : topic #8 (0.100): 0.003*"WWE" + 0.002*"Order of the British Empire" + 0.001*"professional wrestling" + 0.001*"WWE Championship" + 0.001*"Knesset" + 0.001*"Belgians" + 0.001*"pay-per-view" + 0.001*"capital" + 0.001*"Nicki Minaj" + 0.001*"Randy Orton"
2018-01-01 02:12:48,124 : INFO : topic #4 (0.100): 0.003*"Animal" + 0.003*"Munch Museum" + 0.003*"UNESCO" + 0.002*"genus" + 0.002*"species" + 0.002*"water" + 0.002*"Europe" + 0.001*"Bachelor of Science" + 0.001*"television series" + 0.001*"Africa"
2018-01-01 02:12:48,134 : INFO : topic #7 (0.100): 0.008*"Prefectures in France" + 0.008*"Germany" + 0.007*"England" + 0.007*"Cornwall" + 0.006*"BBC Two" + 0.005*"Canada" + 0.005*"London" + 0.004*"Grand Prix (TV programme)" + 0.003*"BBC One" + 0.003*"English people"
2018-01-01 02:12:48,163 : INFO : topic diff=0.166727, rho=0.104828
2018-01-01 02:12:50,151 : INFO : -16.655 per-word bound, 103215.4 perplexity estimate based on a held-out corpus of 776 documents with 

2018-01-01 02:13:08,545 : INFO : topic #2 (0.100): 0.015*"France" + 0.015*"United States" + 0.010*"Switzerland" + 0.009*"municipality" + 0.008*"city" + 0.008*"Cantons of Switzerland" + 0.006*"district" + 0.005*"Departments of France" + 0.005*"Communes of France" + 0.004*"U.S. state"
2018-01-01 02:13:08,555 : INFO : topic #4 (0.100): 0.004*"Earth" + 0.003*"Africa" + 0.003*"WP:MOSNUM" + 0.003*"Europe" + 0.003*"water" + 0.002*"species" + 0.002*"Animal" + 0.002*"animal" + 0.002*"Asia" + 0.002*"North America"
2018-01-01 02:13:08,567 : INFO : topic #6 (0.100): 0.011*"footballer" + 0.010*"2014" + 0.009*"2015" + 0.006*"2016" + 0.006*"2017" + 0.005*"2013" + 0.004*"2012" + 0.004*"1945" + 0.004*"United States" + 0.004*"1944"
2018-01-01 02:13:08,600 : INFO : topic diff=0.130689, rho=0.103480
2018-01-01 02:13:08,622 : INFO : PROGRESS: pass 1, at document #10000/182776
2018-01-01 02:13:09,409 : INFO : merging changes from 2000 documents into a model of 182776 documents
2018-01-01 02:13:12,140 : INFO

2018-01-01 02:13:26,462 : INFO : topic diff=0.120185, rho=0.103480
2018-01-01 02:13:30,404 : INFO : -14.643 per-word bound, 25591.9 perplexity estimate based on a held-out corpus of 2000 documents with 43697 words
2018-01-01 02:13:30,404 : INFO : PROGRESS: pass 1, at document #20000/182776
2018-01-01 02:13:31,309 : INFO : merging changes from 2000 documents into a model of 182776 documents
2018-01-01 02:13:34,131 : INFO : topic #6 (0.100): 0.010*"footballer" + 0.009*"2014" + 0.008*"2015" + 0.005*"2016" + 0.005*"2017" + 0.005*"2013" + 0.004*"United States" + 0.004*"2012" + 0.004*"2005" + 0.004*"1945"
2018-01-01 02:13:34,144 : INFO : topic #9 (0.100): 0.013*"United States" + 0.011*"actor" + 0.009*"singer" + 0.009*"Americans" + 0.009*"actress" + 0.006*"singer-songwriter" + 0.005*"California" + 0.005*"producer" + 0.005*"New York City" + 0.005*"movie"
2018-01-01 02:13:34,156 : INFO : topic #7 (0.100): 0.014*"England" + 0.011*"Germany" + 0.009*"ice hockey" + 0.006*"London" + 0.005*"Canada" +

2018-01-01 02:13:53,138 : INFO : topic #2 (0.100): 0.017*"United States" + 0.011*"France" + 0.008*"Switzerland" + 0.008*"city" + 0.006*"municipality" + 0.004*"Cantons of Switzerland" + 0.004*"population" + 0.004*"district" + 0.004*"Florida" + 0.003*"U.S. state"
2018-01-01 02:13:53,150 : INFO : topic #7 (0.100): 0.012*"England" + 0.011*"Germany" + 0.006*"London" + 0.005*"ice hockey" + 0.005*"Canada" + 0.003*"United Kingdom" + 0.002*"Bavaria" + 0.002*"Ontario" + 0.002*"Berlin" + 0.002*"Borough"
2018-01-01 02:13:53,162 : INFO : topic #5 (0.100): 0.007*"France" + 0.006*"Russia" + 0.006*"Germany" + 0.006*"India" + 0.006*"Italy" + 0.006*"Spain" + 0.006*"United Kingdom" + 0.005*"Australia" + 0.005*"Japan" + 0.005*"China"
2018-01-01 02:13:53,174 : INFO : topic #1 (0.100): 0.003*"philosopher" + 0.002*"Munch Museum" + 0.002*"Roman Empire" + 0.002*"Christianity" + 0.002*"Rome" + 0.002*"Jesus" + 0.002*"Roman Catholic Church" + 0.002*"aircraft" + 0.002*"Julian calendar" + 0.002*"blood"
2018-01-01 0

2018-01-01 02:14:16,620 : INFO : topic #4 (0.100): 0.005*"Animal" + 0.003*"species" + 0.003*"Earth" + 0.003*"Africa" + 0.003*"water" + 0.002*"Europe" + 0.002*"Chordate" + 0.002*"genus" + 0.002*"animal" + 0.002*"North America"
2018-01-01 02:14:16,630 : INFO : topic #2 (0.100): 0.014*"Switzerland" + 0.013*"municipality" + 0.012*"United States" + 0.011*"Cantons of Switzerland" + 0.010*"district" + 0.008*"France" + 0.006*"city" + 0.005*"Delaware" + 0.004*"population" + 0.003*"Sussex County, Delaware"
2018-01-01 02:14:16,659 : INFO : topic diff=0.127052, rho=0.103480
2018-01-01 02:14:16,680 : INFO : PROGRESS: pass 1, at document #42000/182776
2018-01-01 02:14:17,433 : INFO : merging changes from 2000 documents into a model of 182776 documents
2018-01-01 02:14:20,067 : INFO : topic #8 (0.100): 0.005*"WWE" + 0.002*"WWE Championship" + 0.001*"Order of the British Empire" + 0.001*"Triple H" + 0.001*"John Cena" + 0.001*"World Heavyweight Championship (WWE)" + 0.001*"Vince McMahon" + 0.001*"The U

2018-01-01 02:14:34,553 : INFO : merging changes from 2000 documents into a model of 182776 documents
2018-01-01 02:14:36,989 : INFO : topic #7 (0.100): 0.008*"England" + 0.008*"Germany" + 0.006*"Scuderia Ferrari" + 0.006*"Montreal Canadiens" + 0.005*"Detroit Red Wings" + 0.005*"Prefectures in France" + 0.004*"London" + 0.004*"Boston Bruins" + 0.004*"New York Rangers" + 0.003*"Toronto Maple Leafs"
2018-01-01 02:14:37,003 : INFO : topic #8 (0.100): 0.004*"WWE" + 0.002*"WWE Championship" + 0.002*"professional wrestling" + 0.002*"Order of the British Empire" + 0.002*"Liège (province)" + 0.001*"Triple H" + 0.001*"Isle of Wight" + 0.001*"John Cena" + 0.001*"Gordie Howe" + 0.001*"World Heavyweight Championship (WWE)"
2018-01-01 02:14:37,018 : INFO : topic #3 (0.100): 0.018*"Football League Championship" + 0.016*"Football League One" + 0.012*"Football League Two" + 0.008*"Ligue 1" + 0.007*"Serie A" + 0.007*"English Premier League" + 0.007*"La Liga" + 0.006*"Association football" + 0.004*"Japa

2018-01-01 02:14:51,429 : INFO : PROGRESS: pass 1, at document #62000/182776
2018-01-01 02:14:52,127 : INFO : merging changes from 2000 documents into a model of 182776 documents
2018-01-01 02:14:54,268 : INFO : topic #8 (0.100): 0.003*"WWE" + 0.002*"WWE Championship" + 0.002*"professional wrestling" + 0.001*"Triple H" + 0.001*"Order of the British Empire" + 0.001*"Liège (province)" + 0.001*"John Cena" + 0.001*"Rangers F.C." + 0.001*"Isle of Wight" + 0.001*"The Undertaker"
2018-01-01 02:14:54,280 : INFO : topic #3 (0.100): 0.024*"Nord-Pas-de-Calais" + 0.017*"Football League One" + 0.015*"Football League Championship" + 0.014*"Serie A" + 0.012*"Football League Two" + 0.010*"J. League Division 1" + 0.010*"Association football" + 0.009*"Japan Soccer League" + 0.008*"English Premier League" + 0.007*"Japan"
2018-01-01 02:14:54,290 : INFO : topic #0 (0.100): 0.004*"United States" + 0.002*"computer" + 0.002*"English language" + 0.002*"music" + 0.002*"United Kingdom" + 0.002*"Microsoft Windows

2018-01-01 02:15:09,121 : INFO : topic #5 (0.100): 0.008*"Pakistan" + 0.005*"Italy" + 0.005*"Australia" + 0.005*"Romania" + 0.005*"United Kingdom" + 0.005*"Germany" + 0.005*"India" + 0.004*"Russia" + 0.004*"Spain" + 0.004*"Netherlands"
2018-01-01 02:15:09,130 : INFO : topic #7 (0.100): 0.012*"Germany" + 0.010*"Picardie" + 0.009*"England" + 0.008*"Striker" + 0.006*"London" + 0.004*"North American Soccer League" + 0.004*"Yokohama F. Marinos" + 0.003*"Canada" + 0.003*"Valencia C.F." + 0.003*"Scuderia Ferrari"
2018-01-01 02:15:09,138 : INFO : topic #0 (0.100): 0.003*"United States" + 0.003*"F.C. Bayern Munich" + 0.002*"computer" + 0.002*"English language" + 0.002*"music" + 0.002*"Internet" + 0.002*"United Kingdom" + 0.002*"government" + 0.002*"Microsoft Windows" + 0.001*"law"
2018-01-01 02:15:09,148 : INFO : topic #8 (0.100): 0.003*"WWE" + 0.002*"WWE Championship" + 0.002*"Total Nonstop Action Wrestling" + 0.002*"Paris Saint-Germain F.C." + 0.002*"Canadian Online Explorer" + 0.002*"profess

2018-01-01 02:15:25,585 : INFO : topic #4 (0.100): 0.007*"Animal" + 0.005*"species" + 0.003*"North America" + 0.003*"plant" + 0.002*"Chordate" + 0.002*"genus" + 0.002*"Europe" + 0.002*"Mammal" + 0.002*"bird" + 0.002*"Africa"
2018-01-01 02:15:25,597 : INFO : topic #8 (0.100): 0.003*"WWE" + 0.002*"Canadian Online Explorer" + 0.002*"Paris Saint-Germain F.C." + 0.002*"Order of the British Empire" + 0.002*"professional wrestling" + 0.002*"WWE Championship" + 0.001*"Total Nonstop Action Wrestling" + 0.001*"John Cena" + 0.001*"Triple H" + 0.001*"Rangers F.C."
2018-01-01 02:15:25,609 : INFO : topic #6 (0.100): 0.019*"Campeonato Brasileiro Série A" + 0.008*"Japan national football team" + 0.006*"2009" + 0.004*"North American Central Time Zone" + 0.004*"2007" + 0.004*"Spain national football team" + 0.003*"2006" + 0.003*"2005" + 0.003*"United States" + 0.003*"1999"
2018-01-01 02:15:25,640 : INFO : topic diff=0.095792, rho=0.103480
2018-01-01 02:15:25,660 : INFO : PROGRESS: pass 1, at document #8

2018-01-01 02:15:38,859 : INFO : topic #2 (0.100): 0.034*"France" + 0.029*"United States" + 0.021*"Communes of France" + 0.019*"Departments of France" + 0.018*"Regions of France" + 0.014*"city" + 0.006*"World Wrestling Entertainment" + 0.005*"Goalkeeper" + 0.005*"Switzerland" + 0.005*"Iowa"
2018-01-01 02:15:38,871 : INFO : topic #6 (0.100): 0.013*"Campeonato Brasileiro Série A" + 0.010*"Japan national football team" + 0.004*"2009" + 0.003*"Sportsperson" + 0.003*"Spain national football team" + 0.002*"North American Central Time Zone" + 0.002*"1986" + 0.002*"2010" + 0.002*"1988" + 0.002*"United States"
2018-01-01 02:15:38,903 : INFO : topic diff=0.096527, rho=0.103480
2018-01-01 02:15:38,925 : INFO : PROGRESS: pass 1, at document #94000/182776
2018-01-01 02:15:39,740 : INFO : merging changes from 2000 documents into a model of 182776 documents
2018-01-01 02:15:41,599 : INFO : topic #7 (0.100): 0.008*"National Hockey League" + 0.007*"Striker" + 0.007*"Italian Grand Prix" + 0.007*"British

2018-01-01 02:15:57,359 : INFO : topic #4 (0.100): 0.008*"Animal" + 0.005*"species" + 0.003*"Chordate" + 0.003*"genus" + 0.003*"North America" + 0.002*"bird" + 0.002*"Earth" + 0.002*"Indo-European languages" + 0.002*"Europe" + 0.002*"Mammal"
2018-01-01 02:15:57,368 : INFO : topic #7 (0.100): 0.009*"National Hockey League" + 0.007*"Canada" + 0.006*"England" + 0.006*"Germany" + 0.006*"ice hockey" + 0.006*"Italian Grand Prix" + 0.005*"British Grand Prix" + 0.005*"Striker" + 0.005*"Litre" + 0.005*"Monaco Grand Prix"
2018-01-01 02:15:57,380 : INFO : topic #6 (0.100): 0.007*"Campeonato Brasileiro Série A" + 0.006*"Track and field athletics" + 0.006*"Japan national football team" + 0.004*"2006" + 0.003*"Boxing" + 0.003*"Baseball-Reference" + 0.003*"2011" + 0.003*"2009" + 0.003*"2010" + 0.002*"2005"
2018-01-01 02:15:57,414 : INFO : topic diff=0.109270, rho=0.103480
2018-01-01 02:15:57,434 : INFO : PROGRESS: pass 1, at document #104000/182776
2018-01-01 02:15:58,124 : INFO : merging changes fro

2018-01-01 02:16:11,871 : INFO : topic #4 (0.100): 0.006*"Animal" + 0.004*"species" + 0.002*"Honshū" + 0.002*"Capital (political)" + 0.002*"Chordate" + 0.002*"genus" + 0.002*"television series" + 0.002*"North America" + 0.002*"bird" + 0.002*"Europe"
2018-01-01 02:16:11,882 : INFO : topic #6 (0.100): 0.006*"Japan national football team" + 0.005*"Track and field athletics" + 0.004*"Campeonato Brasileiro Série A" + 0.003*"Boxing" + 0.003*"2000" + 0.003*"President of the United States" + 0.003*"2006" + 0.003*"2011" + 0.003*"American football" + 0.003*"Sportsperson"
2018-01-01 02:16:11,891 : INFO : topic #8 (0.100): 0.003*"Wrestling" + 0.002*"WWE" + 0.002*"Weightlifting" + 0.002*"Judo" + 0.002*"Shinto shrine" + 0.002*"Emperor Go-Murakami" + 0.002*"Order of the British Empire" + 0.001*"Emperor Tenji" + 0.001*"Emperor Fushimi" + 0.001*"Bío Bío Region"
2018-01-01 02:16:11,924 : INFO : topic diff=0.177270, rho=0.103480
2018-01-01 02:16:11,945 : INFO : PROGRESS: pass 1, at document #114000/18277

2018-01-01 02:16:32,643 : INFO : topic #6 (0.100): 0.003*"Japan national football team" + 0.003*"Americans" + 0.003*"Democratic Party (United States)" + 0.003*"Republican Party (United States)" + 0.003*"Track and field athletics" + 0.003*"Sportsperson" + 0.002*"United States" + 0.002*"Aaron Rodgers" + 0.002*"Mason Crosby" + 0.002*"Campeonato Brasileiro Série A"
2018-01-01 02:16:32,655 : INFO : topic #2 (0.100): 0.015*"France" + 0.014*"United States" + 0.005*"city" + 0.005*"Communes of France" + 0.004*"Departments of France" + 0.004*"Suffolk County, New York" + 0.003*"Regions of France" + 0.003*"Florida" + 0.002*"U.S. state" + 0.002*"Illinois"
2018-01-01 02:16:32,669 : INFO : topic #9 (0.100): 0.016*"United States" + 0.013*"Americans" + 0.005*"movie" + 0.004*"New York City" + 0.004*"California" + 0.004*"actor" + 0.003*"television" + 0.003*"actress" + 0.003*"Chicago" + 0.003*"New York"
2018-01-01 02:16:32,703 : INFO : topic diff=0.096905, rho=0.103480
2018-01-01 02:16:32,731 : INFO : PRO

2018-01-01 02:16:49,884 : INFO : topic #1 (0.100): 0.004*"Japanese era name" + 0.003*"Louis-Frédéric" + 0.003*"Isaac Titsingh" + 0.002*"Julian calendar" + 0.002*"Rome" + 0.002*"National Diet Library" + 0.002*"Roman numerals" + 0.002*"aircraft" + 0.002*"Imperial Household Agency" + 0.002*"Italy"
2018-01-01 02:16:49,894 : INFO : topic #7 (0.100): 0.008*"England" + 0.006*"Canada" + 0.005*"National Hockey League" + 0.005*"civil parish" + 0.005*"Ontario" + 0.004*"Chicago Blackhawks" + 0.004*"London" + 0.004*"ice hockey" + 0.004*"Germany" + 0.003*"Cumbria"
2018-01-01 02:16:49,929 : INFO : topic diff=0.079175, rho=0.103480
2018-01-01 02:16:49,951 : INFO : PROGRESS: pass 1, at document #134000/182776
2018-01-01 02:16:50,693 : INFO : merging changes from 2000 documents into a model of 182776 documents
2018-01-01 02:16:53,449 : INFO : topic #8 (0.100): 0.011*"English Premiership (rugby union)" + 0.002*"Order of the British Empire" + 0.002*"Columbia Pictures" + 0.002*"WWE" + 0.002*"Boyacá Departm

2018-01-01 02:17:11,502 : INFO : topic #6 (0.100): 0.017*"United States Army" + 0.012*"Union Army" + 0.008*"Australian Labor Party" + 0.008*"United States Navy" + 0.006*"Liberal Party of Australia" + 0.005*"Americans" + 0.005*"Yonne" + 0.004*"United States Marine Corps" + 0.004*"Virginia" + 0.004*"Democratic Party (United States)"
2018-01-01 02:17:11,520 : INFO : topic #2 (0.100): 0.021*"France" + 0.014*"United States" + 0.012*"Central European Time" + 0.011*"Central European Summer Time" + 0.010*"Departments of France" + 0.010*"Communes of France" + 0.007*"Country" + 0.006*"List of sovereign states" + 0.005*"Virginia" + 0.004*"city"
2018-01-01 02:17:11,567 : INFO : topic diff=0.074457, rho=0.103480
2018-01-01 02:17:11,597 : INFO : PROGRESS: pass 1, at document #144000/182776
2018-01-01 02:17:12,512 : INFO : merging changes from 2000 documents into a model of 182776 documents
2018-01-01 02:17:14,818 : INFO : topic #9 (0.100): 0.023*"Americans" + 0.016*"United States" + 0.013*"Sweden" +

2018-01-01 02:17:29,325 : INFO : topic #3 (0.100): 0.014*"Japan" + 0.007*"Association football" + 0.007*"Brazil" + 0.004*"association football" + 0.003*"Spain" + 0.003*"J. League Division 1" + 0.002*"Tokyo" + 0.002*"Midfielder" + 0.002*"J. League Division 2" + 0.002*"Exhibition game"
2018-01-01 02:17:29,338 : INFO : topic #0 (0.100): 0.004*"United States" + 0.002*"English language" + 0.002*"stroke" + 0.002*"natural causes" + 0.002*"box office" + 0.002*"disability" + 0.002*"Contemporary R&B" + 0.002*"Apple Inc." + 0.002*"murder" + 0.001*"military"
2018-01-01 02:17:29,369 : INFO : topic diff=0.089501, rho=0.103480
2018-01-01 02:17:29,390 : INFO : PROGRESS: pass 1, at document #154000/182776
2018-01-01 02:17:30,079 : INFO : merging changes from 2000 documents into a model of 182776 documents
2018-01-01 02:17:32,263 : INFO : topic #4 (0.100): 0.005*"Animal" + 0.004*"species" + 0.003*"genus" + 0.003*"television series" + 0.002*"constellation" + 0.002*"UNESCO" + 0.002*"Chordate" + 0.002*"Eur

2018-01-01 02:17:46,468 : INFO : topic #0 (0.100): 0.004*"United States" + 0.003*"Contemporary R&B" + 0.002*"English language" + 0.002*"stroke" + 0.002*"writer" + 0.002*"murder" + 0.001*"law" + 0.001*"natural causes" + 0.001*"critic" + 0.001*"novel"
2018-01-01 02:17:46,480 : INFO : topic #6 (0.100): 0.010*"Americans" + 0.008*"Republican Party (United States)" + 0.006*"United States Army" + 0.005*"Democratic Party (United States)" + 0.005*"Australian Labor Party" + 0.004*"President of the United States" + 0.004*"United States" + 0.004*"Ronald Reagan" + 0.004*"United States Senate" + 0.004*"Union Army"
2018-01-01 02:17:46,514 : INFO : topic diff=0.225628, rho=0.103480
2018-01-01 02:17:46,533 : INFO : PROGRESS: pass 1, at document #164000/182776
2018-01-01 02:17:47,270 : INFO : merging changes from 2000 documents into a model of 182776 documents
2018-01-01 02:17:49,309 : INFO : topic #6 (0.100): 0.011*"Americans" + 0.009*"Republican Party (United States)" + 0.006*"United States Army" + 0.

2018-01-01 02:18:00,972 : INFO : topic #7 (0.100): 0.009*"England" + 0.008*"Canada" + 0.006*"London" + 0.005*"Germany" + 0.005*"Prefectures in France" + 0.005*"National Hockey League" + 0.005*"English people" + 0.004*"Ontario" + 0.004*"British people" + 0.004*"ice hockey"
2018-01-01 02:18:00,984 : INFO : topic #1 (0.100): 0.002*"Think of the children" + 0.002*"Rome" + 0.001*"bacteria" + 0.001*"aircraft" + 0.001*"jurisdiction" + 0.001*"Italy" + 0.001*"Christianity" + 0.001*"Limoges" + 0.001*"Roman Catholic Church" + 0.001*"Christian"
2018-01-01 02:18:00,994 : INFO : topic #2 (0.100): 0.013*"United States" + 0.011*"France" + 0.011*"Switzerland" + 0.011*"municipality" + 0.011*"Cantons of Switzerland" + 0.007*"Bern (canton)" + 0.007*"Central European Time" + 0.006*"Central European Summer Time" + 0.006*"administrative district" + 0.006*"Departments of France"
2018-01-01 02:18:01,024 : INFO : topic diff=0.111049, rho=0.103480
2018-01-01 02:18:01,043 : INFO : PROGRESS: pass 1, at document #1

2018-01-01 02:18:18,294 : INFO : topic #1 (0.100): 0.022*"Munch Museum" + 0.001*"National Gallery (Norway)" + 0.001*"Southeastern (train operating company)" + 0.001*"Rome" + 0.001*"sex ratio" + 0.001*"aircraft" + 0.001*"Think of the children" + 0.001*"Italy" + 0.001*"Roman Catholic Church" + 0.001*"Nobel Prize in Chemistry"
2018-01-01 02:18:18,304 : INFO : topic #0 (0.100): 0.004*"United States" + 0.004*"Contemporary R&B" + 0.002*"stroke" + 0.002*"law" + 0.002*"New York (state)" + 0.002*"drainage basin" + 0.002*"English language" + 0.001*"soldier" + 0.001*"government" + 0.001*"murder"
2018-01-01 02:18:18,316 : INFO : topic #5 (0.100): 0.007*"India" + 0.006*"Tamil language" + 0.004*"Australia" + 0.004*"Russia" + 0.003*"Italy" + 0.003*"Spain" + 0.003*"Tamil Nadu" + 0.003*"Sweden" + 0.003*"Germany" + 0.003*"Netherlands"
2018-01-01 02:18:18,328 : INFO : topic #9 (0.100): 0.025*"Americans" + 0.011*"United States" + 0.011*"Sweden" + 0.007*"California" + 0.006*"New York City" + 0.004*"Los Ang

2018-01-01 02:18:36,216 : INFO : topic #4 (0.100): 0.004*"Earth" + 0.003*"Africa" + 0.003*"WP:MOSNUM" + 0.003*"water" + 0.003*"Europe" + 0.002*"species" + 0.002*"Animal" + 0.002*"animal" + 0.002*"North America" + 0.002*"Asia"
2018-01-01 02:18:36,230 : INFO : topic #5 (0.100): 0.007*"India" + 0.007*"France" + 0.007*"Russia" + 0.006*"Germany" + 0.006*"Spain" + 0.006*"United Kingdom" + 0.005*"Japan" + 0.005*"Prime Minister" + 0.005*"Italy" + 0.005*"China"
2018-01-01 02:18:36,242 : INFO : topic #1 (0.100): 0.008*"Munch Museum" + 0.004*"philosopher" + 0.003*"Nobel Prize in Physiology or Medicine" + 0.003*"aircraft" + 0.003*"chemist" + 0.003*"bishop" + 0.002*"Christianity" + 0.002*"Roman Empire" + 0.002*"Julian calendar" + 0.002*"Gregorian calendar"
2018-01-01 02:18:36,254 : INFO : topic #6 (0.100): 0.011*"footballer" + 0.010*"2014" + 0.009*"2015" + 0.006*"2016" + 0.006*"2017" + 0.005*"2013" + 0.005*"United States" + 0.004*"1945" + 0.004*"President" + 0.004*"2012"
2018-01-01 02:18:36,287 : I

2018-01-01 02:18:51,959 : INFO : topic #3 (0.100): 0.015*"Football League Championship" + 0.008*"Football League One" + 0.006*"Japan" + 0.005*"German Bundesliga" + 0.005*"English Premier League" + 0.005*"Brazil" + 0.004*"Association football" + 0.003*"Football League Two" + 0.002*"Eredivisie" + 0.002*"association football"
2018-01-01 02:18:51,985 : INFO : topic diff=0.114387, rho=0.102930
2018-01-01 02:18:55,518 : INFO : -14.535 per-word bound, 23732.4 perplexity estimate based on a held-out corpus of 2000 documents with 43697 words
2018-01-01 02:18:55,518 : INFO : PROGRESS: pass 2, at document #20000/182776
2018-01-01 02:18:56,287 : INFO : merging changes from 2000 documents into a model of 182776 documents
2018-01-01 02:18:58,716 : INFO : topic #9 (0.100): 0.012*"United States" + 0.011*"actor" + 0.009*"singer" + 0.009*"Americans" + 0.009*"actress" + 0.006*"singer-songwriter" + 0.005*"producer" + 0.005*"California" + 0.005*"Sweden" + 0.005*"movie"
2018-01-01 02:18:58,729 : INFO : topi

2018-01-01 02:19:15,741 : INFO : topic #7 (0.100): 0.012*"England" + 0.011*"Germany" + 0.006*"London" + 0.005*"ice hockey" + 0.005*"Canada" + 0.003*"United Kingdom" + 0.002*"Ontario" + 0.002*"Bavaria" + 0.002*"Berlin" + 0.002*"Borough"
2018-01-01 02:19:15,755 : INFO : topic #5 (0.100): 0.007*"France" + 0.007*"Russia" + 0.006*"Germany" + 0.006*"India" + 0.006*"Spain" + 0.006*"Italy" + 0.006*"United Kingdom" + 0.005*"Australia" + 0.005*"Japan" + 0.005*"China"
2018-01-01 02:19:15,766 : INFO : topic #6 (0.100): 0.008*"footballer" + 0.007*"2014" + 0.006*"2007" + 0.006*"2015" + 0.005*"2005" + 0.005*"2006" + 0.004*"2016" + 0.004*"United States" + 0.004*"2017" + 0.004*"2004"
2018-01-01 02:19:15,778 : INFO : topic #3 (0.100): 0.008*"Football League Championship" + 0.007*"Serie A" + 0.006*"Japan" + 0.004*"Brazil" + 0.004*"La Liga" + 0.004*"Football League One" + 0.004*"English Premier League" + 0.004*"German Bundesliga" + 0.003*"Association football" + 0.002*"Eredivisie"
2018-01-01 02:19:15,786 

2018-01-01 02:19:37,052 : INFO : topic #5 (0.100): 0.007*"France" + 0.007*"Italy" + 0.006*"United Kingdom" + 0.006*"Germany" + 0.006*"Russia" + 0.005*"India" + 0.005*"Spain" + 0.005*"Australia" + 0.005*"China" + 0.004*"Japan"
2018-01-01 02:19:37,062 : INFO : topic #0 (0.100): 0.005*"United States" + 0.003*"English language" + 0.003*"Provinces of Italy" + 0.003*"computer" + 0.002*"music" + 0.002*"law" + 0.002*"Regions of Italy" + 0.002*"Italy" + 0.002*"government" + 0.002*"Istituto Nazionale di Statistica"
2018-01-01 02:19:37,094 : INFO : topic diff=0.121432, rho=0.102930
2018-01-01 02:19:37,116 : INFO : PROGRESS: pass 2, at document #42000/182776
2018-01-01 02:19:37,801 : INFO : merging changes from 2000 documents into a model of 182776 documents
2018-01-01 02:19:40,278 : INFO : topic #9 (0.100): 0.014*"United States" + 0.008*"Americans" + 0.006*"actor" + 0.006*"singer" + 0.004*"New York City" + 0.004*"California" + 0.004*"actress" + 0.004*"movie" + 0.004*"2007" + 0.003*"guitar"
2018-0

2018-01-01 02:19:52,929 : INFO : topic diff=0.104647, rho=0.102930
2018-01-01 02:19:52,952 : INFO : PROGRESS: pass 2, at document #52000/182776
2018-01-01 02:19:53,628 : INFO : merging changes from 2000 documents into a model of 182776 documents
2018-01-01 02:19:55,842 : INFO : topic #2 (0.100): 0.019*"France" + 0.013*"municipality" + 0.011*"United States" + 0.010*"Communes of France" + 0.008*"Switzerland" + 0.007*"district" + 0.006*"Cantons of Switzerland" + 0.006*"Departments of France" + 0.005*"Regions of France" + 0.005*"Belgium"
2018-01-01 02:19:55,853 : INFO : topic #8 (0.100): 0.005*"WWE" + 0.002*"WWE Championship" + 0.002*"professional wrestling" + 0.002*"Order of the British Empire" + 0.002*"Liège (province)" + 0.001*"Triple H" + 0.001*"Isle of Wight" + 0.001*"John Cena" + 0.001*"Gordie Howe" + 0.001*"World Heavyweight Championship (WWE)"
2018-01-01 02:19:55,866 : INFO : topic #9 (0.100): 0.016*"United States" + 0.007*"Americans" + 0.005*"actor" + 0.005*"singer" + 0.004*"movie

2018-01-01 02:20:11,341 : INFO : topic #2 (0.100): 0.060*"France" + 0.048*"Communes of France" + 0.042*"Departments of France" + 0.041*"Regions of France" + 0.041*"United States" + 0.031*"city" + 0.011*"Iowa" + 0.010*"Pas-de-Calais" + 0.010*"departments of France" + 0.009*"Aisne"
2018-01-01 02:20:11,355 : INFO : topic #1 (0.100): 0.007*"British Rail" + 0.004*"Mayenne" + 0.002*"electric multiple unit" + 0.002*"Christianity" + 0.002*"Route availability" + 0.002*"Nobel Prize in Physiology or Medicine" + 0.002*"A.F.C. Ajax" + 0.002*"Jesus" + 0.002*"BREL" + 0.002*"F.C. Internazionale Milano"
2018-01-01 02:20:11,369 : INFO : topic #5 (0.100): 0.011*"Pakistan" + 0.006*"Romania" + 0.005*"India" + 0.005*"France" + 0.005*"Italy" + 0.005*"United Kingdom" + 0.005*"Australia" + 0.005*"Germany" + 0.004*"Russia" + 0.004*"Scotland"
2018-01-01 02:20:11,380 : INFO : topic #9 (0.100): 0.014*"United States" + 0.006*"Americans" + 0.005*"United States Census, 2000" + 0.005*"actor" + 0.004*"singer" + 0.004*"

2018-01-01 02:20:24,183 : INFO : topic #4 (0.100): 0.006*"Animal" + 0.003*"species" + 0.003*"Chordate" + 0.002*"Mammal" + 0.002*"Plant" + 0.002*"genus" + 0.002*"Earth" + 0.002*"Magnoliopsida" + 0.002*"Africa" + 0.002*"Asia"
2018-01-01 02:20:24,194 : INFO : topic #6 (0.100): 0.015*"Campeonato Brasileiro Série A" + 0.007*"Japan national football team" + 0.007*"North American Central Time Zone" + 0.005*"Spain national football team" + 0.005*"2005" + 0.005*"2007" + 0.004*"1999" + 0.004*"2009" + 0.004*"United States" + 0.004*"2006"
2018-01-01 02:20:24,201 : INFO : topic #0 (0.100): 0.004*"United States" + 0.003*"F.C. Bayern Munich" + 0.002*"English language" + 0.002*"computer" + 0.002*"Image:Green check.png" + 0.002*"music" + 0.002*"United Kingdom" + 0.002*"Internet" + 0.002*"government" + 0.002*"Microsoft Windows"
2018-01-01 02:20:24,214 : INFO : topic #5 (0.100): 0.008*"Pakistan" + 0.005*"Australia" + 0.005*"Romania" + 0.005*"United Kingdom" + 0.005*"Italy" + 0.005*"Germany" + 0.005*"Indi

2018-01-01 02:20:39,278 : INFO : topic #7 (0.100): 0.010*"Germany" + 0.010*"Striker" + 0.008*"England" + 0.005*"Picardie" + 0.005*"London" + 0.005*"Yokohama F. Marinos" + 0.004*"National Hockey League" + 0.004*"Chicago Blackhawks" + 0.004*"Canada" + 0.003*"Stanley Cup"
2018-01-01 02:20:39,288 : INFO : topic #3 (0.100): 0.028*"Association football" + 0.026*"J. League Division 1" + 0.021*"Japan" + 0.016*"La Liga" + 0.014*"J. League Division 2" + 0.014*"Serie A" + 0.012*"Brazil" + 0.011*"Segunda División" + 0.010*"Midfielder" + 0.008*"Ligue 1"
2018-01-01 02:20:39,300 : INFO : topic #1 (0.100): 0.005*"F.C. Internazionale Milano" + 0.003*"A.F.C. Ajax" + 0.003*"British Rail" + 0.003*"Italy" + 0.003*"A.C.F. Fiorentina" + 0.003*"S.S. Lazio" + 0.002*"Parma F.C." + 0.002*"Plantae" + 0.002*"Torino F.C. 1906" + 0.002*"Emperor of Japan"
2018-01-01 02:20:39,314 : INFO : topic #5 (0.100): 0.006*"Pakistan" + 0.005*"Australia" + 0.005*"India" + 0.005*"Russia" + 0.005*"Italy" + 0.005*"Germany" + 0.004*"

2018-01-01 02:20:50,852 : INFO : topic #8 (0.100): 0.004*"WWE" + 0.003*"Canadian Online Explorer" + 0.002*"Paris Saint-Germain F.C." + 0.002*"professional wrestling" + 0.002*"Order of the British Empire" + 0.002*"Rangers F.C." + 0.002*"WWE Championship" + 0.001*"John Cena" + 0.001*"Total Nonstop Action Wrestling" + 0.001*"Professional wrestling"
2018-01-01 02:20:50,862 : INFO : topic #6 (0.100): 0.013*"Campeonato Brasileiro Série A" + 0.010*"Japan national football team" + 0.004*"2009" + 0.003*"Sportsperson" + 0.003*"Spain national football team" + 0.002*"United States" + 0.002*"North American Central Time Zone" + 0.002*"1986" + 0.002*"2010" + 0.002*"1988"
2018-01-01 02:20:50,870 : INFO : topic #0 (0.100): 0.008*"chemical compound" + 0.007*"ion" + 0.005*"oxidation state" + 0.003*"United States" + 0.003*"oxidizing agent" + 0.003*"English language" + 0.003*"acid" + 0.002*"reducing agent" + 0.002*"F.C. Bayern Munich" + 0.002*"United Kingdom"
2018-01-01 02:20:50,881 : INFO : topic #9 (0.10

2018-01-01 02:21:06,543 : INFO : topic #6 (0.100): 0.007*"Campeonato Brasileiro Série A" + 0.006*"Track and field athletics" + 0.006*"Japan national football team" + 0.004*"2006" + 0.003*"Boxing" + 0.003*"Baseball-Reference" + 0.003*"2011" + 0.003*"2009" + 0.003*"2010" + 0.003*"Americans"
2018-01-01 02:21:06,552 : INFO : topic #1 (0.100): 0.003*"Nobel Prize in Physiology or Medicine" + 0.002*"Communauté d'agglomération" + 0.002*"F.C. Internazionale Milano" + 0.002*"Plantae" + 0.002*"DNA" + 0.002*"Italy" + 0.002*"A.F.C. Ajax" + 0.001*"Roman Empire" + 0.001*"bacteria" + 0.001*"protein"
2018-01-01 02:21:06,562 : INFO : topic #7 (0.100): 0.009*"National Hockey League" + 0.007*"Canada" + 0.006*"England" + 0.006*"Germany" + 0.006*"ice hockey" + 0.006*"Italian Grand Prix" + 0.005*"British Grand Prix" + 0.005*"Striker" + 0.005*"Litre" + 0.005*"Monaco Grand Prix"
2018-01-01 02:21:06,572 : INFO : topic #9 (0.100): 0.018*"United States" + 0.009*"Americans" + 0.005*"California" + 0.004*"movie" + 0

2018-01-01 02:21:17,583 : INFO : merging changes from 2000 documents into a model of 182776 documents
2018-01-01 02:21:19,374 : INFO : topic #1 (0.100): 0.007*"Japanese era name" + 0.004*"Isaac Titsingh" + 0.004*"Louis-Frédéric" + 0.003*"National Diet Library" + 0.003*"Imperial Household Agency" + 0.002*"Meiji period" + 0.002*"Nobel Prize in Physiology or Medicine" + 0.002*"Edo period" + 0.001*"Emperor Go-Toba" + 0.001*"Italy"
2018-01-01 02:21:19,385 : INFO : topic #2 (0.100): 0.017*"France" + 0.015*"United States" + 0.008*"Communes of France" + 0.007*"Departments of France" + 0.006*"city" + 0.006*"Suffolk County, New York" + 0.005*"Regions of France" + 0.004*"Orange County, New York" + 0.004*"Oneida County, New York" + 0.004*"Ulster County, New York"
2018-01-01 02:21:19,397 : INFO : topic #6 (0.100): 0.006*"Japan national football team" + 0.005*"Track and field athletics" + 0.004*"Campeonato Brasileiro Série A" + 0.003*"Boxing" + 0.003*"2000" + 0.003*"President of the United States" +

2018-01-01 02:21:33,516 : INFO : topic diff=0.103716, rho=0.102930
2018-01-01 02:21:33,538 : INFO : PROGRESS: pass 2, at document #122000/182776
2018-01-01 02:21:34,237 : INFO : merging changes from 2000 documents into a model of 182776 documents
2018-01-01 02:21:36,313 : INFO : topic #8 (0.100): 0.003*"Boyacá Department" + 0.002*"Columbia Pictures" + 0.002*"WWE" + 0.002*"Wrestling" + 0.002*"Order of the British Empire" + 0.002*"Shinto shrine" + 0.001*"Paramount Pictures" + 0.001*"Weightlifting" + 0.001*"Emperor Go-Murakami" + 0.001*"Japan women's national football team"
2018-01-01 02:21:36,324 : INFO : topic #1 (0.100): 0.006*"Japanese era name" + 0.004*"Isaac Titsingh" + 0.003*"Louis-Frédéric" + 0.002*"National Diet Library" + 0.002*"Imperial Household Agency" + 0.002*"Nobel Prize in Physiology or Medicine" + 0.002*"Meiji period" + 0.002*"Rome" + 0.001*"Edo period" + 0.001*"Julian calendar"
2018-01-01 02:21:36,334 : INFO : topic #5 (0.100): 0.004*"India" + 0.004*"Australia" + 0.004*"

2018-01-01 02:21:48,681 : INFO : topic diff=0.084898, rho=0.102930
2018-01-01 02:21:48,701 : INFO : PROGRESS: pass 2, at document #132000/182776
2018-01-01 02:21:49,357 : INFO : merging changes from 2000 documents into a model of 182776 documents
2018-01-01 02:21:51,836 : INFO : topic #9 (0.100): 0.021*"Americans" + 0.017*"United States" + 0.008*"movie" + 0.006*"California" + 0.006*"New York City" + 0.005*"television" + 0.005*"actress" + 0.004*"actor" + 0.004*"Los Angeles" + 0.003*"New York"
2018-01-01 02:21:51,846 : INFO : topic #3 (0.100): 0.027*"Japan" + 0.012*"Association football" + 0.008*"J. League Division 1" + 0.007*"Brazil" + 0.005*"J. League Division 2" + 0.004*"Midfielder" + 0.004*"Captain (United States)" + 0.003*"Defender (football)" + 0.003*"Tokyo" + 0.003*"La Liga"
2018-01-01 02:21:51,856 : INFO : topic #0 (0.100): 0.004*"United States" + 0.004*"disability" + 0.002*"television program" + 0.002*"English language" + 0.002*"blind" + 0.002*"Usher" + 0.002*"Contemporary R&B" 

2018-01-01 02:22:07,974 : INFO : topic #5 (0.100): 0.006*"Spain" + 0.005*"Sweden" + 0.005*"India" + 0.005*"Australia" + 0.004*"Italy" + 0.004*"Germany" + 0.004*"United Kingdom" + 0.003*"World War II" + 0.003*"China" + 0.003*"Russia"
2018-01-01 02:22:08,006 : INFO : topic diff=0.074943, rho=0.102930
2018-01-01 02:22:08,027 : INFO : PROGRESS: pass 2, at document #142000/182776
2018-01-01 02:22:08,706 : INFO : merging changes from 2000 documents into a model of 182776 documents
2018-01-01 02:22:11,250 : INFO : topic #5 (0.100): 0.006*"Spain" + 0.005*"India" + 0.005*"Sweden" + 0.004*"Australia" + 0.004*"Italy" + 0.004*"Germany" + 0.004*"United Kingdom" + 0.003*"World War II" + 0.003*"China" + 0.003*"Russia"
2018-01-01 02:22:11,260 : INFO : topic #7 (0.100): 0.011*"England" + 0.006*"Canada" + 0.005*"London" + 0.005*"civil parish" + 0.005*"National Hockey League" + 0.005*"Chicago Blackhawks" + 0.004*"Ontario" + 0.004*"Germany" + 0.004*"ice hockey" + 0.004*"Suffolk"
2018-01-01 02:22:11,272 : 

2018-01-01 02:22:24,401 : INFO : topic #2 (0.100): 0.018*"United States" + 0.017*"France" + 0.008*"Central European Time" + 0.008*"Central European Summer Time" + 0.007*"Departments of France" + 0.007*"Communes of France" + 0.005*"Country" + 0.004*"city" + 0.004*"List of sovereign states" + 0.003*"Mexico"
2018-01-01 02:22:24,435 : INFO : topic diff=0.106468, rho=0.102930
2018-01-01 02:22:24,460 : INFO : PROGRESS: pass 2, at document #152000/182776
2018-01-01 02:22:25,266 : INFO : merging changes from 2000 documents into a model of 182776 documents
2018-01-01 02:22:27,634 : INFO : topic #0 (0.100): 0.005*"United States" + 0.002*"English language" + 0.002*"stroke" + 0.002*"natural causes" + 0.002*"box office" + 0.002*"disability" + 0.002*"Contemporary R&B" + 0.002*"Apple Inc." + 0.002*"murder" + 0.001*"military"
2018-01-01 02:22:27,646 : INFO : topic #9 (0.100): 0.032*"Americans" + 0.017*"United States" + 0.011*"Sweden" + 0.009*"movie" + 0.008*"California" + 0.007*"New York City" + 0.006

2018-01-01 02:22:43,849 : INFO : topic #3 (0.100): 0.012*"Japan" + 0.008*"association football" + 0.007*"Brazil" + 0.006*"Association football" + 0.003*"sports club" + 0.002*"Spain" + 0.002*"Sweden" + 0.002*"Tokyo" + 0.002*"J. League Division 1" + 0.002*"Televisa"
2018-01-01 02:22:43,892 : INFO : topic diff=0.080574, rho=0.102930
2018-01-01 02:22:43,916 : INFO : PROGRESS: pass 2, at document #162000/182776
2018-01-01 02:22:44,680 : INFO : merging changes from 2000 documents into a model of 182776 documents
2018-01-01 02:22:46,693 : INFO : topic #6 (0.100): 0.011*"Americans" + 0.008*"Republican Party (United States)" + 0.006*"United States Army" + 0.005*"Democratic Party (United States)" + 0.005*"Australian Labor Party" + 0.004*"United States" + 0.004*"President of the United States" + 0.004*"Ronald Reagan" + 0.004*"United States Senate" + 0.004*"Union Army"
2018-01-01 02:22:46,704 : INFO : topic #8 (0.100): 0.007*"WWE" + 0.003*"professional wrestling" + 0.002*"WWE Championship" + 0.002

2018-01-01 02:23:00,121 : INFO : topic #1 (0.100): 0.002*"Think of the children" + 0.002*"Rome" + 0.002*"aircraft" + 0.002*"bacteria" + 0.001*"jurisdiction" + 0.001*"Christianity" + 0.001*"Italy" + 0.001*"Christian" + 0.001*"Roman Catholic Church" + 0.001*"Lancashire"
2018-01-01 02:23:00,132 : INFO : topic #4 (0.100): 0.004*"Animal" + 0.004*"species" + 0.003*"genus" + 0.002*"television series" + 0.002*"Earth" + 0.002*"constellation" + 0.002*"Europe" + 0.002*"Africa" + 0.002*"bird" + 0.002*"Asia"
2018-01-01 02:23:00,168 : INFO : topic diff=0.105953, rho=0.102930
2018-01-01 02:23:00,189 : INFO : PROGRESS: pass 2, at document #172000/182776
2018-01-01 02:23:00,855 : INFO : merging changes from 2000 documents into a model of 182776 documents
2018-01-01 02:23:02,812 : INFO : topic #0 (0.100): 0.005*"United States" + 0.004*"Contemporary R&B" + 0.003*"law" + 0.002*"stroke" + 0.002*"English language" + 0.002*"soldier" + 0.002*"example" + 0.002*"government" + 0.002*"Federal government of the Un

2018-01-01 02:23:19,157 : INFO : topic #7 (0.100): 0.008*"Germany" + 0.008*"England" + 0.007*"Prefectures in France" + 0.007*"BBC Two" + 0.007*"Grand Prix (TV programme)" + 0.006*"Canada" + 0.006*"London" + 0.004*"National Hockey League" + 0.004*"BBC One" + 0.004*"English people"
2018-01-01 02:23:19,172 : INFO : topic #5 (0.100): 0.007*"India" + 0.004*"Russia" + 0.004*"Italy" + 0.004*"Spain" + 0.004*"Sweden" + 0.004*"Australia" + 0.004*"Germany" + 0.003*"Soviet Union" + 0.003*"Netherlands" + 0.003*"Poland"
2018-01-01 02:23:19,184 : INFO : topic #3 (0.100): 0.006*"Japan" + 0.005*"Brazil" + 0.005*"Association football" + 0.005*"association football" + 0.004*"Bachelor of Arts" + 0.002*"Juris Doctor" + 0.002*"São Paulo" + 0.002*"Rio de Janeiro" + 0.002*"Tokyo" + 0.002*"Real Madrid C.F."
2018-01-01 02:23:19,219 : INFO : topic diff=0.097425, rho=0.102930
2018-01-01 02:23:19,239 : INFO : PROGRESS: pass 2, at document #182000/182776
2018-01-01 02:23:20,101 : INFO : merging changes from 2000 do

2018-01-01 02:23:38,871 : INFO : topic #2 (0.100): 0.016*"France" + 0.013*"United States" + 0.011*"Switzerland" + 0.010*"municipality" + 0.009*"Cantons of Switzerland" + 0.007*"city" + 0.007*"district" + 0.006*"Departments of France" + 0.005*"Communes of France" + 0.004*"U.S. state"
2018-01-01 02:23:38,884 : INFO : topic #6 (0.100): 0.012*"footballer" + 0.010*"2014" + 0.009*"2015" + 0.006*"2016" + 0.006*"2017" + 0.005*"2013" + 0.005*"United States" + 0.004*"President" + 0.004*"1945" + 0.004*"2012"
2018-01-01 02:23:38,897 : INFO : topic #8 (0.100): 0.005*"Premier" + 0.004*"professional wrestler" + 0.003*"rugby" + 0.002*"soprano" + 0.002*"tenor" + 0.002*"Order of the British Empire" + 0.002*"WWE" + 0.002*"Cycling" + 0.002*"Philology" + 0.001*"snooker"
2018-01-01 02:23:38,929 : INFO : topic diff=0.291024, rho=0.102389
2018-01-01 02:23:38,947 : INFO : PROGRESS: pass 3, at document #8000/182776
2018-01-01 02:23:39,731 : INFO : merging changes from 2000 documents into a model of 182776 docum

2018-01-01 02:23:55,220 : INFO : topic diff=0.102497, rho=0.102389
2018-01-01 02:23:55,236 : INFO : PROGRESS: pass 3, at document #18000/182776
2018-01-01 02:23:55,982 : INFO : merging changes from 2000 documents into a model of 182776 documents
2018-01-01 02:23:58,368 : INFO : topic #8 (0.100): 0.003*"Premier" + 0.002*"professional wrestler" + 0.002*"rugby" + 0.002*"Order of the British Empire" + 0.002*"soprano" + 0.002*"tenor" + 0.001*"WWE" + 0.001*"Cycling" + 0.001*"Philology" + 0.001*"Leipzig"
2018-01-01 02:23:58,376 : INFO : topic #0 (0.100): 0.006*"United States" + 0.003*"computer" + 0.003*"English language" + 0.003*"law" + 0.003*"government" + 0.002*"music" + 0.002*"business" + 0.002*"murder" + 0.002*"scientist" + 0.002*"ship"
2018-01-01 02:23:58,389 : INFO : topic #6 (0.100): 0.010*"footballer" + 0.009*"2014" + 0.008*"2015" + 0.006*"2016" + 0.005*"2017" + 0.005*"2013" + 0.005*"United States" + 0.004*"2012" + 0.004*"1945" + 0.004*"President"
2018-01-01 02:23:58,397 : INFO : topi

2018-01-01 02:54:23,213 : INFO : topic #9 (0.100): 0.012*"United States" + 0.009*"actor" + 0.009*"Americans" + 0.007*"singer" + 0.006*"actress" + 0.004*"California" + 0.004*"movie" + 0.004*"New York City" + 0.004*"Sweden" + 0.004*"singer-songwriter"
2018-01-01 02:54:23,224 : INFO : topic #7 (0.100): 0.012*"England" + 0.011*"Germany" + 0.006*"London" + 0.006*"ice hockey" + 0.005*"Canada" + 0.003*"United Kingdom" + 0.003*"Ontario" + 0.002*"Bavaria" + 0.002*"Borough" + 0.002*"London Boroughs"
2018-01-01 02:54:23,232 : INFO : topic #4 (0.100): 0.004*"Animal" + 0.003*"species" + 0.003*"water" + 0.003*"Earth" + 0.003*"Africa" + 0.003*"Europe" + 0.002*"animal" + 0.002*"plant" + 0.002*"Asia" + 0.002*"genus"
2018-01-01 02:54:23,264 : INFO : topic diff=0.110974, rho=0.102389
2018-01-01 02:54:23,283 : INFO : PROGRESS: pass 3, at document #30000/182776
2018-01-01 02:54:24,021 : INFO : merging changes from 2000 documents into a model of 182776 documents
2018-01-01 02:54:26,757 : INFO : topic #8 (0.

2018-01-01 02:54:40,864 : INFO : topic diff=0.112282, rho=0.102389
2018-01-01 02:54:44,433 : INFO : -13.947 per-word bound, 15788.9 perplexity estimate based on a held-out corpus of 2000 documents with 44479 words
2018-01-01 02:54:44,434 : INFO : PROGRESS: pass 3, at document #40000/182776
2018-01-01 02:54:45,172 : INFO : merging changes from 2000 documents into a model of 182776 documents
2018-01-01 02:54:47,842 : INFO : topic #1 (0.100): 0.003*"Christianity" + 0.002*"Jesus" + 0.002*"Roman Empire" + 0.002*"Christian" + 0.002*"Rome" + 0.002*"Bible" + 0.002*"Munch Museum" + 0.002*"Florence" + 0.002*"Roman Catholic Church" + 0.001*"Italy"
2018-01-01 02:54:47,851 : INFO : topic #4 (0.100): 0.005*"Animal" + 0.003*"species" + 0.003*"Earth" + 0.002*"water" + 0.002*"Africa" + 0.002*"Europe" + 0.002*"Chordate" + 0.002*"genus" + 0.002*"animal" + 0.002*"North America"
2018-01-01 02:54:47,864 : INFO : topic #2 (0.100): 0.014*"Switzerland" + 0.013*"municipality" + 0.012*"United States" + 0.011*"Ca

2018-01-01 02:55:03,939 : INFO : topic #5 (0.100): 0.011*"Pakistan" + 0.006*"France" + 0.006*"India" + 0.006*"Italy" + 0.006*"United Kingdom" + 0.005*"Germany" + 0.005*"Australia" + 0.005*"Russia" + 0.005*"Spain" + 0.005*"China"
2018-01-01 02:55:03,951 : INFO : topic #7 (0.100): 0.008*"Germany" + 0.008*"England" + 0.006*"Montreal Canadiens" + 0.005*"Scuderia Ferrari" + 0.005*"Detroit Red Wings" + 0.004*"Boston Bruins" + 0.004*"London" + 0.004*"New York Rangers" + 0.004*"Toronto Maple Leafs" + 0.003*"National Hockey League"
2018-01-01 02:55:03,964 : INFO : topic #8 (0.100): 0.005*"WWE" + 0.002*"WWE Championship" + 0.002*"professional wrestling" + 0.002*"Liège (province)" + 0.001*"Order of the British Empire" + 0.001*"Triple H" + 0.001*"John Cena" + 0.001*"Punk rock" + 0.001*"Gordie Howe" + 0.001*"World Heavyweight Championship (WWE)"
2018-01-01 02:55:03,972 : INFO : topic #4 (0.100): 0.005*"Animal" + 0.003*"species" + 0.003*"Chordate" + 0.002*"genus" + 0.002*"Earth" + 0.002*"Africa" + 0

2018-01-01 03:15:29,119 : INFO : topic #7 (0.100): 0.017*"Picardie" + 0.008*"England" + 0.007*"Germany" + 0.005*"Scuderia Ferrari" + 0.005*"Montreal Canadiens" + 0.004*"London" + 0.004*"Prefectures in France" + 0.004*"Detroit Red Wings" + 0.004*"Boston Bruins" + 0.003*"Alpes-Maritimes"
2018-01-01 03:15:29,127 : INFO : topic #4 (0.100): 0.006*"Animal" + 0.003*"species" + 0.003*"Chordate" + 0.002*"genus" + 0.002*"Africa" + 0.002*"Mammal" + 0.002*"Europe" + 0.002*"Earth" + 0.002*"Plant" + 0.002*"Asia"
2018-01-01 03:15:29,138 : INFO : topic #9 (0.100): 0.014*"United States" + 0.006*"Americans" + 0.005*"actor" + 0.005*"singer" + 0.004*"United States Census, 2000" + 0.004*"2007" + 0.004*"movie" + 0.003*"California" + 0.003*"New York City" + 0.003*"album"
2018-01-01 03:15:29,149 : INFO : topic #2 (0.100): 0.051*"France" + 0.042*"United States" + 0.041*"Communes of France" + 0.034*"Departments of France" + 0.034*"Regions of France" + 0.030*"city" + 0.012*"Pas-de-Calais" + 0.010*"Aisne" + 0.009

2018-01-01 03:15:41,949 : INFO : topic #0 (0.100): 0.004*"United States" + 0.003*"F.C. Bayern Munich" + 0.002*"Image:Green check.png" + 0.002*"English language" + 0.002*"computer" + 0.002*"Internet" + 0.002*"United Kingdom" + 0.002*"music" + 0.002*"government" + 0.002*"Microsoft Windows"
2018-01-01 03:15:41,957 : INFO : topic #8 (0.100): 0.004*"WWE" + 0.002*"Total Nonstop Action Wrestling" + 0.002*"WWE Championship" + 0.002*"Canadian Online Explorer" + 0.002*"professional wrestling" + 0.001*"Paris Saint-Germain F.C." + 0.001*"John Cena" + 0.001*"Triple H" + 0.001*"World Heavyweight Championship (WWE)" + 0.001*"AFC"
2018-01-01 03:15:41,969 : INFO : topic #6 (0.100): 0.014*"Campeonato Brasileiro Série A" + 0.007*"North American Central Time Zone" + 0.005*"Spain national football team" + 0.005*"2005" + 0.005*"2007" + 0.005*"1999" + 0.005*"Japan national football team" + 0.004*"United States" + 0.004*"2006" + 0.004*"2004"
2018-01-01 03:15:41,999 : INFO : topic diff=0.106361, rho=0.102389
2

2018-01-01 03:15:57,282 : INFO : topic #1 (0.100): 0.006*"F.C. Internazionale Milano" + 0.004*"British Rail" + 0.003*"A.F.C. Ajax" + 0.003*"Italy" + 0.003*"A.C.F. Fiorentina" + 0.003*"S.S. Lazio" + 0.003*"Parma F.C." + 0.002*"Plantae" + 0.002*"Torino F.C. 1906" + 0.002*"P.S.V. Eindhoven"
2018-01-01 03:15:57,290 : INFO : topic #0 (0.100): 0.004*"United States" + 0.003*"English language" + 0.003*"F.C. Bayern Munich" + 0.002*"computer" + 0.002*"Italy" + 0.002*"music" + 0.002*"United Kingdom" + 0.002*"French language" + 0.002*"Microsoft Windows" + 0.002*"Internet"
2018-01-01 03:15:57,302 : INFO : topic #2 (0.100): 0.045*"France" + 0.034*"United States" + 0.032*"Communes of France" + 0.029*"Departments of France" + 0.028*"Regions of France" + 0.020*"city" + 0.007*"Iowa" + 0.006*"Pas-de-Calais" + 0.006*"Calvados (department)" + 0.006*"departments of France"
2018-01-01 03:15:57,316 : INFO : topic #6 (0.100): 0.019*"Campeonato Brasileiro Série A" + 0.008*"Japan national football team" + 0.006*

2018-01-01 03:27:37,863 : INFO : topic #8 (0.100): 0.004*"WWE" + 0.004*"Canadian Online Explorer" + 0.002*"Paris Saint-Germain F.C." + 0.002*"professional wrestling" + 0.002*"Rangers F.C." + 0.002*"Order of the British Empire" + 0.002*"WWE Championship" + 0.001*"Professional wrestling" + 0.001*"John Cena" + 0.001*"Total Nonstop Action Wrestling"
2018-01-01 03:27:37,873 : INFO : topic #6 (0.100): 0.014*"Campeonato Brasileiro Série A" + 0.011*"Japan national football team" + 0.004*"2009" + 0.003*"Sportsperson" + 0.003*"Spain national football team" + 0.003*"North American Central Time Zone" + 0.003*"United States" + 0.002*"Sebastian Vettel" + 0.002*"1988" + 0.002*"1986"
2018-01-01 03:27:37,885 : INFO : topic #1 (0.100): 0.005*"F.C. Internazionale Milano" + 0.003*"A.F.C. Ajax" + 0.002*"S.S. Lazio" + 0.002*"A.C.F. Fiorentina" + 0.002*"Parma F.C." + 0.002*"Italy" + 0.002*"Emperor of Japan" + 0.002*"British Rail" + 0.002*"P.S.V. Eindhoven" + 0.002*"Avatar: The Last Airbender"
2018-01-01 03:2

2018-01-01 03:28:02,908 : INFO : topic #0 (0.100): 0.006*"chemical compound" + 0.006*"ion" + 0.004*"oxidation state" + 0.003*"United States" + 0.003*"English language" + 0.002*"oxidizing agent" + 0.002*"acid" + 0.002*"United Kingdom" + 0.002*"Heavy metal music" + 0.001*"Internet"
2018-01-01 03:28:02,918 : INFO : topic #7 (0.100): 0.010*"National Hockey League" + 0.007*"Canada" + 0.007*"England" + 0.006*"Striker" + 0.006*"Italian Grand Prix" + 0.006*"British Grand Prix" + 0.006*"ice hockey" + 0.006*"Germany" + 0.005*"Litre" + 0.005*"Monaco Grand Prix"
2018-01-01 03:28:02,929 : INFO : topic #9 (0.100): 0.019*"United States" + 0.010*"Americans" + 0.005*"California" + 0.005*"movie" + 0.004*"New York City" + 0.004*"actor" + 0.004*"singer" + 0.004*"Canadians" + 0.003*"actress" + 0.003*"Los Angeles"
2018-01-01 03:28:02,941 : INFO : topic #1 (0.100): 0.003*"Nobel Prize in Physiology or Medicine" + 0.003*"Communauté d'agglomération" + 0.003*"F.C. Internazionale Milano" + 0.002*"Plantae" + 0.002

2018-01-01 03:28:13,458 : INFO : topic diff=0.180789, rho=0.102389
2018-01-01 03:28:13,476 : INFO : PROGRESS: pass 3, at document #110000/182776
2018-01-01 03:28:14,201 : INFO : merging changes from 2000 documents into a model of 182776 documents
2018-01-01 03:28:16,018 : INFO : topic #0 (0.100): 0.003*"chemical compound" + 0.003*"United States" + 0.003*"ion" + 0.003*"English language" + 0.002*"oxidation state" + 0.002*"United Kingdom" + 0.001*"computer" + 0.001*"Arista Records" + 0.001*"acid" + 0.001*"book"
2018-01-01 03:28:16,028 : INFO : topic #8 (0.100): 0.004*"Wrestling" + 0.003*"WWE" + 0.002*"Weightlifting" + 0.002*"Judo" + 0.002*"Shinto shrine" + 0.002*"Bío Bío Region" + 0.002*"Order of the British Empire" + 0.001*"New Japan Pro Wrestling" + 0.001*"Emperor Go-Murakami" + 0.001*""Weird Al" Yankovic"
2018-01-01 03:28:16,038 : INFO : topic #9 (0.100): 0.019*"United States" + 0.009*"Americans" + 0.005*"movie" + 0.005*"actor" + 0.004*"California" + 0.004*"singer" + 0.004*"New York Ci

2018-01-01 03:28:27,275 : INFO : topic #8 (0.100): 0.003*"Boyacá Department" + 0.002*"Wrestling" + 0.002*"WWE" + 0.002*"Order of the British Empire" + 0.002*"Shinto shrine" + 0.001*"Weightlifting" + 0.001*"Emperor Go-Murakami" + 0.001*"Japan women's national football team" + 0.001*"Jermaine Dupri" + 0.001*"Judo"
2018-01-01 03:28:27,305 : INFO : topic diff=0.165260, rho=0.102389
2018-01-01 03:28:30,361 : INFO : -16.038 per-word bound, 67271.2 perplexity estimate based on a held-out corpus of 2000 documents with 33213 words
2018-01-01 03:28:30,362 : INFO : PROGRESS: pass 3, at document #120000/182776
2018-01-01 03:28:31,086 : INFO : merging changes from 2000 documents into a model of 182776 documents
2018-01-01 03:28:33,251 : INFO : topic #7 (0.100): 0.006*"England" + 0.006*"civil parish" + 0.006*"Canada" + 0.005*"Ontario" + 0.005*"National Hockey League" + 0.005*"Cumbria" + 0.004*"Germany" + 0.004*"Suffolk" + 0.003*"Stanley Cup" + 0.003*"London"
2018-01-01 03:28:33,261 : INFO : topic #0

2018-01-01 03:28:45,280 : INFO : topic #4 (0.100): 0.007*"Animal" + 0.004*"UNESCO" + 0.004*"species" + 0.003*"genus" + 0.003*"Chordate" + 0.002*"bird" + 0.002*"television series" + 0.002*"Chordata" + 0.002*"Australia" + 0.002*"North America"
2018-01-01 03:28:45,289 : INFO : topic #3 (0.100): 0.028*"Japan" + 0.014*"Association football" + 0.009*"J. League Division 1" + 0.007*"Brazil" + 0.006*"J. League Division 2" + 0.005*"Midfielder" + 0.004*"Captain (United States)" + 0.004*"Defender (football)" + 0.003*"La Liga" + 0.003*"Japan Football League"
2018-01-01 03:28:45,319 : INFO : topic diff=0.081815, rho=0.102389
2018-01-01 03:28:45,336 : INFO : PROGRESS: pass 3, at document #130000/182776
2018-01-01 03:28:46,036 : INFO : merging changes from 2000 documents into a model of 182776 documents
2018-01-01 03:28:48,610 : INFO : topic #5 (0.100): 0.005*"India" + 0.005*"Australia" + 0.005*"Spain" + 0.004*"Italy" + 0.004*"Germany" + 0.004*"United Kingdom" + 0.004*"China" + 0.004*"Russia" + 0.003*

2018-01-01 03:29:01,912 : INFO : topic #4 (0.100): 0.006*"Animal" + 0.004*"species" + 0.003*"genus" + 0.003*"UNESCO" + 0.003*"Chordate" + 0.002*"television series" + 0.002*"bird" + 0.002*"Australia" + 0.002*"Africa" + 0.002*"Europe"
2018-01-01 03:29:01,942 : INFO : topic diff=0.089504, rho=0.102389
2018-01-01 03:29:04,911 : INFO : -13.808 per-word bound, 14341.6 perplexity estimate based on a held-out corpus of 2000 documents with 26447 words
2018-01-01 03:29:04,912 : INFO : PROGRESS: pass 3, at document #140000/182776
2018-01-01 03:29:05,612 : INFO : merging changes from 2000 documents into a model of 182776 documents
2018-01-01 03:29:08,308 : INFO : topic #5 (0.100): 0.006*"Spain" + 0.005*"Sweden" + 0.005*"India" + 0.004*"Australia" + 0.004*"Italy" + 0.004*"Germany" + 0.004*"United Kingdom" + 0.003*"World War II" + 0.003*"China" + 0.003*"Russia"
2018-01-01 03:29:08,319 : INFO : topic #4 (0.100): 0.006*"Animal" + 0.004*"species" + 0.003*"genus" + 0.003*"UNESCO" + 0.003*"Chordate" + 0.

2018-01-01 03:29:21,093 : INFO : topic #7 (0.100): 0.011*"England" + 0.007*"Canada" + 0.007*"National Hockey League" + 0.006*"Chicago Blackhawks" + 0.006*"London" + 0.005*"ice hockey" + 0.005*"Ontario" + 0.005*"Germany" + 0.004*"English people" + 0.004*"civil parish"
2018-01-01 03:29:21,103 : INFO : topic #6 (0.100): 0.014*"United States Army" + 0.009*"Union Army" + 0.008*"Americans" + 0.007*"Australian Labor Party" + 0.006*"United States Navy" + 0.005*"Democratic Party (United States)" + 0.005*"Liberal Party of Australia" + 0.005*"Republican Party (United States)" + 0.005*"United States" + 0.004*"President of the United States"
2018-01-01 03:29:21,132 : INFO : topic diff=0.075002, rho=0.102389
2018-01-01 03:29:21,151 : INFO : PROGRESS: pass 3, at document #150000/182776
2018-01-01 03:29:21,924 : INFO : merging changes from 2000 documents into a model of 182776 documents
2018-01-01 03:29:24,161 : INFO : topic #7 (0.100): 0.011*"England" + 0.008*"Canada" + 0.006*"National Hockey League"

2018-01-01 03:29:35,727 : INFO : topic #2 (0.100): 0.018*"Central European Time" + 0.018*"Central European Summer Time" + 0.017*"France" + 0.016*"United States" + 0.015*"Country" + 0.008*"Departments of France" + 0.008*"Communes of France" + 0.004*"Mexico" + 0.004*"Regions of France" + 0.004*"Västra Götaland County"
2018-01-01 03:29:35,739 : INFO : topic #5 (0.100): 0.010*"Sweden" + 0.006*"Spain" + 0.006*"India" + 0.005*"Australia" + 0.004*"Italy" + 0.004*"Germany" + 0.004*"Church of Sweden" + 0.004*"Russia" + 0.003*"Finland" + 0.003*"World War II"
2018-01-01 03:29:35,769 : INFO : topic diff=0.077484, rho=0.102389
2018-01-01 03:29:38,640 : INFO : -14.457 per-word bound, 22483.5 perplexity estimate based on a held-out corpus of 2000 documents with 28114 words
2018-01-01 03:29:38,641 : INFO : PROGRESS: pass 3, at document #160000/182776
2018-01-01 03:29:39,361 : INFO : merging changes from 2000 documents into a model of 182776 documents
2018-01-01 03:29:41,368 : INFO : topic #5 (0.100): 

2018-01-01 03:29:52,967 : INFO : topic #5 (0.100): 0.007*"India" + 0.006*"Sweden" + 0.005*"Spain" + 0.005*"Australia" + 0.005*"Russia" + 0.004*"Italy" + 0.004*"Germany" + 0.003*"World War II" + 0.003*"Soviet Union" + 0.003*"Poland"
2018-01-01 03:29:52,982 : INFO : topic #3 (0.100): 0.008*"Japan" + 0.006*"association football" + 0.006*"Brazil" + 0.005*"Association football" + 0.003*"Real Madrid C.F." + 0.003*"São Paulo" + 0.002*"Rio de Janeiro" + 0.002*"Spain" + 0.002*"Cristiano Ronaldo" + 0.002*"Manchester City F.C."
2018-01-01 03:29:52,992 : INFO : topic #8 (0.100): 0.005*"WWE" + 0.003*"Order of the British Empire" + 0.003*"professional wrestling" + 0.002*"WWE Championship" + 0.002*"English Premiership (rugby union)" + 0.002*"pay-per-view" + 0.002*"Belgians" + 0.001*"Tag team" + 0.001*"John Cena" + 0.001*"Kane (wrestler)"
2018-01-01 03:29:53,021 : INFO : topic diff=0.112064, rho=0.102389
2018-01-01 03:29:53,039 : INFO : PROGRESS: pass 3, at document #170000/182776
2018-01-01 03:29:53,

2018-01-01 03:30:07,189 : INFO : topic #2 (0.100): 0.014*"Switzerland" + 0.014*"municipality" + 0.013*"Cantons of Switzerland" + 0.012*"France" + 0.011*"United States" + 0.009*"district" + 0.007*"Departments of France" + 0.006*"Communes of France" + 0.005*"Bern (canton)" + 0.005*"Central European Time"
2018-01-01 03:30:07,198 : INFO : topic #0 (0.100): 0.005*"United States" + 0.003*"Contemporary R&B" + 0.003*"law" + 0.002*"stroke" + 0.002*"English language" + 0.002*"drainage basin" + 0.002*"soldier" + 0.002*"New York (state)" + 0.001*"murder" + 0.001*"government"
2018-01-01 03:30:07,208 : INFO : topic #8 (0.100): 0.004*"WWE" + 0.002*"Order of the British Empire" + 0.002*"professional wrestling" + 0.002*"WWE Championship" + 0.002*"Knesset" + 0.001*"English Premiership (rugby union)" + 0.001*"Belgians" + 0.001*"pay-per-view" + 0.001*"Randy Orton" + 0.001*"WWE SmackDown"
2018-01-01 03:30:07,237 : INFO : topic diff=0.086452, rho=0.102389
2018-01-01 03:30:10,176 : INFO : -15.761 per-word bo

2018-01-01 03:30:27,140 : INFO : topic #4 (0.100): 0.004*"Earth" + 0.003*"water" + 0.003*"Europe" + 0.002*"Africa" + 0.002*"species" + 0.002*"Animal" + 0.002*"Asia" + 0.002*"WP:MOSNUM" + 0.002*"planet" + 0.002*"China"
2018-01-01 03:30:27,154 : INFO : topic #2 (0.100): 0.016*"France" + 0.012*"United States" + 0.012*"Switzerland" + 0.011*"municipality" + 0.010*"Cantons of Switzerland" + 0.008*"district" + 0.007*"Departments of France" + 0.007*"city" + 0.006*"Communes of France" + 0.004*"Capital city"
2018-01-01 03:30:27,168 : INFO : topic #3 (0.100): 0.030*"Football League Championship" + 0.014*"Football League One" + 0.009*"English Premier League" + 0.007*"Japan" + 0.005*"Brazil" + 0.004*"Association football" + 0.004*"association football" + 0.003*"Football League Two" + 0.003*"Bachelor of Arts" + 0.003*"La Liga"
2018-01-01 03:30:27,178 : INFO : topic #8 (0.100): 0.004*"Premier" + 0.003*"professional wrestler" + 0.003*"rugby" + 0.002*"WWE" + 0.002*"Order of the British Empire" + 0.002*

2018-01-01 03:30:43,628 : INFO : topic #6 (0.100): 0.011*"footballer" + 0.009*"2014" + 0.008*"2015" + 0.006*"2016" + 0.005*"2017" + 0.005*"2013" + 0.005*"United States" + 0.004*"2012" + 0.004*"1945" + 0.004*"President"
2018-01-01 03:30:43,658 : INFO : topic diff=0.110091, rho=0.101856
2018-01-01 03:30:43,679 : INFO : PROGRESS: pass 4, at document #16000/182776
2018-01-01 03:30:44,366 : INFO : merging changes from 2000 documents into a model of 182776 documents
2018-01-01 03:30:46,718 : INFO : topic #3 (0.100): 0.017*"Football League Championship" + 0.009*"Football League One" + 0.006*"English Premier League" + 0.006*"Japan" + 0.005*"Brazil" + 0.004*"German Bundesliga" + 0.004*"Association football" + 0.003*"Football League Two" + 0.002*"association football" + 0.002*"Bachelor of Arts"
2018-01-01 03:30:46,731 : INFO : topic #2 (0.100): 0.015*"United States" + 0.014*"France" + 0.009*"city" + 0.009*"Switzerland" + 0.007*"municipality" + 0.006*"Cantons of Switzerland" + 0.005*"district" + 

2018-01-01 03:31:06,864 : INFO : topic #4 (0.100): 0.004*"Animal" + 0.003*"species" + 0.003*"water" + 0.003*"Earth" + 0.003*"Africa" + 0.002*"Europe" + 0.002*"animal" + 0.002*"plant" + 0.002*"Asia" + 0.002*"genus"
2018-01-01 03:31:06,877 : INFO : topic #8 (0.100): 0.002*"Premier" + 0.002*"Order of the British Empire" + 0.002*"professional wrestler" + 0.001*"soprano" + 0.001*"Dorset" + 0.001*"Paramount Pictures" + 0.001*"WWE" + 0.001*"rugby" + 0.001*"tenor" + 0.001*"Metro-Goldwyn-Mayer"
2018-01-01 03:31:06,890 : INFO : topic #1 (0.100): 0.004*"Munch Museum" + 0.003*"Christianity" + 0.002*"Jesus" + 0.002*"Roman Empire" + 0.002*"Rome" + 0.002*"philosopher" + 0.002*"Roman Catholic Church" + 0.002*"bishop" + 0.002*"Julian calendar" + 0.002*"Bible"
2018-01-01 03:31:06,902 : INFO : topic #2 (0.100): 0.017*"United States" + 0.012*"France" + 0.009*"city" + 0.007*"Switzerland" + 0.005*"municipality" + 0.004*"Cantons of Switzerland" + 0.004*"U.S. state" + 0.003*"Florida" + 0.003*"district" + 0.00

2018-01-01 03:31:24,720 : INFO : topic #3 (0.100): 0.006*"Serie A" + 0.006*"Football League Championship" + 0.005*"Japan" + 0.004*"La Liga" + 0.004*"Brazil" + 0.003*"German Bundesliga" + 0.003*"Football League One" + 0.003*"English Premier League" + 0.003*"Germany national football team" + 0.003*"Association football"
2018-01-01 03:31:24,745 : INFO : topic diff=0.146279, rho=0.101856
2018-01-01 03:31:24,765 : INFO : PROGRESS: pass 4, at document #38000/182776
2018-01-01 03:31:25,447 : INFO : merging changes from 2000 documents into a model of 182776 documents
2018-01-01 03:31:28,231 : INFO : topic #4 (0.100): 0.004*"Animal" + 0.003*"species" + 0.003*"Earth" + 0.003*"water" + 0.003*"Africa" + 0.002*"Europe" + 0.002*"animal" + 0.002*"genus" + 0.002*"Asia" + 0.002*"Chordate"
2018-01-01 03:31:28,242 : INFO : topic #2 (0.100): 0.015*"Switzerland" + 0.014*"municipality" + 0.012*"Cantons of Switzerland" + 0.012*"United States" + 0.010*"district" + 0.009*"France" + 0.006*"Delaware" + 0.006*"ci

2018-01-01 03:31:45,130 : INFO : topic diff=0.102929, rho=0.101856
2018-01-01 03:31:45,150 : INFO : PROGRESS: pass 4, at document #48000/182776
2018-01-01 03:31:45,823 : INFO : merging changes from 2000 documents into a model of 182776 documents
2018-01-01 03:31:48,235 : INFO : topic #7 (0.100): 0.008*"Germany" + 0.008*"England" + 0.006*"Montreal Canadiens" + 0.005*"Detroit Red Wings" + 0.005*"Scuderia Ferrari" + 0.004*"Boston Bruins" + 0.004*"London" + 0.004*"New York Rangers" + 0.004*"Toronto Maple Leafs" + 0.003*"Chicago Blackhawks"
2018-01-01 03:31:48,249 : INFO : topic #1 (0.100): 0.008*"Mayenne" + 0.003*"Christianity" + 0.003*"Jesus" + 0.002*"Bible" + 0.002*"Christian" + 0.002*"Roman Empire" + 0.002*"Rome" + 0.002*"Florence" + 0.002*"Roman Catholic Church" + 0.001*"Munch Museum"
2018-01-01 03:31:48,259 : INFO : topic #8 (0.100): 0.005*"WWE" + 0.002*"WWE Championship" + 0.002*"Liège (province)" + 0.002*"Order of the British Empire" + 0.002*"Triple H" + 0.001*"professional wrestlin

2018-01-01 03:32:02,861 : INFO : topic #6 (0.100): 0.006*"2005" + 0.006*"2007" + 0.006*"1999" + 0.005*"2004" + 0.005*"2006" + 0.004*"2008" + 0.004*"2002" + 0.004*"United States" + 0.004*"2001" + 0.004*"2003"
2018-01-01 03:32:02,875 : INFO : topic #3 (0.100): 0.020*"Football League One" + 0.019*"Nord-Pas-de-Calais" + 0.017*"Football League Championship" + 0.015*"Football League Two" + 0.008*"Japan Soccer League" + 0.007*"J. League Division 1" + 0.007*"Ligue 1" + 0.006*"English Premier League" + 0.006*"Serie A" + 0.006*"Association football"
2018-01-01 03:32:02,885 : INFO : topic #8 (0.100): 0.004*"WWE" + 0.002*"WWE Championship" + 0.002*"professional wrestling" + 0.002*"Triple H" + 0.001*"Order of the British Empire" + 0.001*"Liège (province)" + 0.001*"John Cena" + 0.001*"Punk rock" + 0.001*"The Undertaker" + 0.001*"WWE Intercontinental Championship"
2018-01-01 03:32:02,893 : INFO : topic #0 (0.100): 0.005*"United States" + 0.003*"English language" + 0.002*"computer" + 0.002*"music" + 0

2018-01-01 03:32:18,206 : INFO : topic #3 (0.100): 0.015*"Serie A" + 0.014*"Brazil" + 0.014*"La Liga" + 0.014*"Ligue 1" + 0.013*"J. League Division 1" + 0.013*"Association football" + 0.012*"Brazil national football team" + 0.012*"Nord-Pas-de-Calais" + 0.010*"Japan" + 0.009*"German Bundesliga"
2018-01-01 03:32:18,220 : INFO : topic #5 (0.100): 0.008*"Pakistan" + 0.006*"Romania" + 0.005*"Germany" + 0.005*"United Kingdom" + 0.005*"Italy" + 0.005*"Australia" + 0.005*"India" + 0.005*"Spain" + 0.005*"France" + 0.004*"Russia"
2018-01-01 03:32:18,231 : INFO : topic #2 (0.100): 0.056*"France" + 0.045*"Communes of France" + 0.040*"Departments of France" + 0.039*"Regions of France" + 0.038*"United States" + 0.026*"city" + 0.009*"Iowa" + 0.008*"Pas-de-Calais" + 0.008*"departments of France" + 0.008*"Calvados (department)"
2018-01-01 03:32:18,239 : INFO : topic #4 (0.100): 0.006*"Animal" + 0.003*"species" + 0.003*"Chordate" + 0.002*"Plant" + 0.002*"genus" + 0.002*"Magnoliopsida" + 0.002*"Mammal" +

2018-01-01 03:32:30,834 : INFO : topic #4 (0.100): 0.007*"Animal" + 0.004*"species" + 0.003*"North America" + 0.002*"plant" + 0.002*"genus" + 0.002*"Chordate" + 0.002*"Earth" + 0.002*"Europe" + 0.002*"Mammal" + 0.002*"Africa"
2018-01-01 03:32:30,848 : INFO : topic #6 (0.100): 0.018*"Campeonato Brasileiro Série A" + 0.009*"Japan national football team" + 0.005*"North American Central Time Zone" + 0.004*"Spain national football team" + 0.004*"2009" + 0.004*"2005" + 0.004*"2007" + 0.004*"1999" + 0.003*"United States" + 0.003*"2006"
2018-01-01 03:32:30,859 : INFO : topic #1 (0.100): 0.006*"F.C. Internazionale Milano" + 0.004*"British Rail" + 0.003*"Italy" + 0.003*"S.S. Lazio" + 0.003*"A.F.C. Ajax" + 0.003*"A.C.F. Fiorentina" + 0.003*"Parma F.C." + 0.003*"Torino F.C. 1906" + 0.002*"Plantae" + 0.002*"Bologna F.C. 1909"
2018-01-01 03:32:30,888 : INFO : topic diff=0.080121, rho=0.101856
2018-01-01 03:32:33,806 : INFO : -13.337 per-word bound, 10347.3 perplexity estimate based on a held-out cor

2018-01-01 03:32:45,872 : INFO : topic #9 (0.100): 0.014*"United States" + 0.007*"Americans" + 0.005*"Noel Gallagher" + 0.004*"California" + 0.004*"actor" + 0.004*"singer" + 0.004*"New York City" + 0.003*"movie" + 0.003*"Liam Gallagher" + 0.003*"guitar"
2018-01-01 03:32:45,882 : INFO : topic #1 (0.100): 0.005*"F.C. Internazionale Milano" + 0.004*"A.F.C. Ajax" + 0.003*"S.S. Lazio" + 0.003*"A.C.F. Fiorentina" + 0.003*"Parma F.C." + 0.003*"Italy" + 0.002*"Emperor of Japan" + 0.002*"British Rail" + 0.002*"P.S.V. Eindhoven" + 0.002*"Plantae"
2018-01-01 03:32:45,891 : INFO : topic #0 (0.100): 0.004*"English language" + 0.003*"United States" + 0.002*"F.C. Bayern Munich" + 0.002*"chemical compound" + 0.002*"ion" + 0.002*"Microsoft Windows" + 0.002*"computer" + 0.002*"oxidation state" + 0.002*"law" + 0.002*"United Kingdom"
2018-01-01 03:32:45,922 : INFO : topic diff=0.133096, rho=0.101856
2018-01-01 03:32:45,942 : INFO : PROGRESS: pass 4, at document #90000/182776
2018-01-01 03:32:46,621 : INFO

2018-01-01 03:32:58,146 : INFO : topic #1 (0.100): 0.004*"Nobel Prize in Physiology or Medicine" + 0.003*"F.C. Internazionale Milano" + 0.002*"Plantae" + 0.002*"A.F.C. Ajax" + 0.002*"Italy" + 0.002*"Bible" + 0.002*"DNA" + 0.002*"S.S. Lazio" + 0.002*"A.C.F. Fiorentina" + 0.002*"Parma F.C."
2018-01-01 03:32:58,154 : INFO : topic #4 (0.100): 0.008*"Animal" + 0.005*"species" + 0.003*"Chordate" + 0.003*"North America" + 0.003*"genus" + 0.002*"bird" + 0.002*"dinosaur" + 0.002*"plant" + 0.002*"Europe" + 0.002*"mammal"
2018-01-01 03:32:58,162 : INFO : topic #0 (0.100): 0.007*"chemical compound" + 0.006*"ion" + 0.004*"oxidation state" + 0.003*"United States" + 0.003*"English language" + 0.002*"oxidizing agent" + 0.002*"acid" + 0.002*"United Kingdom" + 0.002*"Heavy metal music" + 0.001*"reducing agent"
2018-01-01 03:32:58,187 : INFO : topic diff=0.103166, rho=0.101856
2018-01-01 03:33:02,001 : INFO : -17.229 per-word bound, 153623.8 perplexity estimate based on a held-out corpus of 2000 document

2018-01-01 03:33:14,921 : INFO : topic #6 (0.100): 0.007*"Track and field athletics" + 0.006*"Japan national football team" + 0.005*"Campeonato Brasileiro Série A" + 0.004*"Boxing" + 0.003*"2011" + 0.003*"2006" + 0.003*"2000" + 0.003*"Sportsperson" + 0.003*"2005" + 0.003*"Americans"
2018-01-01 03:33:14,930 : INFO : topic #0 (0.100): 0.004*"chemical compound" + 0.003*"ion" + 0.003*"United States" + 0.003*"English language" + 0.002*"oxidation state" + 0.002*"United Kingdom" + 0.002*"Arista Records" + 0.002*"computer" + 0.001*"acid" + 0.001*"Internet"
2018-01-01 03:33:14,941 : INFO : topic #9 (0.100): 0.019*"United States" + 0.009*"Americans" + 0.005*"movie" + 0.004*"actor" + 0.004*"California" + 0.004*"singer" + 0.004*"New York City" + 0.003*"actress" + 0.003*"Dutchess County, New York" + 0.003*"Los Angeles"
2018-01-01 03:33:14,974 : INFO : topic diff=0.179897, rho=0.101856
2018-01-01 03:33:14,994 : INFO : PROGRESS: pass 4, at document #110000/182776
2018-01-01 03:33:15,711 : INFO : merg

2018-01-01 03:33:28,915 : INFO : topic #5 (0.100): 0.004*"Australia" + 0.004*"India" + 0.004*"Russia" + 0.003*"2012 Summer Olympics" + 0.003*"United Kingdom" + 0.003*"Italy" + 0.003*"Germany" + 0.003*"Belgium" + 0.003*"Maharashtra" + 0.003*"Spain"
2018-01-01 03:33:28,926 : INFO : topic #6 (0.100): 0.005*"Japan national football team" + 0.004*"Track and field athletics" + 0.003*"Sportsperson" + 0.003*"Campeonato Brasileiro Série A" + 0.003*"Republican Party (United States)" + 0.002*"Boxing" + 0.002*"Americans" + 0.002*"President of the United States" + 0.002*"United States" + 0.002*"2000"
2018-01-01 03:33:28,935 : INFO : topic #9 (0.100): 0.015*"United States" + 0.009*"Americans" + 0.005*"movie" + 0.004*"California" + 0.004*"New York City" + 0.004*"actor" + 0.003*"television" + 0.003*"actress" + 0.003*"Los Angeles" + 0.003*"singer"
2018-01-01 03:33:28,964 : INFO : topic diff=0.164205, rho=0.101856
2018-01-01 03:33:31,945 : INFO : -16.027 per-word bound, 66772.3 perplexity estimate based

2018-01-01 03:33:46,799 : INFO : topic #9 (0.100): 0.019*"Americans" + 0.017*"United States" + 0.007*"movie" + 0.005*"California" + 0.005*"New York City" + 0.004*"actress" + 0.004*"actor" + 0.004*"television" + 0.003*"Los Angeles" + 0.003*"New York"
2018-01-01 03:33:46,810 : INFO : topic #2 (0.100): 0.030*"France" + 0.018*"Communes of France" + 0.017*"Departments of France" + 0.014*"United States" + 0.007*"Virginia" + 0.006*"Sergeant#United States" + 0.006*"Allier" + 0.005*"city" + 0.003*"Mexico" + 0.003*"United States Army Center of Military History"
2018-01-01 03:33:46,818 : INFO : topic #7 (0.100): 0.007*"England" + 0.006*"Canada" + 0.005*"National Hockey League" + 0.005*"civil parish" + 0.005*"Ontario" + 0.004*"London" + 0.004*"Chicago Blackhawks" + 0.004*"ice hockey" + 0.004*"Germany" + 0.003*"Cumbria"
2018-01-01 03:33:46,832 : INFO : topic #1 (0.100): 0.005*"Japanese era name" + 0.004*"Louis-Frédéric" + 0.003*"Isaac Titsingh" + 0.003*"Julian calendar" + 0.002*"National Diet Libra

2018-01-01 03:34:03,261 : INFO : topic #4 (0.100): 0.006*"Animal" + 0.004*"species" + 0.003*"genus" + 0.003*"UNESCO" + 0.003*"Chordate" + 0.002*"television series" + 0.002*"bird" + 0.002*"Australia" + 0.002*"Africa" + 0.002*"Europe"
2018-01-01 03:34:03,271 : INFO : topic #0 (0.100): 0.004*"United States" + 0.003*"disability" + 0.003*"English language" + 0.002*"television program" + 0.002*"military" + 0.002*"blind" + 0.002*"Contemporary R&B" + 0.001*"Wikipedia" + 0.001*"United Kingdom" + 0.001*"Paris"
2018-01-01 03:34:03,280 : INFO : topic #7 (0.100): 0.010*"England" + 0.007*"Canada" + 0.006*"civil parish" + 0.005*"Chicago Blackhawks" + 0.005*"National Hockey League" + 0.005*"Ontario" + 0.004*"Suffolk" + 0.004*"London" + 0.004*"ice hockey" + 0.004*"Germany"
2018-01-01 03:34:03,312 : INFO : topic diff=0.089041, rho=0.101856
2018-01-01 03:34:06,335 : INFO : -13.802 per-word bound, 14287.4 perplexity estimate based on a held-out corpus of 2000 documents with 26447 words
2018-01-01 03:34:06

2018-01-01 03:34:22,551 : INFO : topic #2 (0.100): 0.019*"France" + 0.018*"United States" + 0.009*"Central European Time" + 0.009*"Central European Summer Time" + 0.008*"Departments of France" + 0.008*"Communes of France" + 0.005*"Country" + 0.005*"city" + 0.004*"List of sovereign states" + 0.003*"Virginia"
2018-01-01 03:34:22,563 : INFO : topic #1 (0.100): 0.003*"Rome" + 0.003*"aircraft" + 0.002*"Julian calendar" + 0.002*"Japanese era name" + 0.002*"Nobel Prize in Physiology or Medicine" + 0.002*"engine" + 0.002*"Christianity" + 0.002*"Italy" + 0.002*"Louis-Frédéric" + 0.002*"2012 Summer Paralympics"
2018-01-01 03:34:22,595 : INFO : topic diff=0.074594, rho=0.101856
2018-01-01 03:34:22,616 : INFO : PROGRESS: pass 4, at document #150000/182776
2018-01-01 03:34:23,377 : INFO : merging changes from 2000 documents into a model of 182776 documents
2018-01-01 03:34:25,623 : INFO : topic #0 (0.100): 0.005*"United States" + 0.003*"English language" + 0.002*"stroke" + 0.002*"natural causes" + 

2018-01-01 03:34:37,280 : INFO : topic #9 (0.100): 0.030*"Sweden" + 0.029*"Americans" + 0.015*"United States" + 0.011*"Counties of Sweden" + 0.010*"Municipalities of Sweden" + 0.010*"Provinces of Sweden" + 0.009*"Statistics Sweden" + 0.008*"movie" + 0.007*"California" + 0.006*"New York City"
2018-01-01 03:34:37,292 : INFO : topic #8 (0.100): 0.008*"WWE" + 0.004*"professional wrestling" + 0.003*"English Premiership (rugby union)" + 0.003*"WWE Championship" + 0.002*"Order of the British Empire" + 0.002*"pay-per-view" + 0.002*"Tag team" + 0.002*"Triple H" + 0.002*"The Undertaker" + 0.002*"John Cena"
2018-01-01 03:34:37,322 : INFO : topic diff=0.077259, rho=0.101856
2018-01-01 03:34:40,248 : INFO : -14.450 per-word bound, 22386.3 perplexity estimate based on a held-out corpus of 2000 documents with 28114 words
2018-01-01 03:34:40,249 : INFO : PROGRESS: pass 4, at document #160000/182776
2018-01-01 03:34:40,976 : INFO : merging changes from 2000 documents into a model of 182776 documents
20

2018-01-01 03:34:54,441 : INFO : topic #3 (0.100): 0.008*"Japan" + 0.006*"association football" + 0.006*"Brazil" + 0.005*"Association football" + 0.003*"Real Madrid C.F." + 0.003*"São Paulo" + 0.002*"Rio de Janeiro" + 0.002*"Spain" + 0.002*"Cristiano Ronaldo" + 0.002*"Manchester City F.C."
2018-01-01 03:34:54,453 : INFO : topic #5 (0.100): 0.006*"India" + 0.006*"Sweden" + 0.005*"Spain" + 0.005*"Australia" + 0.005*"Russia" + 0.004*"Italy" + 0.004*"Germany" + 0.003*"World War II" + 0.003*"Soviet Union" + 0.003*"Poland"
2018-01-01 03:34:54,466 : INFO : topic #6 (0.100): 0.014*"Americans" + 0.008*"Republican Party (United States)" + 0.006*"Democratic Party (United States)" + 0.005*"United States House of Representatives" + 0.005*"United States Army" + 0.005*"United States Senate" + 0.005*"United States" + 0.005*"President of the United States" + 0.004*"Ronald Reagan" + 0.003*"Australian Labor Party"
2018-01-01 03:34:54,491 : INFO : topic diff=0.111647, rho=0.101856
2018-01-01 03:34:54,511 

2018-01-01 03:35:08,649 : INFO : topic #1 (0.100): 0.002*"Rome" + 0.001*"Think of the children" + 0.001*"aircraft" + 0.001*"Italy" + 0.001*"Roman Catholic Church" + 0.001*"bacteria" + 0.001*"Christianity" + 0.001*"philosopher" + 0.001*"jurisdiction" + 0.001*"Christian"
2018-01-01 03:35:08,661 : INFO : topic #2 (0.100): 0.014*"Switzerland" + 0.014*"municipality" + 0.013*"Cantons of Switzerland" + 0.012*"France" + 0.011*"United States" + 0.009*"district" + 0.007*"Departments of France" + 0.006*"Communes of France" + 0.005*"Bern (canton)" + 0.005*"Central European Time"
2018-01-01 03:35:08,671 : INFO : topic #7 (0.100): 0.008*"England" + 0.008*"BBC Two" + 0.007*"Grand Prix (TV programme)" + 0.007*"Prefectures in France" + 0.007*"Canada" + 0.007*"Germany" + 0.005*"London" + 0.004*"BBC One" + 0.004*"National Hockey League" + 0.004*"English people"
2018-01-01 03:35:08,700 : INFO : topic diff=0.086053, rho=0.101856
2018-01-01 03:35:11,633 : INFO : -15.749 per-word bound, 55080.8 perplexity es