# Word Embeddings notícias de 2018

Os dados são notícias de 2018, coletadas dos principais jornais do Brasil:
* `Carta Capital`, 
* `Estadao`, 
* `Folha de São Paulo`, 
* `O Antagonista`, 
* `O Globo`, 
* `Veja`

Uma análise detalhada dos dados está disponível [aqui](https://pages.github.com/). Objetivo deste notebook é utilizar o modelo word2vec para gerar embeddings a partir dos textos dessas notícias. A arquitetura utilizada pelo modelo é a skip-gram, cada palavra é representada por um vetor de 300 dimensões.

In [1]:
# importing modules and setting log format
import re
import nltk
import gensim, logging
import pandas as pd
from nltk.corpus import stopwords
nltk.download('stopwords')
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
PUNCTUATION = u'[^a-zA-Z0-9áéíóúÁÉÍÓÚâêîôÂÊÎÔãõÃÕçÇ%]' # define news punctuation 

[nltk_data] Downloading package stopwords to /home/diogo/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


### Definindo Lexicons e Funções

In [2]:
# Mapping words in lexicons
map_lexicons = {'a ponto':'a_ponto','ao menos ':'ao_menos ','ate mesmo ':'ate_mesmo ',
                'nao mais que ':'nao_mais_que ','nem mesmo ':'nem_mesmo ','no minimo ':'no_minimo ',
                'o unico ':'o_unico ','a unica ':'a_unica ','pelo menos ':'pelo_menos ',
                'quando menos ':'quando_menos ','quando muito ':'quando_muito ','a par disso ':'a_par_disso ',
                'e nao ':'e_nao ','em suma ':'em_suma ','mas tambem ': 'mas_tambem ','muito menos ':'muito_menos ',
                'nao so ':'nao_so ','ou mesmo ':'ou_mesmo ','por sinal ':'por_sinal ','com isso ':'com_isso ',
                'como consequencia ':'como_consequencia ','de modo que ':'de_modo_que ','deste modo ':'deste_modo ',
                'em decorrencia ':'em_decorrencia ','nesse sentido ':'nesse_sentido ','por causa ':'por_causa ',
                'por conseguinte ':'por_conseguinte ','por essa razao ':'por_essa_razao ','por isso ':'por_isso ',
                'sendo assim ':'sendo_assim ','ou entao ':'ou_entao ','ou mesmo ':'ou_mesmo ','como se ':'como_se ',
                'de um lado ':'de_um_lado ','por outro lado ':'por_outro_lado ','mais que ':'mais_que ',
                'menos que ':'menos_que ','desde que ':'desde_que ','do contrario ':'do_contrario ',
                'em lugar ':'em_lugar ','em vez ':'em_vez','no caso ':'no_caso ','se acaso ':'se_acaso ',
                'de certa forma ':'de_certa_forma ','desse modo ':'desse_modo ','em funcao ':'em_funcao ',
                'isso e ':'isso_e ','ja que ':'ja_que ','na medida que ':'na_medida_que ','nessa direcao ':'nessa_direcao ',
                'no intuito ':'no_intuito ','no mesmo sentido ':'no_mesmo_sentido ','ou seja ':'ou_seja ',
                'uma vez que ':'uma_vez_que ','tanto que ':'tanto_que ','visto que ':'visto_que ','ainda que ':'ainda_que ',
                'ao contrario ':'ao_contrario ','apesar de ':'apesar_de ','fora isso ':'fora_isso ','mesmo que ':'mesmo_que ',
                'nao obstante ':'nao_obstante ','nao fosse isso ':'nao_fosse_isso ','no entanto ':'no_entanto ',
                'para tanto ':'para_tanto ','pelo contrario ':'pelo_contrario ','por sua vez ':'por_sua_vez ','posto que ':'posto_que '
               }

In [3]:
# Convert word from text into lexicons
def word2lexicon(text):
    for k, v in map_lexicons.items():
        text = str(text).replace(k,v)
    return text

In [4]:
# function for processing sentences
def processSentences(text):
    stop_words = stopwords.words('portuguese') # load stop words
    text = re.sub(PUNCTUATION, ' ', str(text)) # remove punctuation from text
    text = str(text).split() # split sentences by words
    text = [word for word in text if word not in stop_words] # Remove stopwords
    return text

### Carregando Notícias

In [5]:
# load data
carta_capital = pd.read_csv("data/carta_capital.csv") 
estadao = pd.read_csv("data/estadao.csv")
folha = pd.read_csv("data/folha.csv") 
oantagonista = pd.read_csv("data/oantagonista.csv") 
oglobo = pd.read_csv("data/oglobo.csv") 
veja = pd.read_csv("data/veja.csv") 
# concat all news
news = pd.concat((carta_capital, folha, estadao, oantagonista, oglobo, veja), sort=False, ignore_index=True)

In [6]:
# processing news text
news['text'] = news['text'].apply(word2lexicon) 
news['text'] = news['text'].apply(processSentences)

### Treinando Word2Vec

In [7]:
# Train word2vec model - settings: approach skip-gram, size embeddings vectors 300 
model = gensim.models.Word2Vec(news['text'], workers=4, size=300, sg=1, window=5, min_count=5)
# Saving model
model.save('embeddings/news_w2v.bin')
# Saving embeddings
model.wv.save_word2vec_format("embeddings/news_vectors.bin")

2019-04-13 11:17:02,528 : INFO : collecting all words and their counts
2019-04-13 11:17:02,539 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2019-04-13 11:17:03,943 : INFO : PROGRESS: at sentence #10000, processed 4109020 words, keeping 118262 word types
2019-04-13 11:17:04,986 : INFO : PROGRESS: at sentence #20000, processed 7263023 words, keeping 148407 word types
2019-04-13 11:17:06,064 : INFO : PROGRESS: at sentence #30000, processed 10406672 words, keeping 172586 word types
2019-04-13 11:17:06,757 : INFO : PROGRESS: at sentence #40000, processed 12444180 words, keeping 181023 word types
2019-04-13 11:17:06,970 : INFO : PROGRESS: at sentence #50000, processed 13013149 words, keeping 184314 word types
2019-04-13 11:17:08,144 : INFO : PROGRESS: at sentence #60000, processed 16089672 words, keeping 221174 word types
2019-04-13 11:17:09,054 : INFO : PROGRESS: at sentence #70000, processed 18755851 words, keeping 230263 word types
2019-04-13 11:17:09,862 : I

2019-04-13 11:18:16,940 : INFO : EPOCH 1 - PROGRESS: at 4.50% examples, 76711 words/s, in_qsize 8, out_qsize 0
2019-04-13 11:18:18,066 : INFO : EPOCH 1 - PROGRESS: at 4.63% examples, 76725 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:18:19,276 : INFO : EPOCH 1 - PROGRESS: at 4.77% examples, 76988 words/s, in_qsize 8, out_qsize 0
2019-04-13 11:18:20,351 : INFO : EPOCH 1 - PROGRESS: at 4.87% examples, 76845 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:18:21,374 : INFO : EPOCH 1 - PROGRESS: at 4.98% examples, 76975 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:18:22,408 : INFO : EPOCH 1 - PROGRESS: at 5.08% examples, 77257 words/s, in_qsize 8, out_qsize 0
2019-04-13 11:18:23,744 : INFO : EPOCH 1 - PROGRESS: at 5.19% examples, 77091 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:18:24,798 : INFO : EPOCH 1 - PROGRESS: at 5.34% examples, 77520 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:18:25,971 : INFO : EPOCH 1 - PROGRESS: at 5.41% examples, 76750 words/s, in_qsize 7, out_qsize 0
2

2019-04-13 11:19:40,149 : INFO : EPOCH 1 - PROGRESS: at 14.19% examples, 74778 words/s, in_qsize 8, out_qsize 1
2019-04-13 11:19:41,806 : INFO : EPOCH 1 - PROGRESS: at 14.35% examples, 74570 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:19:42,870 : INFO : EPOCH 1 - PROGRESS: at 14.49% examples, 74555 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:19:43,921 : INFO : EPOCH 1 - PROGRESS: at 14.60% examples, 74533 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:19:45,037 : INFO : EPOCH 1 - PROGRESS: at 14.74% examples, 74561 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:19:46,211 : INFO : EPOCH 1 - PROGRESS: at 14.90% examples, 74623 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:19:47,215 : INFO : EPOCH 1 - PROGRESS: at 15.00% examples, 74569 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:19:48,467 : INFO : EPOCH 1 - PROGRESS: at 15.12% examples, 74452 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:19:49,540 : INFO : EPOCH 1 - PROGRESS: at 15.24% examples, 74433 words/s, in_qsize 7, out_

2019-04-13 11:21:02,210 : INFO : EPOCH 1 - PROGRESS: at 30.12% examples, 75423 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:21:03,260 : INFO : EPOCH 1 - PROGRESS: at 30.27% examples, 75463 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:21:04,513 : INFO : EPOCH 1 - PROGRESS: at 30.40% examples, 75378 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:21:05,613 : INFO : EPOCH 1 - PROGRESS: at 30.54% examples, 75355 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:21:06,890 : INFO : EPOCH 1 - PROGRESS: at 30.63% examples, 75218 words/s, in_qsize 8, out_qsize 0
2019-04-13 11:21:08,121 : INFO : EPOCH 1 - PROGRESS: at 30.75% examples, 75148 words/s, in_qsize 8, out_qsize 0
2019-04-13 11:21:09,136 : INFO : EPOCH 1 - PROGRESS: at 30.89% examples, 75198 words/s, in_qsize 7, out_qsize 1
2019-04-13 11:21:10,180 : INFO : EPOCH 1 - PROGRESS: at 31.05% examples, 75280 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:21:11,251 : INFO : EPOCH 1 - PROGRESS: at 31.14% examples, 75178 words/s, in_qsize 7, out_

2019-04-13 11:22:23,852 : INFO : EPOCH 1 - PROGRESS: at 40.89% examples, 75186 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:22:24,920 : INFO : EPOCH 1 - PROGRESS: at 40.91% examples, 75232 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:22:25,989 : INFO : EPOCH 1 - PROGRESS: at 40.92% examples, 75236 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:22:27,086 : INFO : EPOCH 1 - PROGRESS: at 40.93% examples, 75262 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:22:28,252 : INFO : EPOCH 1 - PROGRESS: at 40.96% examples, 75270 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:22:29,341 : INFO : EPOCH 1 - PROGRESS: at 41.08% examples, 75317 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:22:30,540 : INFO : EPOCH 1 - PROGRESS: at 41.19% examples, 75338 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:22:31,686 : INFO : EPOCH 1 - PROGRESS: at 41.31% examples, 75375 words/s, in_qsize 8, out_qsize 0
2019-04-13 11:22:32,719 : INFO : EPOCH 1 - PROGRESS: at 41.42% examples, 75398 words/s, in_qsize 7, out_

2019-04-13 11:23:48,711 : INFO : EPOCH 1 - PROGRESS: at 57.37% examples, 73120 words/s, in_qsize 8, out_qsize 0
2019-04-13 11:23:49,751 : INFO : EPOCH 1 - PROGRESS: at 58.00% examples, 73075 words/s, in_qsize 8, out_qsize 0
2019-04-13 11:23:50,890 : INFO : EPOCH 1 - PROGRESS: at 59.02% examples, 73113 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:23:52,034 : INFO : EPOCH 1 - PROGRESS: at 60.06% examples, 73151 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:23:53,035 : INFO : EPOCH 1 - PROGRESS: at 61.06% examples, 73190 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:23:54,042 : INFO : EPOCH 1 - PROGRESS: at 61.90% examples, 73202 words/s, in_qsize 8, out_qsize 1
2019-04-13 11:23:55,268 : INFO : EPOCH 1 - PROGRESS: at 63.08% examples, 73249 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:23:56,352 : INFO : EPOCH 1 - PROGRESS: at 63.91% examples, 73319 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:23:57,451 : INFO : EPOCH 1 - PROGRESS: at 64.04% examples, 73312 words/s, in_qsize 7, out_

2019-04-13 11:25:09,168 : INFO : EPOCH 1 - PROGRESS: at 74.20% examples, 75055 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:25:10,258 : INFO : EPOCH 1 - PROGRESS: at 74.36% examples, 75065 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:25:11,301 : INFO : EPOCH 1 - PROGRESS: at 74.52% examples, 75104 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:25:12,308 : INFO : EPOCH 1 - PROGRESS: at 74.66% examples, 75127 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:25:13,456 : INFO : EPOCH 1 - PROGRESS: at 74.81% examples, 75128 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:25:14,481 : INFO : EPOCH 1 - PROGRESS: at 74.97% examples, 75170 words/s, in_qsize 8, out_qsize 0
2019-04-13 11:25:15,495 : INFO : EPOCH 1 - PROGRESS: at 75.13% examples, 75193 words/s, in_qsize 8, out_qsize 0
2019-04-13 11:25:16,589 : INFO : EPOCH 1 - PROGRESS: at 75.27% examples, 75203 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:25:17,654 : INFO : EPOCH 1 - PROGRESS: at 75.43% examples, 75236 words/s, in_qsize 7, out_

2019-04-13 11:26:32,097 : INFO : EPOCH 1 - PROGRESS: at 88.28% examples, 75797 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:26:33,116 : INFO : EPOCH 1 - PROGRESS: at 88.47% examples, 75816 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:26:34,167 : INFO : EPOCH 1 - PROGRESS: at 88.64% examples, 75831 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:26:35,240 : INFO : EPOCH 1 - PROGRESS: at 88.83% examples, 75860 words/s, in_qsize 8, out_qsize 0
2019-04-13 11:26:36,254 : INFO : EPOCH 1 - PROGRESS: at 89.01% examples, 75874 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:26:37,290 : INFO : EPOCH 1 - PROGRESS: at 89.20% examples, 75898 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:26:38,448 : INFO : EPOCH 1 - PROGRESS: at 89.39% examples, 75912 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:26:39,531 : INFO : EPOCH 1 - PROGRESS: at 89.59% examples, 75938 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:26:40,655 : INFO : EPOCH 1 - PROGRESS: at 89.80% examples, 75959 words/s, in_qsize 7, out_

2019-04-13 11:27:50,536 : INFO : EPOCH 2 - PROGRESS: at 1.05% examples, 83481 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:27:51,577 : INFO : EPOCH 2 - PROGRESS: at 1.15% examples, 84891 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:27:52,661 : INFO : EPOCH 2 - PROGRESS: at 1.22% examples, 84352 words/s, in_qsize 8, out_qsize 0
2019-04-13 11:27:53,752 : INFO : EPOCH 2 - PROGRESS: at 1.44% examples, 84632 words/s, in_qsize 8, out_qsize 0
2019-04-13 11:27:54,800 : INFO : EPOCH 2 - PROGRESS: at 1.54% examples, 84894 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:27:55,918 : INFO : EPOCH 2 - PROGRESS: at 1.63% examples, 84309 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:27:56,935 : INFO : EPOCH 2 - PROGRESS: at 1.76% examples, 84902 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:27:58,161 : INFO : EPOCH 2 - PROGRESS: at 1.86% examples, 84475 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:27:59,173 : INFO : EPOCH 2 - PROGRESS: at 1.97% examples, 85003 words/s, in_qsize 7, out_qsize 0
2

2019-04-13 11:29:13,582 : INFO : EPOCH 2 - PROGRESS: at 11.43% examples, 85753 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:29:14,590 : INFO : EPOCH 2 - PROGRESS: at 11.56% examples, 85740 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:29:15,708 : INFO : EPOCH 2 - PROGRESS: at 11.72% examples, 85732 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:29:16,807 : INFO : EPOCH 2 - PROGRESS: at 11.86% examples, 85746 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:29:17,810 : INFO : EPOCH 2 - PROGRESS: at 12.01% examples, 85756 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:29:18,824 : INFO : EPOCH 2 - PROGRESS: at 12.15% examples, 85751 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:29:19,985 : INFO : EPOCH 2 - PROGRESS: at 12.29% examples, 85712 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:29:21,068 : INFO : EPOCH 2 - PROGRESS: at 12.44% examples, 85741 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:29:22,218 : INFO : EPOCH 2 - PROGRESS: at 12.58% examples, 85714 words/s, in_qsize 7, out_

2019-04-13 11:30:34,265 : INFO : EPOCH 2 - PROGRESS: at 26.92% examples, 82637 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:30:35,356 : INFO : EPOCH 2 - PROGRESS: at 27.08% examples, 82614 words/s, in_qsize 6, out_qsize 1
2019-04-13 11:30:36,519 : INFO : EPOCH 2 - PROGRESS: at 27.24% examples, 82666 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:30:37,624 : INFO : EPOCH 2 - PROGRESS: at 27.37% examples, 82635 words/s, in_qsize 7, out_qsize 1
2019-04-13 11:30:38,801 : INFO : EPOCH 2 - PROGRESS: at 27.81% examples, 82682 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:30:39,946 : INFO : EPOCH 2 - PROGRESS: at 28.06% examples, 82641 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:30:41,070 : INFO : EPOCH 2 - PROGRESS: at 28.51% examples, 82717 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:30:42,143 : INFO : EPOCH 2 - PROGRESS: at 28.63% examples, 82707 words/s, in_qsize 8, out_qsize 0
2019-04-13 11:30:43,262 : INFO : EPOCH 2 - PROGRESS: at 28.74% examples, 82708 words/s, in_qsize 7, out_

2019-04-13 11:31:56,381 : INFO : EPOCH 2 - PROGRESS: at 40.06% examples, 81666 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:31:57,409 : INFO : EPOCH 2 - PROGRESS: at 40.25% examples, 81677 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:31:58,562 : INFO : EPOCH 2 - PROGRESS: at 40.40% examples, 81689 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:31:59,672 : INFO : EPOCH 2 - PROGRESS: at 40.50% examples, 81671 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:32:00,677 : INFO : EPOCH 2 - PROGRESS: at 40.61% examples, 81722 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:32:01,756 : INFO : EPOCH 2 - PROGRESS: at 40.72% examples, 81716 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:32:02,804 : INFO : EPOCH 2 - PROGRESS: at 40.78% examples, 81723 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:32:03,968 : INFO : EPOCH 2 - PROGRESS: at 40.79% examples, 81733 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:32:05,098 : INFO : EPOCH 2 - PROGRESS: at 40.81% examples, 81739 words/s, in_qsize 8, out_

2019-04-13 11:33:19,101 : INFO : EPOCH 2 - PROGRESS: at 53.02% examples, 80631 words/s, in_qsize 8, out_qsize 0
2019-04-13 11:33:20,264 : INFO : EPOCH 2 - PROGRESS: at 53.99% examples, 80615 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:33:21,510 : INFO : EPOCH 2 - PROGRESS: at 54.91% examples, 80551 words/s, in_qsize 8, out_qsize 1
2019-04-13 11:33:22,733 : INFO : EPOCH 2 - PROGRESS: at 55.74% examples, 80492 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:33:23,738 : INFO : EPOCH 2 - PROGRESS: at 56.95% examples, 80569 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:33:24,972 : INFO : EPOCH 2 - PROGRESS: at 57.89% examples, 80536 words/s, in_qsize 7, out_qsize 1
2019-04-13 11:33:26,241 : INFO : EPOCH 2 - PROGRESS: at 59.13% examples, 80580 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:33:27,321 : INFO : EPOCH 2 - PROGRESS: at 60.18% examples, 80612 words/s, in_qsize 8, out_qsize 1
2019-04-13 11:33:28,342 : INFO : EPOCH 2 - PROGRESS: at 60.96% examples, 80572 words/s, in_qsize 7, out_

2019-04-13 11:34:41,305 : INFO : EPOCH 2 - PROGRESS: at 72.97% examples, 80133 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:34:42,308 : INFO : EPOCH 2 - PROGRESS: at 73.11% examples, 80148 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:34:43,332 : INFO : EPOCH 2 - PROGRESS: at 73.22% examples, 80114 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:34:44,390 : INFO : EPOCH 2 - PROGRESS: at 73.39% examples, 80141 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:34:45,565 : INFO : EPOCH 2 - PROGRESS: at 73.51% examples, 80121 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:34:46,577 : INFO : EPOCH 2 - PROGRESS: at 73.64% examples, 80111 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:34:47,765 : INFO : EPOCH 2 - PROGRESS: at 73.77% examples, 80065 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:34:48,964 : INFO : EPOCH 2 - PROGRESS: at 73.93% examples, 80066 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:34:50,005 : INFO : EPOCH 2 - PROGRESS: at 74.04% examples, 80029 words/s, in_qsize 7, out_

2019-04-13 11:36:04,680 : INFO : EPOCH 2 - PROGRESS: at 82.56% examples, 78245 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:36:05,691 : INFO : EPOCH 2 - PROGRESS: at 82.70% examples, 78239 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:36:06,757 : INFO : EPOCH 2 - PROGRESS: at 82.90% examples, 78247 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:36:07,816 : INFO : EPOCH 2 - PROGRESS: at 83.01% examples, 78232 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:36:08,841 : INFO : EPOCH 2 - PROGRESS: at 83.10% examples, 78239 words/s, in_qsize 8, out_qsize 0
2019-04-13 11:36:10,124 : INFO : EPOCH 2 - PROGRESS: at 83.17% examples, 78208 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:36:11,195 : INFO : EPOCH 2 - PROGRESS: at 83.47% examples, 78194 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:36:12,264 : INFO : EPOCH 2 - PROGRESS: at 83.77% examples, 78126 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:36:13,354 : INFO : EPOCH 2 - PROGRESS: at 84.11% examples, 78091 words/s, in_qsize 7, out_

2019-04-13 11:37:27,506 : INFO : EPOCH 2 - PROGRESS: at 98.21% examples, 77754 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:37:28,551 : INFO : EPOCH 2 - PROGRESS: at 98.37% examples, 77777 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:37:29,722 : INFO : EPOCH 2 - PROGRESS: at 98.55% examples, 77788 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:37:30,789 : INFO : EPOCH 2 - PROGRESS: at 98.72% examples, 77814 words/s, in_qsize 8, out_qsize 0
2019-04-13 11:37:31,943 : INFO : EPOCH 2 - PROGRESS: at 98.90% examples, 77825 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:37:32,996 : INFO : EPOCH 2 - PROGRESS: at 99.04% examples, 77852 words/s, in_qsize 8, out_qsize 0
2019-04-13 11:37:34,044 : INFO : EPOCH 2 - PROGRESS: at 99.12% examples, 77864 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:37:35,077 : INFO : EPOCH 2 - PROGRESS: at 99.22% examples, 77874 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:37:36,137 : INFO : EPOCH 2 - PROGRESS: at 99.32% examples, 77892 words/s, in_qsize 7, out_

2019-04-13 11:38:43,811 : INFO : EPOCH 3 - PROGRESS: at 6.41% examples, 81241 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:38:44,934 : INFO : EPOCH 3 - PROGRESS: at 6.55% examples, 81309 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:38:45,977 : INFO : EPOCH 3 - PROGRESS: at 6.67% examples, 81348 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:38:47,007 : INFO : EPOCH 3 - PROGRESS: at 6.80% examples, 81534 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:38:48,055 : INFO : EPOCH 3 - PROGRESS: at 6.92% examples, 81549 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:38:49,163 : INFO : EPOCH 3 - PROGRESS: at 7.09% examples, 81645 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:38:50,178 : INFO : EPOCH 3 - PROGRESS: at 7.21% examples, 81556 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:38:51,259 : INFO : EPOCH 3 - PROGRESS: at 7.37% examples, 81536 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:38:52,316 : INFO : EPOCH 3 - PROGRESS: at 7.51% examples, 81538 words/s, in_qsize 7, out_qsize 0
2

2019-04-13 11:40:06,114 : INFO : EPOCH 3 - PROGRESS: at 16.93% examples, 80876 words/s, in_qsize 8, out_qsize 0
2019-04-13 11:40:07,164 : INFO : EPOCH 3 - PROGRESS: at 17.06% examples, 80883 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:40:08,184 : INFO : EPOCH 3 - PROGRESS: at 17.17% examples, 80910 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:40:09,309 : INFO : EPOCH 3 - PROGRESS: at 17.30% examples, 80872 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:40:10,399 : INFO : EPOCH 3 - PROGRESS: at 17.60% examples, 80926 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:40:11,428 : INFO : EPOCH 3 - PROGRESS: at 18.54% examples, 80961 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:40:12,933 : INFO : EPOCH 3 - PROGRESS: at 19.44% examples, 80739 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:40:13,952 : INFO : EPOCH 3 - PROGRESS: at 20.60% examples, 80909 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:40:15,026 : INFO : EPOCH 3 - PROGRESS: at 21.13% examples, 80661 words/s, in_qsize 8, out_

2019-04-13 11:41:27,863 : INFO : EPOCH 3 - PROGRESS: at 33.95% examples, 80287 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:41:28,892 : INFO : EPOCH 3 - PROGRESS: at 34.12% examples, 80310 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:41:29,924 : INFO : EPOCH 3 - PROGRESS: at 34.30% examples, 80332 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:41:31,117 : INFO : EPOCH 3 - PROGRESS: at 34.49% examples, 80299 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:41:32,224 : INFO : EPOCH 3 - PROGRESS: at 34.69% examples, 80335 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:41:33,348 : INFO : EPOCH 3 - PROGRESS: at 34.92% examples, 80361 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:41:34,396 : INFO : EPOCH 3 - PROGRESS: at 35.07% examples, 80364 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:41:35,402 : INFO : EPOCH 3 - PROGRESS: at 35.24% examples, 80352 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:41:36,412 : INFO : EPOCH 3 - PROGRESS: at 35.42% examples, 80379 words/s, in_qsize 7, out_

2019-04-13 11:42:49,158 : INFO : EPOCH 3 - PROGRESS: at 43.61% examples, 79642 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:42:50,284 : INFO : EPOCH 3 - PROGRESS: at 43.74% examples, 79661 words/s, in_qsize 7, out_qsize 1
2019-04-13 11:42:51,313 : INFO : EPOCH 3 - PROGRESS: at 43.87% examples, 79676 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:42:52,351 : INFO : EPOCH 3 - PROGRESS: at 43.98% examples, 79653 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:42:53,630 : INFO : EPOCH 3 - PROGRESS: at 44.10% examples, 79602 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:42:54,677 : INFO : EPOCH 3 - PROGRESS: at 44.19% examples, 79553 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:42:55,800 : INFO : EPOCH 3 - PROGRESS: at 44.29% examples, 79484 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:42:56,988 : INFO : EPOCH 3 - PROGRESS: at 44.40% examples, 79430 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:42:58,111 : INFO : EPOCH 3 - PROGRESS: at 44.51% examples, 79394 words/s, in_qsize 7, out_

2019-04-13 11:44:12,278 : INFO : EPOCH 3 - PROGRESS: at 68.19% examples, 79296 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:44:13,340 : INFO : EPOCH 3 - PROGRESS: at 68.34% examples, 79304 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:44:14,382 : INFO : EPOCH 3 - PROGRESS: at 68.51% examples, 79338 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:44:15,488 : INFO : EPOCH 3 - PROGRESS: at 68.64% examples, 79312 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:44:16,524 : INFO : EPOCH 3 - PROGRESS: at 68.77% examples, 79301 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:44:17,545 : INFO : EPOCH 3 - PROGRESS: at 68.91% examples, 79315 words/s, in_qsize 8, out_qsize 0
2019-04-13 11:44:18,830 : INFO : EPOCH 3 - PROGRESS: at 69.06% examples, 79299 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:44:20,150 : INFO : EPOCH 3 - PROGRESS: at 69.24% examples, 79326 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:44:21,171 : INFO : EPOCH 3 - PROGRESS: at 69.42% examples, 79387 words/s, in_qsize 7, out_

2019-04-13 11:45:33,045 : INFO : EPOCH 3 - PROGRESS: at 79.03% examples, 79399 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:45:34,115 : INFO : EPOCH 3 - PROGRESS: at 79.18% examples, 79401 words/s, in_qsize 6, out_qsize 1
2019-04-13 11:45:35,116 : INFO : EPOCH 3 - PROGRESS: at 79.33% examples, 79435 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:45:36,132 : INFO : EPOCH 3 - PROGRESS: at 79.47% examples, 79426 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:45:37,287 : INFO : EPOCH 3 - PROGRESS: at 79.62% examples, 79435 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:45:38,364 : INFO : EPOCH 3 - PROGRESS: at 79.77% examples, 79436 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:45:39,375 : INFO : EPOCH 3 - PROGRESS: at 79.93% examples, 79470 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:45:40,447 : INFO : EPOCH 3 - PROGRESS: at 80.07% examples, 79474 words/s, in_qsize 8, out_qsize 0
2019-04-13 11:45:41,600 : INFO : EPOCH 3 - PROGRESS: at 80.20% examples, 79463 words/s, in_qsize 7, out_

2019-04-13 11:46:56,684 : INFO : EPOCH 3 - PROGRESS: at 93.25% examples, 78271 words/s, in_qsize 8, out_qsize 0
2019-04-13 11:46:57,905 : INFO : EPOCH 3 - PROGRESS: at 93.47% examples, 78272 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:46:59,016 : INFO : EPOCH 3 - PROGRESS: at 93.74% examples, 78305 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:47:00,258 : INFO : EPOCH 3 - PROGRESS: at 93.92% examples, 78287 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:47:01,288 : INFO : EPOCH 3 - PROGRESS: at 94.12% examples, 78315 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:47:02,476 : INFO : EPOCH 3 - PROGRESS: at 94.23% examples, 78253 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:47:03,601 : INFO : EPOCH 3 - PROGRESS: at 94.39% examples, 78234 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:47:04,627 : INFO : EPOCH 3 - PROGRESS: at 94.62% examples, 78280 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:47:05,827 : INFO : EPOCH 3 - PROGRESS: at 94.72% examples, 78198 words/s, in_qsize 7, out_

2019-04-13 11:48:13,647 : INFO : EPOCH 4 - PROGRESS: at 2.51% examples, 74868 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:48:14,706 : INFO : EPOCH 4 - PROGRESS: at 2.60% examples, 75051 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:48:15,895 : INFO : EPOCH 4 - PROGRESS: at 2.70% examples, 75250 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:48:16,993 : INFO : EPOCH 4 - PROGRESS: at 2.82% examples, 75594 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:48:18,134 : INFO : EPOCH 4 - PROGRESS: at 2.96% examples, 75847 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:48:19,172 : INFO : EPOCH 4 - PROGRESS: at 3.08% examples, 76082 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:48:20,199 : INFO : EPOCH 4 - PROGRESS: at 3.21% examples, 76300 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:48:21,295 : INFO : EPOCH 4 - PROGRESS: at 3.36% examples, 76629 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:48:22,377 : INFO : EPOCH 4 - PROGRESS: at 3.49% examples, 76731 words/s, in_qsize 7, out_qsize 0
2

2019-04-13 11:49:33,815 : INFO : EPOCH 4 - PROGRESS: at 12.42% examples, 79086 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:49:34,839 : INFO : EPOCH 4 - PROGRESS: at 12.55% examples, 79130 words/s, in_qsize 8, out_qsize 0
2019-04-13 11:49:36,060 : INFO : EPOCH 4 - PROGRESS: at 12.70% examples, 79200 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:49:37,068 : INFO : EPOCH 4 - PROGRESS: at 12.83% examples, 79255 words/s, in_qsize 8, out_qsize 0
2019-04-13 11:49:38,346 : INFO : EPOCH 4 - PROGRESS: at 12.98% examples, 79289 words/s, in_qsize 8, out_qsize 0
2019-04-13 11:49:39,362 : INFO : EPOCH 4 - PROGRESS: at 13.12% examples, 79343 words/s, in_qsize 6, out_qsize 1
2019-04-13 11:49:40,518 : INFO : EPOCH 4 - PROGRESS: at 13.30% examples, 79482 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:49:41,570 : INFO : EPOCH 4 - PROGRESS: at 13.46% examples, 79508 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:49:42,599 : INFO : EPOCH 4 - PROGRESS: at 13.60% examples, 79554 words/s, in_qsize 7, out_

2019-04-13 11:50:54,869 : INFO : EPOCH 4 - PROGRESS: at 29.37% examples, 81705 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:50:56,154 : INFO : EPOCH 4 - PROGRESS: at 29.54% examples, 81748 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:50:57,179 : INFO : EPOCH 4 - PROGRESS: at 29.70% examples, 81857 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:50:58,375 : INFO : EPOCH 4 - PROGRESS: at 29.84% examples, 81805 words/s, in_qsize 8, out_qsize 0
2019-04-13 11:50:59,365 : INFO : EPOCH 4 - PROGRESS: at 29.98% examples, 81831 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:51:00,617 : INFO : EPOCH 4 - PROGRESS: at 30.15% examples, 81851 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:51:01,682 : INFO : EPOCH 4 - PROGRESS: at 30.33% examples, 81948 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:51:02,865 : INFO : EPOCH 4 - PROGRESS: at 30.48% examples, 81898 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:51:03,930 : INFO : EPOCH 4 - PROGRESS: at 30.65% examples, 81989 words/s, in_qsize 7, out_

2019-04-13 11:52:16,359 : INFO : EPOCH 4 - PROGRESS: at 41.10% examples, 83099 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:52:17,424 : INFO : EPOCH 4 - PROGRESS: at 41.22% examples, 83128 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:52:18,453 : INFO : EPOCH 4 - PROGRESS: at 41.32% examples, 83131 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:52:19,533 : INFO : EPOCH 4 - PROGRESS: at 41.43% examples, 83119 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:52:20,579 : INFO : EPOCH 4 - PROGRESS: at 41.54% examples, 83153 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:52:21,598 : INFO : EPOCH 4 - PROGRESS: at 41.71% examples, 83161 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:52:22,679 : INFO : EPOCH 4 - PROGRESS: at 41.91% examples, 83151 words/s, in_qsize 6, out_qsize 1
2019-04-13 11:52:23,722 : INFO : EPOCH 4 - PROGRESS: at 42.04% examples, 83150 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:52:24,732 : INFO : EPOCH 4 - PROGRESS: at 42.15% examples, 83160 words/s, in_qsize 7, out_

2019-04-13 11:53:36,122 : INFO : EPOCH 4 - PROGRESS: at 65.87% examples, 83825 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:53:37,229 : INFO : EPOCH 4 - PROGRESS: at 66.02% examples, 83834 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:53:38,326 : INFO : EPOCH 4 - PROGRESS: at 66.18% examples, 83845 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:53:39,481 : INFO : EPOCH 4 - PROGRESS: at 66.35% examples, 83843 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:53:40,522 : INFO : EPOCH 4 - PROGRESS: at 66.51% examples, 83866 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:53:41,741 : INFO : EPOCH 4 - PROGRESS: at 66.68% examples, 83847 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:53:42,774 : INFO : EPOCH 4 - PROGRESS: at 66.83% examples, 83874 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:53:44,005 : INFO : EPOCH 4 - PROGRESS: at 67.01% examples, 83855 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:53:45,015 : INFO : EPOCH 4 - PROGRESS: at 67.16% examples, 83887 words/s, in_qsize 7, out_

2019-04-13 11:54:57,830 : INFO : EPOCH 4 - PROGRESS: at 77.51% examples, 84049 words/s, in_qsize 8, out_qsize 0
2019-04-13 11:54:58,864 : INFO : EPOCH 4 - PROGRESS: at 77.67% examples, 84071 words/s, in_qsize 8, out_qsize 0
2019-04-13 11:54:59,935 : INFO : EPOCH 4 - PROGRESS: at 77.84% examples, 84064 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:55:00,967 : INFO : EPOCH 4 - PROGRESS: at 77.98% examples, 84064 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:55:02,036 : INFO : EPOCH 4 - PROGRESS: at 78.15% examples, 84076 words/s, in_qsize 8, out_qsize 0
2019-04-13 11:55:03,103 : INFO : EPOCH 4 - PROGRESS: at 78.29% examples, 84074 words/s, in_qsize 8, out_qsize 0
2019-04-13 11:55:04,114 : INFO : EPOCH 4 - PROGRESS: at 78.44% examples, 84074 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:55:05,140 : INFO : EPOCH 4 - PROGRESS: at 78.58% examples, 84075 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:55:06,145 : INFO : EPOCH 4 - PROGRESS: at 78.74% examples, 84081 words/s, in_qsize 8, out_

2019-04-13 11:56:17,982 : INFO : EPOCH 4 - PROGRESS: at 93.15% examples, 84294 words/s, in_qsize 8, out_qsize 0
2019-04-13 11:56:18,984 : INFO : EPOCH 4 - PROGRESS: at 93.35% examples, 84298 words/s, in_qsize 7, out_qsize 1
2019-04-13 11:56:20,204 : INFO : EPOCH 4 - PROGRESS: at 93.65% examples, 84303 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:56:21,253 : INFO : EPOCH 4 - PROGRESS: at 93.83% examples, 84301 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:56:22,423 : INFO : EPOCH 4 - PROGRESS: at 94.05% examples, 84315 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:56:23,537 : INFO : EPOCH 4 - PROGRESS: at 94.21% examples, 84302 words/s, in_qsize 8, out_qsize 1
2019-04-13 11:56:24,568 : INFO : EPOCH 4 - PROGRESS: at 94.39% examples, 84303 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:56:25,711 : INFO : EPOCH 4 - PROGRESS: at 94.60% examples, 84303 words/s, in_qsize 8, out_qsize 0
2019-04-13 11:56:26,772 : INFO : EPOCH 4 - PROGRESS: at 94.78% examples, 84300 words/s, in_qsize 7, out_

2019-04-13 11:57:35,112 : INFO : EPOCH 5 - PROGRESS: at 3.35% examples, 83277 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:57:36,316 : INFO : EPOCH 5 - PROGRESS: at 3.48% examples, 82910 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:57:37,343 : INFO : EPOCH 5 - PROGRESS: at 3.63% examples, 83471 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:57:38,548 : INFO : EPOCH 5 - PROGRESS: at 3.75% examples, 83122 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:57:39,551 : INFO : EPOCH 5 - PROGRESS: at 3.89% examples, 83447 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:57:40,732 : INFO : EPOCH 5 - PROGRESS: at 4.03% examples, 83353 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:57:41,755 : INFO : EPOCH 5 - PROGRESS: at 4.15% examples, 83385 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:57:42,789 : INFO : EPOCH 5 - PROGRESS: at 4.28% examples, 83638 words/s, in_qsize 8, out_qsize 0
2019-04-13 11:57:43,842 : INFO : EPOCH 5 - PROGRESS: at 4.39% examples, 83573 words/s, in_qsize 8, out_qsize 0
2

2019-04-13 11:58:56,813 : INFO : EPOCH 5 - PROGRESS: at 14.21% examples, 84655 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:58:57,901 : INFO : EPOCH 5 - PROGRESS: at 14.38% examples, 84775 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:58:59,042 : INFO : EPOCH 5 - PROGRESS: at 14.53% examples, 84697 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:59:00,070 : INFO : EPOCH 5 - PROGRESS: at 14.69% examples, 84834 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:59:01,279 : INFO : EPOCH 5 - PROGRESS: at 14.83% examples, 84713 words/s, in_qsize 8, out_qsize 0
2019-04-13 11:59:02,313 : INFO : EPOCH 5 - PROGRESS: at 15.00% examples, 84854 words/s, in_qsize 8, out_qsize 0
2019-04-13 11:59:03,532 : INFO : EPOCH 5 - PROGRESS: at 15.14% examples, 84728 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:59:04,520 : INFO : EPOCH 5 - PROGRESS: at 15.29% examples, 84823 words/s, in_qsize 7, out_qsize 0
2019-04-13 11:59:05,759 : INFO : EPOCH 5 - PROGRESS: at 15.44% examples, 84753 words/s, in_qsize 8, out_

2019-04-13 12:00:16,745 : INFO : EPOCH 5 - PROGRESS: at 31.03% examples, 84953 words/s, in_qsize 8, out_qsize 0
2019-04-13 12:00:17,830 : INFO : EPOCH 5 - PROGRESS: at 31.19% examples, 84971 words/s, in_qsize 7, out_qsize 0
2019-04-13 12:00:18,871 : INFO : EPOCH 5 - PROGRESS: at 31.32% examples, 84965 words/s, in_qsize 7, out_qsize 0
2019-04-13 12:00:19,898 : INFO : EPOCH 5 - PROGRESS: at 31.46% examples, 84959 words/s, in_qsize 7, out_qsize 0
2019-04-13 12:00:20,956 : INFO : EPOCH 5 - PROGRESS: at 31.62% examples, 84993 words/s, in_qsize 7, out_qsize 0
2019-04-13 12:00:22,018 : INFO : EPOCH 5 - PROGRESS: at 31.77% examples, 84977 words/s, in_qsize 7, out_qsize 0
2019-04-13 12:00:23,028 : INFO : EPOCH 5 - PROGRESS: at 31.93% examples, 84981 words/s, in_qsize 8, out_qsize 0
2019-04-13 12:00:24,070 : INFO : EPOCH 5 - PROGRESS: at 32.07% examples, 84920 words/s, in_qsize 8, out_qsize 0
2019-04-13 12:00:25,099 : INFO : EPOCH 5 - PROGRESS: at 32.43% examples, 84961 words/s, in_qsize 7, out_

2019-04-13 12:01:36,088 : INFO : EPOCH 5 - PROGRESS: at 42.29% examples, 85196 words/s, in_qsize 7, out_qsize 0
2019-04-13 12:01:37,262 : INFO : EPOCH 5 - PROGRESS: at 42.43% examples, 85184 words/s, in_qsize 7, out_qsize 0
2019-04-13 12:01:38,320 : INFO : EPOCH 5 - PROGRESS: at 42.56% examples, 85199 words/s, in_qsize 7, out_qsize 0
2019-04-13 12:01:39,492 : INFO : EPOCH 5 - PROGRESS: at 42.66% examples, 85182 words/s, in_qsize 8, out_qsize 0
2019-04-13 12:01:40,556 : INFO : EPOCH 5 - PROGRESS: at 42.77% examples, 85204 words/s, in_qsize 7, out_qsize 0
2019-04-13 12:01:41,727 : INFO : EPOCH 5 - PROGRESS: at 42.91% examples, 85192 words/s, in_qsize 8, out_qsize 0
2019-04-13 12:01:42,793 : INFO : EPOCH 5 - PROGRESS: at 43.05% examples, 85206 words/s, in_qsize 7, out_qsize 0
2019-04-13 12:01:43,953 : INFO : EPOCH 5 - PROGRESS: at 43.19% examples, 85198 words/s, in_qsize 8, out_qsize 0
2019-04-13 12:01:45,039 : INFO : EPOCH 5 - PROGRESS: at 43.33% examples, 85213 words/s, in_qsize 7, out_

2019-04-13 12:02:55,824 : INFO : EPOCH 5 - PROGRESS: at 67.21% examples, 85418 words/s, in_qsize 7, out_qsize 0
2019-04-13 12:02:56,917 : INFO : EPOCH 5 - PROGRESS: at 67.38% examples, 85425 words/s, in_qsize 7, out_qsize 0
2019-04-13 12:02:57,993 : INFO : EPOCH 5 - PROGRESS: at 67.53% examples, 85432 words/s, in_qsize 7, out_qsize 0
2019-04-13 12:02:59,156 : INFO : EPOCH 5 - PROGRESS: at 67.69% examples, 85422 words/s, in_qsize 7, out_qsize 0
2019-04-13 12:03:00,234 : INFO : EPOCH 5 - PROGRESS: at 67.85% examples, 85431 words/s, in_qsize 8, out_qsize 0
2019-04-13 12:03:01,431 : INFO : EPOCH 5 - PROGRESS: at 68.02% examples, 85416 words/s, in_qsize 7, out_qsize 0
2019-04-13 12:03:02,444 : INFO : EPOCH 5 - PROGRESS: at 68.18% examples, 85440 words/s, in_qsize 8, out_qsize 0
2019-04-13 12:03:03,672 : INFO : EPOCH 5 - PROGRESS: at 68.34% examples, 85417 words/s, in_qsize 7, out_qsize 0
2019-04-13 12:03:04,674 : INFO : EPOCH 5 - PROGRESS: at 68.51% examples, 85445 words/s, in_qsize 7, out_

2019-04-13 12:04:17,440 : INFO : EPOCH 5 - PROGRESS: at 78.94% examples, 85434 words/s, in_qsize 7, out_qsize 0
2019-04-13 12:04:18,602 : INFO : EPOCH 5 - PROGRESS: at 79.09% examples, 85426 words/s, in_qsize 7, out_qsize 0
2019-04-13 12:04:19,612 : INFO : EPOCH 5 - PROGRESS: at 79.26% examples, 85447 words/s, in_qsize 7, out_qsize 0
2019-04-13 12:04:20,841 : INFO : EPOCH 5 - PROGRESS: at 79.42% examples, 85425 words/s, in_qsize 7, out_qsize 0
2019-04-13 12:04:21,935 : INFO : EPOCH 5 - PROGRESS: at 79.59% examples, 85452 words/s, in_qsize 7, out_qsize 0
2019-04-13 12:04:23,075 : INFO : EPOCH 5 - PROGRESS: at 79.74% examples, 85427 words/s, in_qsize 7, out_qsize 0
2019-04-13 12:04:24,158 : INFO : EPOCH 5 - PROGRESS: at 79.88% examples, 85413 words/s, in_qsize 7, out_qsize 0
2019-04-13 12:04:25,326 : INFO : EPOCH 5 - PROGRESS: at 80.03% examples, 85406 words/s, in_qsize 6, out_qsize 1
2019-04-13 12:04:26,482 : INFO : EPOCH 5 - PROGRESS: at 80.19% examples, 85401 words/s, in_qsize 8, out_

2019-04-13 12:05:39,252 : INFO : EPOCH 5 - PROGRESS: at 95.16% examples, 85379 words/s, in_qsize 7, out_qsize 0
2019-04-13 12:05:40,384 : INFO : EPOCH 5 - PROGRESS: at 95.33% examples, 85360 words/s, in_qsize 8, out_qsize 0
2019-04-13 12:05:41,441 : INFO : EPOCH 5 - PROGRESS: at 95.56% examples, 85391 words/s, in_qsize 7, out_qsize 0
2019-04-13 12:05:42,644 : INFO : EPOCH 5 - PROGRESS: at 95.75% examples, 85362 words/s, in_qsize 7, out_qsize 0
2019-04-13 12:05:43,658 : INFO : EPOCH 5 - PROGRESS: at 96.03% examples, 85400 words/s, in_qsize 7, out_qsize 0
2019-04-13 12:05:44,900 : INFO : EPOCH 5 - PROGRESS: at 96.26% examples, 85364 words/s, in_qsize 7, out_qsize 0
2019-04-13 12:05:46,255 : INFO : EPOCH 5 - PROGRESS: at 96.58% examples, 85366 words/s, in_qsize 7, out_qsize 0
2019-04-13 12:05:47,611 : INFO : EPOCH 5 - PROGRESS: at 96.93% examples, 85368 words/s, in_qsize 6, out_qsize 2
2019-04-13 12:05:48,649 : INFO : EPOCH 5 - PROGRESS: at 97.10% examples, 85406 words/s, in_qsize 7, out_

In [10]:
print(model.wv.most_similar(positive=[u'pontes'], negative=[u'presidente']))

[('rousseff', 0.5715487003326416), ('impeachment', 0.33618271350860596), ('roussef', 0.3163612186908722), ('pedaladas', 0.3028866648674011), ('cassada', 0.2818000912666321), ('gerentona', 0.276383638381958), ('petrolão', 0.24935714900493622), ('desempregados', 0.2331179678440094), ('janete', 0.2272392213344574), ('desgovernos', 0.22592443227767944)]


In [3]:
from gensim.models import Word2Vec
new_model = Word2Vec.load('embeddings/news_w2v.bin')

2019-04-30 13:12:20,064 : INFO : loading Word2Vec object from embeddings/news_w2v.bin
2019-04-30 13:12:20,777 : INFO : loading wv recursively from embeddings/news_w2v.bin.wv.* with mmap=None
2019-04-30 13:12:20,778 : INFO : loading vectors from embeddings/news_w2v.bin.wv.vectors.npy with mmap=None
2019-04-30 13:12:22,499 : INFO : setting ignored attribute vectors_norm to None
2019-04-30 13:12:22,502 : INFO : loading vocabulary recursively from embeddings/news_w2v.bin.vocabulary.* with mmap=None
2019-04-30 13:12:22,503 : INFO : loading trainables recursively from embeddings/news_w2v.bin.trainables.* with mmap=None
2019-04-30 13:12:22,504 : INFO : loading syn1neg from embeddings/news_w2v.bin.trainables.syn1neg.npy with mmap=None
2019-04-30 13:12:24,017 : INFO : setting ignored attribute cum_table to None
2019-04-30 13:12:24,021 : INFO : loaded embeddings/news_w2v.bin


In [31]:
print(new_model.wv.most_similar(positive=[u'guedes']))

[('superministro', 0.7084858417510986), ('ultraliberal', 0.5786339044570923), ('superministérios', 0.5777881145477295), ('superpasta', 0.5672988891601562), ('delegará', 0.5668069124221802), ('onyx', 0.5664126873016357), ('ministeriável', 0.5586702227592468), ('guru', 0.5561445355415344), ('superministros', 0.5527079105377197), ('superministério', 0.5492182970046997)]
