# Word Embeddings notícias de 2018

Os dados são notícias de 2018, coletadas dos principais jornais do Brasil:
* `Carta Capital`, 
* `El Pais`,
* `Estadao`, 
* `Folha de São Paulo`, 
*  `Gazeta do Povo`,
* `O Antagonista`, 
* `O Globo`, 
* `Veja`

Uma análise detalhada dos dados está disponível [aqui](https://pages.github.com/). Objetivo deste notebook é utilizar o modelo word2vec para gerar embeddings a partir dos textos dessas notícias. A arquitetura utilizada pelo modelo é a skip-gram, cada palavra é representada por um vetor de 300 dimensões.

In [1]:
# importing modules and setting log format
import re
import nltk
import gensim, logging
import pandas as pd
from nltk.corpus import stopwords
from pymongo import MongoClient
nltk.download('stopwords')
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
PUNCTUATION = u'[^a-zA-Z0-9áéíóúÁÉÍÓÚâêîôÂÊÎÔãõÃÕçÇ%]' # define news punctuation 

[nltk_data] Downloading package stopwords to
[nltk_data]     /home/diogoflorencio/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


### Definindo Lexicons e Funções

In [2]:
# Mapping words in lexicons
map_lexicons = {'a ponto':'a_ponto','ao menos ':'ao_menos ','ate mesmo ':'ate_mesmo ',
                'nao mais que ':'nao_mais_que ','nem mesmo ':'nem_mesmo ','no minimo ':'no_minimo ',
                'o unico ':'o_unico ','a unica ':'a_unica ','pelo menos ':'pelo_menos ',
                'quando menos ':'quando_menos ','quando muito ':'quando_muito ','a par disso ':'a_par_disso ',
                'e nao ':'e_nao ','em suma ':'em_suma ','mas tambem ': 'mas_tambem ','muito menos ':'muito_menos ',
                'nao so ':'nao_so ','ou mesmo ':'ou_mesmo ','por sinal ':'por_sinal ','com isso ':'com_isso ',
                'como consequencia ':'como_consequencia ','de modo que ':'de_modo_que ','deste modo ':'deste_modo ',
                'em decorrencia ':'em_decorrencia ','nesse sentido ':'nesse_sentido ','por causa ':'por_causa ',
                'por conseguinte ':'por_conseguinte ','por essa razao ':'por_essa_razao ','por isso ':'por_isso ',
                'sendo assim ':'sendo_assim ','ou entao ':'ou_entao ','ou mesmo ':'ou_mesmo ','como se ':'como_se ',
                'de um lado ':'de_um_lado ','por outro lado ':'por_outro_lado ','mais que ':'mais_que ',
                'menos que ':'menos_que ','desde que ':'desde_que ','do contrario ':'do_contrario ',
                'em lugar ':'em_lugar ','em vez ':'em_vez','no caso ':'no_caso ','se acaso ':'se_acaso ',
                'de certa forma ':'de_certa_forma ','desse modo ':'desse_modo ','em funcao ':'em_funcao ',
                'isso e ':'isso_e ','ja que ':'ja_que ','na medida que ':'na_medida_que ','nessa direcao ':'nessa_direcao ',
                'no intuito ':'no_intuito ','no mesmo sentido ':'no_mesmo_sentido ','ou seja ':'ou_seja ',
                'uma vez que ':'uma_vez_que ','tanto que ':'tanto_que ','visto que ':'visto_que ','ainda que ':'ainda_que ',
                'ao contrario ':'ao_contrario ','apesar de ':'apesar_de ','fora isso ':'fora_isso ','mesmo que ':'mesmo_que ',
                'nao obstante ':'nao_obstante ','nao fosse isso ':'nao_fosse_isso ','no entanto ':'no_entanto ',
                'para tanto ':'para_tanto ','pelo contrario ':'pelo_contrario ','por sua vez ':'por_sua_vez ','posto que ':'posto_que '
               }

In [3]:
# Convert word from text into lexicons
def word2lexicon(text):
    for k, v in map_lexicons.items():
        text = str(text).replace(k,v)
    return text

In [4]:
# function for processing sentences
def processSentences(text):
    stop_words = stopwords.words('portuguese') # load stop words
    text = re.sub(PUNCTUATION, ' ', str(text)) # remove punctuation from text
    text = str(text).split() # split sentences by words
    text = [word for word in text if word not in stop_words] # Remove stopwords
    return text

### Carregando Notícias

In [5]:
host_ip = '192.168.1.6'
# init mongo client
client = MongoClient(host_ip, 27017)
#select db
db = client['news_2018']
# load data
carta_capital = pd.DataFrame(list(db.get_collection('carta_capital').find()))
el_pais = pd.DataFrame(list(db.get_collection('el_pais').find()))
estadao = pd.DataFrame(list(db.get_collection('estadao').find()))
folha = pd.DataFrame(list(db.get_collection('folha').find()))
gazeta_do_povo = pd.DataFrame(list(db.get_collection('gazeta_do_povo').find()))
oantagonista = pd.DataFrame(list(db.get_collection('oantagonista').find()))
oglobo = pd.DataFrame(list(db.get_collection('oglobo').find()))
veja = pd.DataFrame(list(db.get_collection('veja').find()))

# concat all news
news = pd.concat((carta_capital, el_pais, estadao, folha, gazeta_do_povo, oantagonista, oglobo, veja), sort=False, ignore_index=True)

In [6]:
# processing news text
news['text'] = news['text'].apply(word2lexicon) 
news['text'] = news['text'].apply(processSentences)

### Treinando Word2Vec

In [None]:
# Train word2vec model - settings: approach skip-gram, size embeddings vectors 300 
model = gensim.models.Word2Vec(news['text'], workers=4, size=300, sg=1, window=5, min_count=5)
# Saving model
model.save('../embeddings/news_w2v.bin')
# Saving embeddings
model.wv.save_word2vec_format("../embeddings/news_vectors.bin")

2020-02-10 21:27:29,263 : INFO : collecting all words and their counts
2020-02-10 21:27:29,265 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-02-10 21:27:30,334 : INFO : PROGRESS: at sentence #10000, processed 4410993 words, keeping 122723 word types
2020-02-10 21:27:30,467 : INFO : PROGRESS: at sentence #20000, processed 4964151 words, keeping 133083 word types
2020-02-10 21:27:31,024 : INFO : PROGRESS: at sentence #30000, processed 7306086 words, keeping 201100 word types
2020-02-10 21:27:31,640 : INFO : PROGRESS: at sentence #40000, processed 10163400 words, keeping 224359 word types
2020-02-10 21:27:32,131 : INFO : PROGRESS: at sentence #50000, processed 12576923 words, keeping 238519 word types
2020-02-10 21:27:32,740 : INFO : PROGRESS: at sentence #60000, processed 15407162 words, keeping 256496 word types
2020-02-10 21:27:33,527 : INFO : PROGRESS: at sentence #70000, processed 18811506 words, keeping 275933 word types
2020-02-10 21:27:34,285 : IN

2020-02-10 21:28:55,609 : INFO : EPOCH 1 - PROGRESS: at 19.85% examples, 227403 words/s, in_qsize 7, out_qsize 0
2020-02-10 21:28:56,619 : INFO : EPOCH 1 - PROGRESS: at 20.29% examples, 227882 words/s, in_qsize 7, out_qsize 0
2020-02-10 21:28:57,627 : INFO : EPOCH 1 - PROGRESS: at 20.78% examples, 227963 words/s, in_qsize 7, out_qsize 0
2020-02-10 21:28:58,699 : INFO : EPOCH 1 - PROGRESS: at 21.25% examples, 227924 words/s, in_qsize 7, out_qsize 0
2020-02-10 21:28:59,723 : INFO : EPOCH 1 - PROGRESS: at 21.76% examples, 228500 words/s, in_qsize 8, out_qsize 0
2020-02-10 21:29:00,724 : INFO : EPOCH 1 - PROGRESS: at 22.24% examples, 228599 words/s, in_qsize 8, out_qsize 0
2020-02-10 21:29:01,756 : INFO : EPOCH 1 - PROGRESS: at 22.68% examples, 228528 words/s, in_qsize 8, out_qsize 0
2020-02-10 21:29:02,799 : INFO : EPOCH 1 - PROGRESS: at 23.17% examples, 228622 words/s, in_qsize 8, out_qsize 0
2020-02-10 21:29:03,882 : INFO : EPOCH 1 - PROGRESS: at 23.75% examples, 228910 words/s, in_qsiz

2020-02-10 21:30:11,208 : INFO : EPOCH 1 - PROGRESS: at 47.29% examples, 225537 words/s, in_qsize 7, out_qsize 0
2020-02-10 21:30:12,221 : INFO : EPOCH 1 - PROGRESS: at 47.64% examples, 225584 words/s, in_qsize 7, out_qsize 0
2020-02-10 21:30:13,238 : INFO : EPOCH 1 - PROGRESS: at 47.98% examples, 225675 words/s, in_qsize 8, out_qsize 0
2020-02-10 21:30:14,248 : INFO : EPOCH 1 - PROGRESS: at 49.24% examples, 225795 words/s, in_qsize 8, out_qsize 0
2020-02-10 21:30:15,292 : INFO : EPOCH 1 - PROGRESS: at 51.81% examples, 225870 words/s, in_qsize 8, out_qsize 0
2020-02-10 21:30:16,296 : INFO : EPOCH 1 - PROGRESS: at 54.42% examples, 226093 words/s, in_qsize 7, out_qsize 0
2020-02-10 21:30:17,310 : INFO : EPOCH 1 - PROGRESS: at 56.87% examples, 226140 words/s, in_qsize 8, out_qsize 1
2020-02-10 21:30:18,322 : INFO : EPOCH 1 - PROGRESS: at 59.33% examples, 226262 words/s, in_qsize 8, out_qsize 0
2020-02-10 21:30:19,377 : INFO : EPOCH 1 - PROGRESS: at 61.94% examples, 226388 words/s, in_qsiz

2020-02-10 21:31:26,720 : INFO : EPOCH 1 - PROGRESS: at 93.79% examples, 228136 words/s, in_qsize 7, out_qsize 0
2020-02-10 21:31:27,738 : INFO : EPOCH 1 - PROGRESS: at 94.28% examples, 228276 words/s, in_qsize 8, out_qsize 0
2020-02-10 21:31:28,765 : INFO : EPOCH 1 - PROGRESS: at 94.68% examples, 228112 words/s, in_qsize 8, out_qsize 0
2020-02-10 21:31:29,874 : INFO : EPOCH 1 - PROGRESS: at 95.14% examples, 228002 words/s, in_qsize 8, out_qsize 0
2020-02-10 21:31:30,921 : INFO : EPOCH 1 - PROGRESS: at 95.61% examples, 228003 words/s, in_qsize 6, out_qsize 1
2020-02-10 21:31:31,977 : INFO : EPOCH 1 - PROGRESS: at 96.12% examples, 227906 words/s, in_qsize 7, out_qsize 0
2020-02-10 21:31:33,025 : INFO : EPOCH 1 - PROGRESS: at 96.67% examples, 227775 words/s, in_qsize 7, out_qsize 0
2020-02-10 21:31:34,062 : INFO : EPOCH 1 - PROGRESS: at 97.14% examples, 227736 words/s, in_qsize 7, out_qsize 0
2020-02-10 21:31:35,079 : INFO : EPOCH 1 - PROGRESS: at 97.53% examples, 227773 words/s, in_qsiz

2020-02-10 21:32:38,100 : INFO : EPOCH 2 - PROGRESS: at 24.69% examples, 225597 words/s, in_qsize 7, out_qsize 0
2020-02-10 21:32:39,135 : INFO : EPOCH 2 - PROGRESS: at 25.13% examples, 225429 words/s, in_qsize 7, out_qsize 0
2020-02-10 21:32:40,166 : INFO : EPOCH 2 - PROGRESS: at 25.58% examples, 225119 words/s, in_qsize 7, out_qsize 0
2020-02-10 21:32:41,217 : INFO : EPOCH 2 - PROGRESS: at 26.08% examples, 225220 words/s, in_qsize 8, out_qsize 0
2020-02-10 21:32:42,274 : INFO : EPOCH 2 - PROGRESS: at 26.60% examples, 225730 words/s, in_qsize 7, out_qsize 0
2020-02-10 21:32:43,299 : INFO : EPOCH 2 - PROGRESS: at 27.04% examples, 225766 words/s, in_qsize 7, out_qsize 0
2020-02-10 21:32:44,306 : INFO : EPOCH 2 - PROGRESS: at 27.56% examples, 226319 words/s, in_qsize 7, out_qsize 0
2020-02-10 21:32:45,327 : INFO : EPOCH 2 - PROGRESS: at 28.00% examples, 226414 words/s, in_qsize 8, out_qsize 0
2020-02-10 21:32:46,347 : INFO : EPOCH 2 - PROGRESS: at 28.30% examples, 226403 words/s, in_qsiz

2020-02-10 21:33:53,381 : INFO : EPOCH 2 - PROGRESS: at 65.42% examples, 228499 words/s, in_qsize 7, out_qsize 0
2020-02-10 21:33:54,407 : INFO : EPOCH 2 - PROGRESS: at 65.79% examples, 228273 words/s, in_qsize 7, out_qsize 0
2020-02-10 21:33:55,451 : INFO : EPOCH 2 - PROGRESS: at 66.20% examples, 228229 words/s, in_qsize 8, out_qsize 0
2020-02-10 21:33:56,460 : INFO : EPOCH 2 - PROGRESS: at 66.58% examples, 228318 words/s, in_qsize 8, out_qsize 0
2020-02-10 21:33:57,495 : INFO : EPOCH 2 - PROGRESS: at 67.00% examples, 228369 words/s, in_qsize 8, out_qsize 0
2020-02-10 21:33:58,563 : INFO : EPOCH 2 - PROGRESS: at 67.41% examples, 228357 words/s, in_qsize 7, out_qsize 0
2020-02-10 21:33:59,571 : INFO : EPOCH 2 - PROGRESS: at 67.82% examples, 228428 words/s, in_qsize 7, out_qsize 0
2020-02-10 21:34:00,574 : INFO : EPOCH 2 - PROGRESS: at 68.21% examples, 228522 words/s, in_qsize 8, out_qsize 0
2020-02-10 21:34:01,605 : INFO : EPOCH 2 - PROGRESS: at 68.60% examples, 228486 words/s, in_qsiz

2020-02-10 21:35:08,733 : INFO : EPOCH 2 - PROGRESS: at 98.72% examples, 228636 words/s, in_qsize 8, out_qsize 0
2020-02-10 21:35:09,796 : INFO : EPOCH 2 - PROGRESS: at 99.11% examples, 228757 words/s, in_qsize 8, out_qsize 0
2020-02-10 21:35:10,816 : INFO : EPOCH 2 - PROGRESS: at 99.33% examples, 228739 words/s, in_qsize 7, out_qsize 0
2020-02-10 21:35:11,892 : INFO : EPOCH 2 - PROGRESS: at 99.56% examples, 228656 words/s, in_qsize 7, out_qsize 0
2020-02-10 21:35:12,907 : INFO : EPOCH 2 - PROGRESS: at 99.89% examples, 228683 words/s, in_qsize 6, out_qsize 1
2020-02-10 21:35:13,059 : INFO : worker thread finished; awaiting finish of 3 more threads
2020-02-10 21:35:13,063 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-02-10 21:35:13,070 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-02-10 21:35:13,079 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-02-10 21:35:13,080 : INFO : EPOCH - 2 : training on 48977627 raw w

2020-02-10 21:36:20,356 : INFO : EPOCH 3 - PROGRESS: at 29.71% examples, 231110 words/s, in_qsize 7, out_qsize 0
2020-02-10 21:36:21,396 : INFO : EPOCH 3 - PROGRESS: at 30.07% examples, 231361 words/s, in_qsize 8, out_qsize 0
2020-02-10 21:36:22,452 : INFO : EPOCH 3 - PROGRESS: at 30.39% examples, 231297 words/s, in_qsize 8, out_qsize 0
2020-02-10 21:36:23,461 : INFO : EPOCH 3 - PROGRESS: at 30.71% examples, 231368 words/s, in_qsize 8, out_qsize 0
2020-02-10 21:36:24,478 : INFO : EPOCH 3 - PROGRESS: at 31.07% examples, 231523 words/s, in_qsize 8, out_qsize 0
2020-02-10 21:36:25,497 : INFO : EPOCH 3 - PROGRESS: at 31.44% examples, 231467 words/s, in_qsize 8, out_qsize 0
2020-02-10 21:36:26,536 : INFO : EPOCH 3 - PROGRESS: at 31.77% examples, 231098 words/s, in_qsize 8, out_qsize 2
2020-02-10 21:36:27,580 : INFO : EPOCH 3 - PROGRESS: at 32.16% examples, 231361 words/s, in_qsize 7, out_qsize 0
2020-02-10 21:36:28,604 : INFO : EPOCH 3 - PROGRESS: at 32.51% examples, 231044 words/s, in_qsiz

2020-02-10 21:37:35,955 : INFO : EPOCH 3 - PROGRESS: at 69.60% examples, 226724 words/s, in_qsize 8, out_qsize 1
2020-02-10 21:37:36,988 : INFO : EPOCH 3 - PROGRESS: at 69.91% examples, 226433 words/s, in_qsize 8, out_qsize 0
2020-02-10 21:37:37,989 : INFO : EPOCH 3 - PROGRESS: at 70.20% examples, 226058 words/s, in_qsize 8, out_qsize 0
2020-02-10 21:37:39,001 : INFO : EPOCH 3 - PROGRESS: at 70.50% examples, 225745 words/s, in_qsize 8, out_qsize 0
2020-02-10 21:37:40,104 : INFO : EPOCH 3 - PROGRESS: at 70.84% examples, 225557 words/s, in_qsize 8, out_qsize 0
2020-02-10 21:37:41,104 : INFO : EPOCH 3 - PROGRESS: at 71.20% examples, 225534 words/s, in_qsize 8, out_qsize 0
2020-02-10 21:37:42,112 : INFO : EPOCH 3 - PROGRESS: at 71.54% examples, 225308 words/s, in_qsize 8, out_qsize 0
2020-02-10 21:37:43,137 : INFO : EPOCH 3 - PROGRESS: at 71.85% examples, 224999 words/s, in_qsize 7, out_qsize 0
2020-02-10 21:37:44,203 : INFO : EPOCH 3 - PROGRESS: at 72.19% examples, 224817 words/s, in_qsiz

2020-02-10 21:38:47,611 : INFO : EPOCH - 3 : training on 48977627 raw words (48175470 effective words) took 214.5s, 224568 effective words/s
2020-02-10 21:38:48,690 : INFO : EPOCH 4 - PROGRESS: at 0.36% examples, 224985 words/s, in_qsize 7, out_qsize 0
2020-02-10 21:38:49,714 : INFO : EPOCH 4 - PROGRESS: at 0.58% examples, 221745 words/s, in_qsize 7, out_qsize 0
2020-02-10 21:38:50,766 : INFO : EPOCH 4 - PROGRESS: at 0.81% examples, 223305 words/s, in_qsize 7, out_qsize 0
2020-02-10 21:38:51,792 : INFO : EPOCH 4 - PROGRESS: at 1.05% examples, 229398 words/s, in_qsize 7, out_qsize 0
2020-02-10 21:38:52,834 : INFO : EPOCH 4 - PROGRESS: at 1.39% examples, 229112 words/s, in_qsize 7, out_qsize 0
2020-02-10 21:38:53,851 : INFO : EPOCH 4 - PROGRESS: at 1.65% examples, 228848 words/s, in_qsize 8, out_qsize 0
2020-02-10 21:38:54,932 : INFO : EPOCH 4 - PROGRESS: at 1.94% examples, 227882 words/s, in_qsize 8, out_qsize 0
2020-02-10 21:38:55,933 : INFO : EPOCH 4 - PROGRESS: at 2.20% examples, 227

2020-02-10 21:40:02,869 : INFO : EPOCH 4 - PROGRESS: at 32.26% examples, 229914 words/s, in_qsize 8, out_qsize 0
2020-02-10 21:40:03,915 : INFO : EPOCH 4 - PROGRESS: at 32.65% examples, 229811 words/s, in_qsize 8, out_qsize 0
2020-02-10 21:40:04,936 : INFO : EPOCH 4 - PROGRESS: at 33.06% examples, 229902 words/s, in_qsize 7, out_qsize 0
2020-02-10 21:40:05,949 : INFO : EPOCH 4 - PROGRESS: at 33.43% examples, 229889 words/s, in_qsize 8, out_qsize 0
2020-02-10 21:40:06,968 : INFO : EPOCH 4 - PROGRESS: at 33.71% examples, 229836 words/s, in_qsize 8, out_qsize 0
2020-02-10 21:40:07,996 : INFO : EPOCH 4 - PROGRESS: at 34.05% examples, 229773 words/s, in_qsize 8, out_qsize 0
2020-02-10 21:40:09,012 : INFO : EPOCH 4 - PROGRESS: at 34.39% examples, 229743 words/s, in_qsize 8, out_qsize 0
2020-02-10 21:40:10,013 : INFO : EPOCH 4 - PROGRESS: at 34.75% examples, 229996 words/s, in_qsize 6, out_qsize 1
2020-02-10 21:40:11,024 : INFO : EPOCH 4 - PROGRESS: at 35.06% examples, 229871 words/s, in_qsiz

2020-02-10 21:41:18,198 : INFO : EPOCH 4 - PROGRESS: at 72.55% examples, 226960 words/s, in_qsize 7, out_qsize 0
2020-02-10 21:41:19,232 : INFO : EPOCH 4 - PROGRESS: at 72.96% examples, 227078 words/s, in_qsize 8, out_qsize 0
2020-02-10 21:41:20,245 : INFO : EPOCH 4 - PROGRESS: at 73.27% examples, 226823 words/s, in_qsize 7, out_qsize 0
2020-02-10 21:41:21,265 : INFO : EPOCH 4 - PROGRESS: at 73.55% examples, 226459 words/s, in_qsize 8, out_qsize 0
2020-02-10 21:41:22,327 : INFO : EPOCH 4 - PROGRESS: at 73.89% examples, 226278 words/s, in_qsize 8, out_qsize 0
2020-02-10 21:41:23,350 : INFO : EPOCH 4 - PROGRESS: at 74.28% examples, 226295 words/s, in_qsize 7, out_qsize 0
2020-02-10 21:41:24,373 : INFO : EPOCH 4 - PROGRESS: at 74.67% examples, 226295 words/s, in_qsize 7, out_qsize 0
2020-02-10 21:41:25,376 : INFO : EPOCH 4 - PROGRESS: at 75.04% examples, 226268 words/s, in_qsize 8, out_qsize 0
2020-02-10 21:41:26,431 : INFO : EPOCH 4 - PROGRESS: at 75.46% examples, 226291 words/s, in_qsiz

2020-02-10 21:42:29,950 : INFO : EPOCH 5 - PROGRESS: at 1.89% examples, 192450 words/s, in_qsize 8, out_qsize 1
2020-02-10 21:42:31,011 : INFO : EPOCH 5 - PROGRESS: at 2.18% examples, 196842 words/s, in_qsize 7, out_qsize 0
2020-02-10 21:42:32,017 : INFO : EPOCH 5 - PROGRESS: at 2.42% examples, 195540 words/s, in_qsize 8, out_qsize 0
2020-02-10 21:42:33,058 : INFO : EPOCH 5 - PROGRESS: at 2.69% examples, 196822 words/s, in_qsize 7, out_qsize 0
2020-02-10 21:42:34,065 : INFO : EPOCH 5 - PROGRESS: at 2.96% examples, 199393 words/s, in_qsize 7, out_qsize 0
2020-02-10 21:42:35,105 : INFO : EPOCH 5 - PROGRESS: at 3.17% examples, 202383 words/s, in_qsize 7, out_qsize 0
2020-02-10 21:42:36,157 : INFO : EPOCH 5 - PROGRESS: at 3.41% examples, 204797 words/s, in_qsize 7, out_qsize 0
2020-02-10 21:42:37,198 : INFO : EPOCH 5 - PROGRESS: at 3.66% examples, 205290 words/s, in_qsize 8, out_qsize 0
2020-02-10 21:42:38,201 : INFO : EPOCH 5 - PROGRESS: at 3.91% examples, 206112 words/s, in_qsize 8, out_

2020-02-10 21:43:45,308 : INFO : EPOCH 5 - PROGRESS: at 33.44% examples, 214942 words/s, in_qsize 8, out_qsize 0
2020-02-10 21:43:46,363 : INFO : EPOCH 5 - PROGRESS: at 33.68% examples, 214641 words/s, in_qsize 7, out_qsize 0
2020-02-10 21:43:47,467 : INFO : EPOCH 5 - PROGRESS: at 33.97% examples, 214236 words/s, in_qsize 8, out_qsize 0
2020-02-10 21:43:48,492 : INFO : EPOCH 5 - PROGRESS: at 34.33% examples, 214367 words/s, in_qsize 7, out_qsize 1
2020-02-10 21:43:49,503 : INFO : EPOCH 5 - PROGRESS: at 34.66% examples, 214533 words/s, in_qsize 8, out_qsize 0
2020-02-10 21:43:50,658 : INFO : EPOCH 5 - PROGRESS: at 34.98% examples, 214362 words/s, in_qsize 8, out_qsize 1
2020-02-10 21:43:51,721 : INFO : EPOCH 5 - PROGRESS: at 35.29% examples, 214405 words/s, in_qsize 8, out_qsize 0
2020-02-10 21:43:52,742 : INFO : EPOCH 5 - PROGRESS: at 35.60% examples, 214553 words/s, in_qsize 8, out_qsize 0
2020-02-10 21:43:53,744 : INFO : EPOCH 5 - PROGRESS: at 35.88% examples, 214534 words/s, in_qsiz

2020-02-10 21:45:00,946 : INFO : EPOCH 5 - PROGRESS: at 73.79% examples, 219117 words/s, in_qsize 8, out_qsize 0
2020-02-10 21:45:02,026 : INFO : EPOCH 5 - PROGRESS: at 74.18% examples, 219154 words/s, in_qsize 6, out_qsize 2
2020-02-10 21:45:03,050 : INFO : EPOCH 5 - PROGRESS: at 74.55% examples, 219140 words/s, in_qsize 8, out_qsize 0
2020-02-10 21:45:04,062 : INFO : EPOCH 5 - PROGRESS: at 74.91% examples, 219087 words/s, in_qsize 6, out_qsize 1
2020-02-10 21:45:05,067 : INFO : EPOCH 5 - PROGRESS: at 75.24% examples, 218928 words/s, in_qsize 7, out_qsize 0
2020-02-10 21:45:06,113 : INFO : EPOCH 5 - PROGRESS: at 75.62% examples, 218952 words/s, in_qsize 8, out_qsize 0
2020-02-10 21:45:07,173 : INFO : EPOCH 5 - PROGRESS: at 76.01% examples, 218895 words/s, in_qsize 8, out_qsize 0
2020-02-10 21:45:08,217 : INFO : EPOCH 5 - PROGRESS: at 76.41% examples, 218907 words/s, in_qsize 7, out_qsize 0
2020-02-10 21:45:09,221 : INFO : EPOCH 5 - PROGRESS: at 76.81% examples, 218988 words/s, in_qsiz

In [None]:
print(model.wv.most_similar(positive=[u'pontes'], negative=[u'presidente']))

In [None]:
from gensim.models import Word2Vec
new_model = Word2Vec.load('../embeddings/news_w2v.bin')
print(new_model.wv.most_similar(positive=[u'pontes'], negative=[u'presidente']))