In [10]:
import pandas as pd
import numpy as np

# Считываем данные
n = ['id', 'date', 'name', 'text', 'typr', 'rep', 'rtw', 'faw', 'stcount', 'foll', 'frien', 'listcount']
data_positive = pd.read_csv('positive.csv', sep=';', error_bad_lines=False, names=n, usecols=['text'])
data_negative = pd.read_csv('negative.csv', sep=';', error_bad_lines=False, names=n, usecols=['text'])

# Формируем сбалансированный датасет
sample_size = min(data_positive.shape[0], data_negative.shape[0])
raw_data = np.concatenate((data_positive['text'].values[:sample_size],
                           data_negative['text'].values[:sample_size]), axis=0)
labels = [1] * sample_size + [0] * sample_size

In [11]:
import re

def preprocess_text(text):
    text = text.lower().replace("ё", "е")
    text = re.sub('((www\.[^\s]+)|(https?://[^\s]+))', 'URL', text)
    text = re.sub('@[^\s]+', 'USER', text)
    text = re.sub('[^a-zA-Zа-яА-Я1-9]+', ' ', text)
    text = re.sub(' +', ' ', text)
    return text.strip()


data = [preprocess_text(t) for t in raw_data]

In [12]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=1)

In [18]:
import sqlite3

# Открываем SQLite базу данных
conn = sqlite3.connect('tweet.db')
c = conn.cursor()

with open('tweets.txt', 'w', encoding='utf-8') as f:
    # Считываем тексты твитов 
    i = 0
    for row in c.execute('SELECT ttext FROM sentiment'):
        i += 1
        if row[0]:
            tweet = preprocess_text(row[0])
            # Записываем предобработанные твиты в файл
            print(tweet, file=f)
        if i % 200000 == 0:
            print(i)

200000
400000
600000
800000
1000000
1200000
1400000
1600000
1800000
2000000
2200000
2400000
2600000
2800000
3000000
3200000
3400000
3600000
3800000
4000000
4200000
4400000
4600000
4800000
5000000
5200000
5400000
5600000
5800000
6000000
6200000
6400000
6600000
6800000
7000000
7200000
7400000
7600000
7800000
8000000
8200000
8400000
8600000
8800000
9000000
9200000
9400000
9600000
9800000
10000000
10200000
10400000
10600000
10800000
11000000
11200000
11400000
11600000
11800000
12000000
12200000
12400000
12600000
12800000
13000000
13200000
13400000
13600000
13800000
14000000
14200000
14400000
14600000
14800000
15000000
15200000
15400000
15600000
15800000
16000000
16200000
16400000
16600000
16800000
17000000
17200000
17400000
17600000


In [20]:
import logging
import multiprocessing
import gensim
from gensim.models import Word2Vec

logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
# Считываем файл с предобработанными твитами
data = gensim.models.word2vec.LineSentence('tweets.txt')
# Обучаем модель 
model = Word2Vec(data, size=200, window=5, min_count=3, workers=multiprocessing.cpu_count())
model.save("models/w2v/model.w2v")

2019-07-06 10:58:37,950 : INFO : collecting all words and their counts
2019-07-06 10:58:37,951 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2019-07-06 10:58:38,001 : INFO : PROGRESS: at sentence #10000, processed 98345 words, keeping 24248 word types
2019-07-06 10:58:38,044 : INFO : PROGRESS: at sentence #20000, processed 194782 words, keeping 38847 word types
2019-07-06 10:58:38,088 : INFO : PROGRESS: at sentence #30000, processed 290743 words, keeping 50531 word types
2019-07-06 10:58:38,132 : INFO : PROGRESS: at sentence #40000, processed 386269 words, keeping 60653 word types
2019-07-06 10:58:38,176 : INFO : PROGRESS: at sentence #50000, processed 482755 words, keeping 69689 word types
2019-07-06 10:58:38,226 : INFO : PROGRESS: at sentence #60000, processed 578104 words, keeping 78235 word types
2019-07-06 10:58:38,270 : INFO : PROGRESS: at sentence #70000, processed 672686 words, keeping 85840 word types
2019-07-06 10:58:38,320 : INFO : PROGRESS: at s

2019-07-06 10:58:41,265 : INFO : PROGRESS: at sentence #720000, processed 6856226 words, keeping 329362 word types
2019-07-06 10:58:41,315 : INFO : PROGRESS: at sentence #730000, processed 6953689 words, keeping 331913 word types
2019-07-06 10:58:41,361 : INFO : PROGRESS: at sentence #740000, processed 7054203 words, keeping 334539 word types
2019-07-06 10:58:41,405 : INFO : PROGRESS: at sentence #750000, processed 7153523 words, keeping 336844 word types
2019-07-06 10:58:41,451 : INFO : PROGRESS: at sentence #760000, processed 7253230 words, keeping 339124 word types
2019-07-06 10:58:41,496 : INFO : PROGRESS: at sentence #770000, processed 7352424 words, keeping 341418 word types
2019-07-06 10:58:41,540 : INFO : PROGRESS: at sentence #780000, processed 7450987 words, keeping 343877 word types
2019-07-06 10:58:41,586 : INFO : PROGRESS: at sentence #790000, processed 7545233 words, keeping 346595 word types
2019-07-06 10:58:41,629 : INFO : PROGRESS: at sentence #800000, processed 763865

2019-07-06 10:58:44,537 : INFO : PROGRESS: at sentence #1430000, processed 13698594 words, keeping 494012 word types
2019-07-06 10:58:44,585 : INFO : PROGRESS: at sentence #1440000, processed 13799436 words, keeping 496301 word types
2019-07-06 10:58:44,633 : INFO : PROGRESS: at sentence #1450000, processed 13898908 words, keeping 498556 word types
2019-07-06 10:58:44,680 : INFO : PROGRESS: at sentence #1460000, processed 13996483 words, keeping 500687 word types
2019-07-06 10:58:44,728 : INFO : PROGRESS: at sentence #1470000, processed 14094360 words, keeping 502819 word types
2019-07-06 10:58:44,775 : INFO : PROGRESS: at sentence #1480000, processed 14192679 words, keeping 504855 word types
2019-07-06 10:58:44,824 : INFO : PROGRESS: at sentence #1490000, processed 14291910 words, keeping 507157 word types
2019-07-06 10:58:44,871 : INFO : PROGRESS: at sentence #1500000, processed 14391159 words, keeping 509297 word types
2019-07-06 10:58:44,919 : INFO : PROGRESS: at sentence #1510000,

2019-07-06 10:58:47,917 : INFO : PROGRESS: at sentence #2140000, processed 20623466 words, keeping 625917 word types
2019-07-06 10:58:47,966 : INFO : PROGRESS: at sentence #2150000, processed 20722248 words, keeping 627462 word types
2019-07-06 10:58:48,014 : INFO : PROGRESS: at sentence #2160000, processed 20819863 words, keeping 629122 word types
2019-07-06 10:58:48,062 : INFO : PROGRESS: at sentence #2170000, processed 20917554 words, keeping 630708 word types
2019-07-06 10:58:48,110 : INFO : PROGRESS: at sentence #2180000, processed 21015641 words, keeping 632393 word types
2019-07-06 10:58:48,159 : INFO : PROGRESS: at sentence #2190000, processed 21113827 words, keeping 634124 word types
2019-07-06 10:58:48,206 : INFO : PROGRESS: at sentence #2200000, processed 21212464 words, keeping 635844 word types
2019-07-06 10:58:48,255 : INFO : PROGRESS: at sentence #2210000, processed 21310897 words, keeping 637585 word types
2019-07-06 10:58:48,304 : INFO : PROGRESS: at sentence #2220000,

2019-07-06 10:58:51,362 : INFO : PROGRESS: at sentence #2850000, processed 27587700 words, keeping 742350 word types
2019-07-06 10:58:51,412 : INFO : PROGRESS: at sentence #2860000, processed 27686628 words, keeping 744033 word types
2019-07-06 10:58:51,462 : INFO : PROGRESS: at sentence #2870000, processed 27788789 words, keeping 745708 word types
2019-07-06 10:58:51,509 : INFO : PROGRESS: at sentence #2880000, processed 27885614 words, keeping 747258 word types
2019-07-06 10:58:51,555 : INFO : PROGRESS: at sentence #2890000, processed 27979645 words, keeping 748829 word types
2019-07-06 10:58:51,601 : INFO : PROGRESS: at sentence #2900000, processed 28075614 words, keeping 750449 word types
2019-07-06 10:58:51,648 : INFO : PROGRESS: at sentence #2910000, processed 28172950 words, keeping 752004 word types
2019-07-06 10:58:51,694 : INFO : PROGRESS: at sentence #2920000, processed 28270410 words, keeping 753472 word types
2019-07-06 10:58:51,741 : INFO : PROGRESS: at sentence #2930000,

2019-07-06 10:58:54,683 : INFO : PROGRESS: at sentence #3560000, processed 34438798 words, keeping 843762 word types
2019-07-06 10:58:54,729 : INFO : PROGRESS: at sentence #3570000, processed 34531015 words, keeping 844977 word types
2019-07-06 10:58:54,777 : INFO : PROGRESS: at sentence #3580000, processed 34624100 words, keeping 846165 word types
2019-07-06 10:58:54,826 : INFO : PROGRESS: at sentence #3590000, processed 34719148 words, keeping 847374 word types
2019-07-06 10:58:54,878 : INFO : PROGRESS: at sentence #3600000, processed 34819053 words, keeping 848814 word types
2019-07-06 10:58:54,928 : INFO : PROGRESS: at sentence #3610000, processed 34918864 words, keeping 850243 word types
2019-07-06 10:58:54,977 : INFO : PROGRESS: at sentence #3620000, processed 35018239 words, keeping 851702 word types
2019-07-06 10:58:55,030 : INFO : PROGRESS: at sentence #3630000, processed 35117362 words, keeping 853081 word types
2019-07-06 10:58:55,079 : INFO : PROGRESS: at sentence #3640000,

2019-07-06 10:58:58,131 : INFO : PROGRESS: at sentence #4270000, processed 41464980 words, keeping 938539 word types
2019-07-06 10:58:58,179 : INFO : PROGRESS: at sentence #4280000, processed 41568173 words, keeping 939803 word types
2019-07-06 10:58:58,230 : INFO : PROGRESS: at sentence #4290000, processed 41671892 words, keeping 941077 word types
2019-07-06 10:58:58,278 : INFO : PROGRESS: at sentence #4300000, processed 41775515 words, keeping 942401 word types
2019-07-06 10:58:58,328 : INFO : PROGRESS: at sentence #4310000, processed 41878294 words, keeping 943781 word types
2019-07-06 10:58:58,378 : INFO : PROGRESS: at sentence #4320000, processed 41981059 words, keeping 945058 word types
2019-07-06 10:58:58,428 : INFO : PROGRESS: at sentence #4330000, processed 42081929 words, keeping 946331 word types
2019-07-06 10:58:58,476 : INFO : PROGRESS: at sentence #4340000, processed 42185413 words, keeping 947628 word types
2019-07-06 10:58:58,527 : INFO : PROGRESS: at sentence #4350000,

2019-07-06 10:59:01,536 : INFO : PROGRESS: at sentence #4970000, processed 48525276 words, keeping 1025892 word types
2019-07-06 10:59:01,583 : INFO : PROGRESS: at sentence #4980000, processed 48623603 words, keeping 1027015 word types
2019-07-06 10:59:01,628 : INFO : PROGRESS: at sentence #4990000, processed 48721784 words, keeping 1028147 word types
2019-07-06 10:59:01,674 : INFO : PROGRESS: at sentence #5000000, processed 48820650 words, keeping 1029291 word types
2019-07-06 10:59:01,726 : INFO : PROGRESS: at sentence #5010000, processed 48919694 words, keeping 1030453 word types
2019-07-06 10:59:01,777 : INFO : PROGRESS: at sentence #5020000, processed 49015912 words, keeping 1031664 word types
2019-07-06 10:59:01,825 : INFO : PROGRESS: at sentence #5030000, processed 49114805 words, keeping 1032739 word types
2019-07-06 10:59:01,875 : INFO : PROGRESS: at sentence #5040000, processed 49214888 words, keeping 1033813 word types
2019-07-06 10:59:01,926 : INFO : PROGRESS: at sentence #

2019-07-06 10:59:04,937 : INFO : PROGRESS: at sentence #5670000, processed 55510788 words, keeping 1106006 word types
2019-07-06 10:59:04,984 : INFO : PROGRESS: at sentence #5680000, processed 55607005 words, keeping 1107034 word types
2019-07-06 10:59:05,028 : INFO : PROGRESS: at sentence #5690000, processed 55701085 words, keeping 1108151 word types
2019-07-06 10:59:05,077 : INFO : PROGRESS: at sentence #5700000, processed 55796372 words, keeping 1109193 word types
2019-07-06 10:59:05,124 : INFO : PROGRESS: at sentence #5710000, processed 55894523 words, keeping 1110264 word types
2019-07-06 10:59:05,171 : INFO : PROGRESS: at sentence #5720000, processed 55991793 words, keeping 1111332 word types
2019-07-06 10:59:05,219 : INFO : PROGRESS: at sentence #5730000, processed 56089952 words, keeping 1112456 word types
2019-07-06 10:59:05,266 : INFO : PROGRESS: at sentence #5740000, processed 56186683 words, keeping 1113552 word types
2019-07-06 10:59:05,314 : INFO : PROGRESS: at sentence #

2019-07-06 10:59:08,205 : INFO : PROGRESS: at sentence #6370000, processed 62203029 words, keeping 1181585 word types
2019-07-06 10:59:08,252 : INFO : PROGRESS: at sentence #6380000, processed 62302854 words, keeping 1182501 word types
2019-07-06 10:59:08,299 : INFO : PROGRESS: at sentence #6390000, processed 62401570 words, keeping 1183421 word types
2019-07-06 10:59:08,348 : INFO : PROGRESS: at sentence #6400000, processed 62500855 words, keeping 1184453 word types
2019-07-06 10:59:08,396 : INFO : PROGRESS: at sentence #6410000, processed 62600552 words, keeping 1185543 word types
2019-07-06 10:59:08,446 : INFO : PROGRESS: at sentence #6420000, processed 62701173 words, keeping 1186512 word types
2019-07-06 10:59:08,494 : INFO : PROGRESS: at sentence #6430000, processed 62803158 words, keeping 1187464 word types
2019-07-06 10:59:08,541 : INFO : PROGRESS: at sentence #6440000, processed 62902217 words, keeping 1188466 word types
2019-07-06 10:59:08,590 : INFO : PROGRESS: at sentence #

2019-07-06 10:59:11,547 : INFO : PROGRESS: at sentence #7070000, processed 69035200 words, keeping 1252053 word types
2019-07-06 10:59:11,594 : INFO : PROGRESS: at sentence #7080000, processed 69130297 words, keeping 1253007 word types
2019-07-06 10:59:11,639 : INFO : PROGRESS: at sentence #7090000, processed 69223480 words, keeping 1253889 word types
2019-07-06 10:59:11,685 : INFO : PROGRESS: at sentence #7100000, processed 69317953 words, keeping 1254842 word types
2019-07-06 10:59:11,732 : INFO : PROGRESS: at sentence #7110000, processed 69415198 words, keeping 1255833 word types
2019-07-06 10:59:11,780 : INFO : PROGRESS: at sentence #7120000, processed 69513574 words, keeping 1256819 word types
2019-07-06 10:59:11,828 : INFO : PROGRESS: at sentence #7130000, processed 69611254 words, keeping 1257787 word types
2019-07-06 10:59:11,874 : INFO : PROGRESS: at sentence #7140000, processed 69707222 words, keeping 1258763 word types
2019-07-06 10:59:11,922 : INFO : PROGRESS: at sentence #

2019-07-06 10:59:14,978 : INFO : PROGRESS: at sentence #7770000, processed 75909381 words, keeping 1318646 word types
2019-07-06 10:59:15,030 : INFO : PROGRESS: at sentence #7780000, processed 76010253 words, keeping 1319488 word types
2019-07-06 10:59:15,081 : INFO : PROGRESS: at sentence #7790000, processed 76111534 words, keeping 1320375 word types
2019-07-06 10:59:15,129 : INFO : PROGRESS: at sentence #7800000, processed 76211486 words, keeping 1321392 word types
2019-07-06 10:59:15,180 : INFO : PROGRESS: at sentence #7810000, processed 76310716 words, keeping 1322475 word types
2019-07-06 10:59:15,228 : INFO : PROGRESS: at sentence #7820000, processed 76409693 words, keeping 1323457 word types
2019-07-06 10:59:15,278 : INFO : PROGRESS: at sentence #7830000, processed 76509025 words, keeping 1324473 word types
2019-07-06 10:59:15,325 : INFO : PROGRESS: at sentence #7840000, processed 76607382 words, keeping 1325463 word types
2019-07-06 10:59:15,375 : INFO : PROGRESS: at sentence #

2019-07-06 10:59:18,412 : INFO : PROGRESS: at sentence #8470000, processed 82809572 words, keeping 1386159 word types
2019-07-06 10:59:18,459 : INFO : PROGRESS: at sentence #8480000, processed 82904271 words, keeping 1387102 word types
2019-07-06 10:59:18,507 : INFO : PROGRESS: at sentence #8490000, processed 82999867 words, keeping 1388060 word types
2019-07-06 10:59:18,559 : INFO : PROGRESS: at sentence #8500000, processed 83094558 words, keeping 1389032 word types
2019-07-06 10:59:18,606 : INFO : PROGRESS: at sentence #8510000, processed 83189128 words, keeping 1389945 word types
2019-07-06 10:59:18,652 : INFO : PROGRESS: at sentence #8520000, processed 83282635 words, keeping 1390838 word types
2019-07-06 10:59:18,698 : INFO : PROGRESS: at sentence #8530000, processed 83374490 words, keeping 1391762 word types
2019-07-06 10:59:18,745 : INFO : PROGRESS: at sentence #8540000, processed 83466286 words, keeping 1392703 word types
2019-07-06 10:59:18,790 : INFO : PROGRESS: at sentence #

2019-07-06 10:59:21,703 : INFO : PROGRESS: at sentence #9170000, processed 89285232 words, keeping 1451251 word types
2019-07-06 10:59:21,751 : INFO : PROGRESS: at sentence #9180000, processed 89380235 words, keeping 1452107 word types
2019-07-06 10:59:21,802 : INFO : PROGRESS: at sentence #9190000, processed 89477117 words, keeping 1452964 word types
2019-07-06 10:59:21,850 : INFO : PROGRESS: at sentence #9200000, processed 89575575 words, keeping 1453890 word types
2019-07-06 10:59:21,908 : INFO : PROGRESS: at sentence #9210000, processed 89672559 words, keeping 1454738 word types
2019-07-06 10:59:21,955 : INFO : PROGRESS: at sentence #9220000, processed 89770833 words, keeping 1455595 word types
2019-07-06 10:59:22,004 : INFO : PROGRESS: at sentence #9230000, processed 89870226 words, keeping 1456390 word types
2019-07-06 10:59:22,055 : INFO : PROGRESS: at sentence #9240000, processed 89969808 words, keeping 1457193 word types
2019-07-06 10:59:22,104 : INFO : PROGRESS: at sentence #

2019-07-06 10:59:24,986 : INFO : PROGRESS: at sentence #9870000, processed 96004917 words, keeping 1514911 word types
2019-07-06 10:59:25,033 : INFO : PROGRESS: at sentence #9880000, processed 96102008 words, keeping 1515861 word types
2019-07-06 10:59:25,077 : INFO : PROGRESS: at sentence #9890000, processed 96196101 words, keeping 1516875 word types
2019-07-06 10:59:25,123 : INFO : PROGRESS: at sentence #9900000, processed 96291175 words, keeping 1517864 word types
2019-07-06 10:59:25,170 : INFO : PROGRESS: at sentence #9910000, processed 96388460 words, keeping 1518948 word types
2019-07-06 10:59:25,215 : INFO : PROGRESS: at sentence #9920000, processed 96481603 words, keeping 1519991 word types
2019-07-06 10:59:25,261 : INFO : PROGRESS: at sentence #9930000, processed 96576704 words, keeping 1521064 word types
2019-07-06 10:59:25,309 : INFO : PROGRESS: at sentence #9940000, processed 96674355 words, keeping 1522098 word types
2019-07-06 10:59:25,357 : INFO : PROGRESS: at sentence #

2019-07-06 10:59:28,250 : INFO : PROGRESS: at sentence #10560000, processed 102606905 words, keeping 1576612 word types
2019-07-06 10:59:28,296 : INFO : PROGRESS: at sentence #10570000, processed 102702567 words, keeping 1577476 word types
2019-07-06 10:59:28,343 : INFO : PROGRESS: at sentence #10580000, processed 102797396 words, keeping 1578351 word types
2019-07-06 10:59:28,389 : INFO : PROGRESS: at sentence #10590000, processed 102891834 words, keeping 1579282 word types
2019-07-06 10:59:28,437 : INFO : PROGRESS: at sentence #10600000, processed 102986390 words, keeping 1580138 word types
2019-07-06 10:59:28,486 : INFO : PROGRESS: at sentence #10610000, processed 103084646 words, keeping 1580969 word types
2019-07-06 10:59:28,533 : INFO : PROGRESS: at sentence #10620000, processed 103180341 words, keeping 1581750 word types
2019-07-06 10:59:28,580 : INFO : PROGRESS: at sentence #10630000, processed 103274833 words, keeping 1582533 word types
2019-07-06 10:59:28,629 : INFO : PROGRES

2019-07-06 10:59:31,483 : INFO : PROGRESS: at sentence #11250000, processed 109154721 words, keeping 1638430 word types
2019-07-06 10:59:31,530 : INFO : PROGRESS: at sentence #11260000, processed 109251155 words, keeping 1639412 word types
2019-07-06 10:59:31,575 : INFO : PROGRESS: at sentence #11270000, processed 109346147 words, keeping 1640374 word types
2019-07-06 10:59:31,623 : INFO : PROGRESS: at sentence #11280000, processed 109441538 words, keeping 1641315 word types
2019-07-06 10:59:31,669 : INFO : PROGRESS: at sentence #11290000, processed 109534608 words, keeping 1642267 word types
2019-07-06 10:59:31,714 : INFO : PROGRESS: at sentence #11300000, processed 109626733 words, keeping 1643231 word types
2019-07-06 10:59:31,760 : INFO : PROGRESS: at sentence #11310000, processed 109719845 words, keeping 1644169 word types
2019-07-06 10:59:31,805 : INFO : PROGRESS: at sentence #11320000, processed 109813545 words, keeping 1645127 word types
2019-07-06 10:59:31,850 : INFO : PROGRES

2019-07-06 10:59:34,759 : INFO : PROGRESS: at sentence #11940000, processed 115841478 words, keeping 1699989 word types
2019-07-06 10:59:34,807 : INFO : PROGRESS: at sentence #11950000, processed 115942635 words, keeping 1700988 word types
2019-07-06 10:59:34,859 : INFO : PROGRESS: at sentence #11960000, processed 116043980 words, keeping 1702043 word types
2019-07-06 10:59:34,907 : INFO : PROGRESS: at sentence #11970000, processed 116143661 words, keeping 1703109 word types
2019-07-06 10:59:34,957 : INFO : PROGRESS: at sentence #11980000, processed 116244703 words, keeping 1704093 word types
2019-07-06 10:59:35,005 : INFO : PROGRESS: at sentence #11990000, processed 116343337 words, keeping 1704940 word types
2019-07-06 10:59:35,054 : INFO : PROGRESS: at sentence #12000000, processed 116441841 words, keeping 1705775 word types
2019-07-06 10:59:35,100 : INFO : PROGRESS: at sentence #12010000, processed 116538928 words, keeping 1706590 word types
2019-07-06 10:59:35,149 : INFO : PROGRES

2019-07-06 10:59:38,114 : INFO : PROGRESS: at sentence #12630000, processed 122598017 words, keeping 1761715 word types
2019-07-06 10:59:38,167 : INFO : PROGRESS: at sentence #12640000, processed 122700678 words, keeping 1762468 word types
2019-07-06 10:59:38,216 : INFO : PROGRESS: at sentence #12650000, processed 122800259 words, keeping 1763297 word types
2019-07-06 10:59:38,266 : INFO : PROGRESS: at sentence #12660000, processed 122900785 words, keeping 1764112 word types
2019-07-06 10:59:38,314 : INFO : PROGRESS: at sentence #12670000, processed 122999604 words, keeping 1765094 word types
2019-07-06 10:59:38,363 : INFO : PROGRESS: at sentence #12680000, processed 123098977 words, keeping 1766136 word types
2019-07-06 10:59:38,412 : INFO : PROGRESS: at sentence #12690000, processed 123197184 words, keeping 1767258 word types
2019-07-06 10:59:38,460 : INFO : PROGRESS: at sentence #12700000, processed 123295377 words, keeping 1768378 word types
2019-07-06 10:59:38,508 : INFO : PROGRES

2019-07-06 10:59:41,444 : INFO : PROGRESS: at sentence #13320000, processed 129325171 words, keeping 1826916 word types
2019-07-06 10:59:41,490 : INFO : PROGRESS: at sentence #13330000, processed 129420674 words, keeping 1827695 word types
2019-07-06 10:59:41,537 : INFO : PROGRESS: at sentence #13340000, processed 129513554 words, keeping 1828526 word types
2019-07-06 10:59:41,585 : INFO : PROGRESS: at sentence #13350000, processed 129610443 words, keeping 1829378 word types
2019-07-06 10:59:41,631 : INFO : PROGRESS: at sentence #13360000, processed 129707380 words, keeping 1830268 word types
2019-07-06 10:59:41,680 : INFO : PROGRESS: at sentence #13370000, processed 129802457 words, keeping 1831081 word types
2019-07-06 10:59:41,726 : INFO : PROGRESS: at sentence #13380000, processed 129894732 words, keeping 1831910 word types
2019-07-06 10:59:41,774 : INFO : PROGRESS: at sentence #13390000, processed 129991909 words, keeping 1832917 word types
2019-07-06 10:59:41,824 : INFO : PROGRES

2019-07-06 10:59:44,773 : INFO : PROGRESS: at sentence #14010000, processed 136070958 words, keeping 1887448 word types
2019-07-06 10:59:44,825 : INFO : PROGRESS: at sentence #14020000, processed 136166863 words, keeping 1888284 word types
2019-07-06 10:59:44,874 : INFO : PROGRESS: at sentence #14030000, processed 136263379 words, keeping 1889143 word types
2019-07-06 10:59:44,922 : INFO : PROGRESS: at sentence #14040000, processed 136359728 words, keeping 1890046 word types
2019-07-06 10:59:44,968 : INFO : PROGRESS: at sentence #14050000, processed 136455220 words, keeping 1890886 word types
2019-07-06 10:59:45,016 : INFO : PROGRESS: at sentence #14060000, processed 136550414 words, keeping 1891731 word types
2019-07-06 10:59:45,062 : INFO : PROGRESS: at sentence #14070000, processed 136646325 words, keeping 1892609 word types
2019-07-06 10:59:45,110 : INFO : PROGRESS: at sentence #14080000, processed 136739730 words, keeping 1893452 word types
2019-07-06 10:59:45,156 : INFO : PROGRES

2019-07-06 10:59:48,144 : INFO : PROGRESS: at sentence #14700000, processed 143006905 words, keeping 1943821 word types
2019-07-06 10:59:48,195 : INFO : PROGRESS: at sentence #14710000, processed 143109019 words, keeping 1944582 word types
2019-07-06 10:59:48,243 : INFO : PROGRESS: at sentence #14720000, processed 143208086 words, keeping 1945346 word types
2019-07-06 10:59:48,292 : INFO : PROGRESS: at sentence #14730000, processed 143311173 words, keeping 1946104 word types
2019-07-06 10:59:48,339 : INFO : PROGRESS: at sentence #14740000, processed 143411355 words, keeping 1946950 word types
2019-07-06 10:59:48,389 : INFO : PROGRESS: at sentence #14750000, processed 143512200 words, keeping 1947724 word types
2019-07-06 10:59:48,437 : INFO : PROGRESS: at sentence #14760000, processed 143614032 words, keeping 1948501 word types
2019-07-06 10:59:48,485 : INFO : PROGRESS: at sentence #14770000, processed 143714602 words, keeping 1949214 word types
2019-07-06 10:59:48,533 : INFO : PROGRES

2019-07-06 10:59:51,515 : INFO : PROGRESS: at sentence #15390000, processed 150023182 words, keeping 1994076 word types
2019-07-06 10:59:51,566 : INFO : PROGRESS: at sentence #15400000, processed 150128098 words, keeping 1994769 word types
2019-07-06 10:59:51,614 : INFO : PROGRESS: at sentence #15410000, processed 150236432 words, keeping 1995359 word types
2019-07-06 10:59:51,663 : INFO : PROGRESS: at sentence #15420000, processed 150342291 words, keeping 1996092 word types
2019-07-06 10:59:51,714 : INFO : PROGRESS: at sentence #15430000, processed 150446301 words, keeping 1996796 word types
2019-07-06 10:59:51,762 : INFO : PROGRESS: at sentence #15440000, processed 150548098 words, keeping 1997480 word types
2019-07-06 10:59:51,810 : INFO : PROGRESS: at sentence #15450000, processed 150652984 words, keeping 1998217 word types
2019-07-06 10:59:51,859 : INFO : PROGRESS: at sentence #15460000, processed 150757792 words, keeping 1998920 word types
2019-07-06 10:59:51,910 : INFO : PROGRES

2019-07-06 10:59:54,941 : INFO : PROGRESS: at sentence #16080000, processed 157120810 words, keeping 2042640 word types
2019-07-06 10:59:54,987 : INFO : PROGRESS: at sentence #16090000, processed 157215938 words, keeping 2043071 word types
2019-07-06 10:59:55,033 : INFO : PROGRESS: at sentence #16100000, processed 157312632 words, keeping 2043552 word types
2019-07-06 10:59:55,082 : INFO : PROGRESS: at sentence #16110000, processed 157412784 words, keeping 2043984 word types
2019-07-06 10:59:55,130 : INFO : PROGRESS: at sentence #16120000, processed 157510619 words, keeping 2044459 word types
2019-07-06 10:59:55,178 : INFO : PROGRESS: at sentence #16130000, processed 157611753 words, keeping 2044961 word types
2019-07-06 10:59:55,228 : INFO : PROGRESS: at sentence #16140000, processed 157717081 words, keeping 2045650 word types
2019-07-06 10:59:55,278 : INFO : PROGRESS: at sentence #16150000, processed 157822616 words, keeping 2046290 word types
2019-07-06 10:59:55,328 : INFO : PROGRES

2019-07-06 10:59:58,333 : INFO : PROGRESS: at sentence #16770000, processed 164238020 words, keeping 2089234 word types
2019-07-06 10:59:58,385 : INFO : PROGRESS: at sentence #16780000, processed 164350429 words, keeping 2089880 word types
2019-07-06 10:59:58,436 : INFO : PROGRESS: at sentence #16790000, processed 164462354 words, keeping 2090593 word types
2019-07-06 10:59:58,492 : INFO : PROGRESS: at sentence #16800000, processed 164574381 words, keeping 2091332 word types
2019-07-06 10:59:58,543 : INFO : PROGRESS: at sentence #16810000, processed 164689265 words, keeping 2092032 word types
2019-07-06 10:59:58,595 : INFO : PROGRESS: at sentence #16820000, processed 164799490 words, keeping 2092718 word types
2019-07-06 10:59:58,655 : INFO : PROGRESS: at sentence #16830000, processed 164916238 words, keeping 2093339 word types
2019-07-06 10:59:58,707 : INFO : PROGRESS: at sentence #16840000, processed 165023237 words, keeping 2094001 word types
2019-07-06 10:59:58,757 : INFO : PROGRES

2019-07-06 11:00:01,850 : INFO : PROGRESS: at sentence #17460000, processed 171453734 words, keeping 2135864 word types
2019-07-06 11:00:01,900 : INFO : PROGRESS: at sentence #17470000, processed 171557228 words, keeping 2136414 word types
2019-07-06 11:00:01,949 : INFO : PROGRESS: at sentence #17480000, processed 171663024 words, keeping 2137015 word types
2019-07-06 11:00:01,999 : INFO : PROGRESS: at sentence #17490000, processed 171770139 words, keeping 2137569 word types
2019-07-06 11:00:02,049 : INFO : PROGRESS: at sentence #17500000, processed 171875301 words, keeping 2138118 word types
2019-07-06 11:00:02,099 : INFO : PROGRESS: at sentence #17510000, processed 171980078 words, keeping 2138687 word types
2019-07-06 11:00:02,148 : INFO : PROGRESS: at sentence #17520000, processed 172081530 words, keeping 2139410 word types
2019-07-06 11:00:02,201 : INFO : PROGRESS: at sentence #17530000, processed 172185323 words, keeping 2140096 word types
2019-07-06 11:00:02,249 : INFO : PROGRES

2019-07-06 11:01:01,215 : INFO : EPOCH 1 - PROGRESS: at 24.98% examples, 745773 words/s, in_qsize 24, out_qsize 0
2019-07-06 11:01:02,227 : INFO : EPOCH 1 - PROGRESS: at 25.48% examples, 745267 words/s, in_qsize 12, out_qsize 0
2019-07-06 11:01:03,233 : INFO : EPOCH 1 - PROGRESS: at 26.02% examples, 745637 words/s, in_qsize 19, out_qsize 0
2019-07-06 11:01:04,241 : INFO : EPOCH 1 - PROGRESS: at 26.61% examples, 747865 words/s, in_qsize 0, out_qsize 1
2019-07-06 11:01:05,242 : INFO : EPOCH 1 - PROGRESS: at 27.10% examples, 747137 words/s, in_qsize 0, out_qsize 0
2019-07-06 11:01:06,258 : INFO : EPOCH 1 - PROGRESS: at 27.59% examples, 745887 words/s, in_qsize 19, out_qsize 0
2019-07-06 11:01:07,292 : INFO : EPOCH 1 - PROGRESS: at 28.11% examples, 744985 words/s, in_qsize 13, out_qsize 4
2019-07-06 11:01:08,298 : INFO : EPOCH 1 - PROGRESS: at 28.68% examples, 745943 words/s, in_qsize 15, out_qsize 0
2019-07-06 11:01:09,302 : INFO : EPOCH 1 - PROGRESS: at 29.26% examples, 748087 words/s, i

2019-07-06 11:02:15,255 : INFO : EPOCH 1 - PROGRESS: at 64.78% examples, 744140 words/s, in_qsize 6, out_qsize 1
2019-07-06 11:02:16,261 : INFO : EPOCH 1 - PROGRESS: at 65.29% examples, 743479 words/s, in_qsize 22, out_qsize 0
2019-07-06 11:02:17,269 : INFO : EPOCH 1 - PROGRESS: at 65.91% examples, 744333 words/s, in_qsize 0, out_qsize 0
2019-07-06 11:02:18,289 : INFO : EPOCH 1 - PROGRESS: at 66.37% examples, 743464 words/s, in_qsize 20, out_qsize 0
2019-07-06 11:02:19,303 : INFO : EPOCH 1 - PROGRESS: at 66.93% examples, 743687 words/s, in_qsize 17, out_qsize 1
2019-07-06 11:02:20,312 : INFO : EPOCH 1 - PROGRESS: at 67.42% examples, 743180 words/s, in_qsize 24, out_qsize 0
2019-07-06 11:02:21,314 : INFO : EPOCH 1 - PROGRESS: at 67.97% examples, 743528 words/s, in_qsize 20, out_qsize 1
2019-07-06 11:02:22,321 : INFO : EPOCH 1 - PROGRESS: at 68.58% examples, 744461 words/s, in_qsize 1, out_qsize 0
2019-07-06 11:02:23,333 : INFO : EPOCH 1 - PROGRESS: at 69.09% examples, 744059 words/s, in

2019-07-06 11:03:23,074 : INFO : worker thread finished; awaiting finish of 4 more threads
2019-07-06 11:03:23,075 : INFO : worker thread finished; awaiting finish of 3 more threads
2019-07-06 11:03:23,075 : INFO : worker thread finished; awaiting finish of 2 more threads
2019-07-06 11:03:23,079 : INFO : worker thread finished; awaiting finish of 1 more threads
2019-07-06 11:03:23,090 : INFO : worker thread finished; awaiting finish of 0 more threads
2019-07-06 11:03:23,091 : INFO : EPOCH - 1 : training on 173212741 raw words (140584979 effective words) took 188.4s, 746172 effective words/s
2019-07-06 11:03:24,103 : INFO : EPOCH 2 - PROGRESS: at 0.55% examples, 753300 words/s, in_qsize 0, out_qsize 1
2019-07-06 11:03:25,164 : INFO : EPOCH 2 - PROGRESS: at 1.11% examples, 743419 words/s, in_qsize 0, out_qsize 0
2019-07-06 11:03:26,168 : INFO : EPOCH 2 - PROGRESS: at 1.68% examples, 741008 words/s, in_qsize 0, out_qsize 0
2019-07-06 11:03:27,203 : INFO : EPOCH 2 - PROGRESS: at 2.23% exam

2019-07-06 11:04:33,089 : INFO : EPOCH 2 - PROGRESS: at 36.97% examples, 736973 words/s, in_qsize 11, out_qsize 0
2019-07-06 11:04:34,096 : INFO : EPOCH 2 - PROGRESS: at 37.48% examples, 736706 words/s, in_qsize 22, out_qsize 1
2019-07-06 11:04:35,098 : INFO : EPOCH 2 - PROGRESS: at 38.12% examples, 738576 words/s, in_qsize 0, out_qsize 0
2019-07-06 11:04:36,107 : INFO : EPOCH 2 - PROGRESS: at 38.62% examples, 737985 words/s, in_qsize 0, out_qsize 0
2019-07-06 11:04:37,117 : INFO : EPOCH 2 - PROGRESS: at 39.15% examples, 738204 words/s, in_qsize 0, out_qsize 0
2019-07-06 11:04:38,125 : INFO : EPOCH 2 - PROGRESS: at 39.67% examples, 738164 words/s, in_qsize 0, out_qsize 0
2019-07-06 11:04:39,139 : INFO : EPOCH 2 - PROGRESS: at 40.13% examples, 736568 words/s, in_qsize 17, out_qsize 0
2019-07-06 11:04:40,144 : INFO : EPOCH 2 - PROGRESS: at 40.74% examples, 737788 words/s, in_qsize 0, out_qsize 0
2019-07-06 11:04:41,161 : INFO : EPOCH 2 - PROGRESS: at 41.20% examples, 736631 words/s, in_q

2019-07-06 11:05:46,887 : INFO : EPOCH 2 - PROGRESS: at 75.20% examples, 725412 words/s, in_qsize 0, out_qsize 0
2019-07-06 11:05:47,902 : INFO : EPOCH 2 - PROGRESS: at 75.75% examples, 725454 words/s, in_qsize 1, out_qsize 0
2019-07-06 11:05:48,923 : INFO : EPOCH 2 - PROGRESS: at 76.24% examples, 725061 words/s, in_qsize 0, out_qsize 1
2019-07-06 11:05:49,924 : INFO : EPOCH 2 - PROGRESS: at 76.79% examples, 725481 words/s, in_qsize 0, out_qsize 0
2019-07-06 11:05:50,930 : INFO : EPOCH 2 - PROGRESS: at 77.31% examples, 725539 words/s, in_qsize 0, out_qsize 0
2019-07-06 11:05:51,946 : INFO : EPOCH 2 - PROGRESS: at 77.78% examples, 725168 words/s, in_qsize 0, out_qsize 0
2019-07-06 11:05:52,948 : INFO : EPOCH 2 - PROGRESS: at 78.33% examples, 725489 words/s, in_qsize 0, out_qsize 0
2019-07-06 11:05:53,957 : INFO : EPOCH 2 - PROGRESS: at 78.85% examples, 725246 words/s, in_qsize 0, out_qsize 0
2019-07-06 11:05:54,968 : INFO : EPOCH 2 - PROGRESS: at 79.38% examples, 725311 words/s, in_qsiz

2019-07-06 11:06:49,817 : INFO : EPOCH 3 - PROGRESS: at 7.58% examples, 736207 words/s, in_qsize 0, out_qsize 0
2019-07-06 11:06:50,821 : INFO : EPOCH 3 - PROGRESS: at 8.10% examples, 735543 words/s, in_qsize 0, out_qsize 0
2019-07-06 11:06:51,832 : INFO : EPOCH 3 - PROGRESS: at 8.58% examples, 731158 words/s, in_qsize 0, out_qsize 0
2019-07-06 11:06:52,837 : INFO : EPOCH 3 - PROGRESS: at 9.11% examples, 731123 words/s, in_qsize 0, out_qsize 0
2019-07-06 11:06:53,840 : INFO : EPOCH 3 - PROGRESS: at 9.57% examples, 725636 words/s, in_qsize 8, out_qsize 0
2019-07-06 11:06:54,851 : INFO : EPOCH 3 - PROGRESS: at 10.14% examples, 729598 words/s, in_qsize 0, out_qsize 0
2019-07-06 11:06:55,853 : INFO : EPOCH 3 - PROGRESS: at 10.69% examples, 731184 words/s, in_qsize 0, out_qsize 0
2019-07-06 11:06:56,859 : INFO : EPOCH 3 - PROGRESS: at 11.18% examples, 728650 words/s, in_qsize 0, out_qsize 0
2019-07-06 11:06:57,861 : INFO : EPOCH 3 - PROGRESS: at 11.71% examples, 728336 words/s, in_qsize 0, 

2019-07-06 11:08:03,661 : INFO : EPOCH 3 - PROGRESS: at 45.91% examples, 730529 words/s, in_qsize 0, out_qsize 0
2019-07-06 11:08:04,687 : INFO : EPOCH 3 - PROGRESS: at 46.40% examples, 729791 words/s, in_qsize 1, out_qsize 1
2019-07-06 11:08:05,695 : INFO : EPOCH 3 - PROGRESS: at 46.96% examples, 730190 words/s, in_qsize 0, out_qsize 0
2019-07-06 11:08:06,701 : INFO : EPOCH 3 - PROGRESS: at 47.43% examples, 729490 words/s, in_qsize 0, out_qsize 0
2019-07-06 11:08:07,704 : INFO : EPOCH 3 - PROGRESS: at 47.96% examples, 729556 words/s, in_qsize 1, out_qsize 0
2019-07-06 11:08:08,708 : INFO : EPOCH 3 - PROGRESS: at 48.49% examples, 729298 words/s, in_qsize 1, out_qsize 0
2019-07-06 11:08:09,718 : INFO : EPOCH 3 - PROGRESS: at 48.98% examples, 728216 words/s, in_qsize 1, out_qsize 1
2019-07-06 11:08:10,725 : INFO : EPOCH 3 - PROGRESS: at 49.49% examples, 727631 words/s, in_qsize 0, out_qsize 0
2019-07-06 11:08:11,730 : INFO : EPOCH 3 - PROGRESS: at 49.98% examples, 726501 words/s, in_qsiz

2019-07-06 11:09:17,318 : INFO : EPOCH 3 - PROGRESS: at 84.09% examples, 723645 words/s, in_qsize 0, out_qsize 0
2019-07-06 11:09:18,324 : INFO : EPOCH 3 - PROGRESS: at 84.56% examples, 723224 words/s, in_qsize 0, out_qsize 2
2019-07-06 11:09:19,335 : INFO : EPOCH 3 - PROGRESS: at 85.06% examples, 723167 words/s, in_qsize 21, out_qsize 0
2019-07-06 11:09:20,339 : INFO : EPOCH 3 - PROGRESS: at 85.53% examples, 722934 words/s, in_qsize 20, out_qsize 1
2019-07-06 11:09:21,339 : INFO : EPOCH 3 - PROGRESS: at 86.11% examples, 723688 words/s, in_qsize 3, out_qsize 0
2019-07-06 11:09:22,360 : INFO : EPOCH 3 - PROGRESS: at 86.67% examples, 724132 words/s, in_qsize 0, out_qsize 0
2019-07-06 11:09:23,362 : INFO : EPOCH 3 - PROGRESS: at 87.15% examples, 724127 words/s, in_qsize 0, out_qsize 0
2019-07-06 11:09:24,365 : INFO : EPOCH 3 - PROGRESS: at 87.65% examples, 724301 words/s, in_qsize 0, out_qsize 0
2019-07-06 11:09:25,374 : INFO : EPOCH 3 - PROGRESS: at 88.11% examples, 723915 words/s, in_qs

2019-07-06 11:10:20,422 : INFO : EPOCH 4 - PROGRESS: at 16.67% examples, 736962 words/s, in_qsize 0, out_qsize 1
2019-07-06 11:10:21,443 : INFO : EPOCH 4 - PROGRESS: at 17.14% examples, 733334 words/s, in_qsize 20, out_qsize 0
2019-07-06 11:10:22,460 : INFO : EPOCH 4 - PROGRESS: at 17.73% examples, 735222 words/s, in_qsize 0, out_qsize 0
2019-07-06 11:10:23,465 : INFO : EPOCH 4 - PROGRESS: at 18.29% examples, 735875 words/s, in_qsize 0, out_qsize 0
2019-07-06 11:10:24,484 : INFO : EPOCH 4 - PROGRESS: at 18.74% examples, 732745 words/s, in_qsize 0, out_qsize 0
2019-07-06 11:10:25,487 : INFO : EPOCH 4 - PROGRESS: at 19.26% examples, 731718 words/s, in_qsize 7, out_qsize 1
2019-07-06 11:10:26,488 : INFO : EPOCH 4 - PROGRESS: at 19.76% examples, 730533 words/s, in_qsize 17, out_qsize 2
2019-07-06 11:10:27,505 : INFO : EPOCH 4 - PROGRESS: at 20.35% examples, 731559 words/s, in_qsize 0, out_qsize 0
2019-07-06 11:10:28,505 : INFO : EPOCH 4 - PROGRESS: at 20.88% examples, 732017 words/s, in_qs

2019-07-06 11:11:34,192 : INFO : EPOCH 4 - PROGRESS: at 54.79% examples, 725554 words/s, in_qsize 0, out_qsize 0
2019-07-06 11:11:35,209 : INFO : EPOCH 4 - PROGRESS: at 55.34% examples, 725118 words/s, in_qsize 23, out_qsize 1
2019-07-06 11:11:36,211 : INFO : EPOCH 4 - PROGRESS: at 55.94% examples, 725865 words/s, in_qsize 0, out_qsize 0
2019-07-06 11:11:37,221 : INFO : EPOCH 4 - PROGRESS: at 56.46% examples, 725723 words/s, in_qsize 0, out_qsize 2
2019-07-06 11:11:38,230 : INFO : EPOCH 4 - PROGRESS: at 57.06% examples, 726511 words/s, in_qsize 0, out_qsize 0
2019-07-06 11:11:39,246 : INFO : EPOCH 4 - PROGRESS: at 57.55% examples, 725725 words/s, in_qsize 17, out_qsize 0
2019-07-06 11:11:40,259 : INFO : EPOCH 4 - PROGRESS: at 58.12% examples, 726223 words/s, in_qsize 16, out_qsize 0
2019-07-06 11:11:41,267 : INFO : EPOCH 4 - PROGRESS: at 58.63% examples, 725965 words/s, in_qsize 21, out_qsize 0
2019-07-06 11:11:42,273 : INFO : EPOCH 4 - PROGRESS: at 59.23% examples, 726665 words/s, in_

2019-07-06 11:12:47,886 : INFO : EPOCH 4 - PROGRESS: at 93.70% examples, 733470 words/s, in_qsize 0, out_qsize 0
2019-07-06 11:12:48,895 : INFO : EPOCH 4 - PROGRESS: at 94.25% examples, 733779 words/s, in_qsize 0, out_qsize 0
2019-07-06 11:12:49,909 : INFO : EPOCH 4 - PROGRESS: at 94.78% examples, 734145 words/s, in_qsize 0, out_qsize 0
2019-07-06 11:12:50,933 : INFO : EPOCH 4 - PROGRESS: at 95.19% examples, 733579 words/s, in_qsize 22, out_qsize 0
2019-07-06 11:12:51,944 : INFO : EPOCH 4 - PROGRESS: at 95.77% examples, 734453 words/s, in_qsize 0, out_qsize 0
2019-07-06 11:12:52,962 : INFO : EPOCH 4 - PROGRESS: at 96.27% examples, 734329 words/s, in_qsize 0, out_qsize 1
2019-07-06 11:12:53,963 : INFO : EPOCH 4 - PROGRESS: at 96.72% examples, 733984 words/s, in_qsize 19, out_qsize 0
2019-07-06 11:12:54,968 : INFO : EPOCH 4 - PROGRESS: at 97.30% examples, 734729 words/s, in_qsize 0, out_qsize 0
2019-07-06 11:12:55,975 : INFO : EPOCH 4 - PROGRESS: at 97.76% examples, 734530 words/s, in_qs

2019-07-06 11:13:50,872 : INFO : EPOCH 5 - PROGRESS: at 26.65% examples, 733898 words/s, in_qsize 0, out_qsize 0
2019-07-06 11:13:51,878 : INFO : EPOCH 5 - PROGRESS: at 27.17% examples, 734341 words/s, in_qsize 0, out_qsize 0
2019-07-06 11:13:52,895 : INFO : EPOCH 5 - PROGRESS: at 27.69% examples, 733925 words/s, in_qsize 5, out_qsize 2
2019-07-06 11:13:53,897 : INFO : EPOCH 5 - PROGRESS: at 28.24% examples, 734478 words/s, in_qsize 0, out_qsize 0
2019-07-06 11:13:54,917 : INFO : EPOCH 5 - PROGRESS: at 28.72% examples, 733360 words/s, in_qsize 7, out_qsize 7
2019-07-06 11:13:55,922 : INFO : EPOCH 5 - PROGRESS: at 29.22% examples, 733484 words/s, in_qsize 17, out_qsize 0
2019-07-06 11:13:56,929 : INFO : EPOCH 5 - PROGRESS: at 29.82% examples, 735578 words/s, in_qsize 0, out_qsize 0
2019-07-06 11:13:57,929 : INFO : EPOCH 5 - PROGRESS: at 30.34% examples, 735936 words/s, in_qsize 0, out_qsize 0
2019-07-06 11:13:58,932 : INFO : EPOCH 5 - PROGRESS: at 30.79% examples, 734514 words/s, in_qsi

2019-07-06 11:15:04,689 : INFO : EPOCH 5 - PROGRESS: at 65.62% examples, 730193 words/s, in_qsize 0, out_qsize 0
2019-07-06 11:15:05,690 : INFO : EPOCH 5 - PROGRESS: at 66.05% examples, 729255 words/s, in_qsize 18, out_qsize 1
2019-07-06 11:15:06,695 : INFO : EPOCH 5 - PROGRESS: at 66.67% examples, 730247 words/s, in_qsize 0, out_qsize 0
2019-07-06 11:15:07,709 : INFO : EPOCH 5 - PROGRESS: at 67.15% examples, 729789 words/s, in_qsize 0, out_qsize 0
2019-07-06 11:15:08,719 : INFO : EPOCH 5 - PROGRESS: at 67.70% examples, 730107 words/s, in_qsize 0, out_qsize 0
2019-07-06 11:15:09,733 : INFO : EPOCH 5 - PROGRESS: at 68.23% examples, 730325 words/s, in_qsize 1, out_qsize 0
2019-07-06 11:15:10,751 : INFO : EPOCH 5 - PROGRESS: at 68.68% examples, 729504 words/s, in_qsize 9, out_qsize 1
2019-07-06 11:15:11,756 : INFO : EPOCH 5 - PROGRESS: at 69.31% examples, 730408 words/s, in_qsize 0, out_qsize 0
2019-07-06 11:15:12,757 : INFO : EPOCH 5 - PROGRESS: at 69.79% examples, 729908 words/s, in_qsi

2019-07-06 11:16:11,313 : INFO : worker thread finished; awaiting finish of 3 more threads
2019-07-06 11:16:11,314 : INFO : worker thread finished; awaiting finish of 2 more threads
2019-07-06 11:16:11,316 : INFO : worker thread finished; awaiting finish of 1 more threads
2019-07-06 11:16:11,323 : INFO : worker thread finished; awaiting finish of 0 more threads
2019-07-06 11:16:11,324 : INFO : EPOCH - 5 : training on 173212741 raw words (140583772 effective words) took 191.0s, 735925 effective words/s
2019-07-06 11:16:11,324 : INFO : training on a 866063705 raw words (702925361 effective words) took 956.6s, 734781 effective words/s
2019-07-06 11:16:11,325 : INFO : saving Word2Vec object under models/w2v/model.w2v, separately None
2019-07-06 11:16:11,326 : INFO : storing np array 'vectors' to models/w2v/model.w2v.wv.vectors.npy


FileNotFoundError: [Errno 2] No such file or directory: 'models/w2v/model.w2v.wv.vectors.npy'

In [21]:
model.save('model.w2v')

2019-07-06 11:16:50,199 : INFO : saving Word2Vec object under model.w2v, separately None
2019-07-06 11:16:50,199 : INFO : storing np array 'vectors' to model.w2v.wv.vectors.npy
2019-07-06 11:16:51,949 : INFO : not storing attribute vectors_norm
2019-07-06 11:16:51,950 : INFO : storing np array 'syn1neg' to model.w2v.trainables.syn1neg.npy
  'See the migration notes for details: %s' % _MIGRATION_NOTES_URL
2019-07-06 11:16:55,013 : INFO : saved model.w2v
