In [1]:
import gzip
import gensim 
import logging

logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)



In [2]:
data_file = 'reviews_data.txt.gz'

with gzip.open('reviews_data.txt.gz', 'rb') as f:
    for i, line in enumerate(f):
        print(line)
        break

b"Oct 12 2009 \tNice trendy hotel location not too bad.\tI stayed in this hotel for one night. As this is a fairly new place some of the taxi drivers did not know where it was and/or did not want to drive there. Once I have eventually arrived at the hotel, I was very pleasantly surprised with the decor of the lobby/ground floor area. It was very stylish and modern. I found the reception's staff geeting me with 'Aloha' a bit out of place, but I guess they are briefed to say that to keep up the coroporate image.As I have a Starwood Preferred Guest member, I was given a small gift upon-check in. It was only a couple of fridge magnets in a gift box, but nevertheless a nice gesture.My room was nice and roomy, there are tea and coffee facilities in each room and you get two complimentary bottles of water plus some toiletries by 'bliss'.The location is not great. It is at the last metro stop and you then need to take a taxi, but if you are not planning on going to see the historic sites in Be

In [9]:
def read_input(input_file):

    logging.info("reading file {0}...this may take a while".format(input_file))
    
    with gzip.open(input_file, 'rb') as f:
        for i, line in enumerate(f):
            
            if (i%10000 == 0 ):
                logging.info("read {0} reviews".format (i))
            yield gensim.utils.simple_preprocess(line)

In [10]:
documents = list(read_input(data_file))
logging.info("Done reading the Data File.")

2019-01-30 14:51:40,025 : INFO : reading file reviews_data.txt.gz...this may take a while
2019-01-30 14:51:40,030 : INFO : read 0 reviews
2019-01-30 14:51:44,481 : INFO : read 10000 reviews
2019-01-30 14:51:49,021 : INFO : read 20000 reviews
2019-01-30 14:51:54,263 : INFO : read 30000 reviews
2019-01-30 14:51:59,028 : INFO : read 40000 reviews
2019-01-30 14:52:05,110 : INFO : read 50000 reviews
2019-01-30 14:52:10,468 : INFO : read 60000 reviews
2019-01-30 14:52:14,797 : INFO : read 70000 reviews
2019-01-30 14:52:18,844 : INFO : read 80000 reviews
2019-01-30 14:52:23,032 : INFO : read 90000 reviews
2019-01-30 14:52:27,087 : INFO : read 100000 reviews
2019-01-30 14:52:31,521 : INFO : read 110000 reviews
2019-01-30 14:52:35,556 : INFO : read 120000 reviews
2019-01-30 14:52:39,704 : INFO : read 130000 reviews
2019-01-30 14:52:44,107 : INFO : read 140000 reviews
2019-01-30 14:52:48,165 : INFO : read 150000 reviews
2019-01-30 14:52:52,342 : INFO : read 160000 reviews
2019-01-30 14:52:56,408

In [11]:
model = gensim.models.Word2Vec(documents, size=150, window=10, min_count=2, workers=10)
model.train(documents,total_examples=len(documents),epochs=10)

2019-01-30 14:53:43,386 : INFO : collecting all words and their counts
2019-01-30 14:53:43,389 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2019-01-30 14:53:43,931 : INFO : PROGRESS: at sentence #10000, processed 1655714 words, keeping 25777 word types
2019-01-30 14:53:44,450 : INFO : PROGRESS: at sentence #20000, processed 3317863 words, keeping 35016 word types
2019-01-30 14:53:45,083 : INFO : PROGRESS: at sentence #30000, processed 5264072 words, keeping 47518 word types
2019-01-30 14:53:45,685 : INFO : PROGRESS: at sentence #40000, processed 7081746 words, keeping 56675 word types
2019-01-30 14:53:46,333 : INFO : PROGRESS: at sentence #50000, processed 9089491 words, keeping 63744 word types
2019-01-30 14:53:46,953 : INFO : PROGRESS: at sentence #60000, processed 11013723 words, keeping 76781 word types
2019-01-30 14:53:47,476 : INFO : PROGRESS: at sentence #70000, processed 12637525 words, keeping 83194 word types
2019-01-30 14:53:47,949 : INFO : PROG

2019-01-30 14:54:39,167 : INFO : EPOCH 1 - PROGRESS: at 41.89% examples, 350521 words/s, in_qsize 20, out_qsize 0
2019-01-30 14:54:40,191 : INFO : EPOCH 1 - PROGRESS: at 43.06% examples, 350158 words/s, in_qsize 20, out_qsize 2
2019-01-30 14:54:41,226 : INFO : EPOCH 1 - PROGRESS: at 44.41% examples, 349945 words/s, in_qsize 17, out_qsize 2
2019-01-30 14:54:42,248 : INFO : EPOCH 1 - PROGRESS: at 45.71% examples, 350013 words/s, in_qsize 19, out_qsize 0
2019-01-30 14:54:43,250 : INFO : EPOCH 1 - PROGRESS: at 46.99% examples, 350726 words/s, in_qsize 20, out_qsize 0
2019-01-30 14:54:44,251 : INFO : EPOCH 1 - PROGRESS: at 48.20% examples, 350803 words/s, in_qsize 19, out_qsize 0
2019-01-30 14:54:45,261 : INFO : EPOCH 1 - PROGRESS: at 49.55% examples, 351240 words/s, in_qsize 20, out_qsize 0
2019-01-30 14:54:46,274 : INFO : EPOCH 1 - PROGRESS: at 50.83% examples, 351506 words/s, in_qsize 20, out_qsize 2
2019-01-30 14:54:47,282 : INFO : EPOCH 1 - PROGRESS: at 52.08% examples, 352047 words/s,

2019-01-30 14:55:43,428 : INFO : EPOCH 2 - PROGRESS: at 18.17% examples, 352581 words/s, in_qsize 19, out_qsize 0
2019-01-30 14:55:44,439 : INFO : EPOCH 2 - PROGRESS: at 19.11% examples, 352580 words/s, in_qsize 20, out_qsize 0
2019-01-30 14:55:45,545 : INFO : EPOCH 2 - PROGRESS: at 20.05% examples, 351354 words/s, in_qsize 19, out_qsize 0
2019-01-30 14:55:46,553 : INFO : EPOCH 2 - PROGRESS: at 20.89% examples, 350584 words/s, in_qsize 19, out_qsize 0
2019-01-30 14:55:47,576 : INFO : EPOCH 2 - PROGRESS: at 22.03% examples, 348438 words/s, in_qsize 17, out_qsize 2
2019-01-30 14:55:48,597 : INFO : EPOCH 2 - PROGRESS: at 23.02% examples, 349384 words/s, in_qsize 19, out_qsize 0
2019-01-30 14:55:49,681 : INFO : EPOCH 2 - PROGRESS: at 23.96% examples, 349686 words/s, in_qsize 20, out_qsize 0
2019-01-30 14:55:50,711 : INFO : EPOCH 2 - PROGRESS: at 24.89% examples, 347631 words/s, in_qsize 19, out_qsize 0
2019-01-30 14:55:51,801 : INFO : EPOCH 2 - PROGRESS: at 26.14% examples, 346076 words/s,

2019-01-30 14:56:51,299 : INFO : worker thread finished; awaiting finish of 2 more threads
2019-01-30 14:56:51,302 : INFO : worker thread finished; awaiting finish of 1 more threads
2019-01-30 14:56:51,305 : INFO : worker thread finished; awaiting finish of 0 more threads
2019-01-30 14:56:51,307 : INFO : EPOCH - 2 : training on 41519355 raw words (30349963 effective words) took 85.2s, 356218 effective words/s
2019-01-30 14:56:52,334 : INFO : EPOCH 3 - PROGRESS: at 1.01% examples, 329014 words/s, in_qsize 20, out_qsize 0
2019-01-30 14:56:53,353 : INFO : EPOCH 3 - PROGRESS: at 2.26% examples, 352175 words/s, in_qsize 19, out_qsize 0
2019-01-30 14:56:54,416 : INFO : EPOCH 3 - PROGRESS: at 3.60% examples, 361563 words/s, in_qsize 19, out_qsize 0
2019-01-30 14:56:55,442 : INFO : EPOCH 3 - PROGRESS: at 4.88% examples, 364612 words/s, in_qsize 18, out_qsize 1
2019-01-30 14:56:56,455 : INFO : EPOCH 3 - PROGRESS: at 6.10% examples, 366815 words/s, in_qsize 19, out_qsize 0
2019-01-30 14:56:57,46

2019-01-30 14:58:03,104 : INFO : EPOCH 3 - PROGRESS: at 84.57% examples, 360920 words/s, in_qsize 18, out_qsize 1
2019-01-30 14:58:04,117 : INFO : EPOCH 3 - PROGRESS: at 85.71% examples, 360647 words/s, in_qsize 20, out_qsize 0
2019-01-30 14:58:05,122 : INFO : EPOCH 3 - PROGRESS: at 87.08% examples, 360908 words/s, in_qsize 20, out_qsize 0
2019-01-30 14:58:06,137 : INFO : EPOCH 3 - PROGRESS: at 88.49% examples, 361215 words/s, in_qsize 19, out_qsize 0
2019-01-30 14:58:07,194 : INFO : EPOCH 3 - PROGRESS: at 89.74% examples, 360953 words/s, in_qsize 19, out_qsize 0
2019-01-30 14:58:08,207 : INFO : EPOCH 3 - PROGRESS: at 91.10% examples, 361083 words/s, in_qsize 20, out_qsize 0
2019-01-30 14:58:09,240 : INFO : EPOCH 3 - PROGRESS: at 92.34% examples, 360833 words/s, in_qsize 19, out_qsize 0
2019-01-30 14:58:10,254 : INFO : EPOCH 3 - PROGRESS: at 93.32% examples, 360216 words/s, in_qsize 20, out_qsize 0
2019-01-30 14:58:11,297 : INFO : EPOCH 3 - PROGRESS: at 94.55% examples, 359947 words/s,

2019-01-30 14:59:07,645 : INFO : EPOCH 4 - PROGRESS: at 59.23% examples, 351102 words/s, in_qsize 18, out_qsize 1
2019-01-30 14:59:08,646 : INFO : EPOCH 4 - PROGRESS: at 60.42% examples, 351046 words/s, in_qsize 19, out_qsize 0
2019-01-30 14:59:09,682 : INFO : EPOCH 4 - PROGRESS: at 61.66% examples, 351021 words/s, in_qsize 19, out_qsize 0
2019-01-30 14:59:10,775 : INFO : EPOCH 4 - PROGRESS: at 62.84% examples, 350217 words/s, in_qsize 20, out_qsize 0
2019-01-30 14:59:11,795 : INFO : EPOCH 4 - PROGRESS: at 64.10% examples, 349516 words/s, in_qsize 19, out_qsize 0
2019-01-30 14:59:12,866 : INFO : EPOCH 4 - PROGRESS: at 65.26% examples, 348763 words/s, in_qsize 19, out_qsize 0
2019-01-30 14:59:13,924 : INFO : EPOCH 4 - PROGRESS: at 66.31% examples, 348095 words/s, in_qsize 20, out_qsize 0
2019-01-30 14:59:14,923 : INFO : EPOCH 4 - PROGRESS: at 67.23% examples, 346819 words/s, in_qsize 20, out_qsize 0
2019-01-30 14:59:15,993 : INFO : EPOCH 4 - PROGRESS: at 68.04% examples, 344610 words/s,

2019-01-30 15:00:12,785 : INFO : EPOCH 5 - PROGRESS: at 35.57% examples, 375811 words/s, in_qsize 17, out_qsize 2
2019-01-30 15:00:13,809 : INFO : EPOCH 5 - PROGRESS: at 36.96% examples, 376181 words/s, in_qsize 20, out_qsize 1
2019-01-30 15:00:14,837 : INFO : EPOCH 5 - PROGRESS: at 38.28% examples, 375770 words/s, in_qsize 20, out_qsize 0
2019-01-30 15:00:15,845 : INFO : EPOCH 5 - PROGRESS: at 39.62% examples, 375883 words/s, in_qsize 19, out_qsize 0
2019-01-30 15:00:16,951 : INFO : EPOCH 5 - PROGRESS: at 41.11% examples, 375327 words/s, in_qsize 20, out_qsize 0
2019-01-30 15:00:17,973 : INFO : EPOCH 5 - PROGRESS: at 42.58% examples, 376063 words/s, in_qsize 20, out_qsize 0
2019-01-30 15:00:18,978 : INFO : EPOCH 5 - PROGRESS: at 43.96% examples, 376005 words/s, in_qsize 20, out_qsize 0
2019-01-30 15:00:20,015 : INFO : EPOCH 5 - PROGRESS: at 45.35% examples, 375588 words/s, in_qsize 20, out_qsize 0
2019-01-30 15:00:21,079 : INFO : EPOCH 5 - PROGRESS: at 46.46% examples, 373669 words/s,

2019-01-30 15:01:14,788 : INFO : EPOCH 1 - PROGRESS: at 10.58% examples, 367566 words/s, in_qsize 15, out_qsize 4
2019-01-30 15:01:15,805 : INFO : EPOCH 1 - PROGRESS: at 11.60% examples, 368662 words/s, in_qsize 20, out_qsize 0
2019-01-30 15:01:16,836 : INFO : EPOCH 1 - PROGRESS: at 12.59% examples, 369311 words/s, in_qsize 20, out_qsize 0
2019-01-30 15:01:17,869 : INFO : EPOCH 1 - PROGRESS: at 13.83% examples, 371656 words/s, in_qsize 17, out_qsize 2
2019-01-30 15:01:18,871 : INFO : EPOCH 1 - PROGRESS: at 14.89% examples, 371292 words/s, in_qsize 20, out_qsize 0
2019-01-30 15:01:19,886 : INFO : EPOCH 1 - PROGRESS: at 16.09% examples, 373315 words/s, in_qsize 17, out_qsize 2
2019-01-30 15:01:20,881 : INFO : EPOCH 1 - PROGRESS: at 17.14% examples, 374261 words/s, in_qsize 19, out_qsize 0
2019-01-30 15:01:21,895 : INFO : EPOCH 1 - PROGRESS: at 18.12% examples, 373184 words/s, in_qsize 20, out_qsize 1
2019-01-30 15:01:22,932 : INFO : EPOCH 1 - PROGRESS: at 19.21% examples, 374359 words/s,

2019-01-30 15:02:28,433 : INFO : EPOCH 1 - PROGRESS: at 99.70% examples, 365256 words/s, in_qsize 12, out_qsize 0
2019-01-30 15:02:28,603 : INFO : worker thread finished; awaiting finish of 9 more threads
2019-01-30 15:02:28,606 : INFO : worker thread finished; awaiting finish of 8 more threads
2019-01-30 15:02:28,611 : INFO : worker thread finished; awaiting finish of 7 more threads
2019-01-30 15:02:28,617 : INFO : worker thread finished; awaiting finish of 6 more threads
2019-01-30 15:02:28,626 : INFO : worker thread finished; awaiting finish of 5 more threads
2019-01-30 15:02:28,629 : INFO : worker thread finished; awaiting finish of 4 more threads
2019-01-30 15:02:28,641 : INFO : worker thread finished; awaiting finish of 3 more threads
2019-01-30 15:02:28,645 : INFO : worker thread finished; awaiting finish of 2 more threads
2019-01-30 15:02:28,647 : INFO : worker thread finished; awaiting finish of 1 more threads
2019-01-30 15:02:28,662 : INFO : worker thread finished; awaiting f

2019-01-30 15:03:33,255 : INFO : EPOCH 2 - PROGRESS: at 74.19% examples, 351093 words/s, in_qsize 20, out_qsize 0
2019-01-30 15:03:34,262 : INFO : EPOCH 2 - PROGRESS: at 75.13% examples, 350186 words/s, in_qsize 20, out_qsize 0
2019-01-30 15:03:35,268 : INFO : EPOCH 2 - PROGRESS: at 76.24% examples, 350312 words/s, in_qsize 18, out_qsize 1
2019-01-30 15:03:36,371 : INFO : EPOCH 2 - PROGRESS: at 77.33% examples, 349710 words/s, in_qsize 19, out_qsize 0
2019-01-30 15:03:37,373 : INFO : EPOCH 2 - PROGRESS: at 78.45% examples, 349736 words/s, in_qsize 20, out_qsize 0
2019-01-30 15:03:38,414 : INFO : EPOCH 2 - PROGRESS: at 79.63% examples, 349670 words/s, in_qsize 19, out_qsize 0
2019-01-30 15:03:39,451 : INFO : EPOCH 2 - PROGRESS: at 80.78% examples, 349643 words/s, in_qsize 19, out_qsize 0
2019-01-30 15:03:40,491 : INFO : EPOCH 2 - PROGRESS: at 81.99% examples, 349594 words/s, in_qsize 20, out_qsize 0
2019-01-30 15:03:41,498 : INFO : EPOCH 2 - PROGRESS: at 83.26% examples, 349913 words/s,

2019-01-30 15:04:37,962 : INFO : EPOCH 3 - PROGRESS: at 50.56% examples, 363856 words/s, in_qsize 19, out_qsize 0
2019-01-30 15:04:39,018 : INFO : EPOCH 3 - PROGRESS: at 51.93% examples, 364465 words/s, in_qsize 20, out_qsize 0
2019-01-30 15:04:40,020 : INFO : EPOCH 3 - PROGRESS: at 53.00% examples, 364153 words/s, in_qsize 19, out_qsize 0
2019-01-30 15:04:41,094 : INFO : EPOCH 3 - PROGRESS: at 54.37% examples, 364476 words/s, in_qsize 17, out_qsize 2
2019-01-30 15:04:42,142 : INFO : EPOCH 3 - PROGRESS: at 55.87% examples, 364960 words/s, in_qsize 18, out_qsize 1
2019-01-30 15:04:43,143 : INFO : EPOCH 3 - PROGRESS: at 57.27% examples, 365776 words/s, in_qsize 19, out_qsize 0
2019-01-30 15:04:44,164 : INFO : EPOCH 3 - PROGRESS: at 58.52% examples, 365804 words/s, in_qsize 18, out_qsize 1
2019-01-30 15:04:45,189 : INFO : EPOCH 3 - PROGRESS: at 59.79% examples, 365557 words/s, in_qsize 20, out_qsize 0
2019-01-30 15:04:46,189 : INFO : EPOCH 3 - PROGRESS: at 61.24% examples, 366629 words/s,

2019-01-30 15:05:43,568 : INFO : EPOCH 4 - PROGRESS: at 30.09% examples, 371689 words/s, in_qsize 19, out_qsize 0
2019-01-30 15:05:44,621 : INFO : EPOCH 4 - PROGRESS: at 31.54% examples, 371124 words/s, in_qsize 20, out_qsize 0
2019-01-30 15:05:45,665 : INFO : EPOCH 4 - PROGRESS: at 32.90% examples, 371176 words/s, in_qsize 19, out_qsize 0
2019-01-30 15:05:46,693 : INFO : EPOCH 4 - PROGRESS: at 34.19% examples, 371480 words/s, in_qsize 20, out_qsize 0
2019-01-30 15:05:47,740 : INFO : EPOCH 4 - PROGRESS: at 35.48% examples, 371052 words/s, in_qsize 20, out_qsize 0
2019-01-30 15:05:48,740 : INFO : EPOCH 4 - PROGRESS: at 36.97% examples, 372360 words/s, in_qsize 18, out_qsize 1
2019-01-30 15:05:49,783 : INFO : EPOCH 4 - PROGRESS: at 38.32% examples, 372275 words/s, in_qsize 19, out_qsize 0
2019-01-30 15:05:50,788 : INFO : EPOCH 4 - PROGRESS: at 39.57% examples, 371903 words/s, in_qsize 19, out_qsize 0
2019-01-30 15:05:51,802 : INFO : EPOCH 4 - PROGRESS: at 40.92% examples, 371840 words/s,

2019-01-30 15:06:48,647 : INFO : EPOCH 5 - PROGRESS: at 11.76% examples, 372176 words/s, in_qsize 19, out_qsize 0
2019-01-30 15:06:49,663 : INFO : EPOCH 5 - PROGRESS: at 12.72% examples, 370491 words/s, in_qsize 19, out_qsize 0
2019-01-30 15:06:50,674 : INFO : EPOCH 5 - PROGRESS: at 13.98% examples, 372742 words/s, in_qsize 18, out_qsize 2
2019-01-30 15:06:51,711 : INFO : EPOCH 5 - PROGRESS: at 15.09% examples, 372920 words/s, in_qsize 18, out_qsize 1
2019-01-30 15:06:52,721 : INFO : EPOCH 5 - PROGRESS: at 16.24% examples, 373871 words/s, in_qsize 20, out_qsize 0
2019-01-30 15:06:53,746 : INFO : EPOCH 5 - PROGRESS: at 17.30% examples, 374578 words/s, in_qsize 20, out_qsize 0
2019-01-30 15:06:54,778 : INFO : EPOCH 5 - PROGRESS: at 18.26% examples, 372624 words/s, in_qsize 20, out_qsize 0
2019-01-30 15:06:55,787 : INFO : EPOCH 5 - PROGRESS: at 19.31% examples, 374039 words/s, in_qsize 19, out_qsize 0
2019-01-30 15:06:56,792 : INFO : EPOCH 5 - PROGRESS: at 20.26% examples, 373847 words/s,

2019-01-30 15:07:59,425 : INFO : worker thread finished; awaiting finish of 6 more threads
2019-01-30 15:07:59,437 : INFO : worker thread finished; awaiting finish of 5 more threads
2019-01-30 15:07:59,449 : INFO : worker thread finished; awaiting finish of 4 more threads
2019-01-30 15:07:59,453 : INFO : worker thread finished; awaiting finish of 3 more threads
2019-01-30 15:07:59,463 : INFO : worker thread finished; awaiting finish of 2 more threads
2019-01-30 15:07:59,482 : INFO : worker thread finished; awaiting finish of 1 more threads
2019-01-30 15:07:59,488 : INFO : worker thread finished; awaiting finish of 0 more threads
2019-01-30 15:07:59,490 : INFO : EPOCH - 5 : training on 41519355 raw words (30347607 effective words) took 81.2s, 373762 effective words/s
2019-01-30 15:08:00,585 : INFO : EPOCH 6 - PROGRESS: at 1.02% examples, 310115 words/s, in_qsize 19, out_qsize 0
2019-01-30 15:08:01,609 : INFO : EPOCH 6 - PROGRESS: at 2.33% examples, 350894 words/s, in_qsize 16, out_qsize

2019-01-30 15:09:08,311 : INFO : EPOCH 6 - PROGRESS: at 82.04% examples, 365105 words/s, in_qsize 19, out_qsize 0
2019-01-30 15:09:09,319 : INFO : EPOCH 6 - PROGRESS: at 83.14% examples, 364485 words/s, in_qsize 18, out_qsize 1
2019-01-30 15:09:10,345 : INFO : EPOCH 6 - PROGRESS: at 84.28% examples, 364276 words/s, in_qsize 19, out_qsize 0
2019-01-30 15:09:11,367 : INFO : EPOCH 6 - PROGRESS: at 85.48% examples, 364377 words/s, in_qsize 19, out_qsize 3
2019-01-30 15:09:12,418 : INFO : EPOCH 6 - PROGRESS: at 86.76% examples, 364076 words/s, in_qsize 19, out_qsize 0
2019-01-30 15:09:13,424 : INFO : EPOCH 6 - PROGRESS: at 88.03% examples, 363806 words/s, in_qsize 20, out_qsize 0
2019-01-30 15:09:14,427 : INFO : EPOCH 6 - PROGRESS: at 89.18% examples, 363286 words/s, in_qsize 20, out_qsize 0
2019-01-30 15:09:15,431 : INFO : EPOCH 6 - PROGRESS: at 90.30% examples, 362672 words/s, in_qsize 19, out_qsize 0
2019-01-30 15:09:16,447 : INFO : EPOCH 6 - PROGRESS: at 91.25% examples, 361443 words/s,

2019-01-30 15:10:13,262 : INFO : EPOCH 7 - PROGRESS: at 56.14% examples, 351785 words/s, in_qsize 20, out_qsize 0
2019-01-30 15:10:14,293 : INFO : EPOCH 7 - PROGRESS: at 57.49% examples, 352341 words/s, in_qsize 19, out_qsize 0
2019-01-30 15:10:15,307 : INFO : EPOCH 7 - PROGRESS: at 58.76% examples, 352684 words/s, in_qsize 18, out_qsize 1
2019-01-30 15:10:16,321 : INFO : EPOCH 7 - PROGRESS: at 60.08% examples, 353186 words/s, in_qsize 18, out_qsize 1
2019-01-30 15:10:17,355 : INFO : EPOCH 7 - PROGRESS: at 61.35% examples, 353272 words/s, in_qsize 16, out_qsize 3
2019-01-30 15:10:18,401 : INFO : EPOCH 7 - PROGRESS: at 62.52% examples, 352732 words/s, in_qsize 19, out_qsize 0
2019-01-30 15:10:19,462 : INFO : EPOCH 7 - PROGRESS: at 63.96% examples, 352756 words/s, in_qsize 20, out_qsize 0
2019-01-30 15:10:20,463 : INFO : EPOCH 7 - PROGRESS: at 65.32% examples, 353376 words/s, in_qsize 20, out_qsize 0
2019-01-30 15:10:21,473 : INFO : EPOCH 7 - PROGRESS: at 66.59% examples, 354020 words/s,

2019-01-30 15:11:18,914 : INFO : EPOCH 8 - PROGRESS: at 35.37% examples, 370675 words/s, in_qsize 19, out_qsize 0
2019-01-30 15:11:19,972 : INFO : EPOCH 8 - PROGRESS: at 36.82% examples, 370974 words/s, in_qsize 19, out_qsize 2
2019-01-30 15:11:21,020 : INFO : EPOCH 8 - PROGRESS: at 38.24% examples, 371440 words/s, in_qsize 19, out_qsize 0
2019-01-30 15:11:22,064 : INFO : EPOCH 8 - PROGRESS: at 39.52% examples, 370773 words/s, in_qsize 19, out_qsize 0
2019-01-30 15:11:23,108 : INFO : EPOCH 8 - PROGRESS: at 40.86% examples, 370424 words/s, in_qsize 19, out_qsize 0
2019-01-30 15:11:24,114 : INFO : EPOCH 8 - PROGRESS: at 42.35% examples, 371055 words/s, in_qsize 19, out_qsize 0
2019-01-30 15:11:25,126 : INFO : EPOCH 8 - PROGRESS: at 43.70% examples, 371255 words/s, in_qsize 19, out_qsize 0
2019-01-30 15:11:26,126 : INFO : EPOCH 8 - PROGRESS: at 45.05% examples, 370760 words/s, in_qsize 20, out_qsize 1
2019-01-30 15:11:27,136 : INFO : EPOCH 8 - PROGRESS: at 46.40% examples, 371148 words/s,

2019-01-30 15:12:23,550 : INFO : EPOCH 9 - PROGRESS: at 15.89% examples, 365283 words/s, in_qsize 17, out_qsize 2
2019-01-30 15:12:24,551 : INFO : EPOCH 9 - PROGRESS: at 16.96% examples, 366304 words/s, in_qsize 19, out_qsize 0
2019-01-30 15:12:25,579 : INFO : EPOCH 9 - PROGRESS: at 17.92% examples, 365809 words/s, in_qsize 19, out_qsize 0
2019-01-30 15:12:26,606 : INFO : EPOCH 9 - PROGRESS: at 18.98% examples, 366435 words/s, in_qsize 16, out_qsize 3
2019-01-30 15:12:27,627 : INFO : EPOCH 9 - PROGRESS: at 19.90% examples, 365941 words/s, in_qsize 15, out_qsize 4
2019-01-30 15:12:28,643 : INFO : EPOCH 9 - PROGRESS: at 20.84% examples, 366039 words/s, in_qsize 19, out_qsize 0
2019-01-30 15:12:29,657 : INFO : EPOCH 9 - PROGRESS: at 22.11% examples, 365674 words/s, in_qsize 17, out_qsize 3
2019-01-30 15:12:30,661 : INFO : EPOCH 9 - PROGRESS: at 23.10% examples, 366691 words/s, in_qsize 20, out_qsize 0
2019-01-30 15:12:31,700 : INFO : EPOCH 9 - PROGRESS: at 24.11% examples, 367320 words/s,

2019-01-30 15:13:30,722 : INFO : worker thread finished; awaiting finish of 1 more threads
2019-01-30 15:13:30,729 : INFO : worker thread finished; awaiting finish of 0 more threads
2019-01-30 15:13:30,731 : INFO : EPOCH - 9 : training on 41519355 raw words (30349756 effective words) took 81.6s, 371861 effective words/s
2019-01-30 15:13:31,799 : INFO : EPOCH 10 - PROGRESS: at 1.00% examples, 311436 words/s, in_qsize 20, out_qsize 0
2019-01-30 15:13:32,815 : INFO : EPOCH 10 - PROGRESS: at 2.33% examples, 356871 words/s, in_qsize 19, out_qsize 0
2019-01-30 15:13:33,817 : INFO : EPOCH 10 - PROGRESS: at 3.54% examples, 357878 words/s, in_qsize 19, out_qsize 0
2019-01-30 15:13:34,854 : INFO : EPOCH 10 - PROGRESS: at 4.88% examples, 366241 words/s, in_qsize 18, out_qsize 1
2019-01-30 15:13:35,878 : INFO : EPOCH 10 - PROGRESS: at 6.08% examples, 365729 words/s, in_qsize 19, out_qsize 0
2019-01-30 15:13:36,881 : INFO : EPOCH 10 - PROGRESS: at 7.25% examples, 365757 words/s, in_qsize 20, out_qs

2019-01-30 15:14:42,953 : INFO : EPOCH 10 - PROGRESS: at 85.40% examples, 362374 words/s, in_qsize 20, out_qsize 1
2019-01-30 15:14:43,980 : INFO : EPOCH 10 - PROGRESS: at 86.85% examples, 362795 words/s, in_qsize 17, out_qsize 2
2019-01-30 15:14:44,990 : INFO : EPOCH 10 - PROGRESS: at 88.12% examples, 362582 words/s, in_qsize 19, out_qsize 0
2019-01-30 15:14:46,087 : INFO : EPOCH 10 - PROGRESS: at 89.47% examples, 362444 words/s, in_qsize 18, out_qsize 1
2019-01-30 15:14:47,123 : INFO : EPOCH 10 - PROGRESS: at 90.93% examples, 362926 words/s, in_qsize 19, out_qsize 0
2019-01-30 15:14:48,144 : INFO : EPOCH 10 - PROGRESS: at 92.29% examples, 363083 words/s, in_qsize 19, out_qsize 0
2019-01-30 15:14:49,166 : INFO : EPOCH 10 - PROGRESS: at 93.52% examples, 363215 words/s, in_qsize 20, out_qsize 0
2019-01-30 15:14:50,181 : INFO : EPOCH 10 - PROGRESS: at 94.78% examples, 363218 words/s, in_qsize 18, out_qsize 1
2019-01-30 15:14:51,220 : INFO : EPOCH 10 - PROGRESS: at 96.23% examples, 363786

(303495109, 415193550)

In [12]:
w1 = "dirty"
model.wv.most_similar (positive=w1)

2019-01-30 15:15:16,564 : INFO : precomputing L2-norms of word weight vectors


[('filthy', 0.8679323196411133),
 ('unclean', 0.7863996028900146),
 ('stained', 0.77724289894104),
 ('smelly', 0.7596755027770996),
 ('grubby', 0.7581830024719238),
 ('dusty', 0.74628746509552),
 ('soiled', 0.7328138947486877),
 ('grimy', 0.7261374592781067),
 ('dingy', 0.7171832919120789),
 ('disgusting', 0.7156532406806946)]

In [13]:
w1 = ["polite"]
model.wv.most_similar (positive=w1,topn=6)

[('courteous', 0.9196229577064514),
 ('friendly', 0.8268195986747742),
 ('cordial', 0.8100727796554565),
 ('professional', 0.7872114777565002),
 ('curteous', 0.7856423258781433),
 ('attentive', 0.7775445580482483)]

In [14]:
w1 = ["france"]
model.wv.most_similar (positive=w1,topn=6)

[('germany', 0.6699728965759277),
 ('canada', 0.6626535654067993),
 ('spain', 0.6104305982589722),
 ('gaulle', 0.6013795733451843),
 ('hawaii', 0.5869171023368835),
 ('rome', 0.581384539604187)]

In [15]:
w1 = ["shocked"]
model.wv.most_similar (positive=w1,topn=6)

[('amazed', 0.8019899725914001),
 ('horrified', 0.7977362275123596),
 ('astonished', 0.7897680401802063),
 ('stunned', 0.7676576375961304),
 ('appalled', 0.7590023279190063),
 ('dismayed', 0.7497164011001587)]

In [16]:
w1 = ["bed",'sheet','pillow']
w2 = ['couch']
model.wv.most_similar(positive=w1, negative=w2, topn=10)

[('duvet', 0.7118744254112244),
 ('blanket', 0.706264853477478),
 ('mattress', 0.6904749870300293),
 ('quilt', 0.6716538071632385),
 ('matress', 0.6661882400512695),
 ('pillowcase', 0.6639519333839417),
 ('pillows', 0.6417618989944458),
 ('sheets', 0.6374083757400513),
 ('foam', 0.6327905654907227),
 ('pillowcases', 0.6133320331573486)]

In [17]:
model.wv.similarity(w1='dirty',w2='smelly')

0.7596755611888756

In [18]:
model.wv.similarity(w1="dirty",w2="dirty")

1.0

In [19]:
model.wv.similarity(w1="dirty",w2="clean")

0.2731449163130837

In [20]:
model.wv.doesnt_match(["cat","dog","france"])

  vectors = vstack(self.word_vec(word, use_norm=True) for word in used_words).astype(REAL)


'france'

In [21]:
model.wv.doesnt_match(["bed","pillow","duvet","shower"])

  vectors = vstack(self.word_vec(word, use_norm=True) for word in used_words).astype(REAL)


'shower'