In [1]:
import gensim



In [20]:
# Display the installed gensim version so the environment of this run is recorded.
gensim.__version__

'4.0.1'

In [10]:
import time
import multiprocessing
from datetime import timedelta

from gensim.models import word2vec
from gensim.test.utils import get_tmpfile

from gensim.models.callbacks import CallbackAny2Vec
class EpochSaver(CallbackAny2Vec):
    """Training callback that reports per-epoch timing and checkpoints the model.

    At the start of each epoch the wall-clock time is recorded; at the end the
    elapsed time is printed and the model is saved under a file name built
    from ``path_prefix`` and the zero-based epoch counter.
    """

    def __init__(self, path_prefix):
        """Set up the callback.

        Args:
            path_prefix: Leading part of every checkpoint file name; the
                epoch number and ``.model`` extension are appended to it.
        """
        self.path_prefix = path_prefix
        self.epoch = 0        # zero-based index of the epoch in progress
        self.start_time = 0   # time.time() taken when the current epoch began
        self.finish_time = 0  # time.time() taken when the last epoch ended

    def on_epoch_begin(self, model):
        """Record the epoch's start time and announce it."""
        self.start_time = time.time()
        print("Epoch #{} start".format(self.epoch))

    def on_epoch_end(self, model):
        """Print the epoch's duration, save a checkpoint, and advance the counter.

        Args:
            model: Object being trained; only its ``save`` method is used here.
        """
        current = self.epoch
        print("Epoch #{} end".format(current))

        self.finish_time = time.time()
        elapsed = timedelta(seconds=self.finish_time - self.start_time)
        print('Elapsed time Epoch #{}: {}'.format(current, elapsed))

        # The checkpoint path comes from gensim's get_tmpfile helper
        # (presumably a location in the system temp directory; see gensim docs).
        checkpoint_name = '{}_epoch{}.model'.format(self.path_prefix, current)
        model.save(get_tmpfile(checkpoint_name))

        # Advance only after a successful save, as the original ordering did.
        self.epoch = current + 1

In [11]:
# Train a 300-dimensional Word2Vec model on 'idwiki.txt', checkpointing after
# every epoch via EpochSaver, and report the total wall-clock time.
start_time = time.time()
print('Creating Corpus...')
sentences = word2vec.LineSentence('idwiki.txt')
callback = EpochSaver("model_every_epoch")

# Leave one core free, but never go below one worker: cpu_count() - 1 is 0 on
# a single-core machine, which would leave gensim with no training threads.
n_workers = max(1, multiprocessing.cpu_count() - 1)

print('Start Training Word2Vec Model...')
id_w2v = word2vec.Word2Vec(
    sentences,
    vector_size=300,
    workers=n_workers,
    epochs=10,
    callbacks=[callback],
)
id_w2v.save('idwiki_word2vec_300_last.model')
finish_time = time.time()

print('Finished. Elapsed time: {}'.format(timedelta(seconds=finish_time-start_time)))

Creating Corpus...
Start Training Word2Vec Model...
Epoch #0 start
Epoch #0 end
Elapsed time Epoch #0: 0:03:17.755113
Epoch #1 start
Epoch #1 end
Elapsed time Epoch #1: 0:03:22.832046
Epoch #2 start
Epoch #2 end
Elapsed time Epoch #2: 0:03:18.794456
Epoch #3 start
Epoch #3 end
Elapsed time Epoch #3: 0:03:20.516340
Epoch #4 start
Epoch #4 end
Elapsed time Epoch #4: 0:03:22.523046
Epoch #5 start
Epoch #5 end
Elapsed time Epoch #5: 0:03:21.847453
Epoch #6 start
Epoch #6 end
Elapsed time Epoch #6: 0:03:18.540611
Epoch #7 start
Epoch #7 end
Elapsed time Epoch #7: 0:03:17.860952
Epoch #8 start
Epoch #8 end
Elapsed time Epoch #8: 0:03:19.873374
Epoch #9 start
Epoch #9 end
Elapsed time Epoch #9: 0:03:21.977705
Finished. Elapsed time: 0:34:47.428674


In [15]:
import os

In [18]:
class EpochSaver(CallbackAny2Vec):
    """Training callback that reports per-epoch timing and checkpoints the model.

    After every epoch the model is saved to
    ``<path_prefix>/<model_name>_epoch<N>.model``, where ``N`` is the
    zero-based epoch counter kept by this callback.
    """

    def __init__(self, path_prefix, model_name="idwiki_word2vec_300_last_sq"):
        """Set up the callback.

        Args:
            path_prefix: Directory that receives the per-epoch checkpoints.
                It is created on save if it does not exist yet. An empty
                string leaves the checkpoint path relative to the current
                working directory.
            model_name: Base file name of each checkpoint. The default keeps
                the file names this notebook has always produced.
        """
        self.path_prefix = path_prefix
        self.model_name = model_name
        self.epoch = 0        # zero-based index of the epoch in progress
        self.start_time = 0   # time.time() taken when the current epoch began
        self.finish_time = 0  # time.time() taken when the last epoch ended

    def on_epoch_begin(self, model):
        """Record the epoch's start time and announce it."""
        self.start_time = time.time()
        print("Epoch #{} start".format(self.epoch))

    def on_epoch_end(self, model):
        """Print the epoch's duration, save a checkpoint, and advance the counter.

        Args:
            model: Object being trained; only its ``save`` method is used here.
        """
        print("Epoch #{} end".format(self.epoch))
        self.finish_time = time.time()
        print('Elapsed time Epoch #{}: {}'.format(self.epoch, timedelta(seconds=self.finish_time-self.start_time)))

        # Make sure the target directory exists; otherwise the save fails only
        # after the whole first epoch has already been trained.
        # os.makedirs('') raises, so skip it for an empty prefix.
        if self.path_prefix:
            os.makedirs(self.path_prefix, exist_ok=True)

        output_path = os.path.join(self.path_prefix, "{}_epoch{}.model".format(self.model_name, self.epoch))
        model.save(output_path)
        self.epoch += 1

In [19]:
# Train a 300-dimensional Word2Vec model with sg=1 on 'idwiki.txt',
# checkpointing after every epoch into the "skip-gram" directory via
# EpochSaver, and report the total wall-clock time.
start_time = time.time()
print('Creating Corpus...')
sentences = word2vec.LineSentence('idwiki.txt')
callback = EpochSaver("skip-gram")

# Leave one core free, but never go below one worker: cpu_count() - 1 is 0 on
# a single-core machine, which would leave gensim with no training threads.
n_workers = max(1, multiprocessing.cpu_count() - 1)

print('Start Training Word2Vec Model...')
id_w2v = word2vec.Word2Vec(
    sentences,
    vector_size=300,
    workers=n_workers,
    epochs=10,
    callbacks=[callback],
    sg=1,
)
id_w2v.save('idwiki_word2vec_300_last_sq.model')
finish_time = time.time()

print('Finished. Elapsed time: {}'.format(timedelta(seconds=finish_time-start_time)))

Creating Corpus...
Start Training Word2Vec Model...
Epoch #0 start
Epoch #0 end
Elapsed time Epoch #0: 0:07:39.445532
Epoch #1 start
Epoch #1 end
Elapsed time Epoch #1: 0:07:44.569143
Epoch #2 start
Epoch #2 end
Elapsed time Epoch #2: 0:07:54.668850
Epoch #3 start
Epoch #3 end
Elapsed time Epoch #3: 0:08:04.196679
Epoch #4 start
Epoch #4 end
Elapsed time Epoch #4: 0:07:50.081711
Epoch #5 start
Epoch #5 end
Elapsed time Epoch #5: 0:07:40.845050
Epoch #6 start
Epoch #6 end
Elapsed time Epoch #6: 0:07:48.999962
Epoch #7 start
Epoch #7 end
Elapsed time Epoch #7: 0:07:43.239083
Epoch #8 start
Epoch #8 end
Elapsed time Epoch #8: 0:07:44.751890
Epoch #9 start
Epoch #9 end
Elapsed time Epoch #9: 0:07:53.042851
Finished. Elapsed time: 1:19:56.092950
