You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
I have gotten W2V with the package to work with the Gutenberg corpus, but I am having trouble getting other corpora to work. I have downloaded the corpora as per your instructions.
The Wikipedia-corpus simply keeps loading when I do w2v = Word2Vec().
The LCC-corpus is not able to find the file word2vec.pkl.gz. It seems to be related to this issue: #6. In my dasem_data/lcc folder I only have a dan-dk_web_2014_10K.tar.gz-file, so I am wondering if it is simply a naming issue?
Here is my full error:
/home/bertil/anaconda3/envs/word2vec_tool/lib/python3.7/site-packages/smart_open/smart_open_lib.py:398: UserWarning: This function is deprecated, use smart_open.open instead. See the migration notes for details: https://github.com/RaRe-Technologies/smart_open/blob/master/README.rst#migrating-to-the-new-open-function
'See the migration notes for details: %s' % _MIGRATION_NOTES_URL
Traceback (most recent call last):
File "/home/bertil/anaconda3/envs/word2vec_tool/lib/python3.7/site-packages/dasem/models.py", line 421, in __init__
self.load()
File "/home/bertil/anaconda3/envs/word2vec_tool/lib/python3.7/site-packages/dasem/models.py", line 460, in load
self.model = gensim.models.Word2Vec.load(full_filename)
File "/home/bertil/anaconda3/envs/word2vec_tool/lib/python3.7/site-packages/gensim/models/word2vec.py", line 1330, in load
model = super(Word2Vec, cls).load(*args, **kwargs)
File "/home/bertil/anaconda3/envs/word2vec_tool/lib/python3.7/site-packages/gensim/models/base_any2vec.py", line 1244, in load
model = super(BaseWordEmbeddingsModel, cls).load(*args, **kwargs)
File "/home/bertil/anaconda3/envs/word2vec_tool/lib/python3.7/site-packages/gensim/models/base_any2vec.py", line 603, in load
return super(BaseAny2VecModel, cls).load(fname_or_handle, **kwargs)
File "/home/bertil/anaconda3/envs/word2vec_tool/lib/python3.7/site-packages/gensim/utils.py", line 426, in load
obj = unpickle(fname)
File "/home/bertil/anaconda3/envs/word2vec_tool/lib/python3.7/site-packages/gensim/utils.py", line 1381, in unpickle
with smart_open(fname, 'rb') as f:
File "/home/bertil/anaconda3/envs/word2vec_tool/lib/python3.7/site-packages/smart_open/smart_open_lib.py", line 453, in smart_open
return open(uri, mode, ignore_ext=ignore_extension, transport_params=transport_params, **scrubbed_kwargs)
File "/home/bertil/anaconda3/envs/word2vec_tool/lib/python3.7/site-packages/smart_open/smart_open_lib.py", line 348, in open
binary, filename = _open_binary_stream(uri, binary_mode, transport_params)
File "/home/bertil/anaconda3/envs/word2vec_tool/lib/python3.7/site-packages/smart_open/smart_open_lib.py", line 544, in _open_binary_stream
fobj = io.open(parsed_uri.uri_path, mode)
FileNotFoundError: [Errno 2] No such file or directory: '/home/bertil/dasem_data/lcc/word2vec.pkl.gz'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/bertil/anaconda3/envs/word2vec_tool/lib/python3.7/tarfile.py", line 1645, in gzopen
t = cls.taropen(name, mode, fileobj, **kwargs)
File "/home/bertil/anaconda3/envs/word2vec_tool/lib/python3.7/tarfile.py", line 1621, in taropen
return cls(name, mode, fileobj, **kwargs)
File "/home/bertil/anaconda3/envs/word2vec_tool/lib/python3.7/tarfile.py", line 1484, in __init__
self.firstmember = self.next()
File "/home/bertil/anaconda3/envs/word2vec_tool/lib/python3.7/tarfile.py", line 2289, in next
tarinfo = self.tarinfo.fromtarfile(self)
File "/home/bertil/anaconda3/envs/word2vec_tool/lib/python3.7/tarfile.py", line 1094, in fromtarfile
buf = tarfile.fileobj.read(BLOCKSIZE)
File "/home/bertil/anaconda3/envs/word2vec_tool/lib/python3.7/gzip.py", line 276, in read
return self._buffer.read(size)
File "/home/bertil/anaconda3/envs/word2vec_tool/lib/python3.7/_compression.py", line 68, in readinto
data = self.read(len(byte_view))
File "/home/bertil/anaconda3/envs/word2vec_tool/lib/python3.7/gzip.py", line 463, in read
if not self._read_gzip_header():
File "/home/bertil/anaconda3/envs/word2vec_tool/lib/python3.7/gzip.py", line 411, in _read_gzip_header
raise OSError('Not a gzipped file (%r)' % magic)
OSError: Not a gzipped file (b'<!')
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "test.py", line 3, in <module>
Word2Vec()
File "/home/bertil/anaconda3/envs/word2vec_tool/lib/python3.7/site-packages/dasem/models.py", line 425, in __init__
self.train()
File "/home/bertil/anaconda3/envs/word2vec_tool/lib/python3.7/site-packages/dasem/models.py", line 527, in train
workers=workers)
File "/home/bertil/anaconda3/envs/word2vec_tool/lib/python3.7/site-packages/gensim/models/word2vec.py", line 783, in __init__
fast_version=FAST_VERSION)
File "/home/bertil/anaconda3/envs/word2vec_tool/lib/python3.7/site-packages/gensim/models/base_any2vec.py", line 759, in __init__
self.build_vocab(sentences=sentences, corpus_file=corpus_file, trim_rule=trim_rule)
File "/home/bertil/anaconda3/envs/word2vec_tool/lib/python3.7/site-packages/gensim/models/base_any2vec.py", line 936, in build_vocab
sentences=sentences, corpus_file=corpus_file, progress_per=progress_per, trim_rule=trim_rule)
File "/home/bertil/anaconda3/envs/word2vec_tool/lib/python3.7/site-packages/gensim/models/word2vec.py", line 1591, in scan_vocab
total_words, corpus_count = self._scan_vocab(sentences, progress_per, trim_rule)
File "/home/bertil/anaconda3/envs/word2vec_tool/lib/python3.7/site-packages/gensim/models/word2vec.py", line 1560, in _scan_vocab
for sentence_no, sentence in enumerate(sentences):
File "/home/bertil/anaconda3/envs/word2vec_tool/lib/python3.7/site-packages/dasem/lcc.py", line 255, in iter_sentence_words
for word_list in lcc_file.iter_sentence_words():
File "/home/bertil/anaconda3/envs/word2vec_tool/lib/python3.7/site-packages/dasem/lcc.py", line 141, in iter_sentence_words
for n, sentence in enumerate(self.iter_sentences()):
File "/home/bertil/anaconda3/envs/word2vec_tool/lib/python3.7/site-packages/dasem/lcc.py", line 112, in iter_sentences
with tarfile.open(self.filename, "r:gz") as tar:
File "/home/bertil/anaconda3/envs/word2vec_tool/lib/python3.7/tarfile.py", line 1591, in open
return func(name, filemode, fileobj, **kwargs)
File "/home/bertil/anaconda3/envs/word2vec_tool/lib/python3.7/tarfile.py", line 1649, in gzopen
raise ReadError("not a gzip file")
tarfile.ReadError: not a gzip file
The text was updated successfully, but these errors were encountered:
I have gotten W2V with the package to work with the Gutenberg corpus, but I am having trouble getting other corpora to work. I have downloaded the corpora as per your instructions.
The Wikipedia-corpus simply keeps loading when I do `w2v = Word2Vec()`.
The LCC-corpus is not able to find the file `word2vec.pkl.gz`. It seems to be related to this issue: #6. In my `dasem_data/lcc` folder I only have a `dan-dk_web_2014_10K.tar.gz` file, so I am wondering if it is simply a naming issue?
Here is my full error:
The text was updated successfully, but these errors were encountered: