Skip to content

Commit

Permalink
change freqs.json data format, fixes #5
Browse files (browse the repository at this point in the history)
  • Loading branch information
henningpeters committed Mar 6, 2016
1 parent 12d4e90 commit 804373c
Show file tree
Hide file tree
Showing 4 changed files with 12 additions and 5 deletions.
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ spacy>=0.100,<0.101
preshed>=0.46,<0.47
murmurhash>=0.26,<0.27
cymem>=1.30,<1.31
+ sputnik>=0.9.0,<0.10.0
2 changes: 1 addition & 1 deletion sense2vec/about.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,4 @@
__email__ = 'matt@spacy.io'
__license__ = 'MIT'
__release__ = False
- __default_model__ = 'reddit_vectors'
+ __default_model__ = 'reddit_vectors>=1.1.0,<1.2.0'
11 changes: 8 additions & 3 deletions sense2vec/vectors.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,12 @@ cdef class VectorMap:
self.data.save(path.join(data_dir, 'vectors.bin'))
freqs = []
cdef uint64_t hashed
- for hashed, freq in self.freqs.items():
- freqs.append([hashed, freq])
+ for string in self.strings:
+ hashed = hash_string(string)
+ freq = self.freqs[hashed]
+ if not freq:
+ continue
+ freqs.append([string, freq])
with open(path.join(data_dir, 'freqs.json'), 'w') as file_:
json.dump(freqs, file_)

Expand All @@ -101,7 +105,8 @@ cdef class VectorMap:
with open(path.join(data_dir, 'freqs.json')) as file_:
freqs = json.load(file_)
cdef uint64_t hashed
- for hashed, freq in freqs:
+ for string, freq in freqs:
+ hashed = hash_string(string)
self.freqs[hashed] = freq


Expand Down
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,8 @@ def setup_package():
'spacy>=0.100,<0.101',
'preshed>=0.46,<0.47',
'murmurhash>=0.26,<0.27',
- 'cymem>=1.30,<1.31'],
+ 'cymem>=1.30,<1.31',
+ 'sputnik>=0.9.0,<0.10.0'],
cmdclass = {
'build_ext': build_ext_subclass},
)
Expand Down

0 comments on commit 804373c

Please sign in to comment.