diff --git a/demo.py b/demo.py
index 5ab4812..2d6ce60 100755
--- a/demo.py
+++ b/demo.py
@@ -137,6 +137,13 @@ def test_nearby(self):
 
     def test_badcase_1(self):
         synonyms.display("人脸") # synonyms.display calls synonyms.nearby
+
+    def test_basecase_2(self):
+        print("test_basecase_2")
+        sen1 = "今天天气"
+        sen2 = "今天天气怎么样"
+        r = synonyms.compare(sen1, sen2, seg=True)
+
 
 def test():
     unittest.main()
diff --git a/setup.py b/setup.py
index ca834f0..ba3a834 100644
--- a/setup.py
+++ b/setup.py
@@ -13,7 +13,7 @@
 
 setup(
     name='synonyms',
-    version='3.10.0',
+    version='3.10.2',
     description='Chinese Synonyms for Natural Language Processing and Understanding',
     long_description=LONGDOC,
     author='Hai Liang Wang, Hu Ying Xi',
@@ -40,9 +40,9 @@
     install_requires=[
         'six>=1.11.0',
         'numpy>=1.13.1',
-        'scipy==1.0.0',
-        'scikit-learn==0.19.1',
-        'absl-py==0.1.10'
+        'scipy>=1.0.0',
+        'scikit-learn>=0.19.1',
+        'absl-py>=0.4'
     ],
     package_data={
         'synonyms': [
diff --git a/synonyms/synonyms.py b/synonyms/synonyms.py
index 4a217d5..d90d897 100755
--- a/synonyms/synonyms.py
+++ b/synonyms/synonyms.py
@@ -247,8 +247,10 @@ def _nearby_levenshtein_distance(s1, s2):
 
     scores = []
     for x in second:
-        scores.append(max([_levenshtein_distance(x, y) for y in ft]))
-    s = np.sum(scores) / maxlen
+        choices = [_levenshtein_distance(x, y) for y in ft]
+        if len(choices) > 0: scores.append(max(choices))
+
+    s = np.sum(scores) / maxlen if len(scores) > 0 else 0
     return s
 
 def _similarity_distance(s1, s2, ignore):
@@ -319,8 +321,8 @@ def compare(s1, s2, seg=True, ignore=False, stopwords=False):
     s2_words = []
 
     if seg:
-        s1 = [x for x in jieba.cut(s1)]
-        s2 = [x for x in jieba.cut(s2)]
+        s1 = [x for x in jieba.cut(s1, cut_all=False, HMM=False)]
+        s2 = [x for x in jieba.cut(s2, cut_all=False, HMM=False)]
     else:
         s1 = s1.split()
         s2 = s2.split()