Skip to content
Browse files

improvement:

  • Loading branch information...
1 parent d8bb568 commit e4e0136d1149fc8ba3f6883141ce599fb2629619 @fxsjy fxsjy committed
Showing with 11 additions and 11 deletions.
  1. +8 −10 snailseg/__init__.py
  2. +3 −1 snailseg/dict.txt
View
18 snailseg/__init__.py
@@ -17,11 +17,11 @@ def get_model(file_dict):
prob_v[length] += 1
word_all.append(word)
for i,char in enumerate(word):
- if not (char in prob_cv):
- prob_cv[char] = {}
- if not (i in prob_cv[char]):
- prob_cv[char][i] = 0
- prob_cv[char][i] += freq
+ if not (i in prob_cv):
+ prob_cv[i] = {}
+ if not (char in prob_cv[i]):
+ prob_cv[i][char] = 0
+ prob_cv[i][char] += freq
total_freq = sum(prob_v.values())
@@ -67,10 +67,8 @@ def __cut(sentence):
while j< length:
char = sentence[j]
- if not (char in prob_cv):
- prob_cv[char]={}
- p_1 = prob_v[j-i] * prob_cv[char].get(0,0)# probability of seg
- p_2 = prob_v[j-i+1] * prob_cv[char].get(j-i,0)#probability of not seg
+ p_1 = prob_v[j-i] * prob_cv[0].get(char,0)# probability of seg
+ p_2 = prob_v[j-i+1] * prob_cv[j-i].get(char,0)#probability of not seg
part = sentence[i:j]
if (p_1 > p_2) and in_dict(part):
result.append(part)
@@ -99,4 +97,4 @@ def cut(sentence):
else:
tmp = re.split(ur"[^a-zA-Z0-9+#]",blk)
result.extend([x for x in tmp if x.strip()!=""])
- return result
+ return result
View
4 snailseg/dict.txt
@@ -298032,4 +298032,6 @@
千 1511
工信部 281
工信处 280
-草泥马 100
+草泥马 100
+说的 250
+长春 200

0 comments on commit e4e0136

Please sign in to comment.
Something went wrong with that request. Please try again.