-
Notifications
You must be signed in to change notification settings - Fork 1
/
distance.py
73 lines (67 loc) · 1.88 KB
/
distance.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#!/usr/bin/env python
#coding:gbk
import re
def ditstance(source, target):
""" ditstance(source, target)->int return the levenshtein ditstance
>>> ditstance("126.com", "127.com")
1
>>> ditstance("127.com", "126.com")
1
>>> ditstance("hotmail.com", "hotmail.com")
0
>>> ditstance("", "")
0
>>> ditstance("sina.com", "")
8
>>> ditstance("", "gmail.com")
9
>>> ditstance("sina.com", "sina.cn")
2
>>> ditstance("qq.cn", "qq.com")
2
>>> ditstance("139.com", ".139.com")
1
>>> ditstance("qq.", "qq.com")
3
>>> ditstance("gmail.com", ".gmail.com")
1
>>> ditstance(".gmail.com", "gmail.com")
1
"""
src_length = len(source)+1
tgt_length = len(target)+1
if src_length == 1:
return tgt_length - 1
if tgt_length == 1:
return src_length - 1
matrix = [range(tgt_length)]
for i in range(1, src_length):
row = [0]*tgt_length
row[0] = i
matrix.append(row)
for i in range(1, src_length):
src_char = source[i-1]
for j in range(1, tgt_length):
tgt_char = target[j-1]
cost = 0 if src_char == tgt_char else 1
above = matrix[i-1][j]+1
left = matrix[i][j-1]+1
diag = matrix[i-1][j-1]+cost
value = min(above, left, diag)
matrix[i][j]=value
#return matrix[src_length-1][tgt_length-1]
return 1-float(matrix[src_length-1][tgt_length-1])/(max(tgt_length,src_length)-1)
if __name__=="__main__":
#import doctest
#doctest.testmod()
#print ditstance("qq", "qaq")
xq = open('C:\\Users\\suchao\\Desktop\\xq.txt').readlines()
dic = {}
for i in xq:
dic[i[:-1]] = ditstance("¹û԰СÇø", i[:-1])
tmp = 0
for r in sorted(dic.iteritems(),key = lambda d:d[1],reverse = True):
print r[0],r[1]
if tmp==10:
break
tmp = tmp + 1