-
Notifications
You must be signed in to change notification settings - Fork 0
/
find.py
128 lines (127 loc) · 2.88 KB
/
find.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import csv
import re
# Particle and punctuation characters collected as "no-use" (noise) tokens.
# NOTE(review): no function in the visible part of this file references this
# list -- presumably it is meant for stripping such tokens; confirm it is used.
noUsewords=['了','《','》','‘','’','“','”',',','?','。','(',')']
def _refine_wordbag(keyword):
    """Return *keyword* followed by the synonyms listed in ``./syn/<keyword>``.

    The synonym file is optional and holds one synonym per line. When it
    cannot be opened, the bag contains only the keyword itself.
    """
    bag = [keyword]
    try:
        syn_file = open("./syn/" + keyword)
    except (OSError, ValueError):
        # Missing or unopenable synonym file: best effort, keyword only.
        return bag
    with syn_file:
        for raw in syn_file:
            bag.append(raw.strip())
    return bag


def refine(paras, keywords):
    """Reduce each paragraph to the sentences that mention a keyword.

    Every paragraph is split into sentences on the characters in
    ``[。,!?]``. A sentence is kept when it contains a keyword or one of
    its synonyms. The first non-matching sentence that directly follows a
    kept matching sentence is kept too, as trailing context.

    Args:
        paras: iterable of paragraph strings.
        keywords: iterable of keyword strings; synonyms are looked up in
            ``./syn/<keyword>`` when that file exists.

    Returns:
        A list with one string per paragraph: the kept sentences, each
        followed by ``","`` (an empty string when nothing was kept).
    """
    splitter = re.compile("[。,!?]")
    # Load every synonym bag once per call instead of re-reading the files
    # for each sentence. Empty entries are dropped: an empty string is a
    # substring of every sentence and must never count as a match.
    vocabulary = [
        word
        for keyword in keywords
        for word in _refine_wordbag(keyword)
        if word
    ]

    regions = []
    for para in paras:
        sentences = splitter.split(para)
        # A paragraph ending in a delimiter yields a trailing empty piece.
        if sentences[-1] == "":
            sentences = sentences[:-1]

        kept = []
        keep_next = False  # True right after a matching sentence
        for sentence in sentences:
            # Substring test, so a keyword at position 0 also counts
            # (the previous ``find(...) > 0`` check skipped that case).
            if any(word in sentence for word in vocabulary):
                kept.append(sentence)
                keep_next = True
            elif keep_next:
                kept.append(sentence)
                keep_next = False
        regions.append("".join(sentence + "," for sentence in kept))
    return regions
def findPara(cirticalwords, corpus_path='wiki.txt'):
    """Return the corpus lines that best match the keyword groups.

    Each line of the corpus file is scored against every group except the
    last one. The last group is the head-word type; it is not scored as a
    group itself and only affects the weight of a match. For each scored
    group, the first word (in the group's iteration order) found in the
    line ends the search for that group and adds 0.5 if that word also
    belongs to the last group, otherwise 1.

    Args:
        cirticalwords: sequence of word collections (one per keyword, each
            holding the keyword and its synonyms); the last entry is the
            head-word group.
        corpus_path: path of the text file to scan, one paragraph per line.
            Defaults to ``'wiki.txt'``.

    Returns:
        The stripped lines that share the highest score, in file order.
        When no line matches any word, every line ties at score 0 and all
        lines are returned.
    """
    scored_groups = cirticalwords[:-1]
    head_group = cirticalwords[-1] if cirticalwords else ()

    best_lines = []
    best_score = -1
    # The context manager guarantees the corpus handle is closed.
    with open(corpus_path, 'r') as corpus:
        for raw_line in corpus:
            score = 0
            for group in scored_groups:
                for word in group:
                    if word in raw_line:
                        score += 0.5 if word in head_group else 1
                        break
            if score > best_score:
                # New best: discard everything collected so far.
                best_lines = [raw_line.strip()]
                best_score = score
            elif score == best_score:
                best_lines.append(raw_line.strip())
    return best_lines
def disscore(wordSyn, string):
    """Score how tightly the keyword groups cluster inside *string*.

    Every group except the last one is looked up in *string*. A group's
    position is the smallest index at which any of its words occurs. The
    score is ``(smallest position - largest position) / matched groups``,
    so it is never positive, and it is 0 when a single group matches or
    all matched groups start at the same index.

    Args:
        wordSyn: sequence of word collections (keyword plus synonyms); the
            last entry is ignored here.
        string: the text to score.

    Returns:
        The score as a float. When no group occurs in *string* the result
        is ``float("-inf")`` so that such strings rank last (the previous
        code raised ``ZeroDivisionError`` in that case).
    """
    positions = []  # earliest occurrence index of each matched group
    for group in wordSyn[:-1]:
        hits = [idx for idx in (string.find(word) for word in group) if idx != -1]
        if hits:
            positions.append(min(hits))

    if not positions:
        # Nothing matched: avoid dividing by zero and rank this string worst.
        return float("-inf")
    return (min(positions) - max(positions)) / len(positions)
def doFindPara(line):
    """Return up to ten corpus paragraphs that best match the words in *line*.

    Each word is expanded into a set holding the word itself plus the
    synonyms listed one per line in the optional file ``syn/<word>``. The
    sets are passed to ``findPara`` to select candidate paragraphs, each
    candidate is scored with ``disscore``, and the candidates are returned
    in descending score order.

    Args:
        line: list of words; the groups are passed on in the same order.

    Returns:
        A list of at most 10 stripped paragraph strings, best score first.
        Duplicate paragraphs are collapsed into one entry.
    """
    wordSyn = []
    for word in line:
        variants = {word}
        try:
            syn_file = open('syn/' + word, 'r')
        except (OSError, ValueError):
            # No usable synonym file for this word: use the word alone.
            syn_file = None
        if syn_file is not None:
            # ``with`` closes the file even if reading fails part-way.
            with syn_file:
                for raw in syn_file:
                    synonym = raw.strip()
                    # Skip blank lines: an empty string is a substring of
                    # every paragraph and would count as a match everywhere.
                    if synonym:
                        variants.add(synonym)
        wordSyn.append(variants)

    scores = {}
    for paragraph in findPara(wordSyn):
        paragraph = paragraph.strip()
        scores[paragraph] = disscore(wordSyn, paragraph)

    # Highest score first; the sort is stable, so ties keep corpus order.
    ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
    # NOTE: the result could additionally be narrowed with
    # refine(result, line[0:-1]); that step is currently not applied.
    return [paragraph.strip() for paragraph, _ in ranked[:10]]