-
Notifications
You must be signed in to change notification settings - Fork 3
/
4.rouge.py
38 lines (32 loc) · 987 Bytes
/
4.rouge.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import os, numpy as np
from nltk.tokenize import word_tokenize as wt
from string import punctuation as punct
# Directory whose files are turned into the `resl` bigram lists
# (presumably the system-generated summaries being scored — confirm).
path1 = './4.summarized/'
# Directory whose files are turned into the `sums` bigram lists; the size of
# each `sums` entry is the denominator of the recall computed below.
path2 = './summary/'
def bigram(path):
    """Build the list of word bigrams for every file in a directory.

    For each file in ``path`` the first two lines are skipped. Every
    remaining line is lower-cased and tokenized with ``wt`` (NLTK's
    ``word_tokenize``), tokens found in ``punct`` are dropped, and each pair
    of adjacent tokens on the same line is joined with a single space.

    Args:
        path: Directory containing the text files. A trailing path
            separator is accepted but not required.

    Returns:
        A list with one entry per file, in sorted file-name order. Each
        entry is the list of bigram strings of that file, duplicates kept.
    """
    result = []
    # os.listdir() yields names in arbitrary order. Sorting keeps each
    # document at a stable index, so results from two directories can be
    # compared position by position when their file names sort alike.
    for name in sorted(os.listdir(path)):
        # The context manager closes the handle as soon as the text is read;
        # os.path.join works whether or not `path` ends with a separator.
        with open(os.path.join(path, name)) as handle:
            rows = handle.read().splitlines()[2:]
        bigrams = []
        for row in rows:
            # `punct` is a string, so this is a substring test: a token is
            # dropped when it occurs anywhere inside string.punctuation.
            tokens = [tok for tok in wt(row.lower()) if tok not in punct]
            # Pair each token with its successor; bigrams never span lines.
            bigrams.extend(
                left + ' ' + right for left, right in zip(tokens, tokens[1:])
            )
        result.append(bigrams)
    return result
# Per-document bigram lists: `resl` is built from path1, `sums` from path2.
resl = bigram(path1)
sums = bigram(path2)
rerata = []  # recall percentage of every scored document
# The context manager closes RECALL.txt even if scoring raises part-way.
with open('RECALL.txt', 'w') as f:
    # Score at most the first 20 documents. zip() stops at the shorter list,
    # so fewer than 20 files in either directory no longer raises IndexError.
    for i, (res_bigrams, sum_bigrams) in enumerate(zip(resl[:20], sums[:20])):
        # Denominator: every bigram occurrence of the `sums` entry.
        N = len(sum_bigrams)
        # Numerator: number of distinct bigrams present in both entries.
        I = len(set(res_bigrams) & set(sum_bigrams))
        # An entry without any bigram scores 0% instead of dividing by zero.
        R = (I / N) * 100 if N else 0.0
        rerata.append(R)
        mystr = 'Recall Summary Dokumen ke-' + str(i + 1) + ': ' + str(round(R, 2)) + '%'
        f.write(mystr + '\n')
        print(mystr)
# The average over all scored documents is printed only, not written to file.
print('\nRERATA RECALL:', str(round(np.average(rerata), 2)) + '%')