-
Notifications
You must be signed in to change notification settings - Fork 3
/
BlockDetector.py
executable file
·70 lines (53 loc) · 1.55 KB
/
BlockDetector.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import json
import os
from sklearn import svm
# Train an SVM on labelled text blocks, classify the blocks of ./Blocks.json,
# merge them into paragraphs and write the result to Paragraph.json.

# Per-block attributes fed to the classifier, always in this order.
FEATURE_KEYS = ('font_change', 'size_change', 'has_dot', 'has_keyword')

# --- Training data: blocks/Blocks - CV1.json .. CV9.json (missing files are skipped).
features_train = []
labels_train = []
for i in range(1, 10):
    path = 'blocks/Blocks - CV%d.json' % i
    if not os.path.exists(path):
        continue
    with open(path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    for elements in data:
        features_train.append([elements[key] for key in FEATURE_KEYS])
        labels_train.append(elements['label'])

# --- Blocks to classify.
with open('./Blocks.json', 'r', encoding='utf-8') as test:
    data_test = json.load(test)

# content[n] is the first entry of block n's 'content'; features_test[n] is its feature row.
content = [e['content'][0] for e in data_test]
features_test = [[e[key] for key in FEATURE_KEYS] for e in data_test]

# --- Train and predict.
if not features_train:
    # Fail with a readable message instead of an opaque error from inside fit().
    raise ValueError('no training samples found in blocks/Blocks - CV1..9.json')

clf = svm.SVC(kernel='rbf')
clf.fit(features_train, labels_train)

# One batched predict call instead of one call per sample; skipped when there
# is nothing to classify because the classifier raises on empty input.
labels_test = list(clf.predict(features_test)) if features_test else []

# --- Group blocks into paragraphs.
# A label of 1 at index n closes the running group and starts a new one at n.
new_content = []
start = 0
for idx, label in enumerate(labels_test):
    # idx > start skips the empty group that a boundary at index 0 would produce.
    if label == 1 and idx > start:
        new_content.append(content[start:idx])
        start = idx
# Flush the blocks after the last boundary; without this the final paragraph is lost.
if content[start:]:
    new_content.append(content[start:])

# --- Flatten every paragraph into one string.
# Each piece contributes its first element followed by a single space.
all_text = []
for paragraph in new_content:
    txt = ''.join(piece[0] + ' ' for block in paragraph for piece in block)
    all_text.append({'content': txt})

# The with-statement closes the file; no explicit close() is needed.
with open('Paragraph.json', 'w', encoding='utf-8') as file:
    json.dump(all_text, file, indent=2)