/
Pattern.py
83 lines (69 loc) · 2.59 KB
/
Pattern.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import uuid
__author__ = "David S. Batista"
__email__ = "dsbatista@inesc-id.pt"
class Pattern(object):
    """A cluster of extracted tuples together with seed-match statistics.

    A pattern collects the tuples that were grouped together and keeps
    three counters (``positive``, ``negative``, ``unknown``) that are
    updated by :meth:`update_selectivity` and turned into a confidence
    score by :meth:`update_confidence`.

    Equality compares the ``tuples`` sets; ordering compares ``confidence``.
    Because ``__eq__`` is defined without ``__hash__``, instances are not
    hashable on Python 3.
    """

    def __init__(self, t=None):
        """Create a pattern, optionally seeded with a first tuple.

        :param t: initial tuple to store, or ``None`` for an empty pattern.
        """
        self.id = uuid.uuid4()
        # Counters maintained by update_selectivity().
        self.positive = 0
        self.negative = 0
        self.unknown = 0
        self.confidence = 0
        self.tuples = set()
        # Caches filled by merge_all_tuples_bet().
        self.bet_uniques_vectors = set()
        self.bet_uniques_words = set()
        # Test the argument, not the builtin ``tuple`` (which is never None);
        # otherwise an empty pattern would end up holding ``None``.
        if t is not None:
            self.tuples.add(t)

    def __eq__(self, other):
        """Two patterns are equal when they hold the same set of tuples."""
        if not isinstance(other, Pattern):
            return NotImplemented
        return self.tuples == other.tuples

    def __cmp__(self, other):
        """Python 2 three-way comparison by ``confidence``."""
        if other.confidence > self.confidence:
            return -1
        elif other.confidence < self.confidence:
            return 1
        else:
            return 0

    def __lt__(self, other):
        """Order patterns by ``confidence``.

        Python 3 never calls ``__cmp__``, so this is what makes
        ``sorted(patterns)`` work there; it agrees with ``__cmp__``.
        """
        if not isinstance(other, Pattern):
            return NotImplemented
        return self.confidence < other.confidence

    def update_confidence(self, config):
        """Recompute ``confidence`` from the current counters.

        confidence = positive /
                     (positive + unknown * wUnk + negative * wNeg)

        :param config: object exposing the weights ``wUnk`` and ``wNeg``.
        """
        if self.positive > 0:
            weighted_total = (
                self.positive
                + self.unknown * config.wUnk
                + self.negative * config.wNeg
            )
            # float() keeps this a true division on Python 2 as well.
            self.confidence = float(self.positive) / float(weighted_total)
        elif self.positive == 0:
            self.confidence = 0

    def add_tuple(self, t):
        """Add tuple ``t`` to this pattern."""
        self.tuples.add(t)

    def merge_all_tuples_bet(self):
        """Rebuild the sets of unique BET vectors and BET words.

        All BET vectors of the stored tuples are put into a set so that
        comparison with repeated vectors is eliminated.
        """
        self.bet_uniques_vectors = set()
        self.bet_uniques_words = set()
        for t in self.tuples:
            # transform numpy array into a tuple
            # so it can be hashed and added into a set
            self.bet_uniques_vectors.add(tuple(t.bet_vector))
            self.bet_uniques_words.add(t.bet_words)

    def update_selectivity(self, t, config):
        """Classify tuple ``t`` against the seeds and bump one counter.

        Entity strings are compared after stripping surrounding whitespace.

        * ``positive``: ``t`` matches both entities of a positive seed.
        * ``negative``: ``t`` shares ``e1`` with a positive seed but no seed
          with that ``e1`` has the same ``e2``, or ``t`` matches both
          entities of a negative seed.
        * ``unknown``: none of the above.

        Exactly one counter is incremented per call, so a single tuple is
        never counted twice in the confidence denominator.

        :param t: tuple exposing string attributes ``e1`` and ``e2``.
        :param config: object exposing ``positive_seed_tuples`` and
            ``negative_seed_tuples``, iterables of seeds with ``e1``/``e2``.
        """
        # Normalise once instead of on every loop iteration.
        e1 = t.e1.strip()
        e2 = t.e2.strip()

        matched_e1 = False
        for seed in config.positive_seed_tuples:
            if seed.e1.strip() == e1:
                matched_e1 = True
                if seed.e2.strip() == e2:
                    self.positive += 1
                    return

        matches_negative_seed = any(
            seed.e1.strip() == e1 and seed.e2.strip() == e2
            for seed in config.negative_seed_tuples
        )
        if matched_e1 or matches_negative_seed:
            self.negative += 1
        else:
            self.unknown += 1