-
Notifications
You must be signed in to change notification settings - Fork 2
/
metrics.py
90 lines (86 loc) · 2.36 KB
/
metrics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import re
# Two-letter USPS codes mapped to full names: the 50 U.S. states plus the
# District of Columbia, ordered alphabetically by code.
STATES = {
    "AK": "Alaska",
    "AL": "Alabama",
    "AR": "Arkansas",
    "AZ": "Arizona",
    "CA": "California",
    "CO": "Colorado",
    "CT": "Connecticut",
    "DC": "District of Columbia",
    "DE": "Delaware",
    "FL": "Florida",
    "GA": "Georgia",
    "HI": "Hawaii",
    "IA": "Iowa",
    "ID": "Idaho",
    "IL": "Illinois",
    "IN": "Indiana",
    "KS": "Kansas",
    "KY": "Kentucky",
    "LA": "Louisiana",
    "MA": "Massachusetts",
    "MD": "Maryland",
    "ME": "Maine",
    "MI": "Michigan",
    "MN": "Minnesota",
    "MO": "Missouri",
    "MS": "Mississippi",
    "MT": "Montana",
    "NC": "North Carolina",
    "ND": "North Dakota",
    "NE": "Nebraska",
    "NH": "New Hampshire",
    "NJ": "New Jersey",
    "NM": "New Mexico",
    "NV": "Nevada",
    "NY": "New York",
    "OH": "Ohio",
    "OK": "Oklahoma",
    "OR": "Oregon",
    "PA": "Pennsylvania",
    "RI": "Rhode Island",
    "SC": "South Carolina",
    "SD": "South Dakota",
    "TN": "Tennessee",
    "TX": "Texas",
    "UT": "Utah",
    "VA": "Virginia",
    "VT": "Vermont",
    "WA": "Washington",
    "WI": "Wisconsin",
    "WV": "West Virginia",
    "WY": "Wyoming",
}
def get_keyword_matches(result, correct_keywords, return_texts=False):
    """Count how many of ``correct_keywords`` occur in ``result``.

    Each keyword contributes at most 1 to the count, however many times it
    occurs. Matching is case-insensitive and delimiter-aware: a hit must be
    preceded by the start of the text, ``(`` or whitespace, and followed by
    ``)``, ``.``, ``,``, whitespace or the end of the text.

    Keywords are handled in two ways:

    * Amounts (``int``/``float`` keywords, or strings starting with ``$``):
      ``$`` and ``,`` are removed from both the keyword and the text before
      searching, so ``1000`` and ``"$1,000"`` both match ``"$1,000"``.
    * Phrases (everything else): searched in the unmodified text, either
      verbatim or with ``$``/``,`` removed from the keyword.

    Keywords are always treated as literal text, never as regex syntax.

    Args:
        result: Text to search. A falsy value (``None``, ``""``) yields
            zero matches.
        correct_keywords: Iterable of keywords (strings or numbers).
        return_texts: When true, also return the matched texts.

    Returns:
        The number of keywords found, or, when ``return_texts`` is true, a
        tuple ``(count, match_texts)`` where ``match_texts`` holds one list
        of matched strings per keyword that was found.
    """
    match_texts = []
    matches = 0
    if not result:
        if return_texts:
            return matches, match_texts
        return matches

    # Loop-invariant: symbol-free copy of the text used for amount keywords.
    result_nosymbols = re.sub(r"[$,]+", "", result)

    # NOTE: state abbreviations are not special-cased; a previously
    # disabled draft of that lookup relied on the module-level STATES table.
    for keyword in correct_keywords:
        keyword_text = str(keyword)
        keyword_nosymbols = re.sub(r"[$,]+", "", keyword_text)

        if isinstance(keyword, (int, float)) or keyword_text.startswith("$"):
            # Amounts: compare the bare digits on both sides.
            haystack = result_nosymbols
            candidates = [keyword_nosymbols]
        else:
            # Phrases: accept the keyword verbatim (so "Portland, OR" can
            # match itself) as well as its symbol-free spelling.
            haystack = result
            candidates = [keyword_text]
            if keyword_nosymbols != keyword_text:
                candidates.append(keyword_nosymbols)

        # An empty alternative would match between any two separators.
        candidates = [text for text in candidates if text]
        if not candidates:
            continue

        # re.escape keeps characters such as ".", "+" or "(" literal.
        alternatives = "|".join(re.escape(text) for text in candidates)
        # "^" is required explicitly: inside a character class "\b" means
        # backspace, not word boundary, so it cannot anchor the text start.
        keyword_re = rf"(?:^|[(\s])({alternatives})(?:[).,\s]|$)"

        found = re.findall(keyword_re, haystack, re.I)
        if found:
            matches += 1
            match_texts.append(found)

    if return_texts:
        return matches, match_texts
    return matches