-
Notifications
You must be signed in to change notification settings - Fork 0
/
utils.py
152 lines (115 loc) · 3.89 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
from datetime import *
import statistics
def get_average(list):
    """
    return the arithmetic mean of the given values, or 0 when there are none

    NOTE: the parameter name shadows the builtin ``list``; it is kept as-is
    so existing callers (including keyword callers) keep working.
    """
    count = len(list)
    if count == 0:
        return 0
    return sum(list) / count
def fuse_score(scores):
    """
    calculate the harmonic mean of the positive scores

    Each item of ``scores`` is either a number or a list/tuple whose first
    element is the number to use. Items that are not strictly positive are
    ignored, and so are empty lists/tuples.

    Returns the harmonic mean of the remaining values, or 0 when nothing is
    left to average (``statistics.harmonic_mean`` would raise on empty data).
    """
    positive_scores = []
    for score in scores:
        if isinstance(score, (list, tuple)):
            if not score:
                # nothing to read a score from
                continue
            score = score[0]
        if score > 0:
            positive_scores.append(score)

    if not positive_scores:
        # same "empty input -> 0" convention as get_average
        return 0
    return statistics.harmonic_mean(positive_scores)
def convert_datetime_to_string(label, date_format = 'ISO8601'):
    """
    convert a date label to a human-readable string

    ``label`` is parsed with convert_datetime_ISO8601 when ``date_format`` is
    'ISO8601' and with convert_datetime otherwise. When parsing fails (the
    parser raises or returns ''), ``label`` is returned unchanged.

    Output layout:
      * 1 January             -> '<year>'
      * any other day         -> '<day> <month name>, <year>'
      * a non-midnight time   -> the above followed by ' HH:MM:SS'
    """
    try:
        if date_format == 'ISO8601':
            dt = convert_datetime_ISO8601(label)
        else:
            dt = convert_datetime(label)
    except Exception:
        return label

    # both parsers signal "could not parse" with an empty string
    if dt == '':
        return label

    # 1 January is rendered as the bare year
    if dt.day == 1 and dt.month == 1:
        str_label = str(dt.year)
    else:
        str_label = str(dt.day) + ' ' + dt.strftime("%B") + ', ' + str(dt.year)

    if dt.hour == 0 and dt.minute == 0 and dt.second == 0:
        return str_label

    # hour/minute/second are ints: format them instead of concatenating
    # them to a str (which raises TypeError)
    return str_label + ' ' + dt.strftime('%H:%M:%S')
def convert_datetime_ISO8601(label):
    """
    convert label to datetime (ISO8601, layout '%Y-%m-%dT%H:%M:%SZ')

    Any '+' characters are removed before parsing. When the direct parse
    fails, a month or day field equal to '00' is replaced by '01' and the
    parse is attempted once more. Returns the datetime, or '' when the label
    cannot be parsed.
    """
    iso_pattern = '%Y-%m-%dT%H:%M:%SZ'

    # first attempt: parse the label as given (minus '+')
    try:
        if '+' in label:
            label = label.replace('+', '')
        return datetime.strptime(label, iso_pattern)
    except Exception:
        pass

    # second attempt: patch '00' month/day fields, then parse again
    try:
        pieces = label.split('T')
        date_fields = pieces[0].split('-')
        for position in (1, 2):
            if date_fields[position] == '00':
                date_fields[position] = '01'
        repaired = '-'.join(date_fields) + 'T' + pieces[1]
        return datetime.strptime(repaired, iso_pattern)
    except Exception:
        return ''
def convert_datetime(label):
    """
    convert a 'day month-name year' label to datetime

    Accepted layouts (any '+' characters are removed first):
      * '%d %B, %Y'  e.g. '21 March, 2015'
      * '%d %B %Y'   e.g. '21 March 2015'

    Other layouts (e.g. 'Jun 28 2018 at 7:40AM' or ISO8601 strings) are not
    recognised. Returns the datetime, or '' when ``label`` is not a string
    or matches none of the accepted layouts.
    """
    if not isinstance(label, str):
        return ''

    label = label.replace('+', '')
    for layout in ('%d %B, %Y', '%d %B %Y'):
        try:
            return datetime.strptime(label, layout)
        except ValueError:
            # layout mismatch: try the next one
            continue
    return ''
def match_multi_value(value_list, sen_list):
    """
    look for the first value of value_list that match_single_value finds
    in sen_list

    Returns the (flag, sen_id) pair produced by match_single_value for the
    first matching value. When no value matches, the pair from the last
    value tried is returned; for an empty value_list it is (False, 0).
    """
    matched, position = False, 0
    for candidate in value_list:
        matched, position = match_single_value(candidate, sen_list)
        if matched:
            # stop at the first value that is found
            return matched, position
    return matched, position
def match_single_value(value, sen_list):
    """
    find the first sentence of sen_list that contains value as a whole token

    ``value`` is converted with str() and compared case-sensitively. A
    sentence (expected to be a str) matches when one of these holds:
      * it contains ' <value> ' (surrounded by spaces)
      * it starts with '<value> '
      * it ends with ' <value>.'

    Returns (flag, sen_id): (True, index of the first matching sentence), or
    (False, number of sentences examined) when nothing matches.
    """
    value = str(value)
    inner = ' ' + value + ' '
    head = value + ' '
    tail = ' ' + value + '.'

    sen_id = 0
    for sen in sen_list:
        # should not use lower
        # endswith (unlike index(), which only sees the first occurrence)
        # still matches when the tail also appears earlier in the sentence
        if inner in sen or sen.startswith(head) or sen.endswith(tail):
            return True, sen_id
        sen_id = sen_id + 1
    return False, sen_id