-
Notifications
You must be signed in to change notification settings - Fork 11
/
Base.py
130 lines (101 loc) · 3.88 KB
/
Base.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import Utilities
import re, sys
from tabulate import tabulate
# Building blocks for the temporal-expression patterns.
#
# Every fragment containing a regex escape is a raw string so that \s, \d and
# \. reach the regex engine untouched.  Long alternations are assembled with
# implicit string concatenation: a backslash-newline continuation *inside* a
# string literal keeps the blank that precedes the backslash, which would put
# a stray space in front of "eleven", "eighteen", "ninety" and "october" and
# stop inputs such as "this october" from matching.
numbers = (
    r"(^a(?=\s)|one|two|three|four|five|six|seven|eight|nine|ten|"
    r"eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|"
    r"eighteen|nineteen|twenty|thirty|forty|fifty|sixty|seventy|eighty|"
    r"ninety|hundred|thousand)"
)
day = "(monday|tuesday|wednesday|thursday|friday|saturday|sunday)"
# Same alternation as ``day``; both names are kept because either may be
# referenced from outside this section.
week_day = "(monday|tuesday|wednesday|thursday|friday|saturday|sunday)"
month = (
    "(january|february|march|april|may|june|july|august|september|"
    "october|november|december)"
)
dmy = "(year|day|week|month)"
rel_day = "(today|tomorrow|tonight|tonite)"
exp1 = "(after)"
exp2 = "(this)"
# Timestamp such as "2015-03-01 10:20:30.5" (date parts split by "/" or "-").
iso = r"\d+[/-]\d+[/-]\d+ \d+:\d+:\d+\.\d+"
# Four digits at the start of the text or right after whitespace.
year = r"((?<=\s)\d{4}|^\d{4})"

# "<count> <unit>[s] after", where <count> is digits or spelled-out numbers.
# The outermost group wraps the whole expression, so element 0 of each
# findall() tuple is the full match.
regxp1 = r"((\d+|(" + numbers + r"[-\s]?)+) " + dmy + "s? " + exp1 + ")"
# "this <unit | weekday | month>"; outermost group is again the full match.
regxp2 = "(" + exp2 + " (" + dmy + "|" + week_day + "|" + month + "))"

reg1 = re.compile(regxp1, re.IGNORECASE)
reg2 = re.compile(regxp2, re.IGNORECASE)
reg3 = re.compile(rel_day, re.IGNORECASE)
reg4 = re.compile(iso)
reg5 = re.compile(year)
def getCommandLineArgs():
    """Return the input and output file names given on the command line.

    Returns:
        A ``(inputFileName, outputFileName)`` tuple taken from
        ``sys.argv[1]`` and ``sys.argv[2]``.  Any further arguments are
        ignored.

    Raises:
        SystemExit: if fewer than two arguments were supplied.  The exit
            carries a usage message instead of a bare ``IndexError``
            traceback.
    """
    if len(sys.argv) < 3:
        # sys.argv can be empty when the interpreter is embedded.
        program = sys.argv[0] if sys.argv else "Base.py"
        sys.exit("usage: {} <inputFile> <outputFile>".format(program))
    return sys.argv[1], sys.argv[2]
def isRequiredEvent(line):
    """Tell whether *line* mentions one of the event types of interest.

    The check is a case-insensitive substring search, so a keyword embedded
    in a longer word also counts.

    Returns:
        ``(True, keyword)`` for the first keyword found, testing them in the
        order marriage, birthday, meeting, anniversary, seminar;
        ``(False, "")`` when none occurs.
    """
    lowered = line.lower()
    keywords = ('marriage', 'birthday', 'meeting', 'anniversary', 'seminar')
    hit = next((keyword for keyword in keywords if keyword in lowered), None)
    if hit is None:
        return False, ""
    return True, hit
def preProcessData(input):
    """Parse the input file into the objects the rest of the script consumes.

    Args:
        input: path of the file to read.  The name shadows the builtin but
            is kept so that existing keyword callers continue to work.

    Returns:
        Whatever ``Utilities.parseInputFile`` returns for that path.  The
        script entry point iterates over it and calls ``getText()``,
        ``getActual()`` and ``setPredict()`` on each element.
    """
    # Read from the argument, not from the module-level ``inputFileName``:
    # that global only exists when this file is executed as a script, so
    # relying on it raises NameError for any importing caller.
    return Utilities.parseInputFile(input)
def extractDate(text):
    """Collect every temporal expression found in *text*.

    The module-level patterns are applied in a fixed order: reg1 (counted
    units followed by "after"), reg2 ("this" + unit/weekday/month), reg3
    (today, tomorrow, ...), reg4 (ISO-like timestamps) and reg5 (four-digit
    years).

    Returns:
        The matches joined with "," in the order found, or "" when nothing
        matched.
    """
    expressions = []

    # reg1 and reg2 contain several groups, so findall() yields one tuple per
    # match; its first element is the outermost group, i.e. the whole
    # expression.
    for pattern in (reg1, reg2):
        expressions.extend(
            groups[0] for groups in pattern.findall(text) if len(groups) > 1
        )

    # The remaining patterns have at most one group, so findall() already
    # returns plain strings.
    for pattern in (reg3, reg4, reg5):
        expressions.extend(pattern.findall(text))

    # Joining an empty list gives "", which is the "nothing found" result.
    return ",".join(expressions)
def initialize():
    """Run one-time setup by calling ``Utilities.setupLog()``."""
    # NOTE(review): presumably this prepares the log that
    # Utilities.writeLog() appends to -- confirm in Utilities.
    Utilities.setupLog()
if __name__ == '__main__':
    # Script entry point: flag every input line that mentions a required
    # event type and contains a temporal expression, write the flagged lines
    # to the output file, then report the metrics.
    initialize()

    # Read command line parameters.
    inputFileName, outputFileName = getCommandLineArgs()

    # Preprocess input data.
    lines = preProcessData(inputFileName)

    result = []
    for line in lines:
        isRequired, eventType = isRequiredEvent(line.getText())
        if isRequired:
            eventDate = extractDate(line.getText())
            if eventDate:
                # NOTE(review): getActual() presumably holds the labelled
                # answer, so a "yes" here counts as a true positive --
                # confirm against Utilities.
                if line.getActual() == "yes":
                    Utilities.incrementTP()
                line.setPredict("yes")
                # The "Where" column is always written empty.
                result.append([eventType, eventDate, "", line.getText()])
            else:
                Utilities.writeLog("INFO [NAIVE APPROACH]: Event Detected but is identified as past event :" + line.getText())
        else:
            Utilities.writeLog("INFO [NAIVE APPROACH]: Event Detected but event type did not match with required events :" + line.getText())

    # Header row first, then one row per detected event.  A plain loop is
    # used because the writes are side effects; no list needs to be built.
    Utilities.writeOutput(outputFileName, ["Event", "When", "Where", "Text"])
    for row in result:
        Utilities.writeOutput(outputFileName, row)

    Utilities.computeRecall(lines)
    Utilities.printMetrics()