Skip to content

Commit

Permalink
Merge pull request #777 from clulab/date-rule-issue776
Browse files Browse the repository at this point in the history
Date rule issue776
  • Loading branch information
kwalcock authored Feb 6, 2024
2 parents 9f89ea7 + 8a4cb9a commit 9c3ed9c
Show file tree
Hide file tree
Showing 6 changed files with 91 additions and 24 deletions.
28 changes: 15 additions & 13 deletions main/src/main/resources/org/clulab/numeric/WEEK.tsv
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
#
# list of weeks and their date ranges, case insensitive so everything is lower case for simplicity
# the comments after // are required by WeekNormalizer to get the week date ranges! Do not remove
# the format for the date ranges must be MM-dd:MM-dd or MM:MM
# note: multi-word phrases must be tokenized in the same way as our tokenizer. If not sure, try the phrases in ./shell first!
# The comments after // are required by WeekNormalizer to get the week date ranges! Do not remove.
# The format for the date ranges must be XXXX-XX-dd -- XXXX-XX-dd, i.e., a start date and an end
# date separated by "--" (see the entries below).
# Note: multi-word phrases must be tokenized in the same way as our tokenizer. If not sure, try the
# phrases in ./shell first!
#
first week // XXXX-XX-01 -- XXXX-XX-07
1st week // XXXX-XX-01 -- XXXX-XX-07
second week // XXXX-XX-08 -- XXXX-XX-14
2nd week // XXXX-XX-08 -- XXXX-XX-14
third week // XXXX-XX-15 -- XXXX-XX-21
3rd week // XXXX-XX-15 -- XXXX-XX-21
fourth week // XXXX-XX-22 -- XXXX-XX-28
4th week // XXXX-XX-22 -- XXXX-XX-28
first two weeks // XXXX-XX-01 -- XXXX-XX-14
second two weeks // XXXX-XX-15 -- XXXX-XX-28
# Update: If the week (left column) includes a pipe (|), the "week" will be split into multiple
# entries that all share the date range to the right. Each entry is trimmed, so the alternatives
# can be padded with spaces and vertically aligned to make omissions easy to spot. Empty values
# between pipes (||) are discarded.
#
first week| |1st week| |first |1st // XXXX-XX-01 -- XXXX-XX-07
second week|second weeks|2nd week|2nd weeks|second|2nd // XXXX-XX-08 -- XXXX-XX-14
third week| third weeks|3rd week|3rd weeks|third |3rd // XXXX-XX-15 -- XXXX-XX-21
fourth week|fourth weeks|4th week|4th weeks // XXXX-XX-22 -- XXXX-XX-28
first two weeks // XXXX-XX-01 -- XXXX-XX-14
second two weeks // XXXX-XX-15 -- XXXX-XX-28
11 changes: 10 additions & 1 deletion main/src/main/resources/org/clulab/numeric/date-ranges.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,15 @@
pattern: |
/(?i)between/ @month1:PossibleMonth /(?i)(and|to)/ @month2:PossibleMonth @year:PossibleYear
# Date range bounded by two ordinal weeks of the same month; handled by mkDateRangeMentionBetweenWeeks.
# Captures:
#   week1 - first|1st|second|2nd, optionally followed by "week"
#   week2 - second|2nd|third|3rd|fourth|4th|last, followed by "week" or "weeks"
#   month - a PossibleMonth mention
# NOTE(review): week1 does not list third/3rd, so a phrase such as "between the third and fourth
# weeks of June" is presumably not covered by this rule -- confirm whether that is intended.
- name: date-range-5-week
priority: ${rulepriority}
label: DateRange
type: token
example: "It took place between the second and third weeks of June"
action: mkDateRangeMentionBetweenWeeks
pattern: |
/(?i)between/ /(?i)the/? (?<week1> /(?i)(first|1st|second|2nd)/ /(?i)week/?) /(?i)(and|to)/ (?<week2> /(?i)(second|2nd|third|3rd|fourth|4th|last)/ /(?i)weeks?/) /(?i)of/ @month:PossibleMonth
- name: date-range-6
priority: ${rulepriority}
label: DateRange
Expand Down Expand Up @@ -101,7 +110,7 @@
example: "First two weeks of May"
action: mkDateRangeMentionWithWeek
pattern: |
(?<week> /(?i)(first|second|last)/ /(?i)two/ /(?i)weeks/) /(?i)of/ @month:PossibleMonth
(?<week> /(?i)(first|second|last)/ /(?i)(two|three)/ /(?i)weeks/) /(?i)of/ @month:PossibleMonth
- name: date-unbound-range-1
priority: ${rulepriority}
Expand Down
16 changes: 10 additions & 6 deletions main/src/main/scala/org/clulab/numeric/WeekNormalizer.scala
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,12 @@ class WeekNormalizer(weekPath: String) {
object WeekNormalizer {

/**
  * Reads the week norms identified by `path`.
  *
  * A file named `path` under `NumericEntityRecognizer.resourceDir` takes precedence when it
  * exists; otherwise the data is obtained through `Sourcer.sourceFromResource(path)`.
  * The source is opened exactly once and handed to `Using.resource`, which releases it
  * after `readNormsFromSource` has consumed it.
  *
  * @param path location of the week file, relative to the custom resource directory or
  *             usable as a resource path
  * @return map from week phrase to its [[WeekRange]]
  */
def readNormsFromResource(path: String): Map[String, WeekRange] = {
  val customFile = new File(NumericEntityRecognizer.resourceDir, path)
  // Pick the source first so that only a single source is ever opened and parsed.
  val source =
    if (customFile.exists) Sourcer.sourceFromFile(customFile)
    else Sourcer.sourceFromResource(path)

  Using.resource(source)(readNormsFromSource)
}

def readNormsFromSource(source: Source): Map[String, WeekRange] = {
Expand All @@ -38,14 +38,18 @@ object WeekNormalizer {
CommentedStandardKbSource.read(source) { (week, normOpt, unitClassOpt) =>
assert(normOpt.isDefined) // We're insisting on this.

val weeks = week.split('|').map(_.trim).filter(_.nonEmpty)
val norm = normOpt.get.split("--").map(_.trim)
val (start, end) = norm match {
case Array(start, end) => (start, end)
case _ => throw new RuntimeException(s"ERROR: incorrect date range in week file")
}
val startDay = getDay(start)
val endDay = getDay(end)
norms += week -> WeekRange(startDay, endDay)
val weekRange = WeekRange(startDay, endDay)
val entries = weeks.map(_ -> weekRange)

norms ++= entries
}
norms.toMap
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,11 @@ class NumericActions(seasonNormalizer: SeasonNormalizer, unitNormalizer: UnitNor
convert(mentions, toDateRangeMentionWithMonth, "toDateRangeMentionWithMonth")
}

/**
  * Constructs DateRangeMentions from mentions matched by a "between <week> and <week> of <month>"
  * token pattern.
  *
  * Delegates to `convert`, supplying `toDateRangeMentionBetweenWeeks` bound to this instance's
  * `weekNormalizer` as the per-mention converter, together with the converter's name
  * (presumably used by `convert` for diagnostics -- confirm).
  *
  * @param mentions the mentions to convert
  * @param state    not referenced by this method
  * @return whatever `convert` produces for the given mentions
  */
def mkDateRangeMentionBetweenWeeks(mentions: Seq[Mention], state: State): Seq[Mention] = {
convert(mentions, toDateRangeMentionBetweenWeeks(weekNormalizer), "toDateRangeMentionBetweenWeeks")
}

/** Constructs a DateRangeMention from a token pattern */
def mkDateRangeMentionWithSinceRef(mentions: Seq[Mention], state: State): Seq[Mention] = {
convert(mentions, toDateRangeMentionWithSinceRef, "toDateRangeMentionWithSinceRef")
Expand Down
47 changes: 43 additions & 4 deletions main/src/main/scala/org/clulab/numeric/mentions/package.scala
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,29 @@ package object mentions {
throw new RuntimeException(s"ERROR: cannot convert mention of type [${m.getClass.toString}] to DateRangeMention!")
}

/**
  * Converts a mention with "week1", "week2" and "month" arguments into a DateRangeMention.
  *
  * A mention that already is a DateRangeMention is returned untouched. For a RelationMention,
  * the range starts on the start day of "week1" and ends on the end day of "week2", both
  * combined with the "month" argument; `None` is passed as the third `mkDate` argument.
  *
  * @param weekNormalizer normalizer used to resolve the week phrases
  * @param mention        the mention to convert
  * @return the resulting DateRangeMention
  * @throws RuntimeException if either week argument cannot be resolved, or if the mention is
  *                          neither a DateRangeMention nor a RelationMention
  */
def toDateRangeMentionBetweenWeeks(weekNormalizer: WeekNormalizer)(mention: Mention): DateRangeMention = {

  // Resolves the named week argument, failing loudly when it is absent or unknown.
  def requireWeekRange(argName: String, relation: RelationMention): WeekRange =
    getWeekRange(weekNormalizer)(argName, relation) match {
      case Some(range) => range
      case None =>
        throw new RuntimeException(s"ERROR: could not find argument $argName in mention [${mention.raw.mkString(" ")}]!")
    }

  mention match {
    case dateRange: DateRangeMention => dateRange
    case relation: RelationMention =>
      // week1 is resolved before week2 so that a missing week1 is reported first.
      val firstWeek = requireWeekRange("week1", relation)
      val lastWeek = requireWeekRange("week2", relation)
      val month = getArgWords("month", relation)
      val rangeStart = TempEvalFormatter.mkDate(firstWeek.startDay, month, None)
      val rangeEnd = TempEvalFormatter.mkDate(lastWeek.endDay, month, None)

      DateRangeMention(relation, rangeStart, rangeEnd)
    case other =>
      throw new RuntimeException(s"ERROR: cannot convert mention of type [${other.getClass.toString}] to DateRangeMention!")
  }
}

def toDateRangeMentionWithMonth(mention: Mention): DateRangeMention = mention match {
case m: DateRangeMention => m

Expand Down Expand Up @@ -917,10 +940,17 @@ package object mentions {
/**
  * Resolves the words bound to argument `argName` of mention `m` into a WeekRange.
  *
  * The words are joined with single spaces and lower-cased. The "last ..." phrases are
  * dispatched to dedicated helpers that receive the whole mention; every other phrase is
  * looked up with `weekNormalizer.norm`. Both "last week" and "last weeks" are accepted
  * because the between-weeks rule captures the plural form ("... and last weeks of May").
  *
  * @param weekNormalizer normalizer consulted for phrases that are not "last ..." phrases
  * @param argName        name of the mention argument holding the week phrase
  * @param m              mention providing the argument
  * @return the resolved range, or None if the argument is missing or the phrase is unknown
  */
private def getWeekRange(weekNormalizer: WeekNormalizer)(argName: String, m:Mention): Option[WeekRange] = {
  getArgWords(argName, m).flatMap { words =>
    words.mkString(" ").toLowerCase() match {
      case "last week" | "last weeks" => getLastWeekRange(m)
      case "last two weeks" => getLastTwoWeeksRange(m)
      case "last three weeks" => getLastThreeWeeksRange(m)
      case _ => weekNormalizer.norm(words)
    }
  }
}

private def getLastWeekRange(m:Mention): Option[WeekRange] = {
Expand All @@ -941,6 +971,15 @@ package object mentions {
Some(WeekRange(startDay = Some(Seq((lastDay - 13).toString)), endDay = Some(Seq(lastDay.toString))))
}

/**
  * Computes the range covering the final three weeks (21 days) of the month named by the
  * "month" argument of the mention.
  *
  * The month length is requested for a non-leap year (`length(false)`), so February is
  * always treated as having 28 days.
  *
  * @param m mention whose "month" argument names the month
  * @return the range from (last day - 20) through the last day of the month, or None when the
  *         mention has no "month" argument
  */
private def getLastThreeWeeksRange(m:Mention): Option[WeekRange] = {
  // Map over the option rather than calling .get, so a missing month yields None.
  getArgWords("month", m).map { monthWords =>
    val monthNumber = TempEvalFormatter.convertLiteralMonth(monthWords.mkString(""))
    val lastDay = Month.of(monthNumber).length(false)

    // 21 days counted inclusively: the start is 20 days before the last day.
    WeekRange(startDay = Some(Seq((lastDay - 20).toString)), endDay = Some(Seq(lastDay.toString)))
  }
}

private def getHoliday(holiday: Seq[String], year: Option[Seq[String]]): (Option[Seq[String]], Option[Seq[String]]) = {
val dayMonthOpt = HolidayNormalizer.norm(holiday, year)
dayMonthOpt match {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,14 @@ class TestNumericEntityRecognition extends Test {
ensure(sentence = "We applied it in Fall in 21", Interval(4, 7), goldEntity= "DATE-RANGE", goldNorm = "XX21-09-22 -- XX21-12-21")
ensure(sentence = "We applied it in fall of 2021", Interval(4, 7), goldEntity= "DATE-RANGE", goldNorm = "2021-09-22 -- 2021-12-21")
}

// The expected norm runs from the start day of the first week named to the end day of the
// second week named, within the given month; the year stays unspecified (XXXX).
it should "recognize between-week date ranges" in {
  // Ordinals spelled out on both sides.
  ensure(sentence = "It took place between the second and third weeks of June",
    Interval(3, 11), goldEntity = "DATE-RANGE", goldNorm = "XXXX-06-08 -- XXXX-06-21")

  // Spelled-out ordinal mixed with a numeric one.
  ensure(sentence = "It took place between the first and 4th weeks of May",
    Interval(3, 11), goldEntity = "DATE-RANGE", goldNorm = "XXXX-05-01 -- XXXX-05-28")
}

it should "recognize measurement units" in {
ensure("It was 12 ha", Interval(2, 4), "MEASUREMENT-AREA", "12.0 ha")
Expand Down

0 comments on commit 9c3ed9c

Please sign in to comment.