In [None]:
import re
import dateparser


def not_recording_date(d, matches):
    """Return True when the span of ``d`` is enclosed by none of ``matches``.

    Args:
        d: Object exposing ``start()`` and ``end()`` (e.g. an ``re.Match``).
        matches: Iterable of objects exposing ``start()`` and ``end()``.

    Returns:
        False as soon as one element of ``matches`` starts at or before
        ``d`` and ends at or after it; True otherwise (including when
        ``matches`` is empty).
    """
    for candidate in matches:
        encloses = candidate.start() <= d.start() and candidate.end() >= d.end()
        if encloses:
            return False
    return True


def classify_dates(dates, content):
  """Split candidate dates into a document, signing and effective date.

  Args:
    dates: Sequence of ``(date, (start, end))`` tuples; the span holds the
      character offsets of the date inside ``content``.
    content: Text the spans refer to.

  Returns:
    A ``(dd, sd, ed)`` tuple where

    * ``dd`` is the date whose span ends earliest in ``content``. It is
      None when ``dates`` is empty, or when that same span is also one of
      the candidates selected around the first keyword match (see ``sd``).
    * ``sd`` is the smallest date among the candidates for which
      ``is_within_100_words(candidate, content, keyword_offset)`` is
      truthy, or None when there is no keyword match or no candidate.
    * ``ed`` is the first entry of ``dates`` (in input order) that has the
      word "effective" somewhere in the 200 characters before it, or None.
  """
  keyword_pattern = re.compile(
      r"\b(?:witness whereof|before me|subscribed|sworn|signed|delivered)\b",
      re.IGNORECASE
  )

  dd = None
  ed = None
  sd = None
  dd_span = None

  if dates:
    # Earliest date by the end offset of its span.
    dd, dd_span = min(dates, key=lambda x: x[1][1])

  for d in dates:
    span_start = d[1][0]
    # Clamp at 0: a negative slice start would count from the END of the
    # text and yield an empty window for dates in the first 200 characters.
    window_start = max(0, span_start - 200)
    if "effective" in content[window_start:span_start].lower():
      ed = d[0]
      break

  # Only the FIRST keyword occurrence is used as the anchor.
  # NOTE(review): the original variable was called `last_match` although it
  # held the first match -- confirm which occurrence is actually intended.
  first_keyword = keyword_pattern.search(content)
  if first_keyword is not None:
    anchor = first_keyword.start()
    # `is_within_100_words` is defined outside this block; presumably it
    # checks word distance between the date and the anchor -- TODO confirm.
    nearby = [d for d in dates if is_within_100_words(d, content, anchor)]
    nearby_spans = {d[1] for d in nearby}
    nearby_dates = {d[0] for d in nearby}

    if nearby_dates:
      sd = min(nearby_dates)

    # A date already claimed as a signing candidate is not reported as dd.
    if dd_span is not None and dd_span in nearby_spans:
      dd = None

  return dd, sd, ed



def find_doc_date(filename, date_pattern, text, recording_date_matches=None):
  """Find the document, signing and effective dates in ``text``.

  Every ``date_pattern`` match in ``text`` that parses with ``dateparser``
  is kept as a candidate when it falls on, or at most 150 days before, the
  earliest parsed recording date and its span is not enclosed by one of
  the recording-date matches. The candidates are then passed to
  ``classify_dates``.

  Args:
    filename: Not used by this function; kept so existing callers work.
    date_pattern: Regex pattern string for dates (it is passed to ``re``
      functions together with ``re.IGNORECASE``).
    text: Text to scan.
    recording_date_matches: Optional iterable of regex match objects for
      recording dates. Presumably these are matches over ``text``, since
      their offsets are compared with those of the date matches -- TODO
      confirm against the caller.

  Returns:
    The ``(dd, sd, ed)`` tuple produced by ``classify_dates``. When there
    are no recording-date matches, or none of them yields a parseable
    date, no candidate can qualify and ``(None, None, None)`` is returned.
  """
  no_dates = (None, None, None)

  # Materialise before testing for emptiness: an iterator (e.g. straight
  # from re.finditer) is always truthy, and it is traversed more than once.
  recording_date_matches = list(recording_date_matches or ())
  if not recording_date_matches:
    return no_dates

  parsed_recording_dates = []
  for rdm in recording_date_matches:
    rd = re.search(date_pattern, rdm.group(), re.IGNORECASE)
    if rd is None:
      # The recording match contains nothing date_pattern recognises;
      # skip it instead of failing on `None.group()`.
      continue
    recording_date_parsed = dateparser.parse(rd.group())
    if recording_date_parsed:
      parsed_recording_dates.append(recording_date_parsed.date())

  if not parsed_recording_dates:
    # Every candidate is filtered against the recording date, so without
    # one the scan below could never keep anything.
    return no_dates
  parsed_recording_date = min(parsed_recording_dates)

  all_dates = []
  for dm in re.finditer(date_pattern, text, re.IGNORECASE):
    # Strip filler such as "day of" before parsing.
    # NOTE(review): this is a case-sensitive substring replacement, so it
    # also rewrites e.g. "Monday" -> "Mon" and leaves "Day"/"OF" alone.
    raw = dm.group().replace("day", "").replace("of", "")
    raw = " ".join(raw.split())

    parsed = dateparser.parse(raw)
    if not parsed:
      continue
    doc_date = parsed.date()

    # Keep only dates on or up to 150 days before the recording date.
    if doc_date > parsed_recording_date:
      continue
    if (parsed_recording_date - doc_date).days > 150:
      continue

    # Drop matches that are themselves part of a recording-date match.
    if not_recording_date(dm, recording_date_matches):
      all_dates.append((doc_date, dm.span()))

  return classify_dates(all_dates, text)


