In [1]:
import csv
from datetime import datetime
import re
import pandas as pd
import numpy as np

In [4]:
# Function to load CSV file using DictReader
def load_csv(filepath, encoding=None):
    """Read (input, expected output) pairs from a test-case CSV file.

    Args:
        filepath: Path to a CSV file whose header row contains the columns
            'Input' and 'Expected Output'.
        encoding: Text encoding handed to ``open``. ``None`` (the default)
            keeps the platform's default encoding.

    Returns:
        A list of ``(input_text, expected_output)`` tuples, one per data row,
        in file order. A file with only a header (or no content) gives ``[]``.

    Raises:
        KeyError: If the header row has no 'Input' or 'Expected Output' column.
        OSError: If the file cannot be opened.
    """
    # newline='' hands newline handling to the csv module, so line breaks
    # embedded in quoted fields reach the caller unchanged instead of being
    # rewritten by universal-newline translation.
    with open(filepath, mode='r', newline='', encoding=encoding) as file:
        csv_reader = csv.DictReader(file)
        return [(row['Input'], row['Expected Output']) for row in csv_reader]

# Function to identify and format dates from text

# Month number keyed by the first three letters of the English month name, so
# full names and abbreviations share one lookup.
_MONTH_NUMBERS = {
    'Jan': '01', 'Feb': '02', 'Mar': '03', 'Apr': '04', 'May': '05', 'Jun': '06',
    'Jul': '07', 'Aug': '08', 'Sep': '09', 'Oct': '10', 'Nov': '11', 'Dec': '12',
}

# Full or abbreviated (capitalised) English month name, captured as 'month'.
_MONTH_NAME = (
    r'(?P<month>Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|June?|July?|'
    r'Aug(?:ust)?|Sep(?:t(?:ember)?)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)'
)

# Optional ordinal suffix after a day number ("1st", "2nd", "3rd", "15th").
_ORDINAL = r'(?:st|nd|rd|th)?'

# Patterns in priority order. Every pattern exposes the named groups 'day',
# 'month' and 'year', so one code path formats all of them. The (?<!\d) and
# (?!\d) guards stop a pattern from matching inside a longer run of digits.
_DATE_PATTERNS = tuple(re.compile(pattern) for pattern in (
    # "1st of January 2000", "15th September, 2021", "25th Dec 2024",
    # "2nd of March, 2021"
    r'(?<!\d)(?P<day>\d{1,2})' + _ORDINAL + r'\s+(?:of\s+)?' + _MONTH_NAME
    + r'\.?,?\s+(?P<year>\d{4})(?!\d)',
    # "March 5, 2023", "April 03, 2020", "February 15th, 2022"
    r'\b' + _MONTH_NAME + r'\.?\s+(?P<day>\d{1,2})' + _ORDINAL
    + r',?\s+(?P<year>\d{4})(?!\d)',
    # Year first: "2022-12-31", "1987/11/23", "2021.11.10"
    # (the same separator must be used twice).
    r'(?<!\d)(?P<year>\d{4})(?P<sep>[-/.])(?P<month>\d{1,2})(?P=sep)(?P<day>\d{1,2})(?!\d)',
    # Day first: "07/08/1990", "02.04.2022", "5/6/19", "12/12/12".
    r'(?<!\d)(?P<day>\d{1,2})(?P<sep>[/.])(?P<month>\d{1,2})(?P=sep)(?P<year>\d{4}|\d{2})(?!\d)',
))


def _expand_year(year):
    """Return a four-digit year string, expanding a two-digit year if needed.

    Two-digit years follow the POSIX ``%y`` convention: 69-99 map to
    1969-1999 and 00-68 map to 2000-2068.
    """
    if len(year) == 4:
        return year
    century = '19' if int(year) >= 69 else '20'
    return century + year


def extract_date(text):
    """Find the first recognisable date in ``text`` and return it as DD/MM/YYYY.

    Supported forms, tried in this order (the first pattern that matches
    anywhere in the text wins):

    1. day + month name + year: "1st of January 2000", "25th Dec 2024"
    2. month name + day + year: "March 5, 2023", "February 15th, 2022"
    3. numeric, year first, separated by "-", "/" or ".": "2022-12-31"
    4. numeric, day first, separated by "/" or ".": "07/08/1990", "5/6/19"

    Numeric dates that do not start with a four-digit year are read as
    day/month/year. Month names must be capitalised. Two-digit years are
    expanded with the POSIX ``%y`` pivot (69-99 -> 19xx, 00-68 -> 20xx).
    No calendar validation is performed on the extracted numbers.

    Args:
        text: The string to search.

    Returns:
        The date as a zero-padded ``'DD/MM/YYYY'`` string, or ``None`` when
        no supported date form is found.
    """
    for pattern in _DATE_PATTERNS:
        matched = pattern.search(text)
        if matched is None:
            continue

        month = matched.group('month')
        if month.isdigit():
            month = month.zfill(2)
        else:
            # Month given by name: the first three letters identify it.
            month = _MONTH_NUMBERS[month[:3]]

        day = matched.group('day').zfill(2)
        year = _expand_year(matched.group('year'))
        return f"{day}/{month}/{year}"
    return None

# Location of the CSV holding (input text, expected date) test cases
csv_filepath = 'date_parser_testcases.csv'
test_data = load_csv(csv_filepath)

# Echo every loaded pair so the CSV contents can be checked by eye
print("Loaded test cases:")
for input_text, expected in test_data:
    print(f"Input: {input_text}, Expected: {expected}")

# Run extract_date on each input and report whether it agrees with the expectation
for input_text, expected_output in test_data:
    extracted_date = extract_date(input_text)
    # One print call, newline-separated; the trailing "\n" in the last part
    # leaves a blank line between consecutive cases.
    print(
        f"Input Text: {input_text}",
        f"Extracted Date: {extracted_date}",
        f"Expected Output: {expected_output}",
        f"Does it match? {extracted_date == expected_output}\n",
        sep="\n",
    )

    

Loaded test cases:
Input: The event will take place on March 5, 2023., Expected: 05/03/2023
Input: Her birthday is on 07/08/1990., Expected: 07/08/1990
Input: The deadline is 2022-12-31., Expected: 31/12/2022
Input: We met on 1st of January 2000., Expected: 01/01/2000
Input: The concert is scheduled for 15th September, 2021., Expected: 15/09/2021
Input: Let's catch up on 02.04.2022., Expected: 02/04/2022
Input: The project started on 5/6/19., Expected: 05/06/2019
Input: He was born on 1987/11/23., Expected: 23/11/1987
Input: Christmas is on 25th Dec 2024., Expected: 25/12/2024
Input: The meeting is set for April 03, 2020., Expected: 03/04/2020
Input: Her birthdate, noted as 1997-05-20, is in the records., Expected: 20/05/1997
Input: Her appointment is on the 2nd of March, 2021., Expected: 02/03/2021
Input: The exam date is 2021.11.10., Expected: 10/11/2021
Input: They got married on 12/12/12., Expected: 12/12/2012
Input: The workshop is on February 15th, 2022., Expected: 15/02/2022
Inp