In [16]:
import re
import csv

def parse_date(text: str) -> str | None:
    month_map = {
        'january': '01', 'jan': '01',
        'february': '02', 'feb': '02',
        'march': '03', 'mar': '03',
        'april': '04', 'apr': '04',
        'may': '05',
        'june': '06', 'jun': '06',
        'july': '07', 'jul': '07',
        'august': '08', 'aug': '08',
        'september': '09', 'sep': '09', 'sept': '09',
        'october': '10', 'oct': '10',
        'november': '11', 'nov': '11',
        'december': '12', 'dec': '12',
    }

    # Create a regex pattern string from the month_map keys for matching month names.
    month_names_pattern = '|'.join(month_map.keys())

    # A list of regex patterns to try in order of precedence.
    # Named groups (?P<name>...) are used to easily extract date components.
    patterns = [
        # Pattern 1: Handles "21st June, 2024", "11 August 2024", "3rd of March 1998"
        re.compile(
            r'\b(?P<day>\d{1,2})(?:st|nd|rd|th)?(?:\s+of)?\s+(?P<month>' + month_names_pattern + r')[,.]?\s+(?P<year>\d{4})\b',
            re.IGNORECASE
        ),
        # Pattern 2: Handles "June 21st, 2024", "August 11, 2024"
        re.compile(
            r'\b(?P<month>' + month_names_pattern + r')\s+(?P<day>\d{1,2})(?:st|nd|rd|th)?[,.]?\s+(?P<year>\d{4})\b',
            re.IGNORECASE
        ),
        # Pattern 3: Handles "DD/MM/YYYY", "DD-MM-YYYY", "DD.MM.YYYY"
        re.compile(r'\b(?P<day>\d{1,2})[/\.-](?P<month>\d{1,2})[/\.-](?P<year>\d{4})\b'),
        # Pattern 4: Handles "YYYY-MM-DD", "YYYY/MM/DD", "YYYY.MM.DD"
        re.compile(r'\b(?P<year>\d{4})[/\.-](?P<month>\d{1,2})[/\.-](?P<day>\d{1,2})\b'),
        # Pattern 5: Handles "DD/MM/YY", "DD-MM-YY"
        re.compile(r'\b(?P<day>\d{1,2})[/\.-](?P<month>\d{1,2})[/\.-](?P<year>\d{2})\b'),
        # Pattern 6: Handles "YY/MM/DD", "YY-MM-DD"
        re.compile(r'\b(?P<year>\d{2})[/\.-](?P<month>\d{1,2})[/\.-](?P<day>\d{1,2})\b'),
    ]

    for pattern in patterns:
        match = pattern.search(text)
        if match:
            parts = match.groupdict()
            day_str = parts.get('day')
            month_str = parts.get('month')
            year_str = parts.get('year')

            # Handle 2-digit years by converting them to 4-digit years
            if len(year_str) == 2:
                year_val_2_digit = int(year_str)
                # Heuristic: years > current_year_last_two_digits+10 are 19xx, else 20xx.
                # This is a simple rule; for this script, we'll use a fixed cutoff.
                # e.g., > 30 is 19xx, <= 30 is 20xx. Handles 1931-2030.
                if year_val_2_digit > 30:
                    year_str = f"19{year_str}"
                else:
                    year_str = f"20{year_str}"

            if month_str and not month_str.isdigit():
                month_num_str = month_map.get(month_str.lower())
            else:
                month_num_str = month_str

            try:
                day_val = int(day_str)
                month_val = int(month_num_str)
                if not (1 <= day_val <= 31 and 1 <= month_val <= 12):
                    continue
            except (ValueError, TypeError):
                continue

            return f"{day_val:02d}/{month_val:02d}/{year_str}"

    return None

In [18]:
# --- Main execution block to test the function using the provided CSV ---
if __name__ == "__main__":
    csv_file_name = 'date_parser_testcases.csv'
    inputs = []
    expected_outputs = []

    try:
        with open(csv_file_name, mode='r', encoding='utf-8') as infile:
            # Use DictReader to easily access columns by name
            reader = csv.DictReader(infile)
            for row in reader:
                # Check for variations in column names (e.g., with/without spaces)
                input_col = 'Input' if 'Input' in row else 'Input '
                expected_col = 'Expected Output' if 'Expected Output' in row else 'Expected Output '

                if input_col in row and expected_col in row:
                    inputs.append(row[input_col])
                    expected_outputs.append(row[expected_col])
    except FileNotFoundError:
        print(f"Error: The file '{csv_file_name}' was not found.")
        print("Please make sure the CSV file is in the same directory as the script.")
    except Exception as e:
        print(f"An error occurred while reading the CSV file: {e}")

    if inputs:
        print(f"--- Running Date Parser Test Cases from '{csv_file_name}' ---\n")
        correct_count = 0
        for i, text_input in enumerate(inputs):
            parsed_result = parse_date(text_input)
            expected_result = expected_outputs[i]

             status = "PASS" if parsed_result == expected_result else "FAIL"
             if status == "PASS":
                correct_count += 1

            print(f"{i+1}. Input:    \"{text_input}\"")
            print(f"   Output:   {parsed_result}\n")
            print(f" Expected Output: {}

        total_count = len(inputs)
        accuracy = (correct_count / total_count) * 100 if total_count > 0 else 0
    else:
        print("No test cases were loaded. Please check the CSV file.")

--- Running Date Parser Test Cases from 'date_parser_testcases.csv' ---

1. Input:    "The event will take place on March 5, 2023."
   Output:   05/03/2023

2. Input:    "Her birthday is on 07/08/1990."
   Output:   07/08/1990

3. Input:    "The deadline is 2022-12-31."
   Output:   31/12/2022

4. Input:    "We met on 1st of January 2000."
   Output:   01/01/2000

5. Input:    "The concert is scheduled for 15th September, 2021."
   Output:   15/09/2021

6. Input:    "Let's catch up on 02.04.2022."
   Output:   02/04/2022

7. Input:    "The project started on 5/6/19."
   Output:   05/06/2019

8. Input:    "He was born on 1987/11/23."
   Output:   23/11/1987

9. Input:    "Christmas is on 25th Dec 2024."
   Output:   25/12/2024

10. Input:    "The meeting is set for April 03, 2020."
   Output:   03/04/2020

11. Input:    "Her birthdate, noted as 1997-05-20, is in the records."
   Output:   20/05/1997

12. Input:    "Her appointment is on the 2nd of March, 2021."
   Output:   02/03/2021

