In [8]:
import re
import csv

# Month name to number mapping
MONTHS = {
    'january': '01', 'february': '02', 'march': '03', 'april': '04',
    'may': '05', 'june': '06', 'july': '07', 'august': '08',
    'september': '09', 'october': '10', 'november': '11', 'december': '12'
}

# Regex patterns for different date formats
DATE_PATTERNS = [
    r'(\d{1,2})(st|nd|rd|th)?\s+(January|February|March|April|May|June|July|August|September|October|November|December)[,]?\s+(\d{4})',
    r'(January|February|March|April|May|June|July|August|September|October|November|December)\s+(\d{1,2})(st|nd|rd|th)?[,]?\s+(\d{4})',
    r'(\d{1,2})/(\d{1,2})/(\d{4})',
    r'(\d{1,2})-(\d{1,2})-(\d{4})'
]

# Function to extract date from text
def extract_date(text):
    for pattern in DATE_PATTERNS:
        match = re.search(pattern, text, re.IGNORECASE)
        if match:
            groups = match.groups()
            if len(groups) == 4:
                # Case: 21st June 2024 or June 21st 2024
                if groups[0].isdigit():  # 21st June 2024
                    day = int(groups[0])
                    month = MONTHS[groups[2].lower()]
                    year = int(groups[3])
                else:  # June 21st 2024
                    day = int(groups[1])
                    month = MONTHS[groups[0].lower()]
                    year = int(groups[3])
            elif len(groups) == 3:
                # Case: 21/06/2024 or 21-06-2024
                day = int(groups[0])
                month = int(groups[1])
                year = int(groups[2])
            else:
                continue

            return f"{day:02d}/{int(month):02d}/{year}"
    return "Date not found"

# Function to read CSV and apply date parsing
def process_csv(file_path):
    with open(file_path, newline='') as csvfile:
        reader = csv.DictReader(csvfile)
        headers = reader.fieldnames
        text_column = headers[0]  # Assume first column is the text column
        for row in reader:
            text = row[text_column]
            extracted = extract_date(text)
            print(f"Text: {text}")
            print(f"Extracted Date: {extracted}\n")

# Run the parser
process_csv("date_parser_testcases.csv")

Text: The event will take place on March 5, 2023.
Extracted Date: 05/03/2023

Text: Her birthday is on 07/08/1990.
Extracted Date: 07/08/1990

Text: The deadline is 2022-12-31.
Extracted Date: Date not found

Text: We met on 1st of January 2000.
Extracted Date: Date not found

Text: The concert is scheduled for 15th September, 2021.
Extracted Date: 15/09/2021

Text: Let's catch up on 02.04.2022.
Extracted Date: Date not found

Text: The project started on 5/6/19.
Extracted Date: Date not found

Text: He was born on 1987/11/23.
Extracted Date: Date not found

Text: Christmas is on 25th Dec 2024.
Extracted Date: Date not found

Text: The meeting is set for April 03, 2020.
Extracted Date: 03/04/2020

Text: Her birthdate, noted as 1997-05-20, is in the records.
Extracted Date: Date not found

Text: Her appointment is on the 2nd of March, 2021.
Extracted Date: Date not found

Text: The exam date is 2021.11.10.
Extracted Date: Date not found

Text: They got married on 12/12/12.
Extracted Dat