In [35]:
import csv
from datetime import datetime
import re
import pandas as pd
import numpy as np

In [42]:
# Function to read CSV file using DictReader
def read_csv(filepath, encoding=None):
    """Load (input text, expected output) pairs from a CSV file.

    Parameters
    ----------
    filepath : str or path-like
        Path of a CSV file whose header row contains the columns
        'Input' and 'Expected Output'.
    encoding : str, optional
        Text encoding handed to ``open``. ``None`` (the default) keeps the
        platform default encoding.

    Returns
    -------
    list of tuple
        One ``(row['Input'], row['Expected Output'])`` tuple per data row,
        in file order.

    Raises
    ------
    OSError
        If the file cannot be opened.
    KeyError
        If either required column is missing from the header.
    """
    # newline='' leaves newline handling to the csv module, so line breaks
    # embedded in quoted fields come back unchanged instead of being
    # rewritten by universal-newline translation.
    with open(filepath, mode='r', newline='', encoding=encoding) as file:
        reader = csv.DictReader(file)
        return [(row['Input'], row['Expected Output']) for row in reader]


Defining regex patterns and parsing dates from text

In [43]:
# Function to parse dates from text

# First three letters of an English month name (lower-cased) -> month number.
_MONTH_NUMBERS = {
    'jan': 1, 'feb': 2, 'mar': 3, 'apr': 4, 'may': 5, 'jun': 6,
    'jul': 7, 'aug': 8, 'sep': 9, 'oct': 10, 'nov': 11, 'dec': 12,
}

# Full or three-letter month name, optionally followed by a period ("Dec.").
# The trailing \b stops "Mar" from matching inside a longer word.
_MONTH_NAME = (
    r'(?P<month>Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|June?|July?|'
    r'Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\b\.?'
)

# Optional ordinal suffix after a day number ("1st", "2nd", "3rd", "4th").
_ORDINAL = r'(?:st|nd|rd|th)?'

# (compiled pattern, allow_day_month_swap) pairs, tried in this order.
# Every pattern exposes the named groups 'day', 'month' and 'year'.
# The (?<!\d) / (?!\d) guards keep a match from starting or ending in the
# middle of a longer run of digits.
_DATE_PATTERNS = [
    # "1st of January 2000", "15th September, 2021", "25th Dec 2024"
    (re.compile(
        r'(?<!\d)(?P<day>\d{1,2})' + _ORDINAL + r'\s+(?:of\s+)?' + _MONTH_NAME
        + r',?\s+(?P<year>\d{4})(?!\d)',
        re.IGNORECASE), False),
    # "March 5, 2023", "February 15th, 2022"
    (re.compile(
        r'\b' + _MONTH_NAME + r'\s+(?P<day>\d{1,2})' + _ORDINAL
        + r',?\s+(?P<year>\d{4})(?!\d)',
        re.IGNORECASE), False),
    # Year first: "2022-12-31", "1987/11/23", "2021.11.10"
    (re.compile(
        r'(?<!\d)(?P<year>\d{4})(?P<sep>[-/.])(?P<month>\d{1,2})(?P=sep)'
        r'(?P<day>\d{1,2})(?!\d)'), False),
    # Day first: "07/08/1990", "02.04.2022", "5/6/19" (2- or 4-digit year)
    (re.compile(
        r'(?<!\d)(?P<day>\d{1,2})(?P<sep>[-/.])(?P<month>\d{1,2})(?P=sep)'
        r'(?P<year>\d{4}|\d{2})(?!\d)'), True),
]


def _date_from_match(match, allow_swap):
    """Convert one regex match into 'DD/MM/YYYY', or None if it is not a real date."""
    month_token = match.group('month')
    if month_token.isdigit():
        month = int(month_token)
    else:
        month = _MONTH_NUMBERS[month_token[:3].lower()]
    day = int(match.group('day'))

    year_token = match.group('year')
    year = int(year_token)
    if len(year_token) == 2:
        # Same pivot as strptime's %y: 00-68 -> 2000-2068, 69-99 -> 1969-1999.
        year += 2000 if year <= 68 else 1900

    # A "month" above 12 cannot be a month; when the other number can be one,
    # read the text as US-style month/day (e.g. 08/31/2021).
    if allow_swap and month > 12 and day <= 12:
        day, month = month, day

    try:
        datetime(year, month, day)  # rejects impossible dates such as 31/02
    except ValueError:
        return None
    return f"{day:02d}/{month:02d}/{year:04d}"


def parse_date(text):
    """Extract the first recognisable date from ``text`` as 'DD/MM/YYYY'.

    Formats are tried in a fixed priority order (not by position in the
    text): day-first with month name, month-first with month name, numeric
    year-first, numeric day-first. Numeric dates may use '-', '/' or '.' as
    separator, the same one in both positions. Month names are matched
    case-insensitively. Ambiguous numeric dates such as 07/08/1990 are read
    day-first; day and month are swapped only when the month would otherwise
    exceed 12. Two-digit years are expanded with the strptime %y pivot.

    Parameters
    ----------
    text : str
        Free text that may contain a date.

    Returns
    -------
    str or None
        The date as zero-padded 'DD/MM/YYYY', or None when ``text`` is not a
        string or contains no valid calendar date in a supported format.
    """
    if not isinstance(text, str):
        return None

    for pattern, allow_swap in _DATE_PATTERNS:
        for match in pattern.finditer(text):
            formatted = _date_from_match(match, allow_swap)
            if formatted is not None:
                return formatted
    return None


In [44]:
# Load the (input text, expected date) pairs from the test-case CSV
filepath = 'date_parser_testcases.csv'
data = read_csv(filepath)

# Echo the raw rows so the parsed CSV contents can be inspected
print("Data read from CSV:")
for row in data:
    print(row)

# Run parse_date on every input and report it next to the expected value
for text, expected_output in data:
    date = parse_date(text)
    report = (
        f"Text: {text}",
        f"Extracted Date: {date}",
        f"Expected Output: {expected_output}",
        f"Match: {date == expected_output}\n",
    )
    print("\n".join(report))

Data read from CSV:
('The event will take place on March 5, 2023.', '05/03/2023')
('Her birthday is on 07/08/1990.', '07/08/1990')
('The deadline is 2022-12-31.', '31/12/2022')
('We met on 1st of January 2000.', '01/01/2000')
('The concert is scheduled for 15th September, 2021.', '15/09/2021')
("Let's catch up on 02.04.2022.", '02/04/2022')
('The project started on 5/6/19.', '05/06/2019')
('He was born on 1987/11/23.', '23/11/1987')
('Christmas is on 25th Dec 2024.', '25/12/2024')
('The meeting is set for April 03, 2020.', '03/04/2020')
('Her birthdate, noted as 1997-05-20, is in the records.', '20/05/1997')
('Her appointment is on the 2nd of March, 2021.', '02/03/2021')
('The exam date is 2021.11.10.', '10/11/2021')
('They got married on 12/12/12.', '12/12/2012')
('The workshop is on February 15th, 2022.', '15/02/2022')
('Submit your report by 08/31/2021.', '31/08/2021')
('The course starts on 1st July 2023.', '01/07/2023')
('Independence Day is on 4th of July, 2022.', '04/07/2022')
(