# **PART 2**

In [1]:
import pandas as pd

# Load the labelled date-parsing test cases (the preview below shows an
# "Input" sentence column and an "Expected Output" date column) and show
# the first rows as a sanity check.
df = pd.read_csv(filepath_or_buffer="/content/date_parser_testcases.csv")
display(df.head(n=5))

Unnamed: 0,Input,Expected Output
0,"The event will take place on March 5, 2023.",05/03/2023
1,Her birthday is on 07/08/1990.,07/08/1990
2,The deadline is 2022-12-31.,31/12/2022
3,We met on 1st of January 2000.,01/01/2000
4,"The concert is scheduled for 15th September, 2...",15/09/2021


In [15]:
import re
from datetime import datetime

# Lower-case month names and common abbreviations mapped to month numbers.
_MONTH_NUMBERS = {
    'jan': 1, 'january': 1, 'feb': 2, 'february': 2, 'mar': 3, 'march': 3,
    'apr': 4, 'april': 4, 'may': 5, 'jun': 6, 'june': 6, 'jul': 7, 'july': 7,
    'aug': 8, 'august': 8, 'sep': 9, 'sept': 9, 'september': 9,
    'oct': 10, 'october': 10, 'nov': 11, 'november': 11,
    'dec': 12, 'december': 12,
}

# Reusable regex fragments. Every pattern exposes the named groups "day",
# "year" and either "month" (numeric) or "month_name" (textual), so a single
# converter can handle all of them.
_NO_DIGIT_BEFORE = r'(?<!\d)'
_NO_DIGIT_AFTER = r'(?!\d)'
_DAY = r'(?P<day>3[01]|[12][0-9]|0?[1-9])'
_MONTH = r'(?P<month>1[0-2]|0?[1-9])'
_LOOSE_DAY = r'(?P<day>\d{1,2})'
_LOOSE_MONTH = r'(?P<month>\d{1,2})'
_YEAR4 = r'(?P<year>\d{4})'
_YEAR2 = r'(?P<year>\d{2})'
_YEAR2OR4 = r'(?P<year>\d{4}|\d{2})'
_ORDINAL = r'(?:st|nd|rd|th)?'
# Only "/" and "-" separate numeric parts ("|" inside a character class would
# be a literal pipe, not an alternation).
_SEP = r'[/-]'
# Match only real month names (longest first) as whole words, optionally
# followed by an abbreviation dot as in "Sep.".
_MONTH_NAME = (
    r'\b(?P<month_name>'
    + '|'.join(sorted(_MONTH_NUMBERS, key=len, reverse=True))
    + r')\b\.?'
)

# Patterns are tried in this order; earlier entries win. In particular the
# day-first numeric form is preferred over the month-first form.
_DATE_PATTERNS = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        # DD.MM.YYYY
        rf'{_NO_DIGIT_BEFORE}{_DAY}\.{_MONTH}\.{_YEAR4}{_NO_DIGIT_AFTER}',
        # DD/MM/YY or DD-MM-YY
        rf'{_NO_DIGIT_BEFORE}{_DAY}{_SEP}{_MONTH}{_SEP}{_YEAR2}{_NO_DIGIT_AFTER}',
        # DD/MM/YYYY or DD-MM-YYYY
        rf'{_NO_DIGIT_BEFORE}{_DAY}{_SEP}{_MONTH}{_SEP}{_YEAR4}{_NO_DIGIT_AFTER}',
        # MM/DD/YYYY or MM-DD-YYYY
        rf'{_NO_DIGIT_BEFORE}{_MONTH}{_SEP}{_DAY}{_SEP}{_YEAR4}{_NO_DIGIT_AFTER}',
        # YYYY-MM-DD
        rf'{_NO_DIGIT_BEFORE}{_YEAR4}-{_MONTH}-{_DAY}{_NO_DIGIT_AFTER}',
        # DD Month YYYY, DDth Month YYYY, DDth of Month, YYYY
        rf'{_NO_DIGIT_BEFORE}{_DAY}{_ORDINAL}\s+(?:of\s+)?{_MONTH_NAME},?\s+'
        rf'{_YEAR2OR4}{_NO_DIGIT_AFTER}',
        # Month DD, YYYY or Month DDth, YYYY
        rf'{_MONTH_NAME}\s+{_DAY}{_ORDINAL},?\s+{_YEAR2OR4}{_NO_DIGIT_AFTER}',
        # YYYY Month DD
        rf'{_NO_DIGIT_BEFORE}{_YEAR4}\s+{_MONTH_NAME}\s+{_DAY}{_ORDINAL}'
        rf'{_NO_DIGIT_AFTER}',
        # YYYY/MM/DD
        rf'{_NO_DIGIT_BEFORE}{_YEAR4}/{_LOOSE_MONTH}/{_LOOSE_DAY}{_NO_DIGIT_AFTER}',
    )
)


def _date_fields(match):
    """Convert a match of one of ``_DATE_PATTERNS`` into ``(day, month, year)`` ints."""
    found = match.groupdict()
    year_text = found['year']
    year = int(year_text)
    # Decide on the matched text, not the int: "05" must become 2005, not 5.
    if len(year_text) == 2:
        year += 2000
    month_name = found.get('month_name')
    if month_name is not None:
        month = _MONTH_NUMBERS[month_name.lower()]
    else:
        month = int(found['month'])
    return int(found['day']), month, year


def parse_date(text):
    """Extract the first recognisable date from ``text`` as ``DD/MM/YYYY``.

    Supported notations, in priority order: ``DD.MM.YYYY``, ``DD/MM/YY``,
    ``DD/MM/YYYY``, ``MM/DD/YYYY`` (``/`` or ``-`` as separator),
    ``YYYY-MM-DD``, ``DD[th] [of] Month[,] YYYY``, ``Month DD[th][,] YYYY``,
    ``YYYY Month DD[th]`` and ``YYYY/MM/DD``. Month names are matched
    case-insensitively, full or abbreviated, with an optional trailing dot.
    Two-digit years are interpreted as 20YY.

    For each notation every occurrence in the text is examined, so a
    calendar-invalid hit (e.g. 31/02/2020) does not hide a later valid one.

    Args:
        text: The sentence to scan. Non-string values (e.g. NaN coming from
            a pandas column) are treated as containing no date.

    Returns:
        The date formatted as ``'DD/MM/YYYY'``, or ``'N/A'`` when no valid
        date is found.
    """
    if not isinstance(text, str):
        return 'N/A'

    for pattern in _DATE_PATTERNS:
        for match in pattern.finditer(text):
            day, month, year = _date_fields(match)
            try:
                # The constructor is used purely as a calendar validity check.
                datetime(year, month, day)
            except ValueError:
                continue
            # Manual formatting: strftime('%Y') zero-padding is platform-dependent.
            return f'{day:02d}/{month:02d}/{year:04d}'

    return 'N/A'

In [16]:
# Run the parser over every test sentence and keep the result next to the
# expected value so mismatches can be inspected row by row.
df['Parsed Output'] = df['Input'].apply(parse_date)

# Score: share of rows whose parsed string equals the expected string exactly.
total_predictions = df.shape[0]
correct_predictions = df['Parsed Output'].eq(df['Expected Output']).sum()
accuracy = (correct_predictions / total_predictions) * 100

print(f"Accuracy: {accuracy:.2f}%")
display(df)

Accuracy: 93.00%


Unnamed: 0,Input,Expected Output,Parsed Output
0,"The event will take place on March 5, 2023.",05/03/2023,05/03/2023
1,Her birthday is on 07/08/1990.,07/08/1990,07/08/1990
2,The deadline is 2022-12-31.,31/12/2022,31/12/2022
3,We met on 1st of January 2000.,01/01/2000,01/01/2000
4,"The concert is scheduled for 15th September, 2...",15/09/2021,15/09/2021
...,...,...,...
95,"We celebrate Independence Day on 2023-07-04, a...",04/07/2023,04/07/2023
96,The final date for submission is 30th November...,30/11/2022,30/11/2022
97,"The annual conference is on 15th October 2023,...",15/10/2023,15/10/2023
98,"His birthdate, noted as 1990-05-20, is in the ...",20/05/1990,20/05/1990
