# Date Grammar Testing

Testing date parsing with optional years and smart year inference.
Supports formats like: `Jan 2`, `Jan 2 26`, `1/15`, `1/15/2026`

In [17]:
# Grammar definition
DATE_GRAMMAR = r"""
?start: date_constraint

date_constraint: date ("-" date)?

date: mdy_slash | mdy_text

// Format 1: MM/DD/YYYY (year required)
mdy_slash: MONTH_NUM "/" DAY_NUM "/" YEAR

// Format 2: "Jan 15 2024" (year required)
mdy_text: MONTH_TEXT DAY_NUM YEAR

// Terminals
MONTH_TEXT.2: /(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)/i
YEAR.1: /\d{4}|\d{2}/
MONTH_NUM.1: /1[0-2]|0?[1-9]/
DAY_NUM.1: /[12][0-9]|3[01]|0?[1-9]/

%import common.WS
%ignore WS
"""

In [18]:
from datetime import date
from lark import Lark, Transformer

MONTH_MAP = {
    "JAN": 1, "FEB": 2, "MAR": 3, "APR": 4,
    "MAY": 5, "JUN": 6, "JUL": 7, "AUG": 8,
    "SEP": 9, "OCT": 10, "NOV": 11, "DEC": 12,
}

class DateValidator(Transformer):
    """Transforms parsed dates into Python date objects."""
    
    def _resolve_date(self, month, day, year):
        """Create date object and validate it."""
        try:
            return date(year, month, day)
        except ValueError as e:
            raise ValueError(f"Invalid date: {e}")
    
    def YEAR(self, token):
        """Convert year token to int, handling 2-digit years."""
        year_val = int(token)
        if year_val < 100:
            return 2000 + year_val
        return year_val
    
    def MONTH_NUM(self, token):
        return int(token)
    
    def DAY_NUM(self, token):
        return int(token)
    
    def MONTH_TEXT(self, token):
        return MONTH_MAP[token.upper()]
    
    def mdy_slash(self, items):
        """Process MM/DD/YYYY format."""
        month, day, year = items
        return self._resolve_date(month, day, year)
    
    def mdy_text(self, items):
        """Process 'Jan 15 2026' format."""
        month, day, year = items
        return self._resolve_date(month, day, year)
    
    def date(self, items):
        return items[0]

    def date_constraint(self, items):
        """Process single date or date range."""
        if len(items) == 1:
            # Single date
            return items[0]
        else:
            # Date range: return tuple (start_date, end_date)
            start_date, end_date = items
            if end_date < start_date:
                raise ValueError(f"Invalid range: end date {end_date} is before start date {start_date}")
            return (start_date, end_date)

In [19]:
# Create parser with transformer
parser = Lark(DATE_GRAMMAR, parser="lalr", transformer=DateValidator())
print("Parser created successfully!")

Parser created successfully!


In [20]:
import lark
def validate_date_constraint(text):
    """
    Parse a date constraint and return helpful error info.
    
    Returns:
        date object if valid
        
    Raises:
        ValueError with detailed context if invalid
    """
    try:
        return parser.parse(text)
    except lark.UnexpectedInput as e:
        # Extract context
        line = e.get_context(text)
        token = e.token if hasattr(e, 'token') else None
        
        # Build helpful message
        if token:
            msg = f"Invalid date format at '{token}'\n"
            msg += f"Position: {e.pos_in_stream}\n"
            msg += f"Context:\n{line}\n"
            
            # Try to give specific advice
            if e.expected and 'YEAR' in str(e.expected):
                msg += "Missing year. Expected format: 'Jan 2 26' or '1/2/26'"
        else:
            msg = f"Invalid date format: {text}\n"
            msg += f"Expected format: 'Jan 2 26' or '1/2/26'"
        
        raise ValueError(msg)
    except Exception as e:
        raise ValueError(f"Error parsing '{text}': {e}")

In [21]:
# Test it
test_cases = [
    "Jan 2",      # Missing year
    "13/15/26",   # Invalid month
    "Feb 29 23",  # Invalid day (not leap year)
]

for text in test_cases:
    print(f"\nTesting: '{text}'")
    print("-" * 40)
    try:
        result = validate_date_constraint(text)
        print(f"Success: {result}")
    except ValueError as e:
        print(f"Error caught:\n{e}")


Testing: 'Jan 2'
----------------------------------------
Error caught:
Invalid date format: Jan 2
Expected format: 'Jan 2 26' or '1/2/26'

Testing: '13/15/26'
----------------------------------------
Error caught:
Invalid date format at '3'
Position: 1
Context:
13/15/26
 ^



Testing: 'Feb 29 23'
----------------------------------------
Error caught:
Error parsing 'Feb 29 23': Invalid date: day is out of range for month


In [22]:
# Simple test
result = parser.parse("Jan 2")
print(f"Today: {date.today()}")
print(f"Parsed 'Jan 2' -> {result}")
print(f"Type: {type(result)}")

UnexpectedToken: Unexpected token Token('$END', '') at line 1, column 5.
Expected one of: 
	* YEAR


In [23]:
# Test parsing errors with helpful messages
from lark.exceptions import UnexpectedInput, UnexpectedToken

test_input = "Jan 2"  # Missing year

try:
    result = parser.parse(test_input)
    print(f"Unexpectedly succeeded: {result}")
except UnexpectedInput as e:
    print(f"Input: '{test_input}'")
    print(f"Error at position {e.pos_in_stream}: {e.token}")
    print(f"Expected: {e.expected}")
    print(f"\nFull error:\n{e}")

Input: 'Jan 2'
Error at position 4: 
Expected: {'YEAR'}

Full error:
Unexpected token Token('$END', '') at line 1, column 5.
Expected one of: 
	* YEAR



In [24]:
# Comprehensive tests with mocked date
MOCK_TODAY = date(2024, 4, 15)

test_cases = {
    # Year provided
    "10/25/2025": date(2025, 10, 25),
    "FEB 29 2024": date(2024, 2, 29),
    "Jan 1 26": date(2026, 1, 1),
    # Year omitted, future dates this year
    "12/25": date(2024, 12, 25),
    "MAY 1": date(2024, 5, 1),
    "5/1": date(2024, 5, 1),
    # Year omitted, past dates (should roll to next year)
    "1/15": date(2025, 1, 15),
    "MAR 10": date(2025, 3, 10),
    "Feb 1": date(2025, 2, 1),
    # Edge: today's date
    "4/15": date(2024, 4, 15),
    "APR 15": date(2024, 4, 15),
}

print(f"--- Testing with MOCK_TODAY = {MOCK_TODAY} ---\n")

# Temporarily replace date.today()
original_today = date.today
date.today = lambda: MOCK_TODAY

try:
    passed = 0
    failed = 0
    
    for text, expected in test_cases.items():
        try:
            result = parser.parse(text)
            if result == expected:
                status = "✅"
                passed += 1
            else:
                status = "❌"
                failed += 1
            print(f'{status} "{text:15}" -> {result} (expected {expected})')
        except Exception as e:
            print(f'❌ "{text:15}" -> ERROR: {e}')
            failed += 1
    
    print(f"\n--- Results: {passed} passed, {failed} failed ---")
    
finally:
    # Restore original date.today()
    date.today = original_today

--- Testing with MOCK_TODAY = 2024-04-15 ---



TypeError: cannot set 'today' attribute of immutable type 'datetime.date'

In [25]:
# Interactive testing cell - run with different inputs
test_input = "Dec 25"  # Change this to test different dates

try:
    result = parser.parse(test_input)
    print(f"Input: {test_input}")
    print(f"Today: {date.today()}")
    print(f"Result: {result}")
    print(f"Type: {type(result)}")
except Exception as e:
    print(f"Error parsing '{test_input}': {e}")

Error parsing 'Dec 25': Unexpected token Token('$END', '') at line 1, column 5.
Expected one of: 
	* YEAR



In [26]:
# Test edge cases and errors
print("--- Testing Invalid Dates ---\n")

invalid_dates = [
    "FEB 29 2023",  # Not a leap year
    "13/15",         # Invalid month
    "11/31",         # November only has 30 days
    "APR 31",        # April only has 30 days
]

for text in invalid_dates:
    try:
        result = parser.parse(text)
        print(f'❌ "{text}" unexpectedly succeeded: {result}')
    except Exception as e:
        print(f'✅ "{text}" correctly rejected: {type(e).__name__}')

--- Testing Invalid Dates ---

✅ "FEB 29 2023" correctly rejected: ValueError
✅ "13/15" correctly rejected: UnexpectedToken
✅ "11/31" correctly rejected: UnexpectedToken
✅ "APR 31" correctly rejected: UnexpectedToken


In [27]:
parser = Lark(DATE_GRAMMAR, parser="lalr", transformer=DateValidator())

# Test single dates
print("Single dates:")
print(parser.parse("Jan 2 26"))           # -> date(2026, 1, 2)
print(parser.parse("1/15/25"))            # -> date(2025, 1, 15)

# Test date ranges
print("\nDate ranges:")
print(parser.parse("Jan 1 26 - Jan 5 26"))       # -> (date(2026, 1, 1), date(2026, 1, 5))
print(parser.parse("12/20/25 - 12/25/25"))       # -> (date(2025, 12, 20), date(2025, 12, 25))
print(parser.parse("Feb 1 26-Feb 10 26"))        # -> (date(2026, 2, 1), date(2026, 2, 10))

# Test invalid range
try:
    print(parser.parse("Jan 10 26 - Jan 5 26"))  # end before start
except ValueError as e:
    print(f"Error: {e}")

Single dates:
2026-01-02
2025-01-15

Date ranges:
(datetime.date(2026, 1, 1), datetime.date(2026, 1, 5))
(datetime.date(2025, 12, 20), datetime.date(2025, 12, 25))
(datetime.date(2026, 2, 1), datetime.date(2026, 2, 10))
Error: Invalid range: end date 2026-01-05 is before start date 2026-01-10
