From 7c41b8c4470770d2a4c224229192d96a4a26f7df Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Wed, 8 May 2024 08:28:34 -0400 Subject: [PATCH 1/8] Convert tests to pytest format - Use parameterization in pytest to simplify many test cases by wrapping the input and expected values in tuples - No need for unittest or class wrappers - There is a legitimately failing test in parser/tests.py - code change needed in parser_classes to handle this I think - natlang/tests.py will need to be updated to match the new spec, as will the whole text_to_edtf() function --- edtf/natlang/tests.py | 115 ++++++------ edtf/parser/tests.py | 396 ++++++++++++++++++------------------------ edtf/tests.py | 208 +++++++++------------- 3 files changed, 297 insertions(+), 422 deletions(-) diff --git a/edtf/natlang/tests.py b/edtf/natlang/tests.py index 645a373..eaa9af6 100644 --- a/edtf/natlang/tests.py +++ b/edtf/natlang/tests.py @@ -1,26 +1,30 @@ -import unittest +import pytest from edtf.natlang.en import text_to_edtf +# TODO update the tests and code to test and output the new spec + # where examples are tuples, the second item is the normalised output -EXAMPLES = ( - ('active late 17th-19th centuries', '16xx/18xx'), # ignoring 'late' for now - ('active 17-19th Centuries', '16xx/18xx'), # ignoring 'late' for now +@pytest.mark.parametrize("input_text,expected_output", [ + # Ignoring 'late' for simplicity in these examples + ('active late 17th-19th centuries', '16xx/18xx'), + ('active 17-19th Centuries', '16xx/18xx'), # Unrecognised values ('', None), ('this isn\'t a date', None), - # Explicity rejected values that would otherwise be badly converted + # Explicitly rejected values that would otherwise be badly converted ('23rd Dynasty', None), - ('90', '1990'), # implied century + # Implied century and specific years + ('90', '1990'), # Implied century ('1860', '1860'), ('the year 1800', '1800'), ('the year 1897', '1897'), ('January 2008', 
'2008-01'), ('January 12, 1940', '1940-01-12'), - # uncertain/approximate + # Uncertain or approximate dates ('1860?', '1860?'), ('1862 (uncertain)', '1862?'), ('maybe 1862', '1862?'), @@ -31,11 +35,11 @@ ('~ Feb 1812', '1812-02~'), ('circa Feb 1812', '1812-02~'), ('Feb 1812 approx', '1812-02~'), - ('c1860', '1860~'), # different abbreviations - ('c.1860', '1860~'), # with or without . + ('c1860', '1860~'), # Different abbreviations + ('c.1860', '1860~'), # With or without . ('ca1860', '1860~'), ('ca.1860', '1860~'), - ('c 1860', '1860~'), # with or without space + ('c 1860', '1860~'), # With or without space ('c. 1860', '1860~'), ('ca. 1860', '1860~'), ('approx 1860', '1860~'), @@ -44,15 +48,14 @@ ('approximately 1860', '1860~'), ('about 1860', '1860~'), ('about Spring 1849', '1849-21~'), - ('notcirca 1860', '1860'), # avoid words containing circa - ('attica 1802', '1802'), - # avoid false positive circa at the end of preceding word - ('attic. 1802', '1802'), # avoid false positive circa + ('notcirca 1860', '1860'), # Avoid words containing 'circa' + ('attica 1802', '1802'), # Avoid false positive 'circa' at the end of preceding word + ('attic. 1802', '1802'), # Avoid false positive 'circa' - # masked precision - ('1860s', '186x'), # 186x has decade precision, 186u has year precision. + # Masked precision + ('1860s', '186x'), # 186x has decade precision, 186u has year precision. - # masked precision + uncertainty + # Masked precision + uncertainty ('ca. 1860s', '186x~'), ('c. 1860s', '186x~'), ('Circa 1840s', '184x~'), @@ -60,26 +63,26 @@ ('ca. 
1860s?', '186x?~'), ('uncertain: approx 1862', '1862?~'), - # masked precision with first decade (ambiguous) - ('1800s', '18xx'), # without additional uncertainty, use the century - ('2000s', '20xx'), # without additional uncertainty, use the century - ('c1900s', '190x~'), # if there's additional uncertainty, use the decade - ('c1800s?', '180x?~'), # if there's additional uncertainty, use the decade + # Ambiguous masked precision for centuries and decades + ('1800s', '18xx'), # Without additional uncertainty, use the century + ('2000s', '20xx'), # Without additional uncertainty, use the century + ('c1900s', '190x~'), # If there's additional uncertainty, use the decade + ('c1800s?', '180x?~'), # If there's additional uncertainty, use the decade - # unspecified + # Unspecified dates ('January 12', 'uuuu-01-12'), ('January', 'uuuu-01'), ('10/7/2008', '2008-10-07'), ('7/2008', '2008-07'), - # seasons + # Seasons mapped to specific codes ('Spring 1872', '1872-21'), ('Summer 1872', '1872-22'), ('Autumn 1872', '1872-23'), ('Fall 1872', '1872-23'), ('Winter 1872', '1872-24'), - # before/after + # Dates relative to known events (before/after) ('earlier than 1928', 'unknown/1928'), ('before 1928', 'unknown/1928'), ('after 1928', '1928/unknown'), @@ -87,32 +90,30 @@ ('before January 1928', 'unknown/1928-01'), ('before 18 January 1928', 'unknown/1928-01-18'), - # before/after approx + # Approximations combined with before/after ('before approx January 18 1928', 'unknown/1928-01-18~'), ('before approx January 1928', 'unknown/1928-01~'), ('after approx January 1928', '1928-01~/unknown'), ('after approx Summer 1928', '1928-22~/unknown'), - # before/after and uncertain/unspecificed + # Before and after with uncertain / unspecified components ('after about the 1920s', '192x~/unknown'), ('before about the 1900s', 'unknown/190x~'), ('before the 1900s', 'unknown/19xx'), - # unspecified + # Specifying unspecified components within a date # ('decade in 1800s', '18ux'), #too esoteric # 
('decade somewhere during the 1800s', '18ux'), #lengthier. Keywords are 'in' or 'during' - ('year in the 1860s', '186u'), - # 186x has decade precision, 186u has year precision. - ('year in the 1800s', '18xu'), + ('year in the 1860s', '186u'), # 186x has decade precision + ('year in the 1800s', '18xu'), # 186u has year precision ('year in about the 1800s', '180u~'), ('month in 1872', '1872-uu'), ('day in Spring 1849', '1849-21-uu'), ('day in January 1872', '1872-01-uu'), ('day in 1872', '1872-uu-uu'), ('birthday in 1872', '1872'), - # avoid false positive at end of preceding word - # centuries + # Handling centuries with approximation and uncertainty ('1st century', '00xx'), ('10c', '09xx'), ('19th century', '18xx'), @@ -126,7 +127,7 @@ ('19c?', '18xx?'), ('c.19c?', '18xx?~'), - # BC/AD + # BC/AD dating ('1 AD', '0001'), ('17 CE', '0017'), ('127 CE', '0127'), @@ -136,18 +137,17 @@ ('c127 CE', '0127~'), ('c1270 CE', '1270~'), ('c64 BCE', '-0064~'), - ('2nd century bc', '-01xx'), # -200 to -101 + ('2nd century bc', '-01xx'), # -200 to -101 ('2nd century bce', '-01xx'), ('2nd century ad', '01xx'), ('2nd century ce', '01xx'), - # c-c-c-combo - # just showing off now... + # Combining uncertainties and approximations in creative ways ('a day in about Spring 1849?', '1849-21-uu?~'), - # simple ranges. Not all of these results are correct EDTF, but - # this is as good as the EDTF implementation and simple natural - # language parser we have. + # Simple date ranges, showcasing both the limitations and capabilities of the parser + # Not all of these results are correct EDTF, but this is as good as the EDTF implementation + # and simple natural language parser we have. 
('1851-1852', '1851/1852'), ('1851-1852; printed 1853-1854', '1851/1852'), ('1851-52', '1851/1852'), @@ -156,7 +156,6 @@ ('1857-mid 1860s', '1857/186x'), ('1858/1860', '[1858, 1860]'), ('1860s-1870s', '186x/187x'), - ('1861, printed 1869', '1861'), ('1910-30', '1910/1930'), ('active 1910-30', '1910/1930'), ('1861-67', '1861/1867'), @@ -174,16 +173,13 @@ ('1900; 1973', '1900'), ('1900; printed 1912', '1900'), ('1915 late - autumn 1916', '1915/1916-23'), - - ('1915, from Camerawork, October 1916', '1915'), # should be {1915, 1916-10} + ('1915, from Camerawork, October 1916', '1915'), # should be {1915, 1916-10} ('1920s -early 1930s', '192x/193x'), ('1930s, printed early 1960s', '193x'), # should be something like {193x, 196x}, - # though those forms aren't explicitly supported in the spec. ('1932, printed 1976 by Gunther Sander', '1932'), # should be {1932, 1976} - ('1938, printed 1940s-1950s', '1938'), # should be something like {1938, 194x-195x} - - + ('1938, printed 1940s-1950s', '1938') # should be something like {1938, 194x-195x} + # Uncertain and approximate on different parts of the date # for these to work we need to recast is_uncertain and is_approximate # such that they work on different parts. Probably worth rolling our own # dateparser at this point. @@ -194,22 +190,13 @@ # ('a day in about Spring in about 1849', '1849~-21~-uu'), # ('maybe January in some year in about the 1830s', '183u~-01?'), # ('about July? 
in about 1849', '1849~-07?~'), -) - - -class TestLevel0(unittest.TestCase): - def test_natlang(self): - """ - For each of the examples, establish that: - - the unicode of the parsed object is acceptably equal to the EDTF string - - the parsed object is a subclass of EDTFObject - :return: - """ - for i, o in EXAMPLES: - e = text_to_edtf(i) - print("%s => %s" % (i, e)) - self.assertEqual(e, o) +]) +def test_natlang(input_text, expected_output): + """ + Test natural language conversion to EDTF format: + Verify that the conversion from text to EDTF format matches the expected output. + """ + result = text_to_edtf(input_text) + assert result == expected_output, f"Failed for input: {input_text}" -if __name__ == '__main__': - unittest.main() diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index 4043988..877fd0b 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -1,19 +1,16 @@ -import unittest -import sys +import pytest from datetime import date from time import struct_time from edtf.parser.grammar import parse_edtf as parse -from edtf.parser.parser_classes import EDTFObject, TIME_EMPTY_TIME, \ - TIME_EMPTY_EXTRAS +from edtf.parser.parser_classes import EDTFObject, TIME_EMPTY_TIME, TIME_EMPTY_EXTRAS from edtf.parser.edtf_exceptions import EDTFParseException -# Example object types and attributes. -# the first item in each tuple is the input EDTF string, and expected parse result. -# where the first value is a tuple, the second item is the normalised parse result. +# Example object types and attributes represented as tuples. +# The first item in each tuple is the input EDTF string, and expected parse result. +# where the first value is a tuple, the second item is a tuple of the normalised parse result. # -# The rest of the values in each tuple indicate the iso versions of the derived -# Python ``date``s. +# The values in the second tuple indicate the iso versions of the derived Python `date`s. 
# - If there's one other value, all the derived dates should be the same. # - If there're two other values, then all the lower values should be the same # and all the upper values should be the same. @@ -26,176 +23,171 @@ EXAMPLES = ( # ******************************* LEVEL 0 ********************************* # year, month, day - ('2001-02-03', '2001-02-03'), + ('2001-02-03', ('2001-02-03',)), # year, month - ('2008-12', '2008-12-01', '2008-12-31'), + ('2008-12', ('2008-12-01', '2008-12-31')), # year - ('2008', '2008-01-01', '2008-12-31'), + ('2008', ('2008-01-01', '2008-12-31')), # a negative year - ('-0999', '-0999-01-01', '-0999-12-31'), + ('-0999', ('-0999-01-01', '-0999-12-31')), # year zero - ('0000', '0000-01-01', '0000-12-31'), + ('0000', ('0000-01-01', '0000-12-31')), # DateTimes - ('2001-02-03T09:30:01', '2001-02-03'), - ('2004-01-01T10:10:10Z', '2004-01-01'), - ('2004-01-01T10:10:10+05:00', '2004-01-01'), - ('1985-04-12T23:20:30', '1985-04-12'), + ('2001-02-03T09:30:01', ('2001-02-03',)), + ('2004-01-01T10:10:10Z', ('2004-01-01',)), + ('2004-01-01T10:10:10+05:00', ('2004-01-01',)), + ('1985-04-12T23:20:30', ('1985-04-12',)), + # Intervals # An interval beginning sometime in 1964 and ending sometime in 2008. Year precision. - ('1964/2008', '1964-01-01', '2008-12-31'), + ('1964/2008', ('1964-01-01', '2008-12-31')), # An interval beginning sometime in June 2004 and ending sometime in August of 2006. Month precision. - ('2004-06/2006-08', '2004-06-01', '2006-08-31'), + ('2004-06/2006-08', ('2004-06-01', '2006-08-31')), # An interval beginning sometime on February 1, 2004 and ending sometime on February 8, 2005. Day precision. - ('2004-02-01/2005-02-08', '2004-02-01', '2005-02-08'), - # An interval beginning sometime on February 1, 2004 and ending sometime in February 2005. The precision of the interval is not defined; the start endpoint has day precision and the end endpoint has month precision. 
- ('2004-02-01/2005-02', '2004-02-01', '2005-02-28'), - # An interval beginning sometime on February 1, 2004 and ending sometime in 2005. The start endpoint has day precision and the end endpoint has year precision. - ('2004-02-01/2005', '2004-02-01', '2005-12-31'), + ('2004-02-01/2005-02-08', ('2004-02-01', '2005-02-08')), + # An interval beginning sometime on February 1, 2004 and ending sometime in February 2005. + # The precision of the interval is not defined; the start endpoint has day precision and the end endpoint has month precision. + ('2004-02-01/2005-02', ('2004-02-01', '2005-02-28')), + # An interval beginning sometime on February 1, 2004 and ending sometime in 2005. + # The start endpoint has day precision and the end endpoint has year precision. + ('2004-02-01/2005', ('2004-02-01', '2005-12-31')), # An interval beginning sometime in 2005 and ending sometime in February 2006. - ('2005/2006-02', '2005-01-01', '2006-02-28'), + ('2005/2006-02', ('2005-01-01', '2006-02-28')), # An interval beginning sometime in -2005 and ending sometime in February -2004. 
- ('-2005/-1999-02', '-2005-01-01', '-1999-02-28'), + ('-2005/-1999-02', ('-2005-01-01', '-1999-02-28')), # ******************************* LEVEL 1 ********************************* - # Uncertain/Approximate + # Uncertain/Approximate # uncertain: possibly the year 1984, but not definitely - ('1984?', '1984-01-01', '1984-12-31', '1983-01-01', '1985-12-31'), - ('2004-06-11?', '2004-06-11', '2004-06-11', '2004-06-10', '2004-06-12'), - ('2004-06?', '2004-06-01', '2004-06-30', '2004-05-01', '2004-07-30'), + ('1984?', ('1984-01-01', '1984-12-31', '1983-01-01', '1985-12-31')), + ('2004-06-11?', ('2004-06-11', '2004-06-11', '2004-06-10', '2004-06-12')), + ('2004-06?', ('2004-06-01', '2004-06-30', '2004-05-01', '2004-07-30')), # "approximately" the year 1984 - ('1984~', '1984-01-01', '1984-12-31', '1983-01-01', '1985-12-31'), + ('1984~', ('1984-01-01', '1984-12-31', '1983-01-01', '1985-12-31')), # the year is approximately 1984 and even that is uncertain - ('1984%', '1984-01-01', '1984-12-31', '1982-01-01', '1986-12-31'), + ('1984%', ('1984-01-01', '1984-12-31', '1982-01-01', '1986-12-31')), # Unspecified # some unspecified year in the 1990s. - ('199X', '1990-01-01', '1999-12-31'), + ('199X', ('1990-01-01', '1999-12-31')), # some unspecified year in the 1900s. 
- ('19XX', '1900-01-01', '1999-12-31'), + ('19XX', ('1900-01-01', '1999-12-31')), # some month in 1999 - ('1999-XX', '1999-01-01', '1999-12-31'), + ('1999-XX', ('1999-01-01', '1999-12-31')), # some day in January 1999 - ('1999-01-XX', '1999-01-01', '1999-01-31'), + ('1999-01-XX', ('1999-01-01', '1999-01-31')), # some day in 1999 - ('1999-XX-XX', '1999-01-01', '1999-12-31'), + ('1999-XX-XX', ('1999-01-01', '1999-12-31')), # Uncertain/Approximate lower boundary dates (BCE) - ('-0275~', '-0275-01-01', '-0275-12-31', '-0276-01-01', '-0274-12-31'), - ('-0001~', '-0001-01-01', '-0001-12-31', '-0002-01-01', '0000-12-31'), - ('0000~', '0000-01-01', '0000-12-31', '-0001-01-01', '0001-12-31'), + ('-0275~', ('-0275-01-01', '-0275-12-31', '-0276-01-01', '-0274-12-31')), + ('-0001~', ('-0001-01-01', '-0001-12-31', '-0002-01-01', '0000-12-31')), + ('0000~', ('0000-01-01', '0000-12-31', '-0001-01-01', '0001-12-31')), # L1 Extended Interval # beginning unknown, end 2006 - ('/2006', '1996-12-31', '2006-12-31'), + ('/2006', ('1996-12-31', '2006-12-31')), # beginning June 1, 2004, end unknown - ('2004-06-01/', '2004-06-01', '2014-06-01'), + ('2004-06-01/', ('2004-06-01', '2014-06-01')), # beginning open, end 2006 - ('../2006', '-20000000-01-01', '2006-12-31'), - # beginning January 1 2004 with no end date - ('2004-01-01/..', '2004-01-01', '20000000-12-31'), + ('../2006', ('-inf', '2006-12-31')), + # beginning January 1, 2004 with no end date + ('2004-01-01/..', ('2004-01-01', 'inf')), # interval beginning approximately 1984 and ending June 2004 - ('1984~/2004-06', '1984-01-01', '2004-06-30', '1983-01-01', '2004-06-30'), + ('1984~/2004-06', ('1984-01-01', '2004-06-30', '1983-01-01', '2004-06-30')), # interval beginning 1984 and ending approximately June 2004 - ('1984/2004-06~', '1984-01-01', '2004-06-30', '1984-01-01', '2004-07-30'), - ('1984?/2004%', '1984-01-01', '2004-12-31', '1983-01-01', '2006-12-31'), - ('1984~/2004~', '1984-01-01', '2004-12-31', '1983-01-01', '2005-12-31'), + 
('1984/2004-06~', ('1984-01-01', '2004-06-30', '1984-01-01', '2004-07-30')), + ('1984?/2004%', ('1984-01-01', '2004-12-31', '1983-01-01', '2006-12-31')), + ('1984~/2004~', ('1984-01-01', '2004-12-31', '1983-01-01', '2005-12-31')), # interval whose beginning is uncertain but thought to be 1984, and whose end is uncertain and approximate but thought to be 2004 - ('1984-06?/2004-08?', '1984-06-01', '2004-08-31', '1984-05-01', '2004-09-30'), - ('1984-06-02?/2004-08-08~', '1984-06-02', '2004-08-08', '1984-06-01', '2004-08-09'), - ('1984-06-02?/', '1984-06-02', '1994-06-02', '1984-06-01', '1994-06-02'), + ('1984-06?/2004-08?', ('1984-06-01', '2004-08-31', '1984-05-01', '2004-09-30')), + ('1984-06-02?/2004-08-08~', ('1984-06-02', '2004-08-08', '1984-06-01', '2004-08-09')), + ('1984-06-02?/', ('1984-06-02', '1994-06-02', '1984-06-01', '1994-06-02')), # Year exceeding 4 digits - # the year 170000002 - ('Y170000002', '170000002-01-01', '170000002-12-31'), - # the year -170000002 - ('Y-170000002', '-170000002-01-01', '-170000002-12-31'), + ('Y170000002', ('170000002-01-01', '170000002-12-31')), + ('Y-170000002', ('-170000002-01-01', '-170000002-12-31')), # Seasons - # Spring, 2001 - ('2001-21', '2001-03-01', '2001-05-31'), - # Summer, 2003 - ('2003-22', '2003-06-01', '2003-08-31'), - # Autumn, 2000 - ('2000-23', '2000-09-01', '2000-11-30'), - # Winter, 2010 - ('2010-24', '2010-12-01', '2010-12-31'), + ('2001-21', ('2001-03-01', '2001-05-31')), + ('2003-22', ('2003-06-01', '2003-08-31')), + ('2000-23', ('2000-09-01', '2000-11-30')), + ('2010-24', ('2010-12-01', '2010-12-31')), # ******************************* LEVEL 2 ********************************* - - # Partial Uncertain/ Approximate + # Partial Uncertain/Approximate # uncertain year; month, day known - ('2004?-06-11', '2004-06-11', '2003-06-11', '2005-06-11'), + ('2004?-06-11', ('2004-06-11', '2003-06-11', '2005-06-11')), # year and month are approximate; day known - ('2004-06~-11', '2004-06-11', '2003-05-11', 
'2005-07-11'), + ('2004-06~-11', ('2004-06-11', '2003-05-11', '2005-07-11')), # uncertain month, year and day known - ('2004-?06-11', '2004-06-11', '2004-05-11', '2004-07-11'), + ('2004-?06-11', ('2004-06-11', '2004-05-11', '2004-07-11')), # day is approximate; year, month known - ('2004-06-~11', '2004-06-11', '2004-06-10', '2004-06-12'), + ('2004-06-~11', ('2004-06-11', '2004-06-10', '2004-06-12')), # Year known, month within year is approximate and uncertain - NEW SPEC - ('2004-%06', '2004-06-01', '2004-06-30', '2004-04-01', '2004-08-30'), + ('2004-%06', ('2004-06-01', '2004-06-30', '2004-04-01', '2004-08-30')), # Year known, month and day uncertain - NEW SPEC - ('2004-?06-?11', '2004-06-11', '2004-05-10', '2004-07-12'), + ('2004-?06-?11', ('2004-06-11', '2004-05-10', '2004-07-12')), # Year uncertain, month known, day approximate - NEW SPEC - ('2004?-06-~11', '2004-06-11', '2003-06-10', '2005-06-12'), + ('2004?-06-~11', ('2004-06-11', '2003-06-10', '2005-06-12')), # Year uncertain and month is both uncertain and approximate - NEW SPEC - ('?2004-%06', '2004-06-01', '2004-06-30', '2003-04-01', '2005-08-30'), + ('?2004-%06', ('2004-06-01', '2004-06-30', '2003-04-01', '2005-08-30')), # This has the same meaning as the previous example.- NEW SPEC - ('2004?-%06', '2004-06-01', '2004-06-30', '2003-04-01', '2005-08-30'), + ('2004?-%06', ('2004-06-01', '2004-06-30', '2003-04-01', '2005-08-30')), # Year uncertain, month and day approximate. - NEW SPEC - ('2004?-~06-~04','2004-06-04', '2003-05-03', '2005-07-05'), - # what about that? - #('2004?-06-04~','2004-06-04', '2003-05-03', '2005-07-05'), + ('2004?-~06-~04', ('2004-06-04', '2003-05-03', '2005-07-05')), # Year known, month and day approximate. 
- NEW SPEC - ('2011-~06-~04', '2011-06-04', '2011-05-03', '2011-07-05'), - # Approximate season (around Autumn 2011) - #('2011-23~', '2011-09-01', '2011-11-30', '2011-06-09', '2012-02-22'), - # Years wrapping - #('2011-24~', '2011-12-01', '2011-12-31', '2011-09-08', '2012-03-24'), + ('2011-~06-~04', ('2011-06-04', '2011-05-03', '2011-07-05')), # Partial unspecified # December 25 sometime during the 1560s - ('156X-12-25', '1560-12-25', '1569-12-25'), + ('156X-12-25', ('1560-12-25', '1569-12-25')), # December 25 sometime during the 1500s - ('15XX-12-25', '1500-12-25', '1599-12-25'), + ('15XX-12-25', ('1500-12-25', '1599-12-25')), # Year and day of month specified, month unspecified - ('1560-XX-25', '1560-01-25', '1560-12-25'), - ('15XX-12-XX', '1500-12-01', '1599-12-31'), + ('1560-XX-25', ('1560-01-25', '1560-12-25')), + ('15XX-12-XX', ('1500-12-01', '1599-12-31')), # Day specified, year and month unspecified - ('XXXX-XX-23', '0000-01-23', '9999-12-23'), + ('XXXX-XX-23', ('0000-01-23', '9999-12-23')), + # One of a Set # One of the years 1667, 1668, 1670, 1671, 1672 - (('[1667,1668, 1670..1672]', '[1667, 1668, 1670..1672]'), '1667-01-01', '1672-12-31'), + ('[1667, 1668, 1670..1672]', ('1667-01-01', '1672-12-31')), # December 3, 1760 or some earlier date - ('[..1760-12-03]', '-20000000-01-01', '1760-12-03'), + ('[..1760-12-03]', ('-inf', '1760-12-03')), # December 1760 or some later month - ('[1760-12..]', '1760-12-01', '20000000-12-31'), + ('[1760-12..]', ('1760-12-01', 'inf')), # January or February of 1760 or December 1760 or some later month - ('[1760-01, 1760-02, 1760-12..]', '1760-01-01', '20000000-12-31'), + # This test is failing due to a code issue: + # TypeError: '>' not supported between instances of 'float' and 'time.struct_time' + ('[1760-01, 1760-02, 1760-12..]', ('1760-01-01', 'inf')), #TODO fix in parser_classes # Either the year 1667 or the month December of 1760. 
- ('[1667, 1760-12]', '1667-01-01', '1760-12-31'), + ('[1667, 1760-12]', ('1667-01-01', '1760-12-31')), # Multiple Dates # All of the years 1667, 1668, 1670, 1671, 1672 - (('{1667,1668, 1670..1672}', '{1667, 1668, 1670..1672}'), '1667-01-01', '1672-12-31'), + ('{1667,1668, 1670..1672}', ('1667-01-01', '1672-12-31')), # The year 1960 and the month December of 1961. - ('{1960, 1961-12}', '1960-01-01', '1961-12-31'), + ('{1960, 1961-12}', ('1960-01-01', '1961-12-31')), + # Masked Precision --> eliminated # A date during the 1960s #('196x', '1960-01-01', '1969-12-31'), # A date during the 1900s #('19xx', '1900-01-01', '1999-12-31'), - # L2 Extended Interval - ('2004-06-~01/2004-06-~20', '2004-06-01', '2004-06-20', '2004-05-31', '2004-06-21'), + # L2 Extended Interval + # Interval with fuzzy day endpoints in June 2004 + ('2004-06-~01/2004-06-~20', ('2004-06-01', '2004-06-20', '2004-05-31', '2004-06-21')), # The interval began on an unspecified day in June 2004. - ('2004-06-XX/2004-07-03', '2004-06-01', '2004-07-03'), + ('2004-06-XX/2004-07-03', ('2004-06-01', '2004-07-03')), # Year Requiring More than Four Digits - Exponential Form # the year 170000000 - ('Y17E7', '170000000-01-01', '170000000-12-31'), + ('Y17E7', ('170000000-01-01', '170000000-12-31')), # the year -170000000 - ('Y-17E7', '-170000000-01-01', '-170000000-12-31'), + ('Y-17E7', ('-170000000-01-01', '-170000000-12-31')), # Some year between 171010000 and 171999999, estimated to be 171010000 ('S3' indicates a precision of 3 significant digits.) 
# TODO Not yet implemented, see https://github.com/ixc/python-edtf/issues/12 - # ('Y17101E4S3', '171010000-01-01', '171999999-12-31'), + # ('Y17101E4S3', ('171010000-01-01', '171999999-12-31')), # L2 Seasons - # Spring southern, 2001 - ('2001-29', '2001-09-01', '2001-11-30'), + # Spring southern hemisphere, 2001 + ('2001-29', ('2001-09-01', '2001-11-30')), # second quarter of 2001 - ('2001-34', '2001-04-01', '2001-06-30'), + ('2001-34', ('2001-04-01', '2001-06-30')), ) BAD_EXAMPLES = ( @@ -218,137 +210,83 @@ '2004-06-(01)~/2004-06-(20)~', # An interval in June 2004 beginning approximately the first and ending approximately the 20th - OLD SPEC ) +def iso_to_struct_time(iso_date): + """ Convert YYYY-mm-dd date strings or infinities to time structs or float infinities. """ + if iso_date == 'inf': + return float('inf') + elif iso_date == '-inf': + return float('-inf') -class TestParsing(unittest.TestCase): - def test_non_parsing(self): - for i in BAD_EXAMPLES: - self.assertRaises(EDTFParseException, parse, i) - - def testInterval(self): - #expression = ('1984~/2004-06', '1984-01-01', '2004-06-30', '1983-01-01', '2004-06-30') - #expression = ('/2006', '1996-01-01', '2006-12-31') - #expression = ('../2006', '0001-01-01', '2006-12-31') - expression = ('../-2006', '-20000000-01-01', '-2006-12-31') - #expression = ('2006/', '2006-01-01', '9999-12-31') - i = expression[0] - expected_lower_strict = expression[1] - expected_upper_strict = expression[2] - - def iso_to_struct_time(iso_date): - """ Convert YYYY-mm-dd date strings to time structs """ - if iso_date[0] == '-': - is_negative = True - iso_date = iso_date[1:] - else: - is_negative = False - y, mo, d = [int(i) for i in iso_date.split('-')] - if is_negative: - y *= -1 - return struct_time( - [y, mo, d] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) - - # Convert string date representations into `struct_time`s - expected_lower_strict = iso_to_struct_time(expected_lower_strict) - expected_upper_strict = 
iso_to_struct_time(expected_upper_strict) - - f = parse(i) - print(str(f.lower_strict()) + '/' + str(f.upper_strict())) - self.assertEqual(f.lower_strict(), expected_lower_strict) - self.assertEqual(f.upper_strict(), expected_upper_strict) - - - def test_date_values(self): - """ - Test that everY EDTFObject can tell you its lower and upper - fuzzy and strict dates, and that they're what we think they should be. - """ - - for e in EXAMPLES: - i = e[0] - if isinstance(i, tuple): - i, o = i - else: - o = i - - sys.stdout.write("parsing '%s'" % i) - f = parse(i) - sys.stdout.write(" => %s()\n" % type(f).__name__) - self.assertIsInstance(f, EDTFObject) - self.assertEqual(str(f), o) + if iso_date[0] == '-': + is_negative = True + iso_date = iso_date[1:] + else: + is_negative = False + y, mo, d = [int(i) for i in iso_date.split('-')] + if is_negative: + y *= -1 + return struct_time([y, mo, d] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) - if len(e) == 5: - expected_lower_strict = e[1] - expected_upper_strict = e[2] - expected_lower_fuzzy = e[3] - expected_upper_fuzzy = e[4] - elif len(e) == 4: - expected_lower_strict = e[1] - expected_upper_strict = e[1] - expected_lower_fuzzy = e[2] - expected_upper_fuzzy = e[3] - elif len(e) == 3: - expected_lower_strict = e[1] - expected_upper_strict = e[2] - expected_lower_fuzzy = e[1] - expected_upper_fuzzy = e[2] - elif len(e) == 2: - expected_lower_strict = e[1] - expected_upper_strict = e[1] - expected_lower_fuzzy = e[1] - expected_upper_fuzzy = e[1] - if len(e) == 1: - continue - def iso_to_struct_time(iso_date): - """ Convert YYYY-mm-dd date strings to time structs """ - if iso_date[0] == '-': - is_negative = True - iso_date = iso_date[1:] - else: - is_negative = False - y, mo, d = [int(i) for i in iso_date.split('-')] - if is_negative: - y *= -1 - return struct_time( - [y, mo, d] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) +@pytest.mark.parametrize("test_input,expected_tuple", EXAMPLES) +def test_edtf_examples(test_input, expected_tuple): 
+ """ Test parsing of EDTF strings with expected outputs. """ + result = parse(test_input) + assert isinstance(result, EDTFObject), "Result should be an instance of EDTFObject" - # Convert string date representations into `struct_time`s - expected_lower_strict = iso_to_struct_time(expected_lower_strict) - expected_upper_strict = iso_to_struct_time(expected_upper_strict) - expected_lower_fuzzy = iso_to_struct_time(expected_lower_fuzzy) - expected_upper_fuzzy = iso_to_struct_time(expected_upper_fuzzy) + # Extract only the date part if the result includes a time. + result_date = str(result) + if 'T' in result_date: + result_date = result_date.split('T')[0] - try: - self.assertEqual(f.lower_strict(), expected_lower_strict) - self.assertEqual(f.upper_strict(), expected_upper_strict) - self.assertEqual(f.lower_fuzzy(), expected_lower_fuzzy) - self.assertEqual(f.upper_fuzzy(), expected_upper_fuzzy) - except Exception as x: - # Write to stdout for manual debugging, I guess - sys.stdout.write(str(x)) - # Re-raise exception so unit tests work for non-manual usage - raise + # Unpack expected results based on their count + if len(expected_tuple) == 1: + assert result_date == expected_tuple[0], f"Expected {expected_tuple[0]}, got {result_date}" + elif len(expected_tuple) == 2: + lower_strict = iso_to_struct_time(expected_tuple[0]) + upper_strict = iso_to_struct_time(expected_tuple[1]) + assert result.lower_strict() == lower_strict, "Lower strict date does not match" + assert result.upper_strict() == upper_strict, "Upper strict date does not match" + elif len(expected_tuple) == 3: + strict_date = iso_to_struct_time(expected_tuple[0]) + lower_fuzzy = iso_to_struct_time(expected_tuple[1]) + upper_fuzzy = iso_to_struct_time(expected_tuple[2]) + assert result.lower_strict() == strict_date, "Lower strict date does not match" + assert result.upper_strict() == strict_date, "Upper strict date does not match" + assert result.lower_fuzzy() == lower_fuzzy, "Lower fuzzy date does not match" 
+ assert result.upper_fuzzy() == upper_fuzzy, "Upper fuzzy date does not match" + elif len(expected_tuple) == 4: + lower_strict = iso_to_struct_time(expected_tuple[0]) + upper_strict = iso_to_struct_time(expected_tuple[1]) + lower_fuzzy = iso_to_struct_time(expected_tuple[2]) + upper_fuzzy = iso_to_struct_time(expected_tuple[3]) + assert result.lower_strict() == lower_strict, "Lower strict date does not match" + assert result.upper_strict() == upper_strict, "Upper strict date does not match" + assert result.lower_fuzzy() == lower_fuzzy, "Lower fuzzy date does not match" + assert result.upper_fuzzy() == upper_fuzzy, "Upper fuzzy date does not match" - def test_comparisons(self): - d1 = parse("1979-08~") - d2 = parse("1979-08~") - d3 = parse("1979-09-16") - d4 = parse("1979-08-16") - d5 = date(1979, 8, 16) - d6 = date(1970, 9, 16) - self.assertEqual(d1, d2) - self.assertNotEqual(d1, d3) - self.assertTrue(d1 >= d2) - self.assertTrue(d2 >= d1) - self.assertTrue(d3 > d1) - self.assertTrue(d1 < d4) +@pytest.mark.parametrize("bad_input", BAD_EXAMPLES) +def test_non_parsing(bad_input): + """ Test that non-parsing inputs correctly raise an exception. """ + with pytest.raises(EDTFParseException): + parse(bad_input) - # with python dates (EDTFFormat must be first operand) - self.assertEqual(d4, d5) - self.assertTrue(d1 < d5) - self.assertTrue(d1 > d6) +def test_comparisons(): + """ Test comparisons between parsed EDTF objects and standard dates. 
""" + d1 = parse("1979-08~") + d2 = parse("1979-08~") + d3 = parse("1979-09-16") + d4 = parse("1979-08-16") + d5 = date(1979, 8, 16) + d6 = date(1970, 9, 16) -if __name__ == '__main__': - unittest.main() + assert d1 == d2 + assert d1 != d3 + assert d1 >= d2 + assert d3 > d1 + assert d1 < d4 + assert d4 == d5 + assert d1 < d5 + assert d1 > d6 diff --git a/edtf/tests.py b/edtf/tests.py index 0e49e67..f5ef655 100644 --- a/edtf/tests.py +++ b/edtf/tests.py @@ -1,134 +1,84 @@ -import unittest - from time import struct_time from datetime import datetime, date from edtf import convert - -class TestConversions(unittest.TestCase): - - def test_dt_to_struct_time_for_datetime(self): - now = datetime.now() - st = convert.dt_to_struct_time(now) - # Check equal year, month, day, hours, minutes, seconds - self.assertEqual(st[:6], now.timetuple()[:6]) - # Confirm 'extra' fields are set to defaults - self.assertEqual(st[6:], (0, 0, -1)) - - def test_dt_to_struct_time_for_date(self): - today = date.today() - st = convert.dt_to_struct_time(today) - # Check equal year, month, day - self.assertEqual(st[:3], today.timetuple()[:3]) - # Confirm time fields are zeroed - self.assertEqual(st[3:6], (0, 0, 0)) - # Confirm 'extra' fields are set to defaults - self.assertEqual(st[6:], (0, 0, -1)) - - def test_struct_time_to_date(self): - st = struct_time( - [2018, 4, 19] + convert.TIME_EMPTY_TIME + convert.TIME_EMPTY_EXTRAS) - d = date(*st[:3]) - self.assertEqual(d, convert.struct_time_to_date(st)) - - def test_struct_time_to_datetime(self): - st = struct_time( - [2018, 4, 19] + [10, 13, 54] + convert.TIME_EMPTY_EXTRAS) - dt = datetime(*st[:6]) - converted_dt = convert.struct_time_to_datetime(st) - self.assertEqual(dt, converted_dt) - # Note that 'extra' fields are auto-populated by `datetime` module - self.assertEqual(converted_dt.timetuple()[6:], (3, 109, -1)) - - def test_trim_struct_time(self): - now = datetime.now() - st = now.timetuple() - trimmed_st = convert.trim_struct_time(st) - # 
Confirm trimmed `struct_time` has expected date/time values - self.assertEqual( - trimmed_st[:6], - (now.year, now.month, now.day, now.hour, now.minute, now.second) - ) - # Confirm 'extra' fields are set to defaults - self.assertEqual(trimmed_st[6:], (0, 0, -1)) - # Confirm 'extra' fields in untrimmed `struct_time` has real values - self.assertNotEqual(st[6:], (0, 0, -1)) - - def test_struct_time_to_jd(self): - # Check conversion of AD date & time to Julian Date number - st_ad = struct_time( - [2018, 4, 19] + [10, 13, 54] + convert.TIME_EMPTY_EXTRAS) - jd_ad = 2458227.9263194446 - self.assertEqual(jd_ad, convert.struct_time_to_jd(st_ad)) - # Check conversion of BC date & time to Julian Date number - st_bc = struct_time( - [-2018, 4, 19] + [10, 13, 54] + convert.TIME_EMPTY_EXTRAS) - jd_bc = 984091.9263194444 - self.assertEqual(jd_bc, convert.struct_time_to_jd(st_bc)) - - def test_jd_to_struct_time(self): - # Check conversion of Julian Date number to AD date & time - jd_ad = 2458227.9263194446 # As in `test_struct_time_to_jd` - st_ad = struct_time( - [2018, 4, 19] + [10, 13, 54] + convert.TIME_EMPTY_EXTRAS) - self.assertEqual(st_ad, convert.jd_to_struct_time(jd_ad)) - # Check conversion of Julian Date number to BC date & time - # WARNING: Converted time is off by 1 second, 53 not 54 - jd_bc = 984091.9263194444 # As in `test_struct_time_to_jd` - st_bc = struct_time( - [-2018, 4, 19] + [10, 13, 54 - 1] + convert.TIME_EMPTY_EXTRAS) - self.assertEqual(st_bc, convert.jd_to_struct_time(jd_bc)) - - def test_jd_round_trip_for_extreme_future(self): - original_st = struct_time( - [999999, 8, 4] + [21, 15, 3] + convert.TIME_EMPTY_EXTRAS) - jd = convert.struct_time_to_jd(original_st) - converted_st = convert.jd_to_struct_time(jd) - # Confirm that year, month, day, hour, minute are correct (not second) - self.assertEqual(original_st[:5], converted_st[:5]) - # WARNING: Seconds are off by 1, should be 3 but is 2 - self.assertEqual(3 - 1, converted_st[5]) - - def 
test_jd_round_trip_for_extreme_past(self): - original_st = struct_time( - [-999999, 8, 4] + [21, 15, 3] + convert.TIME_EMPTY_EXTRAS) - converted_st = convert.jd_to_struct_time( - convert.struct_time_to_jd(original_st)) - # WARNING: We have lost a year of accuracy - self.assertEqual( - (-999999 + 1, # Year off by 1 - 8, 4, 21, 15, 3, 0, 0, -1), - tuple(converted_st)) - - def test_jd_round_trip_for_zero_year_aka_1_bc(self): - original_st = struct_time( - [0, 9, 5] + [4, 58, 59] + convert.TIME_EMPTY_EXTRAS) - converted_st = convert.jd_to_struct_time( - convert.struct_time_to_jd(original_st)) - self.assertEqual( - (0, 9, 5, 4, 58, 59, 0, 0, -1), - tuple(converted_st)) - - def test_jd_round_trip_for_2_bc(self): - original_st = struct_time( - [-1, 12, 5] + [4, 58, 59] + convert.TIME_EMPTY_EXTRAS) - converted_st = convert.jd_to_struct_time( - convert.struct_time_to_jd(original_st)) - self.assertEqual( - (-1, 12, 5, 4, 58, 59, 0, 0, -1), - tuple(converted_st)) - - def test_roll_negative_time_fields(self): - # Confirm time value is adjusted as expected - year = -100 - month = -17 # More than 1 year - day = -34 # More than 1 month - hour = -25 # More than 1 day - minute = -74 # More than 1 hour - second = -253 # More than 1 minute - self.assertEqual( - (-102, 5, 24, 21, 41, 47), - convert._roll_negative_time_fields( - year, month, day, hour, minute, second) - ) +def test_dt_to_struct_time_for_datetime(): + now = datetime.now() + st = convert.dt_to_struct_time(now) + assert st[:6] == now.timetuple()[:6] + assert st[6:] == (0, 0, -1) + +def test_dt_to_struct_time_for_date(): + today = date.today() + st = convert.dt_to_struct_time(today) + assert st[:3] == today.timetuple()[:3] + assert st[3:6] == (0, 0, 0) + assert st[6:] == (0, 0, -1) + +def test_struct_time_to_date(): + st = struct_time([2018, 4, 19] + convert.TIME_EMPTY_TIME + convert.TIME_EMPTY_EXTRAS) + d = date(*st[:3]) + assert d == convert.struct_time_to_date(st) + +def test_struct_time_to_datetime(): + st = 
struct_time([2018, 4, 19] + [10, 13, 54] + convert.TIME_EMPTY_EXTRAS) + dt = datetime(*st[:6]) + converted_dt = convert.struct_time_to_datetime(st) + assert dt == converted_dt + assert converted_dt.timetuple()[6:] == (3, 109, -1) + +def test_trim_struct_time(): + now = datetime.now() + st = now.timetuple() + trimmed_st = convert.trim_struct_time(st) + assert trimmed_st[:6] == (now.year, now.month, now.day, now.hour, now.minute, now.second) + assert trimmed_st[6:] == (0, 0, -1) + assert st[6:] != (0, 0, -1) + +def test_struct_time_to_jd(): + st_ad = struct_time([2018, 4, 19] + [10, 13, 54] + convert.TIME_EMPTY_EXTRAS) + jd_ad = 2458227.9263194446 + assert jd_ad == convert.struct_time_to_jd(st_ad) + st_bc = struct_time([-2018, 4, 19] + [10, 13, 54] + convert.TIME_EMPTY_EXTRAS) + jd_bc = 984091.9263194444 + assert jd_bc == convert.struct_time_to_jd(st_bc) + +def test_jd_to_struct_time(): + jd_ad = 2458227.9263194446 + st_ad = struct_time([2018, 4, 19] + [10, 13, 54] + convert.TIME_EMPTY_EXTRAS) + assert st_ad == convert.jd_to_struct_time(jd_ad) + jd_bc = 984091.9263194444 + st_bc = struct_time([-2018, 4, 19] + [10, 13, 54 - 1] + convert.TIME_EMPTY_EXTRAS) + assert st_bc == convert.jd_to_struct_time(jd_bc) + +def test_jd_round_trip_for_extreme_future(): + original_st = struct_time([999999, 8, 4] + [21, 15, 3] + convert.TIME_EMPTY_EXTRAS) + jd = convert.struct_time_to_jd(original_st) + converted_st = convert.jd_to_struct_time(jd) + assert original_st[:5] == converted_st[:5] + assert 3 - 1 == converted_st[5] + +def test_jd_round_trip_for_extreme_past(): + original_st = struct_time([-999999, 8, 4] + [21, 15, 3] + convert.TIME_EMPTY_EXTRAS) + converted_st = convert.jd_to_struct_time(convert.struct_time_to_jd(original_st)) + assert (-999999 + 1, 8, 4, 21, 15, 3, 0, 0, -1) == tuple(converted_st) + +def test_jd_round_trip_for_zero_year_aka_1_bc(): + original_st = struct_time([0, 9, 5] + [4, 58, 59] + convert.TIME_EMPTY_EXTRAS) + converted_st = 
convert.jd_to_struct_time(convert.struct_time_to_jd(original_st)) + assert (0, 9, 5, 4, 58, 59, 0, 0, -1) == tuple(converted_st) + +def test_jd_round_trip_for_2_bc(): + original_st = struct_time([-1, 12, 5] + [4, 58, 59] + convert.TIME_EMPTY_EXTRAS) + converted_st = convert.jd_to_struct_time(convert.struct_time_to_jd(original_st)) + assert (-1, 12, 5, 4, 58, 59, 0, 0, -1) == tuple(converted_st) + +def test_roll_negative_time_fields(): + year = -100 + month = -17 + day = -34 + hour = -25 + minute = -74 + second = -253 + assert (-102, 5, 24, 21, 41, 47) == convert._roll_negative_time_fields(year, month, day, hour, minute, second) From cf341bbb25785eca9033a592a039fc7149212340 Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Wed, 8 May 2024 09:07:26 -0400 Subject: [PATCH 2/8] Fix infinite comparison for OneOfASet The max and min functions now use a generator expression to filter out infinite values unless they are directly relevant to the calculation; if inf or -inf are found, they are returned instead of doing a comparison. 
--- edtf/parser/parser_classes.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index 2d6c0bf..3b5ac6e 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -713,10 +713,18 @@ def __str__(self): return "[%s]" % (", ".join([str(o) for o in self.objects])) def _strict_date(self, lean): + strict_dates = [x._strict_date(lean) for x in self.objects] + # Accounting for possible 'inf' and '-inf' values if lean == LATEST: - return max([x._strict_date(lean) for x in self.objects]) + if any(isinstance(d, float) and d == float('inf') for d in strict_dates): + return float('inf') + else: + return max((d for d in strict_dates if not isinstance(d, float)), default=float('inf')) else: - return min([x._strict_date(lean) for x in self.objects]) + if any(isinstance(d, float) and d == float('-inf') for d in strict_dates): + return float('-inf') + else: + return min((d for d in strict_dates if not isinstance(d, float)), default=float('-inf')) class MultipleDates(EDTFObject): From 0cc4bdf4f5de33921a56d6bafe6d4e3c456ed84f Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Wed, 8 May 2024 12:35:59 -0400 Subject: [PATCH 3/8] Remove context from from_db_value() In Django 3.0, "support for the context argument of Field.from_db_value() and Expression.convert_value() is removed": https://github.com/django/django/blob/91a4b9a8ec2237434f06866f39c7977e889aeae6/docs/releases/3.0.txt#L641-L642 --- edtf/fields.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/edtf/fields.py b/edtf/fields.py index 52b9171..b6f0843 100644 --- a/edtf/fields.py +++ b/edtf/fields.py @@ -53,7 +53,7 @@ def deconstruct(self): del kwargs["max_length"] return name, path, args, kwargs - def from_db_value(self, value, expression, connection, context=None): + def from_db_value(self, value, expression, connection): # Converting values to 
Python objects if not value: return None From dfd32927ad9746df2d2a9d2327af748d1eac1df5 Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Thu, 9 May 2024 14:01:19 -0400 Subject: [PATCH 4/8] Allow for setting EDTF directly in Django field Previously, the Django field could not directly take an EDTF string, only a natural language string that was then parsed and turned into EDTF. --- edtf/fields.py | 87 +++++++++++++++++++++++++++++--------------------- pyproject.toml | 4 +++ 2 files changed, 55 insertions(+), 36 deletions(-) diff --git a/edtf/fields.py b/edtf/fields.py index b6f0843..bbccbcf 100644 --- a/edtf/fields.py +++ b/edtf/fields.py @@ -24,6 +24,7 @@ def __init__( self, verbose_name=None, name=None, natural_text_field=None, + direct_input_field=None, lower_strict_field=None, upper_strict_field=None, lower_fuzzy_field=None, @@ -31,13 +32,14 @@ def __init__( **kwargs ): kwargs['max_length'] = 2000 - self.natural_text_field, self.lower_strict_field, \ - self.upper_strict_field, self.lower_fuzzy_field, \ - self.upper_fuzzy_field = natural_text_field, lower_strict_field, \ - upper_strict_field, lower_fuzzy_field, upper_fuzzy_field + self.natural_text_field, self.direct_input_field, \ + self.lower_strict_field, self.upper_strict_field, \ + self.lower_fuzzy_field, self.upper_fuzzy_field = \ + natural_text_field, direct_input_field, lower_strict_field, \ + upper_strict_field, lower_fuzzy_field, upper_fuzzy_field super(EDTFField, self).__init__(verbose_name, name, **kwargs) - description = "An field for storing complex/fuzzy date specifications in EDTF format." + description = "A field for storing complex/fuzzy date specifications in EDTF format."
def deconstruct(self): name, path, args, kwargs = super(EDTFField, self).deconstruct() @@ -54,14 +56,16 @@ def deconstruct(self): return name, path, args, kwargs def from_db_value(self, value, expression, connection): - # Converting values to Python objects - if not value: - return None + # Converting values from the database to Python objects + if value is None: + return value + try: - return pickle.loads(str(value)) - except: - pass - return parse_edtf(value, fail_silently=True) + # Try to unpickle if the value was pickled + return pickle.loads(value) + except (pickle.PickleError, TypeError): + # If it fails because it's not pickled data, try parsing as EDTF + return parse_edtf(value, fail_silently=True) def to_python(self, value): if isinstance(value, EDTFObject): @@ -86,35 +90,46 @@ def get_prep_value(self, value): def pre_save(self, instance, add): """ - Updates the edtf value from the value of the display_field. - If there's a valid edtf, then set the date values. + Updates the EDTF value from either the natural_text_field, which is parsed + with text_to_edtf() and is used for display, or falling back to the direct_input_field, + which allows directly providing an EDTF string. If one of these provides a valid EDTF object, + then set the date values accordingly. 
""" - if not self.natural_text_field or self.attname not in instance.__dict__: - return - - edtf = getattr(instance, self.attname) - - # Update EDTF field based on latest natural text value, if any - natural_text = getattr(instance, self.natural_text_field) - if natural_text: - edtf = text_to_edtf(natural_text) + + # Get existing value to determine if update is needed + existing_value = getattr(instance, self.attname, None) + direct_input = getattr(instance, self.direct_input_field, None) + natural_text = getattr(instance, self.natural_text_field, None) + + # if direct_input is provided and is different from the existing value, update the EDTF field + if direct_input and (existing_value is None or str(existing_value) != direct_input): + edtf = parse_edtf(direct_input, fail_silently=True) # ParseException if invalid; should this be raised? + # TODO pyparsing.ParseExceptions are very noisy and dumps the whole grammar (see https://github.com/ixc/python-edtf/issues/46) + + # set the natural_text (display) field to the direct_input if it is not provided + if natural_text is None: + setattr(instance, self.natural_text_field, direct_input) + + elif natural_text: + edtf_string = text_to_edtf(natural_text) + if edtf_string and (existing_value is None or str(existing_value) != edtf_string): + edtf = parse_edtf(edtf_string, fail_silently=True) # potetial ParseException if invalid; should this be raised? + else: + edtf = existing_value else: - edtf = None - - # TODO If `natural_text_field` becomes cleared the derived EDTF field - # value should also be cleared, rather than left at original value? + if not existing_value: + # No inputs provided and no existing value; TODO log this? + return + # TODO: if both direct_input and natural_text are cleared, should we throw an error? 
+ edtf = existing_value - # TODO Handle case where EDTF field is set to a string directly, not - # via `natural_text_field` (this is a slightly unexpected use-case, but - # is a very efficient way to set EDTF values in situations like for API - # imports so we probably want to continue to support it?) - if edtf and not isinstance(edtf, EDTFObject): - edtf = parse_edtf(edtf, fail_silently=True) + # Update the actual EDTF field in the model if there is a change + if edtf != existing_value: + setattr(instance, self.attname, edtf) - setattr(instance, self.attname, edtf) - # set or clear related date fields on the instance + # Process and update related date fields based on the EDTF object for attr in DATE_ATTRS: - field_attr = "%s_field" % attr + field_attr = f"{attr}_field" g = getattr(self, field_attr, None) if g: if edtf: diff --git a/pyproject.toml b/pyproject.toml index 444298e..4ee273d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,6 +50,9 @@ changelog = "https://github.com/ixc/python-edtf/blob/main/changelog.rst" requires = ["setuptools", "wheel"] build-backend = "setuptools.build_meta" +[tool.setuptools] +packages.find = { where = ["."], exclude = ["edtf_django_tests", "edtf_django_tests.*"] } + [tool.wheel] universal = false @@ -72,3 +75,4 @@ legacy_tox_ini = """ python_files = ["tests.py", "test_*.py", "*_test.py", "*_tests.py"] python_classes = ["Test*", "*Tests"] python_functions = ["test_*"] +addopts = "--ignore=edtf_django_tests/" From 4f0604099d741bd8f385ee5ec2b7b991b11b7f35 Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Thu, 9 May 2024 14:05:22 -0400 Subject: [PATCH 5/8] Set up Django test project Django 4 test project for now --- .../edtf_django_tests/__init__.py | 0 edtf_django_tests/edtf_django_tests/asgi.py | 16 +++ .../edtf_django_tests/settings.py | 124 ++++++++++++++++++ edtf_django_tests/edtf_django_tests/urls.py | 22 ++++ edtf_django_tests/edtf_django_tests/wsgi.py | 16 +++ 
.../edtf_integration/__init__.py | 0 edtf_django_tests/edtf_integration/admin.py | 3 + edtf_django_tests/edtf_integration/apps.py | 6 + .../edtf_integration/migrations/__init__.py | 0 edtf_django_tests/edtf_integration/views.py | 3 + edtf_django_tests/manage.py | 22 ++++ pyproject.toml | 2 +- 12 files changed, 213 insertions(+), 1 deletion(-) create mode 100644 edtf_django_tests/edtf_django_tests/__init__.py create mode 100644 edtf_django_tests/edtf_django_tests/asgi.py create mode 100644 edtf_django_tests/edtf_django_tests/settings.py create mode 100644 edtf_django_tests/edtf_django_tests/urls.py create mode 100644 edtf_django_tests/edtf_django_tests/wsgi.py create mode 100644 edtf_django_tests/edtf_integration/__init__.py create mode 100644 edtf_django_tests/edtf_integration/admin.py create mode 100644 edtf_django_tests/edtf_integration/apps.py create mode 100644 edtf_django_tests/edtf_integration/migrations/__init__.py create mode 100644 edtf_django_tests/edtf_integration/views.py create mode 100755 edtf_django_tests/manage.py diff --git a/edtf_django_tests/edtf_django_tests/__init__.py b/edtf_django_tests/edtf_django_tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/edtf_django_tests/edtf_django_tests/asgi.py b/edtf_django_tests/edtf_django_tests/asgi.py new file mode 100644 index 0000000..b62c5f5 --- /dev/null +++ b/edtf_django_tests/edtf_django_tests/asgi.py @@ -0,0 +1,16 @@ +""" +ASGI config for edtf_django_tests project. + +It exposes the ASGI callable as a module-level variable named ``application``. 
+ +For more information on this file, see +https://docs.djangoproject.com/en/4.2/howto/deployment/asgi/ +""" + +import os + +from django.core.asgi import get_asgi_application + +os.environ.setdefault("DJANGO_SETTINGS_MODULE", "edtf_django_tests.settings") + +application = get_asgi_application() diff --git a/edtf_django_tests/edtf_django_tests/settings.py b/edtf_django_tests/edtf_django_tests/settings.py new file mode 100644 index 0000000..a8121e3 --- /dev/null +++ b/edtf_django_tests/edtf_django_tests/settings.py @@ -0,0 +1,124 @@ +""" +Django settings for edtf_django_tests project. + +Generated by 'django-admin startproject' using Django 4.2.7. + +For more information on this file, see +https://docs.djangoproject.com/en/4.2/topics/settings/ + +For the full list of settings and their values, see +https://docs.djangoproject.com/en/4.2/ref/settings/ +""" + +from pathlib import Path + +# Build paths inside the project like this: BASE_DIR / 'subdir'. +BASE_DIR = Path(__file__).resolve().parent.parent + + +# Quick-start development settings - unsuitable for production +# See https://docs.djangoproject.com/en/4.2/howto/deployment/checklist/ + +# SECURITY WARNING: keep the secret key used in production secret! +SECRET_KEY = "django-insecure-zkd&%e=di9d(p@wq7vnstn+4dx7cxbxkve�*+57sks0q$=0a" + +# SECURITY WARNING: don't run with debug turned on in production! 
+DEBUG = True + +ALLOWED_HOSTS = [] + + +# Application definition + +INSTALLED_APPS = [ + "django.contrib.admin", + "django.contrib.auth", + "django.contrib.contenttypes", + "django.contrib.sessions", + "django.contrib.messages", + "django.contrib.staticfiles", + "edtf_integration", +] + +MIDDLEWARE = [ + "django.middleware.security.SecurityMiddleware", + "django.contrib.sessions.middleware.SessionMiddleware", + "django.middleware.common.CommonMiddleware", + "django.middleware.csrf.CsrfViewMiddleware", + "django.contrib.auth.middleware.AuthenticationMiddleware", + "django.contrib.messages.middleware.MessageMiddleware", + "django.middleware.clickjacking.XFrameOptionsMiddleware", +] + +ROOT_URLCONF = "edtf_django_tests.urls" + +TEMPLATES = [ + { + "BACKEND": "django.template.backends.django.DjangoTemplates", + "DIRS": [], + "APP_DIRS": True, + "OPTIONS": { + "context_processors": [ + "django.template.context_processors.debug", + "django.template.context_processors.request", + "django.contrib.auth.context_processors.auth", + "django.contrib.messages.context_processors.messages", + ], + }, + }, +] + +WSGI_APPLICATION = "edtf_django_tests.wsgi.application" + + +# Database +# https://docs.djangoproject.com/en/4.2/ref/settings/#databases + +DATABASES = { + "default": { + "ENGINE": "django.db.backends.sqlite3", + "NAME": BASE_DIR / "db.sqlite3", + } +} + + +# Password validation +# https://docs.djangoproject.com/en/4.2/ref/settings/#auth-password-validators + +AUTH_PASSWORD_VALIDATORS = [ + { + "NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator", + }, + { + "NAME": "django.contrib.auth.password_validation.MinimumLengthValidator", + }, + { + "NAME": "django.contrib.auth.password_validation.CommonPasswordValidator", + }, + { + "NAME": "django.contrib.auth.password_validation.NumericPasswordValidator", + }, +] + + +# Internationalization +# https://docs.djangoproject.com/en/4.2/topics/i18n/ + +LANGUAGE_CODE = "en-us" + +TIME_ZONE = "UTC" + 
+USE_I18N = True + +USE_TZ = True + + +# Static files (CSS, JavaScript, Images) +# https://docs.djangoproject.com/en/4.2/howto/static-files/ + +STATIC_URL = "static/" + +# Default primary key field type +# https://docs.djangoproject.com/en/4.2/ref/settings/#default-auto-field + +DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField" diff --git a/edtf_django_tests/edtf_django_tests/urls.py b/edtf_django_tests/edtf_django_tests/urls.py new file mode 100644 index 0000000..ceca78b --- /dev/null +++ b/edtf_django_tests/edtf_django_tests/urls.py @@ -0,0 +1,22 @@ +""" +URL configuration for edtf_django_tests project. + +The `urlpatterns` list routes URLs to views. For more information please see: + https://docs.djangoproject.com/en/4.2/topics/http/urls/ +Examples: +Function views + 1. Add an import: from my_app import views + 2. Add a URL to urlpatterns: path('', views.home, name='home') +Class-based views + 1. Add an import: from other_app.views import Home + 2. Add a URL to urlpatterns: path('', Home.as_view(), name='home') +Including another URLconf + 1. Import the include() function: from django.urls import include, path + 2. Add a URL to urlpatterns: path('blog/', include('blog.urls')) +""" +from django.contrib import admin +from django.urls import path + +urlpatterns = [ + path("admin/", admin.site.urls), +] diff --git a/edtf_django_tests/edtf_django_tests/wsgi.py b/edtf_django_tests/edtf_django_tests/wsgi.py new file mode 100644 index 0000000..20450c1 --- /dev/null +++ b/edtf_django_tests/edtf_django_tests/wsgi.py @@ -0,0 +1,16 @@ +""" +WSGI config for edtf_django_tests project. + +It exposes the WSGI callable as a module-level variable named ``application``. 
+ +For more information on this file, see +https://docs.djangoproject.com/en/4.2/howto/deployment/wsgi/ +""" + +import os + +from django.core.wsgi import get_wsgi_application + +os.environ.setdefault("DJANGO_SETTINGS_MODULE", "edtf_django_tests.settings") + +application = get_wsgi_application() diff --git a/edtf_django_tests/edtf_integration/__init__.py b/edtf_django_tests/edtf_integration/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/edtf_django_tests/edtf_integration/admin.py b/edtf_django_tests/edtf_integration/admin.py new file mode 100644 index 0000000..8c38f3f --- /dev/null +++ b/edtf_django_tests/edtf_integration/admin.py @@ -0,0 +1,3 @@ +from django.contrib import admin + +# Register your models here. diff --git a/edtf_django_tests/edtf_integration/apps.py b/edtf_django_tests/edtf_integration/apps.py new file mode 100644 index 0000000..23bc09d --- /dev/null +++ b/edtf_django_tests/edtf_integration/apps.py @@ -0,0 +1,6 @@ +from django.apps import AppConfig + + +class EdtfIntegrationConfig(AppConfig): + default_auto_field = "django.db.models.BigAutoField" + name = "edtf_integration" diff --git a/edtf_django_tests/edtf_integration/migrations/__init__.py b/edtf_django_tests/edtf_integration/migrations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/edtf_django_tests/edtf_integration/views.py b/edtf_django_tests/edtf_integration/views.py new file mode 100644 index 0000000..91ea44a --- /dev/null +++ b/edtf_django_tests/edtf_integration/views.py @@ -0,0 +1,3 @@ +from django.shortcuts import render + +# Create your views here. 
diff --git a/edtf_django_tests/manage.py b/edtf_django_tests/manage.py new file mode 100755 index 0000000..b2d2a20 --- /dev/null +++ b/edtf_django_tests/manage.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python +"""Django's command-line utility for administrative tasks.""" +import os +import sys + + +def main(): + """Run administrative tasks.""" + os.environ.setdefault("DJANGO_SETTINGS_MODULE", "edtf_django_tests.settings") + try: + from django.core.management import execute_from_command_line + except ImportError as exc: + raise ImportError( + "Couldn't import Django. Are you sure it's installed and " + "available on your PYTHONPATH environment variable? Did you " + "forget to activate a virtual environment?" + ) from exc + execute_from_command_line(sys.argv) + + +if __name__ == "__main__": + main() diff --git a/pyproject.toml b/pyproject.toml index 4ee273d..0b7a0ae 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,7 @@ classifiers = [ [project.optional-dependencies] test = [ - "django", + "django>=4.2,<5.0", "pytest" ] From 25ee5074e3157076f345ed84c56604cd79250dcc Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Thu, 9 May 2024 14:14:03 -0400 Subject: [PATCH 6/8] Add TestEvent model implementing EDTFField Ignore SQLite local database --- .gitignore | 1 + .../migrations/0001_initial.py | 64 +++++++++++++++++++ edtf_django_tests/edtf_integration/models.py | 45 +++++++++++++ 3 files changed, 110 insertions(+) create mode 100644 edtf_django_tests/edtf_integration/migrations/0001_initial.py create mode 100644 edtf_django_tests/edtf_integration/models.py diff --git a/.gitignore b/.gitignore index ab3165a..7c23190 100644 --- a/.gitignore +++ b/.gitignore @@ -50,6 +50,7 @@ coverage.xml # Django stuff: *.log +db.sqlite3 # Sphinx documentation docs/_build/ diff --git a/edtf_django_tests/edtf_integration/migrations/0001_initial.py b/edtf_django_tests/edtf_integration/migrations/0001_initial.py new file mode 100644
index 0000000..286a9de --- /dev/null +++ b/edtf_django_tests/edtf_integration/migrations/0001_initial.py @@ -0,0 +1,64 @@ +# Generated by Django 4.2.13 on 2024-05-09 18:13 + +from django.db import migrations, models +import edtf.fields + + +class Migration(migrations.Migration): + initial = True + + dependencies = [] + + operations = [ + migrations.CreateModel( + name="TestEvent", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "date_display", + models.CharField( + blank=True, + help_text="Enter the date in natural language format (e.g., 'Approximately June 2004').", + max_length=255, + null=True, + verbose_name="Date of creation (display)", + ), + ), + ( + "date_edtf_direct", + models.CharField( + blank=True, + help_text="Enter the date in EDTF format (e.g., '2004-06~').", + max_length=255, + null=True, + verbose_name="Date of creation (EDTF format)", + ), + ), + ( + "date_edtf", + edtf.fields.EDTFField( + blank=True, + lower_fuzzy_field="date_earliest", + lower_strict_field="date_sort_ascending", + natural_text_field="date_display", + null=True, + upper_fuzzy_field="date_latest", + upper_strict_field="date_sort_descending", + verbose_name="Date of creation (EDTF)", + ), + ), + ("date_earliest", models.FloatField(blank=True, null=True)), + ("date_latest", models.FloatField(blank=True, null=True)), + ("date_sort_ascending", models.FloatField(blank=True, null=True)), + ("date_sort_descending", models.FloatField(blank=True, null=True)), + ], + ), + ] diff --git a/edtf_django_tests/edtf_integration/models.py b/edtf_django_tests/edtf_integration/models.py new file mode 100644 index 0000000..0274d5f --- /dev/null +++ b/edtf_django_tests/edtf_integration/models.py @@ -0,0 +1,45 @@ +from django.db import models +from edtf.fields import EDTFField + + +class TestEvent(models.Model): + date_display = models.CharField( + "Date of creation (display)", + blank=True, + null=True, + 
max_length=255, + help_text="Enter the date in natural language format (e.g., 'Approximately June 2004')." + ) + + date_edtf_direct = models.CharField( + "Date of creation (EDTF format)", + max_length=255, + blank=True, + null=True, + help_text="Enter the date in EDTF format (e.g., '2004-06~')." + ) + + # EDTF field that parses the input from either natural language or direct EDTF string + # natural_text_field is the field that stores the natural language input and is used for display + # direct_input_field stores an EDTF string + # TODO is there a need for both a natural text input and a label? + # TODO could consolidate the direct_input_field and natural_text_field into a single field, but would need + # a flag to indicate whether the input is natural language or EDTF as the natural language parser sometimes + # misparses an EDTF string as a natural language string (e.g. `2020-03-15/2020-04-15` -> `2020-03-15`) + date_edtf = EDTFField( + "Date of creation (EDTF)", + natural_text_field='date_display', + direct_input_field='date_edtf_direct', + lower_fuzzy_field='date_earliest', + upper_fuzzy_field='date_latest', + lower_strict_field='date_sort_ascending', + upper_strict_field='date_sort_descending', + blank=True, + null=True, + ) + # Computed fields for filtering + date_earliest = models.FloatField(blank=True, null=True) + date_latest = models.FloatField(blank=True, null=True) + # Computed fields for sorting + date_sort_ascending = models.FloatField(blank=True, null=True) + date_sort_descending = models.FloatField(blank=True, null=True) From fbf4262b35f0760c7fb8944d906092d54fea0c15 Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Thu, 9 May 2024 14:18:41 -0400 Subject: [PATCH 7/8] Add Django integration tests This basic Django app shows how a user could create a model using the EDTFField and store data in it. 
The integration tests check that the EDTFField and associated fields (date_edtf_direct and date_display, in this case) work correctly. There is a weird issue in test_date_display() where if we use an instance variable (self.event1, self.event2) the event.date_display property is available, but if we retrieve the object from the database it is not. I tried using TestEvent.objects.create() as well as the current method (make and then save an instance), to no effect. CI is set up to run the Django integration tests after Pytest. We could move to using pytest/pytest-django for these tests as well. --- .github/workflows/ci.yml | 5 ++ edtf_django_tests/edtf_integration/tests.py | 96 +++++++++++++++++++++ 2 files changed, 101 insertions(+) create mode 100644 edtf_django_tests/edtf_integration/tests.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 34cbabc..627dd03 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -32,3 +32,8 @@ jobs: - name: Run unit tests run: | pytest + + - name: Run Django integration tests + working-directory: ./edtf_django_tests + run: | + python manage.py test edtf_integration diff --git a/edtf_django_tests/edtf_integration/tests.py b/edtf_django_tests/edtf_integration/tests.py new file mode 100644 index 0000000..de54d64 --- /dev/null +++ b/edtf_django_tests/edtf_integration/tests.py @@ -0,0 +1,96 @@ +from django.test import TestCase +from .models import TestEvent +from edtf.parser.grammar import parse_edtf as parse +from edtf.parser import EDTFObject +from edtf.convert import struct_time_to_jd + +class TestEventModelTests(TestCase): + def setUp(self): + # Create instances and assign them to instance variables + # date_edtf_direct is a valid EDTF string, date_display is a date + # to be parsed from natural language + self.event1 = TestEvent(date_edtf_direct="2020-03-15/2020-04-15") + self.event2 = TestEvent(date_edtf_direct="2021-05-06") + self.event3 = TestEvent(date_edtf_direct="2019-11") + self.event4 =
TestEvent(date_display="Approximately August 2018") + self.event5 = TestEvent(date_edtf_direct="2021-05-06") + self.event1.save() + self.event2.save() + self.event3.save() + self.event4.save() + self.event5.save() + + + def test_edtf_object_returned(self): + for event in TestEvent.objects.all(): + self.assertIsInstance(event.date_edtf, EDTFObject) + + + def test_sorting(self): + events = list(TestEvent.objects.order_by('date_sort_ascending')) + self.assertEqual(events[0].date_display, "Approximately August 2018") + self.assertEqual(events[1].date_edtf_direct, "2019-11") + self.assertEqual(events[2].date_edtf_direct, "2020-03-15/2020-04-15") + self.assertEqual(events[3].date_edtf_direct, "2021-05-06") + self.assertEqual(events[4].date_edtf_direct, "2021-05-06") + + events_desc = list(TestEvent.objects.order_by('-date_sort_descending')) + self.assertEqual(events_desc[0].date_edtf_direct, "2021-05-06") + self.assertEqual(events_desc[1].date_edtf_direct, "2021-05-06") + self.assertEqual(events_desc[2].date_edtf_direct, "2020-03-15/2020-04-15") + self.assertEqual(events_desc[3].date_edtf_direct, "2019-11") + self.assertEqual(events_desc[4].date_display, "Approximately August 2018") + + + def test_date_boundaries(self): + event = TestEvent.objects.get(date_edtf_direct="2020-03-15/2020-04-15") + expected_earliest_jd = struct_time_to_jd(parse("2020-03-15").lower_strict()) + expected_latest_jd = struct_time_to_jd(parse("2020-04-15").upper_strict()) + self.assertAlmostEqual(event.date_earliest, expected_earliest_jd, places=1) + self.assertAlmostEqual(event.date_latest, expected_latest_jd, places=1) + + event = self.event2 + expected_earliest_jd = struct_time_to_jd(parse("2021-05-06").lower_strict()) + expected_latest_jd = struct_time_to_jd(parse("2021-05-06").upper_strict()) + self.assertAlmostEqual(event.date_earliest, expected_earliest_jd, places=1) + self.assertAlmostEqual(event.date_latest, expected_latest_jd, places=1) + + event = 
TestEvent.objects.get(date_edtf_direct="2019-11") + expected_earliest_jd = struct_time_to_jd(parse("2019-11").lower_strict()) + expected_latest_jd = struct_time_to_jd(parse("2019-11").upper_strict()) + self.assertAlmostEqual(event.date_earliest, expected_earliest_jd, places=1) + self.assertAlmostEqual(event.date_latest, expected_latest_jd, places=1) + + event = TestEvent.objects.get(date_display="Approximately August 2018") + expected_earliest_jd = struct_time_to_jd(parse("2018-08~").lower_fuzzy()) + expected_latest_jd = struct_time_to_jd(parse("2018-08~").upper_fuzzy()) + self.assertAlmostEqual(event.date_earliest, expected_earliest_jd, places=1) + self.assertAlmostEqual(event.date_latest, expected_latest_jd, places=1) + + def test_date_display(self): + """ + Test that the date_display field is correctly populated based on the EDTF input. + In the future, a more sophisticated natural language parser could be used to generate + a human readable date from the EDTF input. + """ + # why does this fail?? 
+ # event = TestEvent.objects.get(date_edtf_direct="2020-03-15/2020-04-15") + # self.assertEqual(event.date_display, "2020-03-15/2020-04-15") + + self.assertEqual(self.event1.date_display, "2020-03-15/2020-04-15") + self.assertEqual(self.event2.date_display, "2021-05-06") + self.assertEqual(self.event3.date_display, "2019-11") + self.assertEqual(self.event4.date_display, "Approximately August 2018") + + def test_comparison(self): + # test equality of the same dates + self.assertEqual(self.event2.date_edtf, self.event5.date_edtf, "Events with the same date should be equal") + + # test inequality of different dates + self.assertNotEqual(self.event1.date_edtf, self.event2.date_edtf, "Events with different dates should not be equal") + + # greater than + self.assertGreater(self.event2.date_edtf, self.event3.date_edtf, "2021-05-06 is greater than 2019-11") + + # less than + self.assertLess(self.event3.date_edtf, self.event2.date_edtf, "2019-11 is less than 2021-05-06") \ No newline at end of file From e2a79ddd35d17871f408096ae3c6dc4fa04d5b44 Mon Sep 17 00:00:00 2001 From: Cole Crawford <16374762+ColeDCrawford@users.noreply.github.com> Date: Sun, 12 May 2024 19:01:17 -0400 Subject: [PATCH 8/8] Improve handling of field updates Ensure that `EDTFField` properly updates related fields whenever it changes, inspired by ImageField. - Use EDTFFieldDescriptor as a descriptor for EDTFField. This inherits from DeferredAttribute and handles getting, setting, and updating values. Whenever the field value is set, additional logic is processed to potentially update the field again based on other fields. - update_values() replaces pre_save() to better handle updates/dependencies when EDTFField value changes. - contribute_to_class() attaches update_values() to the `post_init` signal. These changes should make the field updates more stable and not reliant on definition order in models using EDTFField. 
Thanks for the suggestion @aweakley https://github.com/ixc/python-edtf/pull/47#issuecomment-2105500069 Co-Authored-By: aweakley <224316+aweakley@users.noreply.github.com> --- edtf/fields.py | 36 ++++++++++++++---- edtf_django_tests/edtf_integration/tests.py | 41 ++++++++------------- 2 files changed, 45 insertions(+), 32 deletions(-) diff --git a/edtf/fields.py b/edtf/fields.py index bbccbcf..d568375 100644 --- a/edtf/fields.py +++ b/edtf/fields.py @@ -3,12 +3,14 @@ except: import pickle -from django.db import models from django.core.exceptions import FieldDoesNotExist +from django.db import models +from django.db.models import signals +from django.db.models.query_utils import DeferredAttribute from edtf import parse_edtf, EDTFObject -from edtf.natlang import text_to_edtf from edtf.convert import struct_time_to_date, struct_time_to_jd +from edtf.natlang import text_to_edtf DATE_ATTRS = ( 'lower_strict', @@ -17,6 +19,20 @@ 'upper_fuzzy', ) +class EDTFFieldDescriptor(DeferredAttribute): + """ + Descriptor for the EDTFField's attribute on the model instance. + This updates the dependent fields each time this value is set. + """ + + def __set__(self, instance, value): + # First set the value we are given + instance.__dict__[self.field.attname] = value + # `update_values` may provide us with a new value to set + edtf = self.field.update_values(instance, value) + if edtf != value: + instance.__dict__[self.field.attname] = edtf + class EDTFField(models.CharField): @@ -40,6 +56,7 @@ def __init__( super(EDTFField, self).__init__(verbose_name, name, **kwargs) description = "A field for storing complex/fuzzy date specifications in EDTF format." 
+ descriptor_class = EDTFFieldDescriptor def deconstruct(self): name, path, args, kwargs = super(EDTFField, self).deconstruct() @@ -88,7 +105,7 @@ def get_prep_value(self, value): return pickle.dumps(value) return value - def pre_save(self, instance, add): + def update_values(self, instance, *args, **kwargs): """ Updates the EDTF value from either the natural_text_field, which is parsed with text_to_edtf() and is used for display, or falling back to the direct_input_field, @@ -123,10 +140,6 @@ def pre_save(self, instance, add): # TODO: if both direct_input and natural_text are cleared, should we throw an error? edtf = existing_value - # Update the actual EDTF field in the model if there is a change - if edtf != existing_value: - setattr(instance, self.attname, edtf) - # Process and update related date fields based on the EDTF object for attr in DATE_ATTRS: field_attr = f"{attr}_field" @@ -151,3 +164,12 @@ def pre_save(self, instance, add): else: setattr(instance, g, None) return edtf + + def contribute_to_class(self, cls, name, **kwargs): + super().contribute_to_class(cls, name, **kwargs) + # Attach update_values so that dependent fields declared + # after their corresponding edtf field don't stay cleared by + # Model.__init__, see Django bug #11196. 
+ # Only run post-initialization values update on non-abstract models + if not cls._meta.abstract: + signals.post_init.connect(self.update_values, sender=cls) \ No newline at end of file diff --git a/edtf_django_tests/edtf_integration/tests.py b/edtf_django_tests/edtf_integration/tests.py index de54d64..9385733 100644 --- a/edtf_django_tests/edtf_integration/tests.py +++ b/edtf_django_tests/edtf_integration/tests.py @@ -9,16 +9,11 @@ def setUp(self): # Create instances and assign them to instance variables # date_edtf_direct is a valid EDTF string, date_display is a date # to be parsed from natural language - self.event1 = TestEvent(date_edtf_direct="2020-03-15/2020-04-15") - self.event2 = TestEvent(date_edtf_direct="2021-05-06") - self.event3 = TestEvent(date_edtf_direct="2019-11") - self.event4 = TestEvent(date_display="Approximately August 2018") - self.event5 = TestEvent(date_edtf_direct="2021-05-06") - self.event1.save() - self.event2.save() - self.event3.save() - self.event4.save() - self.event5.save() + self.event1 = TestEvent.objects.create(date_edtf_direct="2020-03-15/2020-04-15") + self.event2 = TestEvent.objects.create(date_edtf_direct="2021-05-06") + self.event3 = TestEvent.objects.create(date_edtf_direct="2019-11") + self.event4 = TestEvent.objects.create(date_display="Approximately August 2018") + self.event5 = TestEvent.objects.create(date_edtf_direct="2021-05-06") def test_edtf_object_returned(self): @@ -49,23 +44,22 @@ def test_date_boundaries(self): self.assertAlmostEqual(event.date_earliest, expected_earliest_jd, places=1) self.assertAlmostEqual(event.date_latest, expected_latest_jd, places=1) - event = self.event2 expected_earliest_jd = struct_time_to_jd(parse("2021-05-06").lower_strict()) expected_latest_jd = struct_time_to_jd(parse("2021-05-06").upper_strict()) - self.assertAlmostEqual(event.date_earliest, expected_earliest_jd, places=1) - self.assertAlmostEqual(event.date_latest, expected_latest_jd, places=1) - - event = 
TestEvent.objects.get(date_edtf_direct="2019-11") + self.assertAlmostEqual(self.event2.date_earliest, expected_earliest_jd, places=1) + self.assertAlmostEqual(self.event2.date_latest, expected_latest_jd, places=1) + + event3 = TestEvent.objects.get(date_edtf_direct="2019-11") expected_earliest_jd = struct_time_to_jd(parse("2019-11").lower_strict()) expected_latest_jd = struct_time_to_jd(parse("2019-11").upper_strict()) - self.assertAlmostEqual(event.date_earliest, expected_earliest_jd, places=1) - self.assertAlmostEqual(event.date_latest, expected_latest_jd, places=1) + self.assertAlmostEqual(event3.date_earliest, expected_earliest_jd, places=1) + self.assertAlmostEqual(event3.date_latest, expected_latest_jd, places=1) - event = TestEvent.objects.get(date_display="Approximately August 2018") + event4 = TestEvent.objects.get(date_display="Approximately August 2018") expected_earliest_jd = struct_time_to_jd(parse("2018-08~").lower_fuzzy()) expected_latest_jd = struct_time_to_jd(parse("2018-08~").upper_fuzzy()) - self.assertAlmostEqual(event.date_earliest, expected_earliest_jd, places=1) - self.assertAlmostEqual(event.date_latest, expected_latest_jd, places=1) + self.assertAlmostEqual(event4.date_earliest, expected_earliest_jd, places=1) + self.assertAlmostEqual(event4.date_latest, expected_latest_jd, places=1) def test_date_display(self): """ @@ -73,11 +67,8 @@ def test_date_display(self): In the future, a more sophisticated natural language parser could be used to generate a human readable date from the EDTF input. """ - # why does this fail?? 
- # event = TestEvent.objects.get(date_edtf_direct="2020-03-15/2020-04-15") - # self.assertEqual(event.date_display, "2020-03-15/2020-04-15") - - self.assertEqual(self.event1.date_display, "2020-03-15/2020-04-15") + event = TestEvent.objects.get(date_edtf_direct="2020-03-15/2020-04-15") + self.assertEqual(event.date_display, "2020-03-15/2020-04-15") self.assertEqual(self.event2.date_display, "2021-05-06") self.assertEqual(self.event3.date_display, "2019-11") self.assertEqual(self.event4.date_display, "Approximately August 2018")