diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 34cbabc..627dd03 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -32,3 +32,8 @@ jobs: - name: Run unit tests run: | pytest + + - name: Run Django integration tests + working-directory: ./edtf_django_tests + run: | + python manage.py test edtf_integration diff --git a/.gitignore b/.gitignore index ab3165a..7c23190 100644 --- a/.gitignore +++ b/.gitignore @@ -50,6 +50,7 @@ coverage.xml # Django stuff: *.log +db.sqlite3 # Sphinx documentation docs/_build/ diff --git a/edtf/fields.py b/edtf/fields.py index 52b9171..d568375 100644 --- a/edtf/fields.py +++ b/edtf/fields.py @@ -3,12 +3,14 @@ except: import pickle -from django.db import models from django.core.exceptions import FieldDoesNotExist +from django.db import models +from django.db.models import signals +from django.db.models.query_utils import DeferredAttribute from edtf import parse_edtf, EDTFObject -from edtf.natlang import text_to_edtf from edtf.convert import struct_time_to_date, struct_time_to_jd +from edtf.natlang import text_to_edtf DATE_ATTRS = ( 'lower_strict', @@ -17,6 +19,20 @@ 'upper_fuzzy', ) +class EDTFFieldDescriptor(DeferredAttribute): + """ + Descriptor for the EDTFField's attribute on the model instance. + This updates the dependent fields each time this value is set. + """ + + def __set__(self, instance, value): + # First set the value we are given + instance.__dict__[self.field.attname] = value + # `update_values` may provide us with a new value to set + edtf = self.field.update_values(instance, value) + if edtf != value: + instance.__dict__[self.field.attname] = edtf + class EDTFField(models.CharField): @@ -24,6 +40,7 @@ def __init__( self, verbose_name=None, name=None, natural_text_field=None, + direct_input_field=None, lower_strict_field=None, upper_strict_field=None, lower_fuzzy_field=None, @@ -31,13 +48,15 @@ def __init__( **kwargs ): kwargs['max_length'] = 2000 - self.natural_text_field, self.lower_strict_field, \ - self.upper_strict_field, self.lower_fuzzy_field, \ - self.upper_fuzzy_field = natural_text_field, lower_strict_field, \ - upper_strict_field, lower_fuzzy_field, upper_fuzzy_field + self.natural_text_field, self.direct_input_field, \ + self.lower_strict_field, self.upper_strict_field, \ + self.lower_fuzzy_field, self.upper_fuzzy_field = \ + natural_text_field, direct_input_field, lower_strict_field, \ + upper_strict_field, lower_fuzzy_field, upper_fuzzy_field super(EDTFField, self).__init__(verbose_name, name, **kwargs) - description = "An field for storing complex/fuzzy date specifications in EDTF format." + description = "A field for storing complex/fuzzy date specifications in EDTF format." + descriptor_class = EDTFFieldDescriptor def deconstruct(self): name, path, args, kwargs = super(EDTFField, self).deconstruct() @@ -53,15 +72,17 @@ def deconstruct(self): del kwargs["max_length"] return name, path, args, kwargs - def from_db_value(self, value, expression, connection, context=None): - # Converting values to Python objects - if not value: - return None + def from_db_value(self, value, expression, connection): + # Converting values from the database to Python objects + if value is None: + return value + try: - return pickle.loads(str(value)) - except: - pass - return parse_edtf(value, fail_silently=True) + # Try to unpickle if the value was pickled + return pickle.loads(value) + except (pickle.PickleError, TypeError): + # If it fails because it's not pickled data, try parsing as EDTF + return parse_edtf(value, fail_silently=True) def to_python(self, value): if isinstance(value, EDTFObject): @@ -84,37 +105,44 @@ def get_prep_value(self, value): return pickle.dumps(value) return value - def pre_save(self, instance, add): + def update_values(self, instance, *args, **kwargs): """ - Updates the edtf value from the value of the display_field. - If there's a valid edtf, then set the date values. + Updates the EDTF value from either the natural_text_field, which is parsed + with text_to_edtf() and is used for display, or falling back to the direct_input_field, + which allows directly providing an EDTF string. If one of these provides a valid EDTF object, + then set the date values accordingly. """ - if not self.natural_text_field or self.attname not in instance.__dict__: - return - - edtf = getattr(instance, self.attname) - - # Update EDTF field based on latest natural text value, if any - natural_text = getattr(instance, self.natural_text_field) - if natural_text: - edtf = text_to_edtf(natural_text) + + # Get existing value to determine if update is needed + existing_value = getattr(instance, self.attname, None) + direct_input = getattr(instance, self.direct_input_field, None) + natural_text = getattr(instance, self.natural_text_field, None) + + # if direct_input is provided and is different from the existing value, update the EDTF field + if direct_input and (existing_value is None or str(existing_value) != direct_input): + edtf = parse_edtf(direct_input, fail_silently=True) # ParseException if invalid; should this be raised? + # TODO pyparsing.ParseExceptions are very noisy and dumps the whole grammar (see https://github.com/ixc/python-edtf/issues/46) + + # set the natural_text (display) field to the direct_input if it is not provided + if natural_text is None: + setattr(instance, self.natural_text_field, direct_input) + + elif natural_text: + edtf_string = text_to_edtf(natural_text) + if edtf_string and (existing_value is None or str(existing_value) != edtf_string): + edtf = parse_edtf(edtf_string, fail_silently=True) # potetial ParseException if invalid; should this be raised? + else: + edtf = existing_value else: - edtf = None + if not existing_value: + # No inputs provided and no existing value; TODO log this? + return + # TODO: if both direct_input and natural_text are cleared, should we throw an error? + edtf = existing_value - # TODO If `natural_text_field` becomes cleared the derived EDTF field - # value should also be cleared, rather than left at original value? - - # TODO Handle case where EDTF field is set to a string directly, not - # via `natural_text_field` (this is a slightly unexpected use-case, but - # is a very efficient way to set EDTF values in situations like for API - # imports so we probably want to continue to support it?) - if edtf and not isinstance(edtf, EDTFObject): - edtf = parse_edtf(edtf, fail_silently=True) - - setattr(instance, self.attname, edtf) - # set or clear related date fields on the instance + # Process and update related date fields based on the EDTF object for attr in DATE_ATTRS: - field_attr = "%s_field" % attr + field_attr = f"{attr}_field" g = getattr(self, field_attr, None) if g: if edtf: @@ -136,3 +164,12 @@ def pre_save(self, instance, add): else: setattr(instance, g, None) return edtf + + def contribute_to_class(self, cls, name, **kwargs): + super().contribute_to_class(cls, name, **kwargs) + # Attach update_values so that dependent fields declared + # after their corresponding edtf field don't stay cleared by + # Model.__init__, see Django bug #11196. + # Only run post-initialization values update on non-abstract models + if not cls._meta.abstract: + signals.post_init.connect(self.update_values, sender=cls) \ No newline at end of file diff --git a/edtf/natlang/tests.py b/edtf/natlang/tests.py index 645a373..eaa9af6 100644 --- a/edtf/natlang/tests.py +++ b/edtf/natlang/tests.py @@ -1,26 +1,30 @@ -import unittest +import pytest from edtf.natlang.en import text_to_edtf +# TODO update the tests and code to test and output the new spec + # where examples are tuples, the second item is the normalised output -EXAMPLES = ( - ('active late 17th-19th centuries', '16xx/18xx'), # ignoring 'late' for now - ('active 17-19th Centuries', '16xx/18xx'), # ignoring 'late' for now +@pytest.mark.parametrize("input_text,expected_output", [ + # Ignoring 'late' for simplicity in these examples + ('active late 17th-19th centuries', '16xx/18xx'), + ('active 17-19th Centuries', '16xx/18xx'), # Unrecognised values ('', None), ('this isn\'t a date', None), - # Explicity rejected values that would otherwise be badly converted + # Explicitly rejected values that would otherwise be badly converted ('23rd Dynasty', None), - ('90', '1990'), # implied century + # Implied century and specific years + ('90', '1990'), # Implied century ('1860', '1860'), ('the year 1800', '1800'), ('the year 1897', '1897'), ('January 2008', '2008-01'), ('January 12, 1940', '1940-01-12'), - # uncertain/approximate + # Uncertain or approximate dates ('1860?', '1860?'), ('1862 (uncertain)', '1862?'), ('maybe 1862', '1862?'), @@ -31,11 +35,11 @@ ('~ Feb 1812', '1812-02~'), ('circa Feb 1812', '1812-02~'), ('Feb 1812 approx', '1812-02~'), - ('c1860', '1860~'), # different abbreviations - ('c.1860', '1860~'), # with or without . + ('c1860', '1860~'), # Different abbreviations + ('c.1860', '1860~'), # With or without . ('ca1860', '1860~'), ('ca.1860', '1860~'), - ('c 1860', '1860~'), # with or without space + ('c 1860', '1860~'), # With or without space ('c. 1860', '1860~'), ('ca. 1860', '1860~'), ('approx 1860', '1860~'), @@ -44,15 +48,14 @@ ('approximately 1860', '1860~'), ('about 1860', '1860~'), ('about Spring 1849', '1849-21~'), - ('notcirca 1860', '1860'), # avoid words containing circa - ('attica 1802', '1802'), - # avoid false positive circa at the end of preceding word - ('attic. 1802', '1802'), # avoid false positive circa + ('notcirca 1860', '1860'), # Avoid words containing 'circa' + ('attica 1802', '1802'), # Avoid false positive 'circa' at the end of preceding word + ('attic. 1802', '1802'), # Avoid false positive 'circa' - # masked precision - ('1860s', '186x'), # 186x has decade precision, 186u has year precision. + # Masked precision + ('1860s', '186x'), # 186x has decade precision, 186u has year precision. - # masked precision + uncertainty + # Masked precision + uncertainty ('ca. 1860s', '186x~'), ('c. 1860s', '186x~'), ('Circa 1840s', '184x~'), @@ -60,26 +63,26 @@ ('ca. 1860s?', '186x?~'), ('uncertain: approx 1862', '1862?~'), - # masked precision with first decade (ambiguous) - ('1800s', '18xx'), # without additional uncertainty, use the century - ('2000s', '20xx'), # without additional uncertainty, use the century - ('c1900s', '190x~'), # if there's additional uncertainty, use the decade - ('c1800s?', '180x?~'), # if there's additional uncertainty, use the decade + # Ambiguous masked precision for centuries and decades + ('1800s', '18xx'), # Without additional uncertainty, use the century + ('2000s', '20xx'), # Without additional uncertainty, use the century + ('c1900s', '190x~'), # If there's additional uncertainty, use the decade + ('c1800s?', '180x?~'), # If there's additional uncertainty, use the decade - # unspecified + # Unspecified dates ('January 12', 'uuuu-01-12'), ('January', 'uuuu-01'), ('10/7/2008', '2008-10-07'), ('7/2008', '2008-07'), - # seasons + # Seasons mapped to specific codes ('Spring 1872', '1872-21'), ('Summer 1872', '1872-22'), ('Autumn 1872', '1872-23'), ('Fall 1872', '1872-23'), ('Winter 1872', '1872-24'), - # before/after + # Dates relative to known events (before/after) ('earlier than 1928', 'unknown/1928'), ('before 1928', 'unknown/1928'), ('after 1928', '1928/unknown'), @@ -87,32 +90,30 @@ ('before January 1928', 'unknown/1928-01'), ('before 18 January 1928', 'unknown/1928-01-18'), - # before/after approx + # Approximations combined with before/after ('before approx January 18 1928', 'unknown/1928-01-18~'), ('before approx January 1928', 'unknown/1928-01~'), ('after approx January 1928', '1928-01~/unknown'), ('after approx Summer 1928', '1928-22~/unknown'), - # before/after and uncertain/unspecificed + # Before and after with uncertain / unspecified components ('after about the 1920s', '192x~/unknown'), ('before about the 1900s', 'unknown/190x~'), ('before the 1900s', 'unknown/19xx'), - # unspecified + # Specifying unspecified components within a date # ('decade in 1800s', '18ux'), #too esoteric # ('decade somewhere during the 1800s', '18ux'), #lengthier. Keywords are 'in' or 'during' - ('year in the 1860s', '186u'), - # 186x has decade precision, 186u has year precision. - ('year in the 1800s', '18xu'), + ('year in the 1860s', '186u'), # 186x has decade precision + ('year in the 1800s', '18xu'), # 186u has year precision ('year in about the 1800s', '180u~'), ('month in 1872', '1872-uu'), ('day in Spring 1849', '1849-21-uu'), ('day in January 1872', '1872-01-uu'), ('day in 1872', '1872-uu-uu'), ('birthday in 1872', '1872'), - # avoid false positive at end of preceding word - # centuries + # Handling centuries with approximation and uncertainty ('1st century', '00xx'), ('10c', '09xx'), ('19th century', '18xx'), @@ -126,7 +127,7 @@ ('19c?', '18xx?'), ('c.19c?', '18xx?~'), - # BC/AD + # BC/AD dating ('1 AD', '0001'), ('17 CE', '0017'), ('127 CE', '0127'), @@ -136,18 +137,17 @@ ('c127 CE', '0127~'), ('c1270 CE', '1270~'), ('c64 BCE', '-0064~'), - ('2nd century bc', '-01xx'), # -200 to -101 + ('2nd century bc', '-01xx'), # -200 to -101 ('2nd century bce', '-01xx'), ('2nd century ad', '01xx'), ('2nd century ce', '01xx'), - # c-c-c-combo - # just showing off now... + # Combining uncertainties and approximations in creative ways ('a day in about Spring 1849?', '1849-21-uu?~'), - # simple ranges. Not all of these results are correct EDTF, but - # this is as good as the EDTF implementation and simple natural - # language parser we have. + # Simple date ranges, showcasing both the limitations and capabilities of the parser + # Not all of these results are correct EDTF, but this is as good as the EDTF implementation + # and simple natural language parser we have. ('1851-1852', '1851/1852'), ('1851-1852; printed 1853-1854', '1851/1852'), ('1851-52', '1851/1852'), @@ -156,7 +156,6 @@ ('1857-mid 1860s', '1857/186x'), ('1858/1860', '[1858, 1860]'), ('1860s-1870s', '186x/187x'), - ('1861, printed 1869', '1861'), ('1910-30', '1910/1930'), ('active 1910-30', '1910/1930'), ('1861-67', '1861/1867'), @@ -174,16 +173,13 @@ ('1900; 1973', '1900'), ('1900; printed 1912', '1900'), ('1915 late - autumn 1916', '1915/1916-23'), - - ('1915, from Camerawork, October 1916', '1915'), # should be {1915, 1916-10} + ('1915, from Camerawork, October 1916', '1915'), # should be {1915, 1916-10} ('1920s -early 1930s', '192x/193x'), ('1930s, printed early 1960s', '193x'), # should be something like {193x, 196x}, - # though those forms aren't explicitly supported in the spec. ('1932, printed 1976 by Gunther Sander', '1932'), # should be {1932, 1976} - ('1938, printed 1940s-1950s', '1938'), # should be something like {1938, 194x-195x} - - + ('1938, printed 1940s-1950s', '1938') # should be something like {1938, 194x-195x} + # Uncertain and approximate on different parts of the date # for these to work we need to recast is_uncertain and is_approximate # such that they work on different parts. Probably worth rolling our own # dateparser at this point. @@ -194,22 +190,13 @@ # ('a day in about Spring in about 1849', '1849~-21~-uu'), # ('maybe January in some year in about the 1830s', '183u~-01?'), # ('about July? in about 1849', '1849~-07?~'), -) - - -class TestLevel0(unittest.TestCase): - def test_natlang(self): - """ - For each of the examples, establish that: - - the unicode of the parsed object is acceptably equal to the EDTF string - - the parsed object is a subclass of EDTFObject - :return: - """ - for i, o in EXAMPLES: - e = text_to_edtf(i) - print("%s => %s" % (i, e)) - self.assertEqual(e, o) +]) +def test_natlang(input_text, expected_output): + """ + Test natural language conversion to EDTF format: + Verify that the conversion from text to EDTF format matches the expected output. + """ + result = text_to_edtf(input_text) + assert result == expected_output, f"Failed for input: {input_text}" -if __name__ == '__main__': - unittest.main() diff --git a/edtf/parser/parser_classes.py b/edtf/parser/parser_classes.py index 2d6c0bf..3b5ac6e 100644 --- a/edtf/parser/parser_classes.py +++ b/edtf/parser/parser_classes.py @@ -713,10 +713,18 @@ def __str__(self): return "[%s]" % (", ".join([str(o) for o in self.objects])) def _strict_date(self, lean): + strict_dates = [x._strict_date(lean) for x in self.objects] + # Accounting for possible 'inf' and '-inf' values if lean == LATEST: - return max([x._strict_date(lean) for x in self.objects]) + if any(isinstance(d, float) and d == float('inf') for d in strict_dates): + return float('inf') + else: + return max((d for d in strict_dates if not isinstance(d, float)), default=float('inf')) else: - return min([x._strict_date(lean) for x in self.objects]) + if any(isinstance(d, float) and d == float('-inf') for d in strict_dates): + return float('-inf') + else: + return min((d for d in strict_dates if not isinstance(d, float)), default=float('-inf')) class MultipleDates(EDTFObject): diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py index 4043988..877fd0b 100644 --- a/edtf/parser/tests.py +++ b/edtf/parser/tests.py @@ -1,19 +1,16 @@ -import unittest -import sys +import pytest from datetime import date from time import struct_time from edtf.parser.grammar import parse_edtf as parse -from edtf.parser.parser_classes import EDTFObject, TIME_EMPTY_TIME, \ - TIME_EMPTY_EXTRAS +from edtf.parser.parser_classes import EDTFObject, TIME_EMPTY_TIME, TIME_EMPTY_EXTRAS from edtf.parser.edtf_exceptions import EDTFParseException -# Example object types and attributes. -# the first item in each tuple is the input EDTF string, and expected parse result. -# where the first value is a tuple, the second item is the normalised parse result. +# Example object types and attributes represented as tuples. +# The first item in each tuple is the input EDTF string, and expected parse result. +# where the first value is a tuple, the second item is a tuple of the normalised parse result. # -# The rest of the values in each tuple indicate the iso versions of the derived -# Python ``date``s. +# The values in the second tuple indicate the iso versions of the derived Python `date`s. # - If there's one other value, all the derived dates should be the same. # - If there're two other values, then all the lower values should be the same # and all the upper values should be the same. @@ -26,176 +23,171 @@ EXAMPLES = ( # ******************************* LEVEL 0 ********************************* # year, month, day - ('2001-02-03', '2001-02-03'), + ('2001-02-03', ('2001-02-03',)), # year, month - ('2008-12', '2008-12-01', '2008-12-31'), + ('2008-12', ('2008-12-01', '2008-12-31')), # year - ('2008', '2008-01-01', '2008-12-31'), + ('2008', ('2008-01-01', '2008-12-31')), # a negative year - ('-0999', '-0999-01-01', '-0999-12-31'), + ('-0999', ('-0999-01-01', '-0999-12-31')), # year zero - ('0000', '0000-01-01', '0000-12-31'), + ('0000', ('0000-01-01', '0000-12-31')), # DateTimes - ('2001-02-03T09:30:01', '2001-02-03'), - ('2004-01-01T10:10:10Z', '2004-01-01'), - ('2004-01-01T10:10:10+05:00', '2004-01-01'), - ('1985-04-12T23:20:30', '1985-04-12'), + ('2001-02-03T09:30:01', ('2001-02-03',)), + ('2004-01-01T10:10:10Z', ('2004-01-01',)), + ('2004-01-01T10:10:10+05:00', ('2004-01-01',)), + ('1985-04-12T23:20:30', ('1985-04-12',)), + # Intervals # An interval beginning sometime in 1964 and ending sometime in 2008. Year precision. - ('1964/2008', '1964-01-01', '2008-12-31'), + ('1964/2008', ('1964-01-01', '2008-12-31')), # An interval beginning sometime in June 2004 and ending sometime in August of 2006. Month precision. - ('2004-06/2006-08', '2004-06-01', '2006-08-31'), + ('2004-06/2006-08', ('2004-06-01', '2006-08-31')), # An interval beginning sometime on February 1, 2004 and ending sometime on February 8, 2005. Day precision. - ('2004-02-01/2005-02-08', '2004-02-01', '2005-02-08'), - # An interval beginning sometime on February 1, 2004 and ending sometime in February 2005. The precision of the interval is not defined; the start endpoint has day precision and the end endpoint has month precision. - ('2004-02-01/2005-02', '2004-02-01', '2005-02-28'), - # An interval beginning sometime on February 1, 2004 and ending sometime in 2005. The start endpoint has day precision and the end endpoint has year precision. - ('2004-02-01/2005', '2004-02-01', '2005-12-31'), + ('2004-02-01/2005-02-08', ('2004-02-01', '2005-02-08')), + # An interval beginning sometime on February 1, 2004 and ending sometime in February 2005. + # The precision of the interval is not defined; the start endpoint has day precision and the end endpoint has month precision. + ('2004-02-01/2005-02', ('2004-02-01', '2005-02-28')), + # An interval beginning sometime on February 1, 2004 and ending sometime in 2005. + # The start endpoint has day precision and the end endpoint has year precision. + ('2004-02-01/2005', ('2004-02-01', '2005-12-31')), # An interval beginning sometime in 2005 and ending sometime in February 2006. - ('2005/2006-02', '2005-01-01', '2006-02-28'), + ('2005/2006-02', ('2005-01-01', '2006-02-28')), # An interval beginning sometime in -2005 and ending sometime in February -2004. - ('-2005/-1999-02', '-2005-01-01', '-1999-02-28'), + ('-2005/-1999-02', ('-2005-01-01', '-1999-02-28')), # ******************************* LEVEL 1 ********************************* - # Uncertain/Approximate + # Uncertain/Approximate # uncertain: possibly the year 1984, but not definitely - ('1984?', '1984-01-01', '1984-12-31', '1983-01-01', '1985-12-31'), - ('2004-06-11?', '2004-06-11', '2004-06-11', '2004-06-10', '2004-06-12'), - ('2004-06?', '2004-06-01', '2004-06-30', '2004-05-01', '2004-07-30'), + ('1984?', ('1984-01-01', '1984-12-31', '1983-01-01', '1985-12-31')), + ('2004-06-11?', ('2004-06-11', '2004-06-11', '2004-06-10', '2004-06-12')), + ('2004-06?', ('2004-06-01', '2004-06-30', '2004-05-01', '2004-07-30')), # "approximately" the year 1984 - ('1984~', '1984-01-01', '1984-12-31', '1983-01-01', '1985-12-31'), + ('1984~', ('1984-01-01', '1984-12-31', '1983-01-01', '1985-12-31')), # the year is approximately 1984 and even that is uncertain - ('1984%', '1984-01-01', '1984-12-31', '1982-01-01', '1986-12-31'), + ('1984%', ('1984-01-01', '1984-12-31', '1982-01-01', '1986-12-31')), # Unspecified # some unspecified year in the 1990s. - ('199X', '1990-01-01', '1999-12-31'), + ('199X', ('1990-01-01', '1999-12-31')), # some unspecified year in the 1900s. - ('19XX', '1900-01-01', '1999-12-31'), + ('19XX', ('1900-01-01', '1999-12-31')), # some month in 1999 - ('1999-XX', '1999-01-01', '1999-12-31'), + ('1999-XX', ('1999-01-01', '1999-12-31')), # some day in January 1999 - ('1999-01-XX', '1999-01-01', '1999-01-31'), + ('1999-01-XX', ('1999-01-01', '1999-01-31')), # some day in 1999 - ('1999-XX-XX', '1999-01-01', '1999-12-31'), + ('1999-XX-XX', ('1999-01-01', '1999-12-31')), # Uncertain/Approximate lower boundary dates (BCE) - ('-0275~', '-0275-01-01', '-0275-12-31', '-0276-01-01', '-0274-12-31'), - ('-0001~', '-0001-01-01', '-0001-12-31', '-0002-01-01', '0000-12-31'), - ('0000~', '0000-01-01', '0000-12-31', '-0001-01-01', '0001-12-31'), + ('-0275~', ('-0275-01-01', '-0275-12-31', '-0276-01-01', '-0274-12-31')), + ('-0001~', ('-0001-01-01', '-0001-12-31', '-0002-01-01', '0000-12-31')), + ('0000~', ('0000-01-01', '0000-12-31', '-0001-01-01', '0001-12-31')), # L1 Extended Interval # beginning unknown, end 2006 - ('/2006', '1996-12-31', '2006-12-31'), + ('/2006', ('1996-12-31', '2006-12-31')), # beginning June 1, 2004, end unknown - ('2004-06-01/', '2004-06-01', '2014-06-01'), + ('2004-06-01/', ('2004-06-01', '2014-06-01')), # beginning open, end 2006 - ('../2006', '-20000000-01-01', '2006-12-31'), - # beginning January 1 2004 with no end date - ('2004-01-01/..', '2004-01-01', '20000000-12-31'), + ('../2006', ('-inf', '2006-12-31')), + # beginning January 1, 2004 with no end date + ('2004-01-01/..', ('2004-01-01', 'inf')), # interval beginning approximately 1984 and ending June 2004 - ('1984~/2004-06', '1984-01-01', '2004-06-30', '1983-01-01', '2004-06-30'), + ('1984~/2004-06', ('1984-01-01', '2004-06-30', '1983-01-01', '2004-06-30')), # interval beginning 1984 and ending approximately June 2004 - ('1984/2004-06~', '1984-01-01', '2004-06-30', '1984-01-01', '2004-07-30'), - ('1984?/2004%', '1984-01-01', '2004-12-31', '1983-01-01', '2006-12-31'), - ('1984~/2004~', '1984-01-01', '2004-12-31', '1983-01-01', '2005-12-31'), + ('1984/2004-06~', ('1984-01-01', '2004-06-30', '1984-01-01', '2004-07-30')), + ('1984?/2004%', ('1984-01-01', '2004-12-31', '1983-01-01', '2006-12-31')), + ('1984~/2004~', ('1984-01-01', '2004-12-31', '1983-01-01', '2005-12-31')), # interval whose beginning is uncertain but thought to be 1984, and whose end is uncertain and approximate but thought to be 2004 - ('1984-06?/2004-08?', '1984-06-01', '2004-08-31', '1984-05-01', '2004-09-30'), - ('1984-06-02?/2004-08-08~', '1984-06-02', '2004-08-08', '1984-06-01', '2004-08-09'), - ('1984-06-02?/', '1984-06-02', '1994-06-02', '1984-06-01', '1994-06-02'), + ('1984-06?/2004-08?', ('1984-06-01', '2004-08-31', '1984-05-01', '2004-09-30')), + ('1984-06-02?/2004-08-08~', ('1984-06-02', '2004-08-08', '1984-06-01', '2004-08-09')), + ('1984-06-02?/', ('1984-06-02', '1994-06-02', '1984-06-01', '1994-06-02')), # Year exceeding 4 digits - # the year 170000002 - ('Y170000002', '170000002-01-01', '170000002-12-31'), - # the year -170000002 - ('Y-170000002', '-170000002-01-01', '-170000002-12-31'), + ('Y170000002', ('170000002-01-01', '170000002-12-31')), + ('Y-170000002', ('-170000002-01-01', '-170000002-12-31')), # Seasons - # Spring, 2001 - ('2001-21', '2001-03-01', '2001-05-31'), - # Summer, 2003 - ('2003-22', '2003-06-01', '2003-08-31'), - # Autumn, 2000 - ('2000-23', '2000-09-01', '2000-11-30'), - # Winter, 2010 - ('2010-24', '2010-12-01', '2010-12-31'), + ('2001-21', ('2001-03-01', '2001-05-31')), + ('2003-22', ('2003-06-01', '2003-08-31')), + ('2000-23', ('2000-09-01', '2000-11-30')), + ('2010-24', ('2010-12-01', '2010-12-31')), # ******************************* LEVEL 2 ********************************* - - # Partial Uncertain/ Approximate + # Partial Uncertain/Approximate # uncertain year; month, day known - ('2004?-06-11', '2004-06-11', '2003-06-11', '2005-06-11'), + ('2004?-06-11', ('2004-06-11', '2003-06-11', '2005-06-11')), # year and month are approximate; day known - ('2004-06~-11', '2004-06-11', '2003-05-11', '2005-07-11'), + ('2004-06~-11', ('2004-06-11', '2003-05-11', '2005-07-11')), # uncertain month, year and day known - ('2004-?06-11', '2004-06-11', '2004-05-11', '2004-07-11'), + ('2004-?06-11', ('2004-06-11', '2004-05-11', '2004-07-11')), # day is approximate; year, month known - ('2004-06-~11', '2004-06-11', '2004-06-10', '2004-06-12'), + ('2004-06-~11', ('2004-06-11', '2004-06-10', '2004-06-12')), # Year known, month within year is approximate and uncertain - NEW SPEC - ('2004-%06', '2004-06-01', '2004-06-30', '2004-04-01', '2004-08-30'), + ('2004-%06', ('2004-06-01', '2004-06-30', '2004-04-01', '2004-08-30')), # Year known, month and day uncertain - NEW SPEC - ('2004-?06-?11', '2004-06-11', '2004-05-10', '2004-07-12'), + ('2004-?06-?11', ('2004-06-11', '2004-05-10', '2004-07-12')), # Year uncertain, month known, day approximate - NEW SPEC - ('2004?-06-~11', '2004-06-11', '2003-06-10', '2005-06-12'), + ('2004?-06-~11', ('2004-06-11', '2003-06-10', '2005-06-12')), # Year uncertain and month is both uncertain and approximate - NEW SPEC - ('?2004-%06', '2004-06-01', '2004-06-30', '2003-04-01', '2005-08-30'), + ('?2004-%06', ('2004-06-01', '2004-06-30', '2003-04-01', '2005-08-30')), # This has the same meaning as the previous example.- NEW SPEC - ('2004?-%06', '2004-06-01', '2004-06-30', '2003-04-01', '2005-08-30'), + ('2004?-%06', ('2004-06-01', '2004-06-30', '2003-04-01', '2005-08-30')), # Year uncertain, month and day approximate. - NEW SPEC - ('2004?-~06-~04','2004-06-04', '2003-05-03', '2005-07-05'), - # what about that? - #('2004?-06-04~','2004-06-04', '2003-05-03', '2005-07-05'), + ('2004?-~06-~04', ('2004-06-04', '2003-05-03', '2005-07-05')), # Year known, month and day approximate. - NEW SPEC - ('2011-~06-~04', '2011-06-04', '2011-05-03', '2011-07-05'), - # Approximate season (around Autumn 2011) - #('2011-23~', '2011-09-01', '2011-11-30', '2011-06-09', '2012-02-22'), - # Years wrapping - #('2011-24~', '2011-12-01', '2011-12-31', '2011-09-08', '2012-03-24'), + ('2011-~06-~04', ('2011-06-04', '2011-05-03', '2011-07-05')), # Partial unspecified # December 25 sometime during the 1560s - ('156X-12-25', '1560-12-25', '1569-12-25'), + ('156X-12-25', ('1560-12-25', '1569-12-25')), # December 25 sometime during the 1500s - ('15XX-12-25', '1500-12-25', '1599-12-25'), + ('15XX-12-25', ('1500-12-25', '1599-12-25')), # Year and day of month specified, month unspecified - ('1560-XX-25', '1560-01-25', '1560-12-25'), - ('15XX-12-XX', '1500-12-01', '1599-12-31'), + ('1560-XX-25', ('1560-01-25', '1560-12-25')), + ('15XX-12-XX', ('1500-12-01', '1599-12-31')), # Day specified, year and month unspecified - ('XXXX-XX-23', '0000-01-23', '9999-12-23'), + ('XXXX-XX-23', ('0000-01-23', '9999-12-23')), + # One of a Set # One of the years 1667, 1668, 1670, 1671, 1672 - (('[1667,1668, 1670..1672]', '[1667, 1668, 1670..1672]'), '1667-01-01', '1672-12-31'), + ('[1667, 1668, 1670..1672]', ('1667-01-01', '1672-12-31')), # December 3, 1760 or some earlier date - ('[..1760-12-03]', '-20000000-01-01', '1760-12-03'), + ('[..1760-12-03]', ('-inf', '1760-12-03')), # December 1760 or some later month - ('[1760-12..]', '1760-12-01', '20000000-12-31'), + ('[1760-12..]', ('1760-12-01', 'inf')), # January or February of 1760 or December 1760 or some later month - ('[1760-01, 1760-02, 1760-12..]', '1760-01-01', '20000000-12-31'), + # This test is failing due to a code issue: + # TypeError: '>' not supported between instances of 'float' and 'time.struct_time' + ('[1760-01, 1760-02, 1760-12..]', ('1760-01-01', 'inf')), #TODO fix in parser_classes # Either the year 1667 or the month December of 1760. - ('[1667, 1760-12]', '1667-01-01', '1760-12-31'), + ('[1667, 1760-12]', ('1667-01-01', '1760-12-31')), # Multiple Dates # All of the years 1667, 1668, 1670, 1671, 1672 - (('{1667,1668, 1670..1672}', '{1667, 1668, 1670..1672}'), '1667-01-01', '1672-12-31'), + ('{1667,1668, 1670..1672}', ('1667-01-01', '1672-12-31')), # The year 1960 and the month December of 1961. - ('{1960, 1961-12}', '1960-01-01', '1961-12-31'), + ('{1960, 1961-12}', ('1960-01-01', '1961-12-31')), + # Masked Precision --> eliminated # A date during the 1960s #('196x', '1960-01-01', '1969-12-31'), # A date during the 1900s #('19xx', '1900-01-01', '1999-12-31'), - # L2 Extended Interval - ('2004-06-~01/2004-06-~20', '2004-06-01', '2004-06-20', '2004-05-31', '2004-06-21'), + # L2 Extended Interval + # Interval with fuzzy day endpoints in June 2004 + ('2004-06-~01/2004-06-~20', ('2004-06-01', '2004-06-20', '2004-05-31', '2004-06-21')), # The interval began on an unspecified day in June 2004. - ('2004-06-XX/2004-07-03', '2004-06-01', '2004-07-03'), + ('2004-06-XX/2004-07-03', ('2004-06-01', '2004-07-03')), # Year Requiring More than Four Digits - Exponential Form # the year 170000000 - ('Y17E7', '170000000-01-01', '170000000-12-31'), + ('Y17E7', ('170000000-01-01', '170000000-12-31')), # the year -170000000 - ('Y-17E7', '-170000000-01-01', '-170000000-12-31'), + ('Y-17E7', ('-170000000-01-01', '-170000000-12-31')), # Some year between 171010000 and 171999999, estimated to be 171010000 ('S3' indicates a precision of 3 significant digits.) # TODO Not yet implemented, see https://github.com/ixc/python-edtf/issues/12 - # ('Y17101E4S3', '171010000-01-01', '171999999-12-31'), + # ('Y17101E4S3', ('171010000-01-01', '171999999-12-31')), # L2 Seasons - # Spring southern, 2001 - ('2001-29', '2001-09-01', '2001-11-30'), + # Spring southern hemisphere, 2001 + ('2001-29', ('2001-09-01', '2001-11-30')), # second quarter of 2001 - ('2001-34', '2001-04-01', '2001-06-30'), + ('2001-34', ('2001-04-01', '2001-06-30')), ) BAD_EXAMPLES = ( @@ -218,137 +210,83 @@ '2004-06-(01)~/2004-06-(20)~', # An interval in June 2004 beginning approximately the first and ending approximately the 20th - OLD SPEC ) +def iso_to_struct_time(iso_date): + """ Convert YYYY-mm-dd date strings or infinities to time structs or float infinities. """ + if iso_date == 'inf': + return float('inf') + elif iso_date == '-inf': + return float('-inf') -class TestParsing(unittest.TestCase): - def test_non_parsing(self): - for i in BAD_EXAMPLES: - self.assertRaises(EDTFParseException, parse, i) - - def testInterval(self): - #expression = ('1984~/2004-06', '1984-01-01', '2004-06-30', '1983-01-01', '2004-06-30') - #expression = ('/2006', '1996-01-01', '2006-12-31') - #expression = ('../2006', '0001-01-01', '2006-12-31') - expression = ('../-2006', '-20000000-01-01', '-2006-12-31') - #expression = ('2006/', '2006-01-01', '9999-12-31') - i = expression[0] - expected_lower_strict = expression[1] - expected_upper_strict = expression[2] - - def iso_to_struct_time(iso_date): - """ Convert YYYY-mm-dd date strings to time structs """ - if iso_date[0] == '-': - is_negative = True - iso_date = iso_date[1:] - else: - is_negative = False - y, mo, d = [int(i) for i in iso_date.split('-')] - if is_negative: - y *= -1 - return struct_time( - [y, mo, d] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) - - # Convert string date representations into `struct_time`s - expected_lower_strict = iso_to_struct_time(expected_lower_strict) - expected_upper_strict = iso_to_struct_time(expected_upper_strict) - - f = parse(i) - print(str(f.lower_strict()) + '/' + str(f.upper_strict())) - self.assertEqual(f.lower_strict(), expected_lower_strict) - self.assertEqual(f.upper_strict(), expected_upper_strict) - - - def test_date_values(self): - """ - Test that everY EDTFObject can tell you its lower and upper - fuzzy and strict dates, and that they're what we think they should be. - """ - - for e in EXAMPLES: - i = e[0] - if isinstance(i, tuple): - i, o = i - else: - o = i - - sys.stdout.write("parsing '%s'" % i) - f = parse(i) - sys.stdout.write(" => %s()\n" % type(f).__name__) - self.assertIsInstance(f, EDTFObject) - self.assertEqual(str(f), o) + if iso_date[0] == '-': + is_negative = True + iso_date = iso_date[1:] + else: + is_negative = False + y, mo, d = [int(i) for i in iso_date.split('-')] + if is_negative: + y *= -1 + return struct_time([y, mo, d] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) - if len(e) == 5: - expected_lower_strict = e[1] - expected_upper_strict = e[2] - expected_lower_fuzzy = e[3] - expected_upper_fuzzy = e[4] - elif len(e) == 4: - expected_lower_strict = e[1] - expected_upper_strict = e[1] - expected_lower_fuzzy = e[2] - expected_upper_fuzzy = e[3] - elif len(e) == 3: - expected_lower_strict = e[1] - expected_upper_strict = e[2] - expected_lower_fuzzy = e[1] - expected_upper_fuzzy = e[2] - elif len(e) == 2: - expected_lower_strict = e[1] - expected_upper_strict = e[1] - expected_lower_fuzzy = e[1] - expected_upper_fuzzy = e[1] - if len(e) == 1: - continue - def iso_to_struct_time(iso_date): - """ Convert YYYY-mm-dd date strings to time structs """ - if iso_date[0] == '-': - is_negative = True - iso_date = iso_date[1:] - else: - is_negative = False - y, mo, d = [int(i) for i in iso_date.split('-')] - if is_negative: - y *= -1 - return struct_time( - [y, mo, d] + TIME_EMPTY_TIME + TIME_EMPTY_EXTRAS) +@pytest.mark.parametrize("test_input,expected_tuple", EXAMPLES) +def test_edtf_examples(test_input, expected_tuple): + """ Test parsing of EDTF strings with expected outputs. """ + result = parse(test_input) + assert isinstance(result, EDTFObject), "Result should be an instance of EDTFObject" - # Convert string date representations into `struct_time`s - expected_lower_strict = iso_to_struct_time(expected_lower_strict) - expected_upper_strict = iso_to_struct_time(expected_upper_strict) - expected_lower_fuzzy = iso_to_struct_time(expected_lower_fuzzy) - expected_upper_fuzzy = iso_to_struct_time(expected_upper_fuzzy) + # Extract only the date part if the result includes a time. + result_date = str(result) + if 'T' in result_date: + result_date = result_date.split('T')[0] - try: - self.assertEqual(f.lower_strict(), expected_lower_strict) - self.assertEqual(f.upper_strict(), expected_upper_strict) - self.assertEqual(f.lower_fuzzy(), expected_lower_fuzzy) - self.assertEqual(f.upper_fuzzy(), expected_upper_fuzzy) - except Exception as x: - # Write to stdout for manual debugging, I guess - sys.stdout.write(str(x)) - # Re-raise exception so unit tests work for non-manual usage - raise + # Unpack expected results based on their count + if len(expected_tuple) == 1: + assert result_date == expected_tuple[0], f"Expected {expected_tuple[0]}, got {result_date}" + elif len(expected_tuple) == 2: + lower_strict = iso_to_struct_time(expected_tuple[0]) + upper_strict = iso_to_struct_time(expected_tuple[1]) + assert result.lower_strict() == lower_strict, "Lower strict date does not match" + assert result.upper_strict() == upper_strict, "Upper strict date does not match" + elif len(expected_tuple) == 3: + strict_date = iso_to_struct_time(expected_tuple[0]) + lower_fuzzy = iso_to_struct_time(expected_tuple[1]) + upper_fuzzy = iso_to_struct_time(expected_tuple[2]) + assert result.lower_strict() == strict_date, "Lower strict date does not match" + assert result.upper_strict() == strict_date, "Upper strict date does not match" + assert result.lower_fuzzy() == lower_fuzzy, "Lower fuzzy date does not match" + assert result.upper_fuzzy() == upper_fuzzy, "Upper fuzzy date does not match" + elif len(expected_tuple) == 4: + lower_strict = iso_to_struct_time(expected_tuple[0]) + upper_strict = iso_to_struct_time(expected_tuple[1]) + lower_fuzzy = iso_to_struct_time(expected_tuple[2]) + upper_fuzzy = iso_to_struct_time(expected_tuple[3]) + assert result.lower_strict() == lower_strict, "Lower strict date does not match" + assert result.upper_strict() == upper_strict, "Upper strict date does not match" + assert result.lower_fuzzy() == lower_fuzzy, "Lower fuzzy date does not match" + assert result.upper_fuzzy() == upper_fuzzy, "Upper fuzzy date does not match" - def test_comparisons(self): - d1 = parse("1979-08~") - d2 = parse("1979-08~") - d3 = parse("1979-09-16") - d4 = parse("1979-08-16") - d5 = date(1979, 8, 16) - d6 = date(1970, 9, 16) - self.assertEqual(d1, d2) - self.assertNotEqual(d1, d3) - self.assertTrue(d1 >= d2) - self.assertTrue(d2 >= d1) - self.assertTrue(d3 > d1) - self.assertTrue(d1 < d4) +@pytest.mark.parametrize("bad_input", BAD_EXAMPLES) +def test_non_parsing(bad_input): + """ Test that non-parsing inputs correctly raise an exception. """ + with pytest.raises(EDTFParseException): + parse(bad_input) - # with python dates (EDTFFormat must be first operand) - self.assertEqual(d4, d5) - self.assertTrue(d1 < d5) - self.assertTrue(d1 > d6) +def test_comparisons(): + """ Test comparisons between parsed EDTF objects and standard dates. """ + d1 = parse("1979-08~") + d2 = parse("1979-08~") + d3 = parse("1979-09-16") + d4 = parse("1979-08-16") + d5 = date(1979, 8, 16) + d6 = date(1970, 9, 16) -if __name__ == '__main__': - unittest.main() + assert d1 == d2 + assert d1 != d3 + assert d1 >= d2 + assert d3 > d1 + assert d1 < d4 + assert d4 == d5 + assert d1 < d5 + assert d1 > d6 diff --git a/edtf/tests.py b/edtf/tests.py index 0e49e67..f5ef655 100644 --- a/edtf/tests.py +++ b/edtf/tests.py @@ -1,134 +1,84 @@ -import unittest - from time import struct_time from datetime import datetime, date from edtf import convert - -class TestConversions(unittest.TestCase): - - def test_dt_to_struct_time_for_datetime(self): - now = datetime.now() - st = convert.dt_to_struct_time(now) - # Check equal year, month, day, hours, minutes, seconds - self.assertEqual(st[:6], now.timetuple()[:6]) - # Confirm 'extra' fields are set to defaults - self.assertEqual(st[6:], (0, 0, -1)) - - def test_dt_to_struct_time_for_date(self): - today = date.today() - st = convert.dt_to_struct_time(today) - # Check equal year, month, day - self.assertEqual(st[:3], today.timetuple()[:3]) - # Confirm time fields are zeroed - self.assertEqual(st[3:6], (0, 0, 0)) - # Confirm 'extra' fields are set to defaults - self.assertEqual(st[6:], (0, 0, -1)) - - def test_struct_time_to_date(self): - st = struct_time( - [2018, 4, 19] + convert.TIME_EMPTY_TIME + convert.TIME_EMPTY_EXTRAS) - d = date(*st[:3]) - self.assertEqual(d, convert.struct_time_to_date(st)) - - def test_struct_time_to_datetime(self): - st = struct_time( - [2018, 4, 19] + [10, 13, 54] + convert.TIME_EMPTY_EXTRAS) - dt = datetime(*st[:6]) - converted_dt = convert.struct_time_to_datetime(st) - self.assertEqual(dt, converted_dt) - # Note that 'extra' fields are auto-populated by `datetime` module - self.assertEqual(converted_dt.timetuple()[6:], (3, 109, -1)) - - def test_trim_struct_time(self): - now = datetime.now() - st = now.timetuple() - trimmed_st = convert.trim_struct_time(st) - # Confirm trimmed `struct_time` has expected date/time values - self.assertEqual( - trimmed_st[:6], - (now.year, now.month, now.day, now.hour, now.minute, now.second) - ) - # Confirm 'extra' fields are set to defaults - self.assertEqual(trimmed_st[6:], (0, 0, -1)) - # Confirm 'extra' fields in untrimmed `struct_time` has real values - self.assertNotEqual(st[6:], (0, 0, -1)) - - def test_struct_time_to_jd(self): - # Check conversion of AD date & time to Julian Date number - st_ad = struct_time( - [2018, 4, 19] + [10, 13, 54] + convert.TIME_EMPTY_EXTRAS) - jd_ad = 2458227.9263194446 - self.assertEqual(jd_ad, convert.struct_time_to_jd(st_ad)) - # Check conversion of BC date & time to Julian Date number - st_bc = struct_time( - [-2018, 4, 19] + [10, 13, 54] + convert.TIME_EMPTY_EXTRAS) - jd_bc = 984091.9263194444 - self.assertEqual(jd_bc, convert.struct_time_to_jd(st_bc)) - - def test_jd_to_struct_time(self): - # Check conversion of Julian Date number to AD date & time - jd_ad = 2458227.9263194446 # As in `test_struct_time_to_jd` - st_ad = struct_time( - [2018, 4, 19] + [10, 13, 54] + convert.TIME_EMPTY_EXTRAS) - self.assertEqual(st_ad, convert.jd_to_struct_time(jd_ad)) - # Check conversion of Julian Date number to BC date & time - # WARNING: Converted time is off by 1 second, 53 not 54 - jd_bc = 984091.9263194444 # As in `test_struct_time_to_jd` - st_bc = struct_time( - [-2018, 4, 19] + [10, 13, 54 - 1] + convert.TIME_EMPTY_EXTRAS) - self.assertEqual(st_bc, convert.jd_to_struct_time(jd_bc)) - - def test_jd_round_trip_for_extreme_future(self): - original_st = struct_time( - [999999, 8, 4] + [21, 15, 3] + convert.TIME_EMPTY_EXTRAS) - jd = convert.struct_time_to_jd(original_st) - converted_st = convert.jd_to_struct_time(jd) - # Confirm that year, month, day, hour, minute are correct (not second) - self.assertEqual(original_st[:5], converted_st[:5]) - # WARNING: Seconds are off by 1, should be 3 but is 2 - self.assertEqual(3 - 1, converted_st[5]) - - def test_jd_round_trip_for_extreme_past(self): - original_st = struct_time( - [-999999, 8, 4] + [21, 15, 3] + convert.TIME_EMPTY_EXTRAS) - converted_st = convert.jd_to_struct_time( - convert.struct_time_to_jd(original_st)) - # WARNING: We have lost a year of accuracy - self.assertEqual( - (-999999 + 1, # Year off by 1 - 8, 4, 21, 15, 3, 0, 0, -1), - tuple(converted_st)) - - def test_jd_round_trip_for_zero_year_aka_1_bc(self): - original_st = struct_time( - [0, 9, 5] + [4, 58, 59] + convert.TIME_EMPTY_EXTRAS) - converted_st = convert.jd_to_struct_time( - convert.struct_time_to_jd(original_st)) - self.assertEqual( - (0, 9, 5, 4, 58, 59, 0, 0, -1), - tuple(converted_st)) - - def test_jd_round_trip_for_2_bc(self): - original_st = struct_time( - [-1, 12, 5] + [4, 58, 59] + convert.TIME_EMPTY_EXTRAS) - converted_st = convert.jd_to_struct_time( - convert.struct_time_to_jd(original_st)) - self.assertEqual( - (-1, 12, 5, 4, 58, 59, 0, 0, -1), - tuple(converted_st)) - - def test_roll_negative_time_fields(self): - # Confirm time value is adjusted as expected - year = -100 - month = -17 # More than 1 year - day = -34 # More than 1 month - hour = -25 # More than 1 day - minute = -74 # More than 1 hour - second = -253 # More than 1 minute - self.assertEqual( - (-102, 5, 24, 21, 41, 47), - convert._roll_negative_time_fields( - year, month, day, hour, minute, second) - ) +def test_dt_to_struct_time_for_datetime(): + now = datetime.now() + st = convert.dt_to_struct_time(now) + assert st[:6] == now.timetuple()[:6] + assert st[6:] == (0, 0, -1) + +def test_dt_to_struct_time_for_date(): + today = date.today() + st = convert.dt_to_struct_time(today) + assert st[:3] == today.timetuple()[:3] + assert st[3:6] == (0, 0, 0) + assert st[6:] == (0, 0, -1) + +def test_struct_time_to_date(): + st = struct_time([2018, 4, 19] + convert.TIME_EMPTY_TIME + convert.TIME_EMPTY_EXTRAS) + d = date(*st[:3]) + assert d == convert.struct_time_to_date(st) + +def test_struct_time_to_datetime(): + st = struct_time([2018, 4, 19] + [10, 13, 54] + convert.TIME_EMPTY_EXTRAS) + dt = datetime(*st[:6]) + converted_dt = convert.struct_time_to_datetime(st) + assert dt == converted_dt + assert converted_dt.timetuple()[6:] == (3, 109, -1) + +def test_trim_struct_time(): + now = datetime.now() + st = now.timetuple() + trimmed_st = convert.trim_struct_time(st) + assert trimmed_st[:6] == (now.year, now.month, now.day, now.hour, now.minute, now.second) + assert trimmed_st[6:] == (0, 0, -1) + assert st[6:] != (0, 0, -1) + +def test_struct_time_to_jd(): + st_ad = struct_time([2018, 4, 19] + [10, 13, 54] + convert.TIME_EMPTY_EXTRAS) + jd_ad = 2458227.9263194446 + assert jd_ad == convert.struct_time_to_jd(st_ad) + st_bc = struct_time([-2018, 4, 19] + [10, 13, 54] + convert.TIME_EMPTY_EXTRAS) + jd_bc = 984091.9263194444 + assert jd_bc == convert.struct_time_to_jd(st_bc) + +def test_jd_to_struct_time(): + jd_ad = 2458227.9263194446 + st_ad = struct_time([2018, 4, 19] + [10, 13, 54] + convert.TIME_EMPTY_EXTRAS) + assert st_ad == convert.jd_to_struct_time(jd_ad) + jd_bc = 984091.9263194444 + st_bc = struct_time([-2018, 4, 19] + [10, 13, 54 - 1] + convert.TIME_EMPTY_EXTRAS) + assert st_bc == convert.jd_to_struct_time(jd_bc) + +def test_jd_round_trip_for_extreme_future(): + original_st = struct_time([999999, 8, 4] + [21, 15, 3] + convert.TIME_EMPTY_EXTRAS) + jd = convert.struct_time_to_jd(original_st) + converted_st = convert.jd_to_struct_time(jd) + assert original_st[:5] == converted_st[:5] + assert 3 - 1 == converted_st[5] + +def test_jd_round_trip_for_extreme_past(): + original_st = struct_time([-999999, 8, 4] + [21, 15, 3] + convert.TIME_EMPTY_EXTRAS) + converted_st = convert.jd_to_struct_time(convert.struct_time_to_jd(original_st)) + assert (-999999 + 1, 8, 4, 21, 15, 3, 0, 0, -1) == tuple(converted_st) + +def test_jd_round_trip_for_zero_year_aka_1_bc(): + original_st = struct_time([0, 9, 5] + [4, 58, 59] + convert.TIME_EMPTY_EXTRAS) + converted_st = convert.jd_to_struct_time(convert.struct_time_to_jd(original_st)) + assert (0, 9, 5, 4, 58, 59, 0, 0, -1) == tuple(converted_st) + +def test_jd_round_trip_for_2_bc(): + original_st = struct_time([-1, 12, 5] + [4, 58, 59] + convert.TIME_EMPTY_EXTRAS) + converted_st = convert.jd_to_struct_time(convert.struct_time_to_jd(original_st)) + assert (-1, 12, 5, 4, 58, 59, 0, 0, -1) == tuple(converted_st) + +def test_roll_negative_time_fields(): + year = -100 + month = -17 + day = -34 + hour = -25 + minute = -74 + second = -253 + assert (-102, 5, 24, 21, 41, 47) == convert._roll_negative_time_fields(year, month, day, hour, minute, second) diff --git a/edtf_django_tests/edtf_django_tests/__init__.py b/edtf_django_tests/edtf_django_tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/edtf_django_tests/edtf_django_tests/asgi.py b/edtf_django_tests/edtf_django_tests/asgi.py new file mode 100644 index 0000000..b62c5f5 --- /dev/null +++ b/edtf_django_tests/edtf_django_tests/asgi.py @@ -0,0 +1,16 @@ +""" +ASGI config for edtf_django_tests project. + +It exposes the ASGI callable as a module-level variable named ``application``. + +For more information on this file, see +https://docs.djangoproject.com/en/4.2/howto/deployment/asgi/ +""" + +import os + +from django.core.asgi import get_asgi_application + +os.environ.setdefault("DJANGO_SETTINGS_MODULE", "edtf_django_tests.settings") + +application = get_asgi_application() diff --git a/edtf_django_tests/edtf_django_tests/settings.py b/edtf_django_tests/edtf_django_tests/settings.py new file mode 100644 index 0000000..a8121e3 --- /dev/null +++ b/edtf_django_tests/edtf_django_tests/settings.py @@ -0,0 +1,124 @@ +""" +Django settings for edtf_django_tests project. + +Generated by 'django-admin startproject' using Django 4.2.7. + +For more information on this file, see +https://docs.djangoproject.com/en/4.2/topics/settings/ + +For the full list of settings and their values, see +https://docs.djangoproject.com/en/4.2/ref/settings/ +""" + +from pathlib import Path + +# Build paths inside the project like this: BASE_DIR / 'subdir'. +BASE_DIR = Path(__file__).resolve().parent.parent + + +# Quick-start development settings - unsuitable for production +# See https://docs.djangoproject.com/en/4.2/howto/deployment/checklist/ + +# SECURITY WARNING: keep the secret key used in production secret! +SECRET_KEY = "django-insecure-zkd&%e=di9d(p@wq7vnstn+4dx7cxbxkve�*+57sks0q$=0a" + +# SECURITY WARNING: don't run with debug turned on in production! +DEBUG = True + +ALLOWED_HOSTS = [] + + +# Application definition + +INSTALLED_APPS = [ + "django.contrib.admin", + "django.contrib.auth", + "django.contrib.contenttypes", + "django.contrib.sessions", + "django.contrib.messages", + "django.contrib.staticfiles", + "edtf_integration", +] + +MIDDLEWARE = [ + "django.middleware.security.SecurityMiddleware", + "django.contrib.sessions.middleware.SessionMiddleware", + "django.middleware.common.CommonMiddleware", + "django.middleware.csrf.CsrfViewMiddleware", + "django.contrib.auth.middleware.AuthenticationMiddleware", + "django.contrib.messages.middleware.MessageMiddleware", + "django.middleware.clickjacking.XFrameOptionsMiddleware", +] + +ROOT_URLCONF = "edtf_django_tests.urls" + +TEMPLATES = [ + { + "BACKEND": "django.template.backends.django.DjangoTemplates", + "DIRS": [], + "APP_DIRS": True, + "OPTIONS": { + "context_processors": [ + "django.template.context_processors.debug", + "django.template.context_processors.request", + "django.contrib.auth.context_processors.auth", + "django.contrib.messages.context_processors.messages", + ], + }, + }, +] + +WSGI_APPLICATION = "edtf_django_tests.wsgi.application" + + +# Database +# https://docs.djangoproject.com/en/4.2/ref/settings/#databases + +DATABASES = { + "default": { + "ENGINE": "django.db.backends.sqlite3", + "NAME": BASE_DIR / "db.sqlite3", + } +} + + +# Password validation +# https://docs.djangoproject.com/en/4.2/ref/settings/#auth-password-validators + +AUTH_PASSWORD_VALIDATORS = [ + { + "NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator", + }, + { + "NAME": "django.contrib.auth.password_validation.MinimumLengthValidator", + }, + { + "NAME": "django.contrib.auth.password_validation.CommonPasswordValidator", + }, + { + "NAME": "django.contrib.auth.password_validation.NumericPasswordValidator", + }, +] + + +# Internationalization +# https://docs.djangoproject.com/en/4.2/topics/i18n/ + +LANGUAGE_CODE = "en-us" + +TIME_ZONE = "UTC" + +USE_I18N = True + +USE_TZ = True + + +# Static files (CSS, JavaScript, Images) +# https://docs.djangoproject.com/en/4.2/howto/static-files/ + +STATIC_URL = "static/" + +# Default primary key field type +# https://docs.djangoproject.com/en/4.2/ref/settings/#default-auto-field + +DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField" diff --git a/edtf_django_tests/edtf_django_tests/urls.py b/edtf_django_tests/edtf_django_tests/urls.py new file mode 100644 index 0000000..ceca78b --- /dev/null +++ b/edtf_django_tests/edtf_django_tests/urls.py @@ -0,0 +1,22 @@ +""" +URL configuration for edtf_django_tests project. + +The `urlpatterns` list routes URLs to views. For more information please see: + https://docs.djangoproject.com/en/4.2/topics/http/urls/ +Examples: +Function views + 1. Add an import: from my_app import views + 2. Add a URL to urlpatterns: path('', views.home, name='home') +Class-based views + 1. Add an import: from other_app.views import Home + 2. Add a URL to urlpatterns: path('', Home.as_view(), name='home') +Including another URLconf + 1. Import the include() function: from django.urls import include, path + 2. Add a URL to urlpatterns: path('blog/', include('blog.urls')) +""" +from django.contrib import admin +from django.urls import path + +urlpatterns = [ + path("admin/", admin.site.urls), +] diff --git a/edtf_django_tests/edtf_django_tests/wsgi.py b/edtf_django_tests/edtf_django_tests/wsgi.py new file mode 100644 index 0000000..20450c1 --- /dev/null +++ b/edtf_django_tests/edtf_django_tests/wsgi.py @@ -0,0 +1,16 @@ +""" +WSGI config for edtf_django_tests project. + +It exposes the WSGI callable as a module-level variable named ``application``. + +For more information on this file, see +https://docs.djangoproject.com/en/4.2/howto/deployment/wsgi/ +""" + +import os + +from django.core.wsgi import get_wsgi_application + +os.environ.setdefault("DJANGO_SETTINGS_MODULE", "edtf_django_tests.settings") + +application = get_wsgi_application() diff --git a/edtf_django_tests/edtf_integration/__init__.py b/edtf_django_tests/edtf_integration/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/edtf_django_tests/edtf_integration/admin.py b/edtf_django_tests/edtf_integration/admin.py new file mode 100644 index 0000000..8c38f3f --- /dev/null +++ b/edtf_django_tests/edtf_integration/admin.py @@ -0,0 +1,3 @@ +from django.contrib import admin + +# Register your models here. diff --git a/edtf_django_tests/edtf_integration/apps.py b/edtf_django_tests/edtf_integration/apps.py new file mode 100644 index 0000000..23bc09d --- /dev/null +++ b/edtf_django_tests/edtf_integration/apps.py @@ -0,0 +1,6 @@ +from django.apps import AppConfig + + +class EdtfIntegrationConfig(AppConfig): + default_auto_field = "django.db.models.BigAutoField" + name = "edtf_integration" diff --git a/edtf_django_tests/edtf_integration/migrations/0001_initial.py b/edtf_django_tests/edtf_integration/migrations/0001_initial.py new file mode 100644 index 0000000..286a9de --- /dev/null +++ b/edtf_django_tests/edtf_integration/migrations/0001_initial.py @@ -0,0 +1,64 @@ +# Generated by Django 4.2.13 on 2024-05-09 18:13 + +from django.db import migrations, models +import edtf.fields + + +class Migration(migrations.Migration): + initial = True + + dependencies = [] + + operations = [ + migrations.CreateModel( + name="TestEvent", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "date_display", + models.CharField( + blank=True, + help_text="Enter the date in natural language format (e.g., 'Approximately June 2004').", + max_length=255, + null=True, + verbose_name="Date of creation (display)", + ), + ), + ( + "date_edtf_direct", + models.CharField( + blank=True, + help_text="Enter the date in EDTF format (e.g., '2004-06~').", + max_length=255, + null=True, + verbose_name="Date of creation (EDTF format)", + ), + ), + ( + "date_edtf", + edtf.fields.EDTFField( + blank=True, + lower_fuzzy_field="date_earliest", + lower_strict_field="date_sort_ascending", + natural_text_field="date_display", + null=True, + upper_fuzzy_field="date_latest", + upper_strict_field="date_sort_descending", + verbose_name="Date of creation (EDTF)", + ), + ), + ("date_earliest", models.FloatField(blank=True, null=True)), + ("date_latest", models.FloatField(blank=True, null=True)), + ("date_sort_ascending", models.FloatField(blank=True, null=True)), + ("date_sort_descending", models.FloatField(blank=True, null=True)), + ], + ), + ] diff --git a/edtf_django_tests/edtf_integration/migrations/__init__.py b/edtf_django_tests/edtf_integration/migrations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/edtf_django_tests/edtf_integration/models.py b/edtf_django_tests/edtf_integration/models.py new file mode 100644 index 0000000..0274d5f --- /dev/null +++ b/edtf_django_tests/edtf_integration/models.py @@ -0,0 +1,45 @@ +from django.db import models +from edtf.fields import EDTFField + + +class TestEvent(models.Model): + date_display = models.CharField( + "Date of creation (display)", + blank=True, + null=True, + max_length=255, + help_text="Enter the date in natural language format (e.g., 'Approximately June 2004')." + ) + + date_edtf_direct = models.CharField( + "Date of creation (EDTF format)", + max_length=255, + blank=True, + null=True, + help_text="Enter the date in EDTF format (e.g., '2004-06~')." + ) + + # EDTF field that parses the input from either natural language or direct EDTF string + # natural_text_field is the field that stores the natural language input and is used for display + # direct_input_field stores an EDTF string + # TODO is there a need for both a natural text input and a label? + # TODO could consolidate the direct_input_field and natural_text_field into a single field, but would need + # a flag to indicate whether the input is natural language or EDTF as the natural language parser sometimes + # misparses an EDTF string as a natural language string (e.g. `2020-03-15/2020-04-15` -> `2020-03-15`) + date_edtf = EDTFField( + "Date of creation (EDTF)", + natural_text_field='date_display', + direct_input_field='date_edtf_direct', + lower_fuzzy_field='date_earliest', + upper_fuzzy_field='date_latest', + lower_strict_field='date_sort_ascending', + upper_strict_field='date_sort_descending', + blank=True, + null=True, + ) + # Computed fields for filtering + date_earliest = models.FloatField(blank=True, null=True) + date_latest = models.FloatField(blank=True, null=True) + # Computed fields for sorting + date_sort_ascending = models.FloatField(blank=True, null=True) + date_sort_descending = models.FloatField(blank=True, null=True) diff --git a/edtf_django_tests/edtf_integration/tests.py b/edtf_django_tests/edtf_integration/tests.py new file mode 100644 index 0000000..9385733 --- /dev/null +++ b/edtf_django_tests/edtf_integration/tests.py @@ -0,0 +1,87 @@ +from django.test import TestCase +from .models import TestEvent +from edtf.parser.grammar import parse_edtf as parse +from edtf.parser import EDTFObject +from edtf.convert import struct_time_to_jd + +class TestEventModelTests(TestCase): + def setUp(self): + # Create instances and assign them to instance variables + # date_edtf_direct is a valid EDTF string, date_display is a date + # to be parsed from natural language + self.event1 = TestEvent.objects.create(date_edtf_direct="2020-03-15/2020-04-15") + self.event2 = TestEvent.objects.create(date_edtf_direct="2021-05-06") + self.event3 = TestEvent.objects.create(date_edtf_direct="2019-11") + self.event4 = TestEvent.objects.create(date_display="Approximately August 2018") + self.event5 = TestEvent.objects.create(date_edtf_direct="2021-05-06") + + + def test_edtf_object_returned(self): + for event in TestEvent.objects.all(): + self.assertIsInstance(event.date_edtf, EDTFObject) + + + def test_sorting(self): + events = list(TestEvent.objects.order_by('date_sort_ascending')) + self.assertEqual(events[0].date_display, "Approximately August 2018") + self.assertEqual(events[1].date_edtf_direct, "2019-11") + self.assertEqual(events[2].date_edtf_direct, "2020-03-15/2020-04-15") + self.assertEqual(events[3].date_edtf_direct, "2021-05-06") + self.assertEqual(events[4].date_edtf_direct, "2021-05-06") + + events_desc = list(TestEvent.objects.order_by('-date_sort_descending')) + self.assertEqual(events_desc[0].date_edtf_direct, "2021-05-06") + self.assertEqual(events_desc[1].date_edtf_direct, "2021-05-06") + self.assertEqual(events_desc[2].date_edtf_direct, "2020-03-15/2020-04-15") + self.assertEqual(events_desc[3].date_edtf_direct, "2019-11") + self.assertEqual(events_desc[4].date_display, "Approximately August 2018") + + + def test_date_boundaries(self): + event = TestEvent.objects.get(date_edtf_direct="2020-03-15/2020-04-15") + expected_earliest_jd = struct_time_to_jd(parse("2020-03-15").lower_strict()) + expected_latest_jd = struct_time_to_jd(parse("2020-04-15").upper_strict()) + self.assertAlmostEqual(event.date_earliest, expected_earliest_jd, places=1) + self.assertAlmostEqual(event.date_latest, expected_latest_jd, places=1) + + expected_earliest_jd = struct_time_to_jd(parse("2021-05-06").lower_strict()) + expected_latest_jd = struct_time_to_jd(parse("2021-05-06").upper_strict()) + self.assertAlmostEqual(self.event2.date_earliest, expected_earliest_jd, places=1) + self.assertAlmostEqual(self.event2.date_latest, expected_latest_jd, places=1) + + event3 = TestEvent.objects.get(date_edtf_direct="2019-11") + expected_earliest_jd = struct_time_to_jd(parse("2019-11").lower_strict()) + expected_latest_jd = struct_time_to_jd(parse("2019-11").upper_strict()) + self.assertAlmostEqual(event3.date_earliest, expected_earliest_jd, places=1) + self.assertAlmostEqual(event3.date_latest, expected_latest_jd, places=1) + + event4 = TestEvent.objects.get(date_display="Approximately August 2018") + expected_earliest_jd = struct_time_to_jd(parse("2018-08~").lower_fuzzy()) + expected_latest_jd = struct_time_to_jd(parse("2018-08~").upper_fuzzy()) + self.assertAlmostEqual(event4.date_earliest, expected_earliest_jd, places=1) + self.assertAlmostEqual(event4.date_latest, expected_latest_jd, places=1) + + def test_date_display(self): + """ + Test that the date_display field is correctly populated based on the EDTF input. + In the future, a more sophisticated natural language parser could be used to generate + a human readable date from the EDTF input. + """ + event = TestEvent.objects.get(date_edtf_direct="2020-03-15/2020-04-15") + self.assertEqual(event.date_display, "2020-03-15/2020-04-15") + self.assertEqual(self.event2.date_display, "2021-05-06") + self.assertEqual(self.event3.date_display, "2019-11") + self.assertEqual(self.event4.date_display, "Approximately August 2018") + + def test_comparison(self): + # test equality of the same dates + self.assertEqual(self.event2.date_edtf, self.event5.date_edtf, "Events with the same date should be equal") + + # test inequality of different dates + self.assertNotEqual(self.event1.date_edtf, self.event2.date_edtf, "Events with different dates should not be equal") + + # greater than + self.assertGreater(self.event2.date_edtf, self.event3.date_edtf, "2021-05-06 is greater than 2019-11") + + # less than + self.assertLess(self.event3.date_edtf, self.event2.date_edtf, "2019-11 is less than 2021-05-06") \ No newline at end of file diff --git a/edtf_django_tests/edtf_integration/views.py b/edtf_django_tests/edtf_integration/views.py new file mode 100644 index 0000000..91ea44a --- /dev/null +++ b/edtf_django_tests/edtf_integration/views.py @@ -0,0 +1,3 @@ +from django.shortcuts import render + +# Create your views here. diff --git a/edtf_django_tests/manage.py b/edtf_django_tests/manage.py new file mode 100755 index 0000000..b2d2a20 --- /dev/null +++ b/edtf_django_tests/manage.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python +"""Django's command-line utility for administrative tasks.""" +import os +import sys + + +def main(): + """Run administrative tasks.""" + os.environ.setdefault("DJANGO_SETTINGS_MODULE", "edtf_django_tests.settings") + try: + from django.core.management import execute_from_command_line + except ImportError as exc: + raise ImportError( + "Couldn't import Django. Are you sure it's installed and " + "available on your PYTHONPATH environment variable? Did you " + "forget to activate a virtual environment?" + ) from exc + execute_from_command_line(sys.argv) + + +if __name__ == "__main__": + main() diff --git a/pyproject.toml b/pyproject.toml index 444298e..0b7a0ae 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,7 @@ classifiers = [ [project.optional-dependencies] test = [ - "django", + "django>=4.2,<5.0", "pytest" ] @@ -50,6 +50,9 @@ changelog = "https://github.com/ixc/python-edtf/blob/main/changelog.rst" requires = ["setuptools", "wheel"] build-backend = "setuptools.build_meta" +[tool.setuptools] +packages.find = { where = ["."], exclude = ["edtf_django_tests", "edtf_django_tests.*"] } + [tool.wheel] universal = false @@ -72,3 +75,4 @@ legacy_tox_ini = """ python_files = ["tests.py", "test_*.py", "*_test.py", "*_tests.py"] python_classes = ["Test*", "*Tests"] python_functions = ["test_*"] +addopts = "--ignore=edtf_django_tests/"