Skip to content

Commit

Permalink
Modifying schedules API to allow for rrulesets #5733 (#12043)
Browse files Browse the repository at this point in the history
* Added schedule_rruleset lookup plugin for awx.awx
* Added DB migration for rrule size
* Updated schedule docs
* The schedule API endpoint will now return an array of errors on rule validation to try and inform the user of all errors instead of just the first
  • Loading branch information
john-westcott-iv committed Apr 28, 2022
1 parent 2bef5ce commit c67f508
Show file tree
Hide file tree
Showing 10 changed files with 1,212 additions and 123 deletions.
81 changes: 40 additions & 41 deletions awx/api/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4645,61 +4645,60 @@ class Meta:

# We reject rrules if:
# - DTSTART is not include
# - INTERVAL is not included
# - SECONDLY is used
# - TZID is used
# - BYDAY prefixed with a number (MO is good but not 20MO)
# - BYYEARDAY
# - BYWEEKNO
# - Multiple DTSTART or RRULE elements
# - Can't contain both COUNT and UNTIL
# - COUNT > 999
# - Multiple DTSTART
# - At least one of RRULE is not included
# - EXDATE or RDATE is included
# For any rule in the ruleset:
# - INTERVAL is not included
# - SECONDLY is used
# - BYDAY prefixed with a number (MO is good but not 20MO)
# - Can't contain both COUNT and UNTIL
# - COUNT > 999
def validate_rrule(self, value):
rrule_value = value
multi_by_month_day = r".*?BYMONTHDAY[\:\=][0-9]+,-*[0-9]+"
multi_by_month = r".*?BYMONTH[\:\=][0-9]+,[0-9]+"
by_day_with_numeric_prefix = r".*?BYDAY[\:\=][0-9]+[a-zA-Z]{2}"
match_count = re.match(r".*?(COUNT\=[0-9]+)", rrule_value)
match_multiple_dtstart = re.findall(r".*?(DTSTART(;[^:]+)?\:[0-9]+T[0-9]+Z?)", rrule_value)
match_native_dtstart = re.findall(r".*?(DTSTART:[0-9]+T[0-9]+) ", rrule_value)
match_multiple_rrule = re.findall(r".*?(RRULE\:)", rrule_value)
match_multiple_rrule = re.findall(r".*?(RULE\:[^\s]*)", rrule_value)
errors = []
if not len(match_multiple_dtstart):
raise serializers.ValidationError(_('Valid DTSTART required in rrule. Value should start with: DTSTART:YYYYMMDDTHHMMSSZ'))
errors.append(_('Valid DTSTART required in rrule. Value should start with: DTSTART:YYYYMMDDTHHMMSSZ'))
if len(match_native_dtstart):
raise serializers.ValidationError(_('DTSTART cannot be a naive datetime. Specify ;TZINFO= or YYYYMMDDTHHMMSSZZ.'))
errors.append(_('DTSTART cannot be a naive datetime. Specify ;TZINFO= or YYYYMMDDTHHMMSSZZ.'))
if len(match_multiple_dtstart) > 1:
raise serializers.ValidationError(_('Multiple DTSTART is not supported.'))
if not len(match_multiple_rrule):
raise serializers.ValidationError(_('RRULE required in rrule.'))
if len(match_multiple_rrule) > 1:
raise serializers.ValidationError(_('Multiple RRULE is not supported.'))
if 'interval' not in rrule_value.lower():
raise serializers.ValidationError(_('INTERVAL required in rrule.'))
if 'secondly' in rrule_value.lower():
raise serializers.ValidationError(_('SECONDLY is not supported.'))
if re.match(multi_by_month_day, rrule_value):
raise serializers.ValidationError(_('Multiple BYMONTHDAYs not supported.'))
if re.match(multi_by_month, rrule_value):
raise serializers.ValidationError(_('Multiple BYMONTHs not supported.'))
if re.match(by_day_with_numeric_prefix, rrule_value):
raise serializers.ValidationError(_("BYDAY with numeric prefix not supported."))
if 'byyearday' in rrule_value.lower():
raise serializers.ValidationError(_("BYYEARDAY not supported."))
if 'byweekno' in rrule_value.lower():
raise serializers.ValidationError(_("BYWEEKNO not supported."))
if 'COUNT' in rrule_value and 'UNTIL' in rrule_value:
raise serializers.ValidationError(_("RRULE may not contain both COUNT and UNTIL"))
if match_count:
count_val = match_count.groups()[0].strip().split("=")
if int(count_val[1]) > 999:
raise serializers.ValidationError(_("COUNT > 999 is unsupported."))
errors.append(_('Multiple DTSTART is not supported.'))
if "rrule:" not in rrule_value.lower():
errors.append(_('One or more rule required in rrule.'))
if "exdate:" in rrule_value.lower():
raise serializers.ValidationError(_('EXDATE not allowed in rrule.'))
if "rdate:" in rrule_value.lower():
raise serializers.ValidationError(_('RDATE not allowed in rrule.'))
for a_rule in match_multiple_rrule:
if 'interval' not in a_rule.lower():
errors.append("{0}: {1}".format(_('INTERVAL required in rrule'), a_rule))
elif 'secondly' in a_rule.lower():
errors.append("{0}: {1}".format(_('SECONDLY is not supported'), a_rule))
if re.match(by_day_with_numeric_prefix, a_rule):
errors.append("{0}: {1}".format(_("BYDAY with numeric prefix not supported"), a_rule))
if 'COUNT' in a_rule and 'UNTIL' in a_rule:
errors.append("{0}: {1}".format(_("RRULE may not contain both COUNT and UNTIL"), a_rule))
match_count = re.match(r".*?(COUNT\=[0-9]+)", a_rule)
if match_count:
count_val = match_count.groups()[0].strip().split("=")
if int(count_val[1]) > 999:
errors.append("{0}: {1}".format(_("COUNT > 999 is unsupported"), a_rule))

try:
Schedule.rrulestr(rrule_value)
except Exception as e:
import traceback

logger.error(traceback.format_exc())
raise serializers.ValidationError(_("rrule parsing failed validation: {}").format(e))
errors.append(_("rrule parsing failed validation: {}").format(e))

if errors:
raise serializers.ValidationError(errors)

return value


Expand Down
18 changes: 18 additions & 0 deletions awx/main/migrations/0160_alter_schedule_rrule.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 3.2.12 on 2022-04-18 21:29

from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
('main', '0159_deprecate_inventory_source_UoPU_field'),
]

operations = [
migrations.AlterField(
model_name='schedule',
name='rrule',
field=models.TextField(help_text='A value representing the schedules iCal recurrence rule.'),
),
]
141 changes: 87 additions & 54 deletions awx/main/models/schedules.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ class Meta:
dtend = models.DateTimeField(
null=True, default=None, editable=False, help_text=_("The last occurrence of the schedule occurs before this time, aftewards the schedule expires.")
)
rrule = models.CharField(max_length=255, help_text=_("A value representing the schedules iCal recurrence rule."))
rrule = models.TextField(help_text=_("A value representing the schedules iCal recurrence rule."))
next_run = models.DateTimeField(null=True, default=None, editable=False, help_text=_("The next time that the scheduled action will run."))

@classmethod
Expand All @@ -91,22 +91,22 @@ def get_zoneinfo(self):
@property
def timezone(self):
utc = tzutc()
# All rules in a ruleset will have the same dtstart so we can just take the first rule
tzinfo = Schedule.rrulestr(self.rrule)._rrule[0]._dtstart.tzinfo
if tzinfo is utc:
return 'UTC'
all_zones = Schedule.get_zoneinfo()
all_zones.sort(key=lambda x: -len(x))
for r in Schedule.rrulestr(self.rrule)._rrule:
if r._dtstart:
tzinfo = r._dtstart.tzinfo
if tzinfo is utc:
return 'UTC'
fname = getattr(tzinfo, '_filename', None)
if fname:
for zone in all_zones:
if fname.endswith(zone):
return zone
fname = getattr(tzinfo, '_filename', None)
if fname:
for zone in all_zones:
if fname.endswith(zone):
return zone
logger.warning('Could not detect valid zoneinfo for {}'.format(self.rrule))
return ''

@property
# TODO: How would we handle multiple until parameters? The UI is currently using this on the edit screen of a schedule
def until(self):
# The UNTIL= datestamp (if any) coerced from UTC to the local naive time
# of the DTSTART
Expand Down Expand Up @@ -134,34 +134,48 @@ def coerce_naive_until(cls, rrule):
# timezone (America/New_York), and so we'll coerce to UTC _for you_
# automatically.
#
if 'until=' in rrule.lower():
# if DTSTART;TZID= is used, coerce "naive" UNTIL values
# to the proper UTC date
match_until = re.match(r".*?(?P<until>UNTIL\=[0-9]+T[0-9]+)(?P<utcflag>Z?)", rrule)
if not len(match_until.group('utcflag')):
# rrule = DTSTART;TZID=America/New_York:20200601T120000 RRULE:...;UNTIL=20200601T170000

# Find the UNTIL=N part of the string
# naive_until = UNTIL=20200601T170000
naive_until = match_until.group('until')

# What is the DTSTART timezone for:
# DTSTART;TZID=America/New_York:20200601T120000 RRULE:...;UNTIL=20200601T170000Z
# local_tz = tzfile('/usr/share/zoneinfo/America/New_York')
local_tz = dateutil.rrule.rrulestr(rrule.replace(naive_until, naive_until + 'Z'), tzinfos=UTC_TIMEZONES)._dtstart.tzinfo

# Make a datetime object with tzinfo=<the DTSTART timezone>
# localized_until = datetime.datetime(2020, 6, 1, 17, 0, tzinfo=tzfile('/usr/share/zoneinfo/America/New_York'))
localized_until = make_aware(datetime.datetime.strptime(re.sub('^UNTIL=', '', naive_until), "%Y%m%dT%H%M%S"), local_tz)

# Coerce the datetime to UTC and format it as a string w/ Zulu format
# utc_until = UNTIL=20200601T220000Z
utc_until = 'UNTIL=' + localized_until.astimezone(pytz.utc).strftime('%Y%m%dT%H%M%SZ')

# rrule was: DTSTART;TZID=America/New_York:20200601T120000 RRULE:...;UNTIL=20200601T170000
# rrule is now: DTSTART;TZID=America/New_York:20200601T120000 RRULE:...;UNTIL=20200601T220000Z
rrule = rrule.replace(naive_until, utc_until)
return rrule

# Find the DTSTART rule or raise an error, its usually the first rule but that is not strictly enforced
start_date_rule = re.sub('^.*(DTSTART[^\s]+)\s.*$', r'\1', rrule)
if not start_date_rule:
raise ValueError('A DTSTART field needs to be in the rrule')

rules = re.split(r'\s+', rrule)
for index in range(0, len(rules)):
rule = rules[index]
if 'until=' in rule.lower():
# if DTSTART;TZID= is used, coerce "naive" UNTIL values
# to the proper UTC date
match_until = re.match(r".*?(?P<until>UNTIL\=[0-9]+T[0-9]+)(?P<utcflag>Z?)", rule)
if not len(match_until.group('utcflag')):
# rule = DTSTART;TZID=America/New_York:20200601T120000 RRULE:...;UNTIL=20200601T170000

# Find the UNTIL=N part of the string
# naive_until = UNTIL=20200601T170000
naive_until = match_until.group('until')

# What is the DTSTART timezone for:
# DTSTART;TZID=America/New_York:20200601T120000 RRULE:...;UNTIL=20200601T170000Z
# local_tz = tzfile('/usr/share/zoneinfo/America/New_York')
# We are going to construct a 'dummy' rule for parsing which will include the DTSTART and the rest of the rule
temp_rule = "{} {}".format(start_date_rule, rule.replace(naive_until, naive_until + 'Z'))
# If the rule is an EX rule we have to add an RRULE to it because an EX rule alone will not manifest into a ruleset
if rule.lower().startswith('ex'):
temp_rule = "{} {}".format(temp_rule, 'RRULE:FREQ=MINUTELY;INTERVAL=1;UNTIL=20380601T170000Z')
local_tz = dateutil.rrule.rrulestr(temp_rule, tzinfos=UTC_TIMEZONES, **{'forceset': True})._rrule[0]._dtstart.tzinfo

# Make a datetime object with tzinfo=<the DTSTART timezone>
# localized_until = datetime.datetime(2020, 6, 1, 17, 0, tzinfo=tzfile('/usr/share/zoneinfo/America/New_York'))
localized_until = make_aware(datetime.datetime.strptime(re.sub('^UNTIL=', '', naive_until), "%Y%m%dT%H%M%S"), local_tz)

# Coerce the datetime to UTC and format it as a string w/ Zulu format
# utc_until = UNTIL=20200601T220000Z
utc_until = 'UNTIL=' + localized_until.astimezone(pytz.utc).strftime('%Y%m%dT%H%M%SZ')

# rule was: DTSTART;TZID=America/New_York:20200601T120000 RRULE:...;UNTIL=20200601T170000
# rule is now: DTSTART;TZID=America/New_York:20200601T120000 RRULE:...;UNTIL=20200601T220000Z
rules[index] = rule.replace(naive_until, utc_until)
return " ".join(rules)

@classmethod
def rrulestr(cls, rrule, fast_forward=True, **kwargs):
Expand All @@ -176,20 +190,28 @@ def rrulestr(cls, rrule, fast_forward=True, **kwargs):
if r._dtstart and r._dtstart.tzinfo is None:
raise ValueError('A valid TZID must be provided (e.g., America/New_York)')

if fast_forward and ('MINUTELY' in rrule or 'HOURLY' in rrule) and 'COUNT=' not in rrule:
# Fast forward is a way for us to limit the number of events in the rruleset
# If we are fastforwading and we don't have a count limited rule that is minutely or hourley
# We will modify the start date of the rule to last week to prevent a large number of entries
if fast_forward:
try:
# All rules in a ruleset will have the same dtstart value
# so lets compare the first event to now to see if its > 7 days old
first_event = x[0]
# If the first event was over a week ago...
if (now() - first_event).days > 7:
# hourly/minutely rrules with far-past DTSTART values
# are *really* slow to precompute
# start *from* one week ago to speed things up drastically
dtstart = x._rrule[0]._dtstart.strftime(':%Y%m%dT')
new_start = (now() - datetime.timedelta(days=7)).strftime(':%Y%m%dT')
new_rrule = rrule.replace(dtstart, new_start)
return Schedule.rrulestr(new_rrule, fast_forward=False)
for rule in x._rrule:
# If any rule has a minutely or hourly rule without a count...
if rule._freq in [dateutil.rrule.MINUTELY, dateutil.rrule.HOURLY] and not rule._count:
# hourly/minutely rrules with far-past DTSTART values
# are *really* slow to precompute
# start *from* one week ago to speed things up drastically
new_start = (now() - datetime.timedelta(days=7)).strftime('%Y%m%d')
# Now we want to repalce the DTSTART:<value>T with the new date (which includes the T)
new_rrule = re.sub('(DTSTART[^:]*):[^T]+T', r'\1:{0}T'.format(new_start), rrule)
return Schedule.rrulestr(new_rrule, fast_forward=False)
except IndexError:
pass

return x

def __str__(self):
Expand All @@ -206,6 +228,22 @@ def get_job_kwargs(self):
job_kwargs['_eager_fields'] = {'launch_type': 'scheduled', 'schedule': self}
return job_kwargs

def get_end_date(ruleset):
# if we have a complex ruleset with a lot of options getting the last index of the ruleset can take some time
# And a ruleset without a count/until can come back as datetime.datetime(9999, 12, 31, 15, 0, tzinfo=tzfile('US/Eastern'))
# So we are going to do a quick scan to make sure we would have an end date
for a_rule in ruleset._rrule:
# if this rule does not have until or count in it then we have no end date
if not a_rule._until and not a_rule._count:
return None

# If we made it this far we should have an end date and can ask the ruleset what the last date is
# However, if the until/count is before dtstart we will get an IndexError when trying to get [-1]
try:
return ruleset[-1].astimezone(pytz.utc)
except IndexError:
return None

def update_computed_fields_no_save(self):
affects_fields = ['next_run', 'dtstart', 'dtend']
starting_values = {}
Expand All @@ -229,12 +267,7 @@ def update_computed_fields_no_save(self):
self.dtstart = future_rs[0].astimezone(pytz.utc)
except IndexError:
self.dtstart = None
self.dtend = None
if 'until' in self.rrule.lower() or 'count' in self.rrule.lower():
try:
self.dtend = future_rs[-1].astimezone(pytz.utc)
except IndexError:
self.dtend = None
self.dtend = Schedule.get_end_date(future_rs)

changed = any(getattr(self, field_name) != starting_values[field_name] for field_name in affects_fields)
return changed
Expand Down

0 comments on commit c67f508

Please sign in to comment.