Skip to content

Commit

Permalink
models: Use database constraints to prevent split Series
Browse files Browse the repository at this point in the history
Currently, the 'SeriesReference' object has a unique constraint on the
two fields it has, 'series', which is a foreign key to 'Series', and
'msgid'. This is the wrong constraint. What we actually want to enforce
is that a patch, cover letter or comment is referenced by a single
series, or rather a single series per project the submission appears on.
As such, we should be enforcing uniqueness on the msgid and the project
that the patch, cover letter or comment belongs to.

This requires adding a new field to the object, 'project', since it's
not possible to do something like the following:

  unique_together = [('msgid', 'series__project')]

This is detailed here [1]. In addition, the migration needs a precursor
migration step to merge any broken series.

[1] https://stackoverflow.com/a/4440189/613428

Signed-off-by: Stephen Finucane <stephen@that.guru>
Closes: #241
Cc: Daniel Axtens <dja@axtens.net>
Cc: Petr Vorel <petr.vorel@gmail.com>
  • Loading branch information
stephenfin committed Dec 27, 2019
1 parent 757b33f commit 9f72eb7
Show file tree
Hide file tree
Showing 5 changed files with 199 additions and 66 deletions.
121 changes: 121 additions & 0 deletions patchwork/migrations/0039_unique_series_references.py
@@ -0,0 +1,121 @@
from django.db import connection, migrations, models
from django.db.models import Count
import django.db.models.deletion


def merge_duplicate_series(apps, schema_editor):
SeriesReference = apps.get_model('patchwork', 'SeriesReference')
Patch = apps.get_model('patchwork', 'Patch')

msgid_seriesrefs = {}

# find all SeriesReference that share a msgid but point to different series
# and decide which of the series is going to be the authoritative one
msgid_counts = (
SeriesReference.objects.values('msgid')
.annotate(count=Count('msgid'))
.filter(count__gt=1)
)
for msgid_count in msgid_counts:
msgid = msgid_count['msgid']
chosen_ref = None
for series_ref in SeriesReference.objects.filter(msgid=msgid):
if series_ref.series.cover_letter:
if chosen_ref:
# I don't think this can happen, but explode if it does
raise Exception(
"Looks like you've got two or more series that share "
"some patches but do not share a cover letter. Unable "
"to auto-resolve."
)

# if a series has a cover letter, that's the one we'll group
# everything under
chosen_ref = series_ref

if not chosen_ref:
# if none of the series have cover letters, simply use the last
# one (hint: this relies on Python's weird scoping for for loops
# where 'series_ref' is still accessible outside the loop)
chosen_ref = series_ref

msgid_seriesrefs[msgid] = chosen_ref

# reassign any patches referring to non-authoritative series to point to
# the authoritative one, and delete the other series; we do this separately
# to allow us a chance to raise the exception above if necessary
for msgid, chosen_ref in msgid_seriesrefs.items():
for series_ref in SeriesReference.objects.filter(msgid=msgid):
if series_ref == chosen_ref:
continue

# update the patches to point to our chosen series instead, on the
# assumption that all other metadata is correct
for patch in Patch.objects.filter(series=series_ref.series):
patch.series = chosen_ref.series
patch.save()

# delete the other series (which will delete the series ref)
series_ref.series.delete()


def copy_project_field(apps, schema_editor):
if connection.vendor == 'postgresql':
schema_editor.execute(
"""
UPDATE patchwork_seriesreference
SET project_id = patchwork_series.project_id
FROM patchwork_series
WHERE patchwork_seriesreference.series_id = patchwork_series.id
"""
)
elif connection.vendor == 'mysql':
schema_editor.execute(
"""
UPDATE patchwork_seriesreference, patchwork_series
SET patchwork_seriesreference.project_id = patchwork_series.project_id
WHERE patchwork_seriesreference.series_id = patchwork_series.id
""" # noqa
)
else:
SeriesReference = apps.get_model('patchwork', 'SeriesReference')

for series_ref in SeriesReference.objects.all().select_related(
'series'
):
series_ref.project = series_ref.series.project
series_ref.save()


class Migration(migrations.Migration):

dependencies = [('patchwork', '0038_state_slug')]

operations = [
migrations.RunPython(
merge_duplicate_series, migrations.RunPython.noop, atomic=False
),
migrations.AddField(
model_name='seriesreference',
name='project',
field=models.ForeignKey(
null=True,
on_delete=django.db.models.deletion.CASCADE,
to='patchwork.Project',
),
),
migrations.AlterUniqueTogether(
name='seriesreference', unique_together={('project', 'msgid')}
),
migrations.RunPython(
copy_project_field, migrations.RunPython.noop, atomic=False
),
migrations.AlterField(
model_name='seriesreference',
name='project',
field=models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
to='patchwork.Project',
),
),
]
6 changes: 4 additions & 2 deletions patchwork/models.py
Expand Up @@ -79,7 +79,8 @@ class Project(models.Model):
webscm_url = models.CharField(max_length=2000, blank=True)
list_archive_url = models.CharField(max_length=2000, blank=True)
list_archive_url_format = models.CharField(
max_length=2000, blank=True,
max_length=2000,
blank=True,
help_text="URL format for the list archive's Message-ID redirector. "
"{} will be replaced by the Message-ID.")
commit_url_format = models.CharField(
Expand Down Expand Up @@ -785,6 +786,7 @@ class SeriesReference(models.Model):
required to handle the case whereby one or more patches are
received before the cover letter.
"""
project = models.ForeignKey(Project, on_delete=models.CASCADE)
series = models.ForeignKey(Series, related_name='references',
related_query_name='reference',
on_delete=models.CASCADE)
Expand All @@ -794,7 +796,7 @@ def __str__(self):
return self.msgid

class Meta:
unique_together = [('series', 'msgid')]
unique_together = [('project', 'msgid')]


class Bundle(models.Model):
Expand Down
123 changes: 63 additions & 60 deletions patchwork/parser.py
Expand Up @@ -16,6 +16,7 @@

from django.contrib.auth.models import User
from django.db.utils import IntegrityError
from django.db import transaction
from django.utils import six

from patchwork.models import Comment
Expand Down Expand Up @@ -256,16 +257,9 @@ def _find_series_by_references(project, mail):
for ref in refs:
try:
return SeriesReference.objects.get(
msgid=ref[:255], series__project=project).series
msgid=ref[:255], project=project).series
except SeriesReference.DoesNotExist:
continue
except SeriesReference.MultipleObjectsReturned:
# FIXME: Open bug: this can happen when we're processing
# messages in parallel. Pick the first and log.
logger.error("Multiple SeriesReferences for %s in project %s!" %
(ref[:255], project.name))
return SeriesReference.objects.filter(
msgid=ref[:255], series__project=project).first().series


def _find_series_by_markers(project, mail, author):
Expand Down Expand Up @@ -1092,47 +1086,65 @@ def parse_mail(mail, list_id=None):
except IntegrityError:
raise DuplicateMailError(msgid=msgid)

# if we don't have a series marker, we will never have an existing
# series to match against.
series = None
if n:
series = find_series(project, mail, author)
for attempt in range(1, 11): # arbitrary retry count
try:
with transaction.atomic():
# if we don't have a series marker, we will never have an
# existing series to match against.
series = None
if n:
series = find_series(project, mail, author)
else:
x = n = 1

# We will create a new series if:
# - there is no existing series to assign this patch to, or
# - there is an existing series, but it already has a patch
# with this number in it
if not series or Patch.objects.filter(
series=series, number=x).count():
series = Series.objects.create(
project=project,
date=date,
submitter=author,
version=version,
total=n)

# NOTE(stephenfin) We must save references for series.
# We do this to handle the case where a later patch is
# received first. Without storing references, it would
# not be possible to identify the relationship between
# patches as the earlier patch does not reference the
# later one.
for ref in refs + [msgid]:
ref = ref[:255]
# we don't want duplicates
try:
# we could have a ref to a previous series.
# (For example, a series sent in reply to
# another series.) That should not create a
# series ref for this series, so check for the
# msg-id only, not the msg-id/series pair.
SeriesReference.objects.get(
msgid=ref, project=project)
except SeriesReference.DoesNotExist:
SeriesReference.objects.create(
msgid=ref, project=project, series=series)
break
except IntegrityError:
# we lost the race so go again
logger.warning('Conflict while saving series. This is '
'probably because multiple patches belonging '
'to the same series have been received at '
'once. Trying again (attempt %02d/10)',
attempt)
else:
x = n = 1

# We will create a new series if:
# - there is no existing series to assign this patch to, or
# - there is an existing series, but it already has a patch with this
# number in it
if not series or Patch.objects.filter(series=series, number=x).count():
series = Series.objects.create(
project=project,
date=date,
submitter=author,
version=version,
total=n)

# NOTE(stephenfin) We must save references for series. We
# do this to handle the case where a later patch is
# received first. Without storing references, it would not
# be possible to identify the relationship between patches
# as the earlier patch does not reference the later one.
for ref in refs + [msgid]:
ref = ref[:255]
# we don't want duplicates
try:
# we could have a ref to a previous series. (For
# example, a series sent in reply to another
# series.) That should not create a series ref
# for this series, so check for the msg-id only,
# not the msg-id/series pair.
SeriesReference.objects.get(msgid=ref,
series__project=project)
except SeriesReference.DoesNotExist:
SeriesReference.objects.create(series=series, msgid=ref)
except SeriesReference.MultipleObjectsReturned:
logger.error("Multiple SeriesReferences for %s"
" in project %s!" % (ref, project.name))
# we failed to save the series so return the series-less patch
logger.warning('Failed to save series. Your patch with message ID '
'%s has been saved but this should not happen. '
'Please report this as a bug and include logs',
msgid)
return patch

# add to a series if we have found one, and we have a numbered
# patch. Don't add unnumbered patches (for example diffs sent
Expand Down Expand Up @@ -1170,14 +1182,9 @@ def parse_mail(mail, list_id=None):
# message
try:
series = SeriesReference.objects.get(
msgid=msgid, series__project=project).series
msgid=msgid, project=project).series
except SeriesReference.DoesNotExist:
series = None
except SeriesReference.MultipleObjectsReturned:
logger.error("Multiple SeriesReferences for %s"
" in project %s!" % (msgid, project.name))
series = SeriesReference.objects.filter(
msgid=msgid, series__project=project).first().series

if not series:
series = Series.objects.create(
Expand All @@ -1190,12 +1197,8 @@ def parse_mail(mail, list_id=None):
# we don't save the in-reply-to or references fields
# for a cover letter, as they can't refer to the same
# series
try:
SeriesReference.objects.get_or_create(series=series,
msgid=msgid)
except SeriesReference.MultipleObjectsReturned:
logger.error("Multiple SeriesReferences for %s"
" in project %s!" % (msgid, project.name))
SeriesReference.objects.create(
msgid=msgid, project=project, series=series)

try:
cover_letter = CoverLetter.objects.create(
Expand Down
9 changes: 6 additions & 3 deletions patchwork/tests/test_parser.py
Expand Up @@ -421,17 +421,20 @@ def test_nested_series(self):
msgids = [make_msgid()]
project = create_project()
series_v1 = create_series(project=project)
create_series_reference(msgid=msgids[0], series=series_v1)
create_series_reference(msgid=msgids[0], series=series_v1,
project=project)

# ...and three patches
for i in range(3):
msgids.append(make_msgid())
create_series_reference(msgid=msgids[-1], series=series_v1)
create_series_reference(msgid=msgids[-1], series=series_v1,
project=project)

# now create a new series with "cover letter"
msgids.append(make_msgid())
series_v2 = create_series(project=project)
ref_v2 = create_series_reference(msgid=msgids[-1], series=series_v2)
ref_v2 = create_series_reference(msgid=msgids[-1], series=series_v2,
project=project)

# ...and the "first patch" of this new series
msgid = make_msgid()
Expand Down
6 changes: 5 additions & 1 deletion patchwork/tests/utils.py
Expand Up @@ -283,8 +283,12 @@ def create_series(**kwargs):

def create_series_reference(**kwargs):
"""Create 'SeriesReference' object."""
project = kwargs.pop('project', create_project())
series = kwargs.pop('series', create_series(project=project))

values = {
'series': create_series() if 'series' not in kwargs else None,
'series': series,
'project': project,
'msgid': make_msgid(),
}
values.update(**kwargs)
Expand Down

0 comments on commit 9f72eb7

Please sign in to comment.