From bd7d19aa2e5dcf6b3f9bd771873945fbfc2f4cc2 Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Wed, 17 Apr 2024 10:35:06 -0500 Subject: [PATCH 01/16] wip: identify whats needed to obviate ghostlinkd --- ietf/doc/expire.py | 8 ++++++++ ietf/doc/models.py | 1 + ietf/doc/utils_charter.py | 1 + ietf/doc/views_charter.py | 1 + ietf/doc/views_draft.py | 3 +++ ietf/doc/views_review.py | 1 + ietf/idindex/tasks.py | 1 + ietf/submit/utils.py | 7 ++++++- 8 files changed, 22 insertions(+), 1 deletion(-) diff --git a/ietf/doc/expire.py b/ietf/doc/expire.py index f6779e0471..1650b4ddf5 100644 --- a/ietf/doc/expire.py +++ b/ietf/doc/expire.py @@ -139,6 +139,9 @@ def move_file(f): if os.path.exists(src): try: + # ghostlinkd would keep this in the combined all archive since it would + # be sourced from a different place. But when ghostlinkd is removed, nothing + # new is needed here - the file will already exist in the combined archive shutil.move(src, dst) except IOError as e: if "No such file or directory" in str(e): @@ -213,6 +216,10 @@ def splitext(fn): filename, revision = match.groups() def move_file_to(subdir): + # Similar to move_draft_files_to_archive + # ghostlinkd would keep this in the combined all archive since it would + # be sourced from a different place. But when ghostlinkd is removed, nothing + # new is needed here - the file will already exist in the combined archive shutil.move(path, os.path.join(settings.INTERNET_DRAFT_ARCHIVE_DIR, subdir, basename)) @@ -229,4 +236,5 @@ def move_file_to(subdir): move_file_to("") except Document.DoesNotExist: + # All uses of this past 2014 seem related to major system failures. move_file_to("unknown_ids") diff --git a/ietf/doc/models.py b/ietf/doc/models.py index d97e8238ec..a103fca645 100644 --- a/ietf/doc/models.py +++ b/ietf/doc/models.py @@ -142,6 +142,7 @@ def get_file_path(self): if self.is_dochistory(): self._cached_file_path = settings.INTERNET_ALL_DRAFTS_ARCHIVE_DIR else: + # This could be simplified since anything in INTERNET_DRAFT_PATH is also already in INTERNET_ALL_DRAFTS_ARCHIVE_DIR draft_state = self.get_state('draft') if draft_state and draft_state.slug == 'active': self._cached_file_path = settings.INTERNET_DRAFT_PATH diff --git a/ietf/doc/utils_charter.py b/ietf/doc/utils_charter.py index 7d2001e4d7..ecd1c3fce5 100644 --- a/ietf/doc/utils_charter.py +++ b/ietf/doc/utils_charter.py @@ -94,6 +94,7 @@ def fix_charter_revision_after_approval(charter, by): try: old = os.path.join(charter.get_file_path(), '%s-%s.txt' % (charter.name, charter.rev)) new = os.path.join(charter.get_file_path(), '%s-%s.txt' % (charter.name, next_approved_revision(charter.rev))) + # TODO: MULTIWRITE - needs to also land in the ftp dir shutil.copy(old, new) except IOError: log("There was an error copying %s to %s" % (old, new)) diff --git a/ietf/doc/views_charter.py b/ietf/doc/views_charter.py index 9596970f86..e925503092 100644 --- a/ietf/doc/views_charter.py +++ b/ietf/doc/views_charter.py @@ -435,6 +435,7 @@ def submit(request, name, option=None): events.append(e) # Save file on disk + # TODO : MULTIWRITE - this also needs to be written to ftp charter_filename = charter_filename.with_name( f"{name}-{charter.rev}.txt" ) # update rev diff --git a/ietf/doc/views_draft.py b/ietf/doc/views_draft.py index ea30e7bd2d..1deca4503c 100644 --- a/ietf/doc/views_draft.py +++ b/ietf/doc/views_draft.py @@ -831,6 +831,9 @@ def restore_draft_file(request, draft): log.log("Resurrecting %s. Moving files:" % draft.name) for file in files: try: + # ghostlinkd would keep this in the combined all archive since it would + # be sourced from a different place. But when ghostlinkd is removed, nothing + # new is needed here - the file will already exist in the combined archive shutil.move(file, settings.INTERNET_DRAFT_PATH) log.log(" Moved file %s to %s" % (file, settings.INTERNET_DRAFT_PATH)) except shutil.Error as ex: diff --git a/ietf/doc/views_review.py b/ietf/doc/views_review.py index 04c558ce3e..89413c689e 100644 --- a/ietf/doc/views_review.py +++ b/ietf/doc/views_review.py @@ -803,6 +803,7 @@ def complete_review(request, name, assignment_id=None, acronym=None): else: content = form.cleaned_data['review_content'] + # TODO: MULTIWRITE - this also needs to be written to ftp filename = os.path.join(review.get_file_path(), '{}.txt'.format(review.name)) with io.open(filename, 'w', encoding='utf-8') as destination: destination.write(content) diff --git a/ietf/idindex/tasks.py b/ietf/idindex/tasks.py index c01d50cf5d..290095b8f5 100644 --- a/ietf/idindex/tasks.py +++ b/ietf/idindex/tasks.py @@ -69,6 +69,7 @@ def idindex_update_task(): derived_all_id2_tmpfile = tmp_mgr.make_temp_file(all_id2_content) # Move temp files as-atomically-as-possible into place + # TODO: multiwrite - anything being written into id_path also needs to be written to the all archive path tmp_mgr.move_into_place(all_id_tmpfile, id_path / "all_id.txt") tmp_mgr.move_into_place(derived_all_id_tmpfile, derived_path / "all_id.txt") tmp_mgr.move_into_place(download_all_id_tmpfile, download_path / "id-all.txt") diff --git a/ietf/submit/utils.py b/ietf/submit/utils.py index e03c702632..632b1e9a4a 100644 --- a/ietf/submit/utils.py +++ b/ietf/submit/utils.py @@ -167,7 +167,10 @@ def validate_submission_rev(name, rev): if rev != expected: return 'Invalid revision (revision %02d is expected)' % expected - + + # This is not really correct, though the edges that it doesn't cover are not likely. + # It might be better just to look in the combined archive to make sure we're not colliding with + # a thing that exists there already because it was included from an approved personal collection. for dirname in [settings.INTERNET_DRAFT_PATH, settings.INTERNET_DRAFT_ARCHIVE_DIR, ]: dir = pathlib.Path(dirname) pattern = '%s-%02d.*' % (name, rev) @@ -651,6 +654,8 @@ def move_files_to_repository(submission): source = Path(settings.IDSUBMIT_STAGING_PATH) / fname dest = Path(settings.IDSUBMIT_REPOSITORY_PATH) / fname if source.exists(): + # TODO: MULTIWRITE, both to the all-archive dir and the ftp dir + # while here, question whether having IDSUBMIT_REPOSITORY_PATH and INTERNET_DRAFT_PATH as separate settings makes sense. move(source, dest) elif dest.exists(): log.log("Intended to move '%s' to '%s', but found source missing while destination exists.") From 9a1a2e73935896caf1efb045f5604a7cd918b706 Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Wed, 17 Apr 2024 11:29:41 -0500 Subject: [PATCH 02/16] fix: hardlink new charter files to ftp directory --- ietf/doc/utils_charter.py | 26 +++++++++++++++++++++++--- ietf/settings.py | 1 + 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/ietf/doc/utils_charter.py b/ietf/doc/utils_charter.py index ecd1c3fce5..b29d1e303c 100644 --- a/ietf/doc/utils_charter.py +++ b/ietf/doc/utils_charter.py @@ -92,12 +92,31 @@ def change_group_state_after_charter_approval(group, by): def fix_charter_revision_after_approval(charter, by): # according to spec, 00-02 becomes 01, so copy file and record new revision try: - old = os.path.join(charter.get_file_path(), '%s-%s.txt' % (charter.name, charter.rev)) - new = os.path.join(charter.get_file_path(), '%s-%s.txt' % (charter.name, next_approved_revision(charter.rev))) - # TODO: MULTIWRITE - needs to also land in the ftp dir + old = os.path.join( + charter.get_file_path(), "%s-%s.txt" % (charter.name, charter.rev) + ) + new = os.path.join( + charter.get_file_path(), + "%s-%s.txt" % (charter.name, next_approved_revision(charter.rev)), + ) shutil.copy(old, new) except IOError: log("There was an error copying %s to %s" % (old, new)) + # Also provide a copy to the legacy ftp source directory, which is served by rsync + # This replaces the hardlink copy that ghostlink has made in the past + # Still using a hardlink as long as these are on the same filesystem. + # Staying with os.path vs pathlib.Path until we get to python>=3.10. + charter_dir = os.path.join(settings.FTP_DIR, "charter") + ftp_filepath = os.path.join( + charter_dir, "%s-%s.txt" % (charter.name, next_approved_revision(charter.rev)) + ) + try: + os.link(new, ftp_filepath) + except IOError: + log( + "There was an error creating a harlink at %s pointing to %s" + % (ftp_filepath, new) + ) events = [] e = NewRevisionDocEvent(doc=charter, by=by, type="new_revision") @@ -109,6 +128,7 @@ def fix_charter_revision_after_approval(charter, by): charter.rev = e.rev charter.save_with_history(events) + def historic_milestones_for_charter(charter, rev): """Return GroupMilestone/GroupMilestoneHistory objects for charter document at rev by looking through the history.""" diff --git a/ietf/settings.py b/ietf/settings.py index dca3fb132a..d1b380013e 100644 --- a/ietf/settings.py +++ b/ietf/settings.py @@ -684,6 +684,7 @@ def skip_unreadable_post(record): INTERNET_ALL_DRAFTS_ARCHIVE_DIR = '/a/ietfdata/doc/draft/archive' MEETING_RECORDINGS_DIR = '/a/www/audio' DERIVED_DIR = '/a/ietfdata/derived' +FTP_DIR = '/a/ftp' DOCUMENT_FORMAT_ALLOWLIST = ["txt", "ps", "pdf", "xml", "html", ] From f332aeaeaffa0715115e6fe2185e49955fea50f0 Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Wed, 17 Apr 2024 14:26:35 -0500 Subject: [PATCH 03/16] fix: hardlink new charter files to ftp directory (continued) --- ietf/doc/views_charter.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/ietf/doc/views_charter.py b/ietf/doc/views_charter.py index e925503092..7fb1a532fc 100644 --- a/ietf/doc/views_charter.py +++ b/ietf/doc/views_charter.py @@ -4,6 +4,7 @@ import datetime import json +import os import textwrap from pathlib import Path @@ -42,7 +43,7 @@ from ietf.name.models import GroupStateName from ietf.person.models import Person from ietf.utils.history import find_history_active_at -from ietf.utils.log import assertion +from ietf.utils.log import assertion, log from ietf.utils.mail import send_mail_preformatted from ietf.utils.textupload import get_cleaned_text_file_content from ietf.utils.response import permission_denied @@ -444,6 +445,18 @@ def submit(request, name, option=None): destination.write(form.cleaned_data["txt"]) else: destination.write(form.cleaned_data["content"]) + # Also provide a copy to the legacy ftp source directory, which is served by rsync + # This replaces the hardlink copy that ghostlink has made in the past + # Still using a hardlink as long as these are on the same filesystem. + ftp_filename = Path(settings.FTP_DIR) / "charter" / charter_filename.name + try: + os.link(charter_filename, ftp_filename) # os.link until we are on python>=3.10 + except IOError: + log( + "There was an error creating a hardlink at %s pointing to %s" + % (ftp_filename, charter_filename) + ) + if option in ["initcharter", "recharter"] and charter.ad == None: charter.ad = getattr(group.ad_role(), "person", None) From 9d05204c2eaf6139cca8516c3d86babc34c6ef08 Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Wed, 17 Apr 2024 14:28:00 -0500 Subject: [PATCH 04/16] chore: bring settings comment up to date --- ietf/settings.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ietf/settings.py b/ietf/settings.py index d1b380013e..089cda8d19 100644 --- a/ietf/settings.py +++ b/ietf/settings.py @@ -679,8 +679,8 @@ def skip_unreadable_post(record): IPR_DOCUMENT_PATH = '/a/www/ietf-ftp/ietf/IPR/' # Move drafts to this directory when they expire INTERNET_DRAFT_ARCHIVE_DIR = '/a/ietfdata/doc/draft/collection/draft-archive/' -# The following directory contains linked copies of all drafts, but don't -# write anything to this directory -- its content is maintained by ghostlinkd: +# The following directory contains copies of all drafts - it used to be +# a set of hardlinks maintained by ghostlinkd, but is now explicitly written to INTERNET_ALL_DRAFTS_ARCHIVE_DIR = '/a/ietfdata/doc/draft/archive' MEETING_RECORDINGS_DIR = '/a/www/audio' DERIVED_DIR = '/a/ietfdata/derived' From 38d73de822b35125088b92916b747fa789e5d13f Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Wed, 17 Apr 2024 14:29:10 -0500 Subject: [PATCH 05/16] chore: add archive and ftp dirs to setup of various environments --- dev/deploy-to-container/settings_local.py | 3 ++- dev/diff/settings_local.py | 3 ++- dev/tests/settings_local.py | 3 ++- docker/configs/settings_local.py | 3 ++- docker/scripts/app-create-dirs.sh | 6 ++++++ 5 files changed, 14 insertions(+), 4 deletions(-) diff --git a/dev/deploy-to-container/settings_local.py b/dev/deploy-to-container/settings_local.py index 15b44433ea..25eacc3004 100644 --- a/dev/deploy-to-container/settings_local.py +++ b/dev/deploy-to-container/settings_local.py @@ -60,10 +60,11 @@ BOFREQ_PATH = '/assets/ietf-ftp/bofreq/' CONFLICT_REVIEW_PATH = '/assets/ietf-ftp/conflict-reviews/' STATUS_CHANGE_PATH = '/assets/ietf-ftp/status-changes/' -INTERNET_DRAFT_ARCHIVE_DIR = '/assets/archive/id' +INTERNET_DRAFT_ARCHIVE_DIR = '/assets/collection/draft-archive' INTERNET_ALL_DRAFTS_ARCHIVE_DIR = '/assets/archive/id' BIBXML_BASE_PATH = '/assets/ietfdata/derived/bibxml' IDSUBMIT_REPOSITORY_PATH = INTERNET_DRAFT_PATH +FTP_DIR = '/assets/ftp' NOMCOM_PUBLIC_KEYS_DIR = 'data/nomcom_keys/public_keys/' SLIDE_STAGING_PATH = '/test/staging/' diff --git a/dev/diff/settings_local.py b/dev/diff/settings_local.py index 593ccadd7f..774c7797cf 100644 --- a/dev/diff/settings_local.py +++ b/dev/diff/settings_local.py @@ -57,9 +57,10 @@ BOFREQ_PATH = '/assets/ietf-ftp/bofreq/' CONFLICT_REVIEW_PATH = '/assets/ietf-ftp/conflict-reviews/' STATUS_CHANGE_PATH = '/assets/ietf-ftp/status-changes/' -INTERNET_DRAFT_ARCHIVE_DIR = '/assets/ietf-ftp/internet-drafts/' +INTERNET_DRAFT_ARCHIVE_DIR = '/assets/collection/draft-archive' INTERNET_ALL_DRAFTS_ARCHIVE_DIR = '/assets/ietf-ftp/internet-drafts/' BIBXML_BASE_PATH = '/assets/ietfdata/derived/bibxml' +FTP_DIR = '/assets/ftp' NOMCOM_PUBLIC_KEYS_DIR = 'data/nomcom_keys/public_keys/' SLIDE_STAGING_PATH = 'test/staging/' diff --git a/dev/tests/settings_local.py b/dev/tests/settings_local.py index 0cd761c0a9..8b5d90b1ec 100644 --- a/dev/tests/settings_local.py +++ b/dev/tests/settings_local.py @@ -56,9 +56,10 @@ BOFREQ_PATH = '/assets/ietf-ftp/bofreq/' CONFLICT_REVIEW_PATH = '/assets/ietf-ftp/conflict-reviews/' STATUS_CHANGE_PATH = '/assets/ietf-ftp/status-changes/' -INTERNET_DRAFT_ARCHIVE_DIR = '/assets/ietf-ftp/internet-drafts/' +INTERNET_DRAFT_ARCHIVE_DIR = '/assets/collection/draft-archive' INTERNET_ALL_DRAFTS_ARCHIVE_DIR = '/assets/ietf-ftp/internet-drafts/' BIBXML_BASE_PATH = '/assets/ietfdata/derived/bibxml' +FTP_DIR = '/assets/ftp' NOMCOM_PUBLIC_KEYS_DIR = 'data/nomcom_keys/public_keys/' SLIDE_STAGING_PATH = 'test/staging/' diff --git a/docker/configs/settings_local.py b/docker/configs/settings_local.py index 07c16c2e9a..bcd04898ea 100644 --- a/docker/configs/settings_local.py +++ b/docker/configs/settings_local.py @@ -46,10 +46,11 @@ BOFREQ_PATH = '/assets/ietf-ftp/bofreq/' CONFLICT_REVIEW_PATH = '/assets/ietf-ftp/conflict-reviews/' STATUS_CHANGE_PATH = '/assets/ietf-ftp/status-changes/' -INTERNET_DRAFT_ARCHIVE_DIR = '/assets/archive/id' +INTERNET_DRAFT_ARCHIVE_DIR = '/assets/collection/draft-archive' INTERNET_ALL_DRAFTS_ARCHIVE_DIR = '/assets/archive/id' BIBXML_BASE_PATH = '/assets/ietfdata/derived/bibxml' IDSUBMIT_REPOSITORY_PATH = INTERNET_DRAFT_PATH +FTP_DIR = '/assets/ftp' NOMCOM_PUBLIC_KEYS_DIR = 'data/nomcom_keys/public_keys/' SLIDE_STAGING_PATH = 'test/staging/' diff --git a/docker/scripts/app-create-dirs.sh b/docker/scripts/app-create-dirs.sh index d9296ecffe..b75c57767d 100755 --- a/docker/scripts/app-create-dirs.sh +++ b/docker/scripts/app-create-dirs.sh @@ -9,6 +9,8 @@ for sub in \ test/wiki/ietf \ data/nomcom_keys/public_keys \ /assets/archive/id \ + /assets/collection \ + /assets/collection/draft-archive \ /assets/ietf-ftp \ /assets/ietf-ftp/bofreq \ /assets/ietf-ftp/charter \ @@ -33,6 +35,10 @@ for sub in \ /assets/www6/iesg \ /assets/www6/iesg/evaluation \ /assets/media/photo \ + /assets/ftp \ + /assets/ftp/charter \ + /assets/ftp/internet-drafts \ + /assets/ftp/review \ ; do if [ ! -d "$sub" ]; then echo "Creating dir $sub" From c51120a4c57ecb7a9793761cb662cc6bb8d661b7 Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Thu, 18 Apr 2024 11:47:43 -0500 Subject: [PATCH 06/16] fix: test charter submits write to ftp dir --- ietf/doc/tests_charter.py | 13 ++++++++++--- ietf/doc/views_charter.py | 4 ++-- ietf/utils/test_utils.py | 1 + 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/ietf/doc/tests_charter.py b/ietf/doc/tests_charter.py index 1bd6c1701d..c89f14d408 100644 --- a/ietf/doc/tests_charter.py +++ b/ietf/doc/tests_charter.py @@ -87,6 +87,10 @@ def test_view_revisions(self): class EditCharterTests(TestCase): settings_temp_path_overrides = TestCase.settings_temp_path_overrides + ['CHARTER_PATH'] + def setUp(self): + super().setUp() + (Path(settings.FTP_DIR)/"charter").mkdir() + def write_charter_file(self, charter): (Path(settings.CHARTER_PATH) / f"{charter.name}-{charter.rev}.txt").write_text("This is a charter.") @@ -506,13 +510,16 @@ def test_submit_charter(self): self.assertEqual(charter.rev, next_revision(prev_rev)) self.assertTrue("new_revision" in charter.latest_event().type) - file_contents = ( - Path(settings.CHARTER_PATH) / (charter.name + "-" + charter.rev + ".txt") - ).read_text("utf-8") + charter_path = Path(settings.CHARTER_PATH) / (charter.name + "-" + charter.rev + ".txt") + file_contents = (charter_path).read_text("utf-8") self.assertEqual( file_contents, "Windows line\nMac line\nUnix line\n" + utf_8_snippet.decode("utf-8"), ) + ftp_charter_path = Path(settings.FTP_DIR) / "charter" / charter_path.name + self.assertTrue(ftp_charter_path.exists()) + self.assertTrue(charter_path.samefile(ftp_charter_path)) + def test_submit_initial_charter(self): group = GroupFactory(type_id='wg',acronym='mars',list_email='mars-wg@ietf.org') diff --git a/ietf/doc/views_charter.py b/ietf/doc/views_charter.py index 7fb1a532fc..3a1da4f5d6 100644 --- a/ietf/doc/views_charter.py +++ b/ietf/doc/views_charter.py @@ -436,7 +436,6 @@ def submit(request, name, option=None): events.append(e) # Save file on disk - # TODO : MULTIWRITE - this also needs to be written to ftp charter_filename = charter_filename.with_name( f"{name}-{charter.rev}.txt" ) # update rev @@ -451,7 +450,8 @@ def submit(request, name, option=None): ftp_filename = Path(settings.FTP_DIR) / "charter" / charter_filename.name try: os.link(charter_filename, ftp_filename) # os.link until we are on python>=3.10 - except IOError: + except IOError as e: + debug.show("e") log( "There was an error creating a hardlink at %s pointing to %s" % (ftp_filename, charter_filename) diff --git a/ietf/utils/test_utils.py b/ietf/utils/test_utils.py index ddd274a613..ba35665a8d 100644 --- a/ietf/utils/test_utils.py +++ b/ietf/utils/test_utils.py @@ -211,6 +211,7 @@ class TestCase(django.test.TestCase): 'INTERNET_DRAFT_ARCHIVE_DIR', 'INTERNET_DRAFT_PATH', 'BIBXML_BASE_PATH', + 'FTP_DIR', ] parser = html5lib.HTMLParser(strict=True) From 51cef45b492ef036e2ceb1731f4f881492654574 Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Thu, 18 Apr 2024 11:49:13 -0500 Subject: [PATCH 07/16] chore: remove debug --- ietf/doc/views_charter.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ietf/doc/views_charter.py b/ietf/doc/views_charter.py index 3a1da4f5d6..a1f70a80f1 100644 --- a/ietf/doc/views_charter.py +++ b/ietf/doc/views_charter.py @@ -451,7 +451,6 @@ def submit(request, name, option=None): try: os.link(charter_filename, ftp_filename) # os.link until we are on python>=3.10 except IOError as e: - debug.show("e") log( "There was an error creating a hardlink at %s pointing to %s" % (ftp_filename, charter_filename) From f2dd48e9007771e129b7bb147e40c5b61a4d0c5a Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Thu, 18 Apr 2024 11:55:24 -0500 Subject: [PATCH 08/16] fix: test charter approval writes to ftp dir --- ietf/doc/tests_charter.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/ietf/doc/tests_charter.py b/ietf/doc/tests_charter.py index c89f14d408..e0207fe842 100644 --- a/ietf/doc/tests_charter.py +++ b/ietf/doc/tests_charter.py @@ -815,9 +815,11 @@ def test_approve(self): self.assertTrue(not charter.ballot_open("approve")) self.assertEqual(charter.rev, "01") - self.assertTrue( - (Path(settings.CHARTER_PATH) / ("charter-ietf-%s-%s.txt" % (group.acronym, charter.rev))).exists() - ) + charter_path = Path(settings.CHARTER_PATH) / ("charter-ietf-%s-%s.txt" % (group.acronym, charter.rev)) + charter_ftp_path = Path(settings.FTP_DIR) / "charter" / charter_path.name + self.assertTrue(charter_path.exists()) + self.assertTrue(charter_ftp_path.exists()) + self.assertTrue(charter_path.samefile(charter_ftp_path)) self.assertEqual(len(outbox), 2) # From c8009246cec3f0cba32673d7aa34dc6ae2ccc023 Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Thu, 18 Apr 2024 12:40:59 -0500 Subject: [PATCH 09/16] fix: link review revisions into ftp dir --- ietf/doc/tests_review.py | 31 +++++++++++++++++++------------ ietf/doc/views_review.py | 12 ++++++++---- 2 files changed, 27 insertions(+), 16 deletions(-) diff --git a/ietf/doc/tests_review.py b/ietf/doc/tests_review.py index d9aca94e86..a956fd3287 100644 --- a/ietf/doc/tests_review.py +++ b/ietf/doc/tests_review.py @@ -2,6 +2,7 @@ # -*- coding: utf-8 -*- +from pathlib import Path import datetime, os, shutil import io import tarfile, tempfile, mailbox @@ -47,6 +48,7 @@ def setUp(self): self.review_dir = self.tempdir('review') self.old_document_path_pattern = settings.DOCUMENT_PATH_PATTERN settings.DOCUMENT_PATH_PATTERN = self.review_dir + "/{doc.type_id}/" + (Path(settings.FTP_DIR) / "review").mkdir() self.review_subdir = os.path.join(self.review_dir, "review") if not os.path.exists(self.review_subdir): @@ -57,6 +59,13 @@ def tearDown(self): settings.DOCUMENT_PATH_PATTERN = self.old_document_path_pattern super().tearDown() + def verify_review_files_were_written(self, assignment, expected_content = "This is a review\nwith two lines"): + review_file = Path(self.review_subdir) / f"{assignment.review.name}.txt" + content = review_file.read_text() + self.assertEqual(content, expected_content) + review_ftp_file = Path(settings.FTP_DIR) / "review" / review_file.name + self.assertTrue(review_file.samefile(review_ftp_file)) + def test_request_review(self): doc = WgDraftFactory(group__acronym='mars',rev='01') NewRevisionDocEventFactory(doc=doc,rev='01') @@ -830,8 +839,7 @@ def test_complete_review_upload_content(self): self.assertTrue(assignment.review_request.team.acronym.lower() in assignment.review.name) self.assertTrue(assignment.review_request.doc.rev in assignment.review.name) - with io.open(os.path.join(self.review_subdir, assignment.review.name + ".txt")) as f: - self.assertEqual(f.read(), "This is a review\nwith two lines") + self.verify_review_files_were_written(assignment) self.assertEqual(len(outbox), 1) self.assertIn(assignment.review_request.team.list_email, outbox[0]["To"]) @@ -885,8 +893,7 @@ def test_complete_review_enter_content(self): completed_time_diff = timezone.now() - assignment.completed_on self.assertLess(completed_time_diff, datetime.timedelta(seconds=10)) - with io.open(os.path.join(self.review_subdir, assignment.review.name + ".txt")) as f: - self.assertEqual(f.read(), "This is a review\nwith two lines") + self.verify_review_files_were_written(assignment) self.assertEqual(len(outbox), 1) self.assertIn(assignment.review_request.team.list_email, outbox[0]["To"]) @@ -926,8 +933,7 @@ def test_complete_review_enter_content_by_secretary(self): self.assertLess(event0_time_diff, datetime.timedelta(seconds=10)) self.assertEqual(events[1].time, datetime.datetime(2012, 12, 24, 12, 13, 14, tzinfo=DEADLINE_TZINFO)) - with io.open(os.path.join(self.review_subdir, assignment.review.name + ".txt")) as f: - self.assertEqual(f.read(), "This is a review\nwith two lines") + self.verify_review_files_were_written(assignment) self.assertEqual(len(outbox), 1) self.assertIn(assignment.review_request.team.list_email, outbox[0]["To"]) @@ -1013,8 +1019,7 @@ def test_complete_review_link_to_mailing_list(self, mock): assignment = reload_db_objects(assignment) self.assertEqual(assignment.state_id, "completed") - with io.open(os.path.join(self.review_subdir, assignment.review.name + ".txt")) as f: - self.assertEqual(f.read(), "This is a review\nwith two lines") + self.verify_review_files_were_written(assignment) self.assertEqual(len(outbox), 0) self.assertTrue("http://example.com" in assignment.review.external_url) @@ -1063,8 +1068,7 @@ def test_complete_unsolicited_review_link_to_mailing_list_by_secretary(self, moc self.assertEqual(assignment.reviewer, rev_role.person.role_email('reviewer')) self.assertEqual(assignment.state_id, "completed") - with io.open(os.path.join(self.review_subdir, assignment.review.name + ".txt")) as f: - self.assertEqual(f.read(), "This is a review\nwith two lines") + self.verify_review_files_were_written(assignment) self.assertEqual(len(outbox), 0) self.assertTrue("http://example.com" in assignment.review.external_url) @@ -1172,8 +1176,9 @@ def test_revise_review_enter_content(self): self.assertLess(event_time_diff, datetime.timedelta(seconds=10)) self.assertTrue('revised' in event1.desc.lower()) - with io.open(os.path.join(self.review_subdir, assignment.review.name + ".txt")) as f: - self.assertEqual(f.read(), "This is a review\nwith two lines") + # See https://github.com/ietf-tools/datatracker/issues/6941 + # These are _not_ getting written as a new version as intended. + self.verify_review_files_were_written(assignment) self.assertEqual(len(outbox), 0) @@ -1200,6 +1205,8 @@ def test_revise_review_enter_content(self): # Ensure that a new event was created for the new revision (#2590) self.assertNotEqual(event1.id, event2.id) + self.verify_review_files_were_written(assignment, "This is a revised review") + self.assertEqual(len(outbox), 0) def test_edit_comment(self): diff --git a/ietf/doc/views_review.py b/ietf/doc/views_review.py index 89413c689e..ce55050f22 100644 --- a/ietf/doc/views_review.py +++ b/ietf/doc/views_review.py @@ -7,6 +7,7 @@ import json import os import datetime +from pathlib import Path import requests import email.utils @@ -803,10 +804,13 @@ def complete_review(request, name, assignment_id=None, acronym=None): else: content = form.cleaned_data['review_content'] - # TODO: MULTIWRITE - this also needs to be written to ftp - filename = os.path.join(review.get_file_path(), '{}.txt'.format(review.name)) - with io.open(filename, 'w', encoding='utf-8') as destination: - destination.write(content) + review_path = Path(review.get_file_path()) / f"{review.name}.txt" + review_path.write_text(content) + review_ftp_path = Path(settings.FTP_DIR) / "review" / review_path.name + # See https://github.com/ietf-tools/datatracker/issues/6941 - when that's + # addressed, making this link should not be conditional + if not review_ftp_path.exists(): + os.link(review_path, review_ftp_path) # switch this to Path.hardlink when python>=3.10 is available completion_datetime = timezone.now() if "completion_date" in form.cleaned_data: From f9c1fb1763f127737d0f8970852fdd39a42c057e Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Thu, 18 Apr 2024 13:53:46 -0500 Subject: [PATCH 10/16] fix: link to all archive and ftp on submission post --- ietf/submit/tests.py | 19 +++++++++++++++++++ ietf/submit/utils.py | 4 ++++ 2 files changed, 23 insertions(+) diff --git a/ietf/submit/tests.py b/ietf/submit/tests.py index 6a4051f829..08b898c13a 100644 --- a/ietf/submit/tests.py +++ b/ietf/submit/tests.py @@ -221,6 +221,7 @@ def test_manualpost_cancel(self): class SubmitTests(BaseSubmitTestCase): def setUp(self): super().setUp() + (Path(settings.FTP_DIR) / "internet-drafts").mkdir() # Submit views assume there is a "next" IETF to look for cutoff dates against MeetingFactory(type_id='ietf', date=date_today()+datetime.timedelta(days=180)) @@ -954,6 +955,24 @@ def submit_new_individual(self, formats): self.assertEqual(new_revision.by.name, "Submitter Name") self.verify_bibxml_ids_creation(draft) + repository_path = Path(draft.get_file_name()) + self.assertTrue(repository_path.exists()) # Note that this doesn't check that it has the right _content_ + ftp_path = Path(settings.FTP_DIR) / "internet-drafts" / repository_path.name + self.assertTrue(repository_path.samefile(ftp_path)) + all_archive_path = Path(settings.INTERNET_ALL_DRAFTS_ARCHIVE_DIR) / repository_path.name + self.assertTrue(repository_path.samefile(all_archive_path)) + for ext in settings.IDSUBMIT_FILE_TYPES: + if ext == "txt": + continue + variant_path = repository_path.parent / f"{repository_path.stem}.{ext}" + if variant_path.exists(): + variant_ftp_path = Path(settings.FTP_DIR) / "internet-drafts" / variant_path.name + self.assertTrue(variant_path.samefile(variant_ftp_path)) + variant_all_archive_path = Path(settings.INTERNET_ALL_DRAFTS_ARCHIVE_DIR) / variant_path.name + self.assertTrue(variant_path.samefile(variant_all_archive_path)) + + + def test_submit_new_individual_txt(self): self.submit_new_individual(["txt"]) diff --git a/ietf/submit/utils.py b/ietf/submit/utils.py index 632b1e9a4a..7415ed4694 100644 --- a/ietf/submit/utils.py +++ b/ietf/submit/utils.py @@ -657,6 +657,10 @@ def move_files_to_repository(submission): # TODO: MULTIWRITE, both to the all-archive dir and the ftp dir # while here, question whether having IDSUBMIT_REPOSITORY_PATH and INTERNET_DRAFT_PATH as separate settings makes sense. move(source, dest) + all_archive_dest = Path(settings.INTERNET_ALL_DRAFTS_ARCHIVE_DIR) / dest.name + ftp_dest = Path(settings.FTP_DIR) / "internet-drafts" / dest.name + os.link(dest, all_archive_dest) + os.link(dest, ftp_dest) elif dest.exists(): log.log("Intended to move '%s' to '%s', but found source missing while destination exists.") elif ext in submission.file_types.split(','): From 8c042c4fb85e5e1c06fee97ea2c57bd7e2a4eb84 Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Thu, 18 Apr 2024 14:01:44 -0500 Subject: [PATCH 11/16] chore: clean comments, move action to github issue --- ietf/submit/utils.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/ietf/submit/utils.py b/ietf/submit/utils.py index 7415ed4694..a19c42ecf8 100644 --- a/ietf/submit/utils.py +++ b/ietf/submit/utils.py @@ -654,8 +654,6 @@ def move_files_to_repository(submission): source = Path(settings.IDSUBMIT_STAGING_PATH) / fname dest = Path(settings.IDSUBMIT_REPOSITORY_PATH) / fname if source.exists(): - # TODO: MULTIWRITE, both to the all-archive dir and the ftp dir - # while here, question whether having IDSUBMIT_REPOSITORY_PATH and INTERNET_DRAFT_PATH as separate settings makes sense. move(source, dest) all_archive_dest = Path(settings.INTERNET_ALL_DRAFTS_ARCHIVE_DIR) / dest.name ftp_dest = Path(settings.FTP_DIR) / "internet-drafts" / dest.name From df1ca0d853668994c6b55cc9a775a42e876ae9a7 Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Thu, 18 Apr 2024 17:11:47 -0500 Subject: [PATCH 12/16] fix: link idindex files to all archive and ftp --- ietf/idindex/tasks.py | 19 ++++++++++++++----- ietf/idindex/tests.py | 31 +++++++++++++++++-------------- 2 files changed, 31 insertions(+), 19 deletions(-) diff --git a/ietf/idindex/tasks.py b/ietf/idindex/tasks.py index 290095b8f5..5a50a80f4f 100644 --- a/ietf/idindex/tasks.py +++ b/ietf/idindex/tasks.py @@ -2,6 +2,7 @@ # # Celery task definitions # +import os import shutil import debug # pyflakes:ignore @@ -10,6 +11,7 @@ from contextlib import AbstractContextManager from pathlib import Path from tempfile import NamedTemporaryFile +from typing import List from .index import all_id_txt, all_id2_txt, id_index_txt @@ -26,10 +28,14 @@ def make_temp_file(self, content): tf.write(content) return tf_path - def move_into_place(self, src_path: Path, dest_path: Path): + def move_into_place(self, src_path: Path, dest_path: Path, hardlink_dirs: List[Path] = []): shutil.move(src_path, dest_path) dest_path.chmod(0o644) self.cleanup_list.remove(src_path) + for path in hardlink_dirs: + target = path / dest_path.name + target.unlink(missing_ok=True) + os.link(dest_path, target) # until python>=3.10 def cleanup(self): for tf_path in self.cleanup_list: @@ -43,9 +49,12 @@ def __exit__(self, exc_type, exc_val, exc_tb): @shared_task def idindex_update_task(): """Update I-D indexes""" + # Why are these not using values from django.conf.settings? id_path = Path("/a/ietfdata/doc/draft/repository") derived_path = Path("/a/ietfdata/derived") download_path = Path("/a/www/www6s/download") + ftp_path = Path("/a/ftp/internet-drafts") + all_archive_path = Path("/a/ietfdata/doc/draft/archive") with TempFileManager("/a/tmp") as tmp_mgr: # Generate copies of new contents @@ -70,17 +79,17 @@ def idindex_update_task(): # Move temp files as-atomically-as-possible into place # TODO: multiwrite - anything being written into id_path also needs to be written to the all archive path - tmp_mgr.move_into_place(all_id_tmpfile, id_path / "all_id.txt") + tmp_mgr.move_into_place(all_id_tmpfile, id_path / "all_id.txt", [ftp_path, all_archive_path]) tmp_mgr.move_into_place(derived_all_id_tmpfile, derived_path / "all_id.txt") tmp_mgr.move_into_place(download_all_id_tmpfile, download_path / "id-all.txt") - tmp_mgr.move_into_place(id_index_tmpfile, id_path / "1id-index.txt") + tmp_mgr.move_into_place(id_index_tmpfile, id_path / "1id-index.txt", [ftp_path, all_archive_path]) tmp_mgr.move_into_place(derived_id_index_tmpfile, derived_path / "1id-index.txt") tmp_mgr.move_into_place(download_id_index_tmpfile, download_path / "id-index.txt") - tmp_mgr.move_into_place(id_abstracts_tmpfile, id_path / "1id-abstracts.txt") + tmp_mgr.move_into_place(id_abstracts_tmpfile, id_path / "1id-abstracts.txt", [ftp_path, all_archive_path]) tmp_mgr.move_into_place(derived_id_abstracts_tmpfile, derived_path / "1id-abstracts.txt") tmp_mgr.move_into_place(download_id_abstracts_tmpfile, download_path / "id-abstract.txt") - tmp_mgr.move_into_place(all_id2_tmpfile, id_path / "all_id2.txt") + tmp_mgr.move_into_place(all_id2_tmpfile, id_path / "all_id2.txt", [ftp_path, all_archive_path]) tmp_mgr.move_into_place(derived_all_id2_tmpfile, derived_path / "all_id2.txt") diff --git a/ietf/idindex/tests.py b/ietf/idindex/tests.py index 31c3aaafbf..44abf805f0 100644 --- a/ietf/idindex/tests.py +++ b/ietf/idindex/tests.py @@ -188,17 +188,20 @@ def test_idindex_update_task( def test_temp_file_manager(self): with TemporaryDirectory() as temp_dir: - temp_path = Path(temp_dir) - with TempFileManager(temp_path) as tfm: - path1 = tfm.make_temp_file("yay") - path2 = tfm.make_temp_file("boo") # do not keep this one - self.assertTrue(path1.exists()) - self.assertTrue(path2.exists()) - dest = temp_path / "yay.txt" - tfm.move_into_place(path1, dest) - # make sure things were cleaned up... - self.assertFalse(path1.exists()) # moved to dest - self.assertFalse(path2.exists()) # left behind - # check destination contents and permissions - self.assertEqual(dest.read_text(), "yay") - self.assertEqual(dest.stat().st_mode & 0o777, 0o644) + with TemporaryDirectory() as other_dir: + temp_path = Path(temp_dir) + other_path = Path(other_dir) + with TempFileManager(temp_path) as tfm: + path1 = tfm.make_temp_file("yay") + path2 = tfm.make_temp_file("boo") # do not keep this one + self.assertTrue(path1.exists()) + self.assertTrue(path2.exists()) + dest = temp_path / "yay.txt" + tfm.move_into_place(path1, dest, [other_path]) + # make sure things were cleaned up... + self.assertFalse(path1.exists()) # moved to dest + self.assertFalse(path2.exists()) # left behind + # check destination contents and permissions + self.assertEqual(dest.read_text(), "yay") + self.assertEqual(dest.stat().st_mode & 0o777, 0o644) + self.assertTrue(dest.samefile(other_path / "yay.txt")) From 716c819fc4b7c3469c89e6396f01f32dbf87732c Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Thu, 18 Apr 2024 17:15:28 -0500 Subject: [PATCH 13/16] chore: deflake --- ietf/doc/views_charter.py | 2 +- ietf/doc/views_review.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/ietf/doc/views_charter.py b/ietf/doc/views_charter.py index a1f70a80f1..d44a675f68 100644 --- a/ietf/doc/views_charter.py +++ b/ietf/doc/views_charter.py @@ -450,7 +450,7 @@ def submit(request, name, option=None): ftp_filename = Path(settings.FTP_DIR) / "charter" / charter_filename.name try: os.link(charter_filename, ftp_filename) # os.link until we are on python>=3.10 - except IOError as e: + except IOError: log( "There was an error creating a hardlink at %s pointing to %s" % (ftp_filename, charter_filename) diff --git a/ietf/doc/views_review.py b/ietf/doc/views_review.py index ce55050f22..646b51b09c 100644 --- a/ietf/doc/views_review.py +++ b/ietf/doc/views_review.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- -import io import itertools import json import os From aa1e8803646271af98aa6ca29a5a0b1c3cbbaa4c Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Fri, 19 Apr 2024 14:41:42 -0500 Subject: [PATCH 14/16] chore: remove TODO comment --- ietf/idindex/tasks.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ietf/idindex/tasks.py b/ietf/idindex/tasks.py index 5a50a80f4f..9a6fedc74c 100644 --- a/ietf/idindex/tasks.py +++ b/ietf/idindex/tasks.py @@ -78,7 +78,6 @@ def idindex_update_task(): derived_all_id2_tmpfile = tmp_mgr.make_temp_file(all_id2_content) # Move temp files as-atomically-as-possible into place - # TODO: multiwrite - anything being written into id_path also needs to be written to the all archive path tmp_mgr.move_into_place(all_id_tmpfile, id_path / "all_id.txt", [ftp_path, all_archive_path]) tmp_mgr.move_into_place(derived_all_id_tmpfile, derived_path / "all_id.txt") tmp_mgr.move_into_place(download_all_id_tmpfile, download_path / "id-all.txt") From 2f8576562b118b15797d8f159a2415433f72cb49 Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Fri, 19 Apr 2024 14:55:05 -0500 Subject: [PATCH 15/16] fix: use settings --- ietf/idindex/tasks.py | 13 +++++++------ ietf/settings.py | 1 + 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/ietf/idindex/tasks.py b/ietf/idindex/tasks.py index 9a6fedc74c..6c80fc0df6 100644 --- a/ietf/idindex/tasks.py +++ b/ietf/idindex/tasks.py @@ -13,6 +13,8 @@ from tempfile import NamedTemporaryFile from typing import List +from django.conf import settings + from .index import all_id_txt, all_id2_txt, id_index_txt @@ -49,12 +51,11 @@ def __exit__(self, exc_type, exc_val, exc_tb): @shared_task def idindex_update_task(): """Update I-D indexes""" - # Why are these not using values from django.conf.settings? - id_path = Path("/a/ietfdata/doc/draft/repository") - derived_path = Path("/a/ietfdata/derived") - download_path = Path("/a/www/www6s/download") - ftp_path = Path("/a/ftp/internet-drafts") - all_archive_path = Path("/a/ietfdata/doc/draft/archive") + id_path = Path(settings.INTERNET_DRAFT_PATH) + derived_path = Path(settings.DERIVED_DIR) + download_path = Path(settings.ALLID_DOWNLOAD_DIR) + ftp_path = Path(settings.FTP_DIR) / "internet-drafts" + all_archive_path = Path(settings.INTERNET_ALL_DRAFTS_ARCHIVE_DIR) with TempFileManager("/a/tmp") as tmp_mgr: # Generate copies of new contents diff --git a/ietf/settings.py b/ietf/settings.py index 089cda8d19..179f9e7a2f 100644 --- a/ietf/settings.py +++ b/ietf/settings.py @@ -685,6 +685,7 @@ def skip_unreadable_post(record): MEETING_RECORDINGS_DIR = '/a/www/audio' DERIVED_DIR = '/a/ietfdata/derived' FTP_DIR = '/a/ftp' +ALLID_DOWNLOAD_DIR = '/a/www/www6s/download' DOCUMENT_FORMAT_ALLOWLIST = ["txt", "ps", "pdf", "xml", "html", ] From 6de2457483c1c59a522c6005acdb3951984abda4 Mon Sep 17 00:00:00 2001 From: Robert Sparks Date: Fri, 19 Apr 2024 15:36:39 -0500 Subject: [PATCH 16/16] chore: rename new setting --- ietf/idindex/tasks.py | 2 +- ietf/settings.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ietf/idindex/tasks.py b/ietf/idindex/tasks.py index 6c80fc0df6..6ae2efc5ee 100644 --- a/ietf/idindex/tasks.py +++ b/ietf/idindex/tasks.py @@ -53,7 +53,7 @@ def idindex_update_task(): """Update I-D indexes""" id_path = Path(settings.INTERNET_DRAFT_PATH) derived_path = Path(settings.DERIVED_DIR) - download_path = Path(settings.ALLID_DOWNLOAD_DIR) + download_path = Path(settings.ALL_ID_DOWNLOAD_DIR) ftp_path = Path(settings.FTP_DIR) / "internet-drafts" all_archive_path = Path(settings.INTERNET_ALL_DRAFTS_ARCHIVE_DIR) diff --git a/ietf/settings.py b/ietf/settings.py index 179f9e7a2f..cd8c0700a1 100644 --- a/ietf/settings.py +++ b/ietf/settings.py @@ -685,7 +685,7 @@ def skip_unreadable_post(record): MEETING_RECORDINGS_DIR = '/a/www/audio' DERIVED_DIR = '/a/ietfdata/derived' FTP_DIR = '/a/ftp' -ALLID_DOWNLOAD_DIR = '/a/www/www6s/download' +ALL_ID_DOWNLOAD_DIR = '/a/www/www6s/download' DOCUMENT_FORMAT_ALLOWLIST = ["txt", "ps", "pdf", "xml", "html", ]