From dc4c6bbfddc2efbafe621b31ea0c507fb9e95725 Mon Sep 17 00:00:00 2001 From: Fabian Vogt Date: Wed, 22 Nov 2023 17:23:58 +0100 Subject: [PATCH] Handle ZSTD compressed primary.xml.zst Add explicit zstd support to repo2fileprovides.py and port update_repo_handler.py over to use libsolv's transparent decompression. --- CONTENTS.md | 2 +- osclib/repochecks.py | 6 ++---- pkglistgen/update_repo_handler.py | 13 +++++-------- repo2fileprovides.py | 10 +++++++--- 4 files changed, 15 insertions(+), 16 deletions(-) diff --git a/CONTENTS.md b/CONTENTS.md index 4e6c1b45a..727d208b8 100644 --- a/CONTENTS.md +++ b/CONTENTS.md @@ -178,7 +178,7 @@ Script to generate necessary FileProvides lines needed by OBS from repo data. * Sources: [repo2fileprovides.py](repo2fileprovides.py) * Documentation: -- * Package: -- -* Usage: repo2fileprovides.py primary.xml(.gz) +* Usage: repo2fileprovides.py primary.xml(.gz|.zst) ### Bots diff --git a/osclib/repochecks.py b/osclib/repochecks.py index 70691a812..5fcf204d0 100644 --- a/osclib/repochecks.py +++ b/osclib/repochecks.py @@ -174,17 +174,15 @@ def installcheck(directories, arch, whitelist, ignore_conflicts): def mirrorRepomd(cachedir, url): - # Use repomd.xml to get the location of primary.xml.gz + # Use repomd.xml to get the location of primary.xml.* repoindex = ET.fromstring(requests.get('{}/repodata/repomd.xml'.format(url)).content) primarypath = repoindex.xpath("string(./repo:data[@type='primary']/repo:location/@href)", namespaces={'repo': 'http://linux.duke.edu/metadata/repo'}) - if not primarypath.endswith(".xml.gz"): - raise Exception('unsupported primary format') primarydest = os.path.join(cachedir, os.path.basename(primarypath)) if not os.path.exists(primarydest): # Delete the old files first - for oldfile in glob.glob(glob.escape(cachedir) + "/*.xml.gz"): + for oldfile in glob.glob(glob.escape(cachedir) + "/*.xml.*"): os.unlink(oldfile) with tempfile.NamedTemporaryFile(dir=cachedir) as primarytemp: diff --git a/pkglistgen/update_repo_handler.py b/pkglistgen/update_repo_handler.py index 2d803da24..7381f932c 100644 --- a/pkglistgen/update_repo_handler.py +++ b/pkglistgen/update_repo_handler.py @@ -1,8 +1,6 @@ import glob -import gzip import hashlib -import io import logging import os.path import re @@ -99,12 +97,11 @@ def parse_repomd(repo, baseurl): if sha != sha_expected: raise Exception('checksums do not match {} != {}'.format(sha, sha_expected)) - content = gzip.GzipFile(fileobj=io.BytesIO(primary.content)) os.lseek(f.fileno(), 0, os.SEEK_SET) - f.write(content.read()) + f.write(primary.content) f.flush() os.lseek(f.fileno(), 0, os.SEEK_SET) - repo.add_rpmmd(solv.xfopen_fd(None, f.fileno()), None, 0) + repo.add_rpmmd(solv.xfopen_fd(url, f.fileno()), None, 0) return True return False @@ -132,13 +129,13 @@ def parse_susetags(repo, baseurl): if packages.status_code != requests.codes.ok: raise Exception(url + ' does not exist') - content = gzip.GzipFile(fileobj=io.BytesIO(packages.content)) os.lseek(f.fileno(), 0, os.SEEK_SET) - f.write(content.read()) + f.write(packages.content) f.flush() os.lseek(f.fileno(), 0, os.SEEK_SET) try: - repo.add_susetags(f, defvendorid, None, solv.Repo.REPO_NO_INTERNALIZE | solv.Repo.SUSETAGS_RECORD_SHARES) + repo.add_susetags(solv.xfopen_fd(url, f.fileno()), defvendorid, None, + solv.Repo.REPO_NO_INTERNALIZE | solv.Repo.SUSETAGS_RECORD_SHARES) except TypeError: logger.error(f"Failed to add susetags for {url}") return False diff --git a/repo2fileprovides.py b/repo2fileprovides.py index e8a3de8d5..9d3042ead 100755 --- a/repo2fileprovides.py +++ b/repo2fileprovides.py @@ -1,18 +1,22 @@ #!/usr/bin/python3 import gzip +import pyzstd import sys from collections import defaultdict from lxml import etree if len(sys.argv) != 2: print("Script to generate necessary FileProvides lines needed by OBS from repo data.", file=sys.stderr) - print("Usage: repo2fileprovides.py primary.xml(.gz)", file=sys.stderr) + print("Usage: repo2fileprovides.py primary.xml(.gz|.zst)", file=sys.stderr) sys.exit(1) repofilename = sys.argv[1] -xmlfile = open(repofilename, 'rb') if repofilename.endswith('.gz'): - xmlfile = gzip.GzipFile(fileobj=xmlfile) + xmlfile = gzip.GzipFile(repofilename) +elif repofilename.endswith('.zst'): + xmlfile = pyzstd.ZstdFile(repofilename) +else: + xmlfile = open(repofilename, 'rb') NS = {'md': 'http://linux.duke.edu/metadata/common', 'rpm': 'http://linux.duke.edu/metadata/rpm'}