Permalink
Browse files

generator: Look at the previously written Components file to decide if we need to update

It's not enough to see if there are new components, since things can
move between suites, for example when transitioning from unstable to
testing. Instead we now read the previous Components file, if any, and
write a new one if we either find a package that wasn't in there before,
or don't find a package that was (deletions).
  • Loading branch information...
1 parent 0268ddf commit 86fa951946c5af63b7273fe78f80cdc7be455907 @iainlane committed Mar 17, 2016
Showing with 83 additions and 74 deletions.
  1. +83 −74 dep11/generator.py
View
@@ -25,6 +25,7 @@
from argparse import ArgumentParser
import multiprocessing as mp
import logging as log
+import yaml
from dep11 import DataCache, MetadataExtractor
from .component import get_dep11_header
@@ -49,7 +50,7 @@ def extract_metadata(mde, sn, pkg):
cpts = mde.process(pkg)
msgtxt = "Processed ({0}/{1}): %s (%s/%s), found %i" % (pkg.name, sn, pkg.arch, len(cpts))
- return (msgtxt, all(not x.has_ignore_reason() for x in cpts))
+ return msgtxt
class DEP11Generator:
@@ -174,7 +175,6 @@ def process_suite(self, suite_name):
for component in suite['components']:
all_cpt_pkgs = list()
- new_components = False
for arch in suite['architectures']:
pkglist = self._get_packages_for(suite_name, component, arch)
@@ -189,94 +189,103 @@ def process_suite(self, suite_name):
pkgs_todo[pkid] = pkg
if not pkgs_todo:
- log.info("Skipped %s/%s/%s, no new packages to process." % (suite_name, component, arch))
- continue
-
- # set up metadata extractor
- icon_theme = suite.get('useIconTheme')
- iconh = IconHandler(suite_name, component, arch, self._archive_root,
- icon_theme, base_suite_name=suite.get('baseSuite'))
- iconh.set_wanted_icon_sizes(self._icon_sizes)
- mde = MetadataExtractor(suite_name,
- component,
- self._cache,
- iconh)
-
- # Multiprocessing can't cope with LMDB open in the cache,
- # but instead of throwing an error or doing something else
- # that makes debugging easier, it just silently skips each
- # multprocessing task. Stupid thing.
- # (remember to re-open the cache later)
- self._cache.close()
-
- # set up multiprocessing
- with mp.Pool(maxtasksperchild=24) as pool:
- count = 1
- def handle_results(result):
- nonlocal count
- nonlocal new_components
- (message, any_components) = result
- new_components = new_components or any_components
- log.info(message.format(count, len(pkgs_todo)))
- count += 1
-
- def handle_error(e):
- traceback.print_exception(type(e), e, e.__traceback__)
- log.error(str(e))
- pool.terminate()
- sys.exit(5)
-
- log.info("Processing %i packages in %s/%s/%s" % (len(pkgs_todo), suite_name, component, arch))
- for pkid, pkg in pkgs_todo.items():
- package_fname = os.path.join (self._archive_root, pkg.filename)
- if not os.path.exists(package_fname):
- log.warning('Package not found: %s' % (package_fname))
- continue
- pkg.filename = package_fname
- pool.apply_async(extract_metadata,
- (mde, suite_name, pkg),
- callback=handle_results, error_callback=handle_error)
- pool.close()
- pool.join()
-
- # reopen the cache, we need it
- self._cache.reopen()
+ log.info("No new packages to process in %s/%s/%s." % (suite_name, component, arch))
+ else:
+ # set up metadata extractor
+ icon_theme = suite.get('useIconTheme')
+ iconh = IconHandler(suite_name, component, arch, self._archive_root,
+ icon_theme, base_suite_name=suite.get('baseSuite'))
+ iconh.set_wanted_icon_sizes(self._icon_sizes)
+ mde = MetadataExtractor(suite_name,
+ component,
+ self._cache,
+ iconh)
+
+ # Multiprocessing can't cope with LMDB open in the cache,
+ # but instead of throwing an error or doing something else
+ # that makes debugging easier, it just silently skips each
+ # multprocessing task. Stupid thing.
+ # (remember to re-open the cache later)
+ self._cache.close()
+
+ # set up multiprocessing
+ with mp.Pool(maxtasksperchild=24) as pool:
+ count = 1
+ def handle_results(result):
+ nonlocal count
+ log.info(result.format(count, len(pkgs_todo)))
+ count += 1
+
+ def handle_error(e):
+ traceback.print_exception(type(e), e, e.__traceback__)
+ log.error(str(e))
+ pool.terminate()
+ sys.exit(5)
+
+ log.info("Processing %i packages in %s/%s/%s" % (len(pkgs_todo), suite_name, component, arch))
+ for pkid, pkg in pkgs_todo.items():
+ package_fname = os.path.join (self._archive_root, pkg.filename)
+ if not os.path.exists(package_fname):
+ log.warning('Package not found: %s' % (package_fname))
+ continue
+ pkg.filename = package_fname
+ pool.apply_async(extract_metadata,
+ (mde, suite_name, pkg),
+ callback=handle_results, error_callback=handle_error)
+ pool.close()
+ pool.join()
+
+ # reopen the cache, we need it
+ self._cache.reopen()
hints_dir = os.path.join(self._export_dir, "hints", suite_name, component)
if not os.path.exists(hints_dir):
os.makedirs(hints_dir)
hints_fname = os.path.join(hints_dir, "DEP11Hints_%s.yml.gz" % (arch))
hints_f = gzip.open(hints_fname+".new", 'wb')
- dep11_header = get_dep11_header(self._repo_name, suite_name, component, os.path.join(self._dep11_url, component), suite.get('dataPriority', 0))
-
dep11_dir = os.path.join(self._export_dir, "data", suite_name, component)
- if not os.path.exists(dep11_dir):
- os.makedirs(dep11_dir)
-
- if not new_components:
- log.info("Skipping %s/%s/%s, no components in any of the new packages.", suite_name, component, arch)
- else:
- # now write data to disk
- data_fname = os.path.join(dep11_dir, "Components-%s.yml.gz" % (arch))
-
- data_f = gzip.open(data_fname+".new", 'wb')
-
- data_f.write(bytes(dep11_header, 'utf-8'))
-
+ data_fname = os.path.join(dep11_dir, "Components-%s.yml.gz" % (arch))
+ data_output = b''
+ last_seen_pkgs = set()
+ try:
+ for y in yaml.load_all(gzip.open(data_fname, 'r')):
+ if 'Package' in y:
+ last_seen_pkgs.add(y['Package'])
+ except FileNotFoundError:
+ pass
+
+ data_output = b''
+ write_components = False
+ # now write data to disk
for pkg in pkglist:
pkid = pkg.pkid
- if new_components:
- data = self._cache.get_metadata_for_pkg(pkid)
- if data:
- data_f.write(bytes(data, 'utf-8'))
+ data = self._cache.get_metadata_for_pkg(pkid)
+ if data:
+ data_output += bytes(data, 'utf-8')
+ if pkg.name not in last_seen_pkgs:
+ log.info ("Haven't seen %s before, so will write output.", pkid)
+ write_components = True
+ else:
+ last_seen_pkgs.remove(pkg.name)
hint = self._cache.get_hints(pkid)
if hint:
hints_f.write(bytes(hint, 'utf-8'))
- if new_components:
+ if write_components or last_seen_pkgs:
+ dep11_header = get_dep11_header(self._repo_name, suite_name, component, os.path.join(self._dep11_url, component), suite.get('dataPriority', 0))
+
+ if not os.path.exists(dep11_dir):
+ os.makedirs(dep11_dir)
+
+ data_f = gzip.open(data_fname+".new", 'wb')
+
+ data_f.write(bytes(dep11_header, 'utf-8') + data_output)
+
data_f.close()
safe_move_file(data_fname+".new", data_fname)
+ else:
+ log.info("Skipping %s/%s/%s, no components in any of the new packages, and no packages moved suite.", suite_name, component, arch)
hints_f.close()
safe_move_file(hints_fname+".new", hints_fname)

0 comments on commit 86fa951

Please sign in to comment.