@@ -25,6 +25,7 @@
from argparse import ArgumentParser
import multiprocessing as mp
import logging as log
+import yaml
from dep11 import DataCache, MetadataExtractor
from .component import get_dep11_header
@@ -49,7 +50,7 @@ def extract_metadata(mde, sn, pkg):
cpts = mde.process(pkg)
msgtxt = " Processed ({0} /{1} ): %s (%s /%s ), found %i " % (pkg.name, sn, pkg.arch, len (cpts))
- return ( msgtxt, all ( not x.has_ignore_reason() for x in cpts))
+ return msgtxt
class DEP11Generator :
@@ -174,7 +175,6 @@ def process_suite(self, suite_name):
for component in suite[' components' ]:
all_cpt_pkgs = list ()
- new_components = False
for arch in suite[' architectures' ]:
pkglist = self ._get_packages_for(suite_name, component, arch)
@@ -189,94 +189,103 @@ def process_suite(self, suite_name):
pkgs_todo[pkid] = pkg
if not pkgs_todo:
- log.info(" Skipped %s /%s /%s , no new packages to process." % (suite_name, component, arch))
- continue
-
- # set up metadata extractor
- icon_theme = suite.get(' useIconTheme' )
- iconh = IconHandler(suite_name, component, arch, self ._archive_root,
- icon_theme, base_suite_name = suite.get(' baseSuite' ))
- iconh.set_wanted_icon_sizes(self ._icon_sizes)
- mde = MetadataExtractor(suite_name,
- component,
- self ._cache,
- iconh)
-
- # Multiprocessing can't cope with LMDB open in the cache,
- # but instead of throwing an error or doing something else
- # that makes debugging easier, it just silently skips each
- # multprocessing task. Stupid thing.
- # (remember to re-open the cache later)
- self ._cache.close()
-
- # set up multiprocessing
- with mp.Pool(maxtasksperchild = 24 ) as pool:
- count = 1
- def handle_results (result ):
- nonlocal count
- nonlocal new_components
- (message, any_components) = result
- new_components = new_components or any_components
- log.info(message.format(count, len (pkgs_todo)))
- count += 1
-
- def handle_error (e ):
- traceback.print_exception(type (e), e, e.__traceback__ )
- log.error(str (e))
- pool.terminate()
- sys.exit(5 )
-
- log.info(" Processing %i packages in %s /%s /%s " % (len (pkgs_todo), suite_name, component, arch))
- for pkid, pkg in pkgs_todo.items():
- package_fname = os.path.join (self ._archive_root, pkg.filename)
- if not os.path.exists(package_fname):
- log.warning(' Package not found: %s ' % (package_fname))
- continue
- pkg.filename = package_fname
- pool.apply_async(extract_metadata,
- (mde, suite_name, pkg),
- callback = handle_results, error_callback = handle_error)
- pool.close()
- pool.join()
-
- # reopen the cache, we need it
- self ._cache.reopen()
+ log.info(" No new packages to process in %s /%s /%s ." % (suite_name, component, arch))
+ else :
+ # set up metadata extractor
+ icon_theme = suite.get(' useIconTheme' )
+ iconh = IconHandler(suite_name, component, arch, self ._archive_root,
+ icon_theme, base_suite_name = suite.get(' baseSuite' ))
+ iconh.set_wanted_icon_sizes(self ._icon_sizes)
+ mde = MetadataExtractor(suite_name,
+ component,
+ self ._cache,
+ iconh)
+
+ # Multiprocessing can't cope with LMDB open in the cache,
+ # but instead of throwing an error or doing something else
+ # that makes debugging easier, it just silently skips each
+ # multprocessing task. Stupid thing.
+ # (remember to re-open the cache later)
+ self ._cache.close()
+
+ # set up multiprocessing
+ with mp.Pool(maxtasksperchild = 24 ) as pool:
+ count = 1
+ def handle_results (result ):
+ nonlocal count
+ log.info(result.format(count, len (pkgs_todo)))
+ count += 1
+
+ def handle_error (e ):
+ traceback.print_exception(type (e), e, e.__traceback__ )
+ log.error(str (e))
+ pool.terminate()
+ sys.exit(5 )
+
+ log.info(" Processing %i packages in %s /%s /%s " % (len (pkgs_todo), suite_name, component, arch))
+ for pkid, pkg in pkgs_todo.items():
+ package_fname = os.path.join (self ._archive_root, pkg.filename)
+ if not os.path.exists(package_fname):
+ log.warning(' Package not found: %s ' % (package_fname))
+ continue
+ pkg.filename = package_fname
+ pool.apply_async(extract_metadata,
+ (mde, suite_name, pkg),
+ callback = handle_results, error_callback = handle_error)
+ pool.close()
+ pool.join()
+
+ # reopen the cache, we need it
+ self ._cache.reopen()
hints_dir = os.path.join(self ._export_dir, " hints" , suite_name, component)
if not os.path.exists(hints_dir):
os.makedirs(hints_dir)
hints_fname = os.path.join(hints_dir, " DEP11Hints_%s .yml.gz" % (arch))
hints_f = gzip.open(hints_fname+ " .new" , ' wb' )
- dep11_header = get_dep11_header(self ._repo_name, suite_name, component, os.path.join(self ._dep11_url, component), suite.get(' dataPriority' , 0 ))
-
dep11_dir = os.path.join(self ._export_dir, " data" , suite_name, component)
- if not os.path.exists (dep11_dir):
- os.makedirs(dep11_dir)
-
- if not new_components :
- log.info( " Skipping %s / %s / %s , no components in any of the new packages. " , suite_name, component, arch)
- else :
- # now write data to disk
- data_fname = os.path.join(dep11_dir, " Components- %s .yml.gz " % (arch))
-
- data_f = gzip.open(data_fname + " .new " , ' wb ' )
-
- data_f.write( bytes (dep11_header, ' utf-8 ' ))
-
+ data_fname = os.path.join (dep11_dir, " Components- %s .yml.gz " % (arch))
+ data_output = b ' '
+ last_seen_pkgs = set ()
+ try :
+ for y in yaml.load_all(gzip.open(data_fname, ' r ' )):
+ if ' Package ' in y :
+ last_seen_pkgs.add(y[ ' Package ' ])
+ except FileNotFoundError :
+ pass
+
+ data_output = b ' '
+ write_components = False
+ # now write data to disk
for pkg in pkglist:
pkid = pkg.pkid
- if new_components:
- data = self ._cache.get_metadata_for_pkg(pkid)
- if data:
- data_f.write(bytes (data, ' utf-8' ))
+ data = self ._cache.get_metadata_for_pkg(pkid)
+ if data:
+ data_output += bytes (data, ' utf-8' )
+ if pkg.name not in last_seen_pkgs:
+ log.info (" Haven't seen %s before, so will write output." , pkid)
+ write_components = True
+ else :
+ last_seen_pkgs.remove(pkg.name)
hint = self ._cache.get_hints(pkid)
if hint:
hints_f.write(bytes (hint, ' utf-8' ))
- if new_components:
+ if write_components or last_seen_pkgs:
+ dep11_header = get_dep11_header(self ._repo_name, suite_name, component, os.path.join(self ._dep11_url, component), suite.get(' dataPriority' , 0 ))
+
+ if not os.path.exists(dep11_dir):
+ os.makedirs(dep11_dir)
+
+ data_f = gzip.open(data_fname+ " .new" , ' wb' )
+
+ data_f.write(bytes (dep11_header, ' utf-8' ) + data_output)
+
data_f.close()
safe_move_file(data_fname+ " .new" , data_fname)
+ else :
+ log.info(" Skipping %s /%s /%s , no components in any of the new packages, and no packages moved suite." , suite_name, component, arch)
hints_f.close()
safe_move_file(hints_fname+ " .new" , hints_fname)
0 comments on commit
86fa951