From 4ffd44a446aa0d2e9e4cfb267703babd53e2e27b Mon Sep 17 00:00:00 2001 From: Mark Grimes Date: Wed, 17 Jun 2015 01:03:15 +0200 Subject: [PATCH 1/2] Add override to scan_opcodes_25 which also scans for process.load statements --- FWCore/ParameterSet/python/TreeCrawler.py | 136 +++++++++++++++++++++- 1 file changed, 134 insertions(+), 2 deletions(-) diff --git a/FWCore/ParameterSet/python/TreeCrawler.py b/FWCore/ParameterSet/python/TreeCrawler.py index 9118f086e98b8..bcf89ea215315 100755 --- a/FWCore/ParameterSet/python/TreeCrawler.py +++ b/FWCore/ParameterSet/python/TreeCrawler.py @@ -23,7 +23,7 @@ # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -import sys, os, inspect, copy +import sys, os, inspect, copy, struct, dis import modulefinder def packageNameFromFilename(name): @@ -107,7 +107,7 @@ def import_hook(self, name, caller=None, fromlist=None, level=-1): def import_module(self,partnam,fqname,parent): - if partnam in ("FWCore","os"): + if partnam in ("FWCore","os","unittest"): r = None else: r = modulefinder.ModuleFinder.import_module(self,partnam,fqname,parent) @@ -126,6 +126,138 @@ def load_module(self, fqname, fp, pathname, (suffix, mode, type)): self._types[r.__name__] = type return r + def scan_opcodes_25(self, co, unpack = struct.unpack): + """ + This is basically just the default opcode scanner from ModuleFinder, but extended to also + look for "process.load()' commands. This is complicated by the fact that we don't + know what the name of the Process object is (usually "process", but doesn't have to be). + So we have to also scan for declarations of Process objects. This is in turn is complicated + by the fact that we don't know how FWCore.ParameterSet.Config has been imported (usually + "... as cms" but doesn't have to be) so we also have to scan for that import. 
+ + So, the additional parts are: + + 1) Scan for the FWCore.ParameterSet.Config import and note down what name it's imported as. + 2) Scan for Process declarations using the name noted in (1), record any of the object names. + 3) Scan for "load" method calls to anything noted in (2) and yield their arguments. + + The ModuleFinder.scan_opcodes_25 implementation I based this on I got from + https://hg.python.org/cpython/file/2.7/Lib/modulefinder.py#l364 + """ + # Scan the code, and yield 'interesting' opcode combinations + # Python 2.5 version (has absolute and relative imports) + code = co.co_code + names = co.co_names + consts = co.co_consts + LOAD_CONST = modulefinder.LOAD_CONST + IMPORT_NAME = modulefinder.IMPORT_NAME + STORE_OPS = modulefinder.STORE_OPS + STORE_NAME = modulefinder.STORE_NAME + HAVE_ARGUMENT = modulefinder.HAVE_ARGUMENT + LOAD_NAME = chr(dis.opname.index('LOAD_NAME')) + LOAD_ATTR = chr(dis.opname.index('LOAD_ATTR')) + LOAD_LOAD_AND_IMPORT = LOAD_CONST + LOAD_CONST + IMPORT_NAME + + try : + fwCoreIndex=names.index('FWCore.ParameterSet.Config') + loadMethodOpargs=struct.pack( '=len(processDefinitionOpcodes) : # Check for step (2) in comment at top + isProcessDefinition=True + for index in xrange( len(processDefinitionOpcodes) ) : + if code[index]!=processDefinitionOpcodes[index] : + isProcessDefinition=False + if isProcessDefinition : + code=code[len(processDefinitionOpcodes):] # Trim off what I've just checked + # The only thing I'm interested in is what name is given to the new object, + # so that I can search for ".load( )". + while code[0]!=STORE_NAME : + if code[0] >= HAVE_ARGUMENT: + code = code[3:] + else: + code = code[1:] + # I've hit the opcode which tells me what the name of the object is (probably "process" + # but I don't know for sure). 
Note that I'm storing the opcodes for retrieving the object + # rather than a string of the object name + processObjects.append( (LOAD_NAME,code[1],code[2]) ) + continue + + # Wasn't a declaration of a new Process object. See if it is accessing a pre existing one + if len(code)>=9 : # Check for step (3) in comment at top + for processObject in processObjects : + if processObject==(code[0],code[1],code[2]) : + # One of the process objects is being accessed. See if it's calling the "load" method + if (code[3],code[4],code[5])==loadMethodOpcodes : + if code[6]==LOAD_CONST : + moduleNameIndex=unpack('= HAVE_ARGUMENT: + code = code[3:] + else: + code = code[1:] + def transformIntoGraph(depgraph,toplevel): packageDict = {} From 0a444c07cd7ff11cf01bd2a1ec488a757ad0ce6e Mon Sep 17 00:00:00 2001 From: Mark Grimes Date: Thu, 18 Jun 2015 15:45:43 +0200 Subject: [PATCH 2/2] Change scan_opcodes_25 override to look for any "load" call so that it also works with customisation functions --- FWCore/ParameterSet/python/TreeCrawler.py | 179 +++++++++++----------- 1 file changed, 90 insertions(+), 89 deletions(-) diff --git a/FWCore/ParameterSet/python/TreeCrawler.py b/FWCore/ParameterSet/python/TreeCrawler.py index bcf89ea215315..821a320c089c2 100755 --- a/FWCore/ParameterSet/python/TreeCrawler.py +++ b/FWCore/ParameterSet/python/TreeCrawler.py @@ -23,7 +23,7 @@ # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-import sys, os, inspect, copy, struct, dis +import sys, os, inspect, copy, struct, dis, imp import modulefinder def packageNameFromFilename(name): @@ -107,7 +107,7 @@ def import_hook(self, name, caller=None, fromlist=None, level=-1): def import_module(self,partnam,fqname,parent): - if partnam in ("FWCore","os","unittest"): + if partnam in ("os","unittest"): r = None else: r = modulefinder.ModuleFinder.import_module(self,partnam,fqname,parent) @@ -129,19 +129,16 @@ def load_module(self, fqname, fp, pathname, (suffix, mode, type)): def scan_opcodes_25(self, co, unpack = struct.unpack): """ This is basically just the default opcode scanner from ModuleFinder, but extended to also - look for "process.load()' commands. This is complicated by the fact that we don't - know what the name of the Process object is (usually "process", but doesn't have to be). - So we have to also scan for declarations of Process objects. This is in turn is complicated - by the fact that we don't know how FWCore.ParameterSet.Config has been imported (usually - "... as cms" but doesn't have to be) so we also have to scan for that import. - - So, the additional parts are: - - 1) Scan for the FWCore.ParameterSet.Config import and note down what name it's imported as. - 2) Scan for Process declarations using the name noted in (1), record any of the object names. - 3) Scan for "load" method calls to anything noted in (2) and yield their arguments. - - The ModuleFinder.scan_opcodes_25 implementation I based this on I got from + look for "process.load()" commands. Since the Process object might not necessarily + be called "process", it scans for a call to a "load" method with a single parameter on + *any* object. If one is found it checks if the parameter is a string that refers to a valid + python module in the local or global area. If it does, the scanner assumes this was a call + to a Process object and yields the module name. 
+ It's not possible to scan first for Process object declarations to get the name of the + objects since often (e.g. for customisation functions) the object is passed to a function + in a different file. + + The ModuleFinder.scan_opcodes_25 implementation this is based on was taken from https://hg.python.org/cpython/file/2.7/Lib/modulefinder.py#l364 """ # Scan the code, and yield 'interesting' opcode combinations @@ -152,69 +149,78 @@ def scan_opcodes_25(self, co, unpack = struct.unpack): LOAD_CONST = modulefinder.LOAD_CONST IMPORT_NAME = modulefinder.IMPORT_NAME STORE_OPS = modulefinder.STORE_OPS - STORE_NAME = modulefinder.STORE_NAME HAVE_ARGUMENT = modulefinder.HAVE_ARGUMENT - LOAD_NAME = chr(dis.opname.index('LOAD_NAME')) LOAD_ATTR = chr(dis.opname.index('LOAD_ATTR')) + LOAD_NAME = chr(dis.opname.index('LOAD_NAME')) + CALL_FUNCTION = chr(dis.opname.index('CALL_FUNCTION')) LOAD_LOAD_AND_IMPORT = LOAD_CONST + LOAD_CONST + IMPORT_NAME try : - fwCoreIndex=names.index('FWCore.ParameterSet.Config') - loadMethodOpargs=struct.pack( '=len(processDefinitionOpcodes) : # Check for step (2) in comment at top - isProcessDefinition=True - for index in xrange( len(processDefinitionOpcodes) ) : - if code[index]!=processDefinitionOpcodes[index] : - isProcessDefinition=False - if isProcessDefinition : - code=code[len(processDefinitionOpcodes):] # Trim off what I've just checked - # The only thing I'm interested in is what name is given to the new object, - # so that I can search for ".load( )". - while code[0]!=STORE_NAME : - if code[0] >= HAVE_ARGUMENT: - code = code[3:] - else: - code = code[1:] - # I've hit the opcode which tells me what the name of the object is (probably "process" - # but I don't know for sure). 
Note that I'm storing the opcodes for retrieving the object - # rather than a string of the object name - processObjects.append( (LOAD_NAME,code[1],code[2]) ) + # Check to see if this is a call to a "load" method + if loadMethodOpcodes!=None and len(code)>=9 : # Need at least 9 codes for the full call + if code[:3]==loadMethodOpcodes : + # The attribute "load" is being accessed, need to make sure this is a function call. + # I'll look ahead and see if the CALL_FUNCTION code is used - this could be in a different + # place depending on the number of arguments, but I'm only interested in methods with a + # single argument so I know exactly where CALL_FUNCTION should be. + if code[6]==CALL_FUNCTION : + # I know this is calling a method called "load" with one argument. I need + # to find out what the argument is. Note that I still don't know if this is + # on a cms.Process object. + indexInTable=unpack('=9 : # Check for step (3) in comment at top - for processObject in processObjects : - if processObject==(code[0],code[1],code[2]) : - # One of the process objects is being accessed. See if it's calling the "load" method - if (code[3],code[4],code[5])==loadMethodOpcodes : - if code[6]==LOAD_CONST : - moduleNameIndex=unpack('" + print " ",node.name + currentStack[-1].dependencies.remove(node) + except ValueError: + # No recursive loop found, so continue traversing the tree + currentStack.append( node ) + for subnode in node.dependencies : + removeRecursiveLoops( subnode, verbose, currentStack[:] ) def transformIntoGraph(depgraph,toplevel): packageDict = {} @@ -276,6 +276,7 @@ def transformIntoGraph(depgraph,toplevel): package = packageDict[key] package.dependencies = [packageDict[name] for name in value.keys() if name.count(".") == 2] + removeRecursiveLoops( packageDict[toplevel] ) # find and return the top level config return packageDict[toplevel]