From a2b302d78416c8d214a2a72abc27f1062daf5ef3 Mon Sep 17 00:00:00 2001
From: Heshy Roskes <jroskes1@jhu.edu>
Date: Tue, 13 Sep 2016 21:35:10 +0200
Subject: [PATCH 1/5] split __createSnippet into 3 functions; filter the file
 list when possible to avoid unnecessary opening and closing; add function
 for HipPy file list

---
 .../python/TkAlAllInOneTool/dataset.py        | 139 ++++++++++++------
 1 file changed, 95 insertions(+), 44 deletions(-)

diff --git a/Alignment/OfflineValidation/python/TkAlAllInOneTool/dataset.py b/Alignment/OfflineValidation/python/TkAlAllInOneTool/dataset.py
index a4b029262bd76..d9550f614e1ca 100644
--- a/Alignment/OfflineValidation/python/TkAlAllInOneTool/dataset.py
+++ b/Alignment/OfflineValidation/python/TkAlAllInOneTool/dataset.py
@@ -112,42 +112,9 @@ def __chunks( self, theList, n ):
                                "input = cms.untracked.int32(%(nEvents)s) )\n"
                                "%(skipEventsString)s\n")
 
-    def __createSnippet( self, jsonPath = None, begin = None, end = None,
-                         firstRun = None, lastRun = None, repMap = None,
-                         crab = False, parent = False ):
-        if firstRun:
-            firstRun = int( firstRun )
-        if lastRun:
-            lastRun = int( lastRun )
-        if ( begin and firstRun ) or ( end and lastRun ):
-            msg = ( "The Usage of "
-                    + "'begin' & 'firstRun' " * int( bool( begin and
-                                                           firstRun ) )
-                    + "and " * int( bool( ( begin and firstRun ) and
-                                         ( end and lastRun ) ) )
-                    + "'end' & 'lastRun' " * int( bool( end and lastRun ) )
-                    + "is ambigous." )
-            raise AllInOneError( msg )
-        if begin or end:
-            ( firstRun, lastRun ) = self.convertTimeToRun(
-                begin = begin, end = end, firstRun = firstRun,
-                lastRun = lastRun )
-        if ( firstRun and lastRun ) and ( firstRun > lastRun ):
-            msg = ( "The lower time/runrange limit ('begin'/'firstRun') "
-                    "chosen is greater than the upper time/runrange limit "
-                    "('end'/'lastRun').")
-            raise AllInOneError( msg )
-        if self.predefined() and (jsonPath or begin or end or firstRun or lastRun):
-            msg = ( "The parameters 'JSON', 'begin', 'end', 'firstRun', and 'lastRun'"
-                    "only work for official datasets, not predefined _cff.py files" )
-            raise AllInOneError( msg )
-        goodLumiSecStr = ""
-        lumiStr = ""
+    def __lumiSelectionSnippet( self, jsonPath = None, firstRun = None, lastRun = None ):
         lumiSecExtend = ""
         if firstRun or lastRun or jsonPath:
-            goodLumiSecStr = ( "lumiSecs = cms.untracked."
-                               "VLuminosityBlockRange()\n" )
-            lumiStr = "                    lumisToProcess = lumiSecs,\n"
             if not jsonPath:
                 selectedRunList = self.__getRunList()
                 if firstRun:
@@ -224,15 +191,11 @@ def __createSnippet( self, jsonPath = None, begin = None, end = None,
             else:
                 msg = "You are trying to run a validation without any runs!  Check that:"
                 if firstRun or lastRun:
-                    msg += "\n - firstRun and lastRun are correct for this dataset, and there are runs in between containing data"
+                    msg += "\n - firstRun/begin and lastRun/end are correct for this dataset, and there are runs in between containing data"
                 if jsonPath:
                     msg += "\n - your JSON file is correct for this dataset, and the runs contain data"
                 if (firstRun or lastRun) and jsonPath:
-                    msg += "\n - firstRun and lastRun are consistent with your JSON file"
-                if begin:
-                    msg = msg.replace("firstRun", "begin")
-                if end:
-                    msg = msg.replace("lastRun", "end")
+                    msg += "\n - firstRun/begin and lastRun/end are consistent with your JSON file"
                 raise AllInOneError(msg)
 
         else:
@@ -240,23 +203,66 @@ def __createSnippet( self, jsonPath = None, begin = None, end = None,
             self.__firstusedrun = int(self.__findInJson(self.__getRunList()[0],"run_number"))
             self.__lastusedrun = int(self.__findInJson(self.__getRunList()[-1],"run_number"))
 
+        return lumiSecExtend
+
+    def __fileListSnippet(self, crab=False, parent=False, firstRun=None, lastRun=None, forcerunselection=False):
         if crab:
             files = ""
         else:
-            splitFileList = list( self.__chunks( self.fileList(), 255 ) )
+            splitFileList = list( self.__chunks( self.fileList(firstRun=firstRun, lastRun=lastRun, forcerunselection=forcerunselection), 255 ) )
             fileStr = [ "',\n'".join( files ) for files in splitFileList ]
             fileStr = [ "readFiles.extend( [\n'" + files + "'\n] )" \
                         for files in fileStr ]
             files = "\n".join( fileStr )
 
             if parent:
-                splitParentFileList = list( self.__chunks( self.fileList(parent = True), 255 ) )
+                splitParentFileList = list( self.__chunks( self.fileList(parent=True, firstRun=firstRun, lastRun=lastRun, forcerunselection=forcerunselection), 255 ) )
                 parentFileStr = [ "',\n'".join( parentFiles ) for parentFiles in splitParentFileList ]
                 parentFileStr = [ "secFiles.extend( [\n'" + parentFiles + "'\n] )" \
                             for parentFiles in parentFileStr ]
                 parentFiles = "\n".join( parentFileStr )
                 files += "\n\n" + parentFiles
 
+        return files
+
+    def __createSnippet( self, jsonPath = None, begin = None, end = None,
+                         firstRun = None, lastRun = None, repMap = None,
+                         crab = False, parent = False ):
+
+        if firstRun:
+            firstRun = int( firstRun )
+        if lastRun:
+            lastRun = int( lastRun )
+        if ( begin and firstRun ) or ( end and lastRun ):
+            msg = ( "The Usage of "
+                    + "'begin' & 'firstRun' " * int( bool( begin and
+                                                           firstRun ) )
+                    + "and " * int( bool( ( begin and firstRun ) and
+                                         ( end and lastRun ) ) )
+                    + "'end' & 'lastRun' " * int( bool( end and lastRun ) )
+                    + "is ambigous." )
+            raise AllInOneError( msg )
+        if begin or end:
+            ( firstRun, lastRun ) = self.convertTimeToRun(
+                begin = begin, end = end, firstRun = firstRun,
+                lastRun = lastRun )
+        if ( firstRun and lastRun ) and ( firstRun > lastRun ):
+            msg = ( "The lower time/runrange limit ('begin'/'firstRun') "
+                    "chosen is greater than the upper time/runrange limit "
+                    "('end'/'lastRun').")
+            raise AllInOneError( msg )
+        if self.predefined() and (jsonPath or begin or end or firstRun or lastRun):
+            msg = ( "The parameters 'JSON', 'begin', 'end', 'firstRun', and 'lastRun'"
+                    "only work for official datasets, not predefined _cff.py files" )
+            raise AllInOneError( msg )
+
+        lumiSecExtend = self.__lumiSelectionSnippet(jsonPath=jsonPath, firstRun=firstRun, lastRun=lastRun)
+        lumiStr = goodLumiSecStr = ""
+        if lumiSecExtend:
+            goodLumiSecStr = "lumiSecs = cms.untracked.VLuminosityBlockRange()\n"
+            lumiStr = "                    lumisToProcess = lumiSecs,\n"
+
+        files = self.__fileListSnippet(crab=crab, parent=parent, firstRun=firstRun, lastRun=lastRun, forcerunselection=False)
 
         theMap = repMap
         theMap["files"] = files
@@ -820,15 +826,60 @@ def dump_cff( self, outName = None, jsonPath = None, begin = None,
         theFile.close()
         return
 
-    def fileList( self, parent = False ):
+    def createdatasetfile_hippy(self, filename, filesperjob, firstrun, lastrun):
+        with open(filename, "w") as f:
+            for job in self.__chunks(self.fileList(firstRun=firstrun, lastRun=lastrun, forcerunselection=True), filesperjob):
+                f.write(",".join("'{}'".format(file) for file in job))
+
+    @staticmethod
+    def getrunnumberfromfilename(filename):
+        parts = filename.split("/")
+        result = error = None
+        if parts[0] != "" or parts[1] != "store":
+            error = "does not start with /store"
+        elif parts[2] in ["mc", "relval"]:
+            result = 1
+        elif parts[-2] != "00000" or not parts[-1].endswith(".root"):
+            error = "does not end with 00000/something.root"
+        elif len(parts) != 12:
+            error = "should be exactly 11 slashes counting the first one"
+        else:
+            runnumberparts = parts[-5:-2]
+            if not all(len(part)==3 for part in runnumberparts):
+                error = "the 3 directories {} do not have length 3 each".format("/".join(runnumberparts))
+            try:
+                result = int("".join(runnumberparts))
+            except ValueError:
+                error = "the 3 directories {} do not form an integer".format("/".join(runnumberparts))
+
+        if error:
+            error = "could not figure out which run number this file is from:\n{}\n{}".format(filename, error)
+            raise AllInOneError(error)
+
+        return result
+
+    def fileList(self, parent=False, firstRun=None, lastRun=None, forcerunselection=False):
         if self.__fileList and not parent:
             return self.__fileList
         if self.__parentFileList and parent:
             return self.__parentFileList
 
-        fileList = [ self.__findInJson(fileInfo,"name") \
+        fileList = [ self.__findInJson(fileInfo,"name")
                      for fileInfo in self.fileInfoList(parent) ]
 
+        if firstRun is not None or lastRun is not None:
+            if firstRun is None: firstRun = -5
+            if lastRun is None: lastrun = float('infinity')
+            e = None
+            for filename in fileList[:]:
+                try:
+                    if not firstRun < self.getrunnumberfromfilename(filename) < lastRun:
+                        fileList.remove(filename)
+                except AllInOneError as e:
+                    if forcerunselection: raise
+                    print e.message
+            if e is not None:
+                 print "\nWill include those files.  They will be filtered at the CMSSW level anyway."
         if not parent:
             self.__fileList = fileList
         else:

From ce82fd536b72e560f1dd85ad8be5e3f0262d60ed Mon Sep 17 00:00:00 2001
From: Heshy Roskes <jroskes1@jhu.edu>
Date: Tue, 13 Sep 2016 22:06:49 +0200
Subject: [PATCH 2/5] script to create file list for hippy

---
 .../scripts/createfilelist.py                   | 17 +++++++++++++++++
 .../python/TkAlAllInOneTool/dataset.py          |  6 +++---
 2 files changed, 20 insertions(+), 3 deletions(-)
 create mode 100755 Alignment/HIPAlignmentAlgorithm/scripts/createfilelist.py

diff --git a/Alignment/HIPAlignmentAlgorithm/scripts/createfilelist.py b/Alignment/HIPAlignmentAlgorithm/scripts/createfilelist.py
new file mode 100755
index 0000000000000..9f77df34c8232
--- /dev/null
+++ b/Alignment/HIPAlignmentAlgorithm/scripts/createfilelist.py
@@ -0,0 +1,17 @@
+#!/usr/bin/env python
+from Alignment.OfflineValidation.TkAlAllInOneTool.dataset import Dataset
+import argparse
+import os
+import sys
+
+parser = argparse.ArgumentParser()
+parser.add_argument("outputfilename", help="Goes into $CMSSW_BASE/src/Alignment/HIPAlignmentAlgorithm/data unless an absolute path starting with / is provided.  example: ALCARECOTkAlMinBias.dat_example")
+parser.add_argument("datasetname", help="example: /ZeroBias/Run2016G-TkAlMinBias-PromptReco-v1/ALCARECO")
+parser.add_argument("filesperjob", type=int, help="max number of files in each job")
+parser.add_argument("firstrun", type=int, nargs="?", help="first run to use")
+parser.add_argument("lastrun", type=int, nargs="?", help="last run to use")
+args = parser.parse_args()
+
+dataset = Dataset(args.datasetname, tryPredefinedFirst=False)
+outputfilename = os.path.join(os.environ["CMSSW_BASE"], "src", "Alignment", "HIPAlignmentAlgorithm", "data", args.outputfilename)
+dataset.createdatasetfile_hippy(outputfilename, args.filesperjob, args.firstrun, args.lastrun)
diff --git a/Alignment/OfflineValidation/python/TkAlAllInOneTool/dataset.py b/Alignment/OfflineValidation/python/TkAlAllInOneTool/dataset.py
index d9550f614e1ca..a1dd796d2c018 100644
--- a/Alignment/OfflineValidation/python/TkAlAllInOneTool/dataset.py
+++ b/Alignment/OfflineValidation/python/TkAlAllInOneTool/dataset.py
@@ -829,7 +829,7 @@ def dump_cff( self, outName = None, jsonPath = None, begin = None,
     def createdatasetfile_hippy(self, filename, filesperjob, firstrun, lastrun):
         with open(filename, "w") as f:
             for job in self.__chunks(self.fileList(firstRun=firstrun, lastRun=lastrun, forcerunselection=True), filesperjob):
-                f.write(",".join("'{}'".format(file) for file in job))
+                f.write(",".join("'{}'".format(file) for file in job)+"\n")
 
     @staticmethod
     def getrunnumberfromfilename(filename):
@@ -868,8 +868,8 @@ def fileList(self, parent=False, firstRun=None, lastRun=None, forcerunselection=
                      for fileInfo in self.fileInfoList(parent) ]
 
         if firstRun is not None or lastRun is not None:
-            if firstRun is None: firstRun = -5
-            if lastRun is None: lastrun = float('infinity')
+            if firstRun is None: firstRun = -1
+            if lastRun is None: lastRun = float('infinity')
             e = None
             for filename in fileList[:]:
                 try:

From 2fe508a49fa9cb22a05e31c9c33b1762d9954cfe Mon Sep 17 00:00:00 2001
From: Heshy Roskes <jroskes1@jhu.edu>
Date: Wed, 14 Sep 2016 16:11:02 +0200
Subject: [PATCH 3/5] remove nonsense xrdcp line (the root file has never been
 called OUTPUT_comparison.root as far back as I can tell, and it's copied
 later anyway, see
 https://github.com/cms-sw/cmssw/blob/813e5a/Alignment/OfflineValidation/python/TkAlAllInOneTool/configTemplates.py#L104)

---
 .../python/TkAlAllInOneTool/geometryComparison.py              | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/Alignment/OfflineValidation/python/TkAlAllInOneTool/geometryComparison.py b/Alignment/OfflineValidation/python/TkAlAllInOneTool/geometryComparison.py
index 486fab589b9d4..111ded37a5a34 100644
--- a/Alignment/OfflineValidation/python/TkAlAllInOneTool/geometryComparison.py
+++ b/Alignment/OfflineValidation/python/TkAlAllInOneTool/geometryComparison.py
@@ -252,9 +252,6 @@ def createScript(self, path):
                 resultingFile = os.path.expandvars( resultingFile )
                 resultingFile = os.path.abspath( resultingFile )
                 resultingFile = "root://eoscms//eos/cms" + resultingFile   #needs to be AFTER abspath so that it doesn't eat the //
-                repMap["runComparisonScripts"] += \
-                    ("xrdcp -f OUTPUT_comparison.root %s\n"
-                     %resultingFile)
                 self.filesToCompare[ name ] = resultingFile
 
             else:

From ec8d82ac7f88e15b40a0d1c71491928af6a49fcd Mon Sep 17 00:00:00 2001
From: Heshy Roskes <jroskes1@jhu.edu>
Date: Wed, 28 Sep 2016 21:47:41 +0200
Subject: [PATCH 4/5] fix parallel offline

---
 Alignment/OfflineValidation/scripts/validateAlignments.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/Alignment/OfflineValidation/scripts/validateAlignments.py b/Alignment/OfflineValidation/scripts/validateAlignments.py
index e447273411296..b42b66ca46cc6 100755
--- a/Alignment/OfflineValidation/scripts/validateAlignments.py
+++ b/Alignment/OfflineValidation/scripts/validateAlignments.py
@@ -350,6 +350,10 @@ def createMergeScript( path, validations ):
                                          repMap["mergeOfflineParJobsScriptPath"] )
         repMap["copyMergeScripts"] += ("cp .oO[Alignment/OfflineValidation]Oo./scripts/merge_TrackerOfflineValidation.C .\n"
                                        "rfcp %s .\n" % repMap["mergeOfflineParJobsScriptPath"])
+        repMap_offline = repMap.copy()
+        repMap_offline.update(PlottingOptions(config, "offline"))
+        repMap["copyMergeScripts"] = \
+            replaceByMap(repMap["copyMergeScripts"], repMap_offline)
 
     if anythingToMerge:
         # DownloadData is the section which merges output files from parallel jobs

From 2cba4fe3156e36fc9dce5ffb49f4b2c7a028b249 Mon Sep 17 00:00:00 2001
From: Heshy Roskes <jroskes1@jhu.edu>
Date: Wed, 28 Sep 2016 21:50:24 +0200
Subject: [PATCH 5/5] move error message to the function that actually gets
 called

---
 .../OfflineValidation/python/TkAlAllInOneTool/dataset.py  | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/Alignment/OfflineValidation/python/TkAlAllInOneTool/dataset.py b/Alignment/OfflineValidation/python/TkAlAllInOneTool/dataset.py
index a1dd796d2c018..f08548e30a4b3 100644
--- a/Alignment/OfflineValidation/python/TkAlAllInOneTool/dataset.py
+++ b/Alignment/OfflineValidation/python/TkAlAllInOneTool/dataset.py
@@ -251,10 +251,6 @@ def __createSnippet( self, jsonPath = None, begin = None, end = None,
                     "chosen is greater than the upper time/runrange limit "
                     "('end'/'lastRun').")
             raise AllInOneError( msg )
-        if self.predefined() and (jsonPath or begin or end or firstRun or lastRun):
-            msg = ( "The parameters 'JSON', 'begin', 'end', 'firstRun', and 'lastRun'"
-                    "only work for official datasets, not predefined _cff.py files" )
-            raise AllInOneError( msg )
 
         lumiSecExtend = self.__lumiSelectionSnippet(jsonPath=jsonPath, firstRun=firstRun, lastRun=lastRun)
         lumiStr = goodLumiSecStr = ""
@@ -710,6 +706,10 @@ def parentDataset( self ):
 
     def datasetSnippet( self, jsonPath = None, begin = None, end = None,
                         firstRun = None, lastRun = None, crab = False, parent = False ):
+        if self.__predefined and (jsonPath or begin or end or firstRun or lastRun):
+            msg = ( "The parameters 'JSON', 'begin', 'end', 'firstRun', and 'lastRun' "
+                    "only work for official datasets, not predefined _cff.py files" )
+            raise AllInOneError( msg )
         if self.__predefined and parent:
                 with open(self.__filename) as f:
                     if "secFiles.extend" not in f.read():