# GridSearcher

In [1]:
import glob
import itertools
import json
import os

In [2]:
def get_options(filename):
    """Expand a JSON grid configuration into a list of option dictionaries.

    The file is parsed with ``json.load`` and read as a mapping. A value that
    is a list is taken as the candidate settings for its key; any other value
    counts as a single candidate. One dictionary is produced for every element
    of the Cartesian product of all candidates, in ``itertools.product`` order.

    Parameters
    ----------
    filename : str
        Path of the JSON file holding the grid configuration.

    Returns
    -------
    list of dict
        One dictionary per grid point, mapping every key of the file to one
        of its candidate values.
    """
    with open(filename) as handle:
        grid = json.load(handle)

    # Normalise every entry to a list of candidates, keeping the key order
    names = []
    candidates = []
    for name in grid:
        value = grid[name]
        names.append(name)
        candidates.append(value if isinstance(value, list) else [value])

    # One dictionary per combination of candidates
    return [dict(zip(names, combo)) for combo in itertools.product(*candidates)]

## Add samples

In [3]:
# Training and testing samples. The relative paths given here are converted to
# absolute paths when they are written into the model descriptions.
train = "samples/train.h5"
test = "samples/test.h5"

## Add grid configurations

In [4]:
# Option dictionaries of all grids to be searched, one entry per grid point.
options = []

options += get_options("config/xgb_template.json")
# Further grids can be appended in the same way:
#options += get_options("config/mycustomgrid.json")

For convenience, two predefined variable lists (`tid_1pvars` and `tid_3pvars`):

In [5]:
# Predefined lists of variable names to choose from.
tid_1pvars = [
    "TauJets.centFrac",
    "TauJets.etOverPtLeadTrk",
    "TauJets.innerTrkAvgDist",
    "TauJets.absipSigLeadTrk",
    "TauJets.SumPtTrkFrac",
    "TauJets.ChPiEMEOverCaloEME",
    "TauJets.EMPOverTrkSysP",
    "TauJets.ptRatioEflowApprox",
    "TauJets.mEflowApprox",
]

tid_3pvars = [
    "TauJets.centFrac",
    "TauJets.etOverPtLeadTrk",
    "TauJets.innerTrkAvgDist",
    "TauJets.dRmax",
    "TauJets.trFlightPathSig",
    "TauJets.massTrkSys",
    "TauJets.ChPiEMEOverCaloEME",
    "TauJets.EMPOverTrkSysP",
    "TauJets.ptRatioEflowApprox",
    "TauJets.mEflowApprox",
]

Choose the input variables, the weight and the class label:

In [7]:
# `variables` refers to the same list object as `tid_1pvars` (no copy is made).
variables = tid_1pvars
# Stored under the "weight" and "classlabel" keys of every model description;
# presumably the names of the event-weight and signal-flag columns in the
# samples -- TODO confirm against the runner.
weight = "weight"
classlabel = "is_sig"

## Save configurations

In [12]:
# Make sure the output directory exists, so that this cell also works on a
# fresh checkout instead of failing when the first description is written.
if not os.path.isdir("models"):
    os.makedirs("models")

# Collect the identifiers of the model descriptions that already exist
identifiers = []
for filename in glob.glob("models/*.json"):
    with open(filename, "r") as f:
        config = json.load(f)
    identifiers.append(int(config["identifier"]))

# Continue numbering after the highest identifier found (start at 0 if none)
id_start = max(identifiers) + 1 if identifiers else 0

# Write one model description per grid point.
# NOTE: every execution of this cell writes a new set of descriptions with
# fresh identifiers; existing files are not overwritten or deduplicated.
for i, opt in enumerate(options, id_start):
    # Zero-padded identifier, also used as the file name
    label = "{:04}".format(i)

    model_desc = {"identifier": label,
                  "train": os.path.abspath(train),
                  "test": os.path.abspath(test),
                  "variables": variables,
                  "weight": weight,
                  "classlabel": classlabel,
                  "config": opt,
                  "submitted": False,
                  "processed": False}

    with open("models/{}.json".format(label), "w") as f:
        json.dump(model_desc, f, indent=4)

## Prepare submission

In [9]:
# PBS script for BAF: read the whole template into memory as one string. Its
# placeholders are filled in with str.format when the script is written out.
with open("config/pbs_template.sh", "r") as f:
    pbs_script = f.read()

In [10]:
# Values substituted into the PBS template; the two paths are made absolute.
batch_settings = dict(
    name="GridSearcher",
    workdir=os.path.abspath("models"),
    req_mem="4g",
    req_file="10g",
    queue="medium",
    runner=os.path.abspath("scripts/runner.py"),
)

In [11]:
# Fill the placeholders of the template with the batch settings and write the
# resulting script. str.format raises a KeyError if the template references a
# field that is not a key of batch_settings.
with open("scripts/pbs.sh", "w") as f:
    f.write(pbs_script.format(**batch_settings))