In [None]:
from QWorker import DistributedStripedSession as Session
from QWorker import IPythonDisplay
from vega import VegaLite

from histbook import Hist, beside, groupby, below, grid, overlay, bin

# Service endpoints passed to Session(...) when the session is created later in this notebook.
registry_url = "http://ifdb01.fnal.gov:9867"
data_server_url = "http://dbweb7.fnal.gov:9091/striped/app"


The following class defines what the workers will execute. Even though the workers are the ones that run it, you still need to run this cell yourself.

__Important__: do not remove the `#__worker_class__` line.

In [None]:
#__worker_class__

class Worker(object):
    """Worker-side logic: forwards the NJets values of each batch of events."""

    def columns(self):
        """Return the list of column names this worker declares (only "NJets").

        Presumably the framework uses this to decide which columns to load
        for `process` -- TODO confirm against the QWorker documentation.
        """
        needed = ["NJets"]
        return needed

    def process(self, events, emit, params):
        """Pass the NJets attribute of `events` to `emit` under the key NJets.

        events -- object exposing an `NJets` attribute
        emit   -- callable accepting keyword arguments
        params -- accepted for interface compatibility; not used here
        """
        njets = events.NJets
        emit(NJets=njets)
        

Start and run your jobs, one per dataset

In [None]:
# Open a session against the data server and the worker registry.
session = Session(data_server_url, registry_url)

# Datasets to process; one job is created and run per entry.
datasets = [
    "Summer16.GJets_HT-400To600_TuneCUETP8M1_13TeV-madgraphMLM-pythia8",
    "Summer16.GJets_HT-600ToInf_TuneCUETP8M1_13TeV-madgraphMLM-pythia8",
    "Summer16.QCD_HT1000to1500_TuneCUETP8M1_13TeV-madgraphMLM-pythia8"
]

# One NJets histogram per dataset, keyed by dataset name.
# NOTE(review): these use 10 bins over [0, 10) while h_combined below uses
# 20 bins over [0, 20) -- confirm the different ranges are intentional.
h_by_dataset = {
    dataset: Hist(bin("NJets", 10, 0, 10))
    for dataset in datasets
}

# A single histogram handed to every job alongside the per-dataset one.
h_combined = Hist(bin("NJets", 20, 0, 20))

for dataset_name in datasets:
    job = session.createJob(dataset_name,
                    histograms=[h_combined, h_by_dataset[dataset_name]])
    job.run()
    runtime = job.TFinish - job.TStart
    nevents = job.EventsProcessed
    # Avoid a ZeroDivisionError if the job reports a zero runtime;
    # the throughput is then shown as "nan" instead of aborting the loop.
    if runtime > 0:
        rate = nevents/runtime/1000000
    else:
        rate = float("nan")
    # Parenthesised single-argument form prints identically on Python 2 and 3.
    print("%s:\n    %.4f million events\n    %.4f million events/second"
          % (dataset_name, nevents/1000000.0, rate))
    # Not used in this cell; kept as a notebook global for inspection.
    nworkers = len(job.WorkerAddresses)
    


Display results

In [None]:
# Combine the per-dataset histograms into one grouped histogram.
h_grouped = Hist.group(**h_by_dataset)

# Left panel: area plot of NJets stacked by "source".
stacked_by_source = h_grouped.stack("source").area("NJets")
# Right panel: combined NJets distribution as a line with a log-scaled y axis.
combined_log = h_combined.line("NJets", width=300, yscale={"type":"log"})

# Last expression of the cell, so the rendered chart is the cell output.
beside(stacked_by_source, combined_log).to(VegaLite)
