In [38]:
from __future__ import absolute_import, division, print_function, unicode_literals

import six
from collections import OrderedDict
import numpy as np
import matplotlib
from matplotlib import pyplot as plt
%matplotlib inline
import logging
import os
import xml.etree.ElementTree as ET


In [2]:
# Root logger: DEBUG level, HH:MM timestamps, fixed-width name and level columns.
logging.basicConfig(
    level=logging.DEBUG,
    datefmt='%H:%M',
    format='%(asctime)-5.5s %(name)-20.20s %(levelname)-7.7s %(message)s',
)

# Raise every already-registered logger to WARNING, except those whose
# name contains "madminer" (they keep their current level).
for key in logging.Logger.manager.loggerDict:
    if "madminer" in key:
        continue
    logging.getLogger(key).setLevel(logging.WARNING)

In [3]:
# Machine-specific root directories of the analysis checkout and the MadGraph
# installation. Set the DIBOSON_MINING_DIR / MG5_DIR environment variables to
# run on another machine; the original locations remain the defaults.
# os.path.join(path, '') guarantees a trailing separator, which the plain
# string concatenations that build the derived paths rely on.
base_dir = os.path.join(
    os.environ.get('DIBOSON_MINING_DIR', '/Users/johannbrehmer/work/projects/madminer/diboson_mining/'),
    ''
)
mg_dir = os.path.join(
    os.environ.get('MG5_DIR', '/Users/johannbrehmer/work/projects/madminer/MG5_aMC_v2_6_4/'),
    ''
)

In [4]:
# Locations derived from base_dir (which must end in a path separator).
sample_dir = '{}data/samples/wgamma_sys/'.format(base_dir)
mg_process_dir = '{}data/mg_processes/wgamma_sys/'.format(base_dir)
log_dir = '{}logs/wgamma_sys/'.format(base_dir)
temp_dir = '{}data/temp'.format(base_dir)

# Card directories for the wgamma process.
card_dir = '{}cards/wgamma/'.format(base_dir)
ufo_model_dir = '{}SMWgamma_UFO'.format(card_dir)
run_card_dir = '{}run_cards/'.format(card_dir)

# Location derived from the MadGraph directory.
delphes_dir = '{}Delphes'.format(mg_dir)

## Parse LHE file

In [5]:
# Path of the unweighted event file of run_01. os.path.join avoids the doubled
# separator that plain concatenation produced (mg_process_dir already ends in '/').
filename = os.path.join(mg_process_dir, "Events", "run_01", "unweighted_events.lhe")

In [7]:
def untar_and_parse_lhe_file(filename):
    """
    Parse an LHE event file into an XML element tree, decompressing it first if needed.

    If `filename` has a ".gz" extension, it is decompressed to the same path without
    that extension, unless that file already exists. The compressed file is kept.
    Before parsing, everything from the first "#" to the end of each line is dropped,
    because LHE comments can contain characters that break the XML parser.

    Parameters
    ----------
    filename : str
        Path to the LHE file, optionally gzip-compressed (".gz" extension).

    Returns
    -------
    root : xml.etree.ElementTree.Element
        Root element of the parsed document.

    Raises
    ------
    IOError or OSError
        If the file cannot be read or the decompressed copy cannot be written.
    xml.etree.ElementTree.ParseError
        If the comment-stripped content is not well-formed XML.
    """
    # Imported locally: only the decompression step needs them
    import gzip
    import shutil

    # Decompress in pure Python. No external command is involved, so paths containing
    # spaces or shell metacharacters are safe.
    new_filename, extension = os.path.splitext(filename)
    if extension == ".gz":
        if not os.path.exists(new_filename):
            with gzip.open(filename, "rb") as compressed, open(new_filename, "wb") as uncompressed:
                shutil.copyfileobj(compressed, uncompressed)
        filename = new_filename

    # In some cases, the LHE comments can contain bad characters: cut every line at its first "#"
    with open(filename, "r") as file:
        lhe_lines = [line.split("#", 1)[0] for line in file.read().split("\n")]
    lhe_content = "\n".join(lhe_lines)

    # Parse XML tree
    root = ET.fromstring(lhe_content)

    return root

In [8]:
# Parse the event file into an ElementTree root element (a ".gz" file is decompressed first).
root = untar_and_parse_lhe_file(filename)

## Run card

In [20]:
# Text of the <MGRunCard> element inside the first <header> element.
# Raises AttributeError if either element is missing (find() returns None).
run_card = root.find("header").find("MGRunCard").text

In [24]:
# Print the run card one line per row.
print(*run_card.split("\n"), sep="\n")



























  tag_1	= run_tag ! name of the run 





  100000	= nevents ! Number of unweighted events requested 
 21 = iseed ! rnd seed (0=assigned automatically=default))





  1	= lpp1 ! beam 1 type 
  1	= lpp2 ! beam 2 type
  6500.0	= ebeam1 ! beam 1 total energy in GeV
  6500.0	= ebeam2 ! beam 2 total energy in GeV



     0.0     = polbeam1 ! beam polarization for beam 1
     0.0     = polbeam2 ! beam polarization for beam 2




  lhapdf	= pdlabel ! PDF set                                     
  90900	= lhaid ! if pdlabel=lhapdf, this is the lhapdf number




  False	= fixed_ren_scale ! if .true. use fixed ren scale
  False	= fixed_fac_scale ! if .true. use fixed fac scale
  91.188	= scale ! fixed ren scale
  91.188	= dsqrt_q2fact1 ! fixed fact scale for pdf1
  91.188	= dsqrt_q2fact2 ! fixed fact scale for pdf2
  -1	= dynamical_scale_choice ! Choose one of the preselected dynamical choices
  1.0	= scalefact ! scale factor for event-by-event scales



  False	= grid

## Weight groups

In [27]:
# All <weightgroup> children of the first <initrwgt> element in the first <header>.
weight_groups = (
    root.findall("header")[0]
    .findall("initrwgt")[0]
    .findall("weightgroup")
)

In [45]:
# Inspect the second weight group (index 1).
wg = weight_groups[1]

In [47]:
# Display the "name" attribute of the selected weight group.
wg.attrib['name']

'CT10nlo'

In [53]:
# First <weight> entry of the selected weight group.
w = wg.findall("weight")[0]

In [55]:
# Display all XML attributes of that weight entry as a dict.
w.attrib

{'id': '45', 'MUR': '1.0', 'MUF': '1.0', 'PDF': '11000'}

## Events

In [25]:
# All <event> elements that are direct children of the LHE root.
events = root.findall("event")

# Pick the second event (index 1) for inspection.
event = events[1]

In [27]:
# Raw text of the event element, i.e. the text that precedes its first child element.
print(event.text)


 6      1 +1.4360220e-03 2.96188700e+02 7.81860800e-03 1.00369400e-01
       -2 -1    0    0    0  501 -0.0000000000e+00 +0.0000000000e+00 +1.2357566225e+03 1.2357566225e+03 0.0000000000e+00 0.0000e+00 1.0000e+00
        1 -1    0    0  501    0 +0.0000000000e+00 -0.0000000000e+00 -4.0299093188e+02 4.0299093188e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00
      -24  2    1    2    0    0 +2.7228584413e+00 +2.8568238938e+02 -3.2881655402e+02 4.4254725652e+02 7.8140300091e+01 0.0000e+00 0.0000e+00
       22  1    1    2    0    0 -2.7228584413e+00 -2.8568238938e+02 +1.1615822447e+03 1.1962002979e+03 0.0000000000e+00 0.0000e+00 1.0000e+00
       11  1    3    3    0    0 +2.3469596044e+01 +8.6543976032e+01 -1.4647549544e+02 1.7174327495e+02 0.0000000000e+00 0.0000e+00 -1.0000e+00
      -12  1    3    3    0    0 -2.0746737603e+01 +1.9913841335e+02 -1.8234105858e+02 2.7080398158e+02 0.0000000000e+00 0.0000e+00 1.0000e+00



In [28]:
# Every <wgt> child of the event's <rwgt> element.
# Raises AttributeError if the event has no <rwgt> element (find() returns None).
weights = event.find("rwgt").findall("wgt")

In [34]:
# One output line per <wgt> element: its "id" attribute followed by its text.
for weight in weights:
    print(weight.attrib["id"], weight.text)

1  +1.5392939e-03 
2  +1.3436796e-03 
3  +1.5392939e-03 
4  +1.3436796e-03 
5  +1.4360220e-03 
6  +1.5474268e-03 
7  +1.4105018e-03 
8  +1.4353439e-03 
9  +1.4396149e-03 
10  +1.4237539e-03 
11  +1.4417906e-03 
12  +1.4361506e-03 
13  +1.4329066e-03 
14  +1.4374325e-03 
15  +1.4387012e-03 
16  +1.4345429e-03 
17  +1.4357060e-03 
18  +1.4355601e-03 
19  +1.4292326e-03 
20  +1.4348486e-03 
21  +1.4333090e-03 
22  +1.4338135e-03 
23  +1.4354436e-03 
24  +1.4327494e-03 
25  +1.4417914e-03 
26  +1.4369995e-03 
27  +1.4403849e-03 
28  +1.4355560e-03 
29  +1.4317030e-03 
30  +1.4361479e-03 
31  +1.4196547e-03 
32  +1.4325559e-03 
33  +1.4329746e-03 
34  +1.4366798e-03 
35  +1.4361440e-03 
36  +1.3473100e-03 
37  +1.3473100e-03 
38  +1.4400775e-03 
39  +1.2414552e-03 
morphing_basis_vector_1  +1.0976270e-03 
morphing_basis_vector_2  +9.6976289e-04 
morphing_basis_vector_3  +1.7826746e-03 
morphing_basis_vector_4  +1.7649479e-03 
morphing_basis_vector_5  +2.1515632e-03 
