# Create a mixed background (bkg) file

In [5]:
import ROOT
import random
import glob

def concatenate_first_N_events(input_filenames, output_filename, N):
    """Copy the first ``N`` entries of each input file's "Events" tree into one file.

    The output tree takes its branch structure from the first input file that
    contains a TTree named "Events". Inputs that cannot be opened or that have
    no "Events" tree are reported and skipped.

    Args:
        input_filenames: Iterable of paths to the input ROOT files.
        output_filename: Path of the ROOT file to create (opened with
            "RECREATE", so an existing file is overwritten).
        N: Maximum number of entries copied from each input file; fewer are
            copied when the input tree is shorter.

    Raises:
        RuntimeError: If none of the inputs provided an "Events" tree, so
            there is nothing to write.
    """
    output_file = ROOT.TFile(output_filename, "RECREATE")
    output_tree = None

    try:
        for filename in input_filenames:
            print(f"Processing file: {filename}")

            try:
                input_file = ROOT.TFile.Open(filename)
            except OSError as err:
                # Recent PyROOT versions raise instead of returning a null file.
                print(f"Could not open file {filename}: {err}")
                continue
            if not input_file or input_file.IsZombie():
                print(f"Could not open file {filename}")
                continue

            try:
                input_tree = input_file.Get("Events")
                if not input_tree:
                    print(f"TTree 'Events' not found in file {filename}")
                    continue

                # Make sure every branch is read (and therefore copied).
                input_tree.SetBranchStatus("*", 1)

                if output_tree is None:
                    # Empty clone: same branch structure, zero entries.
                    output_file.cd()
                    output_tree = input_tree.CloneTree(0)
                    output_tree.SetDirectory(output_file)

                # The clone is only linked to the buffers of the tree it was
                # cloned from, and that link is dropped when that tree is
                # deleted. Point the output branches at the buffers of the
                # *current* input tree so that Fill() stores its entries.
                input_tree.CopyAddresses(output_tree)

                n_events_to_read = min(N, input_tree.GetEntries())
                for i in range(n_events_to_read):
                    input_tree.GetEntry(i)
                    output_tree.Fill()

                # Detach the output tree again before the input tree goes
                # away, so it never holds pointers into freed buffers.
                input_tree.CopyAddresses(output_tree, True)
            finally:
                input_file.Close()

        if output_tree is None:
            raise RuntimeError(
                "No input file contained a TTree named 'Events'; "
                f"nothing to write to '{output_filename}'."
            )

        output_file.cd()
        output_tree.Write()
    finally:
        # Always release the output file, also when an error occurred above.
        output_file.Close()

    print(f"Output file '{output_filename}' has been created with the concatenated events.")

In [8]:
BKGDIR = '/scratch-cbe/users/alikaan.gueven/ML_KAAN/MC2018/all/'

# Directory-name fragments used to build the glob patterns: every process
# prefix is combined with every numeric tag, W+jets samples first.
process_prefixes = ('wjets', 'zjets')
sample_tags = ('0100', '0200', '0400', '0600', '0800', '1200', '2500')

# Pick one random ROOT file below each matching sample directory.
input_filenames = [
    random.choice(glob.glob(f'{BKGDIR}/{prefix}*{tag}*/**/*.root', recursive=True))
    for prefix in process_prefixes
    for tag in sample_tags
]

# Take the first 100 events of every chosen file and merge them.
output_filename = f"{BKGDIR}/skimmed_mixed_bkg.root"
concatenate_first_N_events(input_filenames, output_filename, 100)

Processing file: /scratch-cbe/users/alikaan.gueven/ML_KAAN/MC2018/all/wjetstolnuht0100_2018/CustomNanoAOD_pv1/240229_204543/0000/output_1.root
Processing file: /scratch-cbe/users/alikaan.gueven/ML_KAAN/MC2018/all/wjetstolnuht0200_2018/CustomNanoAOD_pv1/240229_204444/0000/output_5.root
Processing file: /scratch-cbe/users/alikaan.gueven/ML_KAAN/MC2018/all/wjetstolnuht0400_2018/CustomNanoAOD_pv1/240229_204654/0000/output_3.root
Processing file: /scratch-cbe/users/alikaan.gueven/ML_KAAN/MC2018/all/wjetstolnuht0600_2018/CustomNanoAOD_pv1/240229_204532/0000/output_4.root
Processing file: /scratch-cbe/users/alikaan.gueven/ML_KAAN/MC2018/all/wjetstolnuht0800_2018/CustomNanoAOD_pv2/240301_112430/0000/output_4.root
Processing file: /scratch-cbe/users/alikaan.gueven/ML_KAAN/MC2018/all/wjetstolnuht1200_2018/CustomNanoAOD_pv1/240229_204631/0000/output_8.root
Processing file: /scratch-cbe/users/alikaan.gueven/ML_KAAN/MC2018/all/wjetstolnuht2500_2018/CustomNanoAOD_pv1/240229_204606/0000/output_4.root

In [None]:
import random
import glob
import uproot
import awkward as ak

# List of input files
# One randomly chosen input file per sample pattern; the ':Events' suffix
# tells uproot which object inside the file to read.
BKGDIR = '/scratch-cbe/users/alikaan.gueven/ML_KAAN/predict'
files = [
    random.choice(glob.glob(f'{BKGDIR}/{pattern}/**/*.root', recursive=True)) + ':Events'
    for pattern in ('wjets*0100*', 'wjets*0200*')
]

# Keep only the first chunk (step_size=10) delivered for each file.
first_chunks = []
for file in files:
    first_chunks.append(next(uproot.iterate(file, step_size=10)))

my_records = ak.concatenate(first_chunks)

# Write the merged records as the 'Events' object of a fresh file.
with uproot.recreate('skimmed_mixed_bkg.root') as f:
    f['Events'] = my_records

In [None]:
def is_rootcompat(a):
    """Return True when ``a`` is a flat array or a singly jagged array of numbers.

    The decision is made purely on the awkward type of ``a``: an ``ArrayType``
    whose content is a ``NumpyType``, or a ``ListType`` that directly wraps a
    ``NumpyType``.
    """
    array_type = ak.type(a)
    if not isinstance(array_type, ak.types.ArrayType):
        return False

    content = array_type.content
    if isinstance(content, ak.types.NumpyType):
        # Flat array of primitives.
        return True

    # Exactly one level of variable-length nesting around primitives.
    return isinstance(content, ak.types.ListType) and isinstance(
        content.content, ak.types.NumpyType
    )


def uproot_writeable(events):
    """Build a dict of the columns of ``events`` in a form uproot can write compactly.

    Top-level fields without sub-fields are packed and stripped of parameters.
    Fields that have sub-fields are rebuilt with ``ak.zip`` from only those
    sub-fields for which ``is_rootcompat`` is true.

    Returns:
        dict mapping each top-level field name to the prepared array.
    """
    writable = {}
    for bname in events.fields:
        column = events[bname]
        subfields = column.fields

        if not subfields:
            writable[bname] = ak.to_packed(ak.without_parameters(column))
            continue

        # Record-like column: keep only the members that can be written.
        members = {}
        for n in subfields:
            member = column[n]
            if is_rootcompat(member):
                members[n] = ak.to_packed(ak.without_parameters(member))
        writable[bname] = ak.zip(members)

    return writable

In [None]:
# https://raw.githubusercontent.com/CoffeaTeam/coffea/master/tests/samples/nano_dy.root
import awkward as ak
import uproot
from coffea.nanoevents import NanoEventsFactory


# Load the previously written skim through coffea's NanoEventsFactory.
filename = 'skimmed_mixed_bkg.root'
events = NanoEventsFactory.from_root(
    filename,
    metadata={"dataset": "SomeDataset"},
).events()

# Keep only the events that contain at least one electron with pt > 10
# (the per-event sum of the boolean mask counts the passing electrons).
events = events[ak.sum(events.Electron.pt > 10, axis=1) > 0]
# Write the selected events to a new file, reduced to the columns that
# uproot_writeable() prepares for writing.
with uproot.recreate("events.root") as fout:
    fout["Events"] = uproot_writeable(events)

In [None]:
with uproot.recreate('skimmed_mixed_bkg.root') as f:
    f['Events'] = uproot_writeable(my_records)

In [None]:
from contextlib import closing

# Read the first chunk (step_size=20) of both trees. closing() guarantees
# that each handle's close() is called even if reading raises.
with closing(uproot.open('skimmed_mixed_bkg.root:Events')) as f1, \
        closing(uproot.open('/scratch-cbe/users/alikaan.gueven/ML_KAAN/predict/wjetstolnuht0100_2018/outputs/output_1_Skim.root:Events')) as f2:
    X1 = next(uproot.iterate(f1, step_size=20))
    X2 = next(uproot.iterate(f2, step_size=20))

fields1 = X1.fields
fields2 = X2.fields

# Fields present in the mixed skim but missing from the reference file.
to_be_deleted = list(set(fields1) - set(fields2))

to_be_deleted

In [None]:
for field in fields2:
    if field.startswith('Muon_fsrPhoto'):
        print(field)

In [None]:
del

In [None]:
events.Muon.fields

In [None]:
# https://raw.githubusercontent.com/CoffeaTeam/coffea/master/tests/samples/nano_dy.root
import awkward as ak
import uproot
from coffea.nanoevents import NanoEventsFactory


# NOTE(review): `file` is assigned here but not used by the call below, which
# reads a hard-coded path instead — confirm which input is intended.
file = files[0]
# Open the 'Events' tree of the hard-coded skim file through coffea.
# entry_stop=10 presumably limits the read to the first 10 entries and
# delayed=False presumably requests eager loading — verify against the
# installed coffea version.
events = NanoEventsFactory.from_root(
    {'/scratch-cbe/users/alikaan.gueven/ML_KAAN/predict/wjetstolnuht0100_2018/outputs/output_1_Skim.root': 'Events'},
    entry_stop=10, delayed=False).events()

# with uproot.recreate('skimmed_mixed_bkg.root') as f:
#     f['Events'] = uproot_writeable(events)

In [None]:
with uproot.recreate('skimmed_mixed_bkg.root') as fout:
    fout["Events"] = uproot_writeable(events)

In [None]:
X = uproot_writeable(my_records)

In [None]:
for bname in my_records.fields:
    print(bname, ak.without_parameters(my_records[bname]))
    break
        # if is_rootcompat(events[bname][n]):
        #     print(bname, n)

In [None]:
ak.without_parameters(my_records[bname])

In [None]:
my_records['run'].layout

In [None]:
ak.to_packed()

In [None]:
for field in my_records.fields:
    f[field] = my_records[field]

In [None]:
f.close()

In [None]:
with uproot.recreate('skimmed_mixed_bkg.root') as f:
    f['Events'] = my_records

In [None]:
!mv skimmed_mixed_bkg.root /scratch-cbe/users/alikaan.gueven/ML_KAAN/test/skimmed_mixed_bkg.root

In [None]:
!mkdir /scratch-cbe/users/alikaan.gueven/ML_KAAN/test/

In [None]:
!pwd

In [1]:
%%time

import ROOT
import random
import glob

Welcome to JupyROOT 6.28/00
CPU times: user 2.71 s, sys: 1.67 s, total: 4.38 s
Wall time: 22 s


In [2]:
tree_name = "Events"
BKGDIR = '/scratch-cbe/users/alikaan.gueven/ML_KAAN/predict'

# Directory-name fragments used to build the glob patterns: every process
# prefix is combined with every numeric tag, W+jets samples first.
process_prefixes = ('wjets', 'zjets')
sample_tags = ('0100', '0200', '0400', '0600', '0800', '1200', '2500')

# Pick one random ROOT file below each matching sample directory.
input_filenames = [
    random.choice(glob.glob(f'{BKGDIR}/{prefix}*{tag}*/**/*.root', recursive=True))
    for prefix in process_prefixes
    for tag in sample_tags
]

In [69]:
import re
re.search('(wjets|zjets).+(2018)', input_filenames[-1]).group()

'zjetstonunuht2500_2018'

In [None]:
# Write a truncated copy of every input file, named after its sample.
for file in input_filenames:
    # The sample name is the part of the path from 'wjets'/'zjets' up to '2018'.
    match = re.search(r'(wjets|zjets).+(2018)', file)
    if match is None:
        raise ValueError(f"Cannot derive a sample name from '{file}'")
    name = match.group()
    output_name = f"{name}_5K.root"

    # NOTE(review): 10_000 entries are kept although the file name says "5K";
    # confirm which of the two is intended.
    df = ROOT.RDataFrame("Events", file).Range(10_000)
    df.Snapshot("Events", output_name, list(df.GetColumnNames()))

    # Report the file that was actually written.
    print(output_name, end=" ")

In [5]:
input_filenames[0]

'/scratch-cbe/users/alikaan.gueven/ML_KAAN/predict/wjetstolnuht0100_2018/outputs/output_2_Skim.root'

In [11]:
# Write the first 5000 entries of every input file to an index-numbered file.
for i, file in enumerate(input_filenames):
    df = ROOT.RDataFrame("Events", file).Range(5_000)
    # Copy every column the data frame knows about.
    columns = list(df.GetColumnNames())
    target = f"bkg_mix_5K_{i}.root"
    df.Snapshot("Events", target, columns)

In [12]:
for i in range(len(input_filenames)):
    print(f"bkg_mix_5K_{i}.root", end=" ")

bkg_mix_5K_0.root bkg_mix_5K_1.root bkg_mix_5K_2.root bkg_mix_5K_3.root bkg_mix_5K_4.root bkg_mix_5K_5.root bkg_mix_5K_6.root bkg_mix_5K_7.root bkg_mix_5K_8.root bkg_mix_5K_9.root bkg_mix_5K_10.root bkg_mix_5K_11.root bkg_mix_5K_12.root bkg_mix_5K_13.root 

In [None]:
from array import array

# Add a float branch holding one prediction score per event to an existing tree.
f = ROOT.TFile.Open('bkg_mix_5K_0.root', 'UPDATE')
try:
    tree = f.Get("Events")

    # Single-element buffer that the new branch reads its value from.
    pred_scores = array('f', [0.])
    branch = tree.Branch("ev_PART-241", pred_scores, "ev_PART-241/F")

    for i in range(tree.GetEntries()):
        tree.GetEntry(i)
        # NOTE(review): `save_dict` and `file` are defined outside this cell;
        # confirm that `file` is the key belonging to 'bkg_mix_5K_0.root'.
        pred_scores[0] = save_dict[file][i]
        # Fill only the new branch: tree.Fill() would append a duplicate
        # entry to every existing branch.
        branch.Fill()

    # Write the modified TTree back to the file
    tree.Write("", ROOT.TObject.kOverwrite)
finally:
    f.Close()

In [57]:
from vtxLevelDataset2 import superbatch_iterator

In [64]:
si = superbatch_iterator('bkg_mix_5K_0.root', ['SDVSecVtx_x'], 4000)

In [67]:
next(si)

StopIteration: 

In [43]:
ui = uproot.iterate('bkg_mix_5K_0.root', ['SDVSecVtx_x'], step_size=4000)

In [46]:
next(ui)

StopIteration: 

In [20]:
import uproot

In [69]:
f = ROOT.TFile('bkg_mix_5K_0.root', 'UPDATE')

In [80]:
tree = f.Get("Events")

In [83]:
tree

94439

In [85]:
for i, event in enumerate(tree):
    print(i, event.nSDVSecVtx)

0 0
1 1
2 1
3 7
4 3
5 4
6 3
7 2
8 0
9 4
10 3
11 7
12 0
13 7
14 1
15 0
16 1
17 1
18 1
19 2
20 2
21 2
22 0
23 6
24 4
25 5
26 10
27 4
28 2
29 4
30 3
31 0
32 3
33 1
34 4
35 9
36 4
37 3
38 4
39 5
40 1
41 0
42 2
43 2
44 4
45 3
46 0
47 0
48 0
49 1
50 1
51 5
52 2
53 1
54 2
55 3
56 1
57 3
58 4
59 1
60 2
61 4
62 2
63 1
64 3
65 0
66 0
67 6
68 1
69 4
70 0
71 1
72 2
73 8
74 0
75 9
76 4
77 5
78 1
79 5
80 4
81 3
82 2
83 6
84 4
85 3
86 1
87 0
88 1
89 2
90 1
91 3
92 9
93 4
94 6
95 0
96 0
97 2
98 2
99 1
100 1
101 7
102 2
103 2
104 5
105 1
106 1
107 1
108 5
109 0
110 8
111 2
112 2
113 1
114 3
115 5
116 7
117 3
118 2
119 2
120 3
121 2
122 4
123 1
124 1
125 7
126 2
127 1
128 3
129 6
130 0
131 0
132 1
133 1
134 3
135 2
136 1
137 1
138 3
139 1
140 2
141 2
142 3
143 2
144 3
145 7
146 3
147 3
148 1
149 0
150 4
151 1
152 0
153 0
154 1
155 3
156 2
157 2
158 6
159 3
160 1
161 2
162 2
163 2
164 2
165 2
166 4
167 1
168 3
169 3
170 3
171 3
172 3
173 5
174 0
175 2
176 3
177 4
178 4
179 1
180 2
181 1
182 1
183 3
184 7

KeyboardInterrupt: 

In [86]:
from array import array

In [90]:
pred_scores = array('f', 200*[0.])

In [91]:
pred_scores[0] = 5
pred_scores[1] = 5
pred_scores[2] = 5
pred_scores[3] = 5

In [92]:
pred_scores

array('f', [5.0, 5.0, 5.0, 5.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0

20.0