In [1]:
import numpy as np
import pandas as pd
import json

In [2]:
# Inputs for this notebook, given relative to the working directory.
# Binned yield table, read into `df` further down:
events_path = "output/tbl_dataset.jet_pt_binned--JetPt.csv"
# Weighted cutflow summary of the event selection:
cutflow_path = "output/cuts_EventSelection-weighted.csv"

### show the cutflow

In [3]:
# Load the cutflow CSV with pandas' default parsing and show it as the cell output.
cutflow = pd.read_csv(filepath_or_buffer=cutflow_path)
cutflow

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,passed_only_cut,passed_only_cut.1,passed_incl,passed_incl.1,totals_incl,totals_incl.1
0,,,,unweighted,weight,unweighted,weight,unweighted,weight
1,dataset,depth,cut,,,,,,
2,Background,0,All,0,0,0,0,0,0
3,Background,1,jet_pt > 200,84508,84510.59613378995,84508,84510.59613378995,100000,100000
4,Background,1,jet_pt < 800,97837,97834.59955586787,82345,82345.195689657805,84508,84510.59613378995
5,Signal,0,All,0,0,0,0,0,0
6,Signal,1,jet_pt > 200,100000,12500,100000,12500,100000,12500
7,Signal,1,jet_pt < 800,99394,12424.158414084641,99394,12424.158414084641,100000,12500
8,Data,0,All,0,0,0,0,0,0
9,Data,1,jet_pt > 200,99478,99478,99478,99478,115000,115000


### binned yields of events that passed the cuts

In [4]:
# Load the binned yield table; the helper functions below read this frame as `df`.
df = pd.read_csv(filepath_or_buffer=events_path)
df

Unnamed: 0,dataset,jet_pt_binned,n,weighted:sumw,weighted:sumw2
0,Background,"[-inf, 200.0)",0,0.0,0.0
1,Background,"[200.0, 300.0)",22318,22326.030978,22554.532085
2,Background,"[300.0, 400.0)",22801,22798.138083,23024.127428
3,Background,"[400.0, 500.0)",17148,17155.294128,17336.270468
4,Background,"[500.0, 600.0)",11215,11204.588788,11305.493463
5,Background,"[600.0, 700.0)",5939,5931.339805,5982.373416
6,Background,"[700.0, 800.0)",2924,2929.803908,2964.685214
7,Background,"[800.0, inf)",0,0.0,0.0
8,Signal,"[-inf, 200.0)",0,0.0,0.0
9,Signal,"[200.0, 300.0)",0,0.0,0.0


In [5]:
def get_yields_from_dataframe(dataset_name, dataframe=None):
    """Return the per-bin yields of one dataset, without under/overflow bins.

    For the dataset called "Data" the raw counts in column "n" are used; for
    every other dataset the weighted yields in column "weighted:sumw" are used.

    Parameters
    ----------
    dataset_name : str
        Value to match in the "dataset" column.
    dataframe : pandas.DataFrame, optional
        Table holding the "dataset", "n" and "weighted:sumw" columns. When
        omitted, the notebook-level ``df`` is used (the original behaviour).

    Returns
    -------
    list of float
        Yields of the matching rows with the first and last row dropped.

    Raises
    ------
    ValueError
        If no row of the table belongs to ``dataset_name``; previously this
        silently produced an empty list.
    """
    if dataframe is None:
        # fall back to the table loaded earlier in the notebook
        dataframe = df
    yield_column = "n" if (dataset_name == "Data") else "weighted:sumw"
    is_selected = dataframe["dataset"] == dataset_name
    if not is_selected.any():
        raise ValueError("dataset %r not found in the yield table" % (dataset_name,))
    yields = dataframe.loc[is_selected, yield_column].astype(float).values
    # remove under/overflow bins
    # (assumes each dataset's rows start with the underflow and end with the overflow bin)
    return yields[1:-1].tolist()

def get_errors_from_dataframe(dataset_name, dataframe=None):
    """Return the per-bin errors of one dataset, without under/overflow bins.

    The error of each bin is the square root of its "weighted:sumw2" entry.

    Parameters
    ----------
    dataset_name : str
        Value to match in the "dataset" column.
    dataframe : pandas.DataFrame, optional
        Table holding the "dataset" and "weighted:sumw2" columns. When
        omitted, the notebook-level ``df`` is used (the original behaviour).

    Returns
    -------
    list of float
        Errors of the matching rows with the first and last row dropped.

    Raises
    ------
    ValueError
        If no row of the table belongs to ``dataset_name``; previously this
        silently produced an empty list.
    """
    if dataframe is None:
        # fall back to the table loaded earlier in the notebook
        dataframe = df
    error_column = "weighted:sumw2"
    is_selected = dataframe["dataset"] == dataset_name
    if not is_selected.any():
        raise ValueError("dataset %r not found in the yield table" % (dataset_name,))
    errors = np.sqrt(dataframe.loc[is_selected, error_column].astype(float).values)
    # remove under/overflow bins
    # (assumes each dataset's rows start with the underflow and end with the overflow bin)
    return errors[1:-1].tolist()

### create the workspace

In [6]:
regions = ["Signal_region"]
samplenames = ["Background", "Signal"]
measurements_list = ["minimal_example"]

ws = {} # the workspace

# channels: one entry per region, each holding one sample per name in `samplenames`
channels = []
for reg in regions:
    samples = []
    for sam in samplenames:
        # modifiers attached to every sample
        modifiers = [
            {"data": None,
             "name": "lumi",
             "type": "lumi"},
            # per-bin errors taken from the yield table; the name depends only
            # on the region, so it is the same for all samples of a channel
            {"data": get_errors_from_dataframe(sam),
             "type": "staterror",
             "name": "staterror_" + reg},
            {"data": {"hi": 1.02,
                      "lo": 0.98},
             "name": "Luminosity",
             "type": "normsys"},
        ]
        if sam == "Signal":
            # add the free normalization factor for signal
            # (selected as the "poi" in the measurement configuration)
            modifiers.append({"data": None,
                              "name": "Signal_norm",
                              "type": "normfactor"})

        samples.append({"name": sam,
                        "data": get_yields_from_dataframe(sam),
                        "modifiers": modifiers})
    channels.append({"name": reg, "samples": samples})

ws.update({"channels": channels})

# measurements: every entry gets the same configuration, only the name differs
measurements = []
for m in measurements_list:
    # settings of the "lumi" parameter, held fixed at its initial value
    lumi_settings = {"auxdata": [1.0],
                     "bounds": [[0.5, 1.5]],
                     "fixed": True,
                     "inits": [1.0],
                     "name": "lumi",
                     "sigmas": [0.1]}
    measurements.append({"name": m,
                         "config": {"parameters": [lumi_settings],
                                    "poi": "Signal_norm"}})
ws["measurements"] = measurements

# observations: the observed "Data" yields, one entry per region
observations = []
for reg in regions:
    # Build a fresh dict on every iteration. Reusing a single dict created
    # outside the loop would make all list entries alias the same object, so
    # every observation would end up carrying the name of the last region.
    observations.append({"name": reg,
                         "data": get_yields_from_dataframe("Data")})
ws.update({"observations": observations})

# version entry of the workspace
ws["version"] = "1.0.0"

# write the workspace to disk as indented JSON with alphabetically sorted keys
with open("workspace_from_dataframe.json", "w") as outfile:
    json.dump(ws, outfile, sort_keys=True, indent=4)

The resulting workspace, written to `workspace_from_dataframe.json`, should be identical to the one obtained by converting the `.xml` workspace produced by TRExFitter to the same JSON format.