In [1]:
#include <TFile.h>
#include <TTree.h>
#include <TRandom3.h>
#include <TCanvas.h>
#include <TMath.h>
#include <TString.h>
#include <TH2D.h>
#include <iostream>

In [2]:
using namespace std;

# E906 Messy MC Data

Let $\beta_{0}$ and $\beta_{1}$ define two distributions, $p(x|\beta_{0})$ and $p(x|\beta_{1})$, respectively. The likelihood ratio is then defined as:
$$
\mathcal{L}(x| \beta_{0}, \beta_{1}) =  \frac{p(x|\beta_{0})}{p(x|\beta_{1})}
$$

A classifier function $f$, designed to distinguish samples drawn from $p(x|\beta_{0})$ and $p(x|\beta_{1})$, can be used to approximate the likelihood ratio:

$$
\mathcal{L}(x| \beta_{0}, \beta_{1}) \approx  \frac{f(x, \beta_{0}, \beta_{1})}{1 - f(x, \beta_{0}, \beta_{1})}
$$

Consider the angular distribution of the Drell-Yan (DY) cross-section:

$$
\frac{d\sigma}{d\Omega} \propto 1 + \lambda \cos^{2}\theta + \mu\sin2\theta\cos\phi + \frac{\nu}{2}\sin^{2}\theta\cos2\phi
$$

Our goal is to extract the Drell-Yan (DY) angular coefficients $\lambda$, $\mu$, and $\nu$ using the likelihood ratio method. To do so, we train a classifier (a neural network) to distinguish two samples. We sample $\lambda$, $\mu$, and $\nu$ from uniform ranges: $\lambda$ in $[-1, 1]$, and $\mu$ and $\nu$ in $[-0.5, 0.5]$. From the $\phi$ vs. $\cos\theta$ histograms we extract the bin centers, which serve as the classifier inputs, and the bin contents, which serve as weights in the loss function. The two classes are defined as follows:

```
H0: (lambda=0, mu=0, nu=0, phi, costheta) with beta = (lambda, mu, nu) --> label = 0
H1: (lambda, mu, nu, phi, costheta) with beta = (lambda, mu, nu) --> label = 1
```

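Note that the class with label 0 has mismatching $\beta$ values: its histograms are weighted with $\lambda = \mu = \nu = 0$, while the $\beta$ stored alongside it is the sampled $(\lambda, \mu, \nu)$.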

In [3]:
// Value of pi; used as the phi-axis limits (-PI, PI) of the histograms.
double PI = TMath::Pi();
// Number of bins along each axis of the hPar / hDet histograms.
int NBINS = 15;

// Upper limit on the number of accepted events filled per fill_histo call.
int num_reco = 10000;
// Number of (lambda, mu, nu) samples; also the size of the lambda/mu/nu arrays
// and the iteration count of the training-tree loops.
const int num_samples = 1000000;

In [4]:
// Name of the input ROOT file that is opened read-only further below.
TString fname = "split.root";

// Angular coefficients used to weight and tag the X1 test tree.
double lambda_secret = 0.8;
double mu_secret = 0.1;
double nu_secret = 0.2;

In [5]:
// Global state and function prototypes shared by all cells below.
#ifndef _TREE_DATA__HH_
#define _TREE_DATA__HH_

// Buffers bound to the branches of the output tree (see make_tree).
// thetas: the (lambda, mu, nu) values stored with each entry.
double thetas[3];
// X_par / X_det: x- and y-axis bin centers taken from hPar / hDet.
double X_par[2];
double X_det[2];
// W_par / W_det: [0] bin content and [1] bin error taken from hPar / hDet.
double W_par[2];
double W_det[2];
// Class label stored with each entry (set to 0.0 or 1.0 by the filling cells).
double label;

// Buffers bound to the input-tree branches of the same names in fill_histo.
// The true_* pair fills hPar, the other pair fills hDet.
double true_phi;
double true_costh;
double phi;
double costh;

// Output tree currently being filled; (re)allocated by make_tree.
TTree* save;

// Sampled angular coefficients, one triple per training sample.
double lambda[num_samples];
double mu[num_samples];
double nu[num_samples];

// Histograms rebuilt on every fill_histo call; the caller deletes them.
TH2D* hPar;
TH2D* hDet;

// Single random generator used for parameter sampling, event acceptance
// and bin selection.
TRandom3* event = new TRandom3();

// Per-event weight for the given angular coefficients and angles.
double weight_fn(double lambda, double mu, double nu, double phi, double costh);

// Allocates `save` with name/title tname and attaches the branches.
void make_tree(TString tname);

// Fills and normalises hPar / hDet from tree `tname` of file `inputs`.
void fill_histo(TFile* inputs, TString tname, double lambda, double mu, double nu);

// Stores one randomly chosen non-empty bin of hPar / hDet in `save`.
void fill_train_tree();

// Stores every bin of hPar / hDet in `save`.
void fill_test_tree();

#endif /*_TREE_DATA__HH_*/

In [6]:
/*
 * Event weight for the angular coefficients (lambda, mu, nu) at the angles
 * (phi, costh).
 *
 * The numerator is
 *   1 + lambda cos^2(theta) + mu sin(2 theta) cos(phi)
 *     + (nu/2) sin^2(theta) cos(2 phi)
 * written in terms of costh, with sin(theta) taken as sqrt(1 - costh^2).
 * The result is divided by (1 + costh^2).
 * NOTE(review): the division presumably removes a 1 + cos^2(theta) shape
 * already present in the input sample -- confirm against the generator.
 */
double weight_fn(double lambda, double mu, double nu, double phi, double costh)
{
    const double polar_term = lambda* costh* costh;
    const double mu_term = 2.* mu* costh* sqrt(1. - costh* costh) *cos(phi);
    const double nu_term = 0.5* nu* (1. - costh* costh)* cos(2.* phi);

    const double numerator = 1. + polar_term + mu_term + nu_term;
    const double denominator = 1. + costh* costh;

    return numerator/denominator;
}

In [7]:
/*
 * Allocate the global output tree `save` and attach its branches.
 *
 * tname is used as both the name and the title of the new tree. Each branch
 * is bound to the global buffer of the same name, so a later save->Fill()
 * stores whatever those buffers hold at that moment.
 */
void make_tree(TString tname)
{
    const char* tree_id = tname.Data();
    save = new TTree(tree_id, tree_id);

    // parameter tag
    save->Branch("thetas", &thetas, "thetas[3]/D");

    // bin centers
    save->Branch("X_par", &X_par, "X_par[2]/D");
    save->Branch("X_det", &X_det, "X_det[2]/D");

    // bin content and error
    save->Branch("W_par", &W_par, "W_par[2]/D");
    save->Branch("W_det", &W_det, "W_det[2]/D");

    // class label
    save->Branch("label", &label, "label/D");
}

In [8]:
/*
 * Build the global histograms hPar and hDet from tree `tname` of `inputs`.
 *
 * Events are read in order; each one is kept when a uniform draw in (0, 1)
 * exceeds 0.5. A kept event is weighted with
 * weight_fn(lambda, mu, nu, true_phi, true_costh) and filled into hPar at
 * (true_phi, true_costh) and into hDet at (phi, costh). Reading stops at the
 * end of the tree or after num_reco kept events, whichever comes first.
 * Both histograms are then scaled to unit integral.
 *
 * hPar and hDet are always allocated, so the caller can (and must) delete
 * them. If the tree cannot be found, an error is printed and the histograms
 * are left empty. A histogram whose integral is zero is left unscaled
 * instead of being filled with non-finite values.
 */
void fill_histo(TFile* inputs, TString tname, double lambda, double mu, double nu)
{
    TTree* tree = 0;
    if(inputs)
    {
        tree = dynamic_cast<TTree*>(inputs->Get(tname.Data()));
    }

    hPar = new TH2D("hPar", "hPar", NBINS, -PI, PI, NBINS, -0.6, 0.6);
    hDet = new TH2D("hDet", "hDet", NBINS, -PI, PI, NBINS, -0.5, 0.5);

    if(!tree)
    {
        std::cerr << "---> fill_histo: tree " << tname.Data() << " not found, histograms left empty" << std::endl;
        return;
    }

    // GetEntries() returns a 64-bit count; keep the full width
    const Long64_t nevents = tree->GetEntries();

    tree->SetBranchAddress("true_phi",          &true_phi);
    tree->SetBranchAddress("true_costh",        &true_costh);
    tree->SetBranchAddress("phi",               &phi);
    tree->SetBranchAddress("costh",             &costh);

    int nfill = 0;

    for(Long64_t ii = 0; ii < nevents && nfill < num_reco; ii++)
    {
        tree->GetEntry(ii);

        // random sub-sampling: keep the event only when the draw exceeds 0.5
        const double acc = event->Uniform(0., 1.);
        if(acc > 0.5)
        {
            // the weight is always evaluated at the true angles
            const double weight = weight_fn(lambda, mu, nu, true_phi, true_costh);
            hPar->Fill(true_phi, true_costh, weight);
            hDet->Fill(phi, costh, weight);
            nfill++;
        }
    }

    // normalise to unit integral, guarding against a zero denominator
    const double par_integral = hPar->Integral();
    if(par_integral != 0.)
    {
        hPar->Scale(1./par_integral);
    }

    const double det_integral = hDet->Integral();
    if(det_integral != 0.)
    {
        hDet->Scale(1./det_integral);
    }
}

In [9]:
/*
 * Store one randomly chosen bin of hPar / hDet as a single entry of `save`.
 *
 * A bin (same x and y index in both histograms) is drawn at random. Its bin
 * centers go to X_par / X_det, its content and error to W_par / W_det. The
 * entry is written only if both contents are positive; otherwise another
 * bin is drawn, up to NBINS*NBINS attempts. If no attempt succeeds, nothing
 * is written for this call.
 */
void fill_train_tree()
{
    for(int ii = 0; ii < NBINS* NBINS; ii++)
    {
        // Integer(NBINS) is uniform on [0, NBINS-1], so after the +1 shift the
        // index is a regular bin 1..NBINS. Rounding a Uniform(0, NBINS) draw
        // to the nearest integer would also produce NBINS, i.e. the overflow
        // bin NBINS+1, and would pick the two edge bins half as often.
        const int binx = static_cast<int>(event->Integer(NBINS)) + 1;
        const int biny = static_cast<int>(event->Integer(NBINS)) + 1;

        X_par[0] = hPar->GetXaxis()->GetBinCenter(binx);
        X_par[1] = hPar->GetYaxis()->GetBinCenter(biny);
        W_par[0] = hPar->GetBinContent(binx, biny);
        W_par[1] = hPar->GetBinError(binx, biny);

        X_det[0] = hDet->GetXaxis()->GetBinCenter(binx);
        X_det[1] = hDet->GetYaxis()->GetBinCenter(biny);
        W_det[0] = hDet->GetBinContent(binx, biny);
        W_det[1] = hDet->GetBinError(binx, biny);

        // accept the first bin that is populated in both histograms
        if(W_par[0] > 0. && W_det[0] > 0.)
        {
            save->Fill();
            break;
        }
    }
}

In [10]:
/*
 * Store every regular bin of hPar / hDet as one entry of `save`.
 *
 * Bins are visited with the x index in the outer loop and the y index in the
 * inner loop, giving NBINS*NBINS entries. No selection on the bin content is
 * applied, so empty bins are written too.
 */
void fill_test_tree()
{
    // ROOT bin numbers of the regular bins run from 1 to NBINS
    for(int xbin = 1; xbin <= NBINS; ++xbin)
    {
        for(int ybin = 1; ybin <= NBINS; ++ybin)
        {
            // values taken from hPar
            X_par[0] = hPar->GetXaxis()->GetBinCenter(xbin);
            X_par[1] = hPar->GetYaxis()->GetBinCenter(ybin);
            W_par[0] = hPar->GetBinContent(xbin, ybin);
            W_par[1] = hPar->GetBinError(xbin, ybin);

            // values taken from hDet
            X_det[0] = hDet->GetXaxis()->GetBinCenter(xbin);
            X_det[1] = hDet->GetYaxis()->GetBinCenter(ybin);
            W_det[0] = hDet->GetBinContent(xbin, ybin);
            W_det[1] = hDet->GetBinError(xbin, ybin);

            save->Fill();
        }
    }
}

In [11]:
// get random numbers
for(int ii = 0; ii < num_samples; ii++)
{
    lambda[ii] = event->Uniform(-1., 1.);
    mu[ii] = event->Uniform(-0.5, 0.5);
    nu[ii] = event->Uniform(-0.5, 0.5);
}

In [12]:
// save outputs
TFile* inputs = TFile::Open(fname.Data(), "READ");

TFile* outputs = new TFile("net.root", "RECREATE");

In [13]:
// make X0 samples
cout << "---> make X0 train tree " << endl;
make_tree("X0_train_tree");

for(int ii = 0; ii < num_samples; ii++)
{
    thetas[0] = lambda[ii];
    thetas[1] = mu[ii];
    thetas[2] = nu[ii];
    
    label = 0.0;
    
    fill_histo(inputs, "X0_train", 0.0, 0.0, 0.0);
    fill_train_tree();
    delete hPar;
    delete hDet;
    if(ii%10000==0){cout << "---> " << ii << " events completed " << endl;}
}


outputs->cd();
save->Write();
delete save;

---> make X0 train tree 
---> 0 events completed 
---> 10000 events completed 
---> 20000 events completed 
---> 30000 events completed 
---> 40000 events completed 
---> 50000 events completed 
---> 60000 events completed 
---> 70000 events completed 
---> 80000 events completed 
---> 90000 events completed 
---> 100000 events completed 
---> 110000 events completed 
---> 120000 events completed 
---> 130000 events completed 
---> 140000 events completed 
---> 150000 events completed 
---> 160000 events completed 
---> 170000 events completed 
---> 180000 events completed 
---> 190000 events completed 
---> 200000 events completed 
---> 210000 events completed 
---> 220000 events completed 
---> 230000 events completed 
---> 240000 events completed 
---> 250000 events completed 
---> 260000 events completed 
---> 270000 events completed 
---> 280000 events completed 
---> 290000 events completed 
---> 300000 events completed 
---> 310000 events completed 
---> 320000 events completed 

In [14]:
// make X1 samples
cout << "---> make X1 train tree " << endl;
make_tree("X1_train_tree");


for(int ii = 0; ii < num_samples; ii++)
{
    thetas[0] = lambda[ii];
    thetas[1] = mu[ii];
    thetas[2] = nu[ii];
    
    label = 1.0;
    
    fill_histo(inputs, "X1_train", lambda[ii], mu[ii], nu[ii]);
    fill_train_tree();
    delete hPar;
    delete hDet;
    
    if(ii%10000==0){cout << "---> " << ii << " events completed " << endl;}
}


outputs->cd();
save->Write();
delete save;

---> make X1 train tree 
---> 0 events completed 
---> 10000 events completed 
---> 20000 events completed 
---> 30000 events completed 
---> 40000 events completed 
---> 50000 events completed 
---> 60000 events completed 
---> 70000 events completed 
---> 80000 events completed 
---> 90000 events completed 
---> 100000 events completed 
---> 110000 events completed 
---> 120000 events completed 
---> 130000 events completed 
---> 140000 events completed 
---> 150000 events completed 
---> 160000 events completed 
---> 170000 events completed 
---> 180000 events completed 
---> 190000 events completed 
---> 200000 events completed 
---> 210000 events completed 
---> 220000 events completed 
---> 230000 events completed 
---> 240000 events completed 
---> 250000 events completed 
---> 260000 events completed 
---> 270000 events completed 
---> 280000 events completed 
---> 290000 events completed 
---> 300000 events completed 
---> 310000 events completed 
---> 320000 events completed 

In [15]:
// make X0 secret data

cout << "---> make X0 test tree " << endl;
make_tree("X0_test_tree");

thetas[0] = 0.0;
thetas[1] = 0.0;
thetas[2] = 0.0;

label = 0.0;

fill_histo(inputs, "X0_test", 0.0, 0.0, 0.0);
fill_test_tree();

delete hPar;
delete hDet;

outputs->cd();
save->Write();
delete save;

---> make X0 test tree 


In [16]:
// Build the X1 (label 1) test tree: every bin of histograms weighted with
// the secret coefficients, tagged with those same coefficients.
cout << "---> make X1 test tree " << endl;
make_tree("X1_test_tree");

label = 1.0;

// tag: (lambda, mu, nu) = the secret values
thetas[0] = lambda_secret;
thetas[1] = mu_secret;
thetas[2] = nu_secret;

fill_histo(inputs, "X1_test", lambda_secret, mu_secret, nu_secret);
fill_test_tree();

// the histograms are no longer needed once the tree is filled
delete hPar;
delete hDet;

// write the finished tree into the output file and release it
outputs->cd();
save->Write();
delete save;

---> make X1 test tree 


In [17]:
outputs->Close();