## Generation of Dataset for Pole (01)

Run this notebook to generate multiple line shapes using pole configuration #01: 1 pole in $[bt]$-sheet.

In [None]:
import math
import numpy as np
import cmath as cm
import random
import itertools
import pickle
import os

import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
from matplotlib.ticker import (MultipleLocator, AutoMinorLocator)
from tabulate import tabulate

import sklearn
from sklearn.utils import shuffle

# Import the module containing the class pole
import import_ipynb
import module_Poles
from module_Poles import T1, T2, T4, Nreal, Nimag, TotalN
from module_Poles import unif_pole, gen_Eaxis, skip_duplicate, dalitz, polynom
from module_Poles import Ereal, Eimag, Erealfar, Eimagfar
from module_Poles import energy_low, energy_high
from module_Poles import invmass, weighted_candidates, upper_err

# Make sure the output folder for the generated datasets exists.
# exist_ok=True makes the call a no-op when the folder is already there.
directory = 'Datasets'
os.makedirs(directory, exist_ok=True)
out = directory

### Define function

We create a function that generates one pole in the $[bt]$-sheet and constructs its line shape, repeating this `TotalN` times.

In [None]:
# Generate dataset for output label 01: '1 pole in [bt]'
def gen_ccdataset01(Ereal, Eimag, Erealfar, Eimagfar):
    """Generate line shapes for pole configuration 01: 1 pole in the [bt]-sheet.

    One sample is produced for every index pair in
    ``range(Nreal) x range(Nimag)``. Each sample multiplies the S-matrix of
    the relevant pole by those of six background (BG) poles, each of which is
    randomly switched on or off, and then combines the squared partial-wave
    amplitude with a polynomial background and the phase space.

    Parameters
    ----------
    Ereal, Eimag
        Candidate real / imaginary parts of the relevant pole position.
        NOTE: shuffled in place, so the caller's arrays are reordered.
    Erealfar, Eimagfar
        Candidate real / imaginary parts of the BG pole positions.
        NOTE: shuffled in place as well.

    Returns
    -------
    tuple
        ``(energy_axis, ampt11_real, ampt11_imag, amplitude, labelout,
        data_info)`` -- six lists with one entry per generated sample.
        Each ``data_info`` entry is ``[positions, Riemann sheets, on/off
        flags]`` for the relevant pole followed by the six BG poles.
    """
    # Number of background poles combined with the relevant pole
    n_background = 6

    # Create arrays to store the generated amplitude, energy axis, labels, and parameter info
    energy_axis = []
    ampt11_real = []
    ampt11_imag = []
    amplitude = []
    labelout = []
    data_info = []

    # Possible Riemann sheets
    RSpole = [[-1, 1], [-1, -1], [1, -1]]

    # Randomly choose a Riemann sheet index for a BG pole.
    # Index 0 (the relevant sheet) is never chosen for the background.
    def rs_choice():
        return np.random.choice([1, 2])

    # Shuffle indexing (in place)
    for lst in [Ereal, Eimag, Erealfar, Eimagfar]:
        for _ in range(10):
            np.random.shuffle(lst)

    # Generate poles in each Riemann sheet
    index_grid = itertools.product(range(Nreal), range(Nimag))
    for counter, (real1, imag1) in enumerate(index_grid, start=1):

        # To avoid repeated poles, we introduce random index
        indx = skip_duplicate(real1, imag1, Nreal, Nimag)

        # Generate relevant 1 pole in [bt]-sheet
        pole01 = unif_pole(RSpole[0], Ereal[indx[0][0]], Eimag[indx[1][0]])

        # Generate BG poles in different Riemann sheets.
        # BG pole k uses index slot k (0..5). Previously the first two BG
        # poles both read slot 1, so they always shared the same position.
        bg_poles = [
            unif_pole(RSpole[rs_choice()], Erealfar[indx[0][k]], Eimagfar[indx[1][k]])
            for k in range(n_background)
        ]

        # Generate random energy axis
        Einput = np.array(gen_Eaxis())

        # Calculate the phase space from the Dalitz plot
        phase_space = dalitz(Einput)

        # Cut both axes to our region of interest: energy_low <= E < energy_high
        in_window = (Einput >= energy_low) & (Einput < energy_high)
        phase_space = phase_space[in_window]
        Einput = Einput[in_window]

        # Calculate S-matrix contribution of the relevant pole for all energy points
        smat11_01 = np.array(pole01.smat11(Einput))

        # Calculate S-matrix contribution for each BG pole for all energy points
        bg_smats = [np.array(pole.smat11(Einput)) for pole in bg_poles]

        # Random on/off switch for irrelevant poles
        switches = [np.random.choice([0, 1]) for _ in range(n_background)]

        # Calculate combined S-matrix: a switched-off pole contributes a factor of 1
        SMAT = smat11_01
        for q, smat_bg in zip(switches, bg_smats):
            SMAT = SMAT * ((1 - q) + q * smat_bg)

        # Get partial-wave amplitude
        pwat11 = (SMAT - 1.0) / (2j)
        pwat11_real = pwat11.real
        pwat11_imag = pwat11.imag
        PWAT = pwat11_real**2.0 + pwat11_imag**2.0

        # Combine with a polynomial bg and get the total lineshape
        poly_bg = polynom(Einput)
        A = np.random.uniform(0.35, 0.65)
        totamp = np.rot90(phase_space)*(PWAT + poly_bg)*A
        totamp = np.rot90(totamp, k=3)
        # Entries with zero phase space are set to -1 so that the +1 shift
        # below leaves them at exactly 0 before summing
        totamp[np.where(phase_space==0)] = -1
        totamp += 1
        totamp = np.sum(totamp, axis=1)

        # Collect data
        energy_axis.append(Einput)
        ampt11_real.append(pwat11_real)
        ampt11_imag.append(pwat11_imag)
        amplitude.append(totamp)

        # Create label (as output in DNN)
        labelout.append(1)

        # Collect parameters (the relevant pole is always active, hence the leading 1)
        information = [[pole01.pos] + [pole.pos for pole in bg_poles],
                       [pole01.RS] + [pole.RS for pole in bg_poles],
                       [1] + switches]

        data_info.append(information)

        # Print a counter to keep track of progress
        if counter < 10 or counter % 100 == 0:
            print(f'Generated {counter} of {TotalN}')

    return energy_axis, ampt11_real, ampt11_imag, amplitude, labelout, data_info

### Generate Dataset

Let us now allow the computer to generate the dataset. This is where we have to wait.

In [None]:
# Run the generator and unpack its six per-sample lists
(P01data_energy,
 P01data_t11real,
 P01data_t11imag,
 P01data_amps,
 P01outputs,
 P01data_info) = gen_ccdataset01(Ereal, Eimag, Erealfar, Eimagfar)

print("Done generating!")

### Inspect and Export

Plot the generated line shapes and inspect the validity of the dataset.

In [None]:
# Randomly select one amplitude from the generated dataset
indx = random.sample(range(len(P01data_amps)), 1)[0]

# Plot PWAT_11 (left panel)
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))
ax1.plot(P01data_energy[indx], P01data_t11imag[indx], label = "$Im(T_{11})$", color="tab:orange", linewidth = 2)
ax1.plot(P01data_energy[indx], P01data_t11real[indx], label = "$Re(T_{11})$", color="tab:green", linewidth = 2)
ax1.plot(P01data_energy[indx], P01data_t11real[indx]**2 + P01data_t11imag[indx]**2, label = "$|T_{11}|^2$", color="tab:blue",linewidth = 2)
ax1.legend(loc = 'upper left', fontsize = 9)
ax1.set_xticks(np.arange(energy_low,energy_high,100))
ax1.set_yticks([0.0, 0.5, 1.0])
ax1.xaxis.set_major_locator(MultipleLocator(25))
ax1.xaxis.set_minor_locator(MultipleLocator(5))
ax1.set_xlim([energy_low,energy_high])
ax1.margins(x=0)

# Plot total line shape (right panel).
# All formatting below targets ax2; the previous code referenced an
# undefined name `ax`, which raised NameError before the figure was shown.
ax2.plot(P01data_energy[indx], P01data_amps[indx], label = "Data # "+str(indx), linewidth = 2)
ax2.errorbar(invmass, weighted_candidates, yerr = upper_err, fmt = 'o',
             ecolor = "k", color = "k", elinewidth = 1, markersize = 3.5, label = "LHCb data")
ax2.set_xticks(np.arange(4200,4351,50))
ax2.set_yticks(np.arange(400,801,100))
ax2.xaxis.set_minor_locator(MultipleLocator(10))
ax2.tick_params(axis='both', which='major', labelsize=15)
# Raw string: the label contains LaTeX backslashes (\, and \psi)
ax2.set_xlabel(r'$m_{\,J/\psi\,p}$ [MeV]', fontsize = 15, labelpad=8)
ax2.set_ylabel('Weighted Candidates/(2 MeV)', fontsize = 15, labelpad=10)
ax2.margins(x=0)
ax2.grid(True, linestyle='--')
ax2.legend(loc = 'upper left', fontsize = 12)
ax2.set_xlim([4200,4350])
ax2.set_ylim([400, 850])

plt.show()

# Print the parameters used for the particular line shape.
# pars1 holds [positions, Riemann sheets, on/off flags]; entry 0 of each is
# the relevant [bt] pole and entries 1-6 are the background poles.
pars1 = P01data_info[indx]
row_labels = ["[bt]", "1", "2", "3", "4", "5", "6"]
table1 = [["#","Energy pole (MeV)","RS","Active?"]]
for row, row_label in enumerate(row_labels):
    table1.append([row_label, str(pars1[0][row]), pars1[1][row], pars1[2][row]])

print(f"Data #{indx} parameters:")
print(tabulate(table1, headers="firstrow"))

Once everything is satisfactory, export the dataset.

In [None]:
# Convert arrays
P01data_amps = np.array(P01data_amps)
P01data_energy = np.array(P01data_energy)

# Concatenate energy axis and intensity axis (as input in DNN)
P01inputs = np.concatenate((P01data_energy,P01data_amps),axis=1)

# Export dataset to directory.
# Each file is opened in a `with` block so the handle is flushed and closed
# as soon as the dump finishes (previously the handles were never closed).
# NOTE(review): the inputs file uses the suffix '_valid' while the other three
# use '_test' -- file names are left unchanged here; confirm which is intended.
exports = [
    (P01data_amps, 'P01data_amps_test.pkl'),
    (P01data_energy, 'P01data_energy_test.pkl'),
    (P01inputs, 'P01inputs_valid.pkl'),
    (P01outputs, 'P01outputs_test.pkl'),
]
for payload, filename in exports:
    with open(os.path.join(out, filename), 'wb') as handle:
        pickle.dump(payload, handle, protocol=4)
#pickle.dump(P01data_info, open(os.path.join(out, 'P01data_info.pkl'),'wb'), protocol=4)

print('Done exporting!')