# Untagged cross-section ratio of $(e,e'\pi^+)/(e,e'\pi^-)$ 

$$\left(\frac{\sigma(e,e'\pi^+)}{\sigma(e,e'\pi^-)}\right)$$

last edit Aug-29, 2022

## Imports and definitions

In [1]:
import sys; 
software_path = '/Users/erezcohen/Desktop/Software/'
sys.path.insert(0, software_path + '/mySoftware/Python/');
sys.path.insert(0, software_path + '/CLAS12/BAND/SIDIS_at_BAND/PythonAnalysis/AcceptanceCorrections/');
sys.path.insert(0, software_path + '/CLAS12/BAND/SIDIS_at_BAND/PythonAnalysis/python_auxiliary/');
from my_tools                     import *; 
from plot_tools                   import *;
from my_data_analysis_tools       import *;
from acceptance_correction_tools  import *;
from sidis_analysis_tools         import *;

In [2]:
# IPython magic (notebook-only, not plain Python): set the inline-backend figure format to 'retina'.
%config InlineBackend.figure_format = 'retina'
# Use the STIX fonts for both math text and regular text in all figures.
# NOTE(review): `plt` is not imported explicitly in this notebook; presumably it comes
# from one of the star-imports above - confirm.
plt.rcParams['mathtext.fontset']    = 'stix'
plt.rcParams['font.family']         = 'STIXGeneral'

## (1) Load data

In [None]:

# ----------------------- #
def _read_eepi_csv(filename, do_all_vars):
    '''
    Read one (e,e'π) kinematics CSV file into a pandas.DataFrame.

    input:
    -------------
    filename        full path of the CSV file
    do_all_vars     True:  read every column with the pandas default dtypes
                    False: read only the columns listed below, with compact dtypes

    return:
    -------------
    pandas.DataFrame with the events of this file
    '''
    if do_all_vars:
        return pd.read_csv(filename)

    # more economic: keep a fixed subset of columns, integers as int and all the rest as half-precision
    # NOTE(review): np.half is a 16-bit float (roughly 3 significant decimal digits);
    # confirm that this precision is sufficient for the cuts applied downstream.
    int_columns   = ['runnum','evnum','e_DC_sector','pi_DC_sector']
    float_columns = ['e_P','e_Theta','e_Phi',
                     'pi_P','pi_Theta','pi_Phi',
                     'Q2','W',
                     'xB','Zpi',
                     'M_x',
                     'pi_qFrame_pT','pi_qFrame_pL']
    dtype = {column: int for column in int_columns}
    dtype.update({column: np.half for column in float_columns})
    return pd.read_csv(filename, usecols=int_columns + float_columns, dtype=dtype)


def _concat_frames(frames):
    '''Merge a non-empty list of per-run DataFrames; a single frame is returned as-is (no copy).'''
    return frames[0] if len(frames) == 1 else pd.concat(frames)


def load_SIDIS_data(runs_filename  = "good_runs_10-2-final.txt",
                    main_data_path = '/Users/erezcohen/Desktop/data/BAND/',
                    Nruns          = 1,
                    do_e_e_pi      = True,
                    do_e_e_pi_n    = True,
                    do_e_e_pi_FreeP= True,
                    do_all_vars    = False,
                    fdebug         = 2,
                    prefix         = "sidisdvcs",
                    subdirname     = "",
                    FreeP_prefix   = "ntupleNew"):
    '''
    Load SIDIS data, and fill e_e_pi, e_e_pi_n and e_e_pi_FreeP with data
    last update Sep-8, 2022

    input:
    -------------
    runs_filename   name of the run-list file, forwarded to read_run_nunmbers()
    main_data_path  top directory under which all the data files are looked for
    Nruns           forwarded to read_run_nunmbers()
                    (the notebook calls it with -1, presumably "all runs" - TODO confirm)
    do_e_e_pi       flag to read d(e,e'π) data  from RGB - takes much time for a large number of runs
    do_e_e_pi_n     flag to read d(e,e'πn) data from RGB - takes less time
    do_e_e_pi_FreeP flag to read p(e,e'π) data from RGA  - takes much time
    do_all_vars     True: read all CSV columns; False: read a reduced set of columns
                    with compact dtypes (applies to the (e,e'π) files only)
    fdebug          >0 prints the total statistics, >1 also prints a line per loaded file
    prefix          "sidisdvcs" / "inc"      - inclusive skimming train
    subdirname      "With_W0.5cut" / "With_W2.5cut"
    FreeP_prefix    file-name prefix of the free-proton (RGA) files

    Comments:
    -------------
    e_e_pi, e_e_pi_n, e_e_pi_FreeP       dict(['piplus','piminus'])
    e.g. :
    e_e_pi['piplus'] = pandas.DataFrame( (e,e'π) events data )

    These dictionaries must already exist in the global scope; they are filled in place
    and nothing is returned.
    The per-run DataFrames are collected in lists and concatenated once at the end,
    instead of re-copying the accumulated DataFrame for every run.
    '''
    global e_e_pi, e_e_pi_n, e_e_pi_FreeP;

    e_e_pi_data_path       = main_data_path + 'SIDIS_skimming/' + prefix + '/' + subdirname + '/'
    e_e_pi_n_data_path     = main_data_path + 'merged_SIDIS_and_BAND_skimming/'
    e_e_pi_FreeP_data_path = main_data_path + 'RGA_Free_proton/'

    runs = read_run_nunmbers(runs_filename=runs_filename,Nruns=Nruns)

    # per-run DataFrames, grouped by pion charge
    eepi_frames, eepin_frames = dict(), dict()

    for runIdx,runnum in enumerate(runs):
        if fdebug>1: print('Run number ',runnum,'(%d/%d runs)'%(runIdx+1,len(runs)))
        for pi_charge_name,pi_print in zip(pi_charge_names,pi_prints):
            if do_e_e_pi:
                eepi = _read_eepi_csv(e_e_pi_data_path
                                      +'skimmed_SIDIS_'
                                      +prefix + '_'
                                      +'00%d_e_%s_selected_eepi_kinematics.csv'%(runnum,pi_charge_name),
                                      do_all_vars)
                eepi_frames.setdefault(pi_charge_name,[]).append(eepi)
                if fdebug>1: print('Loaded',len(eepi)," d(e,e'"+pi_print+") events")

            if do_e_e_pi_n:
                eepin = pd.read_csv(e_e_pi_n_data_path
                                    + 'skimmed_SIDIS_and_BAND_'
                                    + prefix + '_'
                                    + '00%d_e_%s_n.csv'%(runnum,pi_charge_name))
                if fdebug>1: print('Loaded',len(eepin)," d(e,e'"+pi_print+"n) events")
                eepin_frames.setdefault(pi_charge_name,[]).append(eepin)

    # a single concatenation per pion charge; charges with no loaded file are left untouched
    for pi_charge_name,frames in eepi_frames.items():
        e_e_pi[pi_charge_name] = _concat_frames(frames)
    for pi_charge_name,frames in eepin_frames.items():
        e_e_pi_n[pi_charge_name] = _concat_frames(frames)

    if do_e_e_pi_FreeP:
        for pi_charge_name,pi_print in zip(pi_charge_names,pi_prints):
            eepi = _read_eepi_csv(e_e_pi_FreeP_data_path
                                  +FreeP_prefix
                                  +'_e_e_%s_selected_eepi_kinematics.csv'%(pi_charge_name),
                                  do_all_vars)

            # Aug-2022: in e_e_pi_FreeP we only have 1 data-file
            e_e_pi_FreeP[pi_charge_name] = eepi
            if fdebug>1: print('Loaded',len(eepi)," p(e,e'"+pi_print+") events")

    print('Done loading files.')

    if fdebug>0:
        print('')
        print('Total statistics:')
        for pi_charge_name,pi_print in zip(pi_charge_names,pi_prints):
            if do_e_e_pi:       print(len(e_e_pi[pi_charge_name])      ," d(e,e'"+pi_print+")  events")
            if do_e_e_pi_n:     print(len(e_e_pi_n[pi_charge_name])    ," d(e,e'"+pi_print+"n) events")
            if do_e_e_pi_FreeP: print(len(e_e_pi_FreeP[pi_charge_name])," p(e,e'"+pi_print+")  events")
# ----------------------- #




In [3]:
# Load only the d(e,e'π) data (tagged-neutron and free-proton samples are switched off), with no debug printouts.
# Nruns=-1 is forwarded to the run-list reader; presumably it means "all runs" - TODO confirm.
# NOTE(review): the error recorded below this cell complains about the columns 'W_d' and 'M_x_d',
# which the load_SIDIS_data() defined in this notebook never requests; presumably a different
# (imported) version of the function was the one that actually ran - confirm.
load_SIDIS_data( Nruns = -1, do_e_e_pi_n=False, do_e_e_pi_FreeP=False, fdebug=0, )

ValueError: Usecols do not match columns, columns expected but not found: ['W_d', 'M_x_d']

## (2) Apply selection cuts not previously imposed

In [None]:
import time

In [None]:
# Number of loaded (e,e'π+) events, in units of a million (1e6) events
print('number of pi+ events: %.1f'%(len(e_e_pi['piplus'])/1e6),'M')

In [None]:
# Apply the further selection cuts to the loaded data, and time the call.
Nevents = -1 # forwarded as NeventsMax; a negative value is presumably "no limit" - TODO confirm
t0 = time.time()
# Only the first returned item (the events that pass the cuts) is used. It is taken by index
# since the notebook unpacks a different number of returned values in different cells.
selection_results = apply_further_selection_cuts_to_data(fdebug=1,
                                                         NeventsMax=Nevents,
                                                         NMaxPerSubset=500000)
e_e_pi_pass_cuts  = selection_results[0]
t1 = time.time()

# Normalize the timing per event. Dividing by the Nevents=-1 sentinel gives a negative,
# meaningless number, so in that case use the number of loaded events instead.
# NOTE(review): assumes the function processes all events stored in e_e_pi - confirm.
Nprocessed = Nevents if Nevents > 0 else sum(len(df) for df in e_e_pi.values())
if Nprocessed > 0:
    print("Time elapsed: %.2f"%(t1 - t0), 'sec (%g us/event)'%((t1 - t0)/Nprocessed*1.e6)) # wall-clock seconds elapsed (floating point)
else:
    print("Time elapsed: %.2f"%(t1 - t0), 'sec')

## (3) Extract cross-section ratio as a function of Bjorken $x$
For a standing (at-rest) proton use $x_B$, defined as
$x_B = Q^2/(2 m_p \omega)$

In [None]:
# z bins: bin centers from 0.3 in steps of 0.05 (0.85 excluded), each with a half-width of 0.01
z_bins   = np.arange(0.3,0.85,0.05)
z_widths = np.full(len(z_bins), 0.01)

# x bins: 11 equally spaced edges between 0.2 and 0.6,
# with x the bin centers and x_err half of the bin width
x_bins   = np.linspace(0.2,0.6,11)
x        = 0.5*(x_bins[:-1] + x_bins[1:])
x_err    = 0.5*np.diff(x_bins)

print('x bins:',x_bins)
print('z bins:',z_bins)
# print the z range covered by each z bin
for z_bin,z_width in zip(z_bins,z_widths):
    z_min = z_bin-z_width
    z_max = z_bin+z_width
    print('%.3f < z < %.3f:'%(z_min,z_max))

In [None]:
# Choose which definition of x is used; `var` is later passed as x_var to the ratio extraction
var_label = "standing proton x"
# var_label = "moving proton x"

# supported choices: label -> (variable name, axis label)
_x_variable_choices = {
    "standing proton x": ('xB',      "Bjorken $x$"),
    "moving proton x":   ('xPrime2', "$x' = Q^2/(W'^2 - m_N^2 + Q^2)$"),
}
# an unknown label leaves var and xlabel untouched
if var_label in _x_variable_choices:
    var, xlabel = _x_variable_choices[var_label]

# (4) Extract tabular data and save results to file

In [None]:
# Extract the ratio from the events that passed the selection cuts, in the x and z bins
# defined above, using the x variable chosen in `var`.
# This is the only extraction call in the notebook that passes W_min (=2.5).
# NOTE(review): prefix/suffix presumably build the output file names - confirm in extract_SIDIS_ratio().
extract_SIDIS_ratio(df_dict  = e_e_pi_pass_cuts,
                           x_var    = var ,                           
                           x_bins   = x_bins,                           
                           z_bins   = z_bins,                           
                           z_widths = z_widths,                           
                           fdebug   = 0,
                           W_min    = 2.5,
                           prefix   = 'Untagged_SIDIS_ratio_',                           
                           suffix   = '')

## load results and plot them

In [None]:
# Load the results with the same prefix/suffix that were used in the extraction, and plot them
# NOTE(review): this cell calls load_SIDIS_ratio() while later cells call
# load_SIDIS_ratio_DataFrame(); confirm that both names exist in the imported tools.
prefix = 'Untagged_SIDIS_ratio_'
suffix = ''
SIDIS_results = load_SIDIS_ratio(#z_bins=z_bins,z_widths=z_widths,
                                           prefix = prefix, suffix = suffix, doPlotResults=True)
# restrict the y-axis of the current axes to the range 1 - 2.5
ax = plt.gca()
ax.set_ylim(1,2.5);

# (5) Extract modified results
With no acceptance matching cut in $p-\theta$ plane

## (5.1) Results in different bins of M_x

In [None]:
# Edges of consecutive M_x bins; bin i spans M_x_min_arr[i] to M_x_max_arr[i]
M_x_edges   = [0.5,1.0,1.5,2.0,2.5,3.0]
M_x_min_arr = M_x_edges[:-1]
M_x_max_arr = M_x_edges[1:]

In [None]:
# Repeat the ratio extraction separately in each M_x bin.
# The M_x limits are passed to extract_SIDIS_ratio() and are also encoded in the suffix,
# so that every bin gets its own suffix; the data_path is a hard-coded local directory.
# NOTE(review): W_min is not passed here, unlike in the nominal extraction above - confirm this is intended.
for M_x_min,M_x_max in zip(M_x_min_arr,M_x_max_arr):
    print(M_x_min,M_x_max)
    extract_SIDIS_ratio(df_dict  = e_e_pi_pass_cuts,
                        x_var    = var ,
                        x_bins   = x_bins,
                        z_bins   = z_bins,
                        z_widths = z_widths,          
                        fdebug   = 0,
                        data_path= '/Users/erezcohen/Desktop/data/BAND/Results/Mx_bins/',
                        prefix   = 'Untagged_SIDIS_ratio_',                    
                        suffix   = '_Mx_%.2f-%.2f_GeV'%(M_x_min,M_x_max),
                        M_x_min  = M_x_min,
                        M_x_max  = M_x_max)
    
print('done.')

## (5.2) extract results for many more bins in $z$

In [None]:
# Finer z binning: bin centers from 0.3 in steps of 0.01 (0.8 excluded), each with a half-width of 0.001
# (this overrides the z_bins and z_widths defined earlier in the notebook)
z_bins   = np.arange(0.3,0.8,0.01)
z_widths = np.full(z_bins.shape, 0.001)

# file-name prefix and suffix used by the following cells
prefix, suffix = 'Untagged_SIDIS_ratio_', ''

In [None]:
# Extract the ratio again, now with the fine z binning defined in the previous cell.
# NOTE(review): the prefix and suffix are identical to those of the nominal extraction above,
# and W_min is not passed here; check whether this overwrites the nominal results - confirm.
extract_SIDIS_ratio(df_dict  = e_e_pi_pass_cuts,                                
                           x_var    = var ,                           
                           x_bins   = x_bins,                           
                           z_bins   = z_bins,                           
                           z_widths = z_widths,                           
                           fdebug   = 0,
                           prefix   = 'Untagged_SIDIS_ratio_',                           
                           suffix   = '')

In [None]:
# Load the results for the fine z bins and plot them
# NOTE(review): this cell calls load_SIDIS_ratio_DataFrame() while an earlier cell calls
# load_SIDIS_ratio(); confirm that both names exist in the imported tools.
SIDIS_results = load_SIDIS_ratio_DataFrame(z_bins=z_bins,z_widths=z_widths,
                                           prefix = prefix, suffix = suffix, 
                                           doPlotResults=True)

## (5.3) Results with no acceptance matching cut in $p-\theta$ 

In [None]:
# Apply the further selection cuts without the acceptance matching cut, and time the call.
Nevents = -1 # forwarded as NeventsMax; a negative value is presumably "no limit" - TODO confirm
t0 = time.time()
# Only the first returned item (the events that pass the cuts) is used. It is taken by index
# since the notebook unpacks a different number of returned values in different cells.
selection_results = apply_further_selection_cuts_to_data(fdebug=2,
                                                         NeventsMax=Nevents,
                                                         doAcceptanceMatchingCut=False)
e_e_pi_pass_cuts_no_p_theta_cut = selection_results[0]
t1 = time.time()

# Normalize the timing per event. Dividing by the Nevents=-1 sentinel gives a negative,
# meaningless number, so in that case use the number of loaded events instead.
# NOTE(review): assumes the function processes all events stored in e_e_pi - confirm.
Nprocessed = Nevents if Nevents > 0 else sum(len(df) for df in e_e_pi.values())
if Nprocessed > 0:
    print("Time elapsed: %.2f"%(t1 - t0), 'sec (%g us/event)'%((t1 - t0)/Nprocessed*1.e6)) # wall-clock seconds elapsed (floating point)
else:
    print("Time elapsed: %.2f"%(t1 - t0), 'sec')

In [None]:
# Same extraction as before, for the events selected without the acceptance matching cut;
# a dedicated suffix keeps these results apart from the nominal ones.
# NOTE(review): this cell calls save_SIDIS_ratio_DataFrame() while the other extraction cells
# call extract_SIDIS_ratio(); confirm that this name still exists in the imported tools.
save_SIDIS_ratio_DataFrame(df_dict  = e_e_pi_pass_cuts_no_p_theta_cut,                                
                           x_var    = var ,                           
                           x_bins   = x_bins,                           
                           z_bins   = z_bins,                           
                           z_widths = z_widths,                           
                           fdebug   = 0,
                           prefix   = 'Untagged_SIDIS_ratio_',                           
                           suffix   = '_No_AcceptanceMatchingCut')

In [None]:
# Load the results obtained without the acceptance matching cut and plot them.
# Note that this overrides the global prefix and suffix variables.
prefix = 'Untagged_SIDIS_ratio_'
suffix = '_No_AcceptanceMatchingCut'
SIDIS_results = load_SIDIS_ratio_DataFrame(prefix = prefix, suffix = suffix, doPlotResults=True)

# Archive (old code, kept for reference)

In [None]:

# Rpips2pims, Rpips2pims_errup, Rpips2pims_errdw = dict(), dict(), dict()
# Y, dY = dict(),dict()
# for z_bin,z_width in zip(z_bins,z_widths):
#     z_min,z_max = z_bin-z_width,z_bin+z_width
#     (Rpips2pims[z_bin],
#      Rpips2pims_errup[z_bin], 
#      Rpips2pims_errdw[z_bin],_,_) = compute_ratio_pips_to_pims(df_dict=e_e_pi_pass_cuts  , 
#                                                              var=var, 
#                                                              bins=x_bins, 
#                                                              z_min=z_min,z_max=z_max)

#     # fit to a constant 
#     y    = Rpips2pims[z_bin]
#     y_err= (Rpips2pims_errup[z_bin],Rpips2pims_errdw[z_bin])
#     p,cov = np.polyfit(x,y,deg=0,cov=True)
#     Y[z_bin],dY[z_bin] = p[0],cov[0,0]

#     # print(z_bin,Rpips2pims_n[z_bin],Rpips2pims_n_errup[z_bin],Rpips2pims_n_errdw[z_bin])
# print('done') 

In [None]:
# fig = plt.figure(figsize=(9,6))
# ax  = fig.add_subplot(1,1,1)
# for z_bin,z_width in zip(z_bins,z_widths):
#     y    = Rpips2pims[z_bin]
#     y_err= (Rpips2pims_errup[z_bin],Rpips2pims_errdw[z_bin])
#     x_fit = np.linspace(np.min(x),np.max(x),100)
#     y_dw = (Y[z_bin]-dY[z_bin])*np.ones(len(x_fit))
#     y_up = (Y[z_bin]+dY[z_bin])*np.ones(len(x_fit))

#     # plot
#     l=ax.errorbar(x=x, xerr=x_err,  y=y, yerr=y_err,
#                 marker='o',markeredgecolor='k',
#                 label='$z=%.2f\pm%.2f, (\pi^+/\pi^-)=%.2f\pm%.2f$'%(z_bin,z_width,Y[z_bin],dY[z_bin]))
    
#     ax.fill_between( x_fit, y_dw, y_up, color=l[0].get_color(), alpha=0.1)
# set_axes(ax,xlabel,"$N(e,e'\pi^+)/N(e,e'\pi^-)$",
#          title="$\pi^+/\pi^-$ ratio as a function of $x_B$ without a tagged neutron",
#          do_add_grid=True, do_add_legend=True, fontsize=18,
#         );
# plt.legend(bbox_to_anchor=(1,1.05),loc='best',fontsize=18)

### Improve analysis timing with a focus on apply_further_selection_cuts_to_data()
Check timing of the function and improve it, as well as memory usage

In [None]:
import timeit
# Time a single execution (number=1) of apply_further_selection_cuts_to_data();
# the setup statement makes the function visible to timeit by importing it from __main__.
# timeit.timeit() returns the elapsed time in seconds, which is shown as the cell output.
timeit.timeit("apply_further_selection_cuts_to_data(fdebug=2, NeventsMax=-1)", 
              setup="from __main__ import apply_further_selection_cuts_to_data",number=1)

In [None]:
# Retain only the most important features to reduce memory usage by a large pandas DataFrame.
# The original cell started with a bare `e_e_pi[pi_ch].keys()` expression, which used pi_ch
# before the loop below defines it and whose result was discarded; it is dropped here.
# The column is named 'M_x' (not 'M_X'), as in the files read by load_SIDIS_data().
reduced_columns = ['runnum','evnum',
                   'e_P','e_Theta','e_Phi',
                   'pi_P', 'pi_Theta', 'pi_Phi',
                   'Q2', 'W', 'xB', 'Zpi',
                   'M_x', 'e_DC_sector', 'pi_DC_sector']
# compact dtypes: half-precision floats for the kinematical variables
reduced_dtypes  = {"evnum": int,
                   "e_P":np.half,"e_Theta":np.half,"e_Phi":np.half,
                   "pi_P":np.half,'pi_Theta':np.half, 'pi_Phi':np.half,
                   'Q2':np.half, 'W':np.half, 'xB':np.half, 'Zpi':np.half,
                   'M_x':np.half, }

reduced_e_e_pi = dict()
for pi_ch in pi_charge_names:
    reduced_e_e_pi[pi_ch] = e_e_pi[pi_ch][reduced_columns].astype(reduced_dtypes)

# display the first rows of the reduced DataFrame of the last pion charge in the loop
reduced_e_e_pi[pi_ch][0:5]

In [None]:
# Compare the memory usage of the DataFrames before and after the selection cuts.
# DataFrame.info() prints its report by itself and returns None, so it is called directly
# rather than wrapped in print(), which printed an extra "None" line after every report.
for pi_ch in pi_charge_names:
    print(pi_ch)
    print('before cuts')
    e_e_pi[pi_ch].info(memory_usage="deep",verbose=False)
    print(' ')
    # print('reduced dataframe before cuts')
    # reduced_e_e_pi[pi_ch].info(memory_usage="deep",verbose=False)
    # print(' ')
    print('after cuts')
    e_e_pi_pass_cuts[pi_ch].info(memory_usage="deep",verbose=False)
    print(' ')

In [None]:
# Display the index of the DataFrame of events that passed the cuts.
# pi_ch is not defined in this cell: it is whatever value a previously executed loop cell left in it.
e_e_pi_pass_cuts[pi_ch].index