# Learn how to get a light yield

Tunnell, February 2016

This tutorial describes how to analyze the AmBe data to fit the inelastic line.

The following line just runs our standard code for every analysis.  You can change 'run' to 'load' to see what is in there.

In [10]:
%matplotlib inline
%run boiler_plate.py
import hax

In [11]:
## Specify your own data location
# Point hax at the directory that holds the data files.
# NOTE(review): '/tmp/data/good/' is machine-specific -- change it to wherever
# your copy of the data lives.
hax.config.CONFIG['main_data_paths'] = ['/tmp/data/good/']
# Called after changing the path; presumably this re-scans the configured
# paths and fills in the 'location' column of hax.runs.DATASETS -- confirm.
hax.runs.update_datasets()

In [20]:
# List every dataset that has a non-empty file location, leaving out the
# Cs137 runs.
D = hax.runs.DATASETS
has_location = D['location'] != ""
is_not_cs137 = D['source'] != 'Cs137'
D[has_location & is_not_cs137]

Unnamed: 0,name,source,position,trigger,anode,cathode,shield,livetime,corrected_livetime,events,corrected_events,processed,category,comment,run,tpc,location
473,xe100_110210_1100,AmBe,other,S1,4.4,16,closed,202.16,202.2,5491,5491,0.4.5,standard,"Trg 60/51, holdoff 1ms, HE veto (2x10dB/4/100/40)",10,xenon100,/tmp/data/good/xe100_110210_1100.root
475,xe100_110210_1412,AmBe,other,S1,4.4,16,closed,2025.08,1172.0,47227,27961,0.4.5,standard,"Trg 60/51, holdoff 1ms, HE veto (2x10dB/4/100/40)",10,xenon100,/tmp/data/good/xe100_110210_1412.root
483,xe100_110211_0532,AmBe,other,S1,4.4,16,closed,2274.46,2274.5,51229,51229,0.4.5,standard,"Trg 60/51, holdoff 1ms, HE veto (2x10dB/4/100/40)",10,xenon100,/tmp/data/good/xe100_110211_0532.root
1614,xe100_120404_0804,AmBe,other,S1,4.4,16,closed,843.912,843.9,14032,17000,0.4.5,standard,,10,xenon100,/tmp/data/good/xe100_120404_0804.root


## Load data

Grab the AmBe datasets, then restrict the analysis to three of them:

In [9]:
# Query the dataset table for the names of all runs with source "AmBe",
# category "standard" and tpc "xenon100".
datasets_ambe = hax.runs.DATASETS.query('source == "AmBe" & category == "standard" & tpc == "xenon100"')['name'].values
# NOTE(review): the query result above is immediately overwritten by this
# hard-coded list, so only these three runs are used from here on.
datasets_ambe = ['xe100_110210_1100',
                 'xe100_110210_1412',
                 'xe100_110211_0532']

Inspect this dataset

In [5]:
# Load the minitrees for the selected runs into df (the log output below
# reports one "Basics" minitree file found per run), then preview the first
# five rows.
df = hax.minitrees.load(datasets_ambe)
df.head(5)

Found minitree at ./xe100_110210_1100_Basics.root
Found minitree at ./xe100_110210_1412_Basics.root
Found minitree at ./xe100_110211_0532_Basics.root


Unnamed: 0,index,cs1,cs2,dataset_number,drift_time,event_number,event_time,largest_coincidence,largest_other_s1,largest_other_s2,largest_unknown,largest_veto,s1,s1_area_fraction_top,s2,s2_area_fraction_top,x,y,z
0,0,4.822919,439.41969,1102101100,49639.042969,0,1297332001005216000,0,0,0.0,0,0.0,3.628556,0.215452,393.525543,0.593098,-12.639623,5.915094,-8.587554
1,1,,,1102101100,,1,1297332001027482112,0,0,0.0,0,0.0,,,,,,,
2,2,4.246333,488.037578,1102101100,123562.6875,2,1297332001054971904,0,0,300.855682,0,0.0,5.172802,0.0,370.854065,0.566815,0.560377,-7.783019,-21.376345
3,3,,,1102101100,,3,1297332001141690880,0,0,534.971069,0,0.0,,,,,,,
4,4,,,1102101100,,4,1297332001147309056,0,0,237.557007,0,203.871292,,,,,,,


## Cuts

Here are the cuts

In [6]:
# --- Event selection --------------------------------------------------------
# Single-S1 / single-S2 requirement: no other S1 at all, and the largest
# other S2 must be below 100.
cut_single_s1 = df['largest_other_s1'] == 0
cut_single_s2 = df['largest_other_s2'] < 100

# Fiducial volume: radius below 12 and z strictly between -27 and -5
# (same units as the x, y, z columns).
cut_radius = np.sqrt(df['x']**2 + df['y']**2) < 12
cut_z = (df['z'] > -27) & (df['z'] < -5)
cut_fiducial = cut_z & cut_radius

# Keep only the events that pass every cut.  Note that this rebinds df, so
# the uncut frame is no longer available after this cell.
df = df[cut_single_s2 & cut_single_s1 & cut_fiducial]

Inspect $(x,y)$

In [None]:
# Scatter the selected events in the (x, y) plane, coloured by their z value.
plt.scatter(df['x'], df['y'], c=df['z'], marker='o', s=10)
# Label the colour scale so the figure can be read on its own; the colour
# encodes the 'z' column.
colorbar = plt.colorbar()
colorbar.set_label('z [cm]')
plt.xlabel('x [cm]')
plt.ylabel('y [cm]')
plt.show()

Inspect $(r,z)$.

In [None]:
# Scatter the selected events in (r, z), with r = sqrt(x^2 + y^2), coloured
# by the 'cs2' column.
plt.scatter(np.sqrt(df['x']**2 + df['y']**2), df['z'], c=df['cs2'],
            marker='o', s=10)
# Label the colour scale so the figure can be read on its own.
colorbar = plt.colorbar()
colorbar.set_label('cs2 [pe]')
plt.xlabel('r [cm]')
plt.ylabel('z [cm]')
# Render explicitly, as the other plotting cells do, instead of leaving the
# repr of the last call as the cell output.
plt.show()

In [None]:
# Scatter the selected events in log10(cs1) versus log10(cs2).
plt.scatter(np.log10(df['cs1']),
            np.log10(df['cs2']),
            marker='.',
            s=10,
            alpha=1)

# The plotted quantities are base-10 logarithms of the areas, not the areas
# themselves, so the axis labels must say so.
plt.xlabel('log10(S1 area [pe])')
plt.ylabel('log10(S2 area [pe])')

plt.show()

In [None]:
# Select the events inside the (cs1, cs2) box used for the 40 keV line.
# .copy() makes df_40kev an independent frame: later cells add and rewrite
# columns on it, which on a plain boolean-mask slice of df triggers pandas'
# SettingWithCopyWarning.
df_40kev = df[(df['cs1'] > 50) & (df['cs1'] < 200) &
              (df['cs2'] > 15000) & (df['cs2'] < 32000)].copy()

plt.scatter(df_40kev['cs1'],
            df_40kev['cs2'], marker='.')
plt.title('40 keV')
# Optional fixed axis ranges, left disabled as in the original cell:
#   plt.xlim(0, 250)          # S1
#   plt.ylim(8000, 25000*2)   # S2

plt.xlabel('S1 area [pe]')
plt.ylabel('S2 area [pe]')

plt.show()

In [None]:
# Report the mean and standard deviation of cs1 and cs2 within the 40 keV
# selection.
s1_40kev = df_40kev['cs1']
s2_40kev = df_40kev['cs2']
print('S1 (40 keV) pe:', s1_40kev.mean(), '+/-', s1_40kev.std())
print('S2 (40 keV) pe:', s2_40kev.mean(), '+/-', s2_40kev.std())

In [None]:
# Yield per keV: mean signal in the selection divided by the line energy.
line_energy_kev = 40
mean_s1_40kev = df_40kev['cs1'].mean()
mean_s2_40kev = df_40kev['cs2'].mean()
print('S1 pe/keV:', mean_s1_40kev / line_energy_kev)
print('S2 pe/keV:', mean_s2_40kev / line_energy_kev)

In [None]:
# Wider (cs1, cs2) window than the 40 keV box; used as input to the mixture fit.
cs1_in_window = (df['cs1'] > 50) & (df['cs1'] < 500)
# NOTE(review): 300e5 is 3e7 -- confirm this upper cs2 bound is intended.
cs2_in_window = (df['cs2'] > 15000) & (df['cs2'] < 300e5)
df_peaks = df[cs1_in_window & cs2_in_window]

In [None]:
# Build the training set: stack the cs1 and cs2 columns of df_peaks so that
# each row is one event (column 0 = cs1, column 1 = cs2).
X_train = np.dstack((df_peaks.to_records()['cs1'],
                     df_peaks.to_records()['cs2']))[0]

# Fit a Gaussian Mixture Model with three full-covariance components.
# NOTE(review): `mixture` is not imported in the visible cells -- presumably
# sklearn.mixture provided by boiler_plate.py; confirm.
clf = mixture.GMM(n_components=3, covariance_type='full')
clf.fit(X_train)

# Evaluate the fitted model on a regular grid spanning the data range and
# display the scores as a contour plot.
x = np.linspace(df_peaks['cs1'].min(),
                df_peaks['cs1'].max())

y = np.linspace(df_peaks['cs2'].min(),
                df_peaks['cs2'].max())
X, Y = np.meshgrid(x, y)
# One (cs1, cs2) row per grid point.
XX = np.array([X.ravel(), Y.ravel()]).T
# Only element [0] of the score_samples() return value is used and negated.
# NOTE(review): this indexing assumes the legacy GMM API, where
# score_samples() returns a tuple -- confirm against the installed
# scikit-learn version.
Z = -clf.score_samples(XX)[0]
Z = Z.reshape(X.shape)

# Contour levels: 100 logarithmically spaced values between 10 and 100.
CS = plt.contour(X, Y, Z, #norm=LogNorm(vmin=1.0, vmax=1000.0),
                 levels=np.logspace(1, 2, 100))
CB = plt.colorbar(CS, shrink=0.8, extend='both')
# Overlay the training events; the third positional argument (.8) is the
# marker size.
plt.scatter(X_train[:, 0], X_train[:, 1], .8)

plt.title('Negative log-likelihood predicted by a GMM')
plt.axis('tight')
plt.show()

In [None]:
# Show the fitted means of the mixture components; the feature order follows
# X_train (cs1, cs2).
clf.means_

In [None]:
# Compare the 'pid' discriminator (y-axis label: log10(S2/s1)) of a low-S2
# selection against the 40 keV selection, as a function of S1.
# NOTE(review): this cell reads the columns 's1_area', 's2_area' and 'pid',
# none of which appear in the df.head() output shown earlier in this notebook
# (which lists cs1, cs2, s1, s2, ...) -- confirm which minitree provides them.
df_pid = df[(150 < df['s2_area']) & (10000 > df['s2_area'])]
plt.scatter(df_pid['s1_area'], df_pid['pid'], marker='.', alpha=0.1, s=5,
            color='red', label='NR band')
# Raw string: '\g' is not a valid Python escape sequence, so the non-raw
# literal emits an invalid-escape warning.  The resulting text (and the
# rendered LaTeX) is unchanged.
plt.scatter(df_40kev['s1_area'], df_40kev['pid'], marker='.', alpha=1.0, s=5,
            color='blue', label=r'40 keV $\gamma$')
plt.legend()
plt.xlabel('Corrected S1 [pe]')
plt.ylabel('log10(S2/s1)')
plt.xlim(0, 220)
plt.ylim(1,4)

In [None]:
# Pull out the events of the df_pid selection whose 'pid' value is below 1.0.
pid_is_low = df_pid['pid'] < 1.0
df_inspect = df_pid[pid_is_low]


In [None]:
# Keep the unmodified 'dataset_name' values in a new 'xed' column.
# assign() returns a new frame rather than writing a column into df_40kev in
# place; df_40kev was created by boolean-mask slicing of df, where in-place
# column assignment triggers pandas' SettingWithCopyWarning.
df_40kev = df_40kev.assign(xed=df_40kev['dataset_name'])

In [None]:
# Drop the last 11 characters of each name to get the dataset directory name
# (presumably a per-file suffix such as '_NNNNNN.xed' -- confirm).
# The truncation is computed from the 'xed' column (the untouched copy made
# in the previous cell) rather than from 'dataset_name' itself, so re-running
# this cell does not strip another 11 characters each time.
df_40kev = df_40kev.assign(
    dataset_name=[name[:-11] for name in df_40kev['xed']])

In [None]:
# Preview the first rows to check the new 'xed' and rewritten 'dataset_name' columns.
df_40kev.head()

In [None]:
def inspect_event(df_event, data_dir='/Users/tunnell/XENON/data/xenon100/run_10'):
    """Run a pax processor on a single event with the event-summary plot output.

    Parameters
    ----------
    df_event : mapping-like (e.g. one row of ``df_40kev``)
        Must provide the keys ``'dataset_name'``, ``'xed'`` and
        ``'event_number'``.
    data_dir : str, optional
        Directory that contains one sub-directory per dataset.  The input
        file is looked up at ``<data_dir>/<dataset_name>/<xed>``.  The default
        is the location that was previously hard-coded here; pass your own
        path when running on another machine.

    Returns
    -------
    None

    Notes
    -----
    ``core`` must already be available in the notebook namespace
    (presumably ``pax.core`` -- confirm).
    """
    # Strip trailing slashes so a data_dir given as '.../run_10/' does not
    # produce a doubled separator.
    input_name = '%s/%s/%s' % (data_dir.rstrip('/'),
                               df_event['dataset_name'],
                               df_event['xed'])

    mypax = core.Processor(config_names='XENON100',
                           config_dict={'pax': {
                               'output': ['Plotting.PlotEventSummary'],
                               'input_name': input_name,
                               # Process only the requested event.
                               'events_to_process': [df_event['event_number']],
                               'output_name': 'SCREEN'}})

    mypax.run()

In [None]:
# Run the event inspection on the first row of the 40 keV selection.
inspect_event(df_40kev.iloc[0])