In [None]:
#|default_exp peak_pattern_puzzle_solver 

# Solving the peak pattern puzzle 

> Which elements are hidden in our data cube?

In [None]:
#|hide 
%load_ext autoreload
%autoreload 2 

In [None]:
#|hide 
import os 

In [None]:
#|hide 
# NOTE(review): machine-specific absolute path -- this cell only works where this directory exists
os.chdir('/home/frank/Work/Projecten/DoRe/viz/raw_nmf')

In [None]:
#|hide 
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
%matplotlib notebook

In the previous section we saw how to explain the presence of specific peaks in the individual hotmax spectra by comparison with the instrument and element peak patterns by hand. Tedious work, but not too difficult. Let's now try to extract the algorithm for solving the peak pattern puzzle. As an example let's walk through hotmax spectrum **#4**. In the plot we see thirteen peaks that exceed the noise threshold. We need to explain these peaks away, one by one... 

In [None]:
from maxrf4u import plot_puzzle, HotmaxAtlas, DataStack 

In [None]:
fig, ax, ax1 = plot_puzzle('RP-T-1898-A-3689.datastack', 4, color_select=[])
ax.set_title('Peak pattern atlas');

Initially, in hotmax spectrum **#4** there are thirteen detected peaks that we need to explain. These peaks are numbered from highest to lowest. Let's see how we can explain them away one by one. To read the thirteen peak indexes, we can use the `DataStack.read_list()` method. Note that in this case we can not use the standard `DataStack.read()` method because the data in the datastack is stored as a ragged list.  

Here are the 13 sub peak indexes of hotmax spectrum **#4**.

In [None]:
ds = DataStack('RP-T-1898-A-3689.datastack')

In [None]:
subpeak_idxs = ds.read_list('hotmax_subpeak_idxs_list')[4]
subpeak_idxs

[95, 1981, 466, 735, 427, 329, 2108, 800, 2206, 1360, 152, 1522, 933]

Let's convert these channel indexes into energies first. 

In [None]:
x_keVs = ds.read('maxrf_energies')
peak_nums = np.arange(len(subpeak_idxs))
peak_keVs = x_keVs[subpeak_idxs]

In [None]:
why_df = pd.DataFrame(data={'keVs': peak_keVs})
why_df.index.name = 'n'
why_df['why'] = '?'
why_df

Unnamed: 0_level_0,keVs,why
n,Unnamed: 1_level_1,Unnamed: 2_level_1
0,-0.028648,?
1,18.89564,?
2,3.693998,?
3,6.393168,?
4,3.302669,?
5,2.319328,?
6,20.169969,?
7,7.045383,?
8,21.153309,?
9,12.664472,?


array([-0.02864804, 18.89563968,  3.69399816,  6.39316751,  3.30266878,
        2.31932827, 20.16996871,  7.04538316, 21.15330921, 12.66447177,
        0.5432949 , 14.28999383,  8.3799167 ])

In [None]:
# no peak has been explained yet, so every source starts out as unknown
peak_srcs = ['????'] * len(peak_keVs)

# one record per detected peak: peak number, energy and (still unknown) source
explanation = [{'n': n, 'keV': keV, 'src': src} for n, keV, src in zip(peak_nums, peak_keVs, peak_srcs)]
explanation

[{'n': 0, 'keV': -0.028648044952708673, 'src': '????'},
 {'n': 1, 'keV': 18.895639680305266, 'src': '????'},
 {'n': 2, 'keV': 3.693998161871633, 'src': '????'},
 {'n': 3, 'keV': 6.393167513989552, 'src': '????'},
 {'n': 4, 'keV': 3.3026687762485896, 'src': '????'},
 {'n': 5, 'keV': 2.3193282687855556, 'src': '????'},
 {'n': 6, 'keV': 20.16996870528287, 'src': '????'},
 {'n': 7, 'keV': 7.0453831566946254, 'src': '????'},
 {'n': 8, 'keV': 21.153309212745903, 'src': '????'},
 {'n': 9, 'keV': 12.664471770769104, 'src': '????'},
 {'n': 10, 'keV': 0.5432949032655865, 'src': '????'},
 {'n': 11, 'keV': 14.289993834126363, 'src': '????'},
 {'n': 12, 'keV': 8.379916702537315, 'src': '????'}]

Now, we need to consult the peak pattern atlas with all elements, starting with the instrument peak pattern. 

In [None]:
from maxrf4u import get_patterns, get_instrument_pattern
import numpy as np

In [None]:
instr_ptrn = get_instrument_pattern('RP-T-1898-A-3689.datastack')
instr_ptrn

{'name': 'INSTRUMENT',
 'instrument_peaks': array([-0.02864804, 18.82674463, 20.20010005, 20.99840233, 22.72136068])}

We now need to check which peaks in the hotmax spectrum match the instrument peaks. 

In [None]:
instr_keVs = instr_ptrn['instrument_peaks']
instr_keVs

array([-0.02864804, 18.82674463, 20.20010005, 20.99840233, 22.72136068])

Let's see which peaks match within a distance of 0.1 keV. 

In [None]:
def match_instrument(why_df, datastack_file, delta_keV=0.1): 
    '''Label the peaks in `why_df` that coincide with an instrument peak.

    Parameters
    ----------
    why_df : pandas.DataFrame
        Peak table with a 'keVs' column (peak energies) and a 'why' column.
    datastack_file : str
        Passed on to `get_instrument_pattern()` to obtain the instrument peaks.
    delta_keV : float, optional
        A peak matches when its distance to at least one instrument peak is
        strictly smaller than this value.

    Returns
    -------
    pandas.DataFrame
        The same `why_df` object, modified in place: for every matching row
        the 'why' column is set to 'INSTR'.
    '''
    # first generate instrument pattern
    instr_ptrn = get_instrument_pattern(datastack_file)
    instr_keVs = np.asarray(instr_ptrn['instrument_peaks'], dtype=float)

    # read the peak energies from the table itself rather than from a notebook global
    peak_keVs = why_df['keVs'].to_numpy(dtype=float)

    # calculate distances (peaks x instrument peaks) and filter < delta_keV
    distance_matrix = np.abs(peak_keVs[:, None] - instr_keVs[None, :])
    is_nearby = (distance_matrix < delta_keV).any(axis=1)

    # insert cause; `.at` only accepts a single label, so select the rows with `.loc`
    why_df.loc[is_nearby, 'why'] = 'INSTR'

    return why_df

In [None]:
why_df = match_instrument(why_df, 'RP-T-1898-A-3689.datastack')
why_df

Unnamed: 0_level_0,keVs,why
n,Unnamed: 1_level_1,Unnamed: 2_level_1
0,-0.028648,INSTR
1,18.89564,INSTR
2,3.693998,?
3,6.393168,?
4,3.302669,?
5,2.319328,?
6,20.169969,INSTR
7,7.045383,?
8,21.153309,?
9,12.664472,?


Next phase is a comparison with the element patterns. In order to match a certain element, at least the alpha peak needs to be present... 

[{'elem': 'N',
  'name': 'Nitrogen',
  'peaks_xy': array([[0.3902, 1.    ]]),
  'alpha_escape_keV': -1.3498049024512255,
  'color': array([0.6196, 0.8549, 0.898 ])},
 {'elem': 'O',
  'name': 'Oxygen',
  'peaks_xy': array([[0.5253, 1.    ]]),
  'alpha_escape_keV': -1.2147373686843421,
  'color': array([0.0902, 0.7451, 0.8118])},
 {'elem': 'F',
  'name': 'Fluorine',
  'peaks_xy': array([[0.6753, 1.    ]]),
  'alpha_escape_keV': -1.0646623311655827,
  'color': array([0.8588, 0.8588, 0.5529])},
 {'elem': 'Ne',
  'name': 'Neon',
  'peaks_xy': array([[0.8554, 1.    ]]),
  'alpha_escape_keV': -0.8845722861430715,
  'color': array([0.7373, 0.7412, 0.1333])},
 {'elem': 'Na',
  'name': 'Sodium',
  'peaks_xy': array([[1.0355, 1.    ]]),
  'alpha_escape_keV': -0.7044822411205602,
  'color': array([0.7804, 0.7804, 0.7804])},
 {'elem': 'Mg',
  'name': 'Magnesium',
  'peaks_xy': array([[1.2606, 1.    ]]),
  'alpha_escape_keV': -0.47936968484242115,
  'color': array([0.498, 0.498, 0.498])},
 {'elem': 

In [None]:
def extract_alpha_keVs(elem_ptrns=None): 
    '''Pair each element symbol with the energy of its first listed peak.

    Parameters
    ----------
    elem_ptrns : list of dict, optional
        Pattern dictionaries with at least an 'elem' key and a two-dimensional
        'peaks_xy' array. When None, the patterns come from `get_patterns()`.

    Returns
    -------
    list of [elem, keV]
        One two-item list per pattern, in the order of `elem_ptrns`.
    '''
    patterns = get_patterns() if elem_ptrns is None else elem_ptrns

    pairs = []
    for ptrn in patterns:
        # energy in row 0, column 0 -- presumably the alpha peak; confirm against get_patterns()
        first_peak_keV = ptrn['peaks_xy'][0, 0]
        pairs.append([ptrn['elem'], first_peak_keV])

    return pairs
        

In [None]:
alpha_list = extract_alpha_keVs()

In [None]:
[a[1] for a in alpha_list]

[0.3901950975487744,
 0.5252626313156579,
 0.6753376688344173,
 0.8554277138569285,
 1.0355177588794398,
 1.2606303151575788,
 1.485742871435718,
 1.7408704352176088,
 2.0110055027513756,
 2.3111555777888944,
 2.626313156578289,
 3.3166583291645826,
 3.691845922961481,
 3.9319659829914957,
 4.457228614307153,
 4.502251125562782,
 4.9524762381190595,
 5.417708854427214,
 5.897948974487244,
 6.393196598299149,
 6.9184592296148075,
 7.473736868434218,
 8.044022011005502,
 8.629314657328665,
 9.9799899949975,
 10.53526763381691,
 10.550275137568784,
 11.915957978989494,
 14.15207603801901,
 20.200100050025014,
 22.151075537768886,
 23.171585792896447,
 25.27263631815908]

In [None]:
# NOTE(review): `d` is not defined in any visible cell -- leftover scratch cell, fails on a fresh kernel
d.items()

dict_items([('N', 0.3901950975487744)])

In [None]:
# NOTE(review): `alpha` is not defined in any visible cell -- leftover scratch cell, fails on a fresh kernel
alpha['elem']

array(['N', 'O', 'F', 'Ne', 'Na', 'Mg', 'Al', 'Si', 'P', 'S', 'Cl', 'K',
       'Ca', 'I', 'Ba', 'Ti', 'V', 'Cr', 'Mn', 'Fe', 'Co', 'Ni', 'Cu',
       'Zn', 'Hg', 'As', 'Pb', 'Br', 'Sr', 'Rh', 'Ag', 'Cd', 'Sn'],
      dtype='<U8')

Let's color the markers on these matched instrument peaks red...

In [None]:
y_hot = ds.read('hotmax_spectra')[4] 

# peak numbers that lie within 0.1 keV of at least one instrument peak
is_instr_peak = (np.abs(peak_keVs[:, None] - instr_keVs[None, :]) < 0.1).any(axis=1)
match_idxs = np.flatnonzero(is_instr_peak)

# marker coordinates: peak energy and spectrum value at the corresponding channel index
match_x = peak_keVs[match_idxs]
match_y = y_hot[np.array(subpeak_idxs)[match_idxs]]

In [None]:
# show all channel indexes of the detected peaks (an empty subscript `[]` is a syntax error)
subpeak_idxs

[95, 1981, 466, 735, 427, 329, 2108, 800, 2206, 1360, 152, 1522, 933]

In [None]:
from maxrf4u import HotmaxAtlas

In [None]:
hma = HotmaxAtlas('RP-T-1898-A-3689.datastack')

In [None]:
ax, ann_list = hma.plot_spectrum(4)

ax.scatter(match_x, match_y, facecolor='r', edgecolor='w', marker='X', s=100)
ax.set_title('Matching instrument peaks');

And note down for which peaks we now have an explanation:

In [None]:
np.set_printoptions(precision=4)

[-0.028648044952708673,
 18.895639680305266,
 3.693998161871633,
 6.393167513989552,
 3.3026687762485896,
 2.3193282687855556,
 20.16996870528287,
 7.0453831566946254,
 21.153309212745903,
 12.664471770769104,
 0.5432949032655865,
 14.289993834126363,
 8.379916702537315]

In [None]:
for n in [0, 1, 6]: 
    explanation[n]['src'] = 'INSTR'

In [None]:
explanation

[{'n': 0, 'keV': -0.028648044952708673, 'src': 'INSTR'},
 {'n': 1, 'keV': 18.895639680305266, 'src': 'INSTR'},
 {'n': 2, 'keV': 3.693998161871633, 'src': '????'},
 {'n': 3, 'keV': 6.393167513989552, 'src': '????'},
 {'n': 4, 'keV': 3.3026687762485896, 'src': '????'},
 {'n': 5, 'keV': 2.3193282687855556, 'src': '????'},
 {'n': 6, 'keV': 20.16996870528287, 'src': 'INSTR'},
 {'n': 7, 'keV': 7.0453831566946254, 'src': '????'},
 {'n': 8, 'keV': 21.153309212745903, 'src': '????'},
 {'n': 9, 'keV': 12.664471770769104, 'src': '????'},
 {'n': 10, 'keV': 0.5432949032655865, 'src': '????'},
 {'n': 11, 'keV': 14.289993834126363, 'src': '????'},
 {'n': 12, 'keV': 8.379916702537315, 'src': '????'}]

Now that we matched the instrumental peaks, we need to explain the other remaining peaks following their sorting order (from large to small). This means that next we need to explain peak **(2)**. For this, we need to consult the peak pattern atlas...  

In [None]:
np.argwhere([p['src'] == '????' for p in explanation]).flatten()

array([ 2,  3,  4,  5,  7,  8,  9, 10, 11, 12])