# Reduce HERFD data
This notebook offers two functions (defined below)

 -  **``average(spec_filename, scan_numbers, export_base_filename = None)``**
 
 For a given SPEC file, a list of scan numbers can be averaged. The specified scans should be HERFD scans, i.e., the columns ```energy```, ```ketek``` and ```i0s``` are expected to be present. If ```export_base_filename``` is not None, three files will be exported: 
 
   -  ```export_base_filename``` + '.txt'
   
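       The raw, averaged data (computed as the point-by-point sum of the selected scans; the sum is not divided by the number of scans).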
   -  ```export_base_filename``` + '_normalized.txt'
   
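      The raw, averaged data, rescaled to the range 0 to 1: the minimum is subtracted and the result is divided by the difference between the peak maximum and the minimum.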
      
   -  ```export_base_filename``` + '_normalized_sgb.txt'
   
      The normalized data set, filtered with the scipy.signal.savgol_filter (x, window_length = 3, polyorder = 1)
      
 -  **``compare(filenames)``**
 
 Plot a list of exported data sets, which have been reduced with the ``average`` function.

In [9]:
%matplotlib widget
import os
import matplotlib.pyplot as plt
import numpy as np
from silx.io.specfile import SpecFile
from scipy.signal import savgol_filter

def average(
    spec_filename,
    scan_numbers,
    export_base_filename = None
):
    """
    Combine several HERFD scans of one SPEC file, plot them and optionally export the result.

    The ``ketek`` counts of all requested scans are summed point by point on the
    energy axis of the first requested scan. Note that, despite the function
    name, the sum is not divided by the number of scans. The individual scans,
    the summed curve with its Poisson error band and a Savitzky-Golay smoothed
    version of the summed curve are drawn into a new figure.

    Parameters
    ----------
    spec_filename : str
        Path of the SPEC file to read.
    scan_numbers : sequence of int
        1-based scan numbers; scan ``n`` is read from position ``n - 1`` of the
        file. The first entry defines the reference energy axis. Every scan
        must provide the columns ``energy``, ``ketek`` and ``i0s``.
    export_base_filename : str, optional
        If given, three two-column text files (energy, intensity) are written:

        - ``export_base_filename + '.txt'``: summed counts
        - ``export_base_filename + '_normalized.txt'``: summed counts rescaled
          to the range 0..1 (min-max normalization)
        - ``export_base_filename + '_normalized_sgb.txt'``: the normalized
          curve after ``savgol_filter(window_length = 3, polyorder = 1)``

        The value is also used as the figure title.

    Raises
    ------
    ValueError
        If the number of points of a scan differs from the number of points in
        the matching section of the reference energy axis.
    """
    sf = SpecFile(spec_filename)

    plt.figure()

    # The first requested scan defines the reference energy axis.
    # NOTE(review): scans are addressed by position (scan number - 1), which
    # presumably relies on the file numbering its scans consecutively from 1 - confirm.
    energy = sf[scan_numbers[0] - 1].data_column_by_name('energy')

    # One row per scan; NaN marks energy points that a scan did not cover.
    data = np.full((len(scan_numbers), len(energy)), np.nan)

    for idx, scan_no in enumerate(scan_numbers):

        scan = sf[scan_no - 1]

        x = scan.data_column_by_name('energy')
        y = scan.data_column_by_name('ketek')
        # 'i0s' is read only so that a missing column is still reported;
        # the values are not used for any normalization here.
        scan.data_column_by_name('i0s')

        # Locate the section of the reference axis that this scan covers.
        idx0 = np.argmin(np.abs(energy - x[0]))
        idx1 = np.argmin(np.abs(energy - x[-1]))
        reference = energy[idx0:idx1 + 1]

        # A scan with a different number of points cannot be placed on the
        # reference axis; fail with a clear message instead of an opaque
        # broadcasting error further down.
        if reference.shape != x.shape:
            raise ValueError(
                f"Scan {scan_no} has {len(x)} points, but the matching section of the "
                f"reference energy axis (taken from scan {scan_numbers[0]}) has "
                f"{len(reference)} points; these scans cannot be combined."
            )

        if not np.all(np.round(reference, 2) == np.round(x, 2)):
            print("Warning! You seem to average scans with different energy axes.")

        data[idx, idx0:idx1 + 1] = y

        plt.plot(x, y, lw = 0.5)

    # Sum over the scans (NaN entries are ignored); the Poisson error of the
    # summed counts N is sqrt(N).
    data = np.nansum(data, axis = 0)
    error = np.sqrt(data)

    # Plot the combined data and its error band. The band spans data -/+ error/2,
    # i.e. its total width equals sqrt(N).
    plt.title(export_base_filename)
    plt.plot(energy, data, 'k.-', lw = 2, label = 'averaged')
    plt.fill_between(energy, data - error/2, data + error/2, alpha = 0.25, color = 'k', label = r'Poisson error $\sqrt{N}$')
    plt.xlabel('Energy')
    plt.ylabel('Intensity (counts)')

    # Min-max normalization to the range 0..1
    data_norm = (data - np.nanmin(data)) / (np.nanmax(data) - np.nanmin(data))

    # Note that PyMca seems to treat the SavGol parameters differently.
    # In PyMca we can set 'filter_width'
    # The curve on the counts scale is what gets plotted; the filtered
    # *normalized* curve is what gets exported, so that the '_normalized_sgb'
    # file is directly comparable with the '_normalized' file.
    data_savgol = savgol_filter(data, window_length = 3, polyorder = 1)
    data_norm_savgol = savgol_filter(data_norm, window_length = 3, polyorder = 1)

    # Plot the smoothed data
    plt.plot(energy, data_savgol, 'r', lw = 1, label = 'SG-filtered')
    plt.legend()

    # If a basename was given, export
    # - the average,
    # - average normalized and
    # - average normalized smoothed data

    if export_base_filename is not None:

        filename = export_base_filename + '.txt'
        filename_n1 = export_base_filename + '_normalized.txt'
        filename_sbg = export_base_filename + '_normalized_sgb.txt'

        print(f'Info: Saving result to files:')
        print(f'Average: {filename}:')
        print(f'Normalized: {filename_n1}:')
        print(f'Smoothed: {filename_sbg}:')

        np.savetxt(filename, np.vstack([energy, data]).T)
        np.savetxt(filename_n1, np.vstack([energy, data_norm]).T)
        np.savetxt(filename_sbg, np.vstack([energy, data_norm_savgol]).T)
 
    
    
def compare(
    filenames,
):
    """
    Overlay several exported data sets in one new figure.

    Each entry of ``filenames`` is read with ``np.loadtxt`` and must hold
    exactly two columns (x values, y values), as written by ``average``.
    Every curve is labelled with the name of the file it came from.
    """

    plt.figure()

    for path in filenames:
        # Rows of the transposed table are the two columns of the file
        table = np.loadtxt(path)
        energy, intensity = table.T
        plt.plot(energy, intensity, '.-', label = path)

    plt.legend()

# Average datasets

In [10]:
# Combine scans 4-7 of the Pu21_21 SPEC file. Because a base filename is given,
# average() also writes /tmp/test.txt, /tmp/test_normalized.txt and
# /tmp/test_normalized_sgb.txt.
average(
    spec_filename = '/data/bm20/inhouse/data_TOP/20221004_Vejar_U_Pu/Pu21_21', 
    scan_numbers = [4, 5, 6, 7], 
    export_base_filename = '/tmp/test'
)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

ValueError: window_length must be odd.

# Compare datasets

In [18]:
# Overlay two of the files that average() writes for the base name /tmp/test:
# the '_normalized' export and the '_normalized_sgb' export.
compare(
    ['/tmp/test_normalized.txt',
    '/tmp/test_normalized_sgb.txt']
)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …