In [11]:
# import all you need
import numpy as np
import plotly.graph_objects as go
from scipy.optimize import curve_fit

# functions imported from helper_files
from helper_files.plotting import plot_lines, fitting_plot
from helper_files.gaussian_fitting import gaussian, n_gaussians, fit_n_peaks_to_gaussian
from helper_files.read_data import read_xy_data, read_only_y_data
from helper_files.error_calculation import rms_error

In [2]:
# this will load the helper modules each time you make changes to them, without having to restart the kernel
%load_ext autoreload
%autoreload 2 # or 1?????

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Reading the data into arrays

##### See the helper file plotting.py

Here you need to:

- set the path to the file, or just use the example file
- open the data file and find out where the data starts and stops.
    - in the example data the data starts after "#SPECTRUM    : Spectral Data Starts Here",
    - and ends with "#ENDOFDATA   : "
- take note of the line endings,
    - in the example data the line endings are '\n'
- take note of what the data is separated by, 
    - in the example data it is separated by a comma and a space ", "

We use these variables to read the data with either:

- read_xy_data(...) if the file contains both x and y on each line
- read_only_y_data(...) if the file only contains the counts

In [3]:
# reading the file

# # Ex1 .emsa
# filepath = 'Ex1_EDS_GaAs_30kV.emsa'
# start_string = "#SPECTRUM    : Spectral Data Starts Here"
# stop_string = "#ENDOFDATA   : "
# line_endings = '\n'
# delimiter = ', '
# data = read_xy_data(filepath, start_string, stop_string, delimiter, line_endings)

# Ex2 .msa
filepath = 'Ex2_NiO_on_Mo_not_calibrated.msa'
start_string = "#SPECTRUM    : Spectral Data Starts Here"
stop_string = "#ENDOFDATA   : End Of Data and File"
line_endings = ', \n'
data = read_only_y_data(filepath, start_string, stop_string, line_endings)

# # Ex3 .mca
# filepath = 'Ex3_Cu.mca'
# start_string = "<<DATA>>"
# stop_string = "<<END>>"
# line_endings = '\n'
# data = read_only_y_data(filepath, start_string, stop_string, line_endings)

# normalize counts to 1 and make a array for the raw channels
y_raw = data[1]/data[1].max()
x_raw = np.arange(0, len(y_raw), 1)  # when using read_only_y_data() data[1] == x_raw

# We need x_raw because the fitting only works in the x values are integers.
# since the goal is to calibrate the channels, we fix this later to plot the data with a calibrated x-axis.


The first line looks like this: '#FORMAT      : EMSA/MAS Spectral Data File\n'
Reading from line 26 to 2075.
Read 2048 data points from Ex2_NiO_on_Mo_not_calibrated.msa
First entry: [0. 0.]
Last entry: [2047.   10.]


In [4]:
plot_lines(data[0], data[1], title='Plot of the raw data', xaxis_title='raw x values (uncalibrated keV for .emsa, channels for .mca and .msa)', yaxis_title='Counts')
plot_lines(x_raw, y_raw, title='Plot of the data we will work with', xaxis_title='Channels', yaxis_title='Normalized counts')

### Fittin the peaks to gaussians

##### see the helper file gaussian_fitting.py

Now we find two or more peaks in the data, and we want to fit a gaussian to each of them. 
We need to know what the theoretical value of at least two the guessed peaks, so that we can calibrate the spectrum later.

In the Ex1 we can use Ga_Ka=9.2517 keV and Ga_La=1.098 keV, or we can use As_Ka=10.5336 keV and As_La=1.2819 keV. Or we can use both

eg peak_guesses = [9.2517, 10.5336]

Short overview of the used functions in gaussian_fitting.py
- def gaussian(x, amp, mu, sigma):
    - the function gaussian defines a gaussian function.

- def n_gaussians(x, *args):
    - since the gaussians could potentially partially overlap, we need to define a function that returns the sum of n gaussians.
        - Eg. like Ga_Kb=10.2642 and As=10.5436 in Ex1

- def fit_n_peaks_to_gaussian(x, raw_y, guessed_peaks, guessed_std=1, guessed_amp=1,):
    - now we need a functions which fits peak guesses to gaussian curves.
    - we will use the scipy.optimize.curve_fit function for this. More info in the function.
    - with normalized counts it usually works nice with guessing all std and amplitues as 1

Additional info:
I tried fitting to the raw keV-valus of the .emsa file, but that did not work. 
I think it is because the x values are not integers, and for some reason that hindered the fitting.
I do suspect that it migh be because the std and amp guesses are way off, but I am not sure.

This is the code that did not work:
~~~
peak_guesses = [0.02, 0.27, 0.97, 1.1, 1.29, 1.75, 9.2517, 10.24, 10.5336, 11.75] 
fit_vals = fit_n_peaks_to_gaussian(data[0], data[1], peak_guesses)
~~~

**Be aware: sometimes the fitting sets a peak as the background. Always inspect the plot**

*TODO: implement RMS-errors between the data and the fit*


In [5]:
# Fitting the data to gaussians

#
#
# NB! if this cell crash with:
# "RuntimeError: Optimal parameters not found: Number of calls to function has reached maxfev = 5000."
# then adjust your peak guesses, or try to fit fewer peaks at once.
#  
#

# the peak guesses are the approximate values of the peaks in the plot above
guesses_peaks_Ga30 = [22, 46, 117, 130, 149, 195, 944, 1045, 1072, 1191]

guesses_peaks_Cu = [24, 47, 72, 95, 106, 767, 864]

guesses_NiO = [225, 248]

# running the gaussian fitting function, see helper_files/gaussian_fitting.py
fit_vals = fit_n_peaks_to_gaussian(x_raw, y_raw, guesses_peaks_Cu)
# fit_vals[0]

# Be aware: sometimes the fitting sets a peak as the background.

In [6]:
# advanced plot using the following function, see helper_files/plotting.py
#fitting_plot(x, y_raw, y_fit=None, vlines=None, fit_vals=None, fig=None, start=0, stop=2048, 
# title="Fitting plot", xaxis_title="Channel number [~10eV]", yaxis_title="Relative intensity [a.u.]", )

y_fit = n_gaussians(x_raw, *fit_vals[0])
peaks_fitted = fit_vals[0][1::3]  # every third value is the peak value

fig = fitting_plot(x_raw, y_raw, y_fit=y_fit, vlines=peaks_fitted)  # add fit_vals[0] to see the fit values
fig.show()

### Error calculation

##### See the helper file error_calculation.py

We want to know the error of the fitting. For now RMS-error is implemented.

- rms_error(y, y_fit)
    - calculates the RMS-error and returns the value
- Good values are ___?

In [12]:
rmse = rms_error(y_raw, y_fit)
print(f"Root mean square error: {rmse}")

Root mean square error: 0.005853568014364957


In [7]:
# # since we are clever, we know that we can plot the fitted data on top of the raw x values,
# # but the fitted peaks are not at the correct values.
# fig = fitting_plot(data[0], y_raw, y_fit=y_fit, xaxis_title='uncalibrated keV', title='Plotting the raw and fit, on the raw x values')
# fig.show()

In [8]:
# now we want to plot the results above on the raw x values,
# but we need to know the calibration factor.
# that is, we


# this works for the .emsa files, since the x values are in keV.
# the .msa and .mca are without x-values, but they do have a calibration factor.
# TODO: make a function that can read the calibration factor from the .msa and .mca files.

# linear mapping of the channels to keV
kev0 = data[0][0]
kev1 = data[0][-1]
channel0 = x_raw[0]
channel1 = x_raw[-1]

# the slope of the linear mapping
slope = (kev1-kev0)/(channel1-channel0)
print(f'linmap: {kev0} + {slope:.3e}*channel')

peaks_kev = [slope*(peak-channel0)+kev0 for peak in peaks_fitted]


linmap: 0.0 + 1.000e+00*channel


In [9]:
fig = fitting_plot(data[0], y_raw, y_fit=y_fit, vlines=peaks_kev, xaxis_title='uncalibrated keV', title='Plotting the raw and fit, on the raw x values')
fig.show()

# to save the figure, uncomment the following line
# fig.write_image('Example_data_plot_uncalibrated.html')

In [10]:
# issue now: we have negative vaules in the .emsa file, and i dunno what to do.

# assuming that Ga_Ka=9.2517, we need some ...