In [2]:
#| echo: false
import numpy as np
from scipy.interpolate import PchipInterpolator
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, Span, Range1d
from bokeh.layouts import gridplot
from bokeh.io import output_notebook
import warnings
import pandas as pd
import altair as alt
from IPython.display import display
warnings.filterwarnings('ignore')
output_notebook()

def plot_format(plot, xlabel, ylabel, location, size, titlesize, labelsize):
    """Apply the notebook's dark theme and shared typography to a figure.

    Parameters
    ----------
    plot : object exposing ``xaxis``, ``yaxis``, ``legend``, ``title``,
        ``xgrid`` and ``ygrid`` (a Bokeh figure in this notebook).
    xlabel, ylabel : text assigned to the x / y axis labels.
    location : value assigned to ``legend.location``.
    size : font size used for axis labels and major tick labels.
    titlesize : font size of the title.
    labelsize : font size of the legend entries.

    Returns
    -------
    The same ``plot`` object, modified in place.
    """
    panel_colour = "#282B30"
    grid_colour = '#606773'
    text_colour = "#E3F4FF"

    # Both axes share every setting except the label text.
    for axis, label in ((plot.xaxis, xlabel), (plot.yaxis, ylabel)):
        axis.axis_label = label
        axis.axis_label_text_font_style = 'bold'
        axis.axis_label_text_font_size = size
        axis.axis_label_text_color = text_colour
        axis.major_label_text_font_style = "bold"
        axis.major_label_text_font_size = size
        axis.major_label_text_color = text_colour
        axis.minor_tick_line_color = grid_colour

    # Legend: clickable entries, transparent box (border and fill alpha are 0).
    legend = plot.legend
    legend.location = location
    legend.click_policy = "hide"
    legend.label_text_font_size = labelsize
    legend.label_text_font_style = 'bold'
    legend.label_text_color = text_colour
    legend.border_line_width = 3
    legend.border_line_color = "navy"
    legend.border_line_alpha = 0.0
    legend.background_fill_alpha = 0.0

    # Title
    title = plot.title
    title.text_font_size = titlesize
    title.text_font_style = "bold"
    title.text_color = "#A6DDFF"

    # Dark panel and grid lines
    plot.background_fill_color = panel_colour
    plot.border_fill_color = panel_colour
    for grid in (plot.xgrid, plot.ygrid):
        grid.grid_line_color = grid_colour

    return plot

In [3]:
# Colour cycle for the scatter points: three colours repeated 42 times.
new_colors = ['#9D6C97', '#9DC3E6', '#9DD9C5'] * 42

# 1. Read the two sheets of the workbook into a dict of DataFrames
df = pd.read_excel('data/base_function.xlsx', sheet_name=['base', 'M'])

# 2. Split the sheets; M is processed in ascending order
base_df = df['base']
M_df = df['M'].sort_values(by='M')

# 3. Common x axis (one value per sample of a base-function column)
xaxis = np.arange(-15.5, 16.5, 1)
plots = []

# 4. Build one frame per row of M, with the x axis shifted by that row's M.
#    The frames are collected in a list and concatenated once:
#    DataFrame.append was removed in pandas 2.0, and appending inside the
#    loop copied the whole frame on every iteration.
n_points = len(xaxis)
shifted_frames = []
for index, row in M_df.iterrows():
    shifted_frames.append(pd.DataFrame({
        'mu': [row.M] * n_points,
        'xaxis': xaxis - row.M,
        # the row label of M selects the matching column of the 'base' sheet
        'yaxis': base_df[index],
        'colors': new_colors[:n_points],
    }))

if shifted_frames:
    sorted_df = pd.concat(shifted_frames, ignore_index=True)
else:
    # pd.concat raises on an empty list; keep an empty frame with the same columns
    sorted_df = pd.DataFrame(columns=['mu', 'xaxis', 'yaxis', 'colors'])

base_function_df = sorted_df.sort_values(by='xaxis').reset_index(drop=True)

# 5. Smooth: neighbouring samples closer than 0.01 on the x axis are merged
#    into their midpoint; all other samples are copied through unchanged.
x_sorted = base_function_df['xaxis'].to_numpy()
y_sorted = base_function_df['yaxis'].to_numpy()
c_sorted = base_function_df['colors'].to_numpy()

smooth_rows = {}   # output row index -> (xaxis, yaxis, colors)
xoutindx = 0
if len(base_function_df) > 0:
    # Seed with the first sample. The loop below starts at row 1, so without
    # this the first sample is lost whenever it is not merged with the second.
    smooth_rows[0] = (x_sorted[0], y_sorted[0], c_sorted[0])

for aveindex in range(1, len(base_function_df)):
    if (x_sorted[aveindex] - x_sorted[aveindex-1]) < 0.01:
        # NOTE(review): in a run of three or more close samples only the last
        # two are averaged (each merge overwrites the previous one) — confirm
        # this is intended.
        smooth_rows[xoutindx] = (
            (x_sorted[aveindex] + x_sorted[aveindex-1]) / 2,
            (y_sorted[aveindex] + y_sorted[aveindex-1]) / 2,
            c_sorted[aveindex],
        )
    else:
        xoutindx += 1
        smooth_rows[xoutindx] = (x_sorted[aveindex], y_sorted[aveindex], c_sorted[aveindex])

# Build the frame in one go so xaxis/yaxis are numeric columns rather than
# object columns filled cell by cell.
smooth_df = pd.DataFrame(
    list(smooth_rows.values()),
    index=list(smooth_rows.keys()),
    columns=['xaxis', 'yaxis', 'colors'],
)

In [4]:
from bokeh.palettes import Set3

# 1. Experimental samples: default sheet of the workbook, wrapped for Bokeh
rough_df = pd.read_excel('data/rough_samples.xlsx')
source_rough = ColumnDataSource(rough_df)

# 2. Figure collection and a Set3 palette sized from the number of data columns
rough_plots = []
color_palette = Set3[len(rough_df.columns[1:])+2]

# a. One figure per selected column; the slice keeps only the column at position 1
for i, col in enumerate(rough_df.columns[1:2]):
    rough_plot = figure(
        title=str(col),
        x_axis_label='xaxis',
        y_axis_label='yaxis',
        width=470,
        height=420,
        tooltips=[("index", "$index"), ("(x,y)", "($x, $y)")],
    )

    # Base function: every shifted sample, then the smoothed samples on top
    rough_plot.circle(
        x=base_function_df.xaxis,
        y=base_function_df.yaxis,
        color=base_function_df.colors,
        legend_label='Base function non-smooth',
        size=6,
    )
    rough_plot.circle(
        x=smooth_df.xaxis,
        y=smooth_df.yaxis,
        color=smooth_df.colors,
        legend_label='Base function smooth',
        size=6,
    )

    # Experimental curve drawn as a line plus triangle markers
    rough_plot.line(
        'xaxis', col,
        source=source_rough,
        color='#9DC3E6',
        legend_label=str(col),
        line_width=4,
    )
    rough_plot.triangle(
        'xaxis', col,
        source=source_rough,
        fill_color=color_palette[1],
        size=10,
        legend_label=f"{col} points",
    )

    # Fixed intensity window, shared theme, then collect the figure
    rough_plot.y_range = Range1d(-5000, 50000)
    rough_plot = plot_format(rough_plot, "Degrees", "Intensity", "top_left", "10pt", "8pt", "8pt")
    rough_plots.append(rough_plot)

# 3. Lay the figures out three per row and render
grid_rough = gridplot(children=rough_plots, ncols=3, merge_tools=False, width=650, height=340)
show(grid_rough)

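## Gaussian function: $A_{1}\exp\left(-\frac{(x-x_0)^2}{2\sigma^2}\right)$; the fits below use the super-Gaussian form $A_{1}\exp\left(-\left|\frac{x-x_0}{\sigma}\right|^{n}\right)$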


In [5]:
#| column: screen
from bokeh.palettes import Set3
from scipy.optimize import minimize

# 1. Smoothed base-function samples
#    (original note: 330 points from -16.4 to 16.5 — not verifiable from this cell)
x_base, y_base = smooth_df['xaxis'].values, smooth_df['yaxis'].values

# 2. Experimental data from the 'Data' sheet; the x axis is rounded to 3 decimals
rough_df = pd.read_excel('data/rough_samples.xlsx', sheet_name='Data')
source_rough = ColumnDataSource(rough_df)
x_rough = rough_df.xaxis.values.round(3)

# 3. Initial guesses, indexed by parameter name
guess_df = pd.read_excel('data/rough_samples.xlsx', sheet_name='SuperGaussian').set_index('Variables')

# 4. Result table: one column per optimisation method, one row per fitted
#    parameter plus the final error. 'TNC' is not part of this comparison.
methods = ['Nelder-Mead', 'Powell', 'CG', 'L-BFGS-B', 'COBYLA', 'SLSQP', 'trust-constr']
index = ['x0', 'Abase', 'sigma', 'Agaussian', 'n', 'displacement', 'error']
optimized_df = pd.DataFrame(index=index, columns=methods)

# 5. Background profiles
def gaussian(x, x0, sigma, A1):
    """Gaussian of amplitude A1 centred on x0 with width sigma."""
    return A1 * np.exp(-((x - x0) / sigma) ** 2 / 2)


def supergaussian(x, x0, sigma, A1, n):
    """Super-Gaussian A1 * exp(-|(x - x0) / sigma| ** n)."""
    return A1 * np.exp(-abs(((x-x0)/sigma))**n)

# 6. Interpolator over the base function, evaluated by the cost function
pchip = PchipInterpolator(x_base, y_base)
def cost_function(params, y):
    """Root-mean-square error between ``y`` and the modelled curve.

    The model is the base function, interpolated on the shifted axis and
    scaled, plus a super-Gaussian background.

    Parameters
    ----------
    params : sequence of six values, in order
        x0 (axis shift), A0 (base-function scale), sigma, A1 (background
        amplitude), n (super-Gaussian exponent), displacement (offset of the
        background centre relative to x0).
    y : experimental intensities, one per entry of the module-level ``x_rough``.

    Returns
    -------
    The RMSE. It is also appended to the module-level ``convergence`` list,
    so that list records every evaluation made by the optimiser.
    """
    shift, base_scale, width, bg_amplitude, exponent, bg_offset = params
    # Axis on which both model terms are evaluated
    shifted_x = x_rough + shift
    # Scaled base function plus background centred at shift + bg_offset
    model = base_scale*pchip(shifted_x) + supergaussian(shifted_x, shift+bg_offset, width, bg_amplitude, exponent)
    rmse = np.sqrt(np.mean((y - model) ** 2))
    convergence.append(rmse)
    return rmse

# 7. Shared state for the method comparison
rough_plots = []
color_palette = Set3[len(rough_df.columns[1:])+2]
param_names = ['x0', 'Abase', 'sigma', 'Agaussian', 'n', 'displacement']
bounds = (
    (-1, 1),             # x0
    (-0.5, 1.5),         # Abase
    (1.0, 4.0),          # sigma
    (-10000.0, None),    # Agaussian (no upper bound)
    (1.0, 5.0),          # n
    (-1.0, 1.0),         # displacement
)
hover_tooltips = [("index", "$index"),("(x,y)", "($x, $y)")]
backgrounds = figure(title = 'Background functions all methods', width = 550, height = 450, tooltips = hover_tooltips)
differences = figure(title = 'Errors all methods', width = 550, height = 450, tooltips = hover_tooltips)
convergences = figure(title = 'Error convergences all methods', width = 550, height = 450, tooltips = hover_tooltips)

for j, method in enumerate(methods):
    # cost_function appends each evaluated RMSE to this module-level list,
    # so it must be reset before every method.
    convergence = []
    # The slice selects a single experimental column (position 6).
    for i, col in enumerate(rough_df.columns[6:7]):
        # 8. Per-method figures
        rough_plot = figure(title = f"{col}: {method}", width = 550, height = 450, tooltips = hover_tooltips)
        # NOTE(review): despite its title this figure receives the error
        # convergence curve below, not a background function — confirm intent.
        backgroundsbg = figure(title = f'Background functions - {method}', width = 550, height = 450, tooltips = hover_tooltips)
        differenceg = figure(title = f'Gaussian Experimental vs Optimized differences - {method}', width = 550, height = 450, tooltips = hover_tooltips)

        # 9. Initial guess for this column
        guess = [guess_df.loc[var, col] for var in param_names]

        # 10. Two bounded passes; the second starts from the result of the first.
        #     Methods without bound support ignore `bounds` with a warning,
        #     which this notebook filters out.
        y_rough = rough_df[col].copy().values
        cost_fn = lambda p:cost_function(p, y_rough)
        result = minimize(cost_fn, guess, method=method, bounds=bounds)
        optimized_parameters = result.x
        result2 = minimize(cost_fn, optimized_parameters, method=method, bounds=bounds)
        optimized_parameters2 = result2.x
        x0_opt, A0_opt, sigma_opt, A1_opt, n_opt, displacement_opt = optimized_parameters2

        # Only the second-pass parameters are reported. Use a single .loc[row, col]
        # call: chained `.loc[row][col] = v` may write to a temporary copy and is
        # a silent no-op under pandas Copy-on-Write.
        for name, value in zip(param_names, optimized_parameters2):
            optimized_df.loc[name, method] = value

        # 11. Rebuild the fitted curve from the optimised parameters
        x_new_opt = x_rough + x0_opt
        # scaled base function on the shifted axis
        y_base_opt = A0_opt*pchip(x_new_opt)
        # background centred at x0 + displacement
        y_background_opt = supergaussian(x_new_opt, x0_opt+displacement_opt, sigma_opt, A1_opt, n_opt)
        y_optimized = y_base_opt + y_background_opt
        error = np.sqrt(np.mean((y_rough - y_optimized) ** 2))
        optimized_df.loc['error', method] = error

        vline = Span(location=0.0, dimension = 'height', line_color='#FEEED9', line_width=1)
        rough_plot.add_layout(vline)

        # 12. Fitted components (drawn against the unshifted experimental axis)
        rough_plot.line(x_rough, y_base_opt, legend_label = 'Base', line_width = 5, color='#F96F5D')
        rough_plot.line(x_rough, y_background_opt, legend_label = 'Bbackground', line_width = 5, color='#F9B5AC')
        rough_plot.line(x_rough, y_optimized, legend_label = 'Optimized function', line_width = 5, color='#987284')
        rough_plot.triangle(x_rough, y_optimized, legend_label = 'Optimized points', size = 8, color=color_palette[1])
        backgrounds.line(x_rough, y_background_opt, color = color_palette[j], line_width = 5 , legend_label = f"{method}")
        backgrounds.circle(x_rough, y_background_opt, fill_color = color_palette[j], size = 7 , legend_label = f"{method}")
        backgroundsbg.line(np.arange(0, len(convergence)), convergence, color = color_palette[i], line_width = 5 , legend_label = f"Background {col}")

        # 13. Experimental data
        rough_plot.line('xaxis', col, source=source_rough, color = '#9DC3E6', legend_label = str(col), line_width=4, line_dash = 'dashed')
        rough_plot.circle('xaxis', col, source=source_rough, fill_color= color_palette[i], size=7, legend_label = f"{col} points")

        # 14. RMSE per cost-function evaluation, both passes
        convergences.line(np.arange(0, len(convergence)), convergence, legend_label = method, color=color_palette[j], line_width=5)

        # 15. Format and collect the per-method figure
        rough_plot.y_range = Range1d(-5000, 50000)
        rough_plot.xaxis.ticker.desired_num_ticks = 10
        rough_plot.yaxis.ticker.desired_num_ticks = 10
        rough_plot = plot_format(rough_plot, "Degrees", "Intensity", "top_left", "10pt", "8pt", "9pt")
        rough_plots.append(rough_plot)

        # 16. Residuals (experimental minus fitted)
        diff = y_rough - y_optimized
        differenceg.line(x=x_rough, y=diff, legend_label = col, color = color_palette[i], line_width=4)
        differenceg.circle(x=x_rough, y=diff, legend_label = col, fill_color= color_palette[i], size=7)
        differences.line(x=x_rough, y=diff, legend_label = method, color = color_palette[j], line_width=4)
        differences.circle(x=x_rough, y=diff, legend_label = method, fill_color = color_palette[j], size=6)

    # Figures created for the (single) column of this method
    plots = [backgroundsbg, differenceg]
    for plot in plots:
        # NOTE(review): backgroundsbg plots RMSE against evaluation index, so
        # "Degrees"/"Intensity" do not describe its axes — confirm the labels.
        plot = plot_format(plot, "Degrees", "Intensity", "top_left", "10pt", "10pt", "9pt")
        rough_plots.append(plot)
        plot.xaxis.ticker.desired_num_ticks = 10
        plot.yaxis.ticker.desired_num_ticks = 10
    differenceg.y_range = Range1d(-3000, 3000)
    backgrounds.add_layout(vline)
    backgroundsbg.add_layout(vline)

display(optimized_df)
backgrounds = plot_format(backgrounds, "Degrees", "Intensity", "top_left", "9pt", "9pt", "9pt")
differences = plot_format(differences, "Degrees", "Intensity", "top_left", "9pt", "9pt", "9pt")
# NOTE(review): `convergences` plots RMSE against evaluation index; the axis
# labels below are the ones used for the other figures — confirm.
convergences = plot_format(convergences, "Degrees", "Intensity", "top_left", "9pt", "9pt", "9pt")

convergences.y_range = Range1d(-2000, 50000)
differences.y_range = Range1d(-2000, 2000)
# Summary figures form the first row of the three-column grid
rough_plots.insert(0, backgrounds)
rough_plots.insert(1, differences)
rough_plots.insert(2, convergences)
grid_rough = gridplot(children = rough_plots, ncols = 3, merge_tools=False, width = 500, height = 340)
show(grid_rough)


# merged_df = guess_df.join(optimized_df)\
#     [['ann1', 'ann1_opt', 'pt2', 'pt2_opt', 'pt2b', 'pt2b_opt', 'pt2c', 'pt2c_opt', 'pt2d', 'pt2d_opt', 'pt2e', 'pt2e_opt']]
# display(merged_df)

Unnamed: 0,Nelder-Mead,Powell,CG,L-BFGS-B,COBYLA,SLSQP,trust-constr
x0,0.196639,0.196871,0.196874,0.196873,0.192717,0.196871,0.196639
Abase,0.525085,0.525375,0.525604,0.525604,0.524094,0.525604,0.525086
sigma,2.613408,2.614181,2.616063,2.616064,2.638738,2.616062,2.613412
Agaussian,6020.031643,6010.548585,5999.999982,6000.025935,6000.946525,6000.004595,6020.004796
n,1.953781,1.952948,1.953809,1.953807,2.006467,1.953797,1.953782
displacement,-0.012475,-0.011254,-0.010882,-0.010883,-0.028418,-0.010894,-0.012473
error,114.768265,114.769146,114.77002,114.770015,115.721999,114.770019,114.768265


### Removing points

In [115]:
#| column: screen
from bokeh.palettes import Set3
from scipy.optimize import minimize

# 1. Smoothed base-function samples
#    (original note: 330 points from -16.4 to 16.5 — not verifiable from this cell)
x_base, y_base = smooth_df['xaxis'].values, smooth_df['yaxis'].values
# Samples with an intensity at or above this value are excluded from the fit
limit = 50000

# 2. Experimental data from the 'Data' sheet; the x axis is rounded to 3 decimals
rough_df = pd.read_excel('data/rough_samples.xlsx', sheet_name='Data')
source_rough = ColumnDataSource(rough_df)
x_rough = rough_df.xaxis.values.round(3)

# 3. Initial guesses, indexed by parameter name
guess_df = pd.read_excel('data/rough_samples.xlsx', sheet_name='SuperGaussian').set_index('Variables')

# 4. Result table: one column per optimisation method, one row per fitted
#    parameter plus the final error. 'TNC' is not part of this comparison.
methods = ['Nelder-Mead', 'Powell', 'CG', 'L-BFGS-B', 'COBYLA', 'SLSQP', 'trust-constr']
index = ['x0', 'Abase', 'sigma', 'Agaussian', 'n', 'displacement', 'error']
optimized_df = pd.DataFrame(index=index, columns=methods)

# 5. Background profile
def supergaussian(x, x0, sigma, A1, n):
    """Super-Gaussian A1 * exp(-|(x - x0) / sigma| ** n)."""
    return A1 * np.exp(-abs(((x-x0)/sigma))**n)

# 6. Interpolator over the base function, evaluated by the cost function
pchip = PchipInterpolator(x_base, y_base)
def cost_function(params, y):
    """Root-mean-square error of the model over the samples below ``limit``.

    The model is the base function, interpolated on the shifted axis and
    scaled, plus a super-Gaussian background. Samples whose intensity is not
    below the module-level ``limit`` are left out of the comparison.

    Parameters
    ----------
    params : sequence of six values, in order
        x0 (axis shift), A0 (base-function scale), sigma, A1 (background
        amplitude), n (super-Gaussian exponent), displacement (offset of the
        background centre relative to x0).
    y : experimental intensities, one per entry of the module-level ``x_rough``.

    Returns
    -------
    The RMSE. It is also appended to the module-level ``convergence`` list,
    so that list records every evaluation made by the optimiser.
    """
    shift, base_scale, width, bg_amplitude, exponent, bg_offset = params
    keep = y < limit
    kept_y = y[keep]
    # Axis on which both model terms are evaluated, restricted to kept samples
    shifted_x = x_rough[keep] + shift
    # Scaled base function plus background centred at shift + bg_offset
    model = base_scale*pchip(shifted_x) + supergaussian(shifted_x, shift+bg_offset, width, bg_amplitude, exponent)
    rmse = np.sqrt(np.mean((kept_y - model) ** 2))
    convergence.append(rmse)
    return rmse

# 7. Shared state for the method comparison
rough_plots = []
color_palette = Set3[len(rough_df.columns[1:])+2]
param_names = ['x0', 'Abase', 'sigma', 'Agaussian', 'n', 'displacement']
bounds = (
    (-0.3, 0.3),    # x0
    (-0.5, 1.2),    # Abase
    (1, 4),         # sigma
    (0, None),      # Agaussian (no upper bound)
    (1, 4),         # n
    (-1, 1),        # displacement
)
hover_tooltips = [("index", "$index"),("(x,y)", "($x, $y)")]
backgrounds = figure(title = 'Background functions all methods', width = 550, height = 450, tooltips = hover_tooltips)
differences = figure(title = 'Errors all methods', width = 550, height = 450, tooltips = hover_tooltips)
convergences = figure(title = 'Error convergences all methods', width = 550, height = 450, tooltips = hover_tooltips)

for j, method in enumerate(methods):
    # cost_function appends each evaluated RMSE to this module-level list,
    # so it must be reset before every method.
    convergence = []
    # The slice selects a single experimental column (position 6).
    for i, col in enumerate(rough_df.columns[6:7]):
        # 8. Per-method figures
        rough_plot = figure(title = f"{col}: {method}", width = 550, height = 450, tooltips = hover_tooltips)
        # NOTE(review): despite its title this figure receives the error
        # convergence curve below, not a background function — confirm intent.
        backgroundsbg = figure(title = f'Background functions - {method}', width = 550, height = 450, tooltips = hover_tooltips)
        differenceg = figure(title = f'Gaussian Experimental vs Optimized differences - {method}', width = 550, height = 450, tooltips = hover_tooltips)

        # 9. Initial guess for this column
        guess = [guess_df.loc[var, col] for var in param_names]

        # 10. First pass unbounded, second pass bounded and started from the
        #     result of the first.
        y_rough = rough_df[col].copy().values
        cost_fn = lambda p:cost_function(p, y_rough)
        result = minimize(cost_fn, guess, method=method)
        optimized_parameters = result.x
        result2 = minimize(cost_fn, optimized_parameters, method=method, bounds=bounds)
        optimized_parameters2 = result2.x
        x0_opt, A0_opt, sigma_opt, A1_opt, n_opt, displacement_opt = optimized_parameters2

        # Use a single .loc[row, col] call: chained `.loc[row][col] = v` may
        # write to a temporary copy and is a silent no-op under pandas
        # Copy-on-Write.
        for name, value in zip(param_names, optimized_parameters2):
            optimized_df.loc[name, method] = value

        # 11. Rebuild the fitted curve on the same samples the cost function used
        mask = y_rough < limit
        y_rough = y_rough[mask]
        x_rough_n = x_rough[mask]
        x_new_opt = x_rough_n + x0_opt
        # scaled base function on the shifted axis
        y_base_opt = A0_opt*pchip(x_new_opt)
        # background centred at x0 + displacement
        y_background_opt = supergaussian(x_new_opt, x0_opt+displacement_opt, sigma_opt, A1_opt, n_opt)
        y_optimized = y_base_opt + y_background_opt
        error = np.sqrt(np.mean((y_rough - y_optimized) ** 2))
        optimized_df.loc['error', method] = error

        vline = Span(location=0.0, dimension = 'height', line_color='#FEEED9', line_width=1)
        rough_plot.add_layout(vline)

        # 12. Fitted components (drawn against the unshifted, masked axis)
        rough_plot.line(x_rough_n, y_base_opt, legend_label = 'Base', line_width = 5, color='#F96F5D')
        rough_plot.line(x_rough_n, y_background_opt, legend_label = 'Bbackground', line_width = 5, color='#F9B5AC')
        rough_plot.line(x_rough_n, y_optimized, legend_label = 'Optimized function', line_width = 5, color='#987284')
        rough_plot.triangle(x_rough_n, y_optimized, legend_label = 'Optimized points', size = 8, color=color_palette[1])
        backgrounds.line(x_rough_n, y_background_opt, color = color_palette[j], line_width = 5 , legend_label = f"{method}")
        backgrounds.circle(x_rough_n, y_background_opt, fill_color = color_palette[j], size = 7 , legend_label = f"{method}")
        backgroundsbg.line(np.arange(0, len(convergence)), convergence, color = color_palette[i], line_width = 5 , legend_label = f"Background {col}")

        # 13. Experimental data (all samples, including any excluded by the mask)
        rough_plot.line('xaxis', col, source=source_rough, color = '#9DC3E6', legend_label = str(col), line_width=4, line_dash = 'dashed')
        rough_plot.circle('xaxis', col, source=source_rough, fill_color= color_palette[i], size=7, legend_label = f"{col} points")

        # 14. RMSE per cost-function evaluation, both passes
        convergences.line(np.arange(0, len(convergence)), convergence, legend_label = method, color=color_palette[j], line_width=5)

        # 15. Format and collect the per-method figure
        rough_plot.y_range = Range1d(-5000, 50000)
        rough_plot.xaxis.ticker.desired_num_ticks = 10
        rough_plot.yaxis.ticker.desired_num_ticks = 10
        rough_plot = plot_format(rough_plot, "Degrees", "Intensity", "top_left", "10pt", "8pt", "9pt")
        rough_plots.append(rough_plot)

        # 16. Residuals (experimental minus fitted) on the masked samples
        diff = y_rough - y_optimized
        differenceg.line(x=x_rough_n, y=diff, legend_label = col, color = color_palette[i], line_width=4)
        differenceg.circle(x=x_rough_n, y=diff, legend_label = col, fill_color= color_palette[i], size=7)
        differences.line(x=x_rough_n, y=diff, legend_label = method, color = color_palette[j], line_width=4)
        differences.circle(x=x_rough_n, y=diff, legend_label = method, fill_color = color_palette[j], size=6)

    # Figures created for the (single) column of this method
    plots = [backgroundsbg, differenceg]
    for plot in plots:
        # NOTE(review): backgroundsbg plots RMSE against evaluation index, so
        # "Degrees"/"Intensity" do not describe its axes — confirm the labels.
        plot = plot_format(plot, "Degrees", "Intensity", "top_left", "10pt", "10pt", "9pt")
        rough_plots.append(plot)
        plot.xaxis.ticker.desired_num_ticks = 10
        plot.yaxis.ticker.desired_num_ticks = 10
    differenceg.y_range = Range1d(-3000, 3000)
    backgrounds.add_layout(vline)
    backgroundsbg.add_layout(vline)

display(optimized_df)
backgrounds = plot_format(backgrounds, "Degrees", "Intensity", "top_left", "9pt", "9pt", "9pt")
differences = plot_format(differences, "Degrees", "Intensity", "top_left", "9pt", "9pt", "9pt")
# NOTE(review): `convergences` plots RMSE against evaluation index; the axis
# labels below are the ones used for the other figures — confirm.
convergences = plot_format(convergences, "Degrees", "Intensity", "top_left", "9pt", "9pt", "9pt")

differences.y_range = Range1d(-2000, 2000)
# Summary figures form the first row of the three-column grid
rough_plots.insert(0, backgrounds)
rough_plots.insert(1, differences)
rough_plots.insert(2, convergences)
grid_rough = gridplot(children = rough_plots, ncols = 3, merge_tools=False, width = 500, height = 340)
show(grid_rough)


# merged_df = guess_df.join(optimized_df)\
#     [['ann1', 'ann1_opt', 'pt2', 'pt2_opt', 'pt2b', 'pt2b_opt', 'pt2c', 'pt2c_opt', 'pt2d', 'pt2d_opt', 'pt2e', 'pt2e_opt']]
# display(merged_df)

Unnamed: 0,Nelder-Mead,Powell,CG,L-BFGS-B,COBYLA,SLSQP,trust-constr
x0,0.196639,0.196726,0.196874,0.196874,0.192717,0.196874,0.196873
Abase,0.525085,0.525214,0.525604,0.525604,0.524094,0.525606,0.525604
sigma,2.613408,2.613774,2.616063,2.616063,2.638738,2.616074,2.616063
Agaussian,6020.031643,6015.722894,5999.999982,5999.999975,6000.946525,5999.925043,6000.005679
n,1.953781,1.953413,1.953809,1.953807,2.006467,1.953813,1.953808
displacement,-0.012475,-0.01199,-0.010882,-0.010883,-0.028418,-0.010879,-0.010884
error,114.768265,114.768422,114.77002,114.77002,115.721999,114.770033,114.770019


### Removing points + normalized 

In [105]:
#| column: screen
from bokeh.palettes import Set3
from scipy.optimize import minimize

# 1. Smoothed base-function samples, with y scaled so that its maximum is 1
#    (original note: 330 points from -16.4 to 16.5 — not verifiable from this cell)
x_base = smooth_df['xaxis'].values
y_base = smooth_df['yaxis'].values
y_base = y_base/np.max(y_base)
# Threshold compared against the intensities inside the cost function
limit = 50000

# 2. Experimental data from the 'Data' sheet; the x axis is rounded to 3 decimals
rough_df = pd.read_excel('data/rough_samples.xlsx', sheet_name='Data')
source_rough = ColumnDataSource(rough_df)
x_rough = rough_df.xaxis.values.round(3)

# 3. Initial guesses, indexed by parameter name
guess_df = pd.read_excel('data/rough_samples.xlsx', sheet_name='SuperGaussian').set_index('Variables')

# 4. Result table: one column per optimisation method, one row per fitted
#    parameter plus the final error. 'TNC' is not part of this comparison.
methods = ['Nelder-Mead', 'Powell', 'CG', 'L-BFGS-B', 'COBYLA', 'SLSQP', 'trust-constr']
index = ['x0', 'Abase', 'sigma', 'Agaussian', 'n', 'displacement', 'error']
optimized_df = pd.DataFrame(index=index, columns=methods)

# 5. Background profile
def supergaussian(x, x0, sigma, A1, n):
    """Super-Gaussian A1 * exp(-|(x - x0) / sigma| ** n)."""
    return A1 * np.exp(-abs(((x-x0)/sigma))**n)

# 6. Interpolator over the normalised base function, evaluated by the cost function
pchip = PchipInterpolator(x_base, y_base)
def cost_function(params, y):
    """Root-mean-square error of the model over the samples below ``limit``.

    The model is the base function, interpolated on the shifted axis and
    scaled, plus a super-Gaussian background. Samples of ``y`` that are not
    below the module-level ``limit`` are left out of the comparison.

    Parameters
    ----------
    params : sequence of six values, in order
        x0 (axis shift), A0 (base-function scale), sigma, A1 (background
        amplitude), n (super-Gaussian exponent), displacement (offset of the
        background centre relative to x0).
    y : intensities as passed by the caller, one per entry of the
        module-level ``x_rough``.

    Returns
    -------
    The RMSE. It is also appended to the module-level ``convergence`` list,
    so that list records every evaluation made by the optimiser.
    """
    shift, base_scale, width, bg_amplitude, exponent, bg_offset = params
    keep = y < limit
    kept_y = y[keep]
    # Axis on which both model terms are evaluated, restricted to kept samples
    shifted_x = x_rough[keep] + shift
    # Scaled base function plus background centred at shift + bg_offset
    model = base_scale*pchip(shifted_x) + supergaussian(shifted_x, shift+bg_offset, width, bg_amplitude, exponent)
    rmse = np.sqrt(np.mean((kept_y - model) ** 2))
    convergence.append(rmse)
    return rmse

# 7. Shared state for the method comparison
rough_plots = []
color_palette = Set3[len(rough_df.columns[1:])+2]
param_names = ['x0', 'Abase', 'sigma', 'Agaussian', 'n', 'displacement']
bounds = (
    (-0.3, 0.3),    # x0
    (-0.5, 1.2),    # Abase
    (1, 4),         # sigma
    (0, None),      # Agaussian (no upper bound)
    (1, 4),         # n
    (-1, 1),        # displacement
)
hover_tooltips = [("index", "$index"),("(x,y)", "($x, $y)")]
backgrounds = figure(title = 'Background functions all methods', width = 550, height = 450, tooltips = hover_tooltips)
differences = figure(title = 'Errors all methods', width = 550, height = 450, tooltips = hover_tooltips)
convergences = figure(title = 'Error convergences all methods', width = 550, height = 450, tooltips = hover_tooltips)

for j, method in enumerate(methods):
    # cost_function appends each evaluated RMSE to this module-level list,
    # so it must be reset before every method.
    convergence = []
    # The slice selects a single experimental column (position 6).
    for i, col in enumerate(rough_df.columns[6:7]):
        # 8. Per-method figures
        rough_plot = figure(title = f"{col}: {method}", width = 550, height = 450, tooltips = hover_tooltips)
        # NOTE(review): despite its title this figure receives the error
        # convergence curve below, not a background function — confirm intent.
        backgroundsbg = figure(title = f'Background functions - {method}', width = 550, height = 450, tooltips = hover_tooltips)
        differenceg = figure(title = f'Gaussian Experimental vs Optimized differences - {method}', width = 550, height = 450, tooltips = hover_tooltips)

        # 9. Initial guess for this column
        guess = [guess_df.loc[var, col] for var in param_names]

        # 10. Normalise the experimental curve to a maximum of 1, then fit:
        #     first pass unbounded, second pass bounded and started from the
        #     result of the first.
        y_rough = rough_df[col].copy().values
        y_rough = y_rough/np.max(y_rough)
        cost_fn = lambda p:cost_function(p, y_rough)
        result = minimize(cost_fn, guess, method=method)
        optimized_parameters = result.x
        result2 = minimize(cost_fn, optimized_parameters, method=method, bounds=bounds)
        optimized_parameters2 = result2.x
        x0_opt, A0_opt, sigma_opt, A1_opt, n_opt, displacement_opt = optimized_parameters2

        # Use a single .loc[row, col] call: chained `.loc[row][col] = v` may
        # write to a temporary copy and is a silent no-op under pandas
        # Copy-on-Write.
        for name, value in zip(param_names, optimized_parameters2):
            optimized_df.loc[name, method] = value

        # 11. Rebuild the fitted curve on the same samples the cost function used.
        #     The mask is computed here, with the same expression as in
        #     cost_function, so this cell does not rely on a `mask` variable
        #     left in the kernel by another cell.
        # NOTE(review): y_rough was normalised above, so unless `limit` is
        # expressed in normalised units this mask keeps every sample —
        # confirm the intended threshold.
        mask = y_rough < limit
        y_rough = y_rough[mask]
        x_rough_n = x_rough[mask]
        x_new_opt = x_rough_n + x0_opt
        # scaled base function on the shifted axis
        y_base_opt = A0_opt*pchip(x_new_opt)
        # background centred at x0 + displacement
        y_background_opt = supergaussian(x_new_opt, x0_opt+displacement_opt, sigma_opt, A1_opt, n_opt)
        y_optimized = y_base_opt + y_background_opt
        error = np.sqrt(np.mean((y_rough - y_optimized) ** 2))
        optimized_df.loc['error', method] = error

        vline = Span(location=0.0, dimension = 'height', line_color='#FEEED9', line_width=1)
        rough_plot.add_layout(vline)

        # 12. Fitted components (drawn against the unshifted, masked axis)
        rough_plot.line(x_rough_n, y_base_opt, legend_label = 'Base', line_width = 5, color='#F96F5D')
        rough_plot.line(x_rough_n, y_background_opt, legend_label = 'Bbackground', line_width = 5, color='#F9B5AC')
        rough_plot.line(x_rough_n, y_optimized, legend_label = 'Optimized function', line_width = 5, color='#987284')
        rough_plot.triangle(x_rough_n, y_optimized, legend_label = 'Optimized points', size = 8, color=color_palette[1])
        backgrounds.line(x_rough_n, y_background_opt, color = color_palette[j], line_width = 5 , legend_label = f"{method}")
        backgrounds.circle(x_rough_n, y_background_opt, fill_color = color_palette[j], size = 7 , legend_label = f"{method}")
        backgroundsbg.line(np.arange(0, len(convergence)), convergence, color = color_palette[i], line_width = 5 , legend_label = f"Background {col}")

        # 13. Normalised experimental data
        rough_plot.line(x_rough_n, y_rough, color = '#9DC3E6', legend_label = str(col), line_width=4, line_dash = 'dashed')
        rough_plot.circle(x_rough_n, y_rough, fill_color= color_palette[i], size=7, legend_label = f"{col} points")

        # 14. RMSE per cost-function evaluation, both passes
        convergences.line(np.arange(0, len(convergence)), convergence, legend_label = method, color=color_palette[j], line_width=5)

        # 15. Format and collect the per-method figure
        rough_plot.y_range = Range1d(-0.2, 1.2)
        rough_plot.xaxis.ticker.desired_num_ticks = 10
        rough_plot.yaxis.ticker.desired_num_ticks = 10
        rough_plot = plot_format(rough_plot, "Degrees", "Intensity", "top_left", "10pt", "8pt", "9pt")
        rough_plots.append(rough_plot)

        # 16. Residuals (experimental minus fitted) on the masked samples
        diff = y_rough - y_optimized
        differenceg.line(x=x_rough_n, y=diff, legend_label = col, color = color_palette[i], line_width=4)
        differenceg.circle(x=x_rough_n, y=diff, legend_label = col, fill_color= color_palette[i], size=7)
        differences.line(x=x_rough_n, y=diff, legend_label = method, color = color_palette[j], line_width=4)
        differences.circle(x=x_rough_n, y=diff, legend_label = method, fill_color = color_palette[j], size=6)

    # Figures created for the (single) column of this method
    plots = [backgroundsbg, differenceg]
    for plot in plots:
        # NOTE(review): backgroundsbg plots RMSE against evaluation index, so
        # "Degrees"/"Intensity" do not describe its axes — confirm the labels.
        plot = plot_format(plot, "Degrees", "Intensity", "top_left", "10pt", "10pt", "9pt")
        rough_plots.append(plot)
        plot.xaxis.ticker.desired_num_ticks = 10
        plot.yaxis.ticker.desired_num_ticks = 10
    differenceg.y_range = Range1d(-0.2, 0.2)
    backgrounds.add_layout(vline)
    backgroundsbg.add_layout(vline)

display(optimized_df)
backgrounds = plot_format(backgrounds, "Degrees", "Intensity", "top_left", "9pt", "9pt", "9pt")
differences = plot_format(differences, "Degrees", "Intensity", "top_left", "9pt", "9pt", "9pt")
# NOTE(review): `convergences` plots RMSE against evaluation index; the axis
# labels below are the ones used for the other figures — confirm.
convergences = plot_format(convergences, "Degrees", "Intensity", "top_left", "9pt", "9pt", "9pt")

differences.y_range = Range1d(-0.2, 0.2)
# Summary figures form the first row of the three-column grid
rough_plots.insert(0, backgrounds)
rough_plots.insert(1, differences)
rough_plots.insert(2, convergences)
grid_rough = gridplot(children = rough_plots, ncols = 3, merge_tools=False, width = 500, height = 340)
show(grid_rough)


# merged_df = guess_df.join(optimized_df)\
#     [['ann1', 'ann1_opt', 'pt2', 'pt2_opt', 'pt2b', 'pt2b_opt', 'pt2c', 'pt2c_opt', 'pt2d', 'pt2d_opt', 'pt2e', 'pt2e_opt']]
# display(merged_df)

Unnamed: 0,Nelder-Mead,Powell,CG,L-BFGS-B,COBYLA,SLSQP,trust-constr
x0,0.196638,0.299936,0.196638,0.196637,0.155398,0.196404,0.196638
Abase,0.766287,0.047836,0.766221,0.766297,0.631223,0.766037,0.766293
sigma,2.613401,2.093915,2.613021,2.613426,2.346042,2.612412,2.613407
Agaussian,0.242546,0.935389,0.242625,0.242534,0.374288,0.242851,0.242538
n,1.953794,3.352785,1.953594,1.953793,2.354901,1.952543,1.95378
displacement,-0.012481,-0.143952,-0.012526,-0.012483,-0.163304,-0.013348,-0.012478
error,0.004624,0.011191,0.004624,0.004624,0.006093,0.004624,0.004624
