In [1]:
#| echo: false
import numpy as np
from scipy.interpolate import PchipInterpolator
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, Span, Range1d
from bokeh.layouts import gridplot
from bokeh.io import output_notebook
import warnings
import pandas as pd
import altair as alt
from IPython.display import display
warnings.filterwarnings('ignore')
output_notebook()

def plot_format(plot, xlabel, ylabel, location, size, titlesize, labelsize):
    """Apply the notebook's shared dark-theme styling to ``plot`` and return it.

    The plot is modified in place.

    Parameters
    ----------
    plot : figure-like object exposing ``xaxis``, ``yaxis``, ``legend``,
        ``title``, ``xgrid`` and ``ygrid``.
    xlabel, ylabel : axis label texts.
    location : legend location.
    size : font size used for both axis labels and tick labels.
    titlesize : font size of the title.
    labelsize : font size of the legend entries.

    Returns
    -------
    The same ``plot`` object that was passed in.
    """
    light_text = "#E3F4FF"
    grid_grey = '#606773'
    panel_fill = "#282B30"

    # Both axes share fonts and colours; only the label text differs.
    for axis, label in ((plot.xaxis, xlabel), (plot.yaxis, ylabel)):
        axis.axis_label = label
        axis.axis_label_text_font_style = 'bold'
        axis.major_label_text_font_style = "bold"
        axis.axis_label_text_font_size = size
        axis.major_label_text_font_size = size
        axis.minor_tick_line_color = grid_grey
        axis.major_label_text_color = light_text
        axis.axis_label_text_color = light_text

    # Legend: clickable entries, fully transparent frame and background.
    legend = plot.legend
    legend.location = location
    legend.click_policy = "hide"
    legend.label_text_font_size = labelsize
    legend.label_text_font_style = 'bold'
    legend.border_line_width = 3
    legend.border_line_color = "navy"
    legend.border_line_alpha = 0.0
    legend.background_fill_alpha = 0.0
    legend.label_text_color = light_text

    # Title
    title = plot.title
    title.text_font_size = titlesize
    title.text_font_style = "bold"
    title.text_color = "#A6DDFF"

    # Dark panel and grid
    plot.background_fill_color = panel_fill
    plot.border_fill_color = panel_fill
    for grid in (plot.xgrid, plot.ygrid):
        grid.grid_line_color = grid_grey

    return plot

# Three-colour cycle repeated 42 times (126 entries); slices of this list
# colour the base-function points further down.
new_colors = ['#9D6C97', '#9DC3E6', '#9DD9C5'] * 42

# 1. Read both sheets of the Excel workbook (returns a dict of DataFrames)
df = pd.read_excel('data/base_function.xlsx', sheet_name=['base', 'M'])

# 2. Split the sheets into two separate DataFrames
base_df = df['base']
M_df = df['M'].sort_values(by='M')

# 3. Create x axis
xaxis = np.arange(-15.5, 16.5, 1)
n_points = len(xaxis)
plots = []

# 4. Build one shifted copy of the base samples per row of M_df.
#    The pieces are collected in a list and concatenated once:
#    DataFrame.append was removed in pandas 2.0 and appending inside a loop
#    re-copies the whole frame on every iteration.
shifted_frames = []
for i, (index, row) in enumerate(M_df.iterrows()):
    new_axis = xaxis - row.M
    shifted_frames.append(pd.DataFrame({
        'mu': [row.M] * n_points,
        'xaxis': new_axis,
        'yaxis': base_df[index],          # column of `base` labelled like this M row
        'colors': new_colors[:n_points],
    }))

if shifted_frames:
    sorted_df = pd.concat(shifted_frames, ignore_index=True)
else:
    sorted_df = pd.DataFrame(columns=['mu', 'xaxis', 'yaxis', 'colors'])

base_function_df = sorted_df.sort_values(by='xaxis').reset_index(drop=True)

# 5. Merge neighbouring samples that are closer than 0.01 on the x axis.
#    Rows are gathered in a dict keyed by output index so that a later
#    near-duplicate overwrites the previous entry, then turned into a frame
#    in one step instead of enlarging the frame cell by cell.
# NOTE(review): row 0 only contributes when row 1 lies within 0.01 of it;
# otherwise the first sample is skipped and the output index starts at 1.
# A run of three or more close points keeps the mean of the last pair only.
# Both behaviours are kept as-is - confirm they are intended.
base_x = base_function_df['xaxis'].values
base_y = base_function_df['yaxis'].values
base_colors = base_function_df['colors'].values
smooth_rows = {}
xoutindx = 0

for aveindex in range(1, len(base_function_df)):
    if (base_x[aveindex] - base_x[aveindex-1]) < 0.01:
        smooth_rows[xoutindx] = (
            (base_x[aveindex] + base_x[aveindex-1])/2,
            (base_y[aveindex] + base_y[aveindex-1])/2,
            base_colors[aveindex],
        )
    else:
        xoutindx += 1
        smooth_rows[xoutindx] = (base_x[aveindex], base_y[aveindex], base_colors[aveindex])

if smooth_rows:
    smooth_df = pd.DataFrame.from_dict(smooth_rows, orient='index', columns=['xaxis', 'yaxis', 'colors'])
else:
    smooth_df = pd.DataFrame(columns=['xaxis', 'yaxis', 'colors'])

from bokeh.palettes import Set3
# 1. Load the rough-sample measurements (default sheet) and wrap them in a
#    ColumnDataSource so glyphs can reference columns by name
rough_df = pd.read_excel('data/rough_samples.xlsx')
source_rough = ColumnDataSource(rough_df)

# 2. Create plot
rough_plots = []
# Set3 is indexed by palette size: number of columns after the first one, plus 2
color_palette = Set3[len(rough_df.columns[1:])+2]

# a. The slice [1:2] limits the loop to the single column right after column 0.
#    NOTE: `i` and `col` stay defined at module level after this loop finishes.
for i, col in enumerate(rough_df.columns[1:2]):
    rough_plot = figure(title = str(col), x_axis_label='xaxis', y_axis_label='yaxis', width = 470, height = 420, tooltips = [("index", "$index"),("(x,y)", "($x, $y)")])
    # Base function points: raw shifted samples and the merged ("smooth") ones
    
    rough_plot.circle(x=base_function_df.xaxis, y=base_function_df.yaxis, color = base_function_df.colors, legend_label = 'Base function non-smooth', size = 6)
    rough_plot.circle(x=smooth_df.xaxis, y=smooth_df.yaxis, color = smooth_df.colors, legend_label = 'Base function smooth', size = 6)
    # rough_plot.line(x=smooth_df.xaxis, y=smooth_df.yaxis, legend_label = 'Base function', line_width=4, color='#D17B8F')

    # Experimental data: line plus triangle markers for the current column
    rough_plot.line('xaxis', col, source=source_rough, color = '#9DC3E6', legend_label = str(col), line_width=4)
    rough_plot.triangle('xaxis', col, source=source_rough, fill_color= color_palette[1], size=10, legend_label = f"{col} points")
    
    # Plot format: fixed y range, then the shared dark theme
    rough_plot.y_range = Range1d(-5000, 50000)
    rough_plot = plot_format(rough_plot, "Degrees", "Intensity", "top_left", "10pt", "8pt", "8pt")
    rough_plots.append(rough_plot)


# Minimization methods
In the previous section, a [minimization function](c_experimental_data.ipynb) was implemented that adds a background function to the base function in order to compensate for sample roughness.

The minimization function found converging solutions; however, in some cases the optimized parameters fell outside the expected boundaries. For this reason, two minimization strategies were implemented to keep the optimized parameters within certain boundaries. The strategies are:
* Give the tails a larger weight than the points in the center, so that the minimization function accounts for the roughness effects observed in the tails.
* Remove some of the sampling points in the center, as these contribute more to the error estimation.

The optimization code is now shown:

In [2]:
#| column: page
from scipy.optimize import minimize
# 1. Base-function samples come from the merged curve in `smooth_df`;
#    a PCHIP interpolator lets the base function be evaluated at any x
x_base = smooth_df['xaxis'].values
y_base = smooth_df['yaxis'].values
pchip = PchipInterpolator(x_base, y_base)

# 2. Rough-sample measurements from the 'Data' sheet; x positions are
#    rounded to three decimals
rough_df = pd.read_excel('data/rough_samples.xlsx', sheet_name='Data')
source_rough = ColumnDataSource(rough_df)
x_rough = np.round(rough_df['xaxis'].values, 3)

# 3. Super-Gaussian used as the background term
def supergaussian(x, x0, sigma, A1, n):
    """Return ``A1 * exp(-|(x - x0) / sigma| ** n)`` (element-wise for arrays)."""
    scaled_offset = abs((x - x0) / sigma)
    return A1 * np.exp(-(scaled_offset ** n))

# 6. Define cost function
def cost_function(params, y):
    """Return the RMSE between measured data and the base-plus-background model.

    ``params`` unpacks to ``(x0, A0, sigma, A1, n, displacement)``. The model is
    ``A0 * pchip(x + x0) + supergaussian(x + x0, x0 + displacement, sigma, A1, n)``.

    Reads these module-level names: ``limit`` (samples with ``y >= limit`` are
    discarded), ``x_rough`` (measurement x positions), ``pchip``,
    ``supergaussian`` and ``weight_bool`` (when true, each squared residual is
    multiplied by ``|x|`` of its sample). Every returned value is also appended
    to the module-level ``convergence`` list.
    """
    x0, A0, sigma, A1, n, displacement = params

    # Keep only the samples below the intensity limit
    keep = y < limit
    y_kept = y[keep]
    x_kept = x_rough[keep]

    # Model evaluated on the shifted axis: scaled base function + background
    x_shifted = x_kept + x0
    model = A0*pchip(x_shifted) + supergaussian(x_shifted, x0+displacement, sigma, A1, n)

    squared_residual = (y_kept - model) ** 2
    if weight_bool:
        # Weight grows with distance from x = 0
        squared_residual = np.abs(x_kept)*squared_residual

    rmse = np.sqrt(np.mean(squared_residual))
    convergence.append(rmse)
    return rmse

def optimize(methods, guess_df, optimized_df, limit, col, weight):
    """Fit the base-plus-background model to one sample with several solvers.

    For every entry of ``methods`` the module-level ``cost_function`` is
    minimized twice (the second run restarts from the first result). The fitted
    parameters, the integrated background area and the fit error are written
    into ``optimized_df``, and the resulting curves are added to the summary
    figures.

    Parameters
    ----------
    methods : sequence of str
        Solver names forwarded to ``scipy.optimize.minimize(method=...)``.
    guess_df : pandas.DataFrame
        Initial guesses; rows 'x0', 'Abase', 'sigma', 'Agaussian', 'n' and
        'displacement', one column per method.
    optimized_df : pandas.DataFrame
        Result table, filled in place. Needs the six parameter rows plus
        'area_background' and 'error', and one column per method.
    limit : number
        Samples of ``rough_df[col]`` that are >= ``limit`` are dropped when the
        fitted curves are rebuilt here.
    col : str
        Column of the module-level ``rough_df`` holding the sample to fit.
    weight : str
        Label inserted in figure titles only.

    Returns
    -------
    tuple
        ``(optimized_df, [backgrounds, differences, optimized])`` - the filled
        table and the three summary figures.

    Notes
    -----
    * ``cost_function`` reads the module-level ``limit`` and ``weight_bool``,
      not the arguments of this function; callers must set those globals
      consistently before calling.
    * The stored 'error' is always the |x|-weighted RMSE, whatever
      ``weight_bool`` is.
    * NOTE(review): per the SciPy documentation the 'CG' solver does not
      support ``bounds``, so its results may leave the box defined below.
    * Also relies on module-level ``rough_df``, ``source_rough``, ``x_rough``,
      ``pchip``, ``supergaussian`` and ``convergence``.
    """
    param_names = ['x0', 'Abase', 'sigma', 'Agaussian', 'n', 'displacement']
    rough_plots = []
    color_palette = Set3[len(rough_df.columns[1:])+2]
    bounds = ((-0.3, 0.3), (-0.5, 1.2), (1, 4), (0, None), (1, 4), (-0.4, 0.4))
            #x0           #Abase        #sigma  #Agaussian  #n       #displacement

    def new_zero_line():
        # Thin vertical marker at x = 0; every figure gets its own Span.
        return Span(location=0.0, dimension = 'height', line_color='#FEEED9', line_width=1)

    # Summary figures shared by all methods
    backgrounds = figure(title = f'Background functions all methods {weight} ({col})', width = 700, height = 450, tooltips = [("index", "$index"),("(x,y)", "($x, $y)")])
    differences = figure(title = f'Difference between experimental and optimized data {weight}', width = 700, height = 450, tooltips = [("index", "$index"),("(x,y)", "($x, $y)")])
    convergences = figure(title = f'Base function all methods {weight} ({col})', width = 700, height = 450, tooltips = [("index", "$index"),("(x,y)", "($x, $y)")])
    optimized = figure(title = f'Optimized functions all methods {weight} ({col})', width = 700, height = 450, tooltips = [("index", "$index"),("(x,y)", "($x, $y)")])

    last_index = 0  # palette index of the last method processed (0 when `methods` is empty)
    for j, method in enumerate(methods):
        last_index = j

        # Per-method figures (built and styled, but not part of the return value)
        rough_plot = figure(title = f"{col}: {method}", width = 550, height = 450, tooltips = [("index", "$index"),("(x,y)", "($x, $y)")])
        backgroundsbg = figure(title = f'Error - {method}', width = 550, height = 450, tooltips = [("index", "$index"),("(x,y)", "($x, $y)")])
        differenceg = figure(title = f'Gaussian Experimental vs Optimized differences - {method}', width = 550, height = 450, tooltips = [("index", "$index"),("(x,y)", "($x, $y)")])

        # Initial guesses for this method
        guess = [guess_df.loc[var][method] for var in param_names]

        # Minimize, then restart once from the first optimum
        y_full = rough_df[col].copy().values
        cost_fn = lambda p: cost_function(p, y_full)
        result = minimize(cost_fn, guess, method=method, bounds=bounds)
        result2 = minimize(cost_fn, result.x, method=method, bounds=bounds)
        x0_opt, A0_opt, sigma_opt, A1_opt, n_opt, displacement_opt = result2.x

        # Single .loc[row, column] assignment: the chained form
        # .loc[row][column] = value may write to a temporary copy.
        for name, value in zip(param_names, result2.x):
            optimized_df.loc[name, method] = value

        # Rebuild the fitted curves on the samples below `limit`
        mask = y_full < limit
        y_rough = y_full[mask]
        x_rough_n = x_rough[mask]
        # Get new x axis
        x_new_opt = x_rough_n + x0_opt
        # interpolate base function with respect to the shifted axis
        y_base_opt = A0_opt*pchip(x_new_opt)
        # background on the shifted axis
        y_background_opt = supergaussian(x_new_opt, x0_opt+displacement_opt, sigma_opt, A1_opt, n_opt)
        # background area integrated on a fine grid over [-15.5, 15.5]
        x_interp = np.arange(-15.5, 15.5001, 0.001).round(3)
        y_interp = supergaussian(x_interp, x0_opt+displacement_opt, sigma_opt, A1_opt, n_opt)
        optimized_df.loc['area_background', method] = np.trapz(y_interp, x=x_interp)
        # optimized function = base + background
        y_optimized = y_base_opt + y_background_opt

        # Error: |x|-weighted RMSE (always weighted, see Notes)
        mse = np.mean(np.abs(x_rough_n)*((y_rough - y_optimized) ** 2))
        rmse = np.sqrt(mse)
        optimized_df.loc['error', method] = rmse

        rough_plot.add_layout(new_zero_line())

        # Plot optimized function lines
        rough_plot.line(x_rough_n, y_base_opt, legend_label = 'Base', line_width = 5, color='#F96F5D')
        rough_plot.line(x_rough_n, y_background_opt, legend_label = 'Bbackground', line_width = 5, color='#F9B5AC')
        rough_plot.line(x_rough_n, y_optimized, legend_label = 'Optimized function', line_width = 5, color='#987284')
        rough_plot.triangle(x_rough_n, y_optimized, legend_label = 'Optimized points', size = 8, color=color_palette[1])
        backgrounds.line(x_rough_n, y_background_opt, color = color_palette[j], line_width = 5 , legend_label = f"{method}")
        backgrounds.circle(x_rough_n, y_background_opt, fill_color = color_palette[j], size = 7 , legend_label = f"{method}")
        # Uses the loop index `j`; the original indexed with an undefined local `i`.
        backgroundsbg.line(np.arange(0, len(convergence)), convergence, color = color_palette[j], line_width = 5 , legend_label = f"Background {col}")

        # Plot rough experimental data
        rough_plot.line('xaxis', col, source=source_rough, color = '#9DC3E6', legend_label = str(col), line_width=4, line_dash = 'dashed')
        rough_plot.circle('xaxis', col, source=source_rough, fill_color= color_palette[j], size=7, legend_label = f"{col} points")

        # Base and optimized curves of this method on the shared figures
        convergences.line(x_rough_n, y_base_opt, legend_label = method, color=color_palette[j], line_width=5)
        optimized.line(x_rough_n, y_optimized, legend_label = method, color=color_palette[j], line_width=5)
        optimized.triangle(x_rough_n, y_optimized, legend_label = method, size = 8, color=color_palette[1])

        # Plot format
        rough_plot.y_range = Range1d(-5000, 50000)
        rough_plot.xaxis.ticker.desired_num_ticks = 10
        rough_plot.yaxis.ticker.desired_num_ticks = 10
        rough_plot = plot_format(rough_plot, "Degrees", "Intensity", "top_left", "10pt", "8pt", "9pt")

        # Difference plot
        diff = y_rough - y_optimized
        differenceg.line(x=x_rough_n, y=diff, legend_label = col, color = color_palette[j], line_width=4)
        differenceg.circle(x=x_rough_n, y=diff, legend_label = col, fill_color= color_palette[j], size=7)
        differences.line(x=x_rough_n, y=diff, legend_label = method, color = color_palette[j], line_width=4)
        differences.circle(x=x_rough_n, y=diff, legend_label = method, fill_color = color_palette[j], size=6)

        # Style the per-method figures inside the loop so nothing after the
        # loop depends on loop-local names (an empty `methods` used to raise).
        for plot in (backgroundsbg, differenceg):
            plot = plot_format(plot, "Degrees", "Intensity", "top_left", "10pt", "10pt", "9pt")
            plot.xaxis.ticker.desired_num_ticks = 10
            plot.yaxis.ticker.desired_num_ticks = 10
        differenceg.y_range = Range1d(-300, 300)
        backgroundsbg.add_layout(new_zero_line())

    backgrounds.add_layout(new_zero_line())

    # Experimental data on the combined "optimized" figure
    optimized.line('xaxis', col, source=source_rough, color = '#9DC3E6', legend_label = str(col), line_width=4, line_dash = 'dashed')
    optimized.circle('xaxis', col, source=source_rough, fill_color= color_palette[last_index], size=7, legend_label = f"{col} points")

    backgrounds = plot_format(backgrounds, "Degrees", "Intensity", "top_left", "9pt", "9pt", "9pt")
    differences = plot_format(differences, "Degrees", "Intensity", "top_left", "9pt", "9pt", "9pt")
    convergences = plot_format(convergences, "Degrees", "Intensity", "top_left", "9pt", "9pt", "9pt")
    optimized = plot_format(optimized, "Degrees", "Intensity", "top_left", "9pt", "9pt", "9pt")

    differences.y_range = Range1d(-2000, 2000)
    # `convergences` is built and styled but intentionally left out of the result
    rough_plots.insert(0, backgrounds)
    rough_plots.insert(1, differences)
    rough_plots.insert(2, optimized)
    return optimized_df, rough_plots

## PT2B sample without and with weighted tails

In order to compare the effect of adding weighted tails, the minimization function was applied to sample PT2B for both cases.
For the non-weighted case, the optimized parameters do not converge to the same solution for each method. 

In comparison, when adding the weights, the optimized parameters all converge to one solution. 
This effect is also observed in the shape of the background functions as well as the error between experimental data and optimized data.

The calculated error shown in the table is the RMSE.

In [3]:
#| column: page
# 3. Get initial guesses
from IPython.display import HTML
col = 'pt2b'
guess_df = pd.read_excel('data/guesses.xlsx', sheet_name=col)
guess_df = guess_df.set_index('Variables')

# 4. Create df that will save optimized parameters
methods = ['Powell', 'CG', 'L-BFGS-B', 'SLSQP', 'trust-constr']
index = ['x0', 'Abase', 'sigma', 'Agaussian', 'n', 'displacement', 'error', 'area_background']
optimized_df = pd.DataFrame(columns=methods, index=index)
convergence = []

# `weight_bool` and `limit` are module-level switches read by cost_function,
# so they must be set before each optimize() call.

# Non-weighted
weight_bool = False
limit = 40000
optimized_df_nw, rough_plots_nw =  optimize(methods, guess_df, optimized_df.copy(), limit, col, 'not-weighted')
optimized_df_nw.columns = [name + '_not_weighted' for name in optimized_df.columns]

# Weighted
weight_bool = True
limit = 40000
optimized_df_w, rough_plots_w =  optimize(methods, guess_df, optimized_df.copy(), limit, col, 'weighted')
optimized_df_w.columns = [name + '_weighted' for name in optimized_df.columns]

# Combine dfs
optimized_df = pd.concat([optimized_df_nw, optimized_df_w], axis=1)
styler = optimized_df.style.set_table_attributes('style="font-size: 11px"')
# Styler.render() was removed in pandas 2.0; to_html() replaces it (pandas >= 1.3).
optimized_df_html = styler.to_html() if hasattr(styler, 'to_html') else styler.render()
display(HTML(optimized_df_html))

# Combine plots
rough_plots_nw.extend(rough_plots_w)

Unnamed: 0,Powell_not_weighted,CG_not_weighted,L-BFGS-B_not_weighted,SLSQP_not_weighted,trust-constr_not_weighted,Powell_weighted,CG_weighted,L-BFGS-B_weighted,SLSQP_weighted,trust-constr_weighted
x0,0.018211,0.0171,0.01901,0.035705,0.017846,0.022585,0.022583,0.022583,0.022586,0.022583
Abase,0.969908,0.973447,0.972645,0.947482,0.969031,0.969682,0.969687,0.969686,0.969677,0.969685
sigma,2.117634,2.035197,2.279734,2.173763,2.089853,2.779542,2.779018,2.779047,2.778353,2.779032
Agaussian,554.915071,433.897441,426.025903,1229.160855,591.325521,425.982586,426.008811,426.002565,426.338293,426.009897
n,1.18537,1.078285,1.130572,4.0,1.217606,2.987178,2.984405,2.985669,2.984767,2.985632
displacement,-0.399948,-0.723129,-0.4,0.4,-0.399998,0.228718,0.228685,0.228703,0.228727,0.228698
error,47.228539,48.13418,47.668053,38.331845,47.161555,32.819204,32.819177,32.819173,32.81917,32.819173
area_background,2217.834876,1715.502534,1857.837796,4843.641976,2316.353445,2114.243543,2113.889401,2113.919733,2115.029071,2113.943555


In [16]:
#| column: page
# Lay the collected summary figures out in a three-column grid and render it
grid_rough = gridplot(
    rough_plots_nw,
    ncols=3,
    merge_tools=False,
    width=450,
    height=350,
)
show(grid_rough)

The plot shows the reconstructed backgrounds and difference between experimental and optimized data for both cases. 

Notice how in the non-weighted case, the backgrounds do not converge to the same solution. The opposite case is observed when adding the weights to the tails.

Another difference appears in the residual between the experimental and optimized data. Notice how the largest differences are observed at the central points. For this reason, one can remove the central points and optimize with respect to the tail points only.

## PT2C with weighted tails and removed points

Another factor for which the parameters do not converge is the central points. For this reason, the central points were removed and the minimization function was applied to the tail points only.
PT2C sample is rougher compared to PT2B. 

In [5]:
#| column: page
# 3. Get initial guesses
col = 'pt2c'
guess_df = pd.read_excel('data/guesses.xlsx', sheet_name=col)
guess_df = guess_df.set_index('Variables')
methods = ['Powell', 'CG', 'L-BFGS-B', 'SLSQP', 'trust-constr']
index = ['x0', 'Abase', 'sigma', 'Agaussian', 'n', 'displacement', 'error', 'area_background']
optimized_df = pd.DataFrame(columns=methods, index=index)
convergence = []

# `weight_bool` and `limit` are module-level switches read by cost_function,
# so they must be set before each optimize() call.

# Weighted tails, all points
limit = 40000
weight_bool = True
optimized_df_nw, rough_plots_nw =  optimize(methods, guess_df, optimized_df.copy(), limit, col, 'all-points')
optimized_df_nw.columns = [name + '_all_points' for name in optimized_df.columns]

# Weighted tails, fewer points (lower intensity limit)
limit = 30000
weight_bool = True
optimized_df_w, rough_plots_w =  optimize(methods, guess_df, optimized_df.copy(), limit, col, 'less-points')
optimized_df_w.columns = [name + '_less_points' for name in optimized_df.columns]

# Combine dfs
optimized_df = pd.concat([optimized_df_nw, optimized_df_w], axis=1)
styler = optimized_df.style.set_table_attributes('style="font-size: 11px"')
# Styler.render() was removed in pandas 2.0; to_html() replaces it (pandas >= 1.3).
optimized_df_html = styler.to_html() if hasattr(styler, 'to_html') else styler.render()
display(HTML(optimized_df_html))

# Combine plots
rough_plots_nw.extend(rough_plots_w)

Unnamed: 0,Powell_all_points,CG_all_points,L-BFGS-B_all_points,SLSQP_all_points,trust-constr_all_points,Powell_less_points,CG_less_points,L-BFGS-B_less_points,SLSQP_less_points,trust-constr_less_points
x0,0.025124,0.025262,0.025262,0.025095,0.025095,0.025227,0.025143,0.025144,0.025144,0.025144
Abase,0.934455,0.932707,0.932707,0.9344,0.934411,0.932926,0.933064,0.933062,0.93306,0.93306
sigma,2.757419,2.674442,2.674459,2.756507,2.756876,2.646597,2.652842,2.652932,2.652746,2.652781
Agaussian,854.722521,930.001756,929.989782,856.193351,855.780576,935.778522,930.003077,930.002471,930.14421,930.121878
n,2.197718,2.149952,2.150048,2.19851,2.198678,2.032927,2.033756,2.034152,2.03395,2.033996
displacement,0.250931,0.233542,0.233555,0.249144,0.249261,0.256333,0.254779,0.254773,0.254754,0.254749
error,51.08794,51.220164,51.220122,51.087523,51.087518,42.436037,42.43462,42.434619,42.434608,42.43461
area_background,4174.517456,4405.433428,4405.404855,4180.322922,4178.86764,4388.547041,4371.728429,4371.862359,4372.22863,4372.179257


In [6]:
#| column: page
# Lay the collected summary figures out in a three-column grid and render it
grid_rough = gridplot(
    rough_plots_nw,
    ncols=3,
    merge_tools=False,
    width=450,
    height=350,
)
show(grid_rough)

## PT2D with weighted tails and removed points
It was observed that adding the weights and removing points produces optimized parameters within the expected boundaries for all methods.
However, there are some cases where the optimized parameters do not converge to the same solution.

For example for the case of PT2D, the optimization method 'CG' converges to a slightly different solution than the other methods.

In [18]:
#| column: page
# 3. Get initial guesses
col = 'pt2d'
guess_df = pd.read_excel('data/guesses.xlsx', sheet_name=col)
guess_df = guess_df.set_index('Variables')
methods = ['Powell', 'CG', 'L-BFGS-B', 'SLSQP', 'trust-constr']
index = ['x0', 'Abase', 'sigma', 'Agaussian', 'n', 'displacement', 'error', 'area_background']
optimized_df = pd.DataFrame(columns=methods, index=index)
convergence = []

# `weight_bool` and `limit` are module-level switches read by cost_function,
# so they must be set before each optimize() call.

# Weighted tails, all points
limit = 40000
weight_bool = True
optimized_df_nw, rough_plots_nw =  optimize(methods, guess_df, optimized_df.copy(), limit, col, 'all-points')
optimized_df_nw.columns = [name + '_all_points' for name in optimized_df.columns]

# Weighted tails, fewer points (lower intensity limit)
limit = 20000
weight_bool = True
optimized_df_w, rough_plots_w =  optimize(methods, guess_df, optimized_df.copy(), limit, col, 'less-points')
optimized_df_w.columns = [name + '_less_points' for name in optimized_df.columns]

# Combine dfs
optimized_df = pd.concat([optimized_df_nw, optimized_df_w], axis=1)
styler = optimized_df.style.set_table_attributes('style="font-size: 11px"')
# Styler.render() was removed in pandas 2.0; to_html() replaces it (pandas >= 1.3).
optimized_df_html = styler.to_html() if hasattr(styler, 'to_html') else styler.render()
display(HTML(optimized_df_html))

# Combine plots
rough_plots_nw.extend(rough_plots_w)

Unnamed: 0,Powell_all_points,CG_all_points,L-BFGS-B_all_points,SLSQP_all_points,trust-constr_all_points,Powell_less_points,CG_less_points,L-BFGS-B_less_points,SLSQP_less_points,trust-constr_less_points
x0,0.021119,0.017979,0.02111,0.02111,0.021107,0.015023,0.0167,0.015031,0.015031,0.015031
Abase,0.827018,0.842649,0.827019,0.82702,0.827044,1.043568,1.057112,1.043561,1.04356,1.043559
sigma,2.745541,3.242377,2.745723,2.74573,2.746304,2.199506,2.003255,2.199015,2.198963,2.198992
Agaussian,2122.329764,1427.652615,2122.161618,2122.124964,2121.026907,1413.448016,1400.002713,1413.982185,1414.032067,1414.013676
n,1.974656,2.361408,1.974836,1.974848,1.975133,1.000042,0.90584,1.0,1.0,1.0
displacement,0.219812,0.254771,0.21966,0.219675,0.219741,0.399948,0.594046,0.4,0.4,0.4
error,109.322357,116.780293,109.322348,109.322348,109.322362,35.350994,31.212251,35.348265,35.348265,35.348266
area_background,10330.604208,8204.740025,10330.449648,10330.297214,10327.078252,6212.160798,5867.93249,6213.238203,6213.310084,6213.312988


In [8]:
#| column: page
# Lay the collected summary figures out in a three-column grid and render it
grid_rough = gridplot(
    rough_plots_nw,
    ncols=3,
    merge_tools=False,
    width=450,
    height=350,
)
show(grid_rough)

## Ann1 with weighted tails and removed points
In the case of a smooth wafer, removing the points made a difference in the optimized parameters, although one of the methods did not fully converge to the same parameters as the rest of the methods.

In [19]:
#| column: page
# 3. Get initial guesses
from IPython.display import HTML
col = 'ann1'
guess_df = pd.read_excel('data/guesses.xlsx', sheet_name=col)
guess_df = guess_df.set_index('Variables')

# 4. Create df that will save optimized parameters
methods = ['Powell', 'CG', 'L-BFGS-B', 'SLSQP', 'trust-constr']
index = ['x0', 'Abase', 'sigma', 'Agaussian', 'n', 'displacement', 'error', 'area_background']
optimized_df = pd.DataFrame(columns=methods, index=index)
convergence = []

# `weight_bool` and `limit` are module-level switches read by cost_function,
# so they must be set before each optimize() call.

# Weighted tails, all points
weight_bool = True
limit = 40000
optimized_df_nw, rough_plots_nw =  optimize(methods, guess_df, optimized_df.copy(), limit, col, 'all-points')
optimized_df_nw.columns = [name + '_all_points' for name in optimized_df.columns]

# Weighted tails, fewer points (lower intensity limit)
weight_bool = True
limit = 30000
optimized_df_w, rough_plots_w =  optimize(methods, guess_df, optimized_df.copy(), limit, col, 'less-points')
optimized_df_w.columns = [name + '_less_points' for name in optimized_df.columns]

# Combine dfs
optimized_df = pd.concat([optimized_df_nw, optimized_df_w], axis=1)
styler = optimized_df.style.set_table_attributes('style="font-size: 11px"')
# Styler.render() was removed in pandas 2.0; to_html() replaces it (pandas >= 1.3).
optimized_df_html = styler.to_html() if hasattr(styler, 'to_html') else styler.render()
display(HTML(optimized_df_html))

# Combine plots
rough_plots_nw.extend(rough_plots_w)

Unnamed: 0,Powell_all_points,CG_all_points,L-BFGS-B_all_points,SLSQP_all_points,trust-constr_all_points,Powell_less_points,CG_less_points,L-BFGS-B_less_points,SLSQP_less_points,trust-constr_less_points
x0,0.003677,0.003885,0.003691,0.007501,0.003691,0.005723,0.005773,0.005785,0.005785,0.005744
Abase,0.999197,0.999873,0.999198,0.991164,0.9992,0.999162,0.999875,0.999052,0.999051,0.999129
sigma,1.000042,0.54604,1.0,1.94699,1.000024,1.068458,0.933377,1.314495,1.314452,1.17847
Agaussian,247.419439,246.999057,247.000468,413.204511,246.896008,330.396009,247.000267,246.998886,247.024222,287.136494
n,1.044242,0.704384,1.043734,4.0,1.043786,1.430808,1.26201,1.769538,1.769436,1.567584
displacement,-0.399948,-0.499583,-0.4,0.253984,-0.399984,0.393858,0.624502,0.4,0.4,0.396781
error,20.812337,17.505415,20.811279,24.799936,20.811625,5.547635,5.538497,5.577768,5.577751,5.559087
area_background,486.358677,339.404434,485.603883,1458.410696,485.401087,641.378994,428.529211,577.970352,578.012287,607.967217


In [10]:
#| column: page
# Lay the collected summary figures out in a three-column grid and render it
grid_rough = gridplot(
    rough_plots_nw,
    ncols=3,
    merge_tools=False,
    width=450,
    height=350,
)
show(grid_rough)

## PT2E with weighted tails
Depending on the type of data, adding weighted tails is enough in order to obtain converging solutions.
Such is the case of PT2E.

In [20]:
#| column: page
# 3. Get initial guesses
col = 'pt2e'
guess_df = pd.read_excel('data/guesses.xlsx', sheet_name=col)
guess_df = guess_df.set_index('Variables')
methods = ['Powell', 'CG', 'L-BFGS-B', 'SLSQP', 'trust-constr']
index = ['x0', 'Abase', 'sigma', 'Agaussian', 'n', 'displacement', 'error', 'area_background']
optimized_df = pd.DataFrame(columns=methods, index=index)
convergence = []

# `weight_bool` and `limit` are module-level switches read by cost_function,
# so they must be set before the optimize() call.

# Weighted tails, all points. Only this fit is run for this sample, so there
# is no second result table or plot list to combine.
limit = 40000
weight_bool = True
optimized_df_nw, rough_plots_nw =  optimize(methods, guess_df, optimized_df.copy(), limit, col, 'all-points')
optimized_df_nw.columns = [name + '_all_points' for name in optimized_df.columns]

styler = optimized_df_nw.style.set_table_attributes('style="font-size: 11px"')
# Styler.render() was removed in pandas 2.0; to_html() replaces it (pandas >= 1.3).
optimized_df_html = styler.to_html() if hasattr(styler, 'to_html') else styler.render()
display(HTML(optimized_df_html))

Unnamed: 0,Powell_all_points,CG_all_points,L-BFGS-B_all_points,SLSQP_all_points,trust-constr_all_points
x0,0.19769,0.197292,0.197595,0.197594,0.197594
Abase,0.535596,0.532997,0.53474,0.534739,0.534738
sigma,2.577168,2.564349,2.577615,2.577633,2.577605
Agaussian,5899.348205,5999.99998,5918.595009,5918.57624,5918.678078
n,1.737081,1.741056,1.744394,1.744424,1.744403
displacement,0.017359,0.012327,0.015913,0.015909,0.015909
error,245.878484,245.897741,245.862474,245.862474,245.862474
area_background,27093.076201,27414.50297,27179.370007,27179.449437,27179.634559


In [12]:
#| column: page
# Lay the collected summary figures out in a three-column grid and render it
grid_rough = gridplot(
    rough_plots_nw,
    ncols=3,
    merge_tools=False,
    width=450,
    height=350,
)
show(grid_rough)

## Conclusions

In this section, two minimization strategies were implemented in order to obtain converging solutions for the minimization function. The strategies include:
* Give the tails a larger weight than the points in the center, so that the minimization function accounts for the roughness effects observed in the tails.
* Remove some of the sampling points in the center.
* The combination of both strategies, depending on the experimental data, leads to converging parameters for all methods.
* However, depending on the experimental data, there were cases where the optimized parameters did not lead to the exact same solution.
* The next step is to relate the optimized parameters of the background function to real roughness measurements.