In [2]:
#| echo: false
import numpy as np
from scipy.interpolate import PchipInterpolator
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, Span, Range1d
from bokeh.layouts import gridplot
from bokeh.io import output_notebook
import warnings
import pandas as pd
import altair as alt
from IPython.display import display
# Silence every warning category for the rest of the kernel session.
# NOTE(review): this also hides pandas/Bokeh deprecation and chained-assignment
# warnings raised by later cells — consider narrowing the filter.
warnings.filterwarnings('ignore')
# Route Bokeh `show()` output into the notebook.
output_notebook()

# Lift Altair's default row limit on embedded datasets.
alt.data_transformers.disable_max_rows()
# Comma-delimited numeric file; `data` is not referenced again in the cells visible here.
data = np.loadtxt('data/PT_wafer2_after_d_diodes.dat', delimiter=',')

def plot_format(plot, xlabel, ylabel, location, size, titlesize, labelsize):
    """Apply the notebook's shared dark theme and typography to a figure.

    Parameters
    ----------
    plot : figure-like object exposing ``xaxis``, ``yaxis``, ``legend``,
        ``title``, ``xgrid`` and ``ygrid``; it is styled in place.
    xlabel, ylabel : axis label texts.
    location : legend location (e.g. ``"top_left"``).
    size : font size used for both axis labels and tick labels.
    titlesize : font size of the title.
    labelsize : font size of the legend entries.

    Returns
    -------
    The same ``plot`` object that was passed in.
    """
    text_color = "#E3F4FF"
    grid_color = '#606773'
    panel_color = "#282B30"

    # Both axes share typography and colours; only the label text differs.
    for axis, label in ((plot.xaxis, xlabel), (plot.yaxis, ylabel)):
        axis.axis_label = label
        axis.axis_label_text_font_style = 'bold'
        axis.major_label_text_font_style = "bold"
        axis.axis_label_text_font_size = size
        axis.major_label_text_font_size = size
        axis.minor_tick_line_color = grid_color
        axis.major_label_text_color = text_color
        axis.axis_label_text_color = text_color

    # Legend: clicking an entry hides its glyph; border and background are fully transparent.
    legend = plot.legend
    legend.location = location
    legend.click_policy = "hide"
    legend.label_text_font_size = labelsize
    legend.label_text_font_style = 'bold'
    legend.label_text_color = text_color
    legend.border_line_width = 3
    legend.border_line_color = "navy"
    legend.border_line_alpha = 0.0
    legend.background_fill_alpha = 0.0

    # Title
    title = plot.title
    title.text_font_size = titlesize
    title.text_font_style = "bold"
    title.text_color = "#A6DDFF"

    # Dark canvas with muted major grid lines (minor grid lines are not styled here).
    plot.background_fill_color = panel_color
    plot.border_fill_color = panel_color
    plot.xgrid.grid_line_color = grid_color
    plot.ygrid.grid_line_color = grid_color

    return plot

## Base function experimental points

In [3]:
#| column: page
# 1. Create the two panels (raw interleaved points vs. smoothed points)
def _base_function_figure(title):
    """Return a 700x500 figure with the axis labels and hover tooltips shared by both panels."""
    return figure(
        title=title,
        x_axis_label='sampling point',
        y_axis_label='intensity',
        tooltips=[("index", "$index"), ("(x,y)", "($x, $y)")],
        width=700,
        height=500,
    )


interleaved_plot = _base_function_figure('Interleaved base function')
smooth_plot = _base_function_figure('Smooth base function')

# 2. Load the interleaved base-function points and build the smoothed frame
base_function_df = pd.read_csv('data/base_function_points.csv')


def _merge_close_points(points_df, tol=0.01):
    """Collapse consecutive samples whose x positions are closer than ``tol``.

    Walks the rows in order. When ``x[i] - x[i-1] < tol`` the current output
    row is replaced by the mean of samples ``i-1`` and ``i`` (x and y), taking
    the colour of sample ``i``; otherwise sample ``i`` starts a new output row.
    The first sample always seeds the first output row, so it is kept even
    when the first two samples are not close.

    The difference is signed, so a decreasing step also counts as "close".
    NOTE(review): presumably ``xaxis`` is sorted ascending — confirm against the CSV.

    Parameters
    ----------
    points_df : DataFrame with ``xaxis``, ``yaxis`` and ``colors`` columns.
    tol : threshold on the x difference between neighbouring samples.

    Returns
    -------
    DataFrame with columns ``xaxis``, ``yaxis``, ``colors`` and a fresh 0..n-1 index.
    """
    xs = points_df['xaxis'].values
    ys = points_df['yaxis'].values
    colors = points_df['colors'].values

    # Collect plain records and build the frame once: avoids growing a
    # DataFrame cell-by-cell and keeps the numeric columns numeric.
    merged = []
    for pos in range(len(points_df)):
        if pos == 0:
            merged.append((xs[0], ys[0], colors[0]))
        elif xs[pos] - xs[pos - 1] < tol:
            merged[-1] = ((xs[pos] + xs[pos - 1]) / 2, (ys[pos] + ys[pos - 1]) / 2, colors[pos])
        else:
            merged.append((xs[pos], ys[pos], colors[pos]))
    return pd.DataFrame(merged, columns=['xaxis', 'yaxis', 'colors'])


smooth_df = _merge_close_points(base_function_df)

# 3. Draw each data set on its own panel: coloured markers plus a connecting line
panels = [
    (interleaved_plot, base_function_df, 'Non-smooth base function', '#9DC3E6'),
    (smooth_plot, smooth_df, 'Smooth base function', '#9D6C97'),
]
for plot, df, legend, color in panels:
    xs, ys = df.xaxis, df.yaxis

    # markers take their colour from the data, the line from the panel spec
    plot.circle(xs, ys, color=df.colors, size=6)
    plot.line(xs, ys, line_width=4, legend_label=legend, color=color)

    # ticks, fixed intensity range and the shared theme
    plot.xaxis.ticker.desired_num_ticks = 15
    plot.y_range = Range1d(0, 45000)
    plot = plot_format(plot, "Degrees", "Intensity", "top_left", "10pt", "10pt", "9pt")

# 4. PCHIP interpolation of the smoothed points on a grid from -15.5 to 15.5 in steps of 0.001
pchip = PchipInterpolator(smooth_df.xaxis, smooth_df.yaxis)
x_base = np.arange(-15.5, 15.5001, 0.001).round(3)
y_base = pchip(x_base)

# 5. Show both panels in one row without merging their toolbars
base_function_grid = gridplot(
    children=[interleaved_plot, smooth_plot],
    ncols=2,
    width=420,
    height=380,
    merge_tools=False,
)
show(base_function_grid)

## Base function with experimental points

In [5]:
#| column: page
from bokeh.palettes import Set3
# 1. Load the rough-sample measurements (no sheet name given, so pandas reads the first sheet)
rough_df = pd.read_excel('data/rough_samples.xlsx')
source_rough = ColumnDataSource(rough_df)

# 2. One figure per sample column (every column after the first)
sample_columns = rough_df.columns[1:]
color_palette = Set3[len(sample_columns) + 2]
rough_plots = []

for i, col in enumerate(sample_columns):
    rough_plot = figure(
        title=str(col),
        x_axis_label='xaxis',
        y_axis_label='yaxis',
        width=550,
        height=420,
        tooltips=[("index", "$index"), ("(x,y)", "($x, $y)")],
    )

    # Base-function points, raw first and then smoothed, as reference markers
    for points, label in ((base_function_df, 'Base function non-smooth'), (smooth_df, 'Base function smooth')):
        rough_plot.circle(x=points.xaxis, y=points.yaxis, color=points.colors, legend_label=label, size=6)

    # Measured curve: a line plus triangle markers read from the shared data source
    rough_plot.line('xaxis', col, source=source_rough, color='#9DC3E6', legend_label=str(col), line_width=4)
    rough_plot.triangle('xaxis', col, source=source_rough, fill_color=color_palette[1], size=10, legend_label=f"{col} points")

    # Fixed intensity range and the shared theme
    rough_plot.y_range = Range1d(-5000, 45000)
    rough_plot = plot_format(rough_plot, "Degrees", "Intensity", "top_left", "10pt", "9pt", "9pt")
    rough_plots.append(rough_plot)

# 3. Lay the figures out three per row, each with its own toolbar
grid_rough = gridplot(children=rough_plots, ncols=3, merge_tools=False)
show(grid_rough)

## Gaussian function: $A_{1}\exp\left(-\frac{(x-x_0)^2}{2\sigma^2}\right)$

In [67]:
from bokeh.palettes import Set3
from scipy.optimize import minimize

# 1. Base-function samples taken from the smoothed frame
#    (original note: 330 points from -16.4 to 16.5 — not verifiable from this cell)
x_base = smooth_df.xaxis.values
y_base = smooth_df.yaxis.values

# 2. Measured curves; the x positions are rounded to 3 decimals
rough_df = pd.read_excel('data/rough_samples.xlsx', sheet_name='Data')
source_rough = ColumnDataSource(rough_df)
x_rough = rough_df['xaxis'].values.round(3)

# 3. Initial guesses, one row per fit variable
guess_df = pd.read_excel('data/rough_samples.xlsx', sheet_name='Gaussian').set_index('Variables')

# 4. Empty frame that receives the optimized parameters (rows: variables, columns: samples)
index = ['x0', 'Abase', 'sigma', 'Agaussian']
columns = ['ann1_opt', 'pt2_opt', 'pt2b_opt', 'pt2c_opt', 'pt2d_opt', 'pt2e_opt']
optimized_df = pd.DataFrame(index=index, columns=columns)


# 5. Gaussian background term
def gaussian(x, x0, sigma, A1):
    """Return ``A1 * exp(-((x - x0) / sigma)**2 / 2)``.

    x : evaluation point(s); x0 : centre; sigma : width; A1 : amplitude.
    ``sigma == 0`` divides by zero; no guard is applied.
    """
    z = (x - x0) / sigma
    return A1 * np.exp(-z ** 2 / 2)


# 6. Interpolator over the smoothed base function, used by the cost function below
pchip = PchipInterpolator(x_base, y_base)
def cost_function(params, y):
    """Sum of squared residuals between the model and one measured curve.

    The model evaluated at the measurement positions is
    ``A0 * pchip(x_rough + x0) + gaussian(x_rough + x0, x0, sigma, A1)``.
    Because the Gaussian is evaluated at ``x_rough + x0`` with centre ``x0``,
    its argument reduces to ``x_rough``: only the base function is shifted.

    Relies on the notebook-level names ``x_rough``, ``pchip`` and ``gaussian``.

    Parameters
    ----------
    params : sequence ``(x0, A0, sigma, A1)``.
    y : measured values, compared element-wise with the model at ``x_rough``.

    Returns
    -------
    Scalar sum of squared differences.
    """
    shift, base_scale, width, background_amplitude = params
    shifted_x = x_rough + shift
    # scaled base function interpolated at the shifted positions, plus the background
    model = base_scale * pchip(shifted_x) + gaussian(shifted_x, shift, width, background_amplitude)
    residuals = model - y
    return np.sum(residuals ** 2)

# 7. Containers and summary figures that the per-sample loop fills in
rough_plots = []
color_palette = Set3[len(rough_df.columns[1:])+2]
# One (lower, upper) pair per fit variable; not passed to minimize in this cell's loop.
bounds = ((None, None), (0, None), (0, None), (0, None))


def _gaussian_fit_summary_figure(title):
    """Return a 550x450 figure with the hover tooltips used by the summary panels."""
    return figure(
        title=title,
        width=550,
        height=450,
        tooltips=[("index", "$index"), ("(x,y)", "($x, $y)")],
    )


backgrounds = _gaussian_fit_summary_figure('Background functions with experimental data')
backgroundsb = _gaussian_fit_summary_figure('Background functions')
downsamples = _gaussian_fit_summary_figure('Downsampled points')

for i, col in enumerate(rough_df.columns[1:]):
    # 8. Per-sample figure and initial guesses, in the order cost_function unpacks them
    rough_plot = figure(title = str(col), width = 550, height = 450, tooltips = [("index", "$index"),("(x,y)", "($x, $y)")])
    guess = [guess_df.loc[var, col] for var in ['x0', 'Abase', 'sigma', 'Agaussian']]

    # 9. Fit: minimise the squared residuals for this sample (no bounds are passed)
    y_rough = rough_df[col].copy().values
    result = minimize(cost_function, guess, args=(y_rough,))
    optimized_parameters = result.x
    colu = col + '_opt'
    x0_opt, A0_opt, sigma_opt, A1_opt = optimized_parameters
    # Write with a single .loc[row, column] call. The chained form
    # `optimized_df.loc[row][colu] = value` assigns into an intermediate Series and
    # is not guaranteed to reach optimized_df (it does not under pandas copy-on-write).
    optimized_df.loc['x0', colu] = x0_opt
    optimized_df.loc['Abase', colu] = A0_opt
    optimized_df.loc['sigma', colu] = sigma_opt
    optimized_df.loc['Agaussian', colu] = A1_opt

    # 10. Rebuild the fitted model at the measurement positions
    x_new_opt = x_rough + x0_opt
    # scaled base function interpolated at the shifted positions
    y_base_opt = A0_opt*pchip(x_new_opt)
    # background evaluated at the shifted positions with centre x0_opt
    y_background_opt = gaussian(x_new_opt, x0_opt, sigma_opt, A1_opt)
    # fitted model = base + background
    y_optimized = y_base_opt + y_background_opt

    # Vertical reference line at x = 0
    vline = Span(location=0.0, dimension = 'height', line_color='#FEEED9', line_width=1)
    rough_plot.add_layout(vline)

    # 11. Plot the fitted components against the measurement axis
    rough_plot.line(x_rough, y_base_opt, legend_label = 'Base', line_width = 5, color='#F96F5D')
    rough_plot.line(x_rough, y_background_opt, legend_label = 'Background', line_width = 5, color='#F9B5AC')
    rough_plot.line(x_rough, y_optimized, legend_label = 'Optimized function', line_width = 5, color='#987284')
    rough_plot.triangle(x_rough, y_optimized, legend_label = 'Optimized points', size = 8, color=color_palette[1])
    backgrounds.line(x_rough, y_background_opt, color = color_palette[i], line_width = 5 , legend_label = f"Background {col}")
    backgroundsb.line(x_rough, y_background_opt, color = color_palette[i], line_width = 5 , legend_label = f"Background {col}")
    downsamples.line(x_rough, y_optimized, line_width=4, legend_label = f'Downsampling {col}', color = color_palette[i+1],  alpha = 0.9, line_dash='dashed')
    downsamples.triangle(x_rough, y_optimized, size = 9, legend_label = f'Downsampling {col}', color = color_palette[i+1])

    # 12. Plot the measured data from the shared data source
    rough_plot.line('xaxis', col, source=source_rough, color = '#9DC3E6', legend_label = str(col), line_width=4, line_dash = 'dashed')
    rough_plot.circle('xaxis', col, source=source_rough, fill_color= color_palette[i], size=7, legend_label = f"{col} points")
    backgrounds.line('xaxis', col, source=source_rough, color = color_palette[i], legend_label = str(col), line_width=4)
    backgrounds.circle('xaxis', col, source=source_rough, fill_color= color_palette[i], size=7, legend_label = str(col))
    downsamples.line('xaxis', col, source=source_rough, color = color_palette[i], legend_label = str(col), line_width=4)
    downsamples.circle('xaxis', col, source=source_rough, fill_color= color_palette[i], size=7, legend_label = str(col))

    # 13. Plot format
    rough_plot.y_range = Range1d(-5000, 50000)
    rough_plot.xaxis.ticker.desired_num_ticks = 10
    rough_plot.yaxis.ticker.desired_num_ticks = 10
    rough_plot = plot_format(rough_plot, "Degrees", "Intensity", "top_left", "10pt", "8pt", "9pt")
    rough_plots.append(rough_plot)

# Theme the three summary figures, fix their y-range and append them after the per-sample figures
plots = [backgrounds, backgroundsb, downsamples]
for plot in plots:
    plot_format(plot, "Degrees", "Intensity", "top_left", "10pt", "10pt", "9pt")
    plot.y_range = Range1d(-5000, 50000)
rough_plots.extend(plots)

# Three figures per row, each keeping its own toolbar
grid_rough = gridplot(children=rough_plots, ncols=3, merge_tools=False, width=480, height=430)
show(grid_rough)

# Side-by-side table: each sample's initial guess next to its optimized value
samples = ('ann1', 'pt2', 'pt2b', 'pt2c', 'pt2d', 'pt2e')
paired_columns = [name for sample in samples for name in (sample, sample + '_opt')]
merged_df = guess_df.join(optimized_df)[paired_columns]
display(merged_df)

Unnamed: 0_level_0,ann1,ann1_opt,pt2,pt2_opt,pt2b,pt2b_opt,pt2c,pt2c_opt,pt2d,pt2d_opt,pt2e,pt2e_opt
Variables,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
x0,0.0,0.005229,0.0,0.008596,0.0,0.021531,0.0,0.02253,0.0,0.013593,0.0,0.199183
Abase,0.9,1.000891,0.91,0.997384,0.93,0.970683,0.9,0.931525,0.82,0.821728,0.5,0.525512
sigma,1.0,1.155163,1.0,0.813048,2.0,1.9805,2.0,1.835617,2.0,1.893213,1.8,1.85384
Agaussian,100.0,75.382282,0.0,-160.750838,600.0,415.142205,1000.0,997.044148,2250.0,2306.781058,6000.0,5974.75289


In [None]:
#| column: screen
from bokeh.palettes import Set3
from scipy.optimize import minimize

# 1. Import base function and rough data
# The frame gets its own name so that it does not share the name `base_function`
# with the helper function defined further down in this cell.
# NOTE(review): the file name is spelled 'base_funtion' — confirm it matches the file on disk.
base_function_interp_df = pd.read_csv('data/base_funtion_interpolated.csv')
x_base = base_function_interp_df['x_base'].values.round(3)
y_base = base_function_interp_df['y_base'].values

# Measured curves; x positions rounded to 3 decimals like x_base
rough_df = pd.read_excel('data/rough_samples.xlsx', sheet_name='Data')
source_rough = ColumnDataSource(rough_df)
x_rough = rough_df['xaxis'].values.round(3)

# Initial guesses, one row per fit variable
guess_df = pd.read_excel('data/rough_samples.xlsx', sheet_name='Gaussian')
guess_df = guess_df.set_index('Variables')

# Empty frame that receives the optimized parameters (rows: variables, columns: samples)
columns = ['ann1_opt', 'pt2_opt', 'pt2b_opt', 'pt2c_opt', 'pt2d_opt', 'pt2e_opt']
index = ['x0', 'Abase', 'sigma', 'Agaussian']

optimized_df = pd.DataFrame(columns=columns, index=index)

# 2. Base-function term of the model
def base_function(y_base, A0):
    """Scale the base-function samples by the amplitude ``A0``.

    y_base : sampled base-function values (scalar or array).
    A0 : multiplicative amplitude.
    Returns ``A0 * y_base``.
    """
    scaled = A0 * y_base
    return scaled

# 3. Gaussian background term
def gaussian(x, x0, sigma, A1):
    """Return ``A1 * exp(-((x - x0) / sigma)**2 / 2)``.

    x : evaluation point(s); x0 : centre; sigma : width; A1 : amplitude.
    ``sigma == 0`` divides by zero; no guard is applied.
    """
    z = (x - x0) / sigma
    return A1 * np.exp(-z ** 2 / 2)

# 4. Model = scaled base function + Gaussian background
def modified_function(params, x):
    """Evaluate the model ``A0 * y_base + gaussian(x, x0, sigma, A1)``.

    Reads the notebook-level ``y_base``; ``x`` is only used by the Gaussian
    term and is expected to broadcast against ``y_base``.

    params : sequence ``(x0, A0, sigma, A1)``.
    x : positions at which the Gaussian is evaluated.
    Returns the element-wise sum of both terms.
    """
    centre, base_amplitude, width, background_amplitude = params
    return base_function(y_base, base_amplitude) + gaussian(x, centre, width, background_amplitude)

# 5. Cost: squared error at the grid points that coincide with the measurements
def cost_function(params, x, y):
    """Sum of squared residuals between the model and one measured curve.

    The model is evaluated on the notebook-level ``x_base`` grid shifted by
    ``params[0]``; the grid entries that also occur in ``x_rough + params[0]``
    are picked out and compared with ``y``.

    NOTE(review): as written the shift appears to cancel — the Gaussian is
    evaluated at ``x_base + x0`` with centre ``x0``, the base term does not
    depend on x, and both sides of the ``isin`` test carry the same offset —
    so the cost looks independent of ``params[0]``. Confirm this is intended.

    params : sequence ``(x0, A0, sigma, A1)``.
    x : accepted but not used in the body.
    y : measured values; must match the number of selected grid points.
    Returns the scalar sum of squared differences.

    Other metrics previously tried here: mean absolute error, mean squared
    error and sum of absolute errors.
    """
    shift = params[0]
    shifted_grid = x_base + shift
    model_on_grid = modified_function(params, shifted_grid)
    at_measurements = np.isin(shifted_grid, x_rough + shift)
    selected = model_on_grid[np.where(at_measurements)[0]]
    residuals = selected - y
    return np.sum(residuals ** 2)

# 6. Containers and summary figures that the per-sample loop fills in
rough_plots = []
color_palette = Set3[len(rough_df.columns[1:])+2]
# One (lower, upper) pair per fit variable, in the order x0, Abase, sigma, Agaussian
bounds = ((None, None), (0, None), (0, None), (0, None))


def _interpolated_fit_summary_figure(title):
    """Return a 550x450 figure with the hover tooltips used by the summary panels."""
    return figure(
        title=title,
        width=550,
        height=450,
        tooltips=[("index", "$index"), ("(x,y)", "($x, $y)")],
    )


backgrounds = _interpolated_fit_summary_figure('Background functions with experimental data')
backgroundsb = _interpolated_fit_summary_figure('Background functions')
downsamples = _interpolated_fit_summary_figure('Downsampled points')

for i, col in enumerate(rough_df.columns[1:]):
    # 7. Per-sample figure and initial guesses, in the order the model unpacks them
    rough_plot = figure(title = str(col), width = 550, height = 450, tooltips = [("index", "$index"),("(x,y)", "($x, $y)")])
    params = [guess_df.loc[var, col] for var in ['x0', 'Abase', 'sigma', 'Agaussian']]

    # 8. Fit: bounded minimisation of the squared residuals for this sample
    y_rough = rough_df[col].copy().values
    result = minimize(cost_function, params, args=(x_base, y_rough), bounds=bounds)
    optimized_parameters = result.x
    colu = col + '_opt'
    # Write with a single .loc[row, column] call. The chained form
    # `optimized_df.loc[row][colu] = value` assigns into an intermediate Series and
    # is not guaranteed to reach optimized_df (it does not under pandas copy-on-write).
    optimized_df.loc['x0', colu] = optimized_parameters[0]
    optimized_df.loc['Abase', colu] = optimized_parameters[1]
    optimized_df.loc['sigma', colu] = optimized_parameters[2]
    optimized_df.loc['Agaussian', colu] = optimized_parameters[3]

    # 9. Rebuild the fitted model on the shifted grid
    x_optimized = x_base + optimized_parameters[0]
    y_optimized = modified_function(optimized_parameters, x=x_optimized)
    # Select the grid points that coincide with the measurements. Both sides must
    # carry the SAME (optimised) shift, matching cost_function and the x positions
    # used for the downsampled curves below; the initial guess is the wrong offset.
    indices = np.where(np.isin(x_optimized, x_rough + optimized_parameters[0]))[0]
    y_optimized_points = y_optimized[indices]
    y_background_optimized = gaussian(x_optimized, optimized_parameters[0], optimized_parameters[2], optimized_parameters[3])
    y_base_optimized = base_function(y_base, optimized_parameters[1])

    # 10. Plot the fitted model and its two components
    rough_plot.line(x_optimized, y_optimized, legend_label = 'Optimized modified function', line_width = 5, color='#987284')
    rough_plot.line(x_optimized, y_background_optimized, legend_label = 'Optimized background', line_width = 5, color='#F9B5AC')
    rough_plot.line(x_optimized, y_base_optimized, legend_label = 'Optimized base', line_width = 5, color='#F96F5D')
    backgrounds.line(x_optimized, y_background_optimized, color = color_palette[i], line_width = 5 , legend_label = f"Background {col}")
    backgroundsb.line(x_optimized, y_background_optimized, color = color_palette[i], line_width = 5 , legend_label = f"Background {col}")

    # 11. Plot the measured data from the shared data source
    rough_plot.line('xaxis', col, source=source_rough, color = '#9DC3E6', legend_label = str(col), line_width=4)
    rough_plot.circle('xaxis', col, source=source_rough, fill_color= color_palette[i], size=7, legend_label = str(col))
    backgrounds.line('xaxis', col, source=source_rough, color = color_palette[i], legend_label = str(col), line_width=4)
    backgrounds.circle('xaxis', col, source=source_rough, fill_color= color_palette[i], size=7, legend_label = str(col))
    downsamples.line('xaxis', col, source=source_rough, color = color_palette[i], legend_label = str(col), line_width=4)
    downsamples.circle('xaxis', col, source=source_rough, fill_color= color_palette[i], size=7, legend_label = str(col))

    # 12. Fitted model sampled at the (shifted) measurement positions
    rough_plot.line(x_rough+optimized_parameters[0], y_optimized_points, line_width=4, legend_label = 'Downsampling', color = '#D1603D',  alpha = 0.9, line_dash='dashed')
    rough_plot.triangle(x_rough+optimized_parameters[0], y_optimized_points, size = 9, legend_label = 'Downsampling', color = '#DB8A74')
    downsamples.line(x_rough+optimized_parameters[0], y_optimized_points, line_width=4, legend_label = f'Downsampling {col}', color = color_palette[i+1],  alpha = 0.9, line_dash='dashed')
    downsamples.triangle(x_rough+optimized_parameters[0], y_optimized_points, size = 9, legend_label = f'Downsampling {col}', color = color_palette[i+1])

    # 13. Plot format: vertical reference line at x = 0, fixed range, ticks, shared theme
    vline = Span(location=0.0, dimension = 'height', line_color='#FEEED9', line_width=1)
    rough_plot.add_layout(vline)
    rough_plot.y_range = Range1d(-5000, 50000)
    rough_plot.xaxis.ticker.desired_num_ticks = 10
    rough_plot.yaxis.ticker.desired_num_ticks = 10
    rough_plot = plot_format(rough_plot, "Degrees", "Intensity", "top_left", "10pt", "10pt", "9pt")
    rough_plots.append(rough_plot)

# Theme the three summary figures, fix their y-range and append them after the per-sample figures
plots = [backgrounds, backgroundsb, downsamples]
for plot in plots:
    plot_format(plot, "Degrees", "Intensity", "top_left", "10pt", "10pt", "9pt")
    plot.y_range = Range1d(-5000, 50000)
rough_plots.extend(plots)

# Three figures per row, each keeping its own toolbar
grid_rough = gridplot(children=rough_plots, ncols=3, merge_tools=False, width=480, height=430)
show(grid_rough)

# Side-by-side table: each sample's initial guess next to its optimized value
samples = ('ann1', 'pt2', 'pt2b', 'pt2c', 'pt2d', 'pt2e')
paired_columns = [name for sample in samples for name in (sample, sample + '_opt')]
merged_df = guess_df.join(optimized_df)[paired_columns]
display(merged_df)
