In [1]:
import pandas as pd
import numpy as np
from itertools import product, combinations
from collections import defaultdict
from sklearn.preprocessing import StandardScaler
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C
from scipy.interpolate import griddata

import plotly.io as pio
import plotly.graph_objects as go

<!-- @format -->

### IF ABOVE import failed, run below cell to attempt to install the required packages


In [2]:
!pip install plotly scikit-learn scipy pandas numpy --quiet

<!-- @format -->

### load the dataset


In [3]:
# load the dataset, change your filename here
data_origin = pd.read_excel("250130 results.xlsx")
# !global prefix!
prefix = "250130_Jieping"

# slice the data, get rid of the first column
data = data_origin.iloc[:, 1:-1]
header = data_origin.columns
print("data from Excel file")
data

data from Excel file


Unnamed: 0,Temperature (°C),Rotation (RPM),Current (mA),Yield (Prod./IS)
0,70,250,25,76
1,40,100,15,21
2,70,100,15,53
3,40,100,35,29
4,70,400,35,74
5,40,400,15,56
6,70,400,15,67
7,40,400,35,30
8,70,250,35,71
9,75,250,25,71


<!-- @format -->

### find the max value of the 'Yield (Prod./IS)' column, find the corresponding row index


In [4]:
max_yield = data["Yield (Prod./IS)"].max()
max_yield_row_index = data["Yield (Prod./IS)"].idxmax()
print(f"Max_yield: {max_yield}, max_yield_row_index: {max_yield_row_index}")
print(f"Condition of the max yield: \n{data.iloc[max_yield_row_index, :]}")

Max_yield: 76, max_yield_row_index: 0
Condition of the max yield: 
Temperature (°C)     70
Rotation (RPM)      250
Current (mA)         25
Yield (Prod./IS)     76
Name: 0, dtype: int64


<!-- @format -->

### function to save the figure


In [5]:
def save_fig(fig: go.Figure, filename: str, auto_open=True) -> None:
    pio.write_html(fig, file=filename, auto_open=auto_open)

<!-- @format -->

### function to plot the original data as a scatter plot


In [6]:
# function to plot the original data
def plot_original_data(
    data: pd.DataFrame,
    axial_x: str,
    axial_y: str,
    axial_z: str,
    axial_meta: list,
    marker_size: int = 6,
) -> go.Figure:
    data_copy = data.copy()
    print(f"axial_x: {axial_x}, axial_y: {axial_y}, axial_z: {axial_z}")

    # find the max and min value of z axis
    max_z_index = data[axial_z].idxmax()
    min_z_index = data[axial_z].idxmin()
    # drop the max and min value, we will plot them separately
    data_copy = data_copy.drop([max_z_index, min_z_index])

    # concat all the meta data into one string, formatted as {header1 name}: data1\n{header2 name}: data2\n...
    meta_data_copy = []
    for i in range(len(data_copy)):
        meta_data_copy.append(
            "<br>".join(
                [f"{header}: {data_copy.iloc[i][header]}" for header in axial_meta]
            )
        )
    # print(f"meta_data: {meta_data}")
    fig = go.Figure(
        data=[
            go.Scatter3d(
                x=data_copy[axial_x],
                y=data_copy[axial_y],
                z=data_copy[axial_z],
                meta=meta_data_copy,
                mode="markers",
                marker=dict(
                    size=marker_size, color="blue", opacity=1.0, symbol="circle"
                ),
                name="Yield",
                hovertemplate=f"{axial_x}: %{{x}}<br>{axial_y}: %{{y}}<br>{axial_z}: %{{z}}<br>%{{meta}}",
            )
        ]
    )

    # we need to generate to original meta
    meta_data = []
    for i in range(len(data)):
        meta_data.append(
            "<br>".join([f"{header}: {data.iloc[i][header]}" for header in axial_meta])
        )
    # Separately plot the highest z axis value with red
    fig.add_trace(
        go.Scatter3d(
            x=[data[axial_x][max_z_index]],
            y=[data[axial_y][max_z_index]],
            z=[data[axial_z][max_z_index]],
            meta=[meta_data[max_z_index]],
            mode="markers",
            marker=dict(size=marker_size, color="red", opacity=1.0, symbol="diamond"),
            name="Max Yield",
            hovertemplate=f"{axial_x}: {data[axial_x][max_z_index]}<br>{axial_y}: {data[axial_y][max_z_index]}<br>{axial_z}: {data[axial_z][max_z_index]}<br>%{{meta}}",
        )
    )

    # Separately plot the lowest z axis value with green
    fig.add_trace(
        go.Scatter3d(
            x=[data[axial_x][min_z_index]],
            y=[data[axial_y][min_z_index]],
            z=[data[axial_z][min_z_index]],
            meta=[meta_data[min_z_index]],
            mode="markers",
            marker=dict(size=marker_size, color="green", opacity=1.0, symbol="square"),
            name="Min Yield",
            hovertemplate=f"{axial_x}: {data[axial_x][min_z_index]}<br>{axial_y}: {data[axial_y][min_z_index]}<br>{axial_z}: {data[axial_z][min_z_index]}<br>%{{meta}}",
        )
    )

    # Layout settings
    fig.update_layout(
        font=dict(size=12, family="Arial", color="black", weight="bold"),  # font size
        title=dict(
            text=f"{axial_z} vs {axial_x} and {axial_y} scatter plot",
            automargin=True,
            pad=dict(t=50, b=50),  # top and bottom padding
            font=dict(size=24, weight="bold"),  # title font size
        ),
        hoverlabel_font=dict(
            size=12, family="Arial", weight="normal"
        ),  # hover font size
        legend=dict(
            x=0.05,
            y=0.99,
            bgcolor="rgba(255, 255, 255, 0.5)",
            bordercolor="black",
            borderwidth=1,
            font=dict(size=16, weight="bold"),
        ),
        scene=dict(
            xaxis=dict(
                title=f"{axial_x}",
                showbackground=True,
                backgroundcolor="lightblue",  # DEFINE YOUR DESIRED COLOR HERE
            ),
            yaxis=dict(
                title=f"{axial_y}",
                showbackground=True,
                backgroundcolor="lightcoral",  # DEFINE YOUR DESIRED COLOR HERE
            ),
            zaxis=dict(
                title=f"{axial_z}",
                showbackground=True,
                backgroundcolor="lightgreen",  # DEFINE YOUR DESIRED COLOR HERE
                nticks=10,
                range=[0, 100],
            ),
            aspectmode="cube",
        ),
        margin=dict(autoexpand=True, l=50, r=50, t=50, b=50),
        template="plotly_white",
        meta=dict(
            data=data.to_json(), axial_x=axial_x, axial_y=axial_y, axial_z=axial_z
        ),
    )

    return fig

<!-- @format -->

### function to iterate over a combination of TWO columns from columns_to_iterate and combined with ONE column from fixed_columns to generate the three axis for plotting

### in addition, add the information from the note_column to tooltip


In [7]:
def plot_combinations(
    data: pd.DataFrame, columns_to_iterate: list, fixed_columns: list, note_column: str
) -> None:
    for x, y in list(combinations(columns_to_iterate, 2)):
        print("----------------------------------")
        for z in fixed_columns:
            # found the columns that is not in the combination
            extra = list(set(columns_to_iterate) - set([x, y, z]))
            extra.append(note_column)

            fig = plot_original_data(
                data=data,
                axial_x=x,
                axial_y=y,
                axial_z=z,
                axial_meta=extra,
            )
            # save the plot
            fig_meta = fig["layout"]["meta"]
            axial_z = fig_meta["axial_z"].split(" ")[0]
            axial_x = fig_meta["axial_x"].split(" ")[0]
            axial_y = fig_meta["axial_y"].split(" ")[0]

            save_filename = (
                f"{prefix}_experimental_data_{axial_z}_vs_{axial_x}_and_{axial_y}.html"
            )
            print(f'Plotting {z} vs {x} and {y}, saving to "{save_filename}"')
            save_fig(fig, save_filename)

<!-- @format -->

### calling the function to plot the combinations


In [8]:
print(
    f"column to iterate: {list(data.columns[:3])}, fixed column: {list(data.columns[3:4])}"
)
# plot all unique x, y combinations of the first 3 columns against the rest of the columns
# convert yield to percentage
plot_combinations(
    data=data_origin,
    columns_to_iterate=list(data.columns[:3]),
    fixed_columns=list(data.columns[3:]),
    note_column="Note",
)

column to iterate: ['Temperature (°C)', 'Rotation (RPM)', 'Current (mA)'], fixed column: ['Yield (Prod./IS)']
----------------------------------
axial_x: Temperature (°C), axial_y: Rotation (RPM), axial_z: Yield (Prod./IS)
Plotting Yield (Prod./IS) vs Temperature (°C) and Rotation (RPM), saving to "250130_Jieping_experimental_data_Yield_vs_Temperature_and_Rotation.html"
----------------------------------
axial_x: Temperature (°C), axial_y: Current (mA), axial_z: Yield (Prod./IS)
Plotting Yield (Prod./IS) vs Temperature (°C) and Current (mA), saving to "250130_Jieping_experimental_data_Yield_vs_Temperature_and_Current.html"
----------------------------------
axial_x: Rotation (RPM), axial_y: Current (mA), axial_z: Yield (Prod./IS)
Plotting Yield (Prod./IS) vs Rotation (RPM) and Current (mA), saving to "250130_Jieping_experimental_data_Yield_vs_Rotation_and_Current.html"


<!-- @format -->

### code to find the boundaries of the data (for testing)


In [9]:
def bounds(data) -> list:
    return [[min(data[col]), max(data[col])] for col in data.columns]


def bounds_np(data) -> list:
    mins = np.min(data, axis=0)
    maxs = np.max(data, axis=0)
    return [[mins_, maxs_] for mins_, maxs_ in zip(mins, maxs)]


print(f"Using pandas: {bounds(data)}")
print(f"Using numpy: {bounds_np(data.to_numpy(dtype=float))}")

Using pandas: [[30, 75], [50, 502], [8, 42], [19, 76]]
Using numpy: [[np.float64(30.0), np.float64(75.0)], [np.float64(50.0), np.float64(502.0)], [np.float64(8.0), np.float64(42.0)], [np.float64(19.0), np.float64(76.0)]]


<!-- @format -->

### code to generating the linear space datapoints (for testing)


In [10]:
# create the line space
def fullfact_np(bound_array, num_levels: int) -> list:
    return np.array(
        list(
            product(
                *[np.linspace(min_, max_, num_levels) for min_, max_ in bound_array]
            )
        )
    ).tolist()


# create the line space
def fullfact(bound_array, num_levels: int) -> list:
    return np.array(
        list(
            product(
                *[np.linspace(min_, max_, num_levels) for min_, max_ in bound_array]
            )
        )
    ).tolist()


print("testing of generating the 2 level of the fullfactorial design")
print(f"Using pandas: {fullfact(bounds(data.iloc[:, 0:3]), 2)}")
print(f"Using numpy: {fullfact_np(bounds_np(data.iloc[:, 0:3].to_numpy()), 2)}")

testing of generating the 2 level of the fullfactorial design
Using pandas: [[30.0, 50.0, 8.0], [30.0, 50.0, 42.0], [30.0, 502.0, 8.0], [30.0, 502.0, 42.0], [75.0, 50.0, 8.0], [75.0, 50.0, 42.0], [75.0, 502.0, 8.0], [75.0, 502.0, 42.0]]
Using numpy: [[30.0, 50.0, 8.0], [30.0, 50.0, 42.0], [30.0, 502.0, 8.0], [30.0, 502.0, 42.0], [75.0, 50.0, 8.0], [75.0, 50.0, 42.0], [75.0, 502.0, 8.0], [75.0, 502.0, 42.0]]


<!-- @format -->

### preprocess the data, scale the data to -1 to 1


In [11]:
scaler = StandardScaler()
parameters_preprocessed = pd.DataFrame(scaler.fit_transform(data), columns=data.columns)
print("preprocessed data:")
display(parameters_preprocessed)

preprocessed data:


Unnamed: 0,Temperature (°C),Rotation (RPM),Current (mA),Yield (Prod./IS)
0,0.969003,-0.106268,0.0,1.345435
1,-1.065903,-1.211034,-1.007951,-1.49288
2,0.969003,-1.211034,-1.007951,0.158503
3,-1.065903,-1.211034,1.007951,-1.080034
4,0.969003,0.998498,1.007951,1.242224
5,-1.065903,0.998498,-1.007951,0.313321
6,0.969003,0.998498,-1.007951,0.880984
7,-1.065903,0.998498,1.007951,-1.028429
8,0.969003,-0.106268,1.007951,1.087407
9,1.308154,-0.106268,0.0,1.087407


<!-- @format -->

### reverse the scaling back to the original data


In [12]:
parameters_inverse = scaler.inverse_transform(parameters_preprocessed)
parameters_inverse = pd.DataFrame(parameters_inverse, columns=data.columns)
print("inverse back from preprocessed data:")
parameters_inverse

inverse back from preprocessed data:


Unnamed: 0,Temperature (°C),Rotation (RPM),Current (mA),Yield (Prod./IS)
0,70.0,250.0,25.0,76.0
1,40.0,100.0,15.0,21.0
2,70.0,100.0,15.0,53.0
3,40.0,100.0,35.0,29.0
4,70.0,400.0,35.0,74.0
5,40.0,400.0,15.0,56.0
6,70.0,400.0,15.0,67.0
7,40.0,400.0,35.0,30.0
8,70.0,250.0,35.0,71.0
9,75.0,250.0,25.0,71.0


<!-- @format -->

### Fit a Gaussian Regressor on the data (Default kernel)


In [13]:
# Define a kernel with different parameters
kernel = C(1.0, (1e-5, 1e5)) * RBF(10, (1e-3, 1e3))

# Create the Gaussian Process Regressor with a regularization term
regressor = GaussianProcessRegressor(alpha=1e-3, random_state=42)

# Fit the model
regressor.fit(parameters_preprocessed.iloc[:, :3], parameters_preprocessed.iloc[:, 3])

# Print the kernel and score
print(f"Kernel: {regressor.kernel_}")
print(f"Log Marginal Likelihood: {regressor.log_marginal_likelihood_value_}")

Kernel: 1**2 * RBF(length_scale=1)
Log Marginal Likelihood: -15.21759318794614


<!-- @format -->

### function to predict the data using the trained model


In [14]:
# predict the yield
def predict_yield(
    preprocessed_df: pd.DataFrame,
    regressor: GaussianProcessRegressor,
    scaler: StandardScaler,
    num_levels: int = 2,
) -> pd.DataFrame:
    # create the line space
    line_space = fullfact(bounds(preprocessed_df.iloc[:, 0:3]), num_levels)
    line_space_df = pd.DataFrame(line_space, columns=header[1:4])
    predicted_yield, std = regressor.predict(line_space_df, return_std=True)

    # concatenate the predicted yield and the line space
    predicted_yield = np.concatenate(
        (line_space, predicted_yield.reshape(-1, 1)), axis=1
    )
    # inverse the scaling
    predicted_yield = scaler.inverse_transform(predicted_yield)
    predicted_yield = pd.DataFrame(predicted_yield, columns=preprocessed_df.columns)
    return predicted_yield, std


predicted_yield, std = predict_yield(
    preprocessed_df=parameters_preprocessed,
    regressor=regressor,
    scaler=scaler,
    num_levels=30,
)

<!-- @format -->

### the predicted data


In [15]:
predicted_yield

Unnamed: 0,Temperature (°C),Rotation (RPM),Current (mA),Yield (Prod./IS)
0,30.0,50.0,8.000000,34.459060
1,30.0,50.0,9.172414,32.929334
2,30.0,50.0,10.344828,31.464512
3,30.0,50.0,11.517241,30.099712
4,30.0,50.0,12.689655,28.867348
...,...,...,...,...
26995,75.0,502.0,37.310345,63.689534
26996,75.0,502.0,38.482759,62.942591
26997,75.0,502.0,39.655172,62.088434
26998,75.0,502.0,40.827586,61.148912


In [16]:
print("max predicted condition:")
display(predicted_yield.max())

max predicted condition:


Temperature (°C)     75.000000
Rotation (RPM)      502.000000
Current (mA)         42.000000
Yield (Prod./IS)     80.573833
dtype: float64

In [17]:
def plot_original_and_predicted_data(
    data: pd.DataFrame,
    predicted_data: dict,
    axial_x: str,
    axial_y: str,
    axial_z: str,
    axial_meta: list,
) -> go.Figure:
    # plot original data
    fig = plot_original_data(
        data=data,
        axial_x=axial_x,
        axial_y=axial_y,
        axial_z=axial_z,
        axial_meta=axial_meta,
    )

    # concat all the meta data into one string, formatted as {header1 name}: data1\n{header2 name}: data2\n...
    predicted_data_meta = {}
    for key, value in predicted_data.items():
        predicted_data_meta[key] = []
        for i in range(len(value[0])):
            predicted_data_meta[key].append(
                "<br>".join(
                    [
                        f"{header}: {value[0].iloc[i][header]}"
                        for header in axial_meta
                        if header in value[0].columns
                    ]
                )
            )
    # plot the predicted data
    # first find the max and min value of the predicted data, remove them
    overall_max_df = None
    overall_max_df_key = None
    overall_max_index = None

    overall_min_df = None
    overall_min_df_key = None
    overall_min_index = None
    # go through every key value pair in the predicted data, record the max and min index and corresponding data
    for key, value in predicted_data.items():
        df = value[0]
        current_max_index = df[axial_z].idxmax()
        current_min_index = df[axial_z].idxmin()
        if (
            overall_max_df is None
            or df[axial_z][current_max_index]
            > overall_max_df[axial_z][overall_max_index]
        ):
            overall_max_df = df
            overall_max_df_key = key
            overall_max_index = current_max_index
        if (
            overall_min_df is None
            or df[axial_z][current_min_index]
            < overall_min_df[axial_z][overall_min_index]
        ):
            overall_min_df = df
            overall_min_df_key = key
            overall_min_index = current_min_index
    # save the max and min value
    if overall_max_df is not None:
        overall_max_row = overall_max_df.iloc[overall_max_index]
        overall_max_df = overall_max_df.drop(overall_max_index)
        overall_max_meta = predicted_data_meta[overall_max_df_key][overall_max_index]
        # plot the max value
        fig.add_trace(
            go.Scatter3d(
                x=[overall_max_row[axial_x]],
                y=[overall_max_row[axial_y]],
                z=[overall_max_row[axial_z]],
                meta=overall_max_meta,
                mode="markers",
                marker=dict(size=6, color="gold", opacity=1.0, symbol="diamond"),
                name="Max Predicted Yield",
                hovertemplate=f"{axial_x}: %{{x}}<br>{axial_y}: %{{y}}<br>{axial_z}: %{{z}}<br>%{{meta}}",
            )
        )
    if overall_min_df is not None:
        overall_min_row = overall_min_df.iloc[overall_min_index]
        overall_min_df = overall_min_df.drop(overall_min_index)
        overall_min_meta = predicted_data_meta[overall_min_df_key][overall_min_index]
        fig.add_trace(
            go.Scatter3d(
                x=[overall_min_row[axial_x]],
                y=[overall_min_row[axial_y]],
                z=[overall_min_row[axial_z]],
                meta=overall_min_meta,
                mode="markers",
                marker=dict(size=6, color="maroon", opacity=1.0, symbol="diamond"),
                name="Min Predicted Yield",
                hovertemplate=f"{axial_x}: %{{x}}<br>{axial_y}: %{{y}}<br>{axial_z}: %{{z}}<br>%{{meta}}",
            )
        )

    # key is the name, value is the pandas dataframe for the data
    for key, value in predicted_data.items():
        df = value[0]
        fig.add_trace(
            go.Scatter3d(
                x=df[axial_x],
                y=df[axial_y],
                z=df[axial_z],
                meta=predicted_data_meta[key],
                mode="markers",
                marker=dict(size=3, color=value[1], opacity=0.5, symbol="circle"),
                name=f"{key}",
                hovertemplate=f"{axial_x}: %{{x}}<br>{axial_y}: %{{y}}<br>{axial_z}: %{{z}}<br>%{{meta}}",
            )
        )

    # Improve layout
    fig.update_layout(
        font=dict(size=12, family="Arial", color="black", weight="bold"),  # font size
        title=dict(
            text=f"{axial_z} vs {axial_x} and {axial_y} plot with predicted data",
            automargin=True,
            pad=dict(t=50, b=50),  # top and bottom padding
            font=dict(size=24, weight="bold"),  # title font size
        ),
        title_automargin=True,
        hoverlabel_font=dict(
            size=12, family="Arial", weight="normal"
        ),  # hover font size
        legend=dict(
            x=0.05,
            y=0.99,
            bgcolor="rgba(255, 255, 255, 0.5)",
            bordercolor="black",
            borderwidth=1,
            font=dict(size=16, weight="bold"),
        ),
        scene=dict(
            xaxis=dict(
                title=f"{axial_x}", showbackground=True, backgroundcolor="lightblue"
            ),
            yaxis=dict(
                title=f"{axial_y}", showbackground=True, backgroundcolor="lightcoral"
            ),
            zaxis=dict(
                title=f"{axial_z}",
                showbackground=True,
                backgroundcolor="lightgreen",
                nticks=10,
                range=[0, 100],
            ),
            aspectmode="cube",
        ),
        margin=dict(autoexpand=True, l=50, r=50, t=50, b=50),
        template="plotly_white",
    )

    return fig

In [18]:
def plot_combinations_with_Gaussian_regression_all(
    data: pd.DataFrame,
    predicted_data: pd.DataFrame,
    columns_to_iterate: list,
    fixed_columns: list,
    note_column: str,
) -> None:
    # now do the same trick, plot every unique combination of the first 3 columns against the rest of the columns
    for x, y in list(combinations(columns_to_iterate, 2)):
        for z in fixed_columns:
            # found the columns that is not in the combination
            extra = list(set(columns_to_iterate) - set([x, y, z]))
            extra.append(note_column)

            print(f"Plotting {z} vs {x} and {y} after Gaussian Regression")
            fig = plot_original_and_predicted_data(
                data=data,
                predicted_data={"Predicted Data Points": [predicted_data, "green"]},
                axial_x=x,
                axial_y=y,
                axial_z=z,
                axial_meta=extra,
            )

            # save the plot
            fig_meta = fig["layout"]["meta"]
            axial_z = fig_meta["axial_z"].split(" ")[0]
            axial_x = fig_meta["axial_x"].split(" ")[0]
            axial_y = fig_meta["axial_y"].split(" ")[0]

            save_fig(
                fig,
                f"{prefix}_predicted_all_{axial_z}_vs_{axial_x}_and_{axial_y}.html",
                auto_open=True,
            )

In [19]:
plot_combinations_with_Gaussian_regression_all(
    data=data_origin,
    predicted_data=predicted_yield,
    columns_to_iterate=list(data.columns[:3]),
    fixed_columns=list(data.columns[3:]),
    note_column="Note",
)

Plotting Yield (Prod./IS) vs Temperature (°C) and Rotation (RPM) after Gaussian Regression
axial_x: Temperature (°C), axial_y: Rotation (RPM), axial_z: Yield (Prod./IS)
Plotting Yield (Prod./IS) vs Temperature (°C) and Current (mA) after Gaussian Regression
axial_x: Temperature (°C), axial_y: Current (mA), axial_z: Yield (Prod./IS)
Plotting Yield (Prod./IS) vs Rotation (RPM) and Current (mA) after Gaussian Regression
axial_x: Rotation (RPM), axial_y: Current (mA), axial_z: Yield (Prod./IS)


In [20]:
# generalize the function to filter either the max or min value
def filter_min_max(
    parameter: pd.DataFrame,
    predicted_yield: pd.Series,
    mode: str,
    x_axis_header: str,
    y_axis_header: str,
) -> pd.DataFrame:
    # Create a dictionary to store the highest yield for each (x, y) pair
    filtered_points = defaultdict(
        lambda: float("-inf") if mode == "max" else float("inf")
    )

    # Iterate over the points to keep the highest yield for each (x, y) pair
    for i in range(len(predicted_yield)):
        x, y, z = (
            parameter[x_axis_header][i],
            parameter[y_axis_header][i],
            predicted_yield[i],
        )
        if mode == "max":
            if z > filtered_points[(x, y)]:
                filtered_points[(x, y)] = z
        elif mode == "min":
            if z < filtered_points[(x, y)]:
                filtered_points[(x, y)] = z

    # Extract the filtered data
    filtered_x = []
    filtered_y = []
    filtered_z = []

    for (x, y), z in filtered_points.items():
        filtered_x.append(x)
        filtered_y.append(y)
        filtered_z.append(z)

    # generated header for the return dataframe, it will be the x, y, z columns corresponding header
    header_x = data.columns[data.columns.get_loc(x_axis_header)]
    header_y = data.columns[data.columns.get_loc(y_axis_header)]
    header_z = predicted_yield.name

    header = [header_x, header_y, header_z]

    return pd.DataFrame(list(zip(filtered_x, filtered_y, filtered_z)), columns=header)

In [21]:
def plot_combinations_with_Gaussian_regression_only_max_and_min_points(
    data: pd.DataFrame,
    predicted_yield: pd.DataFrame,
    columns_to_iterate: list,
    fixed_columns: list,
    note_column: str,
) -> None:
    # now do the same trick, plot every unique combination of the first 3 columns against the rest of the columns
    for x, y in list(combinations(columns_to_iterate, 2)):
        # pick these two columns out of the dataframes
        parameter = predicted_yield[[x, y]]
        for z in fixed_columns:
            # found the columns that is not in the combination
            extra = list(set(columns_to_iterate) - set([x, y, z]))
            extra.append(note_column)

            predicted_yield_Series = predicted_yield["Yield (Prod./IS)"]
            print(
                f"Plotting {z} vs {x} and {y} after Gaussian Regression with only the max and min values"
            )
            filter_predicted_yield_max = filter_min_max(
                parameter=parameter,
                predicted_yield=predicted_yield_Series,
                mode="max",
                x_axis_header=x,
                y_axis_header=y,
            )
            filter_predicted_yield_min = filter_min_max(
                parameter=parameter,
                predicted_yield=predicted_yield_Series,
                mode="min",
                x_axis_header=x,
                y_axis_header=y,
            )

            fig = plot_original_and_predicted_data(
                data=data,
                predicted_data={
                    "Max Predicted Data Points": [filter_predicted_yield_max, "green"],
                    "Min Predicted Data Points": [filter_predicted_yield_min, "red"],
                },
                axial_x=x,
                axial_y=y,
                axial_z=z,
                axial_meta=extra,
            )

            # save the plot
            fig_meta = fig["layout"]["meta"]
            axial_z = fig_meta["axial_z"].split(" ")[0]
            axial_x = fig_meta["axial_x"].split(" ")[0]
            axial_y = fig_meta["axial_y"].split(" ")[0]

            save_fig(
                fig,
                f"{prefix}_predicted_only_max_and_min_{axial_z}_vs_{axial_x}_and_{axial_y}_.html",
                auto_open=True,
            )


plot_combinations_with_Gaussian_regression_only_max_and_min_points(
    data=data_origin,
    predicted_yield=predicted_yield,
    columns_to_iterate=list(data.columns[:3]),
    fixed_columns=list(data.columns[3:]),
    note_column="Note",
)

Plotting Yield (Prod./IS) vs Temperature (°C) and Rotation (RPM) after Gaussian Regression with only the max and min values
axial_x: Temperature (°C), axial_y: Rotation (RPM), axial_z: Yield (Prod./IS)
Plotting Yield (Prod./IS) vs Temperature (°C) and Current (mA) after Gaussian Regression with only the max and min values
axial_x: Temperature (°C), axial_y: Current (mA), axial_z: Yield (Prod./IS)
Plotting Yield (Prod./IS) vs Rotation (RPM) and Current (mA) after Gaussian Regression with only the max and min values
axial_x: Rotation (RPM), axial_y: Current (mA), axial_z: Yield (Prod./IS)


In [22]:
def plot_combinations_with_Gaussian_regression_as_surface_plot(
    data: pd.DataFrame,
    predicted_yield: pd.DataFrame,
    columns_to_iterate: list,
    fixed_columns: list,
    note_column: str,
) -> None:
    for x, y in list(combinations(columns_to_iterate, 2)):
        # pick these two columns out of the dataframes
        parameter = predicted_yield[[x, y]]
        for z in fixed_columns:
            # found the columns that is not in the combination
            extra = list(set(columns_to_iterate) - set([x, y, z]))
            extra.append(note_column)

            predicted_yield_Series = predicted_yield["Yield (Prod./IS)"]

            print(f"Surface plot {z} vs {x} and {y} after Gaussian Regression")
            filter_predicted_yield_max = filter_min_max(
                parameter=parameter,
                predicted_yield=predicted_yield_Series,
                mode="max",
                x_axis_header=x,
                y_axis_header=y,
            )
            # Create grid data for surface plot
            grid_x, grid_y = np.meshgrid(
                np.linspace(
                    min(parameter[x]),
                    max(parameter[x]),
                    100,
                ),
                np.linspace(
                    min(parameter[y]),
                    max(parameter[y]),
                    100,
                ),
            )
            # Interpolate the z values onto the grid
            grid_z = griddata(
                (filter_predicted_yield_max[x], filter_predicted_yield_max[y]),
                filter_predicted_yield_max[z],
                (grid_x, grid_y),
                method="cubic",
            )
            fig = plot_original_data(
                data=data,
                axial_x=x,
                axial_y=y,
                axial_z=z,
                axial_meta=extra,
            )

            # mark the max value
            max_index = filter_predicted_yield_max[z].idxmax()
            fig.add_scatter3d(
                x=[filter_predicted_yield_max[x][max_index]],
                y=[filter_predicted_yield_max[y][max_index]],
                z=[filter_predicted_yield_max[z][max_index]],
                mode="markers",
                marker=dict(size=8, color="gold", opacity=1.0, symbol="diamond"),
                name="Max Predicted Data Points",
                hovertemplate=f"{x}: %{{x}}<br>{y}: %{{y}}<br>{z}: %{{z}}",
            )
            # Create the surface plot
            fig.add_surface(
                x=grid_x,
                y=grid_y,
                z=grid_z,
                showscale=False,
                colorscale="Viridis",
                opacity=0.80,
                name="Surface Plot",
                showlegend=True,
                hovertemplate=f"{x}: %{{x}}<br>{y}: %{{y}}<br>{z}: %{{z}}",
            )
            # Improve layout
            fig.update_layout(
                font=dict(
                    size=12, family="Arial", color="black", weight="bold"
                ),  # font size
                title=dict(
                    text=f"{z} vs {x} and {y} plot with predicted surface plot",
                    automargin=True,
                    pad=dict(t=50, b=50),  # top and bottom padding
                    font=dict(size=24, weight="bold"),  # title font size
                ),
                hoverlabel_font=dict(
                    size=12, family="Arial", weight="normal"
                ),  # hover font size
                legend=dict(
                    x=0.05,
                    y=0.99,
                    bgcolor="rgba(255, 255, 255, 0.5)",
                    bordercolor="black",
                    borderwidth=1,
                    font=dict(size=16, weight=1000),
                ),
                scene=dict(
                    xaxis=dict(
                        title=x, showbackground=True, backgroundcolor="lightblue"
                    ),
                    yaxis=dict(
                        title=y, showbackground=True, backgroundcolor="lightyellow"
                    ),
                    zaxis=dict(
                        title=z,
                        showbackground=True,
                        backgroundcolor="lightgreen",
                        nticks=10,
                        range=[0, 100],
                    ),
                    aspectmode="cube",
                ),
                margin=dict(autoexpand=True, l=50, r=50, t=50, b=50),
                template="plotly_white",
                meta=dict(
                    axial_x=x,
                    axial_y=y,
                    axial_z=z,
                ),
            )

            # save the plot
            fig_meta = fig["layout"]["meta"]
            axial_z = fig_meta["axial_z"].split(" ")[0]
            axial_x = fig_meta["axial_x"].split(" ")[0]
            axial_y = fig_meta["axial_y"].split(" ")[0]

            # save the plot
            save_fig(
                fig,
                f"{prefix}_surface_plot_{axial_z}_vs_{axial_x}_and_{axial_y}.html",
            )


plot_combinations_with_Gaussian_regression_as_surface_plot(
    data=data_origin,
    predicted_yield=predicted_yield,
    columns_to_iterate=list(data.columns[:3]),
    fixed_columns=list(data.columns[3:]),
    note_column="Note",
)

Surface plot Yield (Prod./IS) vs Temperature (°C) and Rotation (RPM) after Gaussian Regression
axial_x: Temperature (°C), axial_y: Rotation (RPM), axial_z: Yield (Prod./IS)
Surface plot Yield (Prod./IS) vs Temperature (°C) and Current (mA) after Gaussian Regression
axial_x: Temperature (°C), axial_y: Current (mA), axial_z: Yield (Prod./IS)
Surface plot Yield (Prod./IS) vs Rotation (RPM) and Current (mA) after Gaussian Regression
axial_x: Rotation (RPM), axial_y: Current (mA), axial_z: Yield (Prod./IS)
