<a href="https://colab.research.google.com/github/francji1/01NAEX/blob/main/code/01NAEX_Exercise_09_python_student_solution_VR.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


# 01NAEX Exercise 09

author: Vendula Rusá

In [None]:
!pip install pyDOE3

In [None]:
import itertools
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pyDOE3 import ff2n
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.graphics.factorplots import interaction_plot
from statsmodels.stats.anova import anova_lm
from scipy import stats as st

# Apply the "seaborn-v0_8-whitegrid" style sheet to every figure created below.
plt.style.use("seaborn-v0_8-whitegrid")
# Default figure size (width, height); individual cells override it where needed.
plt.rcParams["figure.figsize"] = (8, 5)

In [None]:
def contour_slices(model, x_var, y_var, fixed_var, fixed_levels, grid_points=41, cmap="viridis", title=""):
    """Draw side-by-side filled contour plots of a fitted model's predictions.

    One panel is drawn per entry of ``fixed_levels``. Within a panel, ``x_var``
    and ``y_var`` sweep the range [-1, 1] while ``fixed_var`` is held at that
    panel's level. All panels share a single set of contour levels computed
    from the predictions of *every* panel, so colours are comparable between
    panels and the one colorbar is valid for all of them.

    Parameters
    ----------
    model
        Fitted model exposing ``predict(DataFrame)``; the result must offer
        ``to_numpy()`` (e.g. a pandas Series).
    x_var, y_var : str
        Column names of the predictors placed on the horizontal / vertical axis.
    fixed_var : str
        Column name of the predictor held constant within each panel.
    fixed_levels : sequence
        Values of ``fixed_var``; one panel per value.
    grid_points : int, optional
        Number of grid nodes per axis.
    cmap : str, optional
        Matplotlib colormap name used for the filled contours.
    title : str, optional
        Figure-level title.

    Raises
    ------
    ValueError
        If ``fixed_levels`` is empty.
    """
    fixed_levels = list(fixed_levels)
    if not fixed_levels:
        raise ValueError("fixed_levels must contain at least one level")

    # Hard-coded factor names: any of them that is not plotted or fixed is
    # added to the prediction frame at 0.0. Presumably ignored by models whose
    # formula does not reference these columns.
    all_vars = {"A_num", "C_num", "D_num"}
    held_at_zero = all_vars - {x_var, y_var, fixed_var}

    xs = np.linspace(-1, 1, grid_points)
    ys = np.linspace(-1, 1, grid_points)
    X, Y = np.meshgrid(xs, ys)

    # Predict every slice first so the contour levels can span all of them.
    surfaces = []
    for lvl in fixed_levels:
        data = pd.DataFrame({x_var: X.ravel(), y_var: Y.ravel()})
        for var in held_at_zero:
            data[var] = 0.0
        data[fixed_var] = lvl
        surfaces.append(model.predict(data).to_numpy().reshape(X.shape))

    z_min = min(Z.min() for Z in surfaces)
    z_max = max(Z.max() for Z in surfaces)
    if not z_max > z_min:
        # A constant surface would give non-increasing levels, which contourf
        # rejects; widen the range by a negligible amount instead.
        pad = max(abs(z_min), 1.0) * 1e-6
        z_min, z_max = z_min - pad, z_max + pad
    levels = np.linspace(z_min, z_max, 20)

    fig, axes = plt.subplots(1, len(fixed_levels), figsize=(5 * len(fixed_levels), 4), sharex=True, sharey=True)
    # plt.subplots returns a bare Axes (not an array) when there is one panel.
    axes = np.atleast_1d(axes)

    cs = None
    for ax, lvl, Z in zip(axes, fixed_levels, surfaces):
        cs = ax.contourf(xs, ys, Z, levels=levels, cmap=cmap)
        ax.set_title(f"{fixed_var.replace('_num', '').upper()} = {lvl}")
        ax.set_xlabel(x_var.replace('_num', ''))
        ax.set_ylabel(y_var.replace('_num', ''))

    fig.suptitle(title)
    # Reserve a strip on the right and place one shared colorbar in it.
    fig.subplots_adjust(right=0.85)
    cbar_ax = fig.add_axes([0.88, 0.15, 0.04, 0.7])
    fig.colorbar(cs, cax=cbar_ax, label="Predicted Rate")


## Problem 11.8
From Chapter 11 of D. C. Montgomery, *Design and Analysis of Experiments*, 8th edition.

The data were collected in an experiment to optimize crystal growth as a function of three variables $x_1$, $x_2$, and $x_3$. Large values of $y$ (`Yield` in grams) are desirable. Fit a second-order model and analyze the fitted surface. Under what set of conditions is maximum growth achieved?

In [None]:
# !pip install pyDOE3
# from pyDOE3 import ff2n

In [None]:
import pandas as pd

# Load the Problem 11.8 data set; the file is semicolon-delimited.
df118 = pd.read_csv("https://raw.githubusercontent.com/francji1/01NAEX/main/data/Ex118.csv", sep=";")
# Bare expression so that the notebook renders the frame as the cell output.
df118

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# One scatter panel per factor, laid out in a single row of three.
plt.figure(figsize=(15, 5))

for position, factor in enumerate(("x1", "x2", "x3"), start=1):
    plt.subplot(1, 3, position)
    sns.scatterplot(data=df118, x=factor, y="Yield")
    plt.title(f"Yield vs {factor}")

plt.tight_layout()
plt.show()

In [None]:
# Full second-order model in x1, x2, x3: linear terms, pure quadratic terms
# and all two-factor interactions.
formula = "Yield ~ x1 + x2 + x3 + I(x1**2) + I(x2**2) + I(x3**2) + x1:x2 + x1:x3 + x2:x3"
model = smf.ols(formula, data=df118)
results = model.fit()
# Print the fitted-model summary table.
print(results.summary())

In [None]:
# Reduced second-order model: only x2, x3 and their squares are kept
# (presumably the terms judged significant in the full fit - confirm against its summary).
formula_sig = "Yield ~  x2 + x3  + I(x2**2) + I(x3**2) "
model_sig = smf.ols(formula_sig, data=df118).fit()
print(model_sig.summary())

In [None]:
from numpy.linalg import inv, eig

# Canonical analysis of the reduced model, written in matrix form as
#     y_hat = b0 + x'b + x'Bx
# with x = (x2, x3).
coef = model_sig.params

# Intercept (beta_0)
b0 = coef['Intercept']

# Linear coefficients (beta_2, beta_3)
b = np.array([coef['x2'], coef['x3']])

# Pure quadratic coefficients on the diagonal; the reduced model has no
# x2:x3 interaction, so the off-diagonal entries are zero.
B = np.diag([coef['I(x2 ** 2)'], coef['I(x3 ** 2)']])

print("--- Matrix B (Quadratic & Interaction) ---")
print(B)
print("\n")

# Stationary point: x_s = -1/2 * B^{-1} b
x_stationary = -0.5 * (inv(B) @ b)

print("--- Stationary Point (Coded Units) ---")
for label, value in zip(("x2", "x3"), x_stationary):
    print(f"{label}: {value:.4f}")
print("\n")

# Predicted response at the stationary point: y_s = b0 + 1/2 * x_s' b
y_stationary = b0 + 0.5 * (x_stationary @ b)
print("--- Predicted Maximum Yield ---")
print(f"Yield: {y_stationary:.4f}")
print("\n")

# The signs of the eigenvalues of B give the nature of the stationary point:
# all negative -> maximum, all positive -> minimum, anything else -> saddle.
eigenvalues, eigenvectors = eig(B)

print("--- Eigenvalues of B ---")
print(eigenvalues)

if (eigenvalues < 0).all():
    verdict = "MAXIMUM"
elif (eigenvalues > 0).all():
    verdict = "MINIMUM"
else:
    verdict = "SADDLE POINT"
print(f"\nResult: The stationary point is a {verdict}.")

In [None]:
# Create grid for contour plot
# Grid over the observed x2 / x3 ranges, 100 nodes per axis
x2_range = np.linspace(df118["x2"].min(), df118["x2"].max(), 100)
x3_range = np.linspace(df118["x3"].min(), df118["x3"].max(), 100)
X2, X3 = np.meshgrid(x2_range, x3_range)

# Reduced-model prediction at every grid node, reshaped back onto the grid
grid = {"x2": X2.ravel(), "x3": X3.ravel()}
Y_pred = model_sig.predict(grid).values.reshape(X2.shape)

# Filled contour plot of the predicted yield
fig, ax = plt.subplots(figsize=(8, 6))
contour = ax.contourf(X2, X3, Y_pred, levels=30, cmap="viridis")
fig.colorbar(contour, ax=ax)
ax.set_xlabel("x2")
ax.set_ylabel("x3")
ax.set_title("Contour Plot of Predicted Yield")
plt.show()

The best conditions for maximizing the yield are x2 = 0.0565 and x3 = -0.0810 (coded units). Since x1 is not significant, it can be set to any value in the valid range $[-1, 1]$.



## Problem 11.12
From Chapter 11 of D. C. Montgomery, *Design and Analysis of Experiments*, 8th edition.

Consider the three-variable central composite design. Analyze the data and draw conclusions, assuming that we wish to maximize `Conversion` ($y_1$) with `Activity` ($y_2$) between 55 and 60.

In [None]:
# Load the Problem 11.12 data set (semicolon-delimited).
# NOTE(review): this rebinds `df118`, the name used for the Problem 11.8 data
# earlier in the file; every later cell reading `df118` sees the Ex1112 data.
df118 = pd.read_csv("https://raw.githubusercontent.com/francji1/01NAEX/main/data/Ex1112.csv", sep=";")
df118

In [None]:
# Full second-order formula template in Time, Temperature and Catalyst.
# "Response" is a placeholder that is substituted with the actual response
# column name before fitting.
form_quad = (
    "Response ~ Time + Temperature + Catalyst + "
    "I(Time**2) + I(Temperature**2) + I(Catalyst**2) + "
    "Time:Temperature + Time:Catalyst + Temperature:Catalyst"
)

# Model for Conversion
model_conv = smf.ols(form_quad.replace("Response","Conversion"), data=df118).fit()
print("\n=== Conversion Model Summary ===")
print(model_conv.summary())
# Conversion model ANOVA (typ=2 requests the Type II table)
anova_conv = sm.stats.anova_lm(model_conv, typ=2)
print("=== Conversion Model ANOVA ===")
print(anova_conv)

In [None]:
# Model for Activity: the full second-order formula with the response
# placeholder replaced by the Activity column.
model_act = smf.ols(form_quad.replace("Response","Activity"), data=df118).fit()
print("\n=== Activity Model Summary ===")
print(model_act.summary())

# Activity model ANOVA (typ=2 requests the Type II table). This table belongs
# to the full model fitted above, so the heading matches the Conversion one
# rather than claiming it is the reduced ("significant") model.
anova_act = sm.stats.anova_lm(model_act, typ=2)
print("\n=== Activity Model ANOVA ===")
print(anova_act)

In [None]:
# Model for Activity without insignificant factors: only Time, Catalyst and
# Time squared are kept, so Temperature does not enter this model at all.
model_act_sig = smf.ols("Activity ~ Time + Catalyst + I(Time**2)", data=df118).fit()
print("\n=== Activity Model Summary ===")
print(model_act_sig.summary())

In [None]:
def perform_canonical_analysis(model):
    """
    Run a canonical analysis on a fitted second-order response-surface model.

    The fitted surface is written as ``y_hat = b0 + x'b + x'Bx``, where ``b``
    holds the linear coefficients and the symmetric matrix ``B`` holds the pure
    quadratic coefficients on its diagonal and half of each two-factor
    interaction coefficient off the diagonal. The stationary point is
    ``x_s = -1/2 * B^{-1} b`` and the response there is ``b0 + 1/2 * x_s'b``.
    Progress and results are printed as a side effect.

    Parameters
    ----------
    model
        Object with a ``params`` pandas Series indexed by term name. Linear
        terms are the entries other than ``'Intercept'`` whose name contains
        neither ``':'`` nor ``'I('``; quadratic terms are looked up as
        ``I(v ** 2)`` or ``I(v**2)`` and interactions as ``u:v`` or ``v:u``.

    Returns
    -------
    tuple
        ``(linear_vars, x_stationary, y_stationary)``: the alphabetically
        sorted variable names, the stationary point in that order, and the
        predicted response there. ``(None, None, None)`` is returned when the
        model has no linear terms or ``B`` is singular.
    """
    print(f"\n{'='*20} CANONICAL ANALYSIS {'='*20}")

    # 1. Identify model variables from the linear terms (excluding Intercept)
    params = model.params
    linear_vars = sorted(
        term for term in params.index
        if term != 'Intercept' and ':' not in term and 'I(' not in term
    )
    dim = len(linear_vars)

    print(f"Active Variables: {linear_vars}")

    if dim == 0:
        print("\n[!] Model has no linear terms; nothing to analyse.")
        return None, None, None

    # 2. Construct b vector (linear coefficients)
    b = np.array([params[var] for var in linear_vars], dtype=float)

    # 3. Construct B matrix (quadratic on the diagonal, half-interactions off it)
    B = np.zeros((dim, dim))
    for i, var_i in enumerate(linear_vars):
        # The term name may or may not carry spaces around "**".
        for quad_term in (f"I({var_i} ** 2)", f"I({var_i}**2)"):
            if quad_term in params:
                B[i, i] = params[quad_term]
                break
        for j in range(i + 1, dim):
            var_j = linear_vars[j]
            # The interaction may be stored in either order.
            for inter_term in (f"{var_i}:{var_j}", f"{var_j}:{var_i}"):
                if inter_term in params:
                    B[i, j] = B[j, i] = 0.5 * params[inter_term]
                    break

    print("\n--- Matrix B (Quadratic/Interaction) ---")
    print(B)

    # 4. Stationary point: solve B z = b instead of forming the inverse.
    #    Only this step can fail, so it is the only thing inside the try.
    try:
        x_stationary = -0.5 * np.linalg.solve(B, b)
    except np.linalg.LinAlgError:
        print("\n[!] Matrix B is Singular.")
        return None, None, None

    print("\n--- Stationary Point (Coded Units) ---")
    for var, val in zip(linear_vars, x_stationary):
        print(f"{var}: {val:.4f}")

    # 5. Predicted response at the stationary point (intercept is 0 if absent)
    b0 = params.get('Intercept', 0.0)
    y_stationary = b0 + 0.5 * np.dot(x_stationary, b)
    print("\n--- Predicted Response at Stationary Point ---")
    print(f"Value: {y_stationary:.4f}")

    # 6. Eigenvalues: B is symmetric by construction, so the symmetric solver
    #    is used and the eigenvalues are guaranteed to be real.
    eigenvalues = np.linalg.eigvalsh(B)
    if np.all(eigenvalues < 0):
        print("Result: The stationary point is a MAXIMUM.")
    elif np.all(eigenvalues > 0):
        print("Result: The stationary point is a MINIMUM.")
    else:
        print("Result: The stationary point is a SADDLE POINT.")

    return linear_vars, x_stationary, y_stationary

In [None]:
# Locate the stationary point of the fitted Conversion surface
# (this assumes 'model_conv' is the fitted model for Conversion).
vars_opt, x_opt, y_opt = perform_canonical_analysis(model_conv)

# Check the constraint: Activity must lie between 55 and 60
if x_opt is not None:
    # Single-row DataFrame mapping each variable name to its stationary value
    pred_input = {var: [val] for var, val in zip(vars_opt, x_opt)}
    pred_df = pd.DataFrame(pred_input)

    # Predict Activity at this exact point
    # (assumes 'model_act_sig' is the fitted reduced Activity model).
    # .iloc[0] selects by position, so it does not depend on the index labels.
    activity_at_peak = model_act_sig.predict(pred_df).iloc[0]

    print(f"\n{'='*20} CONSTRAINT CHECK {'='*20}")
    # The point is a stationary point; whether it is a maximum is reported by
    # the canonical analysis above, so the message does not assume it.
    print(f"At the Conversion stationary point ({y_opt:.2f}%):")
    print(f"Predicted Activity = {activity_at_peak:.2f}")
    if 55 <= activity_at_peak <= 60:
        print("Activity constraint (55-60) is satisfied.")
    else:
        print("Activity constraint (55-60) is NOT satisfied.")


In [None]:
# --------------------------------------------------------
# EXAMINE CONVERSION SLICES AT DIFFERENT TEMPERATURES
# --------------------------------------------------------
# One panel per Temperature level; Time and Catalyst span the axes.
temp_levels = [-1, 0, 1]

contour_slices(
    model=model_conv,
    x_var="Time",
    y_var="Catalyst",
    fixed_var="Temperature",
    fixed_levels=temp_levels,
    title="Conversion at Different Temperature Levels"
)
plt.show()

# --------------------------------------------------------
# PLOT ACTIVITY CONTOUR (only function of Time & Catalyst)
# --------------------------------------------------------
print("\nPlotting Activity contour...")
# 100 x 100 grid over [-1, 1] in both Time and Catalyst
xs = np.linspace(-1, 1, 100)
ys = np.linspace(-1, 1, 100)
X, Y = np.meshgrid(xs, ys)
# Temperature is filled with 0.0; the reduced Activity formula
# ("Activity ~ Time + Catalyst + I(Time**2)") does not reference it.
dfA = pd.DataFrame({"Time": X.ravel(), "Catalyst": Y.ravel(), "Temperature": 0.0})
# Flat predictions reshaped back onto the grid
Z_act = model_act_sig.predict(dfA).to_numpy().reshape(X.shape)

plt.figure(figsize=(6,5))
cp = plt.contourf(xs, ys, Z_act, levels=20, cmap="turbo")
plt.colorbar(cp, label="Activity")
plt.title("Activity Response Surface (Time vs Catalyst)")
plt.xlabel("Time")
plt.ylabel("Catalyst")
plt.show()


In [None]:
temp_levels = [-1, 0, 1]   # Temperature slices
activity_low = 55          # lower bound of the acceptable Activity window
activity_high = 60         # upper bound of the acceptable Activity window

# --------------------------------------------------------
# PREPARE GRID (Time, Catalyst)
# --------------------------------------------------------
xs = np.linspace(-1, 1, 121)
ys = np.linspace(-1, 1, 121)
X, Y = np.meshgrid(xs, ys)

# The reduced Activity model has no Temperature term, so one surface serves
# every slice; the Temperature column is only a filler value.
dfA = pd.DataFrame({
    "Time": X.ravel(),
    "Catalyst": Y.ravel(),
    "Temperature": 0.0
})
Z_act = model_act_sig.predict(dfA).to_numpy().reshape(X.shape)

# 1.0 where Activity falls outside the window, 0.0 inside it. Contouring this
# indicator between explicit levels 0.5 and 1.5 paints exactly the
# out-of-window region, without relying on automatic level selection.
mask = (Z_act < activity_low) | (Z_act > activity_high)
Z_white = mask.astype(float)

# --------------------------------------------------------
# CONVERSION SURFACES FOR EVERY TEMPERATURE SLICE
# --------------------------------------------------------
# Predict all slices first so that the panels can share one set of contour
# levels; otherwise each panel is scaled on its own and the single colorbar
# would only describe the last panel.
conv_surfaces = []
for T_level in temp_levels:
    dfC = pd.DataFrame({
        "Time": X.ravel(),
        "Catalyst": Y.ravel(),
        "Temperature": T_level
    })
    conv_surfaces.append(model_conv.predict(dfC).to_numpy().reshape(X.shape))

conv_levels = np.linspace(
    min(Z.min() for Z in conv_surfaces),
    max(Z.max() for Z in conv_surfaces),
    31,  # 31 edges -> 30 colour bands
)

# --------------------------------------------------------
# GENERATE OVERLAY PLOTS FOR EACH TEMPERATURE SLICE
# --------------------------------------------------------
fig, axes = plt.subplots(1, len(temp_levels), figsize=(18, 5), sharex=True, sharey=True)
# plt.subplots returns a bare Axes (not an array) when there is one panel.
axes = np.atleast_1d(axes)

for ax, T_level, Z_conv in zip(axes, temp_levels, conv_surfaces):
    # --- 1) Conversion base layer (shared levels) ---
    cs1 = ax.contourf(xs, ys, Z_conv, levels=conv_levels, cmap="viridis", alpha=0.9)

    # --- 2) White overlay where Activity is outside the window ---
    ax.contourf(xs, ys, Z_white, levels=[0.5, 1.5], colors=["white"])

    # --- 3) White contours: window bounds (thick) and labelled Activity lines ---
    ax.contour(xs, ys, Z_act, levels=[activity_low, activity_high], colors="white", linewidths=2)
    cp2 = ax.contour(xs, ys, Z_act, levels=10, colors="white", linewidths=1)
    ax.clabel(cp2, inline=True, fontsize=8)

    ax.set_title(f"Temperature = {T_level}")
    ax.set_xlabel("Time")
    ax.set_ylabel("Catalyst")

# One colorbar for Conversion; valid for all panels because the levels are shared
fig.colorbar(cs1, ax=axes, orientation='vertical', label='Conversion')
fig.suptitle(
    f"Overlay Plots: Conversion + Activity (White area = Activity outside {activity_low}–{activity_high})",
    fontsize=16,
)
plt.show()

We can see the desirable setting in the overlay plot with Temperature set to -1.