# Example for the usage of categorical variables in DoE

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import FormatStrFormatter
import matplotlib

from bofire.data_models.constraints.api import (
    LinearEqualityConstraint,
    LinearInequalityConstraint,
)
from bofire.data_models.domain.api import Domain
from bofire.data_models.features.api import ContinuousInput, ContinuousOutput, CategoricalInput, DiscreteInput, ContinuousBinaryInput
from bofire.strategies.doe.design import find_local_max_ipopt, find_find_local_max_ipopt_binary_naive

In [None]:
def visualize_example(x, y):
    """Plot the constraint boundary lines together with a set of design points.

    A new 10x10 inch figure is created, titled "Linear model", with the
    horizontal axis labelled $x_1$ and the vertical axis labelled $x_2$.

    Args:
        x: Horizontal coordinates of the design points.
        y: Vertical coordinates of the design points (same length as ``x``).
    """
    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot()
    ax.set_title("Linear model")
    ax.set_xlabel("$x_1$")
    ax.set_ylabel("$x_2$")
    # NOTE: this changes the global default size for figures created later; it
    # does not resize `fig`, which was created with an explicit figsize above.
    plt.rcParams["figure.figsize"] = (10, 8)

    # plot feasible polytope: each entry is (x-coordinates, y-coordinates)
    boundary_lines = [
        ([10, 0, 0], [0, 0, 15]),  # the two coordinate axes
        ([-5, 10], [10, -5]),      # line x1 + x2 = 5
        ([0, 4], [15, -5]),        # line x1 + 0.2 * x2 = 3
        ([0, 5], [-2, 3]),         # line x1 - x2 = 2
    ]
    for line_x, line_y in boundary_lines:
        ax.plot(line_x, line_y, linewidth=2)

    # plot D-optimal solutions; the label reports the actual number of points
    # instead of a hard-coded count
    ax.scatter(
        x=x,
        y=y,
        marker="o",
        s=40,
        color="orange",
        label=f"optimal_design solution, {np.size(x)} points",
    )

    # Attach the legend to this axes explicitly rather than to the implicit
    # "current" axes.
    ax.legend()

This will fail, as categorical variables cannot be used in DoE. In addition, there is no option within DoE that allows categorical variables within the constraints; hence, even if DoE allowed categorical variables within its domain, there would be no way to make use of them.

In [None]:
# Domain with three continuous inputs and one categorical input, plus three
# linear inequality constraints that only involve the continuous inputs.
domain = Domain(
    inputs=[
        ContinuousInput(key="x1", bounds=(-1, 1)),
        ContinuousInput(key="x2", bounds=(0.1, 1)),
        ContinuousInput(key="x3", bounds=(0, 0.6)),
        CategoricalInput(
            key="k1",
            categories=["A", "B", "C"],
            allowed=[True, True, True],
        ),
    ],
    outputs=[ContinuousOutput(key="y")],
    constraints=[
        LinearInequalityConstraint(
            features=features,
            coefficients=coefficients,
            rhs=rhs,
        )
        for features, coefficients, rhs in [
            (["x1", "x2", "x3"], [1, 1, 1], 1),
            (["x1", "x2"], [5, 4], 3.9),
            (["x1", "x2"], [-20, 5], -3),
        ]
    ],
)

# Request a 12-experiment design for a "linear" model. The notebook text states
# that this call is expected to fail because of the categorical input.
# The result is converted to a NumPy array and transposed.
d_optimal_design = (
    find_local_max_ipopt(
        domain,
        "linear",
        n_experiments=12,
        ipopt_options={"disp": 0},
    )
    .to_numpy()
    .T
)

## Use binary variables to encode categorical variables
This is WIP. There are no binary variables yet, and there is no mapping from binary variables to categorical variables.
In this example we have three constraints, a: x1 + x2 <= 5, b: x1 + 0.2 x2 <= 3, and c: x1 - x2 <= 2, together with the bounds 0 <= x1 <= 5 and 0 <= x2 <= 15.
We want only two of these three constraints to be active.

In [None]:
# Two bounded continuous inputs with all three linear inequality constraints
# (a, b and c from the text) imposed at once.
domain = Domain(
    inputs=[
        ContinuousInput(key="x1", bounds=(0, 5)),
        ContinuousInput(key="x2", bounds=(0, 15)),
    ],
    outputs=[ContinuousOutput(key="y")],
    constraints=[
        LinearInequalityConstraint(
            features=["x1", "x2"],
            coefficients=coefficients,
            rhs=rhs,
        )
        for coefficients, rhs in [
            ([1, 1], 5),
            ([1, 0.2], 3),
            ([1, -1], 2),
        ]
    ],
)

# Request a 12-experiment design for a "linear" model, then convert the result
# to a NumPy array and transpose it.
d_optimal_design = (
    find_local_max_ipopt(
        domain,
        "linear",
        n_experiments=12,
        ipopt_options={"disp": 0},
    )
    .to_numpy()
    .T
)

In [None]:
# Rows 0 and 1 of the transposed design array are plotted against each other
# (presumably the x1 and x2 values -- confirm the column order of the design).
visualize_example(
    x=d_optimal_design[0],
    y=d_optimal_design[1],
)

When binary variables are present in the optimization problem, we need to relax them in order to solve the problem efficiently. The relaxation is achieved by letting the binary variables take values in the interval [0,1]. Hence, to showcase the relaxed optimization problem, we can use continuous variables bounded to the interval [0,1] to imitate the relaxed binary variables. Try out what happens if you fix the bounds of "a1" and "a2" to 0 or 1.

In [None]:
# x1 and x2 are bounded continuous inputs; a1 and a2 are continuous inputs on
# [0, 1] that imitate relaxed binary variables and are listed as features of
# every constraint.
domain = Domain(
    inputs=[
        ContinuousInput(key="x1", bounds=(0, 5)),
        ContinuousInput(key="x2", bounds=(0, 15)),
        ContinuousInput(key="a1", bounds=(0, 1)),
        ContinuousInput(key="a2", bounds=(0, 1)),
    ],
    outputs=[ContinuousOutput(key="y")],
    constraints=[
        LinearInequalityConstraint(
            features=["x1", "x2", "a1", "a2"],
            coefficients=coefficients,
            rhs=rhs,
        )
        for coefficients, rhs in [
            # Case 1: a and b are active
            ([1, 1, 10, -10], 15),
            ([1, 0.2, 2, -2], 5),
            ([1, -1, -3, 3], 5),
            # Case 2: a and c are active
            ([1, 1, -10, -10], 5),
            ([1, 0.2, 2, 2], 7),
            ([1, -1, -3, -3], 2),
            # Case 3: c and b are active
            ([1, 1, 0, -10], 5),
            ([1, 0.2, 0, 2], 5),
            ([1, -1, 0, 3], 5),
        ]
    ],
)

# Request a 12-experiment design for a "linear" model and show it.
d_optimal_design = find_local_max_ipopt(
    domain,
    "linear",
    n_experiments=12,
    ipopt_options={"disp": 0},
)
print(d_optimal_design)
# From here on only the transposed NumPy array is kept under this name.
d_optimal_design = d_optimal_design.to_numpy().T

In [None]:
# Rows 2 and 3 of the transposed design array are plotted against each other
# (presumably x1 and x2, with a1 and a2 in rows 0 and 1 -- confirm the column
# order of the design).
visualize_example(
    x=d_optimal_design[2],
    y=d_optimal_design[3],
)

Instead of using ContinuousInput we can use ContinuousBinaryInput, which behaves like ContinuousInput but can easily be fixed to either 0 or 1 and only allows the bounds (0, 1).

In [None]:
# Same setup as with plain continuous stand-ins, but a1 and a2 are declared as
# ContinuousBinaryInput with bounds (0, 1); both are listed as features of
# every constraint.
domain = Domain(
    inputs=[
        ContinuousInput(key="x1", bounds=(0, 5)),
        ContinuousInput(key="x2", bounds=(0, 15)),
        ContinuousBinaryInput(key="a1", bounds=(0, 1)),
        ContinuousBinaryInput(key="a2", bounds=(0, 1)),
    ],
    outputs=[ContinuousOutput(key="y")],
    constraints=[
        LinearInequalityConstraint(
            features=["x1", "x2", "a1", "a2"],
            coefficients=coefficients,
            rhs=rhs,
        )
        for coefficients, rhs in [
            # Case 1: a and b are active
            ([1, 1, 10, -10], 15),
            ([1, 0.2, 2, -2], 5),
            ([1, -1, -3, 3], 5),
            # Case 2: a and c are active
            ([1, 1, -10, -10], 5),
            ([1, 0.2, 2, 2], 7),
            ([1, -1, -3, -3], 2),
            # Case 3: c and b are active
            ([1, 1, 0, -10], 5),
            ([1, 0.2, 0, 2], 5),
            ([1, -1, 0, 3], 5),
        ]
    ],
)

# Request a 12-experiment design for a "linear" model and show it.
d_optimal_design = find_local_max_ipopt(
    domain,
    "linear",
    n_experiments=12,
    ipopt_options={"disp": 0},
)
print(d_optimal_design)
# From here on only the transposed NumPy array is kept under this name.
d_optimal_design = d_optimal_design.to_numpy().T

In [None]:
# Rows 2 and 3 of the transposed design array are plotted against each other
# (presumably x1 and x2, with a1 and a2 in rows 0 and 1 -- confirm the column
# order of the design).
visualize_example(
    x=d_optimal_design[2],
    y=d_optimal_design[3],
)

## Exhaustive Search
A naive approach to finding which setting of the binary variables leads to a minimum is an exhaustive search of the problem space.


In [None]:
# x1 and x2 are bounded continuous inputs; a1 and a2 are ContinuousBinaryInput
# features (no explicit bounds given here) listed in every constraint.
domain = Domain(
    inputs=[
        ContinuousInput(key="x1", bounds=(0, 5)),
        ContinuousInput(key="x2", bounds=(0, 15)),
        ContinuousBinaryInput(key="a1"),
        ContinuousBinaryInput(key="a2"),
    ],
    outputs=[ContinuousOutput(key="y")],
    constraints=[
        LinearInequalityConstraint(
            features=["x1", "x2", "a1", "a2"],
            coefficients=coefficients,
            rhs=rhs,
        )
        for coefficients, rhs in [
            # Case 1: a and b are active
            ([1, 1, 10, -10], 15),
            ([1, 0.2, 2, -2], 5),
            ([1, -1, -3, 3], 5),
            # Case 2: a and c are active
            ([1, 1, -10, -10], 5),
            ([1, 0.2, 2, 2], 7),
            ([1, -1, -3, -3], 2),
            # Case 3: c and b are active
            ([1, 1, 0, -10], 5),
            ([1, 0.2, 0, 2], 5),
            ([1, -1, 0, 3], 5),
        ]
    ],
)

# Run the naive binary search for a 12-experiment "linear" design, with the
# combination (1, 1) passed as prohibited.
# NOTE(review): the doubled "find_find_" prefix matches the name imported at
# the top of the file -- confirm it is the intended function name.
d_optimal_design = find_find_local_max_ipopt_binary_naive(
    domain,
    "linear",
    n_experiments=12,
    ipopt_options={"disp": 0},
    prohibited_binary_combinations=[(1, 1)],
)
print(d_optimal_design)
# From here on only the transposed NumPy array is kept under this name.
d_optimal_design = d_optimal_design.to_numpy().T


In [None]:
# Rows 2 and 3 of the transposed design array are plotted against each other
# (presumably x1 and x2, with a1 and a2 in rows 0 and 1 -- confirm the column
# order of the design).
visualize_example(
    x=d_optimal_design[2],
    y=d_optimal_design[3],
)