# Sequential Optimization of Coffee Extraction Parameters Using Optuna


In [14]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import optuna as op

In [15]:
# Coffee dose in grams: lower bound, upper bound, and sampling increment.
dose_low, dose_high, dose_step = 10, 15, 0.1

# Grinder setting (unitless dial value): lower and upper bound.
grinde_size_low, grinde_size_high = 6, 10

# Water temperature in degrees Celsius: lower and upper bound.
water_temp_low, water_temp_high = 85, 92

# Amount of water poured, in grams: lower and upper bound.
pouring_amount_low, pouring_amount_high = 150, 180

In [16]:
from optuna.distributions import FloatDistribution, IntDistribution
from IPython.display import Markdown


def define_search_params():
    """Build the search space used for every recipe suggestion.

    Returns:
        dict: parameter name -> Optuna distribution. The dose is a stepped
        float range; grinder size, water temperature and pouring amount are
        integer ranges. All bounds come from the module-level ``*_low`` /
        ``*_high`` constants (plus ``dose_step`` for the dose).
    """
    dose_range = FloatDistribution(low=dose_low, high=dose_high, step=dose_step)
    grinder_range = IntDistribution(low=grinde_size_low, high=grinde_size_high)
    temperature_range = IntDistribution(low=water_temp_low, high=water_temp_high)
    pouring_range = IntDistribution(low=pouring_amount_low, high=pouring_amount_high)

    # Key order is kept as dose, grinder, temperature, pouring amount.
    return dict(
        dose=dose_range,
        grinde_size=grinder_range,
        water_temp=temperature_range,
        pouring_amount=pouring_range,
    )


# Display metadata for each tunable parameter:
# parameter key -> (label shown in the recipe table, unit string).
ingredients = dict(
    dose=("DOSE", "g"),
    grinde_size=("Grinder Size", ""),
    water_temp=("Water Temperature", "C"),
    pouring_amount=("Pouring Water Amount", "g"),
)


def create_recipe_table(params):
    """Render a set of recipe parameters as a Markdown table.

    Args:
        params: mapping that contains a value for every key of the
            module-level ``ingredients`` dict.

    Returns:
        str: a Markdown table (header, separator, then one row per
        ingredient in ``ingredients`` order); every line ends with a newline.

    Raises:
        KeyError: if ``params`` lacks one of the ``ingredients`` keys.
    """
    rows = [
        "| Ingredients | Unit | Amount |",
        "| --- | --- | --- |",
    ]
    for key, (label, unit) in ingredients.items():
        amount = params[key]
        # The dose keeps one decimal place; all other amounts are passed
        # through int() and printed as integers.
        shown = f"{amount:.1f}" if key == "dose" else f"{int(amount):d}"
        rows.append(f"| {label} | {unit} | {shown} |")
    return "".join(row + "\n" for row in rows)

In [17]:
import os

# SQLite file that backs the study.
db_name = "optuna_coffee_param.db"

# Remove any database left over from an earlier run so that this notebook
# always starts with an empty study.
if os.path.exists(db_name):
    os.remove(db_name)

# The study maximizes the reported score. Because the file was removed just
# above, the study is created fresh even though load_if_exists=True is passed.
study = op.create_study(
    storage=f"sqlite:///{db_name}",
    study_name="bayesian-coffee",
    load_if_exists=True,
    direction="maximize",
)

[I 2024-09-08 20:21:30,008] A new study created in RDB with name: bayesian-coffee


In [18]:
# Build the search space once; the following cells reuse `search_space` for every ask.
search_space = define_search_params()
# Ask the study for the first candidate recipe (no objective function is run here;
# the score is reported later with study.tell).
trial = study.ask(fixed_distributions=search_space)
# Render the suggested parameters as a Markdown recipe table.
display(Markdown(create_recipe_table(trial.params)))

| Ingredients | Unit | Amount |
| --- | --- | --- |
| DOSE | g | 10.5 |
| Grinder Size |  | 6 |
| Water Temperature | C | 88 |
| Pouring Water Amount | g | 166 |


In [19]:
# Score assigned to the recipe suggested by the previous cell
# (the study was created with direction="maximize", so higher is better).
objective_value = 4
# Report the score for that trial, then request and display the next recipe to try.
study.tell(trial, values=objective_value)
trial = study.ask(fixed_distributions=search_space)
display(Markdown(create_recipe_table(trial.params)))

| Ingredients | Unit | Amount |
| --- | --- | --- |
| DOSE | g | 12.2 |
| Grinder Size |  | 6 |
| Water Temperature | C | 85 |
| Pouring Water Amount | g | 156 |


In [20]:
# Score assigned to the recipe suggested by the previous cell (higher is better).
objective_value = 7
# Report the score for that trial, then request and display the next recipe to try.
study.tell(trial, values=objective_value)
trial = study.ask(fixed_distributions=search_space)
display(Markdown(create_recipe_table(trial.params)))

| Ingredients | Unit | Amount |
| --- | --- | --- |
| DOSE | g | 12.9 |
| Grinder Size |  | 9 |
| Water Temperature | C | 87 |
| Pouring Water Amount | g | 159 |


In [21]:
# Score assigned to the recipe suggested by the previous cell; no new trial is requested here.
objective_value = 8
# As the last expression of the cell, the value returned by `tell` is shown as the cell output.
study.tell(trial, values=objective_value)

FrozenTrial(number=2, state=1, values=[8.0], datetime_start=datetime.datetime(2024, 9, 8, 20, 21, 30, 300610), datetime_complete=datetime.datetime(2024, 9, 8, 20, 21, 30, 373810), params={'dose': 12.9, 'grinde_size': 9, 'water_temp': 87, 'pouring_amount': 159}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'dose': FloatDistribution(high=15.0, log=False, low=10.0, step=0.1), 'grinde_size': IntDistribution(high=10, log=False, low=6, step=1), 'water_temp': IntDistribution(high=92, log=False, low=85, step=1), 'pouring_amount': IntDistribution(high=180, log=False, low=150, step=1)}, trial_id=3, value=None)

#### Fetching the recorded trials from the database


In [22]:
# Show every trial recorded in the study so far as a table (one row per trial).
display(study.trials_dataframe())

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_dose,params_grinde_size,params_pouring_amount,params_water_temp,state
0,0,4.0,2024-09-08 20:21:30.062552,2024-09-08 20:21:30.182823,0 days 00:00:00.120271,10.5,6,166,88,COMPLETE
1,1,7.0,2024-09-08 20:21:30.205145,2024-09-08 20:21:30.279810,0 days 00:00:00.074665,12.2,6,156,85,COMPLETE
2,2,8.0,2024-09-08 20:21:30.300610,2024-09-08 20:21:30.373810,0 days 00:00:00.073200,12.9,9,159,87,COMPLETE


#### Adding a trial with manually chosen parameters


In [23]:
# Hand-picked recipe (not suggested by the sampler) and the score it received.
# Each value lies inside the bounds used to build `search_space`.
modified_params = dict(
    dose=10.1,
    grinde_size=9,
    water_temp=88,
    pouring_amount=168,
)
objective_value = 9
display(Markdown(create_recipe_table(modified_params)))

# Insert the recipe into the study as an already-completed trial, using the
# same distributions as the sampled trials.
study.add_trial(
    op.create_trial(
        state=op.trial.TrialState.COMPLETE,
        params=modified_params,
        distributions=search_space,
        # Marks the trial as manually entered. The key was previously
        # misspelled "maual", which made the attribute hard to look up.
        user_attrs={"manual": True},
        value=objective_value,
    ),
)

| Ingredients | Unit | Amount |
| --- | --- | --- |
| DOSE | g | 10.1 |
| Grinder Size |  | 9 |
| Water Temperature | C | 88 |
| Pouring Water Amount | g | 168 |


In [24]:
# Request and display the next suggestion; at this point the study also
# contains the manually added trial.
# NOTE(review): no score is reported for this trial in the cells shown here.
trial = study.ask(fixed_distributions=search_space)
display(Markdown(create_recipe_table(trial.params)))

| Ingredients | Unit | Amount |
| --- | --- | --- |
| DOSE | g | 11.1 |
| Grinder Size |  | 8 |
| Water Temperature | C | 86 |
| Pouring Water Amount | g | 156 |


In [25]:
# Optuna's optimization-history plot for the study, rendered with fig.show().
fig = op.visualization.plot_optimization_history(study)
fig.show()

In [26]:
# Optuna's parallel-coordinate plot for the study, rendered with fig.show().
fig = op.visualization.plot_parallel_coordinate(study)
fig.show()