## Optimization of coffee parameters


In [62]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import optuna as op

In [63]:
from sklearn.model_selection import train_test_split

coffee = pd.read_excel("./your_coffee_data.xlsx")
col_params = ["dose (g)", "grind size", "water temperature (C)", "pouring amount (g)", "tds (%)"]
X = coffee[col_params]
y = coffee["target"]

display(X.shape)

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

(20, 5)

In [64]:
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.model_selection import cross_val_score
from sklearn.neural_network import MLPRegressor


def objective_param(trial):
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 50, 200),
        "max_depth": trial.suggest_int("max_depth", 2, 50),
        "min_samples_split": trial.suggest_float("min_samples_split", 0.1, 1.0),
    }
    # model = RandomForestRegressor(**params)
    model = GradientBoostingRegressor(**params)

    # params = {
    #     "hidden_layer_sizes": (trial.suggest_int("n_neurons", 10, 100),),  # Number of neurons in hidden layer
    #     "activation": trial.suggest_categorical("activation", ["relu", "tanh"]),  # activation function
    #     "solver": trial.suggest_categorical("solver", ["adam", "lbfgs"]),  # Optimization Algorithm
    #     "alpha": trial.suggest_float("alpha", 1e-5, 1e-2),  # L2 regularization
    #     "learning_rate": trial.suggest_categorical("learning_rate", ["constant", "adaptive"]),  # Adjustment of learning rate
    #     "max_iter": trial.suggest_int("max_iter", 200, 1000),  # Maximum number of iterations
    # }
    # model = MLPRegressor(**params)

    score = cross_val_score(model, X_train, y_train, cv=3, scoring="neg_mean_squared_error")  # negative mean squared error: -∑(y - y')^2 -> minimization
    rmse = np.sqrt(-score.mean())
    return rmse


study = op.create_study(direction="minimize")
study.optimize(objective_param, n_trials=100, n_jobs=-1)

print("Optimization completed.\n")
print("Best hyperparameters: ", study.best_params)
print("Best RMSE: ", study.best_value)

[I 2024-09-08 15:34:55,665] A new study created in memory with name: no-name-409b6fe2-1d7c-4d90-8f87-0add2c1b4e55
[I 2024-09-08 15:34:57,141] Trial 1 finished with value: 1.6146011396502657 and parameters: {'n_estimators': 60, 'max_depth': 29, 'min_samples_split': 0.4656460367520947}. Best is trial 1 with value: 1.6146011396502657.
[I 2024-09-08 15:34:57,434] Trial 4 finished with value: 1.5662950662632091 and parameters: {'n_estimators': 70, 'max_depth': 39, 'min_samples_split': 0.3612850328539682}. Best is trial 4 with value: 1.5662950662632091.
[I 2024-09-08 15:34:57,968] Trial 3 finished with value: 1.514635938163199 and parameters: {'n_estimators': 91, 'max_depth': 37, 'min_samples_split': 0.3556749613059903}. Best is trial 3 with value: 1.514635938163199.
[I 2024-09-08 15:34:58,052] Trial 7 finished with value: 1.5777039736935496 and parameters: {'n_estimators': 111, 'max_depth': 27, 'min_samples_split': 0.9181901566674525}. Best is trial 3 with value: 1.514635938163199.
[I 2024-

Optimization completed.

Best hyperparameters:  {'n_estimators': 86, 'max_depth': 46, 'min_samples_split': 0.7868245186705273}
Best RMSE:  1.5060755256345344


In [65]:
# best_model = RandomForestRegressor(**study.best_params)
best_model = GradientBoostingRegressor(**study.best_params)
# best_params = study.best_params.copy()
# if "n_neurons" in best_params:
#     best_params["hidden_layer_sizes"] = (best_params.pop("n_neurons"),)
# best_model = MLPRegressor(**best_params)


best_model.fit(X_train, y_train)
test_score = best_model.score(X_test, y_test)

print("Test score: ", test_score)

Test score:  0.3318150709922756


In [66]:
def objective(trial):
    params = {
        "dose (g)": trial.suggest_float("dose (g)", 10, 15),
        "grind size": trial.suggest_int("grind size", 6, 10),
        "water temperature (C)": trial.suggest_int("water temperature (C)", 85, 92),
        "pouring amount (g)": trial.suggest_int("pouring amount (g)", 120, 180),
        "tds (%)": trial.suggest_float("tds (%)", 1.15, 1.35),
    }
    candidate = pd.DataFrame([params], columns=col_params)
    predicted_score = best_model.predict(candidate)[0]
    return -predicted_score


study = op.create_study(direction="minimize")
study.optimize(objective, n_trials=100, n_jobs=-1)

print("Optimization completed.")
print("   Best hyperparameters: ")
for k, v in study.best_params.items():
    print(f"      {k}: {v}")
print(f"   Best predicted score: {(-study.best_value):.4f}")

[I 2024-09-08 15:35:41,724] A new study created in memory with name: no-name-54b5d4b6-e7eb-49c1-af47-7d35fe1c3d70


[I 2024-09-08 15:35:41,750] Trial 1 finished with value: -4.160018515645461 and parameters: {'dose (g)': 12.940103699810795, 'grind size': 9, 'water temperature (C)': 90, 'pouring amount (g)': 140, 'tds (%)': 1.3408225122844877}. Best is trial 1 with value: -4.160018515645461.
[I 2024-09-08 15:35:41,757] Trial 0 finished with value: -5.708103379623933 and parameters: {'dose (g)': 12.240597907932916, 'grind size': 10, 'water temperature (C)': 91, 'pouring amount (g)': 171, 'tds (%)': 1.3000358759241775}. Best is trial 0 with value: -5.708103379623933.
[I 2024-09-08 15:35:41,768] Trial 2 finished with value: -5.540832219182338 and parameters: {'dose (g)': 10.339477629749666, 'grind size': 8, 'water temperature (C)': 91, 'pouring amount (g)': 133, 'tds (%)': 1.3021251990296294}. Best is trial 0 with value: -5.708103379623933.
[I 2024-09-08 15:35:41,780] Trial 3 finished with value: -4.053089528636467 and parameters: {'dose (g)': 11.874636318883415, 'grind size': 6, 'water temperature (C)'

Optimization completed.
   Best hyperparameters: 
      dose (g): 10.09629685049064
      grind size: 7
      water temperature (C): 87
      pouring amount (g): 180
      tds (%): 1.261185904423661
   Best predicted score: 9.1779
