### 1. Libraries & EDA

In [None]:
# data science
import numpy as np
import pandas as pd

# deep learning & model training
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras


# hyperparam tuning
from hyperopt import STATUS_OK, Trials, fmin, hp, tpe
from sklearn.metrics import mean_squared_error

# Model selection
import mlflow
from mlflow.models import infer_signature

# path
import os


In [2]:
# Load the white-wine quality sample dataset (comma-separated CSV hosted on GitHub)
url = "https://raw.githubusercontent.com/stedy/Machine-Learning-with-R-datasets/master/whitewines.csv"
raw_df = pd.read_csv(url, sep=',')

# Preview the first rows; the `quality` column is the prediction target
raw_df.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8,6
1,6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,9.5,6
2,8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,10.1,6
3,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6
4,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6


In [3]:
# Count the missing entries in every column
raw_df.isna().sum()

fixed acidity           0
volatile acidity        0
citric acid             0
residual sugar          0
chlorides               0
free sulfur dioxide     0
total sulfur dioxide    0
density                 0
pH                      0
sulphates               0
alcohol                 0
quality                 0
dtype: int64

In [4]:
# Summary statistics per column: count, mean, std, min, quartiles and max
raw_df.describe()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
count,4898.0,4898.0,4898.0,4898.0,4898.0,4898.0,4898.0,4898.0,4898.0,4898.0,4898.0,4898.0
mean,6.854788,0.278241,0.334192,6.391415,0.045772,35.308085,138.360657,0.994027,3.188267,0.489847,10.514267,5.877909
std,0.843868,0.100795,0.12102,5.072058,0.021848,17.007137,42.498065,0.002991,0.151001,0.114126,1.230621,0.885639
min,3.8,0.08,0.0,0.6,0.009,2.0,9.0,0.98711,2.72,0.22,8.0,3.0
25%,6.3,0.21,0.27,1.7,0.036,23.0,108.0,0.991723,3.09,0.41,9.5,5.0
50%,6.8,0.26,0.32,5.2,0.043,34.0,134.0,0.99374,3.18,0.47,10.4,6.0
75%,7.3,0.32,0.39,9.9,0.05,46.0,167.0,0.9961,3.28,0.55,11.4,6.0
max,14.2,1.1,1.66,65.8,0.346,289.0,440.0,1.03898,3.82,1.08,14.2,9.0


### 2. Data Preparation

In [5]:
# Hold out 25% of the rows as the test set; the fixed seed keeps the split repeatable
train, test = train_test_split(
    raw_df,
    test_size=0.25,
    random_state=42,
)


In [6]:
# Training split: feature matrix (every column except `quality`) and 1-D target vector
train_x = train.drop(columns="quality").to_numpy()
train_y = train["quality"].to_numpy()

# Held-out test split, extracted the same way
test_x = test.drop(columns="quality").to_numpy()
test_y = test["quality"].to_numpy()

In [7]:
# Carve a validation set (20%) out of the training rows.
# The split is derived from the `train` DataFrame instead of re-splitting
# `train_x` / `train_y` in place, so re-running this cell always yields the
# same arrays rather than shrinking the training set a little more each time.
train_x, valid_x, train_y, valid_y = train_test_split(
    train.drop(['quality'], axis = 1).values,
    train[['quality']].values.ravel(),
    test_size = 0.2,
    random_state = 42
)

# Infer the model signature (input/output schema) from the training features and targets
signature = infer_signature(train_x, train_y)

### 3. Training our ANN model


In [8]:
# Number of feature columns in the (2-D) training matrix; used as the model's input width
train_x.shape[-1]

11

In [9]:
# Train one candidate model and record it as a nested MLflow run

def train_model(params, epochs, train_x, train_y, valid_x, valid_y, test_x, test_y):
    """Fit a small regression network for one hyperparameter setting and log it to MLflow.

    The network is: input -> Normalization (statistics from ``train_x``) ->
    Dense(64, relu) -> Dense(1), optimised with SGD on mean squared error.

    Parameters
    ----------
    params : dict
        Hyperparameters; must provide ``'lr'`` (learning rate) and ``'momentum'``.
        The whole dict is logged to MLflow as run parameters.
    epochs : int
        Number of training epochs.
    train_x, train_y : array-like
        Training features and targets. Also used to compute the normalization
        statistics and to infer the logged model signature.
    valid_x, valid_y : array-like
        Validation features and targets, used during fitting and for the
        reported RMSE.
    test_x, test_y : array-like
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    dict
        ``{"loss": <validation RMSE>, "status": STATUS_OK, "model": <fitted model>}``,
        the result format consumed by hyperopt.
    """

    # Column-wise normalization statistics, computed from the training data only
    mean = np.mean(train_x, axis = 0)
    var = np.var(train_x, axis = 0)

    # Define model architecture
    model = keras.Sequential(
        [
            keras.Input([train_x.shape[1]]),
            keras.layers.Normalization(mean = mean, variance = var),
            keras.layers.Dense(64, activation = 'relu'),
            keras.layers.Dense(1)
        ]
    )

    # Compile the model
    model.compile(
        optimizer = keras.optimizers.SGD(
            learning_rate = params['lr'],
            momentum = params['momentum']
        ),
        loss = "mean_squared_error",
        metrics = [keras.metrics.RootMeanSquaredError()]
    )

    # Derive the signature from the data this call actually trains on, instead of
    # relying on a notebook-level variable that may be stale or undefined.
    model_signature = infer_signature(train_x, train_y)

    # Train the model and track it with MLflow as a nested (child) run
    with mlflow.start_run(nested = True):
        model.fit(train_x, train_y, validation_data = (valid_x, valid_y),
                  epochs = epochs,
                  batch_size = 64
                  )

        # Model evaluation: evaluate() returns [loss, rmse] for the single compiled metric
        eval_result = model.evaluate(valid_x, valid_y, batch_size = 64)

        eval_rmse = eval_result[1]

        # Log the params & results
        mlflow.log_params(params)
        mlflow.log_metric("evaluation_rmse", eval_rmse)

        # Log the model (keyword `name`, as the parent run does) & return the status
        mlflow.tensorflow.log_model(model, name = "model", signature = model_signature)

        return {"loss": eval_rmse, "status": STATUS_OK, "model": model}

In [10]:
## Objective handed to hyperopt: one call trains and evaluates one candidate model
def objective(params):
    """Train a model for the sampled `params` and return train_model's result dict."""
    # The data splits come from the notebook-level arrays; the epoch count is fixed at 3
    return train_model(
        params,
        epochs=3,
        train_x=train_x,
        train_y=train_y,
        valid_x=valid_x,
        valid_y=valid_y,
        test_x=test_x,
        test_y=test_y,
    )


In [11]:
# Hyperparameter search space sampled by hyperopt
space = dict(
    lr=hp.loguniform("lr", np.log(1e-5), np.log(1e-1)),  # log-uniform between 1e-5 and 1e-1
    momentum=hp.uniform("momentum", 0.0, 1.0),           # uniform between 0 and 1
)

In [12]:
mlflow.set_experiment("../white-wine-quality")

# Parent run: each hyperopt evaluation below is recorded as a nested child run
with mlflow.start_run():
    # Conduct hyperparam search using hyperopt
    trials = Trials()
    best = fmin(
        fn = objective,
        space = space,
        algo = tpe.suggest,
        max_evals = 4,
        trials = trials,
        # Seed the sampler so the sequence of suggested hyperparameters is
        # repeatable, in line with the fixed seeds used for the data splits.
        rstate = np.random.default_rng(42)
    )

    # Pick the trial with the lowest loss (validation RMSE)
    best_run = min(trials.results, key = lambda result: result["loss"])

    # Log the best parameters, loss, and model on the parent run
    mlflow.log_params(best)
    mlflow.log_metric('eval_rmse', best_run['loss'])
    mlflow.tensorflow.log_model(best_run['model'], name = 'model', signature = signature)

    # Print best params & loss
    print(f"Best parameters: {best}")
    print(f"Best evaluated RMSE: {best_run['loss']}")

2026/01/26 15:20:38 INFO alembic.runtime.plugins: setup plugin alembic.autogenerate.schemas
2026/01/26 15:20:38 INFO alembic.runtime.plugins: setup plugin alembic.autogenerate.tables
2026/01/26 15:20:38 INFO alembic.runtime.plugins: setup plugin alembic.autogenerate.types
2026/01/26 15:20:38 INFO alembic.runtime.plugins: setup plugin alembic.autogenerate.constraints
2026/01/26 15:20:38 INFO alembic.runtime.plugins: setup plugin alembic.autogenerate.defaults
2026/01/26 15:20:38 INFO alembic.runtime.plugins: setup plugin alembic.autogenerate.comments
2026/01/26 15:20:38 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2026/01/26 15:20:38 INFO mlflow.store.db.utils: Updating database tables
2026/01/26 15:20:38 INFO alembic.runtime.migration: Context impl SQLiteImpl.
2026/01/26 15:20:38 INFO alembic.runtime.migration: Will assume non-transactional DDL.
2026/01/26 15:20:38 INFO alembic.runtime.migration: Context impl SQLiteImpl.
2026/01/26 15:20:38 INFO alembic.runtime

  0%|          | 0/4 [00:00<?, ?trial/s, best loss=?]

2026-01-26 15:20:38.302674: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M2
2026-01-26 15:20:38.302847: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2026-01-26 15:20:38.302854: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
2026-01-26 15:20:38.303295: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2026-01-26 15:20:38.303316: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Epoch 1/3                                            

  0%|          | 0/4 [00:00<?, ?trial/s, best loss=?]

2026-01-26 15:20:38.570680: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m 1/46[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m14s[0m 331ms/step - loss: 35.7082 - root_mean_squared_error: 5.9756
[1m13/46[0m [32m━━━━━[0m[37m━━━━━━━━━━━━━━━[0m [1m0s[0m 4ms/step - loss: 35.1372 - root_mean_squared_error: 5.9276   
[1m26/46[0m [32m━━━━━━━━━━━[0m[37m━━━━━━━━━[0m [1m0s[0m 4ms/step - loss: 35.4794 - root_mean_squared_error: 5.9563
[1m39/46[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m0s[0m 4ms/step - loss: 35.5845 - root_mean_squared_error: 5.9652
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 35.5850 - root_mean_squared_error: 5.9652
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 35.5450 - root_mean_squared_error: 5.9620 - val_loss: 35.3766 - val_root_mean_squared_error: 5.9478

Epoch 2/3                                            

[1m 1/46[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 14ms/step - loss: 36.1385 - root_mean_squared_error: 6.0115
[1m13/46[0m [32m━━━━━[0m[37m━━━




Epoch 1/3                                                                     

[1m 1/46[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m10s[0m 233ms/step - loss: 35.9698 - root_mean_squared_error: 5.9975
[1m14/46[0m [32m━━━━━━[0m[37m━━━━━━━━━━━━━━[0m [1m0s[0m 4ms/step - loss: 35.0884 - root_mean_squared_error: 5.9235   
[1m27/46[0m [32m━━━━━━━━━━━[0m[37m━━━━━━━━━[0m [1m0s[0m 4ms/step - loss: 34.9807 - root_mean_squared_error: 5.9144
[1m41/46[0m [32m━━━━━━━━━━━━━━━━━[0m[37m━━━[0m [1m0s[0m 4ms/step - loss: 34.9593 - root_mean_squared_error: 5.9126
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - loss: 34.7402 - root_mean_squared_error: 5.8941 - val_loss: 33.8951 - val_root_mean_squared_error: 5.8219

Epoch 2/3                                                                     

[1m 1/46[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 14ms/step - loss: 33.8329 - root_mean_squared_error: 5.8166
[1m14/46[0m [32m━━━━━━[0m[37m━━━━━━━━━━━━━━[0m 




Epoch 1/3                                                                      

[1m 1/46[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m10s[0m 230ms/step - loss: 35.8701 - root_mean_squared_error: 5.9892
[1m14/46[0m [32m━━━━━━[0m[37m━━━━━━━━━━━━━━[0m [1m0s[0m 4ms/step - loss: 35.9983 - root_mean_squared_error: 5.9998   
[1m28/46[0m [32m━━━━━━━━━━━━[0m[37m━━━━━━━━[0m [1m0s[0m 4ms/step - loss: 35.9635 - root_mean_squared_error: 5.9969
[1m42/46[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 4ms/step - loss: 35.8806 - root_mean_squared_error: 5.9900
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - loss: 35.5732 - root_mean_squared_error: 5.9643 - val_loss: 35.4813 - val_root_mean_squared_error: 5.9566

Epoch 2/3                                                                      

[1m 1/46[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 14ms/step - loss: 37.7338 - root_mean_squared_error: 6.1428
[1m15/46[0m [32m━━━━━━[0m[37m━━━━━━━━━━━━━━[0m




Epoch 1/3                                                                      

[1m 1/46[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m10s[0m 237ms/step - loss: 37.3207 - root_mean_squared_error: 6.1091
[1m14/46[0m [32m━━━━━━[0m[37m━━━━━━━━━━━━━━[0m [1m0s[0m 4ms/step - loss: 16.4412 - root_mean_squared_error: 3.9065   
[1m28/46[0m [32m━━━━━━━━━━━━[0m[37m━━━━━━━━[0m [1m0s[0m 4ms/step - loss: 10.7078 - root_mean_squared_error: 3.0644
[1m42/46[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 4ms/step - loss: 8.2039 - root_mean_squared_error: 2.6382 
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - loss: 2.5944 - root_mean_squared_error: 1.6107 - val_loss: 0.6148 - val_root_mean_squared_error: 0.7841

Epoch 2/3                                                                      

[1m 1/46[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 14ms/step - loss: 0.6286 - root_mean_squared_error: 0.7929
[1m15/46[0m [32m━━━━━━[0m[37m━━━━━━━━━━━━━━[0m [




100%|██████████| 4/4 [00:21<00:00,  5.40s/trial, best loss: 0.8028776049613953]
Best parameters: {'lr': 0.04181875089060582, 'momentum': 0.22563247666543063}
Best evaluated RMSE: 0.8028776049613953


### 4. Output data for inferencing 'best' model

In [13]:
# Make sure the output folder is present (no error if it already exists)
os.makedirs("data", exist_ok=True)

# Persist the held-out test split, without the index column, for later use
test.to_csv("data/test.csv", index=False)
