In [163]:
import tensorflow as tf
from tensorflow import keras

# Record the library versions this notebook was executed with.
for label, version in (
    ("TensorFlow version:", tf.__version__),
    ("Keras version:", keras.__version__),
):
    print(label, version)

TensorFlow version: 2.16.2
Keras version: 3.11.3


In [164]:
import pandas as pd
import numpy as np
from hyperopt import STATUS_OK,Trials,fmin,hp,tpe
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

import mlflow
from mlflow.models import infer_signature


In [165]:
from ucimlrepo import fetch_ucirepo 
  
# Fetch the dataset registered under id 186 in the UCI repository.
wine_quality = fetch_ucirepo(id=186)

# Features and targets (provided as pandas DataFrames).
X, y = wine_quality.data.features, wine_quality.data.targets

# Print the dataset metadata, followed by the per-variable information.
print(wine_quality.metadata, wine_quality.variables, sep="\n")


{'uci_id': 186, 'name': 'Wine Quality', 'repository_url': 'https://archive.ics.uci.edu/dataset/186/wine+quality', 'data_url': 'https://archive.ics.uci.edu/static/public/186/data.csv', 'abstract': 'Two datasets are included, related to red and white vinho verde wine samples, from the north of Portugal. The goal is to model wine quality based on physicochemical tests (see [Cortez et al., 2009], http://www3.dsi.uminho.pt/pcortez/wine/).', 'area': 'Business', 'tasks': ['Classification', 'Regression'], 'characteristics': ['Multivariate'], 'num_instances': 4898, 'num_features': 11, 'feature_types': ['Real'], 'demographics': [], 'target_col': ['quality'], 'index_col': None, 'has_missing_values': 'no', 'missing_values_symbol': None, 'year_of_dataset_creation': 2009, 'last_updated': 'Wed Nov 15 2023', 'dataset_doi': '10.24432/C56S3T', 'creators': ['Paulo Cortez', 'A. Cerdeira', 'F. Almeida', 'T. Matos', 'J. Reis'], 'intro_paper': {'ID': 252, 'type': 'NATIVE', 'title': 'Modeling wine preferences

In [166]:
# Wrap the fetched features and targets as DataFrames.
ind_df = pd.DataFrame(X)
target_df = pd.DataFrame(y)

# Copy before attaching the target. A plain `data = ind_df` only binds a second
# name to the same frame, so adding 'quality' to `data` would also add it to
# `ind_df`, which would then no longer be a features-only frame.
data = ind_df.copy()
data['quality'] = target_df


In [167]:
# Display the combined frame: the feature columns plus the 'quality' column added above.
data

Unnamed: 0,fixed_acidity,volatile_acidity,citric_acid,residual_sugar,chlorides,free_sulfur_dioxide,total_sulfur_dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.70,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,5
1,7.8,0.88,0.00,2.6,0.098,25.0,67.0,0.99680,3.20,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.99700,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.99800,3.16,0.58,9.8,6
4,7.4,0.70,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,5
...,...,...,...,...,...,...,...,...,...,...,...,...
6492,6.2,0.21,0.29,1.6,0.039,24.0,92.0,0.99114,3.27,0.50,11.2,6
6493,6.6,0.32,0.36,8.0,0.047,57.0,168.0,0.99490,3.15,0.46,9.6,5
6494,6.5,0.24,0.19,1.2,0.041,30.0,111.0,0.99254,2.99,0.46,9.4,6
6495,5.5,0.29,0.30,1.1,0.022,20.0,110.0,0.98869,3.34,0.38,12.8,7


In [171]:
## Loading and splitting data
SPLIT_FRACTION = 0.20   # share held out at each split
SPLIT_SEED = 42         # fixed seed so both splits are reproducible

ind_df = pd.DataFrame(X)
target_df = pd.DataFrame(y)

# Work on an independent copy so attaching the target never touches `ind_df`.
data = ind_df.copy()
data['quality'] = target_df

# Separate the target column from the predictors.
dep_data = data['quality']
ind_data = data.drop(columns=['quality'])

# First split: hold out the test set.
X_train, X_test, y_train, y_test = train_test_split(
    ind_data, dep_data, test_size=SPLIT_FRACTION, random_state=SPLIT_SEED
)

# Second split: carve a validation set out of the remaining training rows.
train_x, valid_x, train_y, valid_y = train_test_split(
    X_train, y_train, test_size=SPLIT_FRACTION, random_state=SPLIT_SEED
)



In [172]:
def train_model(params,epochs,train_x,train_y,valid_x,valid_y,test_x,test_y):
    """Train one candidate regressor and record it as a nested MLflow run.

    Args:
        params: Hyperparameter mapping; must contain ``'lr'`` and ``'momentum'``
            for the SGD optimizer. The whole mapping is logged to MLflow.
        epochs: Number of training epochs.
        train_x: Training features; must be 2-D (``train_x.shape[1]`` is used
            as the input width).
        train_y: Training target.
        valid_x: Validation features, used during fitting and for the score.
        valid_y: Validation target.
        test_x: Accepted for call compatibility; not used by this function.
        test_y: Accepted for call compatibility; not used by this function.

    Returns:
        dict with ``'loss'`` (RMSE on the validation data), ``'status'``
        (``STATUS_OK``) and ``'model'`` (the fitted Keras model).
    """
    ## Model architecture
    # Per-feature normalization statistics are computed from the training data
    # only and passed to Keras as plain NumPy vectors rather than pandas objects.
    train_stats = np.asarray(train_x, dtype=np.float64)
    mean_s = train_stats.mean(axis=0)
    var = train_stats.var(axis=0)
    model = keras.Sequential(
        [keras.Input([train_x.shape[1]]),
         keras.layers.Normalization(mean=mean_s, variance=var),
         keras.layers.Dense(64, activation='relu'),
         keras.layers.Dense(1)])

    ## Compile the model
    model.compile(
        optimizer=keras.optimizers.SGD(learning_rate=params['lr'], momentum=params['momentum']),
        loss='mean_squared_error',
        metrics=[keras.metrics.RootMeanSquaredError()])

    ## Train the model
    with mlflow.start_run(nested=True):
        model.fit(train_x, train_y, validation_data=(valid_x, valid_y), epochs=epochs, batch_size=64)

        ## Evaluate the model
        # evaluate() returns [loss, rmse]: the loss first, then the compiled metric.
        eval_result = model.evaluate(valid_x, valid_y, batch_size=64)
        eval_rmse = eval_result[1]

        ## Signature
        # The signature inferred from NumPy arrays depends on dtype and shape
        # (with a variable batch dimension), so a few rows are enough; there is
        # no need to run the whole training set through predict() on every trial.
        sample_x = np.asarray(train_x[:5], dtype=np.float32)
        signature = infer_signature(sample_x, model.predict(sample_x))

        ## Log the params and result
        mlflow.log_params(params)
        mlflow.log_metric('eval_rmse', eval_rmse)

        ## Log the model
        mlflow.tensorflow.log_model(model, 'model', signature=signature)
    return {
        'loss': eval_rmse,
        'status': STATUS_OK,
        'model': model
    }

    

In [None]:
## Objective function
def objective(params):
    """Hyperopt objective: train one candidate for 3 epochs on the notebook's splits.

    Passes `params` to `train_model` together with the module-level train,
    validation and test splits, and returns its result dict unchanged.
    """
    splits = dict(
        train_x=train_x,
        train_y=train_y,
        valid_x=valid_x,
        valid_y=valid_y,
        test_x=X_test,
        test_y=y_test,
    )
    return train_model(params, epochs=3, **splits)


In [174]:


# Hyperopt search space for the SGD optimizer settings.
space = dict(
    lr=hp.loguniform('lr', np.log(1e-5), np.log(1e-1)),  # learning rate, log-uniform over [1e-5, 1e-1]
    momentum=hp.uniform('momentum', 0.1, 0.9),           # momentum, uniform over [0.1, 0.9]
)


In [None]:
## Before running this, make sure you are in your dev environment and that the
## MLflow UI is running (started from a terminal).
mlflow.set_experiment("/wine-quality")
with mlflow.start_run():
    trials = Trials()
    best = fmin(
        fn=objective,
        space=space,
        algo=tpe.suggest,
        max_evals=4,
        trials=trials
    )
    # Trial result with the lowest loss (min returns the first minimum, the same
    # element that sorting by loss and taking index 0 would give).
    best_run = min(trials.results, key=lambda x: x['loss'])
    mlflow.log_params(best)
    mlflow.log_metric('eval_rmse', best_run['loss'])

    ## Log the best model on the parent run
    # `np` and `infer_signature` are already imported at the top of the
    # notebook, so they are not re-imported here.
    best_model = best_run["model"]
    train_x_np = np.array(train_x, dtype=np.float32)
    preds = best_model.predict(train_x_np)
    correct_signature = infer_signature(train_x_np, preds)
    mlflow.tensorflow.log_model(best_model, 'model', signature=correct_signature)
    print(f'Best parameters :{best}')
    print(f"Best eval rmse :{best_run['loss']}")

    
    

Epoch 1/3                                            

[1m 1/65[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1:02[0m 979ms/step - loss: 34.7373 - root_mean_squared_error: 5.8938
[1m 3/65[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 30ms/step - loss: 34.1690 - root_mean_squared_error: 5.8453   
[1m 5/65[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m2s[0m 36ms/step - loss: 34.1857 - root_mean_squared_error: 5.8468
[1m 6/65[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m2s[0m 42ms/step - loss: 34.1750 - root_mean_squared_error: 5.8459
[1m 7/65[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m2s[0m 46ms/step - loss: 34.1281 - root_mean_squared_error: 5.8419
[1m 8/65[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m3s[0m 58ms/step - loss: 34.1155 - root_mean_squared_error: 5.8408
[1m10/65[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m2s[0m 53ms/step - loss: 34.1533 - root_mean_squared_error: 5.8440
[1m12/65[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m2s[0m 50ms/step - loss: 34.1735





Epoch 1/3                                                                     

[1m 1/65[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m53s[0m 841ms/step - loss: 33.5983 - root_mean_squared_error: 5.7964
[1m 3/65[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 26ms/step - loss: 33.3436 - root_mean_squared_error: 5.7738  
[1m 5/65[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 30ms/step - loss: 31.3541 - root_mean_squared_error: 5.5943
[1m 7/65[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 28ms/step - loss: 29.0621 - root_mean_squared_error: 5.3755
[1m 9/65[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 28ms/step - loss: 26.7703 - root_mean_squared_error: 5.1428
[1m12/65[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m1s[0m 26ms/step - loss: 23.8595 - root_mean_squared_error: 4.8291
[1m15/65[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m1s[0m 25ms/step - loss: 21.7279 - root_mean_squared_error: 4.5899
[1m18/65[0m [32m━━━━━[0m[37m━━━━━━━━━━━━━━━[0m [1m1s[0m 23





Epoch 1/3                                                                      

[1m 1/65[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m41s[0m 654ms/step - loss: 35.3935 - root_mean_squared_error: 5.9492
[1m 5/65[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 13ms/step - loss: 34.6592 - root_mean_squared_error: 5.8871  
[1m 9/65[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 13ms/step - loss: 34.6288 - root_mean_squared_error: 5.8846
[1m15/65[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m0s[0m 12ms/step - loss: 34.5514 - root_mean_squared_error: 5.8780
[1m20/65[0m [32m━━━━━━[0m[37m━━━━━━━━━━━━━━[0m [1m0s[0m 12ms/step - loss: 34.4895 - root_mean_squared_error: 5.8727
[1m24/65[0m [32m━━━━━━━[0m[37m━━━━━━━━━━━━━[0m [1m0s[0m 12ms/step - loss: 34.4761 - root_mean_squared_error: 5.8716
[1m28/65[0m [32m━━━━━━━━[0m[37m━━━━━━━━━━━━[0m [1m0s[0m 12ms/step - loss: 34.4710 - root_mean_squared_error: 5.8712
[1m32/65[0m [32m━━━━━━━━━[0m[37m━━━━━━━━━━━[0m [1





Epoch 1/3                                                                      

[1m 1/65[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m53s[0m 829ms/step - loss: 36.8073 - root_mean_squared_error: 6.0669
[1m 4/65[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 18ms/step - loss: 35.2597 - root_mean_squared_error: 5.9375  
[1m 7/65[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 18ms/step - loss: 34.4442 - root_mean_squared_error: 5.8680
[1m11/65[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m0s[0m 17ms/step - loss: 33.4436 - root_mean_squared_error: 5.7812
[1m16/65[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m0s[0m 15ms/step - loss: 32.1179 - root_mean_squared_error: 5.6631
[1m20/65[0m [32m━━━━━━[0m[37m━━━━━━━━━━━━━━[0m [1m0s[0m 14ms/step - loss: 31.0414 - root_mean_squared_error: 5.5646
[1m23/65[0m [32m━━━━━━━[0m[37m━━━━━━━━━━━━━[0m [1m0s[0m 15ms/step - loss: 30.2411 - root_mean_squared_error: 5.4897
[1m26/65[0m [32m━━━━━━━━[0m[37m━━━━━━━━━━━━[0m [1





100%|██████████| 4/4 [01:08<00:00, 17.17s/trial, best loss: 0.765654981136322] 
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step




Best parameters :{'lr': 0.0013731016466518, 'momentum': 0.7352551333441845}
Best eval rmse :0.765654981136322


In [None]:
## Inference: the best model can be loaded using its URI from the MLflow UI
from mlflow.models import convert_input_example_to_serving_input, validate_serving_input

# Run-specific model ID (the model with the lowest RMSE, ~0.76); replace it with
# the ID from your own tracking server when re-running.
model_uri = 'models:/m-b34793258c1340b5a27cbb9073a72b11'

# Convert the test features into a serving payload and validate it against the model.
serving_payload = convert_input_example_to_serving_input(X_test)
validate_serving_input(model_uri, serving_payload)

Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 2551.06it/s] 


[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step


array([[6.513489 ],
       [5.0877047],
       [6.256715 ],
       ...,
       [6.7648506],
       [5.3786   ],
       [5.5734005]], dtype=float32)

In [183]:
### Inference through the generic pyfunc model interface
model_uri = 'models:/m-b34793258c1340b5a27cbb9073a72b11'
loaded_model = mlflow.pyfunc.load_model(model_uri=model_uri)

## Predict on the test features, cast to float32 first (the models in this
## notebook are logged with a signature inferred from float32 arrays)
X_test_np = X_test.astype(np.float32)
loaded_model.predict(X_test_np)



[1m41/41[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 31ms/step


array([[6.513489 ],
       [5.0877047],
       [6.256715 ],
       ...,
       [6.7648506],
       [5.3786   ],
       [5.5734005]], dtype=float32)