# Wine Quality

In [57]:
import pandas as pd
import numpy as np

import warnings
warnings.filterwarnings("ignore")

In [58]:
df = pd.read_csv("winequalityN.csv")
df.sample(5)


Unnamed: 0,type,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
1969,white,6.3,0.41,0.3,3.2,0.03,49.0,164.0,0.9927,3.53,0.79,11.7,7
2489,white,6.1,0.28,0.24,19.95,0.074,32.0,174.0,0.99922,3.19,0.44,9.3,6
5290,red,9.5,0.37,0.52,2.0,0.082,6.0,26.0,0.998,3.18,0.51,9.5,5
3017,white,5.9,0.34,0.3,3.8,0.035,57.0,135.0,0.99016,3.09,0.34,12.0,6
699,white,7.4,0.31,0.48,14.2,0.042,62.0,204.0,0.9983,3.06,0.59,9.4,5


In [59]:
# wine type
df["wine_type"] = [1 if wine_type == "white" else 0 for wine_type in df["type"]]
df.sample(5)

Unnamed: 0,type,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality,wine_type
5905,red,9.1,0.3,0.34,2.0,0.064,12.0,25.0,0.99516,3.26,0.84,11.7,7,0
5580,red,8.5,0.46,0.31,2.25,0.078,32.0,58.0,0.998,3.33,0.54,9.8,5,0
1279,white,6.6,0.27,0.3,1.9,0.025,14.0,153.0,0.9928,3.29,0.62,10.5,6,1
176,white,7.2,0.32,0.47,5.1,0.044,19.0,65.0,0.991,3.03,0.41,12.6,4,1
5366,red,11.4,0.36,0.69,2.1,0.09,6.0,21.0,1.0,3.17,0.62,9.2,6,0


In [60]:
# wine quality
df["wine_quality"] = [1 if quality > 5 else 0 for quality in df["quality"]]
df.sample(5)

Unnamed: 0,type,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality,wine_type,wine_quality
1773,white,7.8,0.39,0.26,9.9,0.059,33.0,181.0,0.9955,3.04,0.42,10.9,6,1,1
5661,red,9.3,0.655,0.26,2.0,0.096,5.0,35.0,0.99738,3.25,0.42,9.6,5,0,0
6019,red,6.6,0.5,0.0,1.8,0.062,21.0,28.0,0.99352,3.44,0.55,12.3,6,0,1
6223,red,6.7,0.46,0.24,1.7,0.077,18.0,34.0,0.9948,3.39,0.6,10.6,6,0,1
5362,red,11.5,0.315,0.54,2.1,0.084,5.0,15.0,0.9987,2.98,0.7,9.2,6,0,1


In [61]:
df.drop(columns=["type", "quality"], inplace=True)
df.sample(5)

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,wine_type,wine_quality
6411,6.4,0.56,0.15,1.8,0.078,17.0,65.0,0.99294,3.33,0.6,10.5,0,1
5742,9.9,0.25,0.46,1.7,0.062,26.0,42.0,0.9959,3.18,0.83,10.6,0,1
3773,5.0,0.61,0.12,1.3,0.009,65.0,100.0,0.9874,3.26,0.37,13.5,1,0
1491,7.5,0.2,0.49,1.3,0.031,8.0,97.0,0.9918,3.06,0.62,11.1,1,0
5763,7.2,0.635,0.07,2.6,0.077,16.0,86.0,0.99748,3.51,0.54,9.7,0,0


In [64]:
X = df.drop(columns=["wine_quality"])
y = df["wine_quality"]


In [65]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [66]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X_scaled,y, test_size = 0.2, random_state=42)
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

(5197, 12)
(1300, 12)
(5197,)
(1300,)


In [67]:
np.bincount(y_train)

array([1916, 3281])

In [68]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

tf.random.set_seed(42) #This is crucial for ensuring reproducibility in your experiments.

## Callbacks

#### Model Checkpoint - saves the model on a current epoch if the validation metric improved from the previous epoch

In [69]:

cb_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath = "checkpoints/model-{epoch:02d}-{val_accuracy:.2f}.hdf5",
    monitor = "val_accuracy",
    mode="max",
    save_best_only = True,
    verbose = 1
)

#### ReduceLROnPlateau - reduces learning rate when there's no progress in training

In [70]:
reducelr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor = "val_loss",
    mode = "min",
    factor = 0.1,
    patience = 10,
    min_lr = 0.00001,
    verbose = 1
)

In [71]:
earlystop = tf.keras.callbacks.EarlyStopping(
    monitor = "val_accuracy",
    min_delta = 0.001,
    patience = 10,
    verbose = 1,
    mode = "max"
)

In [72]:
csvlog = tf.keras.callbacks.CSVLogger(
    filename = "trianing_log.csv",
    separator = ",",
    append = True
)

## Model Training

In [73]:
model = Sequential([
    Dense(units=32, activation="relu"),
    Dense(units=16, activation="relu"),
    Dense(units=8, activation="relu"),
    Dense(units=1, activation="sigmoid")]
)

In [80]:
loss = tf.keras.losses.BinaryCrossentropy()
opt = tf.keras.optimizers.Adam()
bin_acc = tf.keras.metrics.BinaryAccuracy(name="accuracy")
acc = tf.keras.metrics.Accuracy(name="normal acc")
model.compile(
    loss=loss,
    optimizer=opt,
    metrics=[bin_acc, acc]
)

In [81]:
model.fit(
    X_train,
    y_train,
    epochs = 300,
    validation_data = (X_test, y_test),
    callbacks = [cb_checkpoint, reducelr, earlystop, csvlog]
)

Epoch 1/300
Epoch 1: val_accuracy did not improve from 0.36000
Epoch 2/300
Epoch 2: val_accuracy did not improve from 0.36000
Epoch 3/300
Epoch 3: val_accuracy did not improve from 0.36000
Epoch 4/300
Epoch 4: val_accuracy did not improve from 0.36000
Epoch 5/300
Epoch 5: val_accuracy did not improve from 0.36000
Epoch 6/300
Epoch 6: val_accuracy did not improve from 0.36000
Epoch 7/300
Epoch 7: val_accuracy did not improve from 0.36000
Epoch 8/300
Epoch 8: val_accuracy did not improve from 0.36000
Epoch 9/300
Epoch 9: val_accuracy did not improve from 0.36000
Epoch 10/300
Epoch 10: val_accuracy did not improve from 0.36000

Epoch 10: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 11/300
Epoch 11: val_accuracy did not improve from 0.36000
Epoch 11: early stopping


<keras.src.callbacks.History at 0x7dcd949c6650>

In [56]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_8 (Dense)             (None, 32)                416       
                                                                 
 dense_9 (Dense)             (None, 16)                528       
                                                                 
 dense_10 (Dense)            (None, 8)                 136       
                                                                 
 dense_11 (Dense)            (None, 1)                 9         
                                                                 
Total params: 1089 (4.25 KB)
Trainable params: 1089 (4.25 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
