In [2]:
# Set the seed value for the notebook so the results are reproducible
from numpy.random import seed
seed(1)

In [3]:
# Dependencies
import numpy as np
import pandas as pd

In [4]:
import keras
keras.__version__

Using TensorFlow backend.


'2.3.1'

In [94]:
# Data Set Information:

# The two datasets are related to red and white variants of the Portuguese "Vinho Verde" wine. For more details, consult: [Web Link] or the reference [Cortez et al., 2009]. Due to privacy and logistic issues, only physicochemical (inputs) and sensory (the output) variables are available (e.g. there is no data about grape types, wine brand, wine selling price, etc.).

# These datasets can be viewed as classification or regression tasks. The classes are ordered and not balanced (e.g. there are many more normal wines than excellent or poor ones). Outlier detection algorithms could be used to detect the few excellent or poor wines. Also, we are not sure if all input variables are relevant. So it could be interesting to test feature selection methods.


# Attribute Information:

# For more information, read [Cortez et al., 2009].
# Input variables (based on physicochemical tests):
# 1 - fixed acidity
# 2 - volatile acidity
# 3 - citric acid
# 4 - residual sugar
# 5 - chlorides
# 6 - free sulfur dioxide
# 7 - total sulfur dioxide
# 8 - density
# 9 - pH
# 10 - sulphates
# 11 - alcohol
# Output variable (based on sensory data):
# 12 - quality (score between 0 and 10)

In [5]:
survey = pd.read_csv('Resources/winequality-white.csv')
survey.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8,6
1,6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,9.5,6
2,8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,10.1,6
3,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6
4,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6


## Data Pre-Processing

In [7]:
X = survey.drop("quality", axis=1)
y = survey["quality"]
print(X.shape, y.shape)

(4898, 11) (4898,)


In [8]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from keras.utils import to_categorical

In [9]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=1, stratify=y, train_size=0.75, test_size=0.25)


In [10]:
X_scaler = MinMaxScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [11]:
# Step 1: Label-encode data set
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

In [12]:
# Step 2: Convert encoded labels to one-hot-encoding
y_train_categorical = to_categorical(encoded_y_train)
y_test_categorical = to_categorical(encoded_y_test)

# Create a Deep Learning Model

In [20]:
from keras.models import Sequential
from keras.layers import Dense

In [21]:
# Create model and add layers
model = Sequential()
model.add(Dense(units=100, activation='relu', input_dim=11))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=7, activation='softmax'))

In [22]:
# Compile and fit the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [23]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_5 (Dense)              (None, 100)               1200      
_________________________________________________________________
dense_6 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_7 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_8 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_9 (Dense)              (None, 7)                 707       
Total params: 32,207
Trainable params: 32,207
Non-trainable params: 0
_________________________________________________________________


In [24]:
model.fit(
    X_train_scaled,
    y_train_categorical,
    epochs=100,
    shuffle=True,
    verbose=2
)

Epoch 1/100
 - 1s - loss: 1.3239 - accuracy: 0.4623
Epoch 2/100
 - 0s - loss: 1.1545 - accuracy: 0.5029
Epoch 3/100
 - 0s - loss: 1.1262 - accuracy: 0.5121
Epoch 4/100
 - 0s - loss: 1.1172 - accuracy: 0.5219
Epoch 5/100
 - 0s - loss: 1.1142 - accuracy: 0.5214
Epoch 6/100
 - 0s - loss: 1.0991 - accuracy: 0.5372
Epoch 7/100
 - 0s - loss: 1.0862 - accuracy: 0.5399
Epoch 8/100
 - 0s - loss: 1.0812 - accuracy: 0.5385
Epoch 9/100
 - 0s - loss: 1.0673 - accuracy: 0.5546
Epoch 10/100
 - 0s - loss: 1.0591 - accuracy: 0.5554
Epoch 11/100
 - 0s - loss: 1.0581 - accuracy: 0.5475
Epoch 12/100
 - 0s - loss: 1.0538 - accuracy: 0.5527
Epoch 13/100
 - 0s - loss: 1.0410 - accuracy: 0.5595
Epoch 14/100
 - 0s - loss: 1.0429 - accuracy: 0.5589
Epoch 15/100
 - 0s - loss: 1.0335 - accuracy: 0.5521
Epoch 16/100
 - 0s - loss: 1.0302 - accuracy: 0.5611
Epoch 17/100
 - 0s - loss: 1.0266 - accuracy: 0.5636
Epoch 18/100
 - 0s - loss: 1.0217 - accuracy: 0.5647
Epoch 19/100
 - 0s - loss: 1.0167 - accuracy: 0.5641
Ep

<keras.callbacks.callbacks.History at 0x20678abdd30>

## Quantify our Trained Model

In [25]:
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(
    f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

Normal Neural Network - Loss: 1.1868860113864041, Accuracy: 0.5975510478019714


## Make Predictions

In [26]:
encoded_predictions = model.predict_classes(X_test_scaled[:5])
prediction_labels = label_encoder.inverse_transform(encoded_predictions)

In [27]:
print(f"Predicted classes: {list(y_train_categorical[:5])}")
print(f"Actual Labels: {list(y_test[:5])}")

Predicted classes: [array([0., 0., 1., 0., 0., 0., 0.], dtype=float32), array([1., 0., 0., 0., 0., 0., 0.], dtype=float32), array([0., 0., 0., 1., 0., 0., 0.], dtype=float32), array([0., 0., 1., 0., 0., 0., 0.], dtype=float32), array([0., 0., 0., 1., 0., 0., 0.], dtype=float32)]
Actual Labels: [8, 5, 7, 5, 5]


In [28]:
# Save the model
model.save('whitewinequality_model_trained.h5')

In [29]:
# Load the model
from keras.models import load_model
survey_model = load_model('whitewinequality_model_trained.h5')

In [30]:
#Evaluate
model_loss, model_accuracy = survey_model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2
)

print(f'Loaded Model Loss: {model_loss}, Accuracy: {model_accuracy}')

Loaded Model Loss: 1.1868860113864041, Accuracy: 0.5975510478019714
