In [1]:
# Set the seed value for the notebook so the results are reproducible
from numpy.random import seed
seed(1)

In [2]:
# Dependencies
import numpy as np
import pandas as pd

In [3]:
import keras
keras.__version__

Using TensorFlow backend.


'2.3.1'

In [5]:
# Data Set Information:

# The two datasets are related to red and white variants of the Portuguese "Vinho Verde" wine. For more details, consult: [Web Link] or the reference [Cortez et al., 2009]. Due to privacy and logistic issues, only physicochemical (inputs) and sensory (the output) variables are available (e.g. there is no data about grape types, wine brand, wine selling price, etc.).

# These datasets can be viewed as classification or regression tasks. The classes are ordered and not balanced (e.g. there are many more normal wines than excellent or poor ones). Outlier detection algorithms could be used to detect the few excellent or poor wines. Also, we are not sure if all input variables are relevant. So it could be interesting to test feature selection methods.


# Attribute Information:

# For more information, read [Cortez et al., 2009].
# Input variables (based on physicochemical tests):
# 1 - fixed acidity
# 2 - volatile acidity
# 3 - citric acid
# 4 - residual sugar
# 5 - chlorides
# 6 - free sulfur dioxide
# 7 - total sulfur dioxide
# 8 - density
# 9 - pH
# 10 - sulphates
# 11 - alcohol
# Output variable (based on sensory data):
# 12 - quality (score between 0 and 10)

In [4]:
survey = pd.read_csv('Resources/winequality-red.csv')
survey.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [7]:
# purged_df = survey[["alcohol", "sulphates", "total sulfur dioxide", "volatile acidity", "quality"]]
# purged_df.head()

## Data Pre-Processing

In [5]:
X = survey.drop("quality", axis=1)
y = survey["quality"]
print(X.shape, y.shape)

(1599, 11) (1599,)


In [6]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from keras.utils import to_categorical

In [7]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=1, stratify=y, train_size=0.75, test_size=0.25)


In [8]:
X_scaler = MinMaxScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [9]:
# Step 1: Label-encode data set
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

In [10]:
# Step 2: Convert encoded labels to one-hot-encoding
y_train_categorical = to_categorical(encoded_y_train)
y_test_categorical = to_categorical(encoded_y_test)

# Create a Deep Learning Model

In [12]:
from keras.models import Sequential
from keras.layers import Dense

In [13]:
# Create model and add layers
model = Sequential()
model.add(Dense(units=100, activation='relu', input_dim=11))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=100, activation='relu'))
# model.add(Dense(units=100, activation='relu'))
# model.add(Dense(units=100, activation='relu'))
# model.add(Dense(units=100, activation='relu'))
# model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=6, activation='softmax'))

In [14]:
# Compile and fit the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [15]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 100)               1200      
_________________________________________________________________
dense_2 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_3 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_4 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_5 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_6 (Dense)              (None, 6)                 606       
Total params: 42,206
Trainable params: 42,206
Non-trainable params: 0
__________________________________________________

In [18]:
model.fit(
    X_train_scaled,
    y_train_categorical,
    epochs=500,
    shuffle=True,
    verbose=2
)

Epoch 1/500
 - 0s - loss: 0.4628 - accuracy: 0.8082
Epoch 2/500
 - 0s - loss: 0.4378 - accuracy: 0.8215
Epoch 3/500
 - 0s - loss: 0.4202 - accuracy: 0.8274
Epoch 4/500
 - 0s - loss: 0.4169 - accuracy: 0.8274
Epoch 5/500
 - 0s - loss: 0.4106 - accuracy: 0.8374
Epoch 6/500
 - 0s - loss: 0.4196 - accuracy: 0.8307
Epoch 7/500
 - 0s - loss: 0.3824 - accuracy: 0.8474
Epoch 8/500
 - 0s - loss: 0.3721 - accuracy: 0.8549
Epoch 9/500
 - 0s - loss: 0.3969 - accuracy: 0.8432
Epoch 10/500
 - 0s - loss: 0.4108 - accuracy: 0.8299
Epoch 11/500
 - 0s - loss: 0.4146 - accuracy: 0.8290
Epoch 12/500
 - 0s - loss: 0.3816 - accuracy: 0.8399
Epoch 13/500
 - 0s - loss: 0.3692 - accuracy: 0.8532
Epoch 14/500
 - 0s - loss: 0.3631 - accuracy: 0.8624
Epoch 15/500
 - 0s - loss: 0.4067 - accuracy: 0.8224
Epoch 16/500
 - 0s - loss: 0.3667 - accuracy: 0.8490
Epoch 17/500
 - 0s - loss: 0.3835 - accuracy: 0.8440
Epoch 18/500
 - 0s - loss: 0.3844 - accuracy: 0.8449
Epoch 19/500
 - 0s - loss: 0.3788 - accuracy: 0.8365
Ep

Epoch 155/500
 - 0s - loss: 0.0193 - accuracy: 0.9958
Epoch 156/500
 - 0s - loss: 0.0244 - accuracy: 0.9917
Epoch 157/500
 - 0s - loss: 0.0208 - accuracy: 0.9950
Epoch 158/500
 - 0s - loss: 0.0205 - accuracy: 0.9967
Epoch 159/500
 - 0s - loss: 0.0257 - accuracy: 0.9933
Epoch 160/500
 - 0s - loss: 0.0391 - accuracy: 0.9892
Epoch 161/500
 - 0s - loss: 0.0439 - accuracy: 0.9833
Epoch 162/500
 - 0s - loss: 0.0996 - accuracy: 0.9616
Epoch 163/500
 - 0s - loss: 0.1801 - accuracy: 0.9341
Epoch 164/500
 - 0s - loss: 0.2661 - accuracy: 0.9083
Epoch 165/500
 - 0s - loss: 0.2317 - accuracy: 0.9199
Epoch 166/500
 - 0s - loss: 0.2097 - accuracy: 0.9216
Epoch 167/500
 - 0s - loss: 0.1246 - accuracy: 0.9466
Epoch 168/500
 - 0s - loss: 0.0708 - accuracy: 0.9766
Epoch 169/500
 - 0s - loss: 0.0335 - accuracy: 0.9942
Epoch 170/500
 - 0s - loss: 0.0225 - accuracy: 0.9967
Epoch 171/500
 - 0s - loss: 0.0213 - accuracy: 0.9942
Epoch 172/500
 - 0s - loss: 0.0285 - accuracy: 0.9933
Epoch 173/500
 - 0s - loss: 

Epoch 307/500
 - 0s - loss: 0.2036 - accuracy: 0.9283
Epoch 308/500
 - 0s - loss: 0.0941 - accuracy: 0.9683
Epoch 309/500
 - 0s - loss: 0.0840 - accuracy: 0.9867
Epoch 310/500
 - 0s - loss: 0.0707 - accuracy: 0.9817
Epoch 311/500
 - 0s - loss: 0.0586 - accuracy: 0.9817
Epoch 312/500
 - 0s - loss: 0.0427 - accuracy: 0.9908
Epoch 313/500
 - 0s - loss: 0.0315 - accuracy: 0.9925
Epoch 314/500
 - 0s - loss: 0.0197 - accuracy: 0.9958
Epoch 315/500
 - 0s - loss: 0.0110 - accuracy: 0.9967
Epoch 316/500
 - 0s - loss: 0.0081 - accuracy: 0.9975
Epoch 317/500
 - 0s - loss: 0.0074 - accuracy: 0.9983
Epoch 318/500
 - 0s - loss: 0.0076 - accuracy: 0.9975
Epoch 319/500
 - 0s - loss: 0.0101 - accuracy: 0.9967
Epoch 320/500
 - 0s - loss: 0.0181 - accuracy: 0.9950
Epoch 321/500
 - 0s - loss: 0.0083 - accuracy: 0.9967
Epoch 322/500
 - 0s - loss: 0.0062 - accuracy: 0.9983
Epoch 323/500
 - 0s - loss: 0.0054 - accuracy: 0.9975
Epoch 324/500
 - 0s - loss: 0.0068 - accuracy: 0.9975
Epoch 325/500
 - 0s - loss: 

Epoch 459/500
 - 0s - loss: 0.0056 - accuracy: 0.9983
Epoch 460/500
 - 0s - loss: 0.0045 - accuracy: 0.9983
Epoch 461/500
 - 0s - loss: 0.0044 - accuracy: 0.9983
Epoch 462/500
 - 0s - loss: 0.0049 - accuracy: 0.9975
Epoch 463/500
 - 0s - loss: 0.0044 - accuracy: 0.9975
Epoch 464/500
 - 0s - loss: 0.0045 - accuracy: 0.9983
Epoch 465/500
 - 0s - loss: 0.0051 - accuracy: 0.9983
Epoch 466/500
 - 0s - loss: 0.0049 - accuracy: 0.9983
Epoch 467/500
 - 0s - loss: 0.0110 - accuracy: 0.9967
Epoch 468/500
 - 0s - loss: 0.1336 - accuracy: 0.9566
Epoch 469/500
 - 0s - loss: 0.2254 - accuracy: 0.9224
Epoch 470/500
 - 0s - loss: 0.1944 - accuracy: 0.9324
Epoch 471/500
 - 0s - loss: 0.0926 - accuracy: 0.9708
Epoch 472/500
 - 0s - loss: 0.0607 - accuracy: 0.9791
Epoch 473/500
 - 0s - loss: 0.0614 - accuracy: 0.9817
Epoch 474/500
 - 0s - loss: 0.0469 - accuracy: 0.9883
Epoch 475/500
 - 0s - loss: 0.0380 - accuracy: 0.9892
Epoch 476/500
 - 0s - loss: 0.0115 - accuracy: 0.9975
Epoch 477/500
 - 0s - loss: 

<keras.callbacks.callbacks.History at 0x1a34ad3fd0>

## Quantify our Trained Model

In [19]:
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(
    f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

Normal Neural Network - Loss: 4.0753144264221195, Accuracy: 0.6449999809265137


## Make Predictions

In [20]:
encoded_predictions = model.predict_classes(X_test_scaled[:5])
prediction_labels = label_encoder.inverse_transform(encoded_predictions)

In [21]:
print(f"Predicted classes: {list(y_train_categorical[:5])}")
print(f"Actual Labels: {list(y_test[:5])}")

Predicted classes: [array([0., 0., 1., 0., 0., 0.], dtype=float32), array([0., 0., 1., 0., 0., 0.], dtype=float32), array([0., 0., 1., 0., 0., 0.], dtype=float32), array([0., 0., 0., 1., 0., 0.], dtype=float32), array([0., 0., 0., 1., 0., 0.], dtype=float32)]
Actual Labels: [6, 5, 8, 4, 5]


In [22]:
# Save the model
model.save('redwinequality_model_trained.h5')

In [23]:
# Load the model
from keras.models import load_model
survey_model = load_model('redwinequality_model_trained.h5')

In [24]:
#Evaluate
model_loss, model_accuracy = survey_model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2
)

print(f'Loaded Model Loss: {model_loss}, Accuracy: {model_accuracy}')

Loaded Model Loss: 1.2307132434844972, Accuracy: 0.6000000238418579
