In [1]:
import numpy as np
import pandas as pd

from tensorflow import keras
from tensorflow.keras import layers, callbacks

from sklearn.model_selection import train_test_split

from sklearn import metrics

In [2]:
# Read the red-wine quality table from the working directory and preview
# its first five rows.
data = pd.read_csv(filepath_or_buffer='winequality-red.csv')
data.head(n=5)

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


## Exploratory Data Analysis

In [3]:
# Distinct values taken by the target column, in order of first appearance.
pd.unique(data['quality'])

array([5, 6, 7, 4, 8, 3], dtype=int64)

In [4]:
# Regression target: the `quality` column of the loaded frame.
y = data.loc[:, 'quality']
y

0       5
1       5
2       5
3       6
4       5
       ..
1594    5
1595    6
1596    6
1597    5
1598    6
Name: quality, Length: 1599, dtype: int64

In [5]:
# Feature matrix: every column of the loaded frame except the target.
X = data.drop(columns=['quality'])
X.head(n=3)

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8


In [6]:
# (rows, columns) of the feature matrix; the column count is the number of
# input features fed to the network.
X.shape

(1599, 11)

## Data Preprocessing
+ Splitting into the Training and Validation Sets

In [7]:
# Hold out 23% of the rows for validation. The fixed random_state makes the
# shuffle, and therefore the split, repeatable across runs.
X_train, X_valid, y_train, y_valid = train_test_split(
    X,
    y,
    test_size=0.23,
    random_state=0,
)

In [8]:
# Pandas-only split of the full frame (features and target together):
# sample 80% of the rows for training with a fixed seed, and keep the rows
# that were not sampled as the validation frame.
df_train = data.sample(random_state=0, frac=0.8)
df_valid = data.drop(index=df_train.index)
display(df_train.head(n=5))

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
1109,10.8,0.47,0.43,2.1,0.171,27.0,66.0,0.9982,3.17,0.76,10.8,6
1032,8.1,0.82,0.0,4.1,0.095,5.0,14.0,0.99854,3.36,0.53,9.6,5
1002,9.1,0.29,0.33,2.05,0.063,13.0,27.0,0.99516,3.26,0.84,11.7,7
487,10.2,0.645,0.36,1.8,0.053,5.0,14.0,0.9982,3.17,0.42,10.0,6
979,12.2,0.45,0.49,1.4,0.075,3.0,6.0,0.9969,3.13,0.63,10.4,5


In [9]:
# Largest value of each column in the sampled training frame.
df_train.max(axis='index')

fixed acidity            15.9000
volatile acidity          1.5800
citric acid               0.7900
residual sugar           13.9000
chlorides                 0.6110
free sulfur dioxide      72.0000
total sulfur dioxide    289.0000
density                   1.0032
pH                        4.0100
sulphates                 1.9800
alcohol                  14.9000
quality                   8.0000
dtype: float64

In [None]:
# Per-feature maximum over the training features.
# NOTE(review): `max_` is not referenced again in the visible cells —
# presumably intended for feature scaling; confirm whether scaling was meant
# to be applied before training.
max_ = X_train.max(axis='index')
max_

## Create a Fully Connected Neural Network

In [None]:
# Fully connected regression network: three hidden ReLU layers of 16 units
# each, followed by a single linear output unit that predicts the quality
# score.
# The input width is read from the training features instead of being
# hard-coded, and is declared with an explicit Input object rather than the
# `input_shape` argument on the first Dense layer.
n_features = X_train.shape[1]
model = keras.Sequential([
    keras.Input(shape=(n_features,)),
    layers.Dense(16, activation='relu'),
    layers.Dense(16, activation='relu'),
    layers.Dense(16, activation='relu'),
    layers.Dense(1),
])

How does the Network learn?

## Using the Optimizer and Loss Functions
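The loss function measures how far the predictions are from the true targets, and the optimizer adjusts the network's weights to reduce that loss. Together they drive the model training process.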

In [None]:
# Train with the Adam optimizer, minimising mean absolute error.
model.compile(
    loss='mae',
    optimizer='adam',
)

### Early Stopping
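 Early stopping halts training once the validation loss stops improving. This limits overfitting (training for too long), while a generous epoch budget guards against underfitting (stopping too soon).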

In [None]:
# Stop training when the validation loss has not improved by at least 0.001
# for 20 consecutive epochs, then roll the model back to its best weights.
early_stopping = callbacks.EarlyStopping(
    monitor='val_loss',  # Keras default, spelled out for the reader
    min_delta=0.001,
    patience=20,
    restore_best_weights=True,
)

In [None]:
# Fit on the training split in mini-batches of 128 rows for at most 300
# epochs, evaluating on the validation split after each epoch. The
# early-stopping callback may end training before the epoch budget is used.
model_data = model.fit(
    X_train,
    y_train,
    validation_data=(X_valid, y_valid),
    epochs=300,
    batch_size=128,
    callbacks=[early_stopping],
)

In [None]:
# Predict on the validation features and report the mean absolute error.
# scikit-learn's signature is (y_true, y_pred), so the true targets go first.
preds = model.predict(X_valid)
score = metrics.mean_absolute_error(y_valid, preds)
print("MAE >> ", score)

In [None]:
# Peek at the first ten raw predictions returned by the model.
preds[:10]

In [None]:
# Compare the first ten true targets with the model's predictions.
# `preds` has one column per output unit, so the slice is flattened here to
# print plain numbers instead of one-element arrays.
for actual, predicted in zip(y_valid[:10], preds[:10].ravel()):
    print(f"True Target: {actual}\t\tPrediction >> {predicted}")

In [None]:
# Turn the per-epoch metrics recorded during fitting into a DataFrame
# (one row per epoch) and preview the first five epochs.
history = pd.DataFrame(data=model_data.history)
history.head(n=5)

In [None]:
# Learning curves: the metrics recorded during fitting, plotted per epoch.
# The title and axis labels let the figure stand alone; the trailing
# semicolon keeps the matplotlib repr out of the cell output.
ax = history.plot(title='Training vs. validation loss')
ax.set(xlabel='Epoch', ylabel='Mean absolute error');