In [21]:
import pandas as pd
import numpy as np
from sklearn import preprocessing

## Importing necessary modules required

In [22]:
pd.options.display.max_rows = None
pd.options.display.max_columns = None

## Maximising the rows and columns view in jupyter notebook.

In [23]:
data = pd.read_csv('winequality-red.csv')
data

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
5,7.4,0.66,0.0,1.8,0.075,13.0,40.0,0.9978,3.51,0.56,9.4,5
6,7.9,0.6,0.06,1.6,0.069,15.0,59.0,0.9964,3.3,0.46,9.4,5
7,7.3,0.65,0.0,1.2,0.065,15.0,21.0,0.9946,3.39,0.47,10.0,7
8,7.8,0.58,0.02,2.0,0.073,9.0,18.0,0.9968,3.36,0.57,9.5,7
9,7.5,0.5,0.36,6.1,0.071,17.0,102.0,0.9978,3.35,0.8,10.5,5


In [24]:
df = data.copy()
df

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
5,7.4,0.66,0.0,1.8,0.075,13.0,40.0,0.9978,3.51,0.56,9.4,5
6,7.9,0.6,0.06,1.6,0.069,15.0,59.0,0.9964,3.3,0.46,9.4,5
7,7.3,0.65,0.0,1.2,0.065,15.0,21.0,0.9946,3.39,0.47,10.0,7
8,7.8,0.58,0.02,2.0,0.073,9.0,18.0,0.9968,3.36,0.57,9.5,7
9,7.5,0.5,0.36,6.1,0.071,17.0,102.0,0.9978,3.35,0.8,10.5,5


In [25]:
df['quality'].median()

6.0

In [26]:
targets = np.where(df['quality'] > df['quality'].median() , 1 , 0 )
inputs = df.drop(['quality'], axis =1)

In [27]:
scaled_inputs = preprocessing.scale(inputs)

## Scaling the inputs after dropping the unecessary features which might affect accuracy of model.

In [28]:
shuffled_indices = np.arange(inputs.shape[0])
np.random.shuffle(shuffled_indices)

shuffled_inputs = scaled_inputs[shuffled_indices]
shuffled_targets = targets[shuffled_indices]

## Shuffling of data is done for proper training of data as this shuffled data will be used for training model it should have
##   all possible outcomes shown to model.

In [29]:
inputs_count = inputs.shape[0]
unscaled_inputs_count = int(0.8*inputs_count)
validation_count = int(0.1*unscaled_inputs_count)
test_count = inputs_count - unscaled_inputs_count - validation_count

train_inputs = shuffled_inputs[:unscaled_inputs_count]
train_targets = shuffled_targets[:unscaled_inputs_count]

validation_inputs = shuffled_inputs[unscaled_inputs_count:unscaled_inputs_count+validation_count]
validation_targets = shuffled_targets[unscaled_inputs_count:unscaled_inputs_count+validation_count]

test_inputs = shuffled_inputs[unscaled_inputs_count+validation_count:]
test_targets = shuffled_targets[unscaled_inputs_count+validation_count:]

## The data is divides into a Training , validation and Testing stages.

In [30]:
import tensorflow as tf

In [31]:
input_s = train_inputs.shape[1]
output_s= 2
hidden_s = 50

## Here the input elements , output elements and the hidden layer i.e. the neural network size is specified.

model = tf.keras.Sequential([
#                           
                            tf.keras.layers.Dense(hidden_s,activation='relu'),
                            tf.keras.layers.Dense(hidden_s,activation='relu'),    
                            tf.keras.layers.Dense(output_s,activation='softmax'),
    
    
])

## Model is given with activation function , output function and in this sequential is same as y = ax + b.

model.compile(optimizer='adam', loss ='sparse_categorical_crossentropy', metrics=['accuracy'])

## Model is compiled using adam and loss is calculated using sparse_categorical_crossentropy.

batch_s = 400
max_epochs = 50
early_stopping= tf.keras.callbacks.EarlyStopping(patience=2)

## Here the batch size and iteration(epochs) are specified and early stopping is used as we dont want our model to overfit the data.

model.fit(train_inputs,train_targets,batch_size = batch_s,epochs = max_epochs ,callbacks=[early_stopping],validation_data = (validation_inputs,validation_targets),verbose=2)

## The model is trained here and the required data is also given.

Train on 1279 samples, validate on 127 samples
Epoch 1/50
1279/1279 - 1s - loss: 0.6840 - accuracy: 0.5551 - val_loss: 0.5822 - val_accuracy: 0.7874
Epoch 2/50
1279/1279 - 0s - loss: 0.5789 - accuracy: 0.7826 - val_loss: 0.4958 - val_accuracy: 0.9134
Epoch 3/50
1279/1279 - 0s - loss: 0.5129 - accuracy: 0.8483 - val_loss: 0.4413 - val_accuracy: 0.9134
Epoch 4/50
1279/1279 - 0s - loss: 0.4751 - accuracy: 0.8577 - val_loss: 0.4054 - val_accuracy: 0.8976
Epoch 5/50
1279/1279 - 0s - loss: 0.4508 - accuracy: 0.8561 - val_loss: 0.3805 - val_accuracy: 0.8976
Epoch 6/50
1279/1279 - 0s - loss: 0.4314 - accuracy: 0.8561 - val_loss: 0.3620 - val_accuracy: 0.8976
Epoch 7/50
1279/1279 - 0s - loss: 0.4134 - accuracy: 0.8569 - val_loss: 0.3467 - val_accuracy: 0.8976
Epoch 8/50
1279/1279 - 0s - loss: 0.3955 - accuracy: 0.8577 - val_loss: 0.3340 - val_accuracy: 0.8976
Epoch 9/50
1279/1279 - 0s - loss: 0.3780 - accuracy: 0.8593 - val_loss: 0.3237 - val_accuracy: 0.8898
Epoch 10/50
1279/1279 - 0s - loss: 

<tensorflow.python.keras.callbacks.History at 0x1c844615048>

In [32]:
loss , accuracy = model.evaluate(test_inputs,test_targets)

## Here loss and accuracy of model is evaluated.



In [33]:
print('\n Test Loss : {0: .2f}. Test Accuracy : {1: .2f}'.format(loss,accuracy*100.))


 Test Loss :  0.20. Test Accuracy :  94.30
