# Wine quality prediction with TensorFlow. Part 2

# Machine learning

Code Created by Luis Enrique Acevedo Galicia

Date: 2019-10-03

Here, I present a simple way to build a machine learning model that predicts wine quality. There are 11 inputs and one target. This part covers only the machine learning analysis (a neural network) with TensorFlow. The data preprocessing can be found in part 1.

The data set was obtained from https://www.kaggle.com/vishalyo990/prediction-of-quality-of-wine/data

# The Libraries

In [2]:
import numpy as np
import tensorflow as tf

# Batch data

In [3]:
#Divide all data in batches
class Batch_data_reader():
    """Iterate over one split of the wine data in fixed-size batches.

    The split is read from ``WINE_data_<DataSet>.npz`` in the current working
    directory. The archive must contain the arrays ``inputs`` and ``targets``.

    Parameters
    ----------
    DataSet : str
        Suffix selecting the file, e.g. ``'Train'`` loads
        ``WINE_data_Train.npz``.
    size_batch : int, optional
        Number of rows per batch. ``None`` (the default) puts the whole
        split into a single batch.

    Notes
    -----
    Only complete batches are produced: ``batch_count`` is the integer
    quotient of the row count by ``size_batch``, so leftover rows are never
    yielded. When the batches are exhausted the reader rewinds itself, so
    the same instance can be iterated again (once per epoch).
    """

    def __init__(self,DataSet, size_batch = None):
        # The context manager closes the archive's file handle; astype()
        # returns copies, so the arrays stay valid after the file is closed.
        with np.load('WINE_data_{0}.npz'.format(DataSet)) as npz:
            # The builtin float / int are what the removed NumPy aliases
            # np.float / np.int referred to, so the dtypes are unchanged.
            self.inputs = npz['inputs'].astype(float)
            self.targets = npz['targets'].astype(int)
        # Default: one batch that holds every row of the split
        if size_batch is None:
            self.size_batch = self.inputs.shape[0]
        else:
            self.size_batch = size_batch
        self.batch_current = 0
        # Number of complete batches available per pass
        self.batch_count = self.inputs.shape[0]//self.size_batch

    def __next__(self):
        """Return the next ``(inputs, targets)`` batch.

        Raises
        ------
        StopIteration
            When all complete batches have been returned. The position is
            reset first, so the next iteration starts from the beginning.
        """
        if self.batch_current >= self.batch_count:
            self.batch_current = 0
            raise StopIteration()
        start = self.batch_current*self.size_batch
        stop = (self.batch_current+1)*self.size_batch
        slice_batch = slice(start, stop)
        batch_inputs = self.inputs[slice_batch]
        batch_targets = self.targets[slice_batch]
        self.batch_current += 1
        return batch_inputs, batch_targets

    def __iter__(self):
        """Return the reader itself so it can be used directly in a ``for`` loop."""
        return self

# Machine learning code

In [4]:
# Network dimensions: 11 input features and a single output value
input_size = 11
output_size = 1
# Width shared by every hidden layer
hidden_layer_size = 50

# Clear the default graph before the model is built
tf.reset_default_graph()

# Placeholders fed with one batch of features / target values at a time
inputs = tf.placeholder(tf.float32, shape=[None, input_size])
targets = tf.placeholder(tf.float32, shape=[None, output_size])

# Hidden layer 1: ReLU activation
weights_1 = tf.get_variable("weights_1", shape=[input_size, hidden_layer_size])
biases_1 = tf.get_variable("biases_1", shape=[hidden_layer_size])
outputs_1 = tf.nn.relu(tf.add(tf.matmul(inputs, weights_1), biases_1))

# Hidden layer 2: ReLU activation
weights_2 = tf.get_variable("weights_2", shape=[hidden_layer_size, hidden_layer_size])
biases_2 = tf.get_variable("biases_2", shape=[hidden_layer_size])
outputs_2 = tf.nn.relu(tf.add(tf.matmul(outputs_1, weights_2), biases_2))

# Hidden layer 3: tanh activation
weights_3 = tf.get_variable("weights_3", shape=[hidden_layer_size, hidden_layer_size])
biases_3 = tf.get_variable("biases_3", shape=[hidden_layer_size])
outputs_3 = tf.nn.tanh(tf.add(tf.matmul(outputs_2, weights_3), biases_3))

# Output layer: linear, no activation is applied to the prediction
weights_4 = tf.get_variable("weights_4", shape=[hidden_layer_size, output_size])
biases_4 = tf.get_variable("biases_4", shape=[output_size])
outputs = tf.add(tf.matmul(outputs_3, weights_4), biases_4)

# Loss: mean squared difference between prediction and target
squared_error = tf.squared_difference(outputs, targets)
mean_loss = tf.reduce_mean(squared_error)

# Mean relative error |target - output| / target.
# Despite its name, `accuracy` holds an error; it is reported elsewhere
# in this notebook as (1 - accuracy) * 100.
relative_error = tf.math.divide(tf.subtract(targets, outputs), targets)
out_equals_target = tf.abs(relative_error)
accuracy = tf.reduce_mean(out_equals_target)

# Training step: Adam minimising the mean squared error
optimizer = tf.train.AdamOptimizer(learning_rate=0.0001)
optimize = optimizer.minimize(mean_loss)

# Open the session in which the graph is executed
sess = tf.InteractiveSession()

# Give every variable its initial value
initializer = tf.global_variables_initializer()
sess.run(initializer)

# Rows per training batch
batch_size = 3

# Early-stopping settings: epoch limit and a large starting value that the
# first validation loss is compared against
max_epochs = 100
prev_validation_loss = 9999999.

# Training data is read in mini-batches, validation data as one single batch
train_data = Batch_data_reader('Train', batch_size)
validation_data = Batch_data_reader('Valid')

for epoch_counter in range(max_epochs):

    # --- Training: one optimisation step per batch, losses summed up ---
    curr_epoch_loss = 0.
    for input_batch, target_batch in train_data:
        train_feed = {inputs: input_batch, targets: target_batch}
        _, batch_loss = sess.run([optimize, mean_loss], feed_dict=train_feed)
        curr_epoch_loss += batch_loss

    # Average of the batch losses over the epoch
    curr_epoch_loss = curr_epoch_loss / train_data.batch_count

    # --- Validation: no optimisation step, only loss and relative error ---
    validation_loss = 0.
    validation_accuracy = 0.
    for input_batch, target_batch in validation_data:
        valid_feed = {inputs: input_batch, targets: target_batch}
        validation_loss, validation_accuracy = sess.run(
            [mean_loss, accuracy], feed_dict=valid_feed)

    # Report the epoch; the relative error is shown as (1 - error) * 100
    print('Epoch {0}. Training loss: {1:.3f}. Validation loss: {2:.3f}. '
          'Validation accuracy: {3:.2f}%'.format(
              epoch_counter + 1,
              curr_epoch_loss,
              validation_loss,
              (1 - validation_accuracy) * 100.))

    # Early stop as soon as the validation loss rises above the previous epoch's
    if validation_loss > prev_validation_loss:
        break

    # Remember this epoch's validation loss for the next comparison
    prev_validation_loss = validation_loss

print('End of training.')

# Export the trained parameters as NumPy arrays in a single session call
(TF_w1, TF_b1,
 TF_w2, TF_b2,
 TF_w3, TF_b3,
 TF_w4, TF_b4) = sess.run([weights_1, biases_1,
                           weights_2, biases_2,
                           weights_3, biases_3,
                           weights_4, biases_4])

Epoch 1. Training loss: 10.383. Validation loss: 0.640. Validation accuracy: 88.83%
Epoch 2. Training loss: 0.608. Validation loss: 0.533. Validation accuracy: 89.66%
Epoch 3. Training loss: 0.589. Validation loss: 0.525. Validation accuracy: 89.81%
Epoch 4. Training loss: 0.584. Validation loss: 0.518. Validation accuracy: 89.90%
Epoch 5. Training loss: 0.577. Validation loss: 0.509. Validation accuracy: 90.04%
Epoch 6. Training loss: 0.570. Validation loss: 0.501. Validation accuracy: 90.22%
Epoch 7. Training loss: 0.564. Validation loss: 0.493. Validation accuracy: 90.34%
Epoch 8. Training loss: 0.558. Validation loss: 0.485. Validation accuracy: 90.44%
Epoch 9. Training loss: 0.552. Validation loss: 0.477. Validation accuracy: 90.54%
Epoch 10. Training loss: 0.546. Validation loss: 0.468. Validation accuracy: 90.65%
Epoch 11. Training loss: 0.539. Validation loss: 0.459. Validation accuracy: 90.77%
Epoch 12. Training loss: 0.530. Validation loss: 0.449. Validation accuracy: 90.90%


## Test the model

In [5]:
# Read the test split as one single batch (no batch size given)
test_data = Batch_data_reader('Test')

# Evaluate the mean relative error on the test data
for inputs_batch, targets_batch in test_data:
    test_feed = {inputs: inputs_batch, targets: targets_batch}
    test_accuracy = sess.run([accuracy], feed_dict=test_feed)

# sess.run returned a one-element list; turn the error into a percentage score
test_accuracy_percent = (1 - test_accuracy[0]) * 100.

# Report the result
print('Test accuracy: {0:.5f}%'.format(test_accuracy_percent))

Test accuracy: 90.15363%


# Test your data

In [6]:
# Ask for the 11 wine features, one prompt per feature, in this fixed order.
# A value that cannot be parsed as a number raises ValueError.
feature_names = (
    "fixed acidity",
    "volatile acidity",
    "citric acid",
    "residual sugar",
    "chlorides",
    "free sulfur dioxide",
    "total sulfur dioxide",
    "density",
    "pH",
    "sulphates",
    "alcohol",
)
input1 = [float(input(name + " :")) for name in feature_names]

# Wrap the values as a single-row batch (1 x 11), the nested-list form that
# tf.matmul receives below. Example:
# input1 = [[7.8, 0.76, 0.04, 2.3, 0.092, 15, 54, 0.997, 3.26, 0.65, 9.8]]
input1 = [input1]

# Forward pass with the exported weights: ReLU, ReLU, tanh, then a linear
# output layer whose result is rounded to the nearest whole number.
Out1 = tf.nn.relu(tf.matmul(input1, TF_w1) + TF_b1)
out2 = tf.nn.relu(tf.matmul(Out1, TF_w2) + TF_b2)
out3 = tf.nn.tanh(tf.matmul(out2, TF_w3) + TF_b3)
out4 = tf.round(tf.matmul(out3, TF_w4) + TF_b4)
RT=sess.run(out4)
print('The quality of your wine is ', RT)

fixed acidity :7.8
volatile acidity :0.76
citric acid :0.04
residual sugar :2.3
chlorides :0.092
free sulfur dioxide :15
total sulfur dioxide :54
density :0.997
pH :3.26
sulphates :0.65
alcohol :9.8
The quality of your wine is  [[5.]]
