# Wine Classification

In this project you'll be analyzing another set of data from the [UC Irvine Machine Learning Repository](https://archive.ics.uci.edu/ml/). 

Your goal is to classify wines grown in the same region of Italy, but from different cultivars (cultivated varieties) of grape. The data consists of a class (1, 2, or 3) followed by 13 features as described in the [wine.names](data/wine.names) file. 

Follow the directions in each cell to complete the project.

In [None]:
# ======================================|
#  DO NOT CHANGE ANYTHING IN THIS CELL! |
# ======================================|

%matplotlib inline

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import pandas as pd

In [None]:
# Load the training data from 'data/wine_train.csv' into
# a pandas dataframe.

# Plain training frame, exactly as parsed from the CSV.
wine = pd.read_csv('data/wine_train.csv')

# Both splits again, this time with 'Class' parsed as a categorical and the
# whole frame passed through get_dummies (train first, then test).
wine_train, wine_test = (
    pd.get_dummies(pd.read_csv(csv_path, dtype={'Class': 'category'}))
    for csv_path in ('data/wine_train.csv', 'data/wine_test.csv')
)

In [33]:
# Preview the top five rows of the training frame as a quick
# check that the CSV parsed into the expected columns.
wine.head(n=5)

Unnamed: 0,Class,Alcohol,Malic Acid,Ash,Alkalinity of Ash,Magnesium,Total Phenols,Flavanoids,Nonflavanoid Phenols,Proanthocyanins,Color Intensity,Hue,OD280/OD315 of Diluted Wine,Proline
0,1,13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050
1,1,13.16,2.36,2.67,18.6,101,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185
2,1,14.37,1.95,2.5,16.8,113,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480
3,1,13.24,2.59,2.87,21.0,118,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735
4,1,14.2,1.76,2.45,15.2,112,3.27,3.39,0.34,1.97,6.75,1.05,2.85,1450


In [34]:
# Count the non-null entries in every column; each column should
# report all 148 records if the file loaded completely.
wine.count(axis=0)

Class                          148
Alcohol                        148
Malic Acid                     148
Ash                            148
Alkalinity of Ash              148
Magnesium                      148
Total Phenols                  148
Flavanoids                     148
Nonflavanoid Phenols           148
Proanthocyanins                148
Color Intensity                148
Hue                            148
OD280/OD315 of Diluted Wine    148
Proline                        148
dtype: int64

In [57]:
# Declare placeholders and variables for your TensorFlow model here (if applicable)

# --- Hyperparameters -------------------------------------------------------
learn_rate = .0005   # step size handed to the gradient-descent optimizer
batch_size = 13      # rows fed to the model per training step
epochs = 50          # number of passes of the outer training loop


def _class_column_last(frame):
    """Return `frame` with the 'Class' label moved after all other columns.

    The training CSV lists 'Class' as its first column (the test CSV is
    assumed to share that layout). Code that takes "the first 13 columns"
    positionally would then pick up the label as a feature and lose the last
    real feature. With the label moved to the end, the first 13 columns are
    exactly the 13 features, and `frame.Class` keeps working unchanged.
    """
    feature_columns = [name for name in frame.columns if name != 'Class']
    return frame[feature_columns + ['Class']]


# --- Data ------------------------------------------------------------------
# Re-read both splits with 'Class' kept as a single categorical column (the
# training loop accesses `.Class`), so that get_dummies emits a column for
# every category known to the frame rather than only those present in a
# given sample of rows.
wine_train = _class_column_last(
    pd.read_csv('data/wine_train.csv', dtype={'Class': 'category'}))
wine_test = _class_column_last(
    pd.read_csv('data/wine_test.csv', dtype={'Class': 'category'}))

# Test inputs are selected by name so the label can never leak into them.
# `.values` replaces DataFrame.as_matrix(), which newer pandas no longer has.
test_features = wine_test.drop('Class', axis=1).values.astype(np.float32)
test_targets = pd.get_dummies(wine_test.Class).values.astype(np.float32)

# --- Graph -----------------------------------------------------------------
# Inputs: one row of 13 features per sample.
x = tf.placeholder(tf.float32, [None, 13])

# Targets: one-hot encoded class per sample.
y_ = tf.placeholder(tf.float32, [None, 3])

# Weights (small random start) and biases (zeros) of a single affine layer.
W = tf.Variable(tf.truncated_normal([13, 3], stddev=0.1))

b = tf.Variable(tf.zeros([3]))  

# Predicted class probabilities.
y = tf.nn.softmax(tf.matmul(x, W) + b)

# Mean squared difference between the one-hot targets and the probabilities.
cost = tf.reduce_mean(tf.squared_difference(y_, y))

train_step = tf.train.GradientDescentOptimizer(learn_rate).minimize(cost)




In [58]:
# Define your TensorFlow or scikit-learn model here
# Softmax over a single affine layer: class probabilities for each input row.
y = tf.nn.softmax(tf.add(tf.matmul(x, W), b))

In [59]:
# Train the model in this cell

def _features_and_targets(frame):
    """Split a wine dataframe into model inputs and one-hot targets.

    Features are selected by name (every column except 'Class'), so the
    label never ends up among the inputs regardless of where the 'Class'
    column sits in the frame.

    Args:
        frame: dataframe with a 'Class' column plus the feature columns.

    Returns:
        (features, targets) as float32 arrays: `features` holds every
        non-'Class' column, `targets` the get_dummies encoding of 'Class'.
    """
    # `.values` replaces DataFrame.as_matrix(), which newer pandas no longer has.
    features = frame.drop('Class', axis=1).values.astype(np.float32)
    # get_dummies turns our categorical data into one-hot vectors
    targets = pd.get_dummies(frame.Class).values.astype(np.float32)
    return features, targets


with tf.Session() as sess:

    # Initialize all of the Variables
    sess.run(tf.global_variables_initializer())

    # Operation for saving all variables (created here; nothing in this
    # cell calls saver.save)
    saver = tf.train.Saver()

    # Training steps per epoch; this does not change between epochs.
    num_batches = wine_train.shape[0] // batch_size

    # Training loop
    for epoch in range(epochs):
        avg_cost = 0.

        for _ in range(num_batches):
            # Randomly select <batch_size> rows. Each call to sample() is an
            # independent draw, so a row can show up in several batches of
            # the same epoch (within one batch, sample() does not repeat
            # rows by default).
            batch = wine_train.sample(n=batch_size)

            # Capture the x and y_ data
            batch_features, batch_targets = _features_and_targets(batch)

            # Run the training step using batch_features and batch_targets
            # as x and y_, respectively and capture the cost at each step
            _, c = sess.run([train_step, cost], feed_dict={x:batch_features, y_:batch_targets})

            # Calculate the average cost for the epoch
            avg_cost += c/num_batches

        # Print epoch results
        print("Epoch %04d cost: %s" % (epoch + 1, "{:.4f}".format(avg_cost)))

    # If our model's most likely classification is equal to the one-hot index
    # add True to our correct_prediction tensor
    correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))

    # Cast the boolean variables as floats and take the mean.
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Prepare the test split with the same column selection as the training
    # batches so that train and test inputs always line up.
    eval_features, eval_targets = _features_and_targets(wine_test)

    # Calculate the percentage of correct answers using the test data
    score = sess.run(accuracy, feed_dict={x: eval_features, y_: eval_targets}) * 100
    print("\nThe model correctly identified %s of the test data." % "{:.2f}%".format(score))
    
    
    

Epoch 0001 cost: 0.2281
Epoch 0002 cost: 0.2168
Epoch 0003 cost: 0.2179
Epoch 0004 cost: 0.2322
Epoch 0005 cost: 0.2187
Epoch 0006 cost: 0.2356
Epoch 0007 cost: 0.2349
Epoch 0008 cost: 0.2295
Epoch 0009 cost: 0.2147
Epoch 0010 cost: 0.2217
Epoch 0011 cost: 0.2150
Epoch 0012 cost: 0.2061
Epoch 0013 cost: 0.2300
Epoch 0014 cost: 0.2093
Epoch 0015 cost: 0.2113
Epoch 0016 cost: 0.2487
Epoch 0017 cost: 0.2417
Epoch 0018 cost: 0.2278
Epoch 0019 cost: 0.2307
Epoch 0020 cost: 0.2019
Epoch 0021 cost: 0.2287
Epoch 0022 cost: 0.1888
Epoch 0023 cost: 0.2272
Epoch 0024 cost: 0.1979
Epoch 0025 cost: 0.1732
Epoch 0026 cost: 0.2087
Epoch 0027 cost: 0.2187
Epoch 0028 cost: 0.2237
Epoch 0029 cost: 0.2290
Epoch 0030 cost: 0.1971
Epoch 0031 cost: 0.2073
Epoch 0032 cost: 0.2031
Epoch 0033 cost: 0.2028
Epoch 0034 cost: 0.2105
Epoch 0035 cost: 0.2235
Epoch 0036 cost: 0.2041
Epoch 0037 cost: 0.2305
Epoch 0038 cost: 0.2438
Epoch 0039 cost: 0.2190
Epoch 0040 cost: 0.2020
Epoch 0041 cost: 0.2029
Epoch 0042 cost:

In [61]:
# Run the trained model on 'data/wine_test.csv' here.
# Be sure to print out the accuracy!
#
# The trained weights exist only inside the training cell's session, which is
# closed when its `with` block exits. The pass over the test data is therefore
# executed in that cell, and its result is kept in `score` (a percentage).
# This cell reports that value.
print("Test accuracy: {:.2f}%".format(score))