Required packages

In [3]:
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

# Acquire Data

Use `pandas` to read the iris dataset stored as a CSV file

In [4]:
data = pd.read_csv('iris.data', header=None)
data.head()

Unnamed: 0,0,1,2,3,4
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


# Prepare Data

Provide more meaningful column names

In [5]:
data.columns = ['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth', 'Species']

In [6]:
data.head()

Unnamed: 0,SepalLength,SepalWidth,PetalLength,PetalWidth,Species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [7]:
data.Species.unique()

array(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'], dtype=object)

Use 'one hot' encoding to give the labels nominal instead of ordinal values

In [31]:
one_hot = pd.get_dummies(data)
idx = [0, 1, 50, 51, 100, 101]
one_hot.iloc[idx]

Unnamed: 0,SepalLength,SepalWidth,PetalLength,PetalWidth,Species_Iris-setosa,Species_Iris-versicolor,Species_Iris-virginica
0,5.1,3.5,1.4,0.2,1,0,0
1,4.9,3.0,1.4,0.2,1,0,0
50,7.0,3.2,4.7,1.4,0,1,0
51,6.4,3.2,4.5,1.5,0,1,0
100,6.3,3.3,6.0,2.5,0,0,1
101,5.8,2.7,5.1,1.9,0,0,1


# Split Data

Let scikit-learn take care of this

In [32]:
X = one_hot[['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth']] # features
y = one_hot[['Species_Iris-setosa', 'Species_Iris-versicolor', 'Species_Iris-virginica']] # labels

In [33]:
train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=.3)

In [34]:
len(train_X), len(train_y)

(105, 105)

In [35]:
train_X.head(), train_y.head()

(    SepalLength  SepalWidth  PetalLength  PetalWidth
 52          6.9         3.1          4.9         1.5
 58          6.6         2.9          4.6         1.3
 75          6.6         3.0          4.4         1.4
 11          4.8         3.4          1.6         0.2
 67          5.8         2.7          4.1         1.0,
     Species_Iris-setosa  Species_Iris-versicolor  Species_Iris-virginica
 52                    0                        1                       0
 58                    0                        1                       0
 75                    0                        1                       0
 11                    1                        0                       0
 67                    0                        1                       0)

In [36]:
train_X = np.array(train_X).astype(np.float32)
test_X = np.array(test_X).astype(np.float32)
train_y = np.array(train_y).astype(np.float32)
test_y = np.array(test_y).astype(np.float32)

# Train Model

Create a placeholder for the features

In [12]:
x = tf.placeholder(tf.float32, [None, 4])

Create a placeholder for the correct labels

In [13]:
y_hat = tf.placeholder(tf.float32, [None, 3])

Create a variable for the weights (can init with zero for such a small dataset)

In [14]:
W = tf.Variable(tf.zeros([4, 3]))

Create a varible for the bias (again using zero)

In [15]:
b = tf.Variable(tf.zeros([3]))

Use softmax (multinominal logistic regression) classifier.

*(just trust me, it works)*

In [16]:
softmax = tf.nn.softmax(tf.matmul(x, W) + b)

Cost function (cross entropy)

*(keep trusting me)*

In [17]:
cost_fn = tf.reduce_mean(-tf.reduce_sum(y_hat * tf.log(softmax), reduction_indices=[1]))

Optimize the cost function using gradient descent

*(take a leap of faith)*

In [18]:
trainer = tf.train.GradientDescentOptimizer(0.05).minimize(cost_fn)

In [19]:
session = tf.Session()
session.run(tf.global_variables_initializer())
for _ in range(1000):
    session.run(trainer, {x: train_X, y_hat: train_y})

# Test Model

In [20]:
predicted = tf.equal(tf.argmax(softmax, 1), tf.argmax(y_hat, 1))
score = tf.reduce_mean(tf.cast(predicted, tf.float32))

# Evaluate Model

In [21]:
print(session.run(score, {x: test_X, y_hat: test_y}))

0.977778


In [22]:
sanity_check = one_hot.iloc[[0,50,100]]
sanity_check

Unnamed: 0,SepalLength,SepalWidth,PetalLength,PetalWidth,Species_Iris-setosa,Species_Iris-versicolor,Species_Iris-virginica
0,5.1,3.5,1.4,0.2,1,0,0
50,7.0,3.2,4.7,1.4,0,1,0
100,6.3,3.3,6.0,2.5,0,0,1


In [23]:
predictions = session.run(softmax, {x: np.array(sanity_check[sanity_check.columns[:4]]).astype(np.float32)})
labels = one_hot.columns[4:]
for prediction in predictions:
    for probability in enumerate(prediction):
        print("Confidence in {0} is {1}%".format(labels[probability[0]][len('Species_'):], np.around(probability[1]*100,4)))
    print('-'*20)
    
    

Confidence in Iris-setosa is 98.2125%
Confidence in Iris-versicolor is 1.7874%
Confidence in Iris-virginica is 0.0001%
--------------------
Confidence in Iris-setosa is 1.7779%
Confidence in Iris-versicolor is 92.8737%
Confidence in Iris-virginica is 5.3484%
--------------------
Confidence in Iris-setosa is 0.0017%
Confidence in Iris-versicolor is 2.0498%
Confidence in Iris-virginica is 97.9484%
--------------------
