# Classification of Iris

## Package imports

In [1]:
# For building neural networks.
import keras as kr

# For interacting with data sets.
import pandas as pd

# For encoding categorical variables.
import sklearn.preprocessing as pre

# For splitting into training and test sets.
import sklearn.model_selection as mod

Using TensorFlow backend.


## Load data

In [2]:
# Load the iris data set from a URL.
df = pd.read_csv("https://raw.githubusercontent.com/ianmcloughlin/datasets/master/iris.csv")

In [3]:
df

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica


## Inputs

In [4]:
# Separate the inputs from the rest of the variables.
inputs = df[['petal_length', 'petal_width', 'sepal_length', 'sepal_width']]

In [5]:
inputs

Unnamed: 0,petal_length,petal_width,sepal_length,sepal_width
0,1.4,0.2,5.1,3.5
1,1.4,0.2,4.9,3.0
2,1.3,0.2,4.7,3.2
3,1.5,0.2,4.6,3.1
4,1.4,0.2,5.0,3.6
...,...,...,...,...
145,5.2,2.3,6.7,3.0
146,5.0,1.9,6.3,2.5
147,5.2,2.0,6.5,3.0
148,5.4,2.3,6.2,3.4


## Encoded outputs

$$
\begin{align*}
  setosa     & \rightarrow  [1,0,0] \\
  versicolor & \rightarrow  [0,1,0] \\
  virginica  & \rightarrow  [0,0,1]
\end{align*}
$$

In [6]:
# Encode the classes as above.
encoder = pre.LabelBinarizer()
encoder.fit(df['class'])
outputs = encoder.transform(df['class'])

outputs

array([[1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0,

## Idea

The neural network will turn four floating point inputs into three "floating point" outputs.

$$ [5.1, 3.5, 1.4, 0.2] \rightarrow [0.8, 0.19, 0.01] $$

$$ [5.1, 3.5, 1.4, 0.2] \rightarrow [1, 0, 0] $$

## Build model

In [7]:
# Start a neural network, building it by layers.
model = kr.models.Sequential()

# Add a hidden layer with x neurons and an input layer with 4.
model.add(kr.layers.Dense(units=30, activation='relu', input_dim=4))
# Add a three neuron output layer.
model.add(kr.layers.Dense(units=3, activation='softmax'))

# Build the graph.
model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])








## Split

In [8]:
# Split the inputs and outputs into training and test sets.
inputs_train, inputs_test, outputs_train, outputs_test = mod.train_test_split(inputs, outputs, test_size=0.2)

In [9]:
inputs_test.iloc[0]

petal_length    4.8
petal_width     1.4
sepal_length    6.8
sepal_width     2.8
Name: 76, dtype: float64

In [10]:
model.predict(inputs_test.as_matrix()[0:1])








  """Entry point for launching an IPython kernel.





array([[0.02864362, 0.96718645, 0.00416994]], dtype=float32)

## Train

In [11]:
# Train the neural network.
model.fit(inputs_train, outputs_train, epochs=15, batch_size=10)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x1c4c4343248>

## Predict

In [12]:
model.predict(inputs_test.as_matrix()[0:1])

  """Entry point for launching an IPython kernel.


array([[0.07707678, 0.5562058 , 0.36671743]], dtype=float32)

In [13]:
# Have the network predict the classes of the test inputs.
predictions = model.predict(inputs_test)
predictions_labels = encoder.inverse_transform(predictions)
predictions_labels

array(['versicolor', 'virginica', 'setosa', 'setosa', 'setosa',
       'virginica', 'setosa', 'virginica', 'versicolor', 'versicolor',
       'virginica', 'versicolor', 'virginica', 'setosa', 'setosa',
       'versicolor', 'setosa', 'setosa', 'versicolor', 'virginica',
       'setosa', 'versicolor', 'versicolor', 'setosa', 'setosa',
       'virginica', 'virginica', 'versicolor', 'virginica', 'virginica'],
      dtype='<U10')

## Evaluate

In [14]:
# Compare the predictions to the actual classes.
predictions_labels == encoder.inverse_transform(outputs_test)

array([ True,  True,  True,  True,  True, False,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True])

In [15]:
(predictions_labels == encoder.inverse_transform(outputs_test)).sum()

29

## Whitening

In [16]:
import sklearn.decomposition as dec

In [17]:
pca = dec.PCA(n_components=4, whiten=True)
pca.fit(inputs_train)
inputs_train_white = pd.DataFrame(pca.transform(inputs_train), columns=inputs_train.columns)
inputs_train_white

Unnamed: 0,petal_length,petal_width,sepal_length,sepal_width
0,0.387221,0.484604,1.353335,0.761078
1,0.394136,-0.687376,-2.114471,-0.958366
2,0.925573,0.351229,2.526484,-0.225238
3,0.675388,-0.325813,-0.203693,-0.997615
4,0.221818,-1.285400,-0.049826,1.718668
...,...,...,...,...
115,0.015610,-1.118584,-0.788006,0.266962
116,-1.387309,0.580464,0.304557,1.042104
117,1.109262,0.787130,0.996607,0.446290
118,0.611141,-0.274729,0.178736,-0.899852


In [18]:
# Start a neural network, building it by layers.
model = kr.models.Sequential()

# Add a hidden layer with x neurons and an input layer with 4.
model.add(kr.layers.Dense(units=30, activation='relu', input_dim=4))
# Add a three neuron output layer.
model.add(kr.layers.Dense(units=3, activation='softmax'))

# Build the graph.
model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])

In [19]:
# Train the neural network.
model.fit(inputs_train_white, outputs_train, epochs=15, batch_size=10)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x1c4c573b888>

In [20]:
# Have the network predict the classes of the test inputs.
predictions = model.predict(pca.transform(inputs_test))
predictions_labels = encoder.inverse_transform(predictions)
predictions_labels

array(['versicolor', 'virginica', 'setosa', 'setosa', 'setosa',
       'virginica', 'setosa', 'virginica', 'versicolor', 'versicolor',
       'virginica', 'virginica', 'virginica', 'setosa', 'setosa',
       'versicolor', 'setosa', 'setosa', 'virginica', 'virginica',
       'setosa', 'virginica', 'versicolor', 'setosa', 'setosa',
       'virginica', 'virginica', 'versicolor', 'virginica', 'virginica'],
      dtype='<U10')

In [21]:
(predictions_labels == encoder.inverse_transform(outputs_test)).sum()

26

## End