In [1]:
import tensorflow as tf
print("Tensorflow: v" + tf.__version__)

Tensorflow: v2.4.1


## Classification

- Used to seperate data points into classes of different labels

In [2]:
from __future__ import absolute_import, division, print_function, unicode_literals
from IPython.display import clear_output

import tensorflow as tf
import pandas as pd

### Loading the dataset

In [3]:
train_path = tf.keras.utils.get_file("iris_training.csv", "https://storage.googleapis.com/download.tensorflow.org/data/iris_training.csv")
test_path = tf.keras.utils.get_file("iris_test.csv", "https://storage.googleapis.com/download.tensorflow.org/data/iris_test.csv")

In [4]:
# Using Iris Flower dataset
CSV_COL_NAME = ["SepalLength", "SepdalWidth", "PetalLength", "PetalWidth", "Species"]
SPECIES = ["Setosa", "Versicolor", "Virginia"]

In [5]:
train = pd.read_csv(train_path, names=CSV_COL_NAME, header=0)
test = pd.read_csv(test_path, names=CSV_COL_NAME, header=0)

train_y = train.pop("Species")
test_y = test.pop("Species")

train.head()

Unnamed: 0,SepalLength,SepdalWidth,PetalLength,PetalWidth
0,6.4,2.8,5.6,2.2
1,5.0,2.3,3.3,1.0
2,4.9,2.5,4.5,1.7
3,4.9,3.1,1.5,0.1
4,5.7,3.8,1.7,0.3


In [6]:
train.shape

(120, 4)

### Input Function and Feature Column

In [7]:
# input function
def input_fn(feat, lab, train=True, bs=256): #features, labels, training, batch_size
    ds = tf.data.Dataset.from_tensor_slices((dict(feat), lab))
    if train:
        ds = ds.shuffle(1000).repeat()
    return ds.batch(bs)

# feature column
fc = []
for k in train.keys():
    fc.append(tf.feature_column.numeric_column(key=k))
print(fc)

[NumericColumn(key='SepalLength', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='SepdalWidth', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='PetalLength', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='PetalWidth', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None)]


### Building the Model
- Using pre-built classifier that are built-in tensorflow
- ***DNNClassifier*** (Deep Neural Network)
- ***LinearClassifier***

In [10]:
classifier = tf.estimator.DNNClassifier(feature_columns=fc, hidden_units=[30,10], n_classes=3) 
classifier.train(input_fn=lambda: input_fn(train, train_y, train=True), steps=5000)

results = classifier.evaluate(input_fn=lambda: input_fn(test, test_y, train=False))
clear_output()
print(results)

{'accuracy': 0.93333334, 'average_loss': 0.30418208, 'loss': 0.30418208, 'global_step': 5000}


### Using the model to make predictions

In [11]:
results = list(classifier.predict(input_fn=lambda: input_fn(test, test_y, train=False)))
clear_output()

for n in results:
    pred = n["class_ids"][0]
    prob = n["probabilities"]
    print(f"{SPECIES[pred]:12} p={prob[pred]:.4f}")

Versicolor   p=0.6548
Virginia     p=0.6719
Setosa       p=0.8777
Versicolor   p=0.5835
Versicolor   p=0.6133
Versicolor   p=0.7193
Setosa       p=0.9688
Virginia     p=0.5862
Versicolor   p=0.6363
Virginia     p=0.8338
Virginia     p=0.8066
Setosa       p=0.9289
Virginia     p=0.8290
Versicolor   p=0.5924
Versicolor   p=0.7350
Setosa       p=0.9459
Versicolor   p=0.7030
Setosa       p=0.8820
Setosa       p=0.9375
Virginia     p=0.8469
Setosa       p=0.9288
Virginia     p=0.5198
Virginia     p=0.8330
Virginia     p=0.5294
Versicolor   p=0.7231
Versicolor   p=0.5897
Setosa       p=0.9642
Versicolor   p=0.6986
Virginia     p=0.8715
Versicolor   p=0.7494
