# Lab - Iris Species
Reference: Iris dataset at https://archive.ics.uci.edu/ml/datasets/iris 

Exercise: 
1. Predict the Iris species on test data.




In [2]:
import tensorflow as tf
import numpy as np
import pandas as pd
import shutil

## Read dataset

In [3]:
df = pd.read_csv("./data/iris/iris_dataset.csv")
df.dtypes

sepal_l       float64
sepal_w       float64
petal_l       float64
petal_w       float64
iris_class     object
dtype: object

In [4]:
df['iris_class'].value_counts()

Iris-versicolor    50
Iris-setosa        50
Iris-virginica     50
Name: iris_class, dtype: int64

## Categorize string columns

In [7]:
iris_mapping = {
    'Iris-setosa': 0,
    'Iris-virginica': 1,
    'Iris-versicolor': 2,
}
df['iris_class_num'] = df['iris_class'].map(iris_mapping)
df.head()

Unnamed: 0,sepal_l,sepal_w,petal_l,petal_w,iris_class,iris_class_num
0,5.1,3.5,1.4,0.2,Iris-setosa,0
1,4.9,3.0,1.4,0.2,Iris-setosa,0
2,4.7,3.2,1.3,0.2,Iris-setosa,0
3,4.6,3.1,1.5,0.2,Iris-setosa,0
4,5.0,3.6,1.4,0.2,Iris-setosa,0


## Features and Label

In [8]:
FEATURES_NUM = ['sepal_l','sepal_w','petal_l','petal_w']
LABEL = 'iris_class_num'

## Split into dataset of: Train, Validation, Test (70%-20%-10%)

In [9]:
df_train0, df_valid0, df_test0 = np.split(df.sample(frac=1), [int(.7*len(df)), int(.9*len(df))])

# Obtain only featured columns
df_train = df_train0[['iris_class_num','sepal_l','sepal_w','petal_l','petal_w']]
df_valid = df_valid0[['iris_class_num','sepal_l','sepal_w','petal_l','petal_w']]
df_test = df_test0[['iris_class_num','sepal_l','sepal_w','petal_l','petal_w']]

print('Train set: {}'.format(df_train0.shape))
print('Validate set: {}'.format(df_valid0.shape))
print('Test set: {}'.format(df_test0.shape))

Train set: (105, 6)
Validate set: (30, 6)
Test set: (15, 6)


## Determine metrics for validation - Accuracy

In [10]:
def print_accuracy(model, df):
  metrics = model.evaluate(input_fn = tf.estimator.inputs.pandas_input_fn(
    x = df,
    y = df[LABEL],
    batch_size = 128,
    shuffle = False
      
  ))
  print('Accuracy on dataset = {}'.format(metrics['accuracy']))
    
#print_accuracy(model, df_valid)

## Model: DNNClassifier

In [19]:
# DNNClassifier
OUTDIR = 'model/lab3_class_iris'
tf.logging.set_verbosity(tf.logging.INFO)
shutil.rmtree(OUTDIR, ignore_errors = True) # start fresh each time

model = tf.estimator.DNNClassifier(
    hidden_units = [1024, 512],
    feature_columns = [tf.feature_column.numeric_column('sepal_l'),
                       tf.feature_column.numeric_column('sepal_w'),
                       tf.feature_column.numeric_column('petal_l'),
                       tf.feature_column.numeric_column('petal_w')
                      ], 
    activation_fn = tf.nn.relu,
    n_classes=3,
    optimizer=tf.train.AdamOptimizer(
      learning_rate=0.001
    ),
    model_dir = OUTDIR,
    config = tf.estimator.RunConfig()
)

model.train(input_fn = tf.estimator.inputs.pandas_input_fn(
    x = df_train,
    y = df_train[LABEL],
    batch_size = 128,
    num_epochs = 8,
    shuffle = True
  ));

print_accuracy(model, df_valid)

INFO:tensorflow:Using config: {'_model_dir': 'model/lab3_class_iris', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x12b9e7630>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running

## Prediction

In [20]:
predictions = model.predict(input_fn = tf.estimator.inputs.pandas_input_fn(
#    x = pd.DataFrame({
#                      'Hour':[9,10,12],
#                      'Minute':[0,0,0],
#                      'IsoWeekday':[4,4,5]}),
    x = df_test,
    y = None,
    batch_size = 128,
    shuffle = False
  ))

for items in predictions:
  print(items)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from model/lab3_class_iris/model.ckpt-7
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
{'logits': array([ 1.0983298 , -0.89918566, -0.39644113], dtype=float32), 'probabilities': array([0.73530924, 0.09976083, 0.16492985], dtype=float32), 'class_ids': array([0]), 'classes': array([b'0'], dtype=object)}
{'logits': array([-0.56499916,  0.10324459,  0.2415635 ], dtype=float32), 'probabilities': array([0.19264102, 0.37580568, 0.43155333], dtype=float32), 'class_ids': array([2]), 'classes': array([b'2'], dtype=object)}
{'logits': array([ 1.1060256 , -0.8941953 , -0.41908967], dtype=float32), 'probabilities': array([0.7391522 , 0.10001129, 0.16083655], dtype=float32), 'class_ids': array([0]), 'classes': array([b'0'], dtype=object)}
{'logits': array([-0.45064566,  0.07756329,  0.19751395], dtype=float32), 'probabi

In [21]:
df_test.head(16)

Unnamed: 0,iris_class_num,sepal_l,sepal_w,petal_l,petal_w
25,0,5.0,3.0,1.6,0.2
71,2,6.1,2.8,4.0,1.3
30,0,4.8,3.1,1.6,0.2
93,2,5.0,2.3,3.3,1.0
45,0,4.8,3.0,1.4,0.3
75,2,6.6,3.0,4.4,1.4
132,1,6.4,2.8,5.6,2.2
110,1,6.5,3.2,5.1,2.0
62,2,6.0,2.2,4.0,1.0
148,1,6.2,3.4,5.4,2.3
