In [291]:
import tensorflow as tf
import numpy as np
import pandas as pd
from pandas import get_dummies
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import tensorflow_datasets.public_api as tfds

<h1>Load data</h1>

In [292]:
file_name = "iris.data.csv"

In [293]:
#load the dataset into memory
dataset = pd.read_csv(file_name,header=None,\
                      names=['sepal_length','sepal_width','petal_length','petal_width','species'])
#check several examples of data
dataset

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica


<h1>Data preprocessing</h1>

In [294]:
#simple function to process the data.
"""
input:
    data: iris dataset
    num_features: 2 => select only petal width and petal width.
                  4 => select all features
    flower: 'Iris-setosa' => labels of Iris-setosa will be 1 and others will be 0
            'Iris-virginica' => labels of Iris-virginica will be 1 and others will be 0
            'None' => use one-hot encoding to represent the labels

return:
    x: normalized & shuffled data
    y: labels
"""
def data_preprocessing(data,num_features,flower=None):
    if num_features==2:
        features = data.columns[2:4]
    else:
        features = data.columns[0:4]
    labels = dataset.columns[4]
    print(features)
    print(labels)
    #normalize the data
    data_norm = pd.DataFrame(data)
    for feature in features:
        data[feature] = (data[feature]-data[feature].mean())/data[feature].std()

    #shuffle the data
    indices = data_norm.index.tolist()
    indices = np.array(indices)
    np.random.shuffle(indices)
    x = data_norm.reindex(indices)[features]
    y = data_norm.reindex(indices)[labels]
    if flower=='Iris-setosa':
        for i in range(len(y)):
            y[i]=1 if y[i]=='Iris-setosa' else 0
        y = y.values.reshape(len(y),1)
    elif flower=='Iris-virginica':
        for i in range(len(y)):
            y[i]=1 if y[i]=='Iris-virginica' else 0
        y = y.values.reshape(len(y),1)
    else:
        y = get_dummies(y)
    return x,y

<h1>Part a:

In [295]:
# assign categorical data
dataset.species = pd.Categorical(dataset.species)
c = dataset.species.astype('category')
dataset["species_n"] = dataset.species.cat.codes
d = dict(enumerate(c.cat.categories))
print (d)



#train test split
dataset.pop("species")
train=dataset.sample(frac=0.7,random_state=1000)
ytrain = train["species_n"].to_list()
test=dataset.drop(train.index)
ytest= test["species_n"].to_list()

{0: 'Iris-setosa', 1: 'Iris-versicolor', 2: 'Iris-virginica'}


<h2>Define hyper-parameters</h2>

<h2>Define Placeholder and Variables</h2>

Define input cunftion

In [296]:
def input_fn(features, labels, training=True, batch_size=256):
    # Convert the inputs to a Dataset.
    dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))
    if training:
        dataset = dataset.shuffle(1000).repeat()
    return dataset.batch(batch_size)

#train_input_fn = make_input_fn(train, ytrain)  # here we will call the input_function that was returned to us to get a dataset object we can feed to the model
#eval_input_fn = make_input_fn(test, ytest, num_epochs=1, shuffle=False)

<h2>Define feature Columns</h2>

In [297]:
NUMERIC_COLUMNS = ['sepal_length','sepal_width','petal_length','petal_width']
feature_columns = []
for key in train.keys():
    feature_columns.append(tf.feature_column.numeric_column(key=key))
print(feature_columns)

[NumericColumn(key='sepal_length', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='sepal_width', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='petal_length', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='petal_width', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='species_n', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None)]


<h2>Execute training</h2>

In [298]:
# 
classifier = tf.estimator.DNNClassifier(feature_columns=feature_columns, hidden_units=[256,128], n_classes=3)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\alber\\AppData\\Local\\Temp\\tmpqsknjiyh', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [299]:
classifier.train(input_fn=lambda: input_fn(train, ytrain, training=True),
                 steps=200)  # train

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 0...
INFO:tensorflow:Saving checkpoints for 0 into C:\Users\alber\AppData\Local\Temp\tmpqsknjiyh\model.ckpt.
INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 0...
INFO:tensorflow:loss = 1.2246238, step = 0
INFO:tensorflow:global_step/sec: 185.874
INFO:tensorflow:loss = 0.72612107, step = 100 (0.539 sec)
INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 200...
INFO:tensorflow:Saving checkpoints for 200 into C:\Users\alber\AppData\Local\Temp\tmpqsknjiyh\model.ckpt.
INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 200...
INFO:tensorflow:Loss for final step: 0.606383.


<tensorflow_estimator.python.estimator.canned.dnn.DNNClassifierV2 at 0x13866f77040>

In [300]:
eval_result = classifier.evaluate(input_fn=lambda: input_fn(test, ytest, training=False))
print('\nTest set accuracy: {accuracy:0.3f}\n'.format(**eval_result))

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2021-11-03T00:07:34
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\alber\AppData\Local\Temp\tmpqsknjiyh\model.ckpt-200
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Inference Time : 0.15400s
INFO:tensorflow:Finished evaluation at 2021-11-03-00:07:34
INFO:tensorflow:Saving dict for global step 200: accuracy = 0.9111111, average_loss = 0.55366766, global_step = 200, loss = 0.55366766
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 200: C:\Users\alber\AppData\Local\Temp\tmpqsknjiyh\model.ckpt-200

Test set accuracy: 0.911



In [301]:
result = list(classifier.predict(input_fn=lambda: input_fn(test, ytest, training=False)))
print(test.loc[0])
print(result[0]["probabilities"][0])

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\alber\AppData\Local\Temp\tmpqsknjiyh\model.ckpt-200
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
sepal_length    5.1
sepal_width     3.5
petal_length    1.4
petal_width     0.2
species_n       0.0
Name: 0, dtype: float64
0.69705606


<h2>Plot</h2>

In [302]:
plot_x = range(NUM_ITER)
plot_y = acc_list
plt.plot(plot_x, plot_y, color='k', linewidth=2)
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.show()

NameError: name 'acc_list' is not defined

<h1>Part b:</h1>

In [None]:
x_2,y_2 = data_preprocessing(dataset,2,flower='Iris-virginica')
#generate traning and validation sets
x_train_2, x_test_2, y_train_2, y_test_2 = train_test_split(x_2,y_2,test_size=0.3)

Index(['petal_length', 'petal_width'], dtype='object')
species


<h2>Define hyper-parameter</h2>

<h2>Define Placeholder and Variables</h2>

In [None]:
tf.reset_default_graph()

<h2>Define update rule and accuracy</h2>

<h2>Execute training</h2>

<h2>Plot</h2>

<h1>Part c:</h1>

In [None]:
x_3,y_3 = data_preprocessing(dataset,4)
#generate traning and validation sets
x_train_3, x_test_3, y_train_3, y_test_3 = train_test_split(x_3,y_3,test_size=0.3)

Index(['sepal_length', 'sepal_width', 'petal_length', 'petal_width'], dtype='object')
species


<h2>Define hyper-parameters</h2>

<h2>Define placehoder and variables</h2>

In [None]:
tf.reset_default_graph()

<h2>Define the neural network</h2>

<h2>Define cost function and accuracy</h2>

<h2>Execute training</h2>

<h2>Plot</h2>