# Classifying Iris Species with TensorFlow

In [1]:
import tensorflow as tf
from tensorflow.data import Dataset
from tensorflow.keras.layers import Dense, Flatten, Conv2D
from tensorflow.keras import Model
from algorithms.iris.Reader import IrisReader
from algorithms.tfAlgos.DataOps import IrisPrep
from algorithms.tfAlgos.KNN import KNN
from typing import Dict, Tuple, Callable, List
import pandas as pd
import numpy as np
from functools import reduce

# Instantiate the project's Iris reader and have it load the data set.
iris_reader: IrisReader = IrisReader()
iris_reader.load()
# The reader exposes the loaded data as a dict keyed by species name; each value is a
# 2-D array with one row per sample and four feature columns (see the output below).
raw_data: Dict[str, np.ndarray] = iris_reader.data

# Display the whole dictionary as the cell output.
raw_data

{'setosa': array([[5.1, 3.5, 1.4, 0.2],
        [4.9, 3. , 1.4, 0.2],
        [4.7, 3.2, 1.3, 0.2],
        [4.6, 3.1, 1.5, 0.2],
        [5. , 3.6, 1.4, 0.2],
        [5.4, 3.9, 1.7, 0.4],
        [4.6, 3.4, 1.4, 0.3],
        [5. , 3.4, 1.5, 0.2],
        [4.4, 2.9, 1.4, 0.2],
        [4.9, 3.1, 1.5, 0.1],
        [5.4, 3.7, 1.5, 0.2],
        [4.8, 3.4, 1.6, 0.2],
        [4.8, 3. , 1.4, 0.1],
        [4.3, 3. , 1.1, 0.1],
        [5.8, 4. , 1.2, 0.2],
        [5.7, 4.4, 1.5, 0.4],
        [5.4, 3.9, 1.3, 0.4],
        [5.1, 3.5, 1.4, 0.3],
        [5.7, 3.8, 1.7, 0.3],
        [5.1, 3.8, 1.5, 0.3],
        [5.4, 3.4, 1.7, 0.2],
        [5.1, 3.7, 1.5, 0.4],
        [4.6, 3.6, 1. , 0.2],
        [5.1, 3.3, 1.7, 0.5],
        [4.8, 3.4, 1.9, 0.2],
        [5. , 3. , 1.6, 0.2],
        [5. , 3.4, 1.6, 0.4],
        [5.2, 3.5, 1.5, 0.2],
        [5.2, 3.4, 1.4, 0.2],
        [4.7, 3.2, 1.6, 0.2],
        [4.8, 3.1, 1.6, 0.2],
        [5.4, 3.4, 1.5, 0.4],
        [5.2, 4.1, 1.5, 0.1],


### Create Train and Test Sets

In [2]:
# Per-species feature matrices with a matching vector of repeated label strings.
# These four names are not referenced again in the cells shown in this notebook;
# the combined dataset below is built straight from raw_data.
setosa_x: np.ndarray = raw_data["setosa"]
setosa_y: np.ndarray = np.repeat("setosa", len(setosa_x))
versicolor_x: np.ndarray = raw_data["versicolor"]
versicolor_y: np.ndarray = np.repeat("versicolor", len(versicolor_x))

# Merge the three species into one tf.data.Dataset, then split it.
# NOTE(review): 50 is presumably the test-set size (the printed test batch has
# shape (50, 4)) — confirm against IrisPrep.train_test_split.
data: tf.data.Dataset = IrisPrep.combine_data(["setosa", "versicolor", "virginica"], raw_data)
train, test = IrisPrep.train_test_split(data, 50)

# Print up to ten elements of the test dataset for a visual check.
for elem in test.take(10):
    print(elem)

(<tf.Tensor: shape=(50, 4), dtype=float64, numpy=
array([[5.2, 2.7, 3.9, 1.4],
       [5.8, 2.8, 5.1, 2.4],
       [5.3, 3.7, 1.5, 0.2],
       [5.7, 4.4, 1.5, 0.4],
       [4.7, 3.2, 1.3, 0.2],
       [7.7, 3. , 6.1, 2.3],
       [6.5, 2.8, 4.6, 1.5],
       [6.6, 3. , 4.4, 1.4],
       [6.3, 2.7, 4.9, 1.8],
       [5.6, 2.5, 3.9, 1.1],
       [6.2, 2.8, 4.8, 1.8],
       [5.7, 2.6, 3.5, 1. ],
       [4.9, 3.6, 1.4, 0.1],
       [5. , 2. , 3.5, 1. ],
       [6.5, 3. , 5.2, 2. ],
       [7. , 3.2, 4.7, 1.4],
       [5.1, 3.5, 1.4, 0.2],
       [6.3, 2.5, 5. , 1.9],
       [6.1, 3. , 4.9, 1.8],
       [5. , 3.6, 1.4, 0.2],
       [5. , 3.2, 1.2, 0.2],
       [5.8, 2.7, 4.1, 1. ],
       [6.9, 3.1, 4.9, 1.5],
       [7.2, 3. , 5.8, 1.6],
       [6.2, 3.4, 5.4, 2.3],
       [6.4, 2.7, 5.3, 1.9],
       [5. , 3.4, 1.5, 0.2],
       [7.4, 2.8, 6.1, 1.9],
       [6.7, 3. , 5. , 1.7],
       [6.3, 3.4, 5.6, 2.4],
       [4.3, 3. , 1.1, 0.1],
       [4.8, 3.1, 1.6, 0.2],
       [4.5, 2.3, 1.3,

### Identify Neighborhoods

In [3]:
# Query point for the neighbourhood search: the first feature row of the
# first element of the test dataset (labels are discarded by the map).
test0 = next(
    test.take(1)
    .map(lambda features, labels: features)
    .as_numpy_iterator()
)[0]
test0

array([5.2, 2.7, 3.9, 1.4])

In [5]:
# Called with only the query point, calc_distance_to_point returns its inner
# `calc_distance(pn, labels)` closure (see the repr below), not distances.
KNN.calc_distance_to_point(test0)

<function algorithms.tfAlgos.KNN.KNN.calc_distance_to_point.<locals>.calc_distance(pn: tensorflow.python.framework.ops.Tensor, labels: tensorflow.python.framework.ops.Tensor)>

In [28]:
def calc_distance(pn: tf.Tensor, label: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor]:
    """Compute the Euclidean distance from the query point ``test0`` to each row of ``pn``.

    Args:
        pn: Batch of feature rows, one point per row.
        label: Labels that accompany ``pn``; passed through unchanged.

    Returns:
        A ``(distances, label)`` tuple where ``distances[i]`` is the L2 norm of
        ``test0 - pn[i]``.

    Note:
        Reads the notebook-level ``test0``, so the cell defining it must run first.
    """
    # Broadcast test0 against the whole batch and reduce over the feature axis:
    # one vectorised op instead of a per-row tf.map_fn loop.
    distances = tf.norm(test0 - pn, ord="euclidean", axis=-1)
    return distances, label


# Dataset.map unpacks each (features, labels) element into positional arguments,
# so the function can be passed directly without a wrapping lambda.
next(train.map(calc_distance).as_numpy_iterator())

(array([0.38729833, 3.24037035, 0.58309519, 1.52643375, 1.00995049,
        3.8249183 , 0.3       , 1.40712473, 1.27671453, 3.04466747,
        2.2181073 , 3.05450487, 2.86006993, 1.24096736, 3.47275107,
        1.9       , 1.78044938, 2.59229628, 3.08868904, 3.56510869,
        0.3       , 2.03224014, 1.8       , 2.36854386, 2.9189039 ,
        2.53179778, 1.23693169, 3.7013511 , 2.97657521, 2.96984848,
        2.94278779, 2.93768616, 1.17898261, 1.91572441, 0.38729833,
        2.9240383 , 1.6881943 , 3.37490741, 2.80713377, 2.12837967,
        0.57445626, 2.79642629, 2.83196045, 2.98998328, 0.42426407,
        2.02484567, 0.65574385, 2.06639783, 0.87749644, 2.98998328,
        2.98998328, 3.15436206, 2.90344623, 0.43588989, 3.77624152,
        1.93649167, 0.7       , 2.9866369 , 1.2922848 , 0.83666003,
        1.36381817, 1.40356688, 2.28473193, 0.98994949, 3.10322413,
        1.26095202, 2.84604989, 2.91547595, 1.17473401, 1.22065556,
        0.37416574, 1.09544512, 3.01662063, 2.65

In [6]:
def value_tuple(x: tf.Tensor, y: tf.Tensor):
    """Return the two tensors packed as an ``(x, y)`` tuple, unchanged."""
    pair = (x, y)
    return pair


# Print every (distances, labels) element computed for the query point test0.
distance_elements = KNN.calc_distances_to_point(test0, train)
for elem in distance_elements:
    tf.print(elem)

([2.7294688127912359 0.68556546004010444 2.7166155414412247 ... 1.4560219778561034 1.2727922061357855 1.2369316876852985], ["setosa" "versicolor" "setosa" ... "versicolor" "versicolor" "virginica"])


In [9]:
# Distances from test0 to the training points, then the 5 entries chosen by get_max_k.
# NOTE(review): the printed distances (about 3.3-4.0) look like the largest ones, i.e. the
# farthest points; a k-nearest-neighbour vote would normally use the smallest distances —
# confirm what get_max_k is meant to select.
distances_to_test0 = KNN.calc_distances_to_point(test0, train)
max5 = KNN.get_max_k(distances_to_test0, 5)
for elem in max5:
    print(elem)

(<tf.Tensor: shape=(5,), dtype=float64, numpy=array([4.00874045, 3.99249296, 3.80263067, 3.69188299, 3.31963853])>, <tf.Tensor: shape=(5,), dtype=string, numpy=
array([b'virginica', b'virginica', b'virginica', b'virginica', b'setosa'],
      dtype=object)>)


In [10]:
# Turn each selected distance into an inverse-distance weight and keep the labels.
# The former trailing `.reduce((tf.float64(0), ))` was removed: tf.float64 is a DType
# object and cannot be called, and Dataset.reduce requires both an initial_state and
# a reduce_func.
# TODO: aggregate the weights per class; a NumPy prototype of that vote appears
# further down in this notebook.
max5.map(lambda x, y: (1/x, y))

<MapDataset shapes: ((5,), (5,)), types: (tf.float64, tf.string)>

In [None]:
# Toy example: a single-element dataset holding a (numbers, letters) pair of tensors.
x1: tf.Tensor = tf.constant([1.0, 3.0, 2.0, 0.0], dtype=tf.float64)
x2: tf.Tensor = tf.constant(["a", "b", "c", "d"])
d = Dataset.from_tensors((x1, x2))

# Keep only the first component and pull it out as a NumPy array.
first_component = d.map(lambda numbers, letters: numbers)
next(first_component.as_numpy_iterator())

In [36]:
# Inverse-distance weighted vote over five (distance, label) pairs, hard-coded here
# from the values printed for max5 above.
vals = np.array([4.00874045, 3.99249296, 3.80263067, 3.69188299, 3.31963853])
labs = np.array([b'virginica', b'virginica', b'virginica', b'virginica', b'setosa'])
cats = [b"setosa", b"versicolor", b"virginica"]

# out = (accumulated weight per class, class names); both arrays follow the order of cats.
class_weights = np.zeros(len(cats))
class_names = np.array(cats)
out = (class_weights, class_names)
for neighbour_label, distance in zip(labs, vals):
    # The boolean mask selects the slot belonging to this neighbour's class.
    class_weights[class_names == neighbour_label] += 1 / distance

print(out)
# The predicted class is the one with the largest accumulated weight.
out[1][np.argmax(out[0])]

(array([0.30123762, 0.        , 1.03376532]), array([b'setosa', b'versicolor', b'virginica'], dtype='|S10'))


b'virginica'

In [44]:
# Encode the class names as UTF-8 byte strings, the same form as the b'...' labels
# shown in the outputs above.
# A list is used instead of a lazy `map` object: a map iterator is consumed by the
# first display, which would leave cats2 empty for every later cell.
cats2 = [name.encode("UTF-8") for name in ["setosa", "versicolor", "virginica"]]
cats2

AttributeError: 'bytes' object has no attribute 'encode'

In [30]:
# Bare bytes literal; the cell simply echoes it back as its output.
b'virginica'

b'virginica'

In [None]:
# NOTE(review): `dataset` is not defined in any cell shown in this notebook — bind it
# to the intended tf.data.Dataset before running this cell.
t = tf.constant(list(dataset.as_numpy_iterator()))
tf.print(t)

# Ten largest entries along the last axis, sorted in descending order.
# top_k is evaluated once and its (values, indices) result is reused.
tk = tf.math.top_k(t, 10, sorted=True)
vals, idxs = tk.values, tk.indices
tf.print(tk)

# Look the selected entries up again through their indices.
tf.gather(t, tk.indices)