<a href="https://colab.research.google.com/github/harshaljanjani/EverythingML/blob/main/ML%20In%20Python/Day_6_IrisFlowerClassification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Classification Algorithms - Day 6**

### 1) Core Learning Algorithms - Classification
### 2) Core Learning Algorithms - Building The Classification Model

In [1]:
%tensorflow_version 2.x  
from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf
import pandas as pd

Colab only includes TensorFlow 2.x; %tensorflow_version has no effect.


In [2]:
#Dataset Introduction: Iris Flowers Dataset
#This specific dataset seperates flowers into 3 different classes of species: Sertosa, Versicolor, Virginica
#Information about each flower: 1) Sepal Length, 2) Sepal Width, 3) Petal Length, 4) Petal Width

#Constants: CSV_COLUMN_NAMES(Properties Of Each Flower), SPECIES(Types Of Flower Species)
CSV_COLUMN_NAMES = ['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth', 'Species']
SPECIES = ['Setosa', 'Versicolor', 'Virginica']

In [3]:
#Keras - Sub-module of Tensorflow 
train_path = tf.keras.utils.get_file(
    "iris_training.csv", "https://storage.googleapis.com/download.tensorflow.org/data/iris_training.csv")
test_path = tf.keras.utils.get_file(
    "iris_test.csv", "https://storage.googleapis.com/download.tensorflow.org/data/iris_test.csv")

train = pd.read_csv(train_path, names=CSV_COLUMN_NAMES, header=0) #header = 0 -> Row 0 is the header
test = pd.read_csv(test_path, names=CSV_COLUMN_NAMES, header=0)
# Here we use keras (a module inside of TensorFlow) to grab our datasets and read them into a pandas dataframe

Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/iris_training.csv
Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/iris_test.csv


In [4]:
train.head() #Species -> Already encoded data

Unnamed: 0,SepalLength,SepalWidth,PetalLength,PetalWidth,Species
0,6.4,2.8,5.6,2.2,2
1,5.0,2.3,3.3,1.0,1
2,4.9,2.5,4.5,1.7,2
3,4.9,3.1,1.5,0.1,0
4,5.7,3.8,1.7,0.3,0


In [5]:
y_train = train.pop('Species')
y_test = test.pop('Species')

In [6]:
print(test.shape)
print(train.shape)

(30, 4)
(120, 4)


In [7]:
test.head()

Unnamed: 0,SepalLength,SepalWidth,PetalLength,PetalWidth
0,5.9,3.0,4.2,1.5
1,6.9,3.1,5.4,2.1
2,5.1,3.3,1.7,0.5
3,6.0,3.4,4.5,1.6
4,5.5,2.5,4.0,1.3


In [8]:
#Input Function
def input_fn(features,labels,training=True,batch_size=256):
  #Convert the inputs into a dataset
  dataset = tf.data.Dataset.from_tensor_slices((dict(features),labels))
  #Shuffle and repeat if you are in training mode
  if training:
    dataset = dataset.shuffle(1000).repeat()
  return dataset.batch(batch_size)

In [9]:
#Feature Columns: Since all data is numeric / no categorical data, only one 'for' loop is required, loop through all keys in train.keys()
feature_columns = []
for feature_name in train.keys(): #train.keys() -> all columns
  feature_columns.append(tf.feature_column.numeric_column(key=feature_name))
print(feature_columns)

[NumericColumn(key='SepalLength', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='SepalWidth', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='PetalLength', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='PetalWidth', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None)]


## **Building The Model**

In [24]:
#For classification models, two prebuilt choices in Tensorflow:
# 1) Deep Neural Network Classifier (DNNClassifier) ✅ (Recommended by Tensorflow Docs.)
# 2) LinearClassifier (Working is similar to Linear Regression Estimator)

tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) #handle-warnings

#Task: Build a DNN with 2 hidden layers with 30 and 10 hidden nodes each
classifier = tf.estimator.DNNClassifier(
    feature_columns = feature_columns,
    # Two hidden layers of 30 and 10 nodes respectively.
    hidden_units = [30, 10],
    # The model must choose between 3 classes (3 classes of flowers).
    n_classes = 3
)

## **Training The Model**

In [25]:
classifier.train(
    input_fn=lambda: input_fn(train, y_train, training=True),
    steps=5000) # steps -> similar to epoch
# We include a lambda to avoid creating an inner function previously

<tensorflow_estimator.python.estimator.canned.dnn.DNNClassifierV2 at 0x7f86c698fa90>

## **Evaluation : Seeing how well the trained model does with the testing data**

In [26]:
eval_result = classifier.evaluate(input_fn = lambda: input_fn(test, y_test, training=False))
print('Test set accuracy: {accuracy:0.3f}'.format(**eval_result))
#26/01/2023 -> Test set accuracy of 96.7%

Test set accuracy: 0.967


## **Making predictions for specific flowers**

In [33]:
def input_fn(features, batch_size=256): #No y_train/y_eval (during predictions, we don't have the label, we want to "know" what the label could be)
  #Convert the inputs into a Dataset without labels
  return tf.data.Dataset.from_tensor_slices(dict(features)).batch(batch_size)

features = ['SepalLength','SepalWidth','PetalLength','PetalWidth'] 
predict = {} #dict: key(feature)->val(float(value given as input for the feature))

print('Please type numeric values as prompted.')
for feature in features:
  valid = True
  while valid:
    val = input(feature+": ")
    if not val.isdigit():
      valid = False
  predict[feature] = [float(val)] #Even if we have one value, we need to put it in a list because Tensorflow expects more than one value for making predictions in general, each of which represents a new row/new flower we're making a prediction for

predictions = classifier.predict(input_fn = lambda: input_fn(predict)) #dict of dicts
#For each prediction in the predictions dictionary
for prediction_dict in predictions:
  class_id = prediction_dict['class_ids'][0]
  probability = prediction_dict['probabilities'][class_id]

  print('Prediction of the flower: Species: "{}" (Probability: {:.1f}%)'.format(SPECIES[class_id], 100 * probability))

Please type numeric values as prompted.
SepalLength: 23
SepalLength: 12
SepalLength: 12
SepalLength: 3
SepalLength: 4
SepalLength: 2
SepalLength: 0.5
SepalWidth: 2
SepalWidth: 0.4
PetalLength: 0.5
PetalWidth: 0.3
Prediction of the flower: Species: "Versicolor" (Probability: 38.7%)


In [34]:
#Examples for predictions, with labels:
expected = ['Setosa', 'Versicolor', 'Virginica']
predict_x = {
    'SepalLength': [5.1, 5.9, 6.9],
    'SepalWidth': [3.3, 3.0, 3.1],
    'PetalLength': [1.7, 4.2, 5.4],
    'PetalWidth': [0.5, 1.5, 2.1],
}