In [1]:
 #  Copyright [2020] [name of copyright owner]

 #  Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 #  You may obtain a copy of the License at

 #     http://www.apache.org/licenses/LICENSE-2.0

 #  Unless required by applicable law or agreed to in writing, software
 #  distributed under the License is distributed on an "AS IS" BASIS,
 #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #  See the License for the specific language governing permissions and
 #  limitations under the License.

This tutorial demonstrates how to process and classify structured data (e.g. tabular data).
To begin with, download the Iris dataset from https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data


In [20]:
import numpy as np
import pandas as pd

import tensorflow as tf

from tensorflow import feature_column
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

In [23]:
# Fetch the raw CSV through Keras' download helper and read it without a
# header row, so the five columns are simply numbered 0..4.
dataset_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
path_to_downloaded_file = tf.keras.utils.get_file(fname="iris.data", origin=dataset_url)
dataframe = pd.read_csv(path_to_downloaded_file, header=None)
dataframe.head()

Unnamed: 0,0,1,2,3,4
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [24]:
# Encode the species names in column 4 as integer class ids.
# An explicit lookup table is used instead of nested np.where calls so that
#   * an unexpected label raises instead of silently becoming class 1, and
#   * re-running this cell on the already-encoded column is a no-op rather
#     than collapsing every label to 1.
label_to_id = {'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2}
if not pd.api.types.is_numeric_dtype(dataframe[4]):
  encoded = dataframe[4].map(label_to_id)
  if encoded.isna().any():
    # map() yields NaN for every value that is missing from the lookup table.
    unknown = dataframe.loc[encoded.isna(), 4].unique().tolist()
    raise ValueError('unexpected labels in column 4: {}'.format(unknown))
  dataframe[4] = encoded.astype('int64')
dataframe[4]

0      0
1      0
2      0
3      0
4      0
      ..
145    2
146    2
147    2
148    2
149    2
Name: 4, Length: 150, dtype: int64

In [47]:
# Shuffle the rows and renumber the index 0..n-1.
# A fixed random_state gives the same row order on every "Restart & Run All";
# change the value (or pass None) to draw a different permutation.
# reset_index is chained instead of using inplace=True so the cell produces
# the new frame in a single expression.
dataframe = shuffle(dataframe, random_state=42).reset_index(drop=True)
dataframe

Unnamed: 0,0,1,2,3,4
0,5.6,3.0,4.1,1.3,1
1,6.5,3.2,5.1,2.0,2
2,6.0,3.4,4.5,1.6,1
3,5.4,3.9,1.3,0.4,0
4,5.1,3.3,1.7,0.5,0
...,...,...,...,...,...
145,6.1,3.0,4.9,1.8,2
146,5.7,3.0,4.2,1.2,1
147,6.4,2.7,5.3,1.9,2
148,5.7,2.5,5.0,2.0,2


In [117]:
# Hold out 10% of the rows for testing, then 10% of what is left for
# validation. random_state pins both splits so that, for the same input
# frame, the same rows end up in each subset on every run.
train_val, test = train_test_split(dataframe, test_size=0.1, random_state=42)
train, val = train_test_split(train_val, test_size=0.1, random_state=42)
print('number of train examples:', len(train))
print('number of validation examples:', len(val))
print('number of test examples:', len(test))

# A utility method to create a tf.data dataset from a Pandas Dataframe
def covert_df(dataframe, batch_size=32, label_column=4, num_classes=3):
  """Build a batched tf.data.Dataset of (features, one-hot labels) from a DataFrame.

  Args:
    dataframe: frame holding the feature columns plus one column of integer
      class ids. The caller's frame is not modified.
    batch_size: number of rows per batch.
    label_column: name of the column holding the class ids. Defaults to 4,
      the label column of the Iris frame used in this notebook.
    num_classes: depth of the one-hot encoding. Defaults to 3.

  Returns:
    A tf.data.Dataset yielding (features, labels) batches, where labels are
    one-hot vectors of length num_classes.
  """
  labels = tf.one_hot(list(dataframe[label_column]), num_classes)
  # drop() returns a new frame, so the label column stays in the caller's data.
  features = dataframe.drop(columns=[label_column])
  ds = tf.data.Dataset.from_tensor_slices((features, labels))
  return ds.batch(batch_size)

# Pick a small batch size, then swap each DataFrame split for its batched
# tf.data counterpart (same order as before: train, validation, test).
batch_size = 5
train, val, test = [
    covert_df(split_frame, batch_size=batch_size)
    for split_frame in (train, val, test)
]


number of train examples: 121
number of validation examples: 14
number of test examples: 15


In [120]:
# Now we define a simple sequential model with three Dense layers:
# 4 input features -> 2 units (ReLU) -> 3 units (ReLU) -> 3 outputs
model = tf.keras.Sequential(
    [
        # shape must be a tuple: "(4)" is just the int 4, so the trailing
        # comma is required to describe a 1-D input with 4 features.
        tf.keras.Input(shape=(4,)),
        layers.Dense(2, activation="relu", name="firstLayer"),
        layers.Dense(3, activation="relu", name="secondLayer"),
        # No activation here: the layer emits one raw score (logit) per class.
        # The name is kept as-is even though this is the third Dense layer.
        layers.Dense(3, name="fourthLayer"),
    ]
)

model.summary()

Model: "sequential_18"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
firstLayer (Dense)           (None, 2)                 10        
_________________________________________________________________
secondLayer (Dense)          (None, 3)                 9         
_________________________________________________________________
fourthLayer (Dense)          (None, 3)                 12        
Total params: 31
Trainable params: 31
Non-trainable params: 0
_________________________________________________________________


In [122]:
# The targets are one-hot vectors over three mutually exclusive classes and
# the last layer emits raw logits, so the matching loss is categorical
# cross-entropy with from_logits=True. Binary cross-entropy would instead
# treat the three outputs as independent yes/no problems.
model.compile(optimizer='adam',
              loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

# `train` and `val` are already batched tf.data Datasets, so batch_size is
# not passed to fit(): the dataset itself determines the batch size.
model.fit(train,
          validation_data=val,
          epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100


Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<tensorflow.python.keras.callbacks.History at 0x7fc0c04c0d30>