In [1]:
 #  Copyright [2020] [name of copyright owner]

 #  Licensed under the Apache License, Version 2.0 (the "License");
 #  you may not use this file except in compliance with the License.
 #  You may obtain a copy of the License at

 #     http://www.apache.org/licenses/LICENSE-2.0

 #  Unless required by applicable law or agreed to in writing, software
 #  distributed under the License is distributed on an "AS IS" BASIS,
 #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #  See the License for the specific language governing permissions and
 #  limitations under the License.

This tutorial demonstrates how to process and classify structured data (e.g. tabular data).
To begin, download the Iris dataset from https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data


In [2]:
import numpy as np
import pandas as pd

import tensorflow as tf

from tensorflow import feature_column
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

In [3]:
# Fetch the Iris CSV through Keras' download helper and load it with pandas.
# The file has no header row, so the columns are labelled 0..4.
dataset_url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
path_to_downloaded_file = tf.keras.utils.get_file(
    fname='iris.data',
    origin=dataset_url,
)
dataframe = pd.read_csv(path_to_downloaded_file, header=None)
dataframe.head()

Unnamed: 0,0,1,2,3,4
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [4]:
# Encode the species names in column 4 as integer class ids.
# An explicit mapping is used so that an unexpected or misspelled label raises
# instead of being silently assigned to class 1. Values that are already valid
# ids pass through unchanged, which keeps the cell safe to re-run.
species_to_id = {'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2}
encoded_labels = dataframe[4].map(lambda label: species_to_id.get(label, label))
unexpected_labels = set(encoded_labels.unique()) - set(species_to_id.values())
if unexpected_labels:
    raise ValueError(
        'unexpected labels in column 4: %s' % sorted(unexpected_labels, key=str)
    )
dataframe[4] = encoded_labels.astype('int64')
dataframe[4]

0      0
1      0
2      0
3      0
4      0
      ..
145    2
146    2
147    2
148    2
149    2
Name: 4, Length: 150, dtype: int64

In [5]:
# Shuffle the rows (the labels above appear grouped by class) and renumber the
# index from 0. A fixed random_state makes the order reproducible across
# "Restart Kernel -> Run All"; change or remove it to get a different shuffle.
dataframe = shuffle(dataframe, random_state=42).reset_index(drop=True)
dataframe

Unnamed: 0,0,1,2,3,4
0,5.5,2.4,3.8,1.1,1
1,6.3,2.8,5.1,1.5,2
2,4.9,3.0,1.4,0.2,0
3,4.8,3.0,1.4,0.1,0
4,5.1,2.5,3.0,1.1,1
...,...,...,...,...,...
145,7.2,3.0,5.8,1.6,2
146,6.3,2.5,5.0,1.9,2
147,5.0,3.0,1.6,0.2,0
148,4.8,3.4,1.6,0.2,0


In [6]:
# Hold out 10% of the rows for testing, then 10% of the remainder for validation.
# A fixed random_state keeps the three subsets identical on every run, so the
# reported metrics are comparable between executions.
train, test = train_test_split(dataframe, test_size=0.1, random_state=42)
train, val = train_test_split(train, test_size=0.1, random_state=42)
print('number of train examples:', len(train))
print('number of validation examples:', len(val))
print('number of test examples:', len(test))

# A utility method to create a tf.data dataset from a Pandas Dataframe
def covert_df(dataframe, batch_size=32, label_column=4, num_classes=3):
  """Convert a pandas DataFrame into a batched ``tf.data.Dataset``.

  Args:
    dataframe: DataFrame holding the feature columns plus one column of
      integer class ids.
    batch_size: Number of rows per batch.
    label_column: Name of the column that holds the integer class ids.
      Defaults to 4, the label column of the Iris frame used in this notebook.
    num_classes: Depth of the one-hot label vectors. Defaults to 3.

  Returns:
    A dataset yielding ``(features, one_hot_labels)`` batches; the label
    column is removed from the features.
  """
  labels = tf.one_hot(dataframe[label_column].to_numpy(), num_classes)
  # drop() returns a new frame, so the caller's DataFrame is left untouched.
  features = dataframe.drop(columns=[label_column])
  ds = tf.data.Dataset.from_tensor_slices((features, labels))
  return ds.batch(batch_size)

# Use a small batch size and turn each DataFrame split into a batched dataset.
# The names train/val/test are rebound from DataFrames to tf.data datasets here.
batch_size = 5
train, val, test = (
    covert_df(split, batch_size=batch_size) for split in (train, val, test)
)
print(train)


number of train examples: 121
number of validation examples: 14
number of test examples: 15
<BatchDataset shapes: ((None, 4), (None, 3)), types: (tf.float64, tf.float32)>


In [7]:
# Define a simple sequential model: a 4-feature input followed by three Dense
# layers. No activation is set on the last layer, so it emits raw logits.
# NOTE(review): with no non-linear activations the stacked Dense layers amount
# to a single linear map; consider activation='relu' on the hidden layers.
model = tf.keras.Sequential(
    [
        # shape must be a tuple: (4) is just the integer 4, which newer Keras
        # versions reject, so the trailing comma is required.
        tf.keras.Input(shape=(4,)),
        layers.Dense(2, name="firstLayer"),
        layers.Dense(3, name="secondLayer"),
        layers.Dense(3, name="fourthLayer"),
    ]
)

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
firstLayer (Dense)           (None, 2)                 10        
_________________________________________________________________
secondLayer (Dense)          (None, 3)                 9         
_________________________________________________________________
fourthLayer (Dense)          (None, 3)                 12        
Total params: 31
Trainable params: 31
Non-trainable params: 0
_________________________________________________________________


In [9]:
# Configure training: Adam optimizer, categorical cross-entropy computed on raw
# logits (from_logits=True), and accuracy as the reported metric.
model.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
    optimizer='adam',
    metrics=['accuracy'],
)

# Train for 5 epochs, evaluating on the validation dataset after each one.
model.fit(train, epochs=5, validation_data=val)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7fe0d0100cc0>