In [3]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
import tensorflow.keras as keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

In [9]:
#Read the dataset file
data = pd.read_csv('/content/Iris.csv', index_col = 0)

In [12]:
#Display the first five rows of the dataframe
print(data.head())

    SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm      Species
Id                                                                       
1             5.1           3.5            1.4           0.2  Iris-setosa
2             4.9           3.0            1.4           0.2  Iris-setosa
3             4.7           3.2            1.3           0.2  Iris-setosa
4             4.6           3.1            1.5           0.2  Iris-setosa
5             5.0           3.6            1.4           0.2  Iris-setosa


In [13]:
#Displaying the information of the dataframe
print(data.info())

<class 'pandas.core.frame.DataFrame'>
Int64Index: 150 entries, 1 to 150
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   SepalLengthCm  150 non-null    float64
 1   SepalWidthCm   150 non-null    float64
 2   PetalLengthCm  150 non-null    float64
 3   PetalWidthCm   150 non-null    float64
 4   Species        150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 7.0+ KB
None


In [11]:
print(data.shape)

(150, 5)


In [14]:
# check there is no null values and check for missing values
print(data.isna().sum())

SepalLengthCm    0
SepalWidthCm     0
PetalLengthCm    0
PetalWidthCm     0
Species          0
dtype: int64


In [15]:



#Check for duplicates
print(data.duplicated(keep = False).value_counts())
print(data[data.duplicated(keep = False)])

False    145
True       5
dtype: int64
     SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm         Species
Id                                                                           
10             4.9           3.1            1.5           0.1     Iris-setosa
35             4.9           3.1            1.5           0.1     Iris-setosa
38             4.9           3.1            1.5           0.1     Iris-setosa
102            5.8           2.7            5.1           1.9  Iris-virginica
143            5.8           2.7            5.1           1.9  Iris-virginica


In [16]:
#Drop duplicates
data = data.drop_duplicates(keep = 'first').copy()
print(data.shape)

(147, 5)


In [18]:
#Reset the index of the dataframe

data = data.reset_index(drop = True)

In [19]:
#Split the dataframe into X(features) and y(target or labels)
X = data.iloc[:, 0:4].values
y = data['Species'].values

In [20]:
print(X.shape)
print(y.shape)

(147, 4)
(147,)


In [21]:
#Normalization on X
scaler = MinMaxScaler()
scaler.fit(X)
X_scaled = scaler.fit_transform(X)


In [22]:
#Convert target using LabelEncoder
encoder = LabelEncoder()
y_encoded = encoder.fit_transform(y)

In [23]:
#Categorically encoding the labels

num_categories = 3
y_onehot = keras.utils.to_categorical(y_encoded, num_categories)

In [24]:
#convert X_scaled and y_onehot into train and valid data

X_train, X_valid, y_train, y_valid = train_test_split(X_scaled, y_onehot,
                                                     test_size = 0.2,
                                                     random_state = 42)

In [25]:
#Define a model

model = Sequential()
model.add(Dense(units = 512, activation = 'relu', input_shape = (4,)))
model.add(Dense(units = 512, activation = 'relu'))
model.add(Dense(units = 512, activation = 'relu'))
model.add(Dropout(0.2))
model.add(Dense(units = num_categories, activation = 'softmax'))

In [26]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 512)               2560      
                                                                 
 dense_1 (Dense)             (None, 512)               262656    
                                                                 
 dense_2 (Dense)             (None, 512)               262656    
                                                                 
 dropout (Dropout)           (None, 512)               0         
                                                                 
 dense_3 (Dense)             (None, 3)                 1539      
                                                                 
Total params: 529411 (2.02 MB)
Trainable params: 529411 (2.02 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [27]:
#Compiling the model

model.compile(loss = 'categorical_crossentropy', metrics = ['accuracy'])

In [28]:
history=model.fit(X_train, y_train,
          epochs = 100,
          verbose = 1,
          validation_data = (X_valid, y_valid))

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [29]:
#Print actual and predicted value

y_pred = model.predict(X_valid)

actual = np.argmax(y_valid, axis = 1)
predicted = np.argmax(y_pred, axis = 1)

print(f"Actual:    {actual}")
print(f"Predicted: {predicted}")



Actual:    [2 1 2 0 2 0 1 0 1 0 0 1 2 1 2 1 0 1 2 0 0 2 0 2 1 0 1 2 1 0]
Predicted: [2 1 2 0 2 0 1 0 2 0 0 1 2 1 2 2 0 1 2 0 0 2 0 2 1 0 1 2 1 0]
