In [1]:
# import required libraries
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import tensorflow
from tensorflow import keras
from keras import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import keras_tuner as kt

In [2]:
# load and read dataset
dataset = pd.read_excel('final_dataset.xlsx')
dataset.head()

Unnamed: 0,Red_Mean,Red_Median,Red_Variance,Red_Std,Green_Mean,Green_Median,Green_Variance,Green_Std,Blue_Mean,Blue_Median,Blue_Variance,Blue_Std,label
0,121.849594,145.0,3304.497718,57.484761,125.967102,135.0,1763.033006,41.988487,111.251572,134.0,3330.577657,57.711157,0
1,103.830032,102.0,2152.24139,46.392256,114.606018,121.0,1838.561514,42.87845,95.103455,88.0,3073.705972,55.441013,1
2,116.047089,134.0,2488.556041,49.885429,119.54425,125.0,1193.293666,34.544083,100.909332,119.0,2621.018971,51.195888,0
3,136.331741,139.0,1649.347666,40.612162,147.529755,151.0,1412.353454,37.581291,126.857681,129.0,2512.158838,50.121441,1
4,129.85025,134.0,1940.935613,44.056051,131.718399,139.0,1782.426972,42.218799,101.312119,94.0,2798.758005,52.903289,1


In [3]:
# check for discriptive statistics
dataset.describe()

Unnamed: 0,Red_Mean,Red_Median,Red_Variance,Red_Std,Green_Mean,Green_Median,Green_Variance,Green_Std,Blue_Mean,Blue_Median,Blue_Variance,Blue_Std,label
count,5702.0,5702.0,5702.0,5702.0,5702.0,5702.0,5702.0,5702.0,5702.0,5702.0,5702.0,5702.0,5702.0
mean,126.866256,134.084707,2354.106217,47.866691,131.394668,139.512539,1892.401002,42.468821,111.77351,116.321905,2898.407095,53.288393,1.020168
std,21.349339,25.988085,776.792043,7.930773,22.146187,24.043098,807.389979,9.424215,19.837836,25.417473,816.039576,7.665804,0.81219
min,72.897812,56.0,518.18539,22.763686,78.858002,82.0,259.989089,16.124177,58.766464,37.0,517.141674,22.740749,0.0
25%,114.593369,119.0,1798.977901,42.41436,118.01701,125.0,1284.345478,35.837766,100.909451,102.0,2304.471245,48.004909,0.0
50%,125.856995,134.0,2282.452842,47.775023,128.989677,137.0,1792.696035,42.340241,110.126236,114.0,2882.111371,53.685299,1.0
75%,136.507111,146.0,2776.939211,52.696672,141.508694,151.0,2423.053734,49.224523,119.115005,126.0,3438.022143,58.63465,2.0
max,246.597122,255.0,5933.46028,77.028957,251.254089,255.0,4775.445991,69.104602,235.422943,255.0,9020.131135,94.974371,2.0


In [4]:
# check for null values if any
dataset.isnull().sum()

Red_Mean          0
Red_Median        0
Red_Variance      0
Red_Std           0
Green_Mean        0
Green_Median      0
Green_Variance    0
Green_Std         0
Blue_Mean         0
Blue_Median       0
Blue_Variance     0
Blue_Std          0
label             0
dtype: int64

In [5]:
# find data types of each column
dataset.dtypes

Red_Mean          float64
Red_Median        float64
Red_Variance      float64
Red_Std           float64
Green_Mean        float64
Green_Median      float64
Green_Variance    float64
Green_Std         float64
Blue_Mean         float64
Blue_Median       float64
Blue_Variance     float64
Blue_Std          float64
label               int64
dtype: object

# ANN Implementation

In [6]:
# separate dataset into independent and dependent sets
x = dataset.iloc[:, : -1]
y = dataset.iloc[:, -1]
y.head()

0    0
1    1
2    0
3    1
4    1
Name: label, dtype: int64

In [7]:
# scale values in independent set
scaler = StandardScaler()
x = scaler.fit_transform(x)

In [8]:
# now classify dataset into training and testing
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.1, random_state = 1)

In [9]:
# prepare and build model
model = Sequential()

# input layer
model.add(Dense(units = 12, input_dim = 12, input_shape = (12, ), kernel_initializer = 'uniform'))

# first hidden layer
model.add(Dense(units = 128, activation = 'relu', kernel_initializer = 'uniform'))

# second hidden layer
model.add(Dense(units = 128, activation = 'tanh', kernel_initializer = 'uniform'))

# third hidden layer
model.add(Dense(units = 128, activation = 'relu', kernel_initializer = 'uniform'))

# output layer
model.add(Dense(units = 3, activation = 'softmax', kernel_initializer = 'uniform'))

# summarize model's performance
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 12)                156       
                                                                 
 dense_1 (Dense)             (None, 128)               1664      
                                                                 
 dense_2 (Dense)             (None, 128)               16512     
                                                                 
 dense_3 (Dense)             (None, 128)               16512     
                                                                 
 dense_4 (Dense)             (None, 3)                 387       
                                                                 
Total params: 35231 (137.62 KB)
Trainable params: 35231 (137.62 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [10]:
# compile model
model.compile(optimizer = 'adam', loss = 'SparseCategoricalCrossentropy', metrics = ['accuracy'])

In [11]:
# fit model on the data
model.fit(x_train, y_train, epochs = 5, batch_size = 20, validation_data = (x_test, y_test))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x20e9375c810>

In [12]:
# evaluate model for training dataset
model.evaluate(x_train, y_train)



[0.0964641273021698, 0.9647242426872253]

In [13]:
# evaluate test set
model.evaluate(x_test, y_test)



[0.09451629966497421, 0.9667250514030457]

# Hyperparameter Tuning

### Tuning Optimizer

In [35]:
# define a function to loop among different optimizers
def mydef(hp):

    model1 = Sequential()

    # input layer
    model1.add(Dense(units = 12, input_dim = 12, input_shape = (12, ), kernel_initializer = 'uniform'))

    # first hidden layer
    model1.add(Dense(units = 128, activation = 'relu', kernel_initializer = 'uniform'))

    # second hidden layer
    model1.add(Dense(units = 128, activation = 'tanh', kernel_initializer = 'uniform'))

    # third hidden layer
    model1.add(Dense(units = 128, activation = 'relu', kernel_initializer = 'uniform'))

    # declare all the optimizers
    optimizers = hp.Choice('optimizer', values = ['adadelta', 'SDG', 'adagrad'])

    # compile
    model1.compile(optimizer = optimizers, loss = 'SparseCategoricalCrossentropy', metrics = ['accuracy'])

    return model1

In [36]:
# prepare tuner
tuner = kt.RandomSearch(mydef, objective = 'val_accuracy', max_trials = 5)

In [37]:
tuner.search(x_train, y_train, epochs = 5, validation_data = (x_test, y_test))

Trial 3 Complete [00h 00m 03s]
val_accuracy: 0.9474605917930603

Best val_accuracy So Far: 0.9474605917930603
Total elapsed time: 00h 00m 07s


In [38]:
# get the best optimizer
tuner.get_best_hyperparameters()[0].values

{'optimizer': 'adagrad'}

In [39]:
# get the best model
model2 = tuner.get_best_models(num_models = 1)[0]
model2.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 12)                156       
                                                                 
 dense_1 (Dense)             (None, 128)               1664      
                                                                 
 dense_2 (Dense)             (None, 128)               16512     
                                                                 
 dense_3 (Dense)             (None, 128)               16512     
                                                                 
Total params: 34844 (136.11 KB)
Trainable params: 34844 (136.11 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


### Tuning for Number of Neurons

In [18]:
# define function
def num_neurons(hp):

    # declare the model
    model = Sequential()

    # declare input layer
    model.add(Dense(units = 12, input_dim = 12, input_shape = (12, ), kernel_initializer = 'uniform'))

    # now ininitiate how many neurons should be there usibg loop
    units = hp.Int('units', min_value = 12, max_value = 128, step = 8)

    # first hidden layer
    model.add(Dense(units = units, activation = 'relu', kernel_initializer = 'uniform'))

    # second hidden layer
    model.add(Dense(units = units, activation = 'tanh', kernel_initializer = 'uniform'))

    # third hidden layer
    model.add(Dense(units = units, activation = 'relu', kernel_initializer = 'uniform'))

    # output layer
    model.add(Dense(units = 3, activation = 'softmax', kernel_initializer = 'uniform'))

    # compile model
    model.compile(optimizer = 'adam', loss = 'SparseCategoricalCrossentropy', metrics = ['accuracy'])

    return model

In [19]:
# make a tuner
tuner1 = kt.RandomSearch(num_neurons, objective = 'val_accuracy', max_trials = 20, directory = 'Num_Neurons')

In [20]:
# now find feed data to select best no of neurons
tuner1.search(x_train, y_train, epochs = 5, validation_data = (x_test, y_test))

Trial 13 Complete [00h 00m 07s]
val_accuracy: 0.9299474358558655

Best val_accuracy So Far: 0.9597197771072388
Total elapsed time: 00h 01m 35s


In [21]:
# find out best no of neurons
tuner1.get_best_hyperparameters()[0].values

{'units': 52}

### Tuning for Number of Layers

In [26]:
# define function
def num_layers(hp):

    # define model
    model3 = Sequential()

    # input layer
    model3.add(Dense(units = 12, input_dim = 12, input_shape = (12, ), kernel_initializer = 'uniform'))

    # loop for best number of layers
    for i in range(hp.Int('num_layers', min_value = 1, max_value = 10)):
        model3.add(Dense(units = 52, activation = 'relu', kernel_initializer = 'uniform'))

    # output layer
    model3.add(Dense(units = 3, activation = 'softmax', kernel_initializer = 'uniform'))

    # compile model
    model3.compile(optimizer = 'adam', loss = 'SparseCategoricalCrossentropy', metrics = ['accuracy'])

    return model3

In [27]:
# build a tuner
tuner3 = kt.RandomSearch(num_layers, objective = 'val_accuracy', max_trials = 10, directory = 'Num_Layers')

In [28]:
# now feed data into best selected model
tuner3.search(x_train, y_train, epochs = 5, validation_data = (x_test, y_test))

Trial 10 Complete [00h 00m 07s]
val_accuracy: 0.9509631991386414

Best val_accuracy So Far: 0.9509631991386414
Total elapsed time: 00h 01m 21s


In [29]:
# find best no of layers
tuner3.get_best_hyperparameters()[0].values

{'num_layers': 2}