In [24]:
import numpy as np
import pandas as pd
import math
import preprocessing as pre
from forward_feed_nn import NeuralNetwork


# Hyperparameters fine tuning

In [25]:
df = np.load('../data/fashion_train.npy')

df = pre.preprocess(df)

K_classes = np.unique(df[:, -1])

X_train = df[:,:-1]
y_train = df[:,-1]

#load test data
df = np.load('../data/fashion_test.npy')

df = pre.preprocess(df)

X_test = df[:,:-1]
y_test = df[:,-1]



In [27]:
X_train_input_layer = int(X_train.shape[1])

X_test_inputlayer = int(X_test.shape[1])

In [28]:
layer_options = [[X_train_input_layer, 128, 5], [X_train_input_layer, 256, 128, 5], [X_train_input_layer, 512, 256, 128, 5]]
dropout_rates = [0, 0.2, 0.3, 0.5]
learning_rates = [0.001, 0.01, 0.1]

best_HP = {

}

for layers in layer_options:
    for dropout in dropout_rates:
        for learning in learning_rates:
            nn = NeuralNetwork(layers, learning_rate=learning, dropout_rate=dropout)
            nn.train(X_train, y_train, epochs=100)
            accuracy = nn.accuracy(nn.predict(X_test), y_test)
            print(f"Layers: {layers}, Dropout: {dropout}, Accuracy: {accuracy}, Learning Rate: {learning}")
            best_HP[accuracy] = (layers, dropout, accuracy, learning)

Epoch 0, Loss: 1.6209
Epoch 10, Loss: 0.4483
Epoch 20, Loss: 0.8073
Epoch 30, Loss: 0.4082
Epoch 40, Loss: 0.8283
Epoch 50, Loss: 0.2101
Epoch 60, Loss: 0.5551
Epoch 70, Loss: 0.6158
Epoch 80, Loss: 0.4472
Epoch 90, Loss: 0.4892
Layers: [65, 128, 5], Dropout: 0, Accuracy: 0.8238, Learning Rate: 0.001
Epoch 0, Loss: 0.2254
Epoch 10, Loss: 0.1880
Epoch 20, Loss: 0.5282
Epoch 30, Loss: 0.2297
Epoch 40, Loss: 0.0736
Epoch 50, Loss: 0.2859
Epoch 60, Loss: 0.3931
Epoch 70, Loss: 0.0991
Epoch 80, Loss: 0.0855
Epoch 90, Loss: 0.6889
Layers: [65, 128, 5], Dropout: 0, Accuracy: 0.8458, Learning Rate: 0.01
Epoch 0, Loss: 0.2239
Epoch 10, Loss: 0.2664
Epoch 20, Loss: 0.0562
Epoch 30, Loss: 0.0411
Epoch 40, Loss: 0.0397
Epoch 50, Loss: 0.1667
Epoch 60, Loss: 0.0145
Epoch 70, Loss: 0.0096
Epoch 80, Loss: 0.0016
Epoch 90, Loss: 0.0117
Layers: [65, 128, 5], Dropout: 0, Accuracy: 0.848, Learning Rate: 0.1
Epoch 0, Loss: 1.1569
Epoch 10, Loss: 0.2534
Epoch 20, Loss: 0.4727
Epoch 30, Loss: 0.3936
Epoch 4

In [30]:
max_key = max(best_HP)
max_value = best_HP[max_key]


print('Best hyperparameters for nn:')
print(f"Best layer: {max_value[0]}")
print(f"Best dropout: {max_value[1]}")
print(f"Best accuracy: {max_value[2]}")
print(f"Best learning rate: {max_value[3]}")


Best hyperparameters for nn:
Best layer: [65, 512, 256, 128, 5]
Best dropout: 0.2
Best accuracy: 0.8648
Best learning rate: 0.1


# best parameter
- Best layer: [65, 512, 256, 128, 5]
- Best dropout: 0.2
- Best accuracy: 0.8684
- Best learning rate: 0.1

# Final train and test

In [31]:
df = np.load('../data/fashion_train.npy') 

df = pre.preprocess(df)

best_hp_layers = [65, 512, 256, 128, 5]
best_hp_dropout = 0.2
best_hp_learning = 0.1

X = df[:,:-1]
y = df[:,-1]

nn = NeuralNetwork(best_hp_layers, learning_rate=best_hp_learning, dropout_rate=best_hp_dropout)
nn.train(X, y, epochs=100)

Epoch 0, Loss: 0.5586
Epoch 10, Loss: 0.4288
Epoch 20, Loss: 0.1846
Epoch 30, Loss: 0.1854
Epoch 40, Loss: 0.3843
Epoch 50, Loss: 0.0779
Epoch 60, Loss: 0.0492
Epoch 70, Loss: 0.0599
Epoch 80, Loss: 0.0224
Epoch 90, Loss: 0.1113


In [33]:
import pickle

with open('./models/forward_feed_nn_model.pkl', 'wb') as f:
    pickle.dump(nn, f)

# Test

In [34]:
#load test data

df = np.load('../data/fashion_test.npy')

df = pre.preprocess(df)

X = df[:,:-1]
y = df[:,-1]

accuracy = nn.accuracy(nn.predict(X), y)
print(f"Final accuracy: {accuracy*100}%")

Final accuracy: 86.38%


# Compute the std

In [35]:
import pickle
import pandas as pd

model = pickle.load(open('models/forward_feed_nn_model.pkl', 'rb'))

df = pd.DataFrame(df)

acc_list = []

for x in range(0, 1000):
    sample = df.sample(1000, replace=True)
    X = sample.iloc[:,:-1].to_numpy()
    y = sample.iloc[:,-1].to_numpy()
    preds = model.predict(X)
    accuracy = np.sum(preds == y) / len(y)
    acc_list.append(accuracy)

print(f'accuracy: {round(np.mean(acc_list)*100, 2)}% ± {round(np.std(acc_list)*100, 2)}%')

accuracy: 86.44% ± 1.08%


# compare to sklearn

In [36]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import SGD
from sklearn.model_selection import train_test_split
import preprocessing as pre

# Train

In [37]:
df = np.load('../data/fashion_train.npy') 

df = pre.preprocess(df)

X_train, X_val, y_train, y_val = train_test_split(df[:,:-1], df[:,-1], test_size=0.2, random_state=42)

print(X_train.shape[1])

65


In [38]:


# Hyperparameters
input_size = X_train.shape[1]
hidden_layers = [256, 128]  # Hidden layer sizes
output_size = 5  # Output layer size
dropout_rate = 0.2  # Dropout rate
learning_rate = 0.1  # Learning rate

# Build the model
model = Sequential()

# Input and first hidden layer
model.add(Dense(hidden_layers[0], input_shape=(input_size,), activation='relu'))
model.add(Dropout(dropout_rate))

# Second hidden layer
model.add(Dense(hidden_layers[1], activation='relu'))
model.add(Dropout(dropout_rate))

# Output layer
model.add(Dense(output_size, activation='softmax'))

model.compile(optimizer=SGD(learning_rate=learning_rate),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [39]:
from tensorflow.keras.callbacks import EarlyStopping

# Define EarlyStopping
early_stopping = EarlyStopping(
    monitor='val_loss',   
    patience=3,           
    restore_best_weights=True  
)

# EarlyStopping
history = model.fit(
    X_train,
    y_train,
    epochs=100,                
    batch_size=64,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping]  
)


Epoch 1/100
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.7159 - loss: 1.0031 - val_accuracy: 0.8300 - val_loss: 0.4984
Epoch 2/100
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.7770 - loss: 0.6182 - val_accuracy: 0.8090 - val_loss: 0.5430
Epoch 3/100
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.7852 - loss: 0.6094 - val_accuracy: 0.8105 - val_loss: 0.5207
Epoch 4/100
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.7829 - loss: 0.6315 - val_accuracy: 0.8260 - val_loss: 0.4925
Epoch 5/100
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.7762 - loss: 0.6926 - val_accuracy: 0.8100 - val_loss: 0.6626
Epoch 6/100
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.7858 - loss: 0.7224 - val_accuracy: 0.7275 - val_loss: 1.1290
Epoch 7/100
[1m125/12

# Final test

In [40]:
#Load test data
df = np.load('../data/fashion_test.npy')

df = pre.preprocess(df)

x = df[:,:-1]
y = df[:,-1]

print(x.shape, y.shape) 

pred = model.predict(x)

print(model.evaluate(x, y))

(5000, 65) (5000,)
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8043 - loss: 0.5518
[0.5467115044593811, 0.8080000281333923]


# Compute the accuracy and std for tensorflow model

In [41]:
from IPython.display import clear_output

df = pd.DataFrame(df)  # Convert to DataFrame if not already

acc_list = []

for _ in range(1000):  # Perform 1,000 bootstrapping iterations
    # Bootstrap sampling
    print(_)
    sample = df.sample(1000, replace=True)
    X = sample.iloc[:, :-1].to_numpy()  # Features
    y = sample.iloc[:, -1].to_numpy()  # Labels
    
    # Predict and calculate accuracy
    preds = model.predict(X)
    preds = np.argmax(preds, axis=1)  # Convert probabilities to class labels
    accuracy = np.sum(preds == y) / len(y)  # Compute accuracy
    acc_list.append(accuracy)
    clear_output(wait=True)

# Display the final mean accuracy and standard deviation
mean_acc = np.mean(acc_list) * 100
std_acc = np.std(acc_list) * 100
print(f'Accuracy: {round(mean_acc, 2)}% ± {round(std_acc, 2)}%')


Accuracy: 80.85% ± 1.26%


# Conclusion

- Even if the tensorflow model seems to have a exploding gradiant is it not over fitting, as the traning error is not lower then validation error, but the result of the accuracy i because tensorflow have a diffent implentaion, so we need to optimize the hyperparameter for tensorflow to get a good model and we also need to reduce the epoch because the exploding gradiant 