In [47]:
import numpy as np
import pandas as pd
import math
import preprocessing as pre
from forward_feed_nn import NeuralNetwork


# Hyperparameters fine tuning

In [48]:
df = np.load('../data/fashion_train.npy')

df = pre.preprocess(df)

K_classes = np.unique(df[:, -1])

X_train = df[:,:-1]
y_train = df[:,-1]

#load test data
df = np.load('../data/fashion_test.npy')

df = pre.preprocess(df)

X_test = df[:,:-1]
y_test = df[:,-1]



In [49]:
X_train_input_layer = int(X_train.shape[1])

X_test_inputlayer = int(X_test.shape[1])

In [34]:
layer_options = [[X_train_input_layer, 128, 5], [X_train_input_layer, 256, 128, 5], [X_train_input_layer, 512, 256, 128, 5]]
dropout_rates = [0, 0.2, 0.3, 0.5]
learning_rates = [0.001, 0.01, 0.1]

best_HP = {

}

for layers in layer_options:
    for dropout in dropout_rates:
        for learning in learning_rates:
            nn = NeuralNetwork(layers, learning_rate=learning, dropout_rate=dropout)
            nn.train(X_train, y_train, epochs=100)
            accuracy = nn.accuracy(nn.predict(X_test), y_test)
            print(f"Layers: {layers}, Dropout: {dropout}, Accuracy: {accuracy}, Learning Rate: {learning}")
            best_HP[accuracy] = (layers, dropout, accuracy, learning)

Epoch 0, Loss: 0.7116
Epoch 10, Loss: 0.4377
Epoch 20, Loss: 0.3869
Epoch 30, Loss: 0.1271
Epoch 40, Loss: 0.1801
Epoch 50, Loss: 0.4961
Epoch 60, Loss: 0.2468
Epoch 70, Loss: 0.6205
Epoch 80, Loss: 0.9352
Epoch 90, Loss: 0.3011
Layers: [89, 128, 5], Dropout: 0, Accuracy: 0.8312, Learning Rate: 0.001
Epoch 0, Loss: 0.5988
Epoch 10, Loss: 0.2079
Epoch 20, Loss: 0.6675
Epoch 30, Loss: 0.1247
Epoch 40, Loss: 0.1454
Epoch 50, Loss: 0.3657
Epoch 60, Loss: 0.1984
Epoch 70, Loss: 0.1133
Epoch 80, Loss: 0.2147
Epoch 90, Loss: 0.1547
Layers: [89, 128, 5], Dropout: 0, Accuracy: 0.8566, Learning Rate: 0.01
Epoch 0, Loss: 0.4397
Epoch 10, Loss: 0.2143
Epoch 20, Loss: 0.0649
Epoch 30, Loss: 0.1439
Epoch 40, Loss: 0.0039
Epoch 50, Loss: 0.0175
Epoch 60, Loss: 0.0238
Epoch 70, Loss: 0.0045
Epoch 80, Loss: 0.0020
Epoch 90, Loss: 0.0007
Layers: [89, 128, 5], Dropout: 0, Accuracy: 0.8446, Learning Rate: 0.1
Epoch 0, Loss: 1.6816
Epoch 10, Loss: 0.8379
Epoch 20, Loss: 0.3734
Epoch 30, Loss: 0.5223
Epoch 

In [42]:
max_key = max(best_HP)
max_value = best_HP[max_key]


print('Best hyperparameters for nn:')
print(f"Best layer: {max_value[0]}")
print(f"Best dropout: {max_value[1]}")
print(f"Best accuracy: {max_value[2]}")
print(f"Best learning rate: {max_value[3]}")


Best hyperparameters for nn:
Best layer: [89, 256, 128, 5]
Best dropout: 0.2
Best accuracy: 0.8684
Best learning rate: 0.1


# best parameter
- Best layer: [89, 256, 128, 5]
- Best dropout: 0.2
- Best accuracy: 0.8684
- Best learning rate: 0.1

# Final train and test

In [56]:
df = np.load('../data/fashion_train.npy') 

df = pre.preprocess(df)

best_hp_layers = [89, 256, 128, 5]
best_hp_dropout = 0.2
best_hp_learning = 0.1

X = df[:,:-1]
y = df[:,-1]

nn = NeuralNetwork(best_hp_layers, learning_rate=best_hp_learning, dropout_rate=best_hp_dropout)
nn.train(X, y, epochs=100)

Epoch 0, Loss: 0.1530
Epoch 10, Loss: 0.2831
Epoch 20, Loss: 0.0738
Epoch 30, Loss: 0.3177
Epoch 40, Loss: 0.0388
Epoch 50, Loss: 0.1004
Epoch 60, Loss: 0.1042
Epoch 70, Loss: 0.0981
Epoch 80, Loss: 0.0287
Epoch 90, Loss: 0.1012


# Test

In [60]:
#load test data

df = np.load('../data/fashion_test.npy')

df = pre.preprocess(df)

X = df[:,:-1]
y = df[:,-1]

accuracy = nn.accuracy(nn.predict(X), y)
print(f"Final accuracy: {accuracy*100}%")

Final accuracy: 86.48%


# Compute the std

In [89]:
import pickle
import pandas as pd

model = pickle.load(open('models/model_forward_feed_nn.pkl', 'rb'))

df = pd.DataFrame(df)

acc_list = []

for x in range(0, 1000):
    sample = df.sample(1000, replace=True)
    X = sample.iloc[:,:-1].to_numpy()
    y = sample.iloc[:,-1].to_numpy()
    preds = model.predict(X)
    accuracy = np.sum(preds == y) / len(y)
    acc_list.append(accuracy)

print(f'accuracy: {round(np.mean(acc_list)*100, 2)}% ± {round(np.std(acc_list)*100, 2)}%')

accuracy: 86.17% ± 1.05%


# compare to sklearn

In [28]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split

# Train

In [29]:
df = np.load('../data/fashion_train.npy') 

df = pre.preprocess(df)

X_train, X_val, y_train, y_val = train_test_split(df[:,:-1], df[:,-1], test_size=0.2, random_state=42)

print(X_train.shape[1])

89


In [91]:


# Hyperparameters
input_size = X_train.shape[1]
hidden_layers = [256, 128]  # Hidden layer sizes
output_size = 5  # Output layer size
dropout_rate = 0.2  # Dropout rate
learning_rate = 0.1  # Learning rate

# Build the model
model = Sequential()

# Input and first hidden layer
model.add(Dense(hidden_layers[0], input_shape=(input_size,), activation='relu'))
model.add(Dropout(dropout_rate))

# Second hidden layer
model.add(Dense(hidden_layers[1], activation='relu'))
model.add(Dropout(dropout_rate))

# Output layer
model.add(Dense(output_size, activation='softmax'))

model.compile(optimizer=Adam(learning_rate=learning_rate),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [92]:
model.fit(X_train, y_train, epochs=5, batch_size=64, validation_data=(X_val, y_val))

Epoch 1/5
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - accuracy: 0.6517 - loss: 476.8136 - val_accuracy: 0.7815 - val_loss: 2079.5442
Epoch 2/5
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.7306 - loss: 5276.0732 - val_accuracy: 0.7130 - val_loss: 13694.9473
Epoch 3/5
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.7302 - loss: 31209.5781 - val_accuracy: 0.7715 - val_loss: 70022.5234
Epoch 4/5
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.7319 - loss: 141990.2188 - val_accuracy: 0.7655 - val_loss: 142125.5781
Epoch 5/5
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.7446 - loss: 210849.7500 - val_accuracy: 0.7165 - val_loss: 335374.3125


<keras.src.callbacks.history.History at 0x3642cdb90>

# Final test

In [None]:
#Load test data
df = np.load('../data/fashion_test.npy')

df = pre.preprocess(df)

x = df[:,:-1]
y = df[:,-1]

print(x.shape, y.shape) 

pred = model.predict(x)

print(model.evaluate(x, y))

(5000, 89) (5000,)
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7042 - loss: 399429.7812
[393520.5, 0.7049999833106995]


# Compute the accuracy and std for tensorflow model

In [95]:

df = pd.DataFrame(df)

acc_list = []

for x in range(0, 1000):
    sample = df.sample(1000, replace=True)
    X = sample.iloc[:,:-1].to_numpy()
    y = sample.iloc[:,-1].to_numpy()
    preds = model.predict(X)
    preds = np.argmax(preds, axis=1)
    accuracy = np.sum(preds == y) / len(y)
    acc_list.append(accuracy)

print(f'accuracy: {round(np.mean(acc_list)*100, 2)}% ± {round(np.std(acc_list)*100, 2)}%')

[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

# Conclusion

- Even if the tensorflow model seems to have a exploding gradiant is it not over fitting, as the traning error is not lower then validation error, but the result of the accuracy i because tensorflow have a diffent implentaion, so we need to optimize the hyperparameter for tensorflow to get a good model and we also need to reduce the epoch because the exploding gradiant 