In [1]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
# Load the MNIST dataset
mnist = fetch_openml(name='mnist_784', version=1, cache=True, parser='auto')


In [3]:
# Extract features (X) and labels (y)
X, y = mnist.data, mnist.target


In [4]:
# Normalize the features using StandardScaler
scaler = StandardScaler()
X_normalized = scaler.fit_transform(X)


In [5]:

# Split the data into training and testing sets
X_train, X_test, Y_train, Y_test = train_test_split(X_normalized, y, test_size=0.2, random_state=42)
# X_train = X_train.values
# X_test = X_test.values
Y_train = Y_train.astype('float').values
Y_test = Y_test.astype('float').values

In [6]:
print(X_train.shape)
Y_train = Y_train.reshape(-1,1)
# print(Y_train)
print(Y_train.shape)
print(X_test.shape)
Y_test = Y_test.reshape(-1,1)
print(Y_test.shape)

Y_train = Y_train/10
Y_test = Y_test/10

(56000, 784)
(56000, 1)
(14000, 784)
(14000, 1)


In [7]:
'''two layers. Layer 1 with 89 output neurons with tanh activation. Layer 2
with ten output neuron and sigmoid activation. use mean squared loss'''

'two layers. Layer 1 with 89 output neurons with tanh activation. Layer 2\nwith ten output neuron and sigmoid activation. use mean squared loss'

In [8]:
import util
from neuralNetwork import NeuralNetwork
from layer import DenseLayer

In [9]:
model1 = NeuralNetwork(layers=[DenseLayer(),DenseLayer()],neurons_per_layer=[89,10],activation_per_layer=[util.TanhLayer(),util.SigmoidLayer()],lr=0.01,batch_size=32,epochs=50,loss_obj=util.MeanSquaredLossLayer(),X=X_train,Y=Y_train)

model1.init_layers()

In [10]:
model1.train()

epoch 0: Loss = 4715.74632863737
epoch 1: Loss = 4183.560744894692
epoch 2: Loss = 3741.7165331112437
epoch 3: Loss = 3414.251581554747
epoch 4: Loss = 3175.7553961800645
epoch 5: Loss = 3003.4392170592964
epoch 6: Loss = 2881.0979363518077
epoch 7: Loss = 2790.100496947476
epoch 8: Loss = 2719.0895093828267
epoch 9: Loss = 2661.0539212805347
epoch 10: Loss = 2611.8509951991905
epoch 11: Loss = 2568.7837784197845
epoch 12: Loss = 2530.798317007969
epoch 13: Loss = 2497.4373550963564
epoch 14: Loss = 2467.299093358266
epoch 15: Loss = 2439.445756581318
epoch 16: Loss = 2413.7486525975614
epoch 17: Loss = 2389.843735323851
epoch 18: Loss = 2367.397181509692
epoch 19: Loss = 2346.5663730430297
epoch 20: Loss = 2326.8968864424833
epoch 21: Loss = 2307.875637755165
epoch 22: Loss = 2289.5682325911494
epoch 23: Loss = 2272.071027511528
epoch 24: Loss = 2255.344000910038
epoch 25: Loss = 2239.150421062362
epoch 26: Loss = 2223.272307731676
epoch 27: Loss = 2207.6790385701747
epoch 28: Loss = 

In [11]:
# test
Y_pred_test = model1.predict(X_test)
print(Y_pred_test[0])
print(Y_test[0])

[0.95741718 0.99991444 0.99998432 0.99999993 0.99641237 0.99993371
 0.99998898 0.95973866 0.99988836 0.99694461]
[0.8]


In [12]:
import numpy as np

In [13]:
Y_pred_test_labels = np.argmax(Y_pred_test,axis=0,keepdims=True)
print("accuracy:",np.sum(Y_pred_test_labels/10==Y_test)/Y_test.shape[0])

accuracy: 0.0


In [14]:
'''two layers. Layer 1 with 89 output neurons with tanh activation. Layer
2 with ten output neuron and linear activation. use softmax with cross
entropy loss.'''

'two layers. Layer 1 with 89 output neurons with tanh activation. Layer\n2 with ten output neuron and linear activation. use softmax with cross\nentropy loss.'

In [15]:
from sklearn.preprocessing import OneHotEncoder

In [16]:
encoder = OneHotEncoder(sparse_output=False,categories='auto')
# print((Y_train*10)[0])
Y_train_onehot = encoder.fit_transform(Y_train*10)
Y_test_onehot = encoder.fit_transform(Y_test*10)

In [17]:
print(Y_train_onehot.shape)
print(Y_test_onehot.shape)

(56000, 10)
(14000, 10)


In [18]:
model2 = NeuralNetwork(layers=[DenseLayer(),DenseLayer()],neurons_per_layer=[89,10],activation_per_layer=[util.TanhLayer(),util.SoftmaxLayer()],lr=0.01,batch_size=32,epochs=50,loss_obj=util.CrossEntropyLossLayer(),X=X_train,Y=Y_train_onehot)

model2.init_layers()

In [19]:
model2.train()

epoch 0: Loss = 10569.953321098761
epoch 1: Loss = 4414.026975803615
epoch 2: Loss = 3160.7903480119876
epoch 3: Loss = 2558.7869469514685
epoch 4: Loss = 2183.406521808038
epoch 5: Loss = 1919.0649945571934
epoch 6: Loss = 1719.543192934177
epoch 7: Loss = 1563.6090878366326
epoch 8: Loss = 1438.2443271224226
epoch 9: Loss = 1335.2346204963117
epoch 10: Loss = 1248.912289588836
epoch 11: Loss = 1175.7945794782497
epoch 12: Loss = 1112.9879148588839
epoch 13: Loss = 1058.8394367253588
epoch 14: Loss = 1011.5221108231782
epoch 15: Loss = 970.2121571096934
epoch 16: Loss = 934.0894871843017
epoch 17: Loss = 902.3146778726892
epoch 18: Loss = 874.275688209751
epoch 19: Loss = 849.6547765748913
epoch 20: Loss = 827.9320433672641
epoch 21: Loss = 808.7216866410737
epoch 22: Loss = 791.6876918282461
epoch 23: Loss = 776.4342142955438
epoch 24: Loss = 762.7963741708156
epoch 25: Loss = 750.4965524288463
epoch 26: Loss = 739.3214978617941
epoch 27: Loss = 729.1448609723843
epoch 28: Loss = 719

In [20]:
Y_pred_test_onehot = model2.predict(X_test)

In [21]:
Y_pred_test_labels = np.argmax(Y_pred_test_onehot,axis=1,keepdims=True)
print(Y_pred_test_labels[0])
print(Y_test[0])

[8]
[0.8]


In [22]:
# A = np.array([[1,2,3],
#              [4,5,6]])
# np.argmax(A,axis=1,keepdims=True)

In [23]:
print("accuracy:",np.sum(Y_pred_test_labels/10==Y_test)/Y_test.shape[0])

accuracy: 0.8567142857142858


In [24]:
model2.epochs = 100
model2.lr = 1
model2.train()

epoch 0: Loss = 613.52744825338
epoch 1: Loss = 610.546148644722
epoch 2: Loss = 607.6110672325952
epoch 3: Loss = 604.7308280638449
epoch 4: Loss = 601.9056426553574
epoch 5: Loss = 599.1311226062013
epoch 6: Loss = 596.4582983973057
epoch 7: Loss = 593.8502050789344
epoch 8: Loss = 591.2778187977278
epoch 9: Loss = 588.7530943534182
epoch 10: Loss = 586.2766778062157
epoch 11: Loss = 583.8466098784126
epoch 12: Loss = 581.4713768166761
epoch 13: Loss = 579.1271412325274
epoch 14: Loss = 576.8156769143134
epoch 15: Loss = 574.5673903879126
epoch 16: Loss = 572.3615613576516
epoch 17: Loss = 570.1840223353983
epoch 18: Loss = 568.0203105628051
epoch 19: Loss = 565.87965529728
epoch 20: Loss = 563.8179993699131
epoch 21: Loss = 561.7837075948837
epoch 22: Loss = 559.7668450906602
epoch 23: Loss = 557.7735739654753
epoch 24: Loss = 555.825012264384
epoch 25: Loss = 553.8979523962082
epoch 26: Loss = 551.9978502825559
epoch 27: Loss = 550.08783708496
epoch 28: Loss = 548.1633712050493
epo

In [25]:
Y_pred_test_onehot = model2.predict(X_test)
Y_pred_test_labels = np.argmax(Y_pred_test_onehot,axis=1,keepdims=True)
print(Y_pred_test_labels[0])
print(Y_test[0])
print("accuracy:",np.sum(Y_pred_test_labels/10==Y_test)/Y_test.shape[0])

[8]
[0.8]
accuracy: 0.8726428571428572
