In [0]:
import tensorflow as tf

In [0]:
from google.colab import drive

In [80]:
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [81]:
!ls -all  './drive/My Drive/SVHN_single_grey1.h5'

-rw------- 1 root root 491644096 Oct  8 05:09 './drive/My Drive/SVHN_single_grey1.h5'


In [0]:
File = './drive/My Drive/SVHN_single_grey1.h5'

In [83]:
File

'./drive/My Drive/SVHN_single_grey1.h5'

In [0]:
import h5py

In [0]:
f = h5py.File(File, 'r')

In [54]:
print("Keys: %s" % f.keys())
a_group_key = list(f.keys())[0]

Keys: KeysView(<HDF5 file "SVHN_single_grey1.h5" (mode r)>)


In [0]:
data = list(f[a_group_key])

In [86]:

#List the keys present in the dataset
list(f.keys())

['X_test', 'X_train', 'X_val', 'y_test', 'y_train', 'y_val']

In [87]:
#Printing the keys in the dataset
for key in f.keys():
  print("Key value ",key)

Key value  X_test
Key value  X_train
Key value  X_val
Key value  y_test
Key value  y_train
Key value  y_val


In [88]:
#Fetching the data for each of the keys present in the dataset
for i, key in enumerate(f.keys()):
  print('key value is ',i+1, key)
  X_train = f['X_train'].value
  if(key == 'X_test'):
    X_test = f['X_test'].value
  if(key == 'X_val'):
    X_val = f['X_val'].value
  if(key == 'y_test'):
    y_test = f['y_test'].value
  if(key == 'y_train'):
    y_train = f['y_train'].value
  if(key == 'y_val'):
    y_val = f['y_val'].value

key value is  1 X_test
key value is  2 X_train
key value is  3 X_val
key value is  4 y_test
key value is  5 y_train
key value is  6 y_val


In [0]:
#Creating a function to reshape 3D data to 2D data, as KNN will not accept 3D data
#Reashaping by keeping the number of samples as the first dimension and the product of x and y as the second dimension.

def reshape(X):
  number_of_samples, nx, ny = X.shape
  X = X.reshape((number_of_samples, nx*ny))
  print(X.shape)
  return X

In [90]:
X_train = reshape(X_train)
print("Shape of X_train after reshaping ",X_train.shape)
X_test = reshape(X_test)
print("Shape of X_test after reshaping ",X_test.shape)
X_val = reshape(X_val)
print("Shape of X_val after reshaping ", X_val.shape)

(42000, 1024)
Shape of X_train after reshaping  (42000, 1024)
(18000, 1024)
Shape of X_test after reshaping  (18000, 1024)
(60000, 1024)
Shape of X_val after reshaping  (60000, 1024)


In [0]:
import sklearn
from sklearn import neighbors
from sklearn.neighbors import KNeighborsClassifier

In [0]:
#Choosing optimum k value by square root of sample size by two (sqrt(42000)/2)

knn = KNeighborsClassifier(n_neighbors=103, weights='uniform', algorithm='auto', n_jobs = -1)

In [93]:
knn.fit(X_train, y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=-1, n_neighbors=103, p=2,
                     weights='uniform')

In [0]:
y_predict = knn.predict(X_test)

In [0]:
from sklearn.metrics import accuracy_score, classification_report, f1_score, confusion_matrix

In [96]:
print("Accuracy score %5.2f " %(accuracy_score(y_true=y_test, y_pred=y_predict)))
print("F1 score is %5.2f" %(f1_score(y_true=y_test, y_pred=y_predict, average='weighted')))
print("Classification report")
print(classification_report(y_test, y_predict))
print("Confusion matrix ")
print(confusion_matrix(y_test, y_predict))

Accuracy score  0.53 
F1 score is  0.53
Classification report
              precision    recall  f1-score   support

           0       0.43      0.71      0.53      1814
           1       0.44      0.73      0.55      1828
           2       0.66      0.50      0.57      1803
           3       0.51      0.43      0.47      1719
           4       0.65      0.64      0.65      1812
           5       0.55      0.41      0.47      1768
           6       0.54      0.40      0.46      1832
           7       0.67      0.63      0.65      1808
           8       0.51      0.39      0.44      1812
           9       0.54      0.47      0.50      1804

    accuracy                           0.53     18000
   macro avg       0.55      0.53      0.53     18000
weighted avg       0.55      0.53      0.53     18000

Confusion matrix 
[[1287   82   25   43   52   29   90   39   51  116]
 [ 118 1328   53   62   68   31   36   66   30   36]
 [ 105  254  906   71   61   53   31  161   60  101]
 [

In [0]:
#Normalising the data
X_train = X_train/1024
X_test = X_test/1024
X_val = X_val/1024

In [0]:
#converting the classes to categorical
y_train = tf.keras.utils.to_categorical(y_train, num_classes=10)
y_test = tf.keras.utils.to_categorical(y_test, num_classes=10)
y_val = tf.keras.utils.to_categorical(y_val, num_classes=10)

In [0]:
#Using Neural Networks

model = tf.keras.models.Sequential()

In [100]:
#Batch normalisation and a dense layer with 200 neurons

model.add(tf.keras.layers.BatchNormalization())
tf.keras.layers.Dense(units=200, activation='relu', input_shape=(1024,))

<tensorflow.python.keras.layers.core.Dense at 0x7f5a5a66aa20>

In [0]:
#Adding more layers
model.add(tf.keras.layers.Dense(units=100, activation='relu'))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Dense(units=50, activation='relu'))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Dense(units=20, activation='relu'))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Dense(units=10, activation='softmax'))

In [0]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [103]:
model.fit(x=X_train, y=y_train, batch_size=64, epochs=30, validation_data=(X_val, y_val))

Train on 42000 samples, validate on 60000 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<tensorflow.python.keras.callbacks.History at 0x7f5a59644da0>

In [0]:

y_pred = model.predict(X_test)

In [105]:
model.evaluate(X_test, y_test)



[0.536803988787863, 0.8425556]

In [0]:
#Differences and trade-offs between traditional and NN classifiers

#The KNN classifer took a lot of time to predict the test values.
#The accuracy score is just 53% which is very low when compared with the neural network model.
#The Neural network model with batch normalization, RELU as activation function and ADAM as optimizer took less than 6 minutes to train the dataset.
#The prediction and evaluation were even faster, unlike KNN which took more than 10 minutes to complete.
#The model was not only faster, it also came up with more and a better accuracy.
#The accuracy in the train data set was 85.15%, validation set was 87.58% and the test set gave an accuracy of 85.25% which is high when compared with the traditional ML models.