In [2]:
import tensorflow.compat.v1 as tf,numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import Dense

In [3]:
tf.disable_eager_execution() # need to disable eager in TF2.x

In [4]:
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

In [5]:
indices = [i for i in range(10)]
sess = tf.Session()
x = np.array(sess.run(tf.one_hot(indices,depth=10,axis=0)))

new_y_train = []
for i in range(len(y_train)):
   new_y_train.append(x[y_train[i]])
y_train= np.array(new_y_train)

new_y_test = []
for i in range(len(y_test)):
   new_y_test.append(x[y_test[i]])
y_test= np.array(new_y_test)

In [6]:
x_train = x_train.reshape((60000,784))
x_test = x_test.reshape((10000,784))

In [7]:
input_width = 28
input_height = 28
input_channels = 1
input_pixels = 784

n_conv1 = 32
n_conv2 = 64
stride_conv1 = 1
stride_conv2 = 1
conv1_k = 5
conv2_k = 5
max_pool1_k = 2
max_pool2_k = 2

n_hidden = 1024
n_out = 10

input_size_to_hidden = ((input_width//(max_pool1_k*max_pool2_k))*(input_height//(max_pool1_k*max_pool2_k))) * n_conv2

In [8]:
weights = {
    "wc1" : tf.Variable(tf.random_normal([conv1_k, conv1_k, input_channels, n_conv1])),
    "wc2" : tf.Variable(tf.random_normal([conv2_k, conv2_k, n_conv1, n_conv2])),
    "wh" : tf.Variable(tf.random_normal([input_size_to_hidden, n_hidden])),
    "wo" : tf.Variable(tf.random_normal([n_hidden, n_out]))
}

biases = {
    "bc1" : tf.Variable(tf.random_normal([n_conv1])),
    "bc2" : tf.Variable(tf.random_normal([n_conv2])),
    "bh" : tf.Variable(tf.random_normal([n_hidden])),
    "bo" : tf.Variable(tf.random_normal([n_out]))
}

In [9]:
def conv(x, weights, bias, strides = 1):
    out = tf.nn.conv2d(x, weights, padding="SAME", strides = [1, strides, strides, 1]) 
                                                            # n   height   width   channel(depth)
    out = tf.nn.bias_add(out, bias)
    out = tf.nn.relu(out)
    return out

def max_pooling(x, k = 2):
    return tf.nn.max_pool(x, padding="SAME", ksize=[1, k, k, 1], strides=[1,k,k,1])

In [13]:
def cnn(x, weights, biases, rate): # forward propagation
    x = tf.reshape(x ,shape = [-1, input_height, input_width, input_channels]) # making our data 28X28
    conv1 = conv(x, weights["wc1"], biases["bc1"], stride_conv1)
    conv1_pool = max_pooling(conv1, max_pool1_k)
    
    conv2 = conv(conv1_pool, weights["wc2"], biases["bc2"], stride_conv2)
    conv2_pool = max_pooling(conv2, max_pool2_k)
    
    input_to_hidden = tf.reshape(conv2_pool, shape = [-1, input_size_to_hidden]) # making our data 784
    hidden_output_before_activation = tf.add(tf.matmul(input_to_hidden, weights["wh"]), biases["bh"])
    hidden_output_before_dropout = tf.nn.relu(hidden_output_before_activation)
    hidden_output = tf.nn.dropout(hidden_output_before_dropout, rate)
    
    output = tf.add(tf.matmul(hidden_output, weights["wo"]), biases["bo"])
    return output

In [14]:
x = tf.placeholder("float", [None, input_pixels])
y =tf.placeholder(tf.int32, [None, n_out])
rate = tf.placeholder("float") # this rate is keep probability i don't know why i changed it's name
pred = cnn(x, weights, biases, rate)

In [15]:
### COST FUNCTION
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=pred, labels = y))

In [16]:
optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
optimize = optimizer.minimize(cost)

In [17]:
sess = tf.Session()
sess.run(tf.global_variables_initializer())

In [18]:
batch_size = 100
for i in range(25):
    num_batches = int(x_train.shape[0]/batch_size)
    total_cost = 0
    for j in range(0,x_train.shape[0],batch_size):
        batch_x, batch_y = x_train[j:j+batch_size,:],y_train[j:j+batch_size,:]
        c, _ = sess.run([cost,optimize], feed_dict={x:batch_x , y:batch_y, rate:0.8})
        total_cost += c
    print(total_cost)

239041363.57055664
8846944.196121216
4337264.400260925
2763627.3579211235
2644537.413146019
1978453.887624749
1837075.8923841715
1833665.3843631744
1516048.2265343666
1576942.0040544271
1172571.2483730316
1475709.9027690887
1130433.8259372711
925901.2091350555
801599.5290699005
804746.5209592581
1041717.4315004349
743265.2135353088
618239.2305164337
624429.6088593006
672316.3002295494
755704.1105995178
502427.3979911804
564150.3385380507
596554.7871879339


In [19]:
predictions = tf.argmax(pred, 1)
correct_labels = tf.argmax(y, 1)
correct_predictions = tf.equal(predictions, correct_labels)
pre,correct_pre  = sess.run([predictions, correct_predictions], feed_dict={x:x_test,
                                              y:y_test,rate:1})
correct_pre.sum()

9824

### IMPLEMENTING THE SAME ARCHITECTURE USING KERAS

In [92]:
import keras
import tensorflow.compat.v1 as tf,numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Conv2D
from keras.layers import Conv1D
from keras.layers import MaxPooling2D
from keras.layers import MaxPooling3D
from keras.layers import Flatten
from keras.layers import Dropout
from keras.utils import to_categorical
tf.disable_eager_execution() 

In [95]:
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

x_train = x_train.reshape((60000,784))
x_test = x_test.reshape((10000,784))

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
fs = StandardScaler()
x_train = fs.fit_transform(x_train)
x_test = fs.transform(x_test)

x_train = x_train.reshape((60000,28,28))
x_test = x_test.reshape((10000,28,28))

y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

In [98]:
img_rows=x_train[0].shape[0]
img_cols=x_test[0].shape[1]

x_train=x_train.reshape(x_train.shape[0],img_rows,img_cols,1)

x_test=x_test.reshape(x_test.shape[0],img_rows,img_cols,1)

In [99]:
x_train.shape, x_test.shape

((60000, 28, 28, 1), (10000, 28, 28, 1))

In [100]:
model = Sequential()
model.add(Conv2D(input_shape=(28,28,1),padding="same",strides=(1,1),filters=32,kernel_size=(5,5),data_format="channels_last"))
model.add(MaxPooling2D(pool_size=(2,2),strides=(2,2),padding="same"))
model.add(Conv2D(padding="same",strides=(1,1),filters=64,kernel_size=(5,5)))
model.add(MaxPooling2D(pool_size=(2,2),strides=(2,2),padding="same"))
model.add(Flatten())
model.add(Dense(units=1024,activation="relu"))
model.add(Dropout(rate=0.2))
model.add(Dense(units=10,activation="sigmoid"))
model.summary()

Model: "sequential_20"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_22 (Conv2D)           (None, 28, 28, 32)        832       
_________________________________________________________________
max_pooling2d_22 (MaxPooling (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_23 (Conv2D)           (None, 14, 14, 64)        51264     
_________________________________________________________________
max_pooling2d_23 (MaxPooling (None, 7, 7, 64)          0         
_________________________________________________________________
flatten_12 (Flatten)         (None, 3136)              0         
_________________________________________________________________
dense_24 (Dense)             (None, 1024)              3212288   
_________________________________________________________________
dropout_9 (Dropout)          (None, 1024)            

In [101]:
model.compile(optimizer="adam",loss="categorical_crossentropy",metrics=["accuracy"])

In [102]:
model.fit(x_train, y_train, epochs=10, batch_size = 50, validation_data=(x_test,y_test))

Train on 60000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x1a9cfb84a90>

In [103]:
predictions = model.predict(X_test)
model.evaluate(X_test,y_test)

[nan, 0.098]