In [14]:

import tensorflow as tf
import numpy as np

from tensorflow.keras import datasets
import matplotlib.pyplot as plt


def printIMG(x,y, idx):
    label_dict = {
 0: 'T-shirt/top',
 1: 'Trouser',
 2: 'Pullover',
 3: 'Dress',
 4: 'Coat',
 5: 'Sandal',
 6: 'Shirt',
 7: 'Sneaker',
 8: 'Bag',
 9: 'Ankle boot',
}

    plt.imshow(x[idx])
    plt.xlabel(label_dict[idx])
# plt.imshow(x[0])

def preprocessing():
    (x_train, y_train), (x_test, y_test) = datasets.fashion_mnist.load_data() #tf.keras.datasets.mnist.load_data(path="mnist.npz") # 
    x_train =x_train/255  
    x_test = x_test/255 

    # Make categorical for loss
    y_train = tf.keras.utils.to_categorical(y_train)
    y_test = tf.keras.utils.to_categorical(y_test)

    # reshape for 3 dim
    x_train = x_train.reshape(-1, 28, 28, 1)
    x_test = x_test.reshape(-1,28,28,1)

    
    return (x_train, y_train, x_test, y_test)


class CNN:
    def __init__(self, num_input, layers ,lastDense, lr, dropout, debug=False):
        self.hiddenlayers =layers
        self.num_features = layers[0]
        self.num_classes = layers[-1]
        self.L =  len(layers) #len(self.Weights)+1  # len(layers)

        self.debug = debug

        self.lr = lr
        self.drouput = dropout

        # paramerters
        self.Weights = {}
        self.biases = {}
        self.dw = {}
        self.db = {}

        self.last_dense = lastDense
        # should happen at the beginning when initlizing the 
        self.initilizeWeight()

    def initilizeWeight(self):
        # print("initilizeWeight")
        for i in range(1, self.L):
            
            self.Weights[i] =  self.hiddenlayers[i]#tf.Variable(tf.random.normal(shape =(self.hiddenlayers[i], self.hiddenlayers[i-1]))) #, initializer =  tf.contrib.layers.xavier_initializer(seed = 0)))
            if self.debug: print(f"self.Weights[i].shape: {self.Weights[i]} ")




    def compute_loss(self, A, Y):
        # print("compute_loss")
        loss = tf.nn.softmax_cross_entropy_with_logits(Y,A)
        # Mean over multiple dim
        return tf.reduce_mean(loss)

    def forwardpass(self, X):
        # print("forwardpass")


        if self.debug: print(f"self.Weights: {len(self.Weights)}")

        
        A = tf.convert_to_tensor(X, dtype=tf.float32)
        for i in range(1, len(self.Weights)-2):

            if self.debug: 
                print(f" {i} {self.Weights[i].shape} ")  
                input()
            
            Z = tf.nn.conv2d(A, self.Weights[i], strides = [1,1,1,1], padding = 'SAME')
            

            A = tf.nn.relu(Z)
            

            A = tf.nn.max_pool(A, ksize = [1,2,2,1], strides = [1,2,2,1], padding = 'SAME')
            

            if self.debug:
                print(f"A. tf.Max shape {i}: {A.shape}") 
                print(f"Z. tf.nn.conv2d shape {i}: {Z.shape}")
                input()

        # print(f"A shape before flat: {A.shape}")
        F = tf.compat.v1.layers.flatten(A)
        # print("Before Z2")
        # input()
        Z2 = tf.matmul(F,self.Weights[i+1]) 
        # print("After Z2")
        # input()
        
        #Z3 = tf.compat.v1.layers.dense(F, self.num_classes, activation=None)   #, activation_fn=None
        #Z3= tf.keras.layers.Dense(F, self.num_classes, activation=None)
        Z3 = Z2 = tf.matmul(Z2,self.Weights[i+2]) 
        # print(f"F shape: {F.shape}")
        # print(f"tf.transpose(self.last_dense): {tf.transpose(self.last_dense).shape}")                
        # print(f"self.last_dense: {self.last_dense.shape}")

        # input()

        # Z3 = tf.matmul(tf.transpose(F),self.last_dense) 
        # if self.debug: input()

        A = Z3    
        return A


    def updateParams(self):
        """ 
        We have all the weights and biases. now we need to update the weights with gd
        The formula is 
        """
        if self.debug:print("updateParams")
        
        # check also with assignning sub
        for i in range(1, len(self.Weights)):
            self.Weights[i].assign_sub(self.lr * self.dw[i])

    def printInfoModel(self):
        print(f"Number of features: {self.num_features}")
        print(f"Number of classes: {self.num_classes}")
        print(f"Number of self.Weights {len(self.Weights)}")

        for i in range(1, len(self.hiddenlayers)-1):
            print(f"Hidden Layer {i}: {self.hiddenlayers[i].shape}")

    def computeLoss(self, Y, Z):
        if self.debug:print("compute_loss")
        
        loss = tf.nn.softmax_cross_entropy_with_logits(Y, Z) # try to use softmax

        if self.debug: 
            print(f"tf.reduce_mean(loss): {tf.reduce_mean(loss)}")
            input()

        return (tf.reduce_mean(loss))

    def train(self, x_train, y_train, x_test, y_test, epochs, steps_per_epoch, batch_size):

        history = {
            'val_loss':[],
            'train_loss':[],
            'val_acc':[]
        }
        
        for e in range(0, epochs):
            epoch_train_loss = 0.
            print('Epoch{}'.format(e), end='.')
            for i in range(0, steps_per_epoch):

                x_batch = x_train[i*batch_size:(i+1)*batch_size]
                y_batch = y_train[i*batch_size:(i+1)*batch_size]
                if self.debug:
                    print(len(x_batch))
                    print(len(y_batch))
                    print(f"i*batch_size:(i+1)*batch_size: {[i*batch_size, (i+1)*batch_size]}")
                    input()
                batch_loss = self.trainOnBatch(x_batch, y_batch)
                epoch_train_loss += batch_loss
                
                if i%int(steps_per_epoch/10) == 0:
                    print(end='.')
                    
            history['train_loss'].append(epoch_train_loss/steps_per_epoch)
            val_A = self.forwardpass(x_test)

            val_loss = self.compute_loss(val_A, y_test).numpy()
            history['val_loss'].append(val_loss)
            val_preds = self.predict(x_test)
            val_acc =    np.mean(np.argmax(y_test, axis=1) == val_preds.numpy())
            history['val_acc'].append(val_acc)
            print('Val acc:',val_acc)
        return history

    def dropout(self, X):
        a = tf.random.uniform((X.shape[0], X.shape[1]), dtype=tf.dtypes.float32)
        b = tf.where(a<self.drouput , 0,1)
        b = tf.cast(b, tf.dtypes.float32)
        return (tf.math.multiply(b,X))


    def trainOnBatch(self, X, Y):

        X = tf.convert_to_tensor(X, dtype = tf.float32)
        Y = tf.convert_to_tensor(Y,dtype = tf.float32)

        with tf.GradientTape(persistent=True) as tape:
            for i in range(1, len(self.Weights)):
                if self.debug: print(f"tape: {i}")

                #X = self.dropout(X)
                Z = self.forwardpass(X)
                
                loss = self.computeLoss(Y, Z)
                self.dw[i] = tape.gradient(loss, self.Weights[i])
                if self.debug:print(f"self.dw[i]: {self.dw[i].shape}")
                

                if self.debug:
                    print(f"type(loss): {type(loss)}")
                    print(f"type(Weights[i]): {type(self.Weights[i])}")
                    print(f"type(self.dw[i]): {type(self.dw[i])}")

                #self.db[i] = tape.gradient(loss, self.biases[i])
        del tape
        self.updateParams()
        return loss.numpy()

    def predict(self, X):
        A = self.forwardpass(X)
        return tf.argmax(tf.nn.softmax(A), axis=1)
        


n_classes = 10
n_Input = 784
weights = [
    n_Input,
    tf.compat.v1.get_variable('W1', shape=(3,3,1,32), initializer=tf.keras.initializers.glorot_normal()),
     tf.compat.v1.get_variable('W2', shape=(3,3,32,64), initializer=tf.keras.initializers.glorot_normal()),
    #  tf.compat.v1.get_variable('W2', shape=(3,3,64,128), initializer=tf.keras.initializers.glorot_normal()),
    tf.compat.v1.get_variable('W3', shape=(7*7*64,120), initializer=tf.keras.initializers.glorot_normal()),
   tf.compat.v1.get_variable('W4', shape=(120,n_classes), initializer=tf.keras.initializers.glorot_normal()),
     n_classes
]

last = tf.compat.v1.get_variable('W2', shape=(120,n_classes), initializer=tf.keras.initializers.glorot_normal())


net = CNN(n_Input,weights,last, 3e-3, 0.3, debug = False)
net.printInfoModel()



(x_train, y_train, x_test, y_test) = preprocessing()

batch_size = 120
epochs = 25
steps_per_epoch = int(x_train.shape[0]/batch_size)
lr = 3e-3
print('Steps per epoch', steps_per_epoch)



tf.config.run_functions_eagerly(True)

history = net.train(
    x_train,y_train,
    x_test, y_test,
    epochs, steps_per_epoch,
    batch_size)



Number of features: 784
Number of classes: 10
Number of self.Weights 5
Hidden Layer 1: (3, 3, 1, 32)
Hidden Layer 2: (3, 3, 32, 64)
Hidden Layer 3: (3136, 120)
Hidden Layer 4: (120, 10)
Steps per epoch 500


  F = tf.compat.v1.layers.flatten(A)
  return layer.apply(inputs)


..........Val acc: 0.647
Epoch1...........Val acc: 0.7252
Epoch2...........Val acc: 0.7521
Epoch3...........Val acc: 0.7673
Epoch4...........Val acc: 0.7761
Epoch5...........Val acc: 0.7854
Epoch6...........Val acc: 0.7922
Epoch7...........Val acc: 0.7986
Epoch8...........Val acc: 0.8055
Epoch9...........Val acc: 0.8126
Epoch10...........Val acc: 0.8183
Epoch11...........Val acc: 0.825
Epoch12...........Val acc: 0.8303
Epoch13...........Val acc: 0.8338
Epoch14...........Val acc: 0.837
Epoch15...........Val acc: 0.8408
Epoch16...........Val acc: 0.8429
Epoch17...........Val acc: 0.8458
Epoch18...........Val acc: 0.8482
Epoch19...........Val acc: 0.8503
Epoch20...........Val acc: 0.8516
Epoch21...........Val acc: 0.8543
Epoch22...........Val acc: 0.8565
Epoch23...........Val acc: 0.8573
Epoch24...........Val acc: 0.859
