# Basic Tensorflow based NeuralNet
This python notebook is for studying how to make NeuralNet model using Tensorflow
Keras is just used for load the dataset
Reference:
1. https://www.kaggle.com/code/enriqueabad/using-tensorflow-from-scratch-without-keras/notebook
2. https://www.tensorflow.org/tutorials/quickstart/beginner?hl=ko

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf

import os

from matplotlib import pyplot as plt
from time import time
import tensorflow as tf
from sklearn.preprocessing import OneHotEncoder, scale
from sklearn.model_selection import train_test_split
from tqdm import tqdm
print("TensorFlow Version: ", tf.__version__)

TensorFlow Version:  2.10.0


In [3]:
"""
Load MNist Dataset from the keras
"""
mnist = tf.keras.datasets.mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [4]:
"""Normalize the input to make it inside of range 0 to 1"""
x_train, x_test = x_train / 255.0, x_test / 255.0
print(type(x_train))
print(x_train.shape)
print(y_train.reshape(-1,1))
print(y_test.reshape(-1,1))

<class 'numpy.ndarray'>
(60000, 28, 28)
[[5]
 [0]
 [4]
 ...
 [5]
 [6]
 [8]]
[[7]
 [2]
 [1]
 ...
 [4]
 [5]
 [6]]


In [5]:
"""
Make label as an one hot vector
"""
label_encoder = OneHotEncoder(categories=[np.arange(10)])
"""
Need to make 1D array to 2D array So use ndarray.reshape()
We use -1 to indicate the length of row or column dynamically
Need to use .toarray method to make it as a form of [0,0,0,0,1,0,0,0]
"""
y_train = label_encoder.fit_transform(y_train.reshape(-1,1)).toarray()
y_test = label_encoder.fit_transform(y_test.reshape(-1,1)).toarray()

In [6]:
print(y_train)

[[0. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 1. 0.]]


# 1. Neural Network without hidden layers

In [7]:
"""
Make weight tensor
We have 784 pixel image so the weight tensor size should be 784 x 10
-> 784 pixel and out put is 10
tf.Variable => tensor in tensorflow is immutable.
So we use variables which is tensor whose value can be changed by running ops on it. 
"""
# weight_random = tf.Variable(tf.random.uniform([784,10]))
weight_random = tf.Variable(tf.zeros([784,10]))

In [8]:
"""
Make the loss function -> mean squared error loss and cross-entropy loss 
"""
def loss_MSE(target_y, predicted_y):
    return tf.reduce_mean(tf.square(target_y-predicted_y))

def loss_CrossEntrophy(target_y, predicted_y):
    """
    This calculate cross entrophy
    * operator is element wise multiply -> shape must be same
    """
    return -tf.reduce_sum(tf.reduce_mean(target_y*tf.math.log(predicted_y+ 1e-12),axis=0))

In [9]:
"""Define Flatten Layer"""
def flatten(t):
    """
    By using np.reshape, reshape tensor to 1D
    """
    return t.reshape(t.shape[0],-1)

In [10]:
print(flatten(x_train).shape)

(60000, 784)


## Make model
Make model using tf.Module makes it easier to build own model
tf.Module is Base neural network module class
Module  is an named container for tf.Variable s, other tf.Module s and functions which apply to user input.

In [11]:
class Model1(tf.Module):
    def __init__(self,**kwargs):
        super().__init__(**kwargs)
        """
        Weight starts with zeros 
        But it is better to start with random
        """
        self.w = tf.Variable(tf.zeros([784,10]))
        # self.w = tf.Variable(tf.random.uniform([784,10]))
        self.b = tf.Variable(0.0)
    
    def __call__(self, x):
        """ 
        With just call the class, we can predict
        y_predicted = Model1()(x_data)
        @ operator is matmul
        """
        x = flatten(x)
        return tf.nn.softmax((x @ self.w + self.b))


In [12]:
model = Model1()

In [13]:

def train(model,x,y,learning_rate):
    """
    This is training function
    For gradient I use GradientTape
    """
    with tf.GradientTape() as t:
        # make prediction and calculate the loss
        current_loss = loss_CrossEntrophy(y,model(x))

    #calculate the gradient. here calculate the weight and bias
    dw, db = t.gradient(current_loss, [model.w, model.b])

    #update the wieght and bias. Here I just use gradient descent with learning rate(or step size)
    model.w.assign_sub(learning_rate*dw)
    model.b.assign_sub(learning_rate*db)

In [14]:
"""
Actual Training part
"""
for i_epochs in range(50):
    #train the model
    train(model, x_train, y_train, learning_rate=0.2)

    #calculate loss from training
    train_loss = loss_CrossEntrophy(y_train, model(x_train))
    test_loss = loss_CrossEntrophy(y_test, model(x_test))

    #print result of trained model
    print(f"Training loss in epoch {i_epochs} = {train_loss.numpy()}. Test loss = {test_loss.numpy()}")

Training loss in epoch 0 = 2.0969409942626953. Test loss = 2.09299898147583
Training loss in epoch 1 = 1.92383873462677. Test loss = 1.916506290435791
Training loss in epoch 2 = 1.775111436843872. Test loss = 1.764846682548523
Training loss in epoch 3 = 1.647637128829956. Test loss = 1.634881615638733
Training loss in epoch 4 = 1.5384283065795898. Test loss = 1.5235811471939087
Training loss in epoch 5 = 1.444663405418396. Test loss = 1.428073525428772
Training loss in epoch 6 = 1.3638346195220947. Test loss = 1.345796823501587
Training loss in epoch 7 = 1.2937922477722168. Test loss = 1.2745518684387207
Training loss in epoch 8 = 1.232735276222229. Test loss = 1.2124978303909302
Training loss in epoch 9 = 1.1791810989379883. Test loss = 1.1581134796142578
Training loss in epoch 10 = 1.131913423538208. Test loss = 1.1101539134979248
Training loss in epoch 11 = 1.0899404287338257. Test loss = 1.0676029920578003
Training loss in epoch 12 = 1.0524502992630005. Test loss = 1.02962982654571

I face the challenge that if i do with the full batch, the RAM has been exceeded and system got down.
To solve this I have to make the model that has train with mini batch or some other method
-> Solved: it was error that my y_train was not in on one hot vector array. It was (idx,category) format.
So I changed it with using .toarray and It works well

# Neural Net with a hidden layer
include the hidden layer of 100 neuron

In [20]:
class Model2(tf.Module):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        #input layer
        self.w0 = tf.Variable(tf.zeros([784,100]))
        self.b0 = tf.Variable(0.0)

        #output of input layer is hidden layer. Here its size is 100

        #output layer
        self.w1 = tf.Variable(tf.zeros([100,10]))
        self.b1 = tf.Variable(0.0)

    def __call__(self, x0):
        x0 = flatten(x0)
        x1 = tf.nn.sigmoid(x0 @ self.w0 + self.b0)
        return tf.nn.softmax(x1 @ self.w1 + self.b1)

In [24]:
def train_hidden(model, x, y, learning_rate):
    with tf.GradientTape() as t:
        current_loss = loss_CrossEntrophy(y,model(x))
    
    dw0, db0, dw1, db1 = t.gradient(current_loss, [model.w0, model.b0, model.w1, model.b1])

    #input layer
    model.w0.assign_sub(learning_rate*dw0)
    model.b0.assign_sub(learning_rate*db0)

    #output layer
    model.w1.assign_sub(learning_rate*dw1)
    model.b1.assign_sub(learning_rate*db1)

In [26]:
Model_hidden = Model2()

for i_epochs in range(50):
    train_hidden(Model_hidden,x_train,y_train,learning_rate=0.2)
    train_loss = loss_CrossEntrophy(y_train,Model_hidden(x_train))
    test_loss = loss_CrossEntrophy(y_test,Model_hidden(x_test))
    print(f"Training loss in epoch {i_epochs} = {train_loss.numpy()}.   Test loss = {test_loss.numpy()}")

Training loss in epoch 0 = 2.3015265464782715.   Test loss = 2.3014349937438965
Training loss in epoch 1 = 2.3012466430664062.   Test loss = 2.301133871078491
Training loss in epoch 2 = 2.3011817932128906.   Test loss = 2.301055431365967
Training loss in epoch 3 = 2.301170825958252.   Test loss = 2.301028251647949
Training loss in epoch 4 = 2.301156997680664.   Test loss = 2.3010177612304688
Training loss in epoch 5 = 2.301154136657715.   Test loss = 2.301011562347412
Training loss in epoch 6 = 2.3011622428894043.   Test loss = 2.3010072708129883
Training loss in epoch 7 = 2.30117130279541.   Test loss = 2.301003932952881
Training loss in epoch 8 = 2.301147699356079.   Test loss = 2.30100154876709
Training loss in epoch 9 = 2.301157236099243.   Test loss = 2.300999164581299
Training loss in epoch 10 = 2.3011622428894043.   Test loss = 2.3009955883026123
Training loss in epoch 11 = 2.3011484146118164.   Test loss = 2.300992965698242
Training loss in epoch 12 = 2.3011581897735596.   Test

In [27]:
"""
Checking wieght trained
"""
print(Model_hidden.w0)

<tf.Variable 'Variable:0' shape=(784, 100) dtype=float32, numpy=
array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)>


In [30]:
class Model3(tf.Module):
    """
    Model initialize all the weights with random
    """
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        #input layer
        self.w0 = tf.Variable(tf.random.uniform([784, 100]))
        self.b0 = tf.Variable(0.0)

        #output of input layer is hidden layer here its size is 100

        #output layer
        self.w1 = tf.Variable(tf.random.uniform([100, 10]))
        self.b1 = tf.Variable(0.0)

    def __call__(self, x0):
        x0 = flatten(x0)
        x1 = tf.nn.sigmoid(x0 @ self.w0 + self.b0)
        return tf.nn.softmax(x1 @ self.w1 + self.b1)

In [37]:
Model_hidden_random = Model3()

for i_epochs in range(50):
    train_hidden(Model_hidden_random,x_train,y_train,learning_rate=0.2)
    train_loss = loss_CrossEntrophy(y_train,Model_hidden_random(x_train))
    test_loss = loss_CrossEntrophy(y_test,Model_hidden_random(x_test))
    print(f"Training loss in epoch {i_epochs} = {train_loss.numpy()}.   Test loss = {test_loss.numpy()}")

Training loss in epoch 0 = 3.8706977367401123.   Test loss = 3.846771001815796
Training loss in epoch 1 = 3.3753533363342285.   Test loss = 3.3750085830688477
Training loss in epoch 2 = 2.820781946182251.   Test loss = 2.8151206970214844
Training loss in epoch 3 = 2.581589460372925.   Test loss = 2.5788307189941406
Training loss in epoch 4 = 2.402167797088623.   Test loss = 2.4005239009857178
Training loss in epoch 5 = 2.3747880458831787.   Test loss = 2.377129554748535
Training loss in epoch 6 = 2.3600807189941406.   Test loss = 2.3586246967315674
Training loss in epoch 7 = 2.352860927581787.   Test loss = 2.3544089794158936
Training loss in epoch 8 = 2.3531219959259033.   Test loss = 2.3520333766937256
Training loss in epoch 9 = 2.344426155090332.   Test loss = 2.345597743988037
Training loss in epoch 10 = 2.349003791809082.   Test loss = 2.3482115268707275
Training loss in epoch 11 = 2.339592695236206.   Test loss = 2.340461015701294
Training loss in epoch 12 = 2.346315383911133.   

Loss of model with hidden layer is bigger because model is not converged. if train with better algorithm(efficient algorithm than gradient descent) or more epoch, model performance will better