In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

### Part 1: Import the Housing data and do feature transformations

In [2]:
# Load the housing data; the preview shows the columns bedrooms, sqft_living and price.
df= pd.read_csv('house_price_full.csv')
df.head()

Unnamed: 0,bedrooms,sqft_living,price
0,3,1340,313000
1,5,3650,2384000
2,3,1930,342000
3,3,2000,420000
4,4,1940,550000


In [3]:
# Work on a copy so the raw frame `df` is left untouched.
X = df.copy()
# Split off the regression target; X keeps only the feature columns.
Y = X.pop('price')

# Standardise the features.
# NOTE: X is rebound from a DataFrame to the array returned by fit_transform.
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Log-transform the target variable.
# TODO(review): open question left by the author -- confirm whether this transform is needed.
Y = np.log(Y)

In [4]:
# Wrap the scaled features in a DataFrame for inspection; the columns are now
# the positional labels 0 and 1 rather than the original feature names.
df_scaled = pd.DataFrame(X)
df_scaled

Unnamed: 0,0,1
0,-0.433198,-0.753258
1,1.675735,1.457330
2,-0.433198,-0.188649
3,-0.433198,-0.121661
4,0.621269,-0.179079
...,...,...
494,0.621269,0.873582
495,1.675735,2.299459
496,-0.433198,-0.724549
497,-0.433198,-0.179079


In [5]:
Y

0      12.653958
1      14.684290
2      12.742566
3      12.948010
4      13.217674
         ...    
494    13.380102
495    13.764217
496    12.128111
497    12.721886
498    12.254863
Name: price, Length: 499, dtype: float64

In [6]:
# Take one sample (row 0) and unpack its two scaled features as x1, x2
x1, x2 = df_scaled.iloc[0]

In [7]:
# Pack the sample into a float32 tensor of shape (1, 2): one row, two features
x = tf.constant([[x1,x2]], dtype=tf.float32)
x

<tf.Tensor: shape=(1, 2), dtype=float32, numpy=array([[-0.43319765, -0.7532575 ]], dtype=float32)>

### Part2: Forward Propagation with a single Neuron

The simplest way to describe a neural network is that we have some inputs $x$, which get combined into an auxiliary variable $z$. The auxiliary variable is passed through the activation function $f$ and the result is the output.

Here is another image showing each step.
![](neuron.png)
Notice that the inputs are linearly combined according to some weights $w$ and a bias $b$. This transformation is also sometimes called an affine transformation. The perceptron transforms the weighted inputs according to the rule of the activation function. For a single perceptron, the network output $\hat{y}$ is just the output $h$ from the perceptron. The linear transformation and activation of the neuron occur within a single layer of the network (shown in the dotted box).

Let's see what the single-layer, single-neuron network gives us. We have a couple of choices to make:

- We must choose some weights and some biases
- We must choose an activation function

For now, we will manually specify the weights and biases.

We choose a sigmoid activation function

In [8]:
# Hand-picked (not learned) parameters for the single neuron
# weights: one per input feature
w1 = tf.Variable([0.2], dtype=tf.float32)
w2 = tf.Variable([0.15], dtype=tf.float32)
# bias
b = tf.Variable([0.1], dtype=tf.float32)

In [9]:
# Pre-activation: bias plus the weighted sum of the two inputs
z = b + w1*x1 +w2*x2
# The sigmoid activation gives the neuron's output
h = tf.math.sigmoid(z)
print("The output from the first neuron is",h)

The output from the first neuron is tf.Tensor([0.47511354], shape=(1,), dtype=float32)


### Part3: Forward Propagation with multiple neurons

![](multiple_neurons.png)

In [10]:
## layer 1 (hidden layer) parameters, named w<neuron><input>
# neuron 1: same values as the single-neuron example (b=0.1, w=0.2, 0.15)
b1 = tf.Variable([0.1])
w11 = tf.Variable([0.2])
w12 = tf.Variable([0.15])
# neuron 2
b2 = tf.Variable([0.25])
w21 = tf.Variable([0.5])
w22 = tf.Variable([0.6])


In [11]:
## forward pass
# hidden neuron 1: weighted sum of the inputs plus bias, then sigmoid
z1 = b1+w11*x1+w12*x2
h1 = tf.math.sigmoid(z1)
print("The output from the first neuron is",h1)

The output from the first neuron is tf.Tensor([0.47511354], shape=(1,), dtype=float32)


In [12]:
## forward pass
# hidden neuron 2: weighted sum of the inputs plus bias, then sigmoid
z2 = b2+w21*x1+w22*x2
h2 = tf.math.sigmoid(z2)
print("The output from the second neuron is",h2)

The output from the second neuron is tf.Tensor([0.39686295], shape=(1,), dtype=float32)


In [13]:
## layer 2 (output layer) parameters: one bias and one weight per hidden neuron
# NOTE(review): this rebinds b1, w11 and w12, which previously held the
# layer-1 neuron-1 parameters. Re-running the layer-1 forward-pass cell after
# this one would silently use the layer-2 values.
b1 = tf.Variable([0.4])
w11 = tf.Variable([0.3])
w12 = tf.Variable([0.2])

In [14]:
## forward pass
# second layer: weighted sum of the two hidden activations plus bias
z1 = b1+w11*h1+w12*h2
# No activation is applied on the output, so the prediction is z1 itself.
# NOTE(review): this overwrites h1 (the hidden neuron-1 output), so running
# this cell twice gives a different result the second time.
h1 = z1
print("The output from the first neuron is",h1)

The output from the first neuron is tf.Tensor([0.62190664], shape=(1,), dtype=float32)


In [15]:
# Target for sample 0 (log-transformed price) and the network output as a NumPy array
y_true = Y[0]
y_pred = h1.numpy()

In [16]:
# loss: half the squared error for the single sample, 0.5 * (y_true - y_pred)^2
L = 0.5*(y_true - y_pred)**2
print("The MSE error is",L)

The MSE error is [72.38514]


## Part 4: Forward pass matrix multiplication
![](multiple_neurons.png)

![](Matrix.gif)

This network can be described as follows:

- Input vector = $X = (x1,x2)$
- Weight Matrix (hidden layer) = $$W^1 = \begin{bmatrix}
w^1_{11}&&w^1_{12}\\
w^1_{21}&&w^1_{22}\\
\end{bmatrix}
$$
*note the subscripts are being mapped to weights in the figure

- Bias/offset Matrix (hidden layer) = $$
B^1_0 = \begin{bmatrix}
b^1_{1}\\
b^1_{2}\\
\end{bmatrix}
$$

Now the forward pass for the hidden layer can be described as

$$W^1 \times X^T + B^1_0= Z^1 = \begin{bmatrix}
z^1_{1}\\
z^1_{2}\\
\end{bmatrix}
$$

Applying the activation function $f$ element-wise over the matrix $Z^1$ completes the forward pass for the hidden layer.

$$f(W^1 \times X^T + B^1_0)= f(Z^1) = f(\begin{bmatrix}
z^1_{1}\\
z^1_{2}\\
\end{bmatrix}) =
\begin{bmatrix}
f(z^1_{1})\\
f(z^1_{2})\\
\end{bmatrix}
=
\begin{bmatrix}
h^1_1\\
h^1_2\\
\end{bmatrix}
= H^1
$$

For the output layer:

- The weight matrix is $$W^2 = \begin{bmatrix}
w^2_{11}&&w^2_{12}\\
\end{bmatrix}
$$

- The bias/offset matrix is $$B^2_0 = \begin{bmatrix}
b^2_{1}\\
\end{bmatrix}
$$

Now the forward pass can be written as:


$$Z^2 = W^2 \times H^1 + B_0^2$$


In [17]:
## layer 1 weights: row i holds the input weights of hidden neuron i
W1 = tf.Variable([[0.2, 0.15],
                     [0.5, 0.6]], dtype=tf.float32)
## layer 1 bias: column vector with one entry per hidden neuron
B1 = tf.Variable([[0.1],
                [0.25]], dtype=tf.float32)

In [18]:
## layer 2 weights: a single row, one weight per hidden neuron
W2 = tf.Variable([[0.3, 0.2]], dtype=tf.float32)
# layer 2 bias: a single value for the one output neuron
B2 = tf.Variable([0.4], dtype=tf.float32)

In [19]:
## data: the single sample as a (1, 2) row vector
# NOTE: this rebinds X, which earlier held the scaled feature matrix.
X = tf.constant([[x1,x2]], dtype=tf.float32)

In [20]:
## forward pass layer 1
# W1 (2x2) times X^T (2x1) plus B1 (2x1) gives the two pre-activations as a column
Z1 = tf.matmul(W1, tf.transpose(X)) + B1
# Element-wise sigmoid gives the hidden activations
H1 = tf.math.sigmoid(Z1)
print(H1)

tf.Tensor(
[[0.47511354]
 [0.39686295]], shape=(2, 1), dtype=float32)


In [21]:
## forward pass layer 2
# W2 (1x2) times H1 (2x1) plus the bias; no activation is applied to the output
Z2 = tf.matmul(W2,H1)+B2

In [22]:
Z2

<tf.Tensor: shape=(1, 1), dtype=float32, numpy=array([[0.62190664]], dtype=float32)>

In [23]:
# Half squared error of the matrix-form prediction against y_true (set in an earlier cell)
y_pred = Z2.numpy()
loss = 0.5*(y_true-y_pred)**2
print(loss)

[[72.38514]]


## Part5: Random Weight Initialization

![](multiple_neurons.png)

In [24]:
def random_init_params(seed=None):
    """Create randomly initialised parameters for the 2-2-1 network.

    All values are drawn uniformly from [0, 1).

    Parameters
    ----------
    seed : int, optional
        When given, the values are drawn from a dedicated
        ``tf.random.Generator`` seeded with this value, so calls with the same
        seed return the same parameters. When omitted (the default), the global
        TensorFlow random state is used, exactly as before.

    Returns
    -------
    tuple of tf.Variable
        ``(w1, b1, w2, b2)`` with shapes (2, 2), (1, 2), (2, 1) and (1, 1).
    """
    if seed is None:
        uniform = tf.random.uniform
    else:
        # A fresh generator per call keeps seeded results independent of
        # whatever random ops ran earlier in the session.
        uniform = tf.random.Generator.from_seed(seed).uniform
    w1 = tf.Variable(uniform((2, 2)))
    b1 = tf.Variable(uniform((1, 2)))
    w2 = tf.Variable(uniform((2, 1)))
    b2 = tf.Variable(uniform((1, 1)))
    return w1,b1,w2,b2

In [25]:
# Single training sample (features as a (1, 2) tensor, target for row 0)
x = tf.constant([[x1,x2]], dtype=tf.float32)
y = Y[0]
# Fresh random parameters; this rebinds w1 and b1 from the earlier sections
w1,b1,w2,b2 = random_init_params()

In [26]:
# Show the randomly initialised weights and biases of both layers
print(" the initial 1st layer weights are:\n",w1.numpy())
print("--------------------------------------------------")
print(" the initial 2nd layer weights are:\n",w2.numpy())
print("--------------------------------------------------")
print(" the initial 1st layer bias are:\n",b1.numpy())
print("--------------------------------------------------")
print(" the initial 2nd layer bias are:\n",b2.numpy())

 the initial 1st layer weights are:
 [[0.52523744 0.83814704]
 [0.65423536 0.68385506]]
--------------------------------------------------
 the initial 2nd layer weights are:
 [[0.55842495]
 [0.8627372 ]]
--------------------------------------------------
 the initial 1st layer bias are:
 [[0.90821326 0.42807543]]
--------------------------------------------------
 the initial 2nd layer bias are:
 [[0.45185268]]


In [27]:
def forward_prop(x, w1, b1, w2, b2):
    """Forward pass of the two-layer network.

    The hidden layer is ``sigmoid(x @ w1 + b1)``; the output layer is the
    affine map ``hidden @ w2 + b2`` with no activation applied.
    """
    hidden = tf.math.sigmoid(tf.matmul(x, w1) + b1)
    # The output pre-activation is returned as the prediction unchanged.
    return tf.matmul(hidden, w2) + b2

In [28]:
# Prediction for the single sample with the randomly initialised parameters
y_pred = forward_prop(x, w1, b1, w2, b2)
# loss: half the squared error, 0.5 * (y - y_pred)^2
L = 0.5*(y - y_pred)**2
print("The MSE error is",L)

The MSE error is tf.Tensor([[66.82665]], shape=(1, 1), dtype=float32)


## Part6: Backpropagation

Find the value of x that minimises $y = x^2+4x$

Gradient descent update equation

$x_{new} := x_{old}-\eta\frac{dy}{dx}$

In [29]:
# Start the search at x = 0; x is a tf.Variable so the gradient of y w.r.t. it can be taken below.
# NOTE: this rebinds x, which earlier held the input sample tensor.
x = tf.Variable(0.0)
# learning rate (eta in the update equation)
lr = eta = 0.1

In [30]:
# Record the computation of y on the tape so it can be differentiated
with tf.GradientTape() as tape:
    y = x**2+4*x
grad = tape.gradient(y,x) ## dy/dx

In [31]:
grad.numpy() #dy/dx = 2x+4, x=0 => dy/dx = 4

4.0

In [32]:
x.assign_sub(lr*grad) ## x_new = x_old -lr*dy/dx

<tf.Variable 'UnreadVariable' shape=() dtype=float32, numpy=-0.4>

In [33]:
x.numpy()

-0.4

In [34]:
## full loop: ten gradient-descent steps on y = x^2 + 4x, starting again from x = 0
x = tf.Variable(0.0)
# learning rate (eta in the update equation)
lr = eta = 0.1
for i in range(10):
    # A new tape per step: record y, then differentiate it w.r.t. x
    with tf.GradientTape() as tape:
        y = x**2+4*x
    grad = tape.gradient(y,x)
    # x_new = x_old - lr * dy/dx
    x.assign_sub(lr*grad)
    print(x.numpy())

-0.4
-0.72
-0.9760001
-1.1808001
-1.34464
-1.4757121
-1.5805696
-1.6644557
-1.7315645
-1.7852516



![](gradients.png)

In [35]:
# Restore the single training sample: x and y were rebound by the
# gradient-descent toy example (x as a scalar variable, y as x**2+4*x).
x = tf.constant([[x1,x2]], dtype=tf.float32)
y = Y[0]

def random_init_params(seed=None):
    """Create randomly initialised parameters for the 2-2-1 network.

    All values are drawn uniformly from [0, 1).

    Parameters
    ----------
    seed : int, optional
        When given, the values are drawn from a dedicated
        ``tf.random.Generator`` seeded with this value, so calls with the same
        seed return the same parameters. When omitted (the default), the global
        TensorFlow random state is used, exactly as before.

    Returns
    -------
    tuple of tf.Variable
        ``(w1, b1, w2, b2)`` with shapes (2, 2), (1, 2), (2, 1) and (1, 1).
    """
    if seed is None:
        uniform = tf.random.uniform
    else:
        # A fresh generator per call keeps seeded results independent of
        # whatever random ops ran earlier in the session.
        uniform = tf.random.Generator.from_seed(seed).uniform
    w1 = tf.Variable(uniform((2, 2)))
    b1 = tf.Variable(uniform((1, 2)))
    w2 = tf.Variable(uniform((2, 1)))
    b2 = tf.Variable(uniform((1, 1)))
    return w1,b1,w2,b2

def forward_prop(x, w1, b1, w2, b2):
    """Forward pass of the two-layer network.

    The hidden layer is ``sigmoid(x @ w1 + b1)``; the output layer is the
    affine map ``hidden @ w2 + b2`` with no activation applied.
    """
    hidden = tf.math.sigmoid(tf.matmul(x, w1) + b1)
    # The output pre-activation is returned as the prediction unchanged.
    return tf.matmul(hidden, w2) + b2

In [36]:
# Draw a fresh set of random parameters for the backpropagation walkthrough
w1,b1,w2,b2 = random_init_params()

In [37]:
# Record the forward pass and the half-squared-error loss on the tape
# so the loss can be differentiated w.r.t. the parameters afterwards.
with tf.GradientTape() as tape:
    y_pred = forward_prop(x,w1,b1,w2,b2)
    loss = 0.5*(y-y_pred)**2

In [38]:
# Gradients of the loss w.r.t. each parameter, returned in the same order as the list
gw1, gb1, gw2, gb2 = tape.gradient(loss, [w1, b1, w2, b2])

In [39]:
gw1

<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[0.65135807, 0.8959445 ],
       [1.1326016 , 1.557896  ]], dtype=float32)>

In [40]:
gb1

<tf.Tensor: shape=(1, 2), dtype=float32, numpy=array([[-1.5036048, -2.0682118]], dtype=float32)>

In [41]:
gw2

<tf.Tensor: shape=(2, 1), dtype=float32, numpy=
array([[-5.101526],
       [-5.214977]], dtype=float32)>

In [42]:
gb2

<tf.Tensor: shape=(1, 1), dtype=float32, numpy=array([[-11.598295]], dtype=float32)>

In [43]:
# One manual gradient-descent step on w1 only: w1 <- w1 - lr * gw1
lr=0.01
print(f"Value of w1 before gradient update is {w1}")
w1.assign_sub(lr*gw1)
print(f"Value of w1 after gradient update is {w1}")

Value of w1 before gradient update is <tf.Variable 'Variable:0' shape=(2, 2) dtype=float32, numpy=
array([[0.9614403 , 0.6300657 ],
       [0.42678213, 0.06179011]], dtype=float32)>
Value of w1 after gradient update is <tf.Variable 'Variable:0' shape=(2, 2) dtype=float32, numpy=
array([[0.9549267 , 0.6211062 ],
       [0.41545612, 0.04621115]], dtype=float32)>


In [44]:
# One manual gradient-descent step on b1 only: b1 <- b1 - lr * gb1
lr=0.01
print(f"Value of b1 before gradient update is {b1}")
b1.assign_sub(lr*gb1)
# The label names b1, matching the value printed (it previously said w1).
print(f"Value of b1 after gradient update is {b1}")

Value of b1 before gradient update is <tf.Variable 'Variable:0' shape=(1, 2) dtype=float32, numpy=array([[0.4962052 , 0.11733341]], dtype=float32)>
Value of w1 after gradient update is <tf.Variable 'Variable:0' shape=(1, 2) dtype=float32, numpy=array([[0.51124126, 0.13801552]], dtype=float32)>


In [45]:
def train(x, y, w1, b1, w2, b2, lr=0.2):
    """Run one gradient-descent step on a single sample and print a report.

    The prediction comes from ``forward_prop`` (defined elsewhere in this
    notebook) and the loss is the half squared error
    ``0.5 * (y - y_pred) ** 2``.

    Parameters
    ----------
    x : input passed straight to ``forward_prop``.
    y : target value the prediction is compared against.
    w1, b1, w2, b2 : tf.Variable
        Network parameters. They are updated in place with ``assign_sub``.
    lr : float, optional
        Learning rate used for the update. Defaults to 0.2, the value that
        was previously hard-coded in the function body.

    Returns
    -------
    tuple
        ``(w1, b1, w2, b2, loss)``: the same variable objects after the
        update, and the loss evaluated before the update.
    """
    y_true = y
    with tf.GradientTape() as g:
        y_pred = forward_prop(x, w1, b1, w2, b2)

        # loss
        loss = 0.5*(y_true - y_pred)** 2

    #Gradient calculation
    print("**************************************************")
    print("GRADIENTS")
    print("**************************************************")
    gw1, gb1, gw2, gb2 = g.gradient(loss, [w1, b1, w2, b2])
    print(" the gradient for 1st layer weights are:\n",gw1.numpy())
    print("--------------------------------------------------")
    print(" the gradient for 2nd layer weights are:\n",gw2.numpy())
    print("--------------------------------------------------")
    print(" the gradient for 1st layer bias are:\n",gb1.numpy())
    print("--------------------------------------------------")
    print(" the gradient for 2nd layer bias are:\n",gb2.numpy())
    print("--------------------------------------------------")

    # Gradient descent: param <- param - lr * gradient, applied in place
    w1.assign_sub(lr*gw1)
    b1.assign_sub(lr*gb1)
    w2.assign_sub(lr*gw2)
    b2.assign_sub(lr*gb2)
    print("**************************************************")
    print("NEW UPDATES")
    print("**************************************************")
    print(" the updated 1st layer weights are:\n",w1.numpy())
    print("--------------------------------------------------")
    print(" the updated 2nd layer weights are:\n",w2.numpy())
    print("--------------------------------------------------")
    print(" the updated 1st layer bias are:\n",b1.numpy())
    print("--------------------------------------------------")
    print(" the updated 2nd layer bias are:\n",b2.numpy())


    return w1, b1, w2, b2,loss

In [46]:
# Start from fresh random parameters, then run a single training step on the sample (x, y)
w1,b1,w2,b2 = random_init_params()
w1, b1, w2, b2,loss = train(x, y, w1, b1, w2, b2)

**************************************************
GRADIENTS
**************************************************
 the gradient for 1st layer weights are:
 [[0.948463  1.1721466]
 [1.649217  2.0381649]]
--------------------------------------------------
 the gradient for 2nd layer weights are:
 [[-4.957953 ]
 [-4.8140326]]
--------------------------------------------------
 the gradient for 1st layer bias are:
 [[-2.1894464 -2.7058008]]
--------------------------------------------------
 the gradient for 2nd layer bias are:
 [[-11.030389]]
--------------------------------------------------
**************************************************
NEW UPDATES
**************************************************
 the updated 1st layer weights are:
 [[ 0.23502232  0.52375096]
 [ 0.5141939  -0.39286116]]
--------------------------------------------------
 the updated 2nd layer weights are:
 [[1.7937484]
 [1.9601431]]
--------------------------------------------------
 the updated 1st layer bias are:
