# Creating our own Neural Networks

## Creating a simple Neural Network with One input and One output

In [None]:
# Single scalar weight used by the one-input/one-output network below.
wt = 10

In [None]:
def neural_network(ip, wt):
  """Return the prediction for one input: the input scaled by the weight."""
  return ip*wt

In [None]:
# Sample input values; the first entry is fed to the network in the next cell.
num_of_centuries = [2, 4, 1, 15]

In [None]:
# Predict from the first data point using the scalar weight.
pred = neural_network(num_of_centuries[0], wt)
pred  # display the prediction as the cell output

## Creating a Neural Network with Multiple inputs and One output

In [None]:
# One weight per input value (three inputs feed a single output).
wts = [0.2, 0.4, 1.0]

In [None]:
def w_sum(l1, l2):
  """Return the weighted sum (dot product) of two equal-length sequences.

  Args:
    l1: Sequence of numbers (e.g. the inputs).
    l2: Sequence of numbers (e.g. the weights), same length as ``l1``.

  Returns:
    The sum of ``l1[i]*l2[i]`` over all indices. If the lengths differ, a
    diagnostic is printed and 0 is returned.
  """
  if len(l1) != len(l2):
    # Best-effort behaviour: report the mismatch and return 0.
    print('Incorrect input sizes.')
    print(l1)
    print(l2)
    return 0
  # Use the builtin sum rather than shadowing it with a local accumulator.
  return sum(x*y for x, y in zip(l1, l2))

In [None]:
def neural_network(ip, wt):
  """Return the single output: the weighted sum of inputs ``ip`` and weights ``wt``."""
  return w_sum(ip, wt)

In [None]:
# Three input features stored as parallel lists; the same index in each list
# is combined into one input vector in the next cell.
num_of_centuries = [2, 4, 1, 15]
num_of_matches = [10, 12, 10, 25]
num_of_overs = [50, 70, 40, 120]

In [None]:
# Build the input vector from the first entry of each feature list.
ip = [num_of_centuries[0], num_of_matches[0], num_of_overs[0]]
pred = neural_network(ip, wts)
pred  # display the prediction as the cell output

### Using Numpy to create a Neural Network with Multiple inputs and One output

In [None]:
import numpy as np

In [None]:
def neural_network(ip, wt):
  """Return the dot product of the inputs and the weights, computed by NumPy."""
  return np.dot(ip, wt)

In [None]:
# Convert the weights and the feature lists to NumPy arrays for the
# NumPy-based version of the network.
wts = np.array(wts)
num_of_centuries = np.array(num_of_centuries)
num_of_matches = np.array(num_of_matches)
num_of_overs = np.array(num_of_overs)

In [None]:
# Same first data point as before, now as a NumPy vector.
ip = np.array([num_of_centuries[0], num_of_matches[0], num_of_overs[0]])
pred = neural_network(ip, wts)
pred  # display the prediction as the cell output

## Trying some vector operations

In [None]:
def elemwiseMul(a, b):
  """Return the element-wise product of two vectors as a list.

  The result is as long as the longer vector. Positions that exist in only
  one vector are filled with 0, i.e. the missing element is treated as 0.

  Args:
    a: Sequence of numbers.
    b: Sequence of numbers; may differ in length from ``a``.

  Returns:
    A list of products, padded with 0 up to the longer input's length.
  """
  # Make sure a is the longer of the two vectors. A tuple swap avoids the
  # temp-variable mistake that left both names pointing at the same vector.
  if len(a) < len(b):
    a, b = b, a

  products = [x*y for x, y in zip(a, b)]
  # Positions beyond the shorter vector have no partner, so they contribute 0.
  products.extend([0] * (len(a) - len(b)))
  return products

In [None]:
def elemwiseSum(a, b):
  """Return the element-wise sum of two vectors as a list.

  The result is as long as the longer vector. Positions that exist in only
  one vector keep that vector's value, i.e. the missing element is treated
  as 0.

  Args:
    a: Sequence of numbers.
    b: Sequence of numbers; may differ in length from ``a``.

  Returns:
    A list of sums with the unmatched tail of the longer input appended.
  """
  # Make sure a is the longer of the two vectors. A tuple swap avoids the
  # temp-variable mistake that left both names pointing at the same vector.
  if len(a) < len(b):
    a, b = b, a

  totals = [x+y for x, y in zip(a, b)]
  # Elements of the longer vector without a partner are carried over as-is.
  totals.extend(a[len(b):])
  return totals

In [None]:
def vecSum(a):
  """Return the sum of all elements of ``a`` (0 for an empty vector)."""
  # Delegate to the builtin instead of shadowing it with a local accumulator.
  return sum(a)

In [None]:
def vecAvg(a):
  """Return the arithmetic mean of the elements of ``a``.

  Raises:
    ZeroDivisionError: If ``a`` is empty.
  """
  # Delegate to the builtin sum instead of shadowing it with a local name.
  return sum(a)/len(a)

In [None]:
# Element-wise product of two equal-length feature vectors.
elemwiseMul(num_of_centuries, num_of_matches)

In [None]:
# Element-wise sum of two equal-length feature vectors.
elemwiseSum(num_of_centuries, num_of_matches)

In [None]:
# Total of all entries in the vector.
vecSum(num_of_centuries)

In [None]:
# Mean of all entries in the vector.
vecAvg(num_of_centuries)

## Creating a Neural Network with One input Multiple outputs

In [None]:
def scaleVecMul(ip, wts):
  """Multiply the scalar ``ip`` by every weight in ``wts`` and return the list of products."""
  return [ip*weight for weight in wts]

In [None]:
def neural_network(ip, wts):
  """Return one output per weight: the single input ``ip`` scaled by each entry of ``wts``."""
  return scaleVecMul(ip, wts)

In [None]:
# Single scalar input: the first entry of the vector.
ip = num_of_centuries[0]
ip  # display the input as the cell output

In [None]:
# Display the weights as last assigned above.
wts

In [None]:
# One prediction per weight for the single input.
neural_network(ip, wts)

## Creating a Neural Network with Multiple inputs Multiple outputs

### Creating the neural network with for loops

In [None]:
# 3 inputs and 4 outputs: each of the 4 rows holds the 3 weights
# (one per input) that produce one output.
wts = [[10, 5, 0.2],
       [2, 8, 5],
       [0.5, 0.9, 2.2],
       [10, 8, 0.9]]

In [None]:
def neural_network(ip, wts):
  """Return one output per row of ``wts``: the weighted sum of ``ip`` with that row."""
  return [w_sum(ip, row) for row in wts]

In [None]:
# Input vector built from the first entry of each feature.
ip = [num_of_centuries[0], num_of_matches[0], num_of_overs[0]]
ip  # display the input vector as the cell output

In [None]:
# One output per row of the weight matrix.
neural_network(ip, wts)

### Numpy function

In [None]:
# Same computation with NumPy: the input vector dotted with the transposed
# weight matrix yields one value per weight row.
np.array(ip).dot(np.array(wts).T)

## Creating a Multiple Layer Neural Network

### Creating the deep neural network with for loops

In [None]:
# Layer 1 weights: 3 inputs and 4 outputs (each row holds the weights for
# one output).
wts_l1 = [[42, 7, 9.5],
          [7, 2, 4],
          [7.1, 3.8, 1.8],
          [10, 8, 0.9]]

# Layer 2 weights: 4 inputs (the outputs of layer 1) and 5 outputs.
wts_l2 =  [[10, 5, 0.2, 42],
           [5, 0.5, 2, 2],
           [2, 8, 5, 3.8],
           [0.5, 0.9, 2.2, 0.9],
           [7, 2, 4, 8]]

In [None]:
def layer(ip, wts):
  """Compute one layer: the weighted sum of ``ip`` with each row of ``wts``, as a list."""
  return [w_sum(ip, row) for row in wts]

In [None]:
def neural_network(ip, wts_l1, wts_l2):
  """Feed ``ip`` through two stacked layers and return the second layer's outputs."""
  # The hidden layer's outputs become the inputs of the second layer.
  return layer(layer(ip, wts_l1), wts_l2)

In [None]:
# Run the two-layer network on the current input vector.
neural_network(ip, wts_l1, wts_l2)

### Using Numpy to create the same Deep Neural Network

In [None]:
# Convert both weight matrices and the input vector to NumPy arrays.
wts_l1 = np.array(wts_l1)
wts_l2 = np.array(wts_l2)
ip = np.array([num_of_centuries[0], num_of_matches[0], num_of_overs[0]])

In [None]:
def neural_network(ip, wts_l1, wts_l2):
  """Two-layer forward pass: ``ip`` dotted with each transposed weight matrix in turn."""
  hidden = ip.dot(wts_l1.T)
  return hidden.dot(wts_l2.T)

In [None]:
# Run the NumPy version of the two-layer network.
neural_network(ip, wts_l1, wts_l2)

# Creating Gradient Descent

## Comparison for error calculation

In [None]:
def neural_network(ip, wt):
  """Return the single-weight prediction ``ip*wt``."""
  return ip*wt

In [None]:
def compare(wt, ip, act):
  """Return the squared error between the network's prediction and ``act``."""
  difference = neural_network(ip, wt) - act
  return difference**2

In [None]:
wt, ip = 0.2, 20 # Prediction is 4 (wt * ip)
act = 5.2 # Actual (target) value

# Squared error of the prediction against the actual value.
compare(wt, ip, act)

## Comparison and error reduction

In [None]:
def compare(wt, ip, act, lr):
  """Take one hot-and-cold step: nudge ``wt`` by ``lr`` in the better direction.

  The squared error is evaluated at ``wt``, ``wt+lr`` and ``wt-lr``. If either
  neighbour beats the current error, the weight moves to the better neighbour;
  otherwise it is left unchanged.

  Returns:
    A tuple of the resulting weight and the squared error at that weight.
  """
  def squared_error(weight):
    return (neural_network(ip, weight) - act)**2

  current = squared_error(wt)
  raised = squared_error(wt+lr)   # error after increasing the weight
  lowered = squared_error(wt-lr)  # error after decreasing the weight

  if current > raised or current > lowered:
    if raised < lowered:
      wt += lr
    else:
      wt -= lr

  # Returning the final weight and the error at that weight
  return wt, squared_error(wt)

In [None]:
wt, ip = 0.2, 20 # Prediction is 4 (wt * ip)
act = 6 # Actual (target) value
lr = 0.002 # Step by which the weight is nudged up or down

# One hot-and-cold step: returns the adjusted weight and its squared error.
compare(wt, ip, act, lr)

## Comparison, Error reduction and Weight Manipulation (Learning)

### Hot and Cold Learning

In [None]:
def learn(wt, ip, act, lr, iters):
  """Repeatedly apply ``compare`` until the weight stops changing.

  Args:
    wt: Starting weight.
    ip: Input passed through to ``compare``.
    act: Actual (target) value passed through to ``compare``.
    lr: Learning rate passed through to ``compare``.
    iters: Maximum number of iterations.

  Returns:
    A tuple ``(wt, error, [wt_vals, err_vals])``: the final weight, the error
    reported for it (``None`` if no iteration ran), and the per-iteration
    history of weights and errors.
  """
  wt_vals = []
  err_vals = []
  # Initialise so the return never hits an unbound name, e.g. when the very
  # first step leaves the weight unchanged or when iters is 0.
  error = None
  for i in range(iters):
    nwt, nerror = compare(wt, ip, act, lr)
    wt_vals.append(nwt)
    err_vals.append(nerror)
    # compare() reports the error at the weight it returns, so this is the
    # error of the weight held after this step whether or not it moved.
    error = nerror
    if nwt == wt:
      # An unchanged weight means another step would not move it either.
      print('Convergence reached')
      break
    wt = nwt
    print(f'{i+1}.\tNew weight : {round(wt, 3)}\tNew error: {error}')
  return wt, error, [wt_vals, err_vals]

In [None]:
wt, ip = 0.2, 20 # Prediction is 4 (wt * ip)
act = 6 # Actual (target) value
lr, iters = 0.02, 10 # Step size and maximum number of iterations

# Returns the final weight, its error, and the [weights, errors] history.
fin_wt, fin_err, wt_err_vals = learn(wt, ip, act, lr, iters)

In [None]:
# Prediction using the learned weight.
neural_network(ip, fin_wt)

#### Plotting the error change with respect to weight change

In [None]:
from matplotlib import pyplot as plt

In [None]:
# Plot the recorded error against the recorded weight for every iteration.
plt.plot(wt_err_vals[0], wt_err_vals[1])  # connecting line
plt.scatter(wt_err_vals[0], wt_err_vals[1])  # one marker per iteration
plt.xlabel('Weights')
plt.ylabel('Error')

Take notice of the fact that the distribution is quite uniform. The model moves quite evenly down the error curve.

*Even so, we can consider ourselves lucky that our value converged, because this is a very inefficient approach when predicting values where the minimum isn't a multiple of the learning rate. We usually end up at a nearby point which isn't the best one.*

### The Better Way

In [None]:
def compare(wt, ip, act, lr):
  """Take one error-scaled step on ``wt`` and return the new weight and its squared error.

  The weight is reduced by ``(prediction - act) * ip * lr``, so the size of the
  step shrinks as the prediction approaches the actual value.
  """
  miss = neural_network(ip, wt) - act
  wt -= miss*ip*lr # Change the weight depending on the error

  # Returning the final weight and the error at that weight
  return wt, (neural_network(ip, wt) - act)**2

In [None]:
wt, ip = 0, 20 # Initial prediction is 0 (wt * ip)
act = 6 # Actual (target) value
lr, iters = 0.002, 100 # Learning rate and maximum number of iterations

# Returns the final weight, its error, and the [weights, errors] history.
fin_wt, fin_err, wt_err_vals = learn(wt, ip, act, lr, iters)

In [None]:
# Prediction using the learned weight.
neural_network(ip, fin_wt)

It has definitely taken longer to converge but that is only because the rate of change starts to reduce when we reach closer to the actual value. This is because the error between the prediction and actual value starts to drop causing the scaled error value to also drop.

#### Plotting the error change with respect to weight change

In [None]:
# Wide figure so the closely spaced points stay distinguishable.
plt.figure(figsize=(20, 4))
# Plot the recorded error against the recorded weight for every iteration.
plt.plot(wt_err_vals[0], wt_err_vals[1])  # connecting line
plt.scatter(wt_err_vals[0], wt_err_vals[1])  # one marker per iteration
plt.xlabel('Weights')
plt.ylabel('Error')

We can see the phenomenon here. The distance between two consecutive points is very large in the beginning and reduces as we progress. We can practically see only 6-7 points even though we actually converged after about 20-25 iterations.

## Applying Gradient Descent on a set of datapoints

### Standard approach

In [None]:
 def compare(wt, ips, acts, lr):
  for i in range(len(ips)):
    pred = neural_network(ips[i], wt)
    pure_error = pred-acts[i]
    scaled_error = pure_error*ips[i]*lr
    wt -= scaled_error # Change the weight depending on the error
  return wt, getMAE(ips, acts, wt)

def getMAE(ips, acts, wt):
  """Return the mean absolute error of the predictions for ``ips`` against ``acts`` at weight ``wt``."""
  total = sum(abs(neural_network(x, wt) - acts[i]) for i, x in enumerate(ips))
  return total / len(ips)

def learn(wt, ip, act, lr, iters):
  """Repeatedly apply ``compare`` over the dataset until the weight stops changing.

  Args:
    wt: Starting weight.
    ip: Inputs passed through to ``compare``.
    act: Actual (target) values passed through to ``compare``.
    lr: Learning rate passed through to ``compare``.
    iters: Maximum number of iterations.

  Returns:
    A tuple ``(wt, error, [wt_vals, err_vals])``: the final weight, the error
    reported for it (``None`` if no iteration ran), and the per-iteration
    history of weights and errors.
  """
  wt_vals = []
  err_vals = []
  # Initialise so the return never hits an unbound name, e.g. when the very
  # first step leaves the weight unchanged or when iters is 0.
  error = None
  for i in range(iters):
    nwt, nerror = compare(wt, ip, act, lr)
    wt_vals.append(nwt)
    err_vals.append(nerror)
    # compare() reports the error at the weight it returns, so this is the
    # error of the weight held after this step whether or not it moved.
    error = nerror
    if nwt == wt:
      # An unchanged weight means another pass would not move it either.
      print('Convergence reached')
      break
    wt = nwt
    print(f'{i+1}.\tNew weight : {round(wt, 5)}\tNew error: {error}')
  return wt, error, [wt_vals, err_vals]

In [None]:
ip = [1, 3, 5, 7, 9]
act = [5, 16, 24, 38, 45] # roughly y = 5x (not every target is an exact multiple)
iters = 100 # Maximum number of passes over the data
lr = 0.01 # Learning rate
wt = -5 # Starting weight

In [None]:
# Returns the final weight, its error, and the [weights, errors] history.
fin_wt, fin_err, wt_err_vals = learn(wt, ip, act, lr, iters)

In [None]:
plt.figure(figsize=(25, 5))
# Plot the recorded error against the recorded weight for every iteration.
plt.plot(wt_err_vals[0], wt_err_vals[1])  # connecting line
plt.scatter(wt_err_vals[0], wt_err_vals[1])  # one marker per iteration
plt.xlabel('Weights')
plt.ylabel('Error')

In [None]:
# Mean absolute error over the dataset at the final weight.
getMAE(ip, act, fin_wt)

We can quite clearly see the problem of overlearning above. Our best weight is just less than 5 but we overshoot it because the learning rate is not rescaled. We can try to add a dynamic learning rate which can take care of that issue for us.

### Exploring a dynamic Learning Rate

In [None]:
def learn(wt, ip, act, lr, iters):
  """Learn a weight with a learning rate that is halved or doubled on the fly.

  Each iteration takes a trial step from the best weight found so far and
  compares the trial error with the best error:

  * trial is worse: the step is rejected and, while ``lr`` rounded to
    ``perfection`` decimals is still positive, ``lr`` is divided by
    ``change_factor``;
  * trial is better: if both errors are equal when rounded to ``perfection``
    decimals, ``lr`` is multiplied by ``change_factor`` and the step is not
    accepted; otherwise the trial becomes the new best and is recorded;
  * errors are equal: a tolerance counter is incremented, and the loop stops
    once it exceeds ``iters/10``.

  Args:
    wt: Starting weight.
    ip: Inputs passed through to ``compare``.
    act: Actual (target) values passed through to ``compare``.
    lr: Initial learning rate.
    iters: Maximum number of iterations.

  Returns:
    A tuple ``(best_wt, min_error, [wt_vals, err_vals])``: the best weight,
    its error, and the history of accepted weights and their errors.
  """
  wt_vals = []
  err_vals = []
  # Consider the first weights to be the best ones
  # (a learning rate of 0 is passed to get the error for the starting weight;
  # assumes compare() leaves the weight unchanged in that case)
  best_wt, min_error = compare(wt, ip, act, 0)
  tol = 0            # consecutive iterations in which the error was unchanged
  perfection = 16    # number of decimals used when rounding lr and errors
  change_factor = 2  # factor by which lr is divided or multiplied

  for i in range(iters):
    # Trial step always starts from the best weight found so far
    nwt, nerror = compare(best_wt, ip, act, lr)
    # If we find out that the best weights are better than the current ones
    if min_error < nerror:
      if round(lr, perfection) > 0:
        # Reduce the learning rate without changing the weights
        lr /= change_factor
        nwt, nerror = best_wt, min_error
        print(f'---------------------------------------------------------\nReduced learning rate to avoid overlearning. New value: {lr}\n---------------------------------------------------------')
        tol = 0

    # If the new error is lesser than the current one
    elif min_error > nerror:
      # Improvement too small to show at `perfection` decimals: take bigger
      # steps instead of accepting this one
      if round(min_error, perfection) == round(nerror, perfection):
        lr *= change_factor
        print(f'---------------------------------------------------------\nIncreased learning rate to avoid underlearning. New value: {lr}\n---------------------------------------------------------')
      else:
        # Accept the trial step and record it in the history
        best_wt, min_error = nwt, nerror
        print(f'{i+1}.\tNew weight : {round(best_wt, 5)}\tNew error: {min_error}')
        tol = 0
        wt_vals.append(best_wt)
        err_vals.append(min_error)
    
    # If the new error is equal to the current one and no learning rate value helps
    else:
      # Give up once the error has stayed unchanged for more than iters/10 tries
      if tol > iters/10:
        print('Convergence reached')
        break
      else:
        tol += 1
  return best_wt, min_error, [wt_vals, err_vals]

In [None]:
ip = [1, 3, 5, 7, 9]
act = [5, 16, 24, 38, 45] # roughly y = 5x (not every target is an exact multiple)
iters = 100 # Maximum number of iterations
lr = 5.12 # Starting learning rate; learn() halves it whenever a step makes the error worse
wt = -5 # Starting weight

# Returns the best weight, its error, and the history of accepted steps.
fin_wt, fin_err, wt_err_vals = learn(wt, ip, act, lr, iters)

In [None]:
plt.figure(figsize=(25, 5))
# Plot the error against the weight for every accepted step.
plt.plot(wt_err_vals[0], wt_err_vals[1])  # connecting line
plt.scatter(wt_err_vals[0], wt_err_vals[1])  # one marker per accepted step
plt.xlabel('Weights')
plt.ylabel('Error')

In [None]:
# Mean absolute error over the dataset at the final weight.
getMAE(ip, act, fin_wt)

In [None]:
ip = [1, 3, 5, 7, 9]
act = [5, 16, 24, 38, 45] # roughly y = 5x (not every target is an exact multiple)
iters = 100 # Maximum number of iterations
lr = 0.01 # Smaller starting learning rate, for comparison with the run above
wt = -5 # Starting weight

# Returns the best weight, its error, and the history of accepted steps.
fin_wt, fin_err, wt_err_vals = learn(wt, ip, act, lr, iters)

In [None]:
plt.figure(figsize=(25, 5))
# Plot the error against the weight for every accepted step.
plt.plot(wt_err_vals[0], wt_err_vals[1])  # connecting line
plt.scatter(wt_err_vals[0], wt_err_vals[1])  # one marker per accepted step
plt.xlabel('Weights')
plt.ylabel('Error')

In [None]:
# Mean absolute error over the dataset at the final weight.
getMAE(ip, act, fin_wt)

From the two different values of learning rate, we can see clearly that it is very important to choose the right value for the learning rate to minimise the error as much as possible. Even a multiple of the best learning rate doesn't always ensure the best result. In any case, we can be sure to get a better result by making the learning rate dynamic, which is better than nothing.