In [1]:
import sklearn as sk
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

# Reading CSV File of f(x) = x + sin2(x)

In [2]:
data = pd.read_csv("Sin_Values.csv", nrows=5000)
data.head(5000)

Unnamed: 0,Value_of_X,Sin_Values
0,1,1.90930
1,2,1.24320
2,3,2.72058
3,4,4.98936
4,5,4.45598
...,...,...
4994,4995,4994.74000
4995,4996,4996.99000
4996,4997,4996.44000
4997,4998,4997.48000


In [3]:
value_of_x = data[['Value_of_X']].to_numpy()
sin_values = data['Sin_Values'].to_numpy()

# Train-Test split for Dataset (25% for testing, 75% for training)

In [4]:
from sklearn.model_selection import train_test_split

X_train, X_test, Y_train, Y_test = train_test_split(value_of_x, sin_values, test_size = 0.25)

print('Training Set:', Y_train.size)
print('Test Set:', Y_test.size)

Training Set: 3749
Test Set: 1250


# Activation Functions

In [5]:
# Sigmoid Activator

def sigmoid_act(x, der=False):
    
    if (der==True) : 
        f = 1/(1+ np.exp(- x))*(1-1/(1+ np.exp(- x)))
    else : # sigmoid
        f = 1/(1+ np.exp(- x))
    
    return f

# Rectifier Linear Unit (ReLU)

def ReLU_act(x, der=False):
    
    if (der == True): 
        f = np.heaviside(x, 1)
    else :
        f = np.maximum(x, 0)
    
    return f

# Training the model through Forward Biase and Back Propagation

In [6]:
p=4       # Neurons at first Hidder layer 
q=8       # Neurons at second Hidden layer

eta =  0.001     # Learning rate

w1 = 2*np.random.rand(p , X_train.shape[1]) - 0.5 # Layer 1
b1 = np.random.rand(p)

w2 = 2*np.random.rand(q , p) - 0.5  # Layer 2
b2 = np.random.rand(q)

wOut = 2*np.random.rand(q) - 0.5  # Output Layer
bOut = np.random.rand(1)

save_error = []
output = []

for I in range(0, X_train.shape[0]):
    
    # Train each value 
    x = X_train[I] 
    
    # Forward Prpagation
    z1 = ReLU_act(np.dot(w1, x) + b1)    # output layer 1 
    z2 = ReLU_act(np.dot(w2, z1) + b2)   # output layer 2
    y = sigmoid_act(np.dot(wOut, z2) + bOut) # Output of the Output layer
    
    # Compute the output layer's error
    delta_Out =  (y-Y_train[I]) * sigmoid_act(y, der=True)
    
    # Backpropagate
    delta_2 = delta_Out * wOut * ReLU_act(z2, der=True)     # Second Layer Error
    delta_1 = np.dot(delta_2, w2) * ReLU_act(z1, der=True)  # First Layer Error
    
    
    # Gradient descent 
    wOut = wOut - eta*delta_Out*z2  # Output Layer
    bOut = bOut - eta*delta_Out
    
    w1 = w1 - eta*np.kron(delta_1, x).reshape(p, x.shape[0]) # Hidden Layer 1
    b1 = b1 - eta*delta_1
    
    w2 = w2 - eta*np.kron(delta_2, z1).reshape(q,p) # Hidden Layer 2
    b2 = b2 - eta*delta_2
    
    # Computation of the loss function
    save_error.append((1/2)*(y-Y_train[I])**2)
    output.append(y[0])
    
print('Error:', save_error)
print('Output:',output)

  return multiply(a.ravel()[:, newaxis], b.ravel()[newaxis, :], out)


Error: [array([1240076.26125]), array([629206.40205]), array([4701864.45125]), array([2761579.0098]), array([5726167.92845]), array([3702927.33845]), array([6247158.08645]), array([5928949.43045]), array([10222433.28]), array([5651656.4808]), array([1346145.1362]), array([10751951.03645]), array([1106729.78645]), array([8999191.00125]), array([4399912.80125]), array([55749.947528]), array([8078350.9058]), array([2054303.69045]), array([7227487.94045]), array([3525627.6882]), array([7524329.0738]), array([7291043.91125]), array([4626729.90125]), array([9210503.24045]), array([10242021.18245]), array([15337.1849405]), array([7949998.3752]), array([6978287.61125]), array([2270388.71405]), array([3160072.86005]), array([10108671.58845]), array([7910889.5378]), array([1274948.9928]), array([10900294.09605]), array([5074501.53125]), array([132919.617608]), array([608867.16005]), array([9412592.72]), array([4603882.71245]), array([360071.163272]), array([3783807.93245]), array([477548.8493405

# Batch Loss of Size 16

In [7]:
batch_loss = []

for i in range(0, 16):
    loss_avg = 0
    for m in range(0, 16):
        loss_avg += output[16*i+m]/16
        batch_loss.append(loss_avg)
        
print('Batch loss: ', batch_loss)

Batch loss:  [0.0625, 0.125, 0.1875, 0.25, 0.3125, 0.375, 0.4375, 0.5, 0.5625, 0.625, 0.6875, 0.75, 0.8125, 0.875, 0.9375, 1.0, 0.0625, 0.125, 0.1875, 0.25, 0.3125, 0.375, 0.4375, 0.5, 0.5625, 0.625, 0.6875, 0.75, 0.8125, 0.875, 0.9375, 1.0, 0.0625, 0.125, 0.1875, 0.25, 0.3125, 0.375, 0.4375, 0.5, 0.5625, 0.625, 0.6875, 0.75, 0.8125, 0.875, 0.9375, 1.0, 0.0625, 0.125, 0.1875, 0.25, 0.3125, 0.375, 0.4375, 0.5, 0.5625, 0.625, 0.6875, 0.75, 0.8125, 0.875, 0.9375, 1.0, 0.0625, 0.125, 0.1875, 0.25, 0.3125, 0.375, 0.4375, 0.5, 0.5625, 0.625, 0.6875, 0.75, 0.8125, 0.875, 0.9375, 1.0, 0.0625, 0.125, 0.1875, 0.25, 0.3125, 0.375, 0.4375, 0.5, 0.5625, 0.625, 0.6875, 0.75, 0.8125, 0.875, 0.9375, 1.0, 0.0625, 0.125, 0.1875, 0.25, 0.3125, 0.375, 0.4375, 0.5, 0.5625, 0.625, 0.6875, 0.75, 0.8125, 0.875, 0.9375, 1.0, 0.0625, 0.125, 0.1875, 0.25, 0.3125, 0.375, 0.4375, 0.5, 0.5625, 0.625, 0.6875, 0.75, 0.8125, 0.875, 0.9375, 1.0, 0.0625, 0.125, 0.1875, 0.25, 0.3125, 0.375, 0.4375, 0.5, 0.5625, 0.625, 0.

# Which Activavtion to use ?

ReLu activation function is used to restrict error possibilities and solve gradiant decent problems. So ReLU is used at the output of Hidden layer neurons.

While Sigmoid function is used at the output layer because it restricts our answer output from 0 to 1.

# Reference

I took help from the recorded lecture of AI Lab on 27th May