In [1]:
#Trying to predict petal_length from sepal length and sepal_width
#We are going to use gradient descent. So the batch size is the whole training dataset

In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import math

In [3]:
#Load the iris dataset
df = pd.read_csv ("https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data", names = ["sepal_length", "sepal_width", "petal_length", "petal_width", "species"])
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [4]:
X = df.iloc[:,:2]
t = df.iloc[:,2]

In [5]:
#Split the data into train test datasets
X_train, X_test, t_train, t_test = train_test_split(X,t,test_size=0.30, random_state=42)

In [6]:
X_train = np.asarray(X_train)
X_test = np.asarray(X_test)
t_train = np.asarray(t_train)
t_test = np.asarray(t_test)
t_train.shape

(105,)

In [7]:
#Now let's create the neural network
#We have one hidden layer
#Two input nodes, two nodes in the hidden layer and one node in the output layer
# The activation function of the hidden layer is going to be the sigmoid function 
# In the output layer there won't be an activation function, since we have standard regression
#    x1------>0------\
#                     0-->
#    x2------>0------/
#def init_params():
    #Lets now create the weight matrix W1 for the first layer. W1 is a 2x2 matrix
    # W1 = [[w11, w21]
    #       [w12, w22]]
W1 = np.random.normal(size=(2,2))

    #Lets now create the weight matrix W2 for the second layer. W2 is a 1x2 matrix
    # W2 = [[w11, w21]]

W2 = np.random.normal(size=(1,2))
    #return W1, W2
print(W1)
print(W2)

[[-0.08186617  0.4171579 ]
 [ 1.1889116  -0.1767902 ]]
[[0.59812466 0.59367936]]


In [8]:
def sigmoid(a: np.ndarray) -> np.ndarray:
    z = 1 / (1 + np.exp(-a))
    return z

In [9]:
#This function delivers the array of outcomes when we apply the current params on the data X
def forwardPassing(X: np.ndarray, W1: np.ndarray, W2: np.ndarray) -> tuple:
    A1 = np.dot(W1, X.T)
    Z1 = sigmoid(A1)
    Z2 = np.dot(W2, Z1)
    return A1, Z1, Z2

In [10]:
#A1, Z1, Z2 = forwardPassing(X_train, W1, W2)
#print((Z2 - t_train).shape)
#print(Z1[0].T.shape)
#print(((Z2 - t_train) * Z1[0]).shape)
#X_train.shape
#print(W2.T[0])
#dw1_1 = W2.T[0] * np.dot((2 * (Z2 - t_train) * Z1[0] * (1 - Z1[0])), X_train)
#dw1_2 = W2.T[1] * np.dot((2 * (Z2 - t_train) * Z1[1] * (1 - Z1[1])), X_train)
#dW1 = np.vstack((dw1_1, dw1_2))
#print(dW1)
#print(dw1_1)
#print(dw1_2)

In [11]:
def loss(X: np.ndarray, t: np.ndarray, W1: np.ndarray, W2: np.ndarray) -> float: 
    return np.sum(np.square(forwardPassing(X, W1, W2)[2] - t))

In [12]:
print(loss(X_train, t_train, W1, W2))

1177.6913811568188


In [13]:
def backPropagation(W1: np.ndarray, W2: np.ndarray, Z1: np.ndarray, Z2: np.ndarray, X: np.ndarray, t: np.ndarray) -> tuple:
    dW2 = 2* np.dot((Z2 - t), Z1.T)
    #print(dW2)
    dw1_1 = W2.T[0] * np.dot((2 * (Z2 - t_train) * Z1[0] * (1 - Z1[0])), X)
    dw1_2 = W2.T[1] * np.dot((2 * (Z2 - t_train) * Z1[1] * (1 - Z1[1])), X)
    dW1 = np.vstack((dw1_1, dw1_2))
    #print(dW1)
    return dW1, dW2
    

In [14]:
#dW1, dW2 = backPropagation(W1, W2, Z1, Z2, X_train, t_train)

In [15]:
def updateParams(W1: np.ndarray, W2: np.ndarray, dW1: np.ndarray, dW2: np.ndarray, alpha: float) -> tuple:
    W1 = W1 - alpha * dW1 
    W2 = W2 - alpha * dW2 
    return W1, W2

In [16]:
#print(W1)
#print(dW1)
#updateParams(W1, W2, dW1, dW2, 1)

In [17]:
def gradientDescent(X: np.ndarray, t: np.ndarray, W1: np.ndarray, W2: np.ndarray, iterations: int, alpha: float) -> np.ndarray:
    for i in range(iterations):
        A1, Z1, Z2 = forwardPassing(X, W1, W2)
        dW1, dW2 = backPropagation(W1, W2, Z1, Z2, X, t)
        W1, W2 = updateParams(W1, W2, dW1, dW2, alpha)
        if i % 50 == 0:
            print("The loss is:")
            print(loss(X, t, W1, W2))
            print("")
    return W1, W2

In [18]:
W1, W2 = gradientDescent(X_train, t_train, W1, W2, 10000, 0.0001)

The loss is:
1097.0566634915153

The loss is:
314.7472494444189

The loss is:
303.80779240750036

The loss is:
302.5534251540462

The loss is:
300.5259229547915

The loss is:
295.2153209099308

The loss is:
253.37240965828877

The loss is:
207.26313821083758

The loss is:
175.35022997396274

The loss is:
148.25066480961593

The loss is:
124.9339250787313

The loss is:
106.50257509286513

The loss is:
93.44620079501274

The loss is:
84.98793401329668

The loss is:
79.77164366198448

The loss is:
76.59606725870944

The loss is:
74.6327027029278

The loss is:
73.3722709796813

The loss is:
72.51821381957286

The loss is:
71.90175170866148

The loss is:
71.42737908138221

The loss is:
71.04084470292783

The loss is:
70.71104795585713

The loss is:
70.41993870094942

The loss is:
70.15687985734418

The loss is:
69.91547664075198

The loss is:
69.69177481920381

The loss is:
69.48322738624545

The loss is:
69.28809863030446

The loss is:
69.10512047131489

The loss is:
68.93329564342064

The

In [19]:
print(loss(X_test, t_test, W1, W2))

15.289257559869506


In [20]:
print(forwardPassing(X_test, W1, W2)[2].T.shape)
print(len(t_test))

(45, 1)
45


In [21]:
from sklearn.metrics import r2_score, mean_squared_error
print(r2_score(forwardPassing(X_test, W1, W2)[2].T, t_test))
print(mean_squared_error(forwardPassing(X_test, W1, W2)[2].T, t_test))

0.8780721375081509
0.33976127910821124
