In [126]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from matplotlib.pyplot import plot

In [127]:
df = pd.read_csv('heart.csv')
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [128]:
df.shape
df.dtypes

age           int64
sex           int64
cp            int64
trestbps      int64
chol          int64
fbs           int64
restecg       int64
thalach       int64
exang         int64
oldpeak     float64
slope         int64
ca            int64
thal          int64
target        int64
dtype: object

In [129]:


# y (output) is our target 
# y is matrix with length x.row (is equals X.shape[0]) and 1 column
Y = df['target'].values.reshape(X.shape[0], 1)
X = df.drop(["target"], axis=1)

sc = StandardScaler()
X = sc.fit_transform(X)
X.shape
X

array([[ 0.9521966 ,  0.68100522,  1.97312292, ..., -2.27457861,
        -0.71442887, -2.14887271],
       [-1.91531289,  0.68100522,  1.00257707, ..., -2.27457861,
        -0.71442887, -0.51292188],
       [-1.47415758, -1.46841752,  0.03203122, ...,  0.97635214,
        -0.71442887, -0.51292188],
       ...,
       [ 1.50364073,  0.68100522, -0.93851463, ..., -0.64911323,
         1.24459328,  1.12302895],
       [ 0.29046364,  0.68100522, -0.93851463, ..., -0.64911323,
         0.26508221,  1.12302895],
       [ 0.29046364, -1.46841752,  0.03203122, ..., -0.64911323,
         0.26508221, -0.51292188]])

In [130]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, train_size=0.5)

In [131]:
class ActivationFunction():
    
    def __init__(self, z):
        self.z = z

    def relu(self, z):
        """
        The ReLu activation function is equals 0 where values less than zero. (x=0 when x<=0 and x=x whan x>0)
        """
        return np.maximum(0, z)

    def sigmoid (self, z):
        """
        The sigmoid function takes in real numbers in any range and return output between 0 and 1.
        """
        return 1 / (1 + np.exp(-z))

    def d_sigmoid(self, z):
        """
        The derivative of sigmoid function
        """
        return (1 - self.sigmoid(z)) * self.sigmoid(z)

    def tanh (self, z):
        """
        The range of the tanh function is from (-1 to 1).
        """
        return np.tanh(z)

    def d_tanh(self, z):
        """
        The derivative of tanh function
        """
        return 1 - np.power(np.self.tanh(z), 2)

In [132]:
class LossFunction():

    def __init__(self, y, a):
        self.y = y
        self.a = a

    def cross_entropy_loss(self, y, a):
        """
        For a binary classification task (i.e. C=2), the cross-entropy loss function (-sum i[1-2] y1*log(y_predict)) becomes like return formula.
        1/len(y) and np.sum mean that return the average loss with respect to all the inputs. That is, the combined loss from all the samples
        and not the individual losses.
        """
        return -1 / len(y) * (np.sum(np.multiply(y, np.log(a)) + np.multiply((1 - y), np.log(1 - a))))


    def d_cross_entropy_loss(self, y, a):
        """
        The derivative of Cross Entropy function function
        """
        return (a - y)/(a*(1 - a))


    def mse(self, y, a):
        """
        Mean square error is measured as the average of squared difference between predictions and actual observations.
        """
        return 1/len(y) * np.sum(np.power((y - a), 2))


    def d_mse(self, y, a):
        """
        The derivative of MSE function
        """
        return a-y

In [133]:
class Gradient(LossFunction):
   
    def __init__(self, x, y):
        self.x = x
        self.y = y


    def grad_W_mse(self, x, y):
        """
        The function updating the gradients using mean squared error loss for W
        x is input data or previous activation
        Cost = (a-y)^2
        a = sigmoid(z)
        z = w*x + b
        D_cost/D_waight = D_cost/D_a  is (d_mse)   D_a/D_z  is (d_sigmoid)   D_z/D_w  is (x)
        Return The gradient for D_cost/D_waight 
        """
        perceptron = self.perceptron(x)
        y_predict = self.sigmoid(perceptron)
        return self.d_mse(y, y_predict) * self.d_sigmoid(perceptron) * x 


    def grad_B_mse(self, x, y):
        """
        The function updating the gradients using mean squared error loss for b
        x is input data or previous activation
        Cost = (a-y)^2
        a = sigmoid(z)
        z = w*x + b
        D_cost/D_waight = D_cost/D_a  is (d_mse)   D_a/D_z  is (d_sigmoid)   D_z/D_b  is (x)
        Return The gradient for D_cost/D_waight 
        """

        # I use the function grad_w_mse and divide to x because in formula D_cost/D_b  D_z/D_w=0 and D_z/D_b = 1 
        return self.grad_W_mse(x, y) / x


    def grad_W_ce(self, x, y):
        """
        The function updating the gradients using cross entropy loss for W
        x is input data or previous activation
        """
        y_predict = self.sigmoid(self.perceptron(x))

        if y == 0:
            return y_predict * x
        elif y == 1:
            return -1 * (1 - y_predict) * x


    def grad_B_ce(self, x, y):
        """
        The function updating the gradients using mean squared error loss for b
        x is input data or previous activation
        """
        y_predict = self.sigmoid(self.perceptron(x))
        
        if y == 0:
            return y_predict 
        elif y == 1:
            return -1 * (1 - y_predict)

In [134]:
class NeuralNetworks (ActivationFunction, Gradient):
    def __init__(self):
        self.w = None
        self.b = None


    def perceptron(self, x):
        """
        x is input data in the fist layer, for the second layers is neurons.
        return Z which equals w*x+b or w*a+b
        """
        return np.dot(x, self.w.T) + self.b
    

    def fit(self, X, Y, iters= 2000, lr = 0.01, loss_fn = "mse"):
        """
        X is input
        Y is Labels
        loss_fn is choose which loss function use
        The function uptade weights and biases
        """

        self.w = np.random.randn(1, X.shape[1])
        self.b = 0

        for iter in range(iters):
            dw = 0
            db = 0
            for x, y in zip(X, Y):
                if loss_fn == "mse":
                    dw += self.grad_W_mse(x, y)
                    db += self.grad_B_mse(x, y)
                elif loss_fn == "ce":
                    dw += self.grad_W_ce(x, y)
                    db += self.grad_B_ce(x, y)

            size = X.shape[1]
            self.w -= lr * dw / size
            self.b -= lr * db / size


    def predict(self, X):
        """
        The function compute the forward pass (z and a) of each input with the trained model 
        and send back a numpy array which contains the predicted value of each input data.
        """
        Y_predict = []
        
        for x in X:
            y_predict = self.sigmoid(self.perceptron(x))
            for i in range(len(y_predict)):
                if y_predict[i]>=0.5:
                    y_predict[i]=1
                else:
                    y_predict[i]=0
            Y_predict.append(y_predict)
        
        return np.array(Y_predict)


In [135]:
nn = NeuralNetworks ()
nn.fit(X_train, y_train)

y_predict_test = nn.predict(X_test)
y_test = y_test.reshape(-1)
y_predict_test = y_predict_test.reshape(-1)

accuracy = 0
for i in range(len(y_test)):
    if int(y_predict_test[i]) == y_test[i]:
    
        accuracy +=1
prob = accuracy / len(y_test)
print(prob)

0.506578947368421
