# Logistic binary classifier - Gradient descent

### Steps of the algorithm
1. Get the features and targets from the dataset as numpy arrays
2. Select random weights and bias
3. Loop through the dataset
    - Predict each target with the chosen weights and bias
    - Find error
    - Calculate the partial derivatives of the error function with respect to the weights and the bias
4. Use the average of the accumulated derivatives to update the weights and the bias
5. Measure the error function and the accuracy
6. Repeat for many epochs for better accuracy

In [117]:
import pandas as pd
import numpy as np

# header=None tells pandas the file has no header row, so the three
# column names are supplied explicitly.
data = pd.read_csv(
    'data.csv',
    names=['x1', 'x2', 'y'],
    header=None,
)

In [118]:
# Preview the first rows to check that the columns were parsed as expected.
data.head()

Unnamed: 0,x1,x2,y
0,0.78051,-0.063669,1
1,0.28774,0.29139,1
2,0.40714,0.17878,1
3,0.2923,0.4217,1
4,0.50922,0.35256,1


In [119]:
# Split the frame into NumPy arrays for the classifier:
# the third column is the label vector, the first two columns are the
# feature matrix.
target = data.iloc[:, 2].values
features = data.iloc[:, 0:2].values

In [120]:
# Helper functions
def sigmoid(x):
    """Return the logistic function 1 / (1 + e^-x), applied element-wise."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def predict(X, W, b):
    """Return the sigmoid of the linear score ``np.dot(X, W) + b``.

    X holds the features, W the weights and b the bias.
    """
    linear_score = np.dot(X, W) + b
    return sigmoid(linear_score)

def errorFunction(y, pred_y, eps=1e-15):
    """Return the mean binary cross-entropy between labels and predictions.

    Args:
        y: True labels (expected to be 0/1); any array-like is accepted.
        pred_y: Predicted probabilities, same shape as ``y``.
        eps: Predictions are clipped to ``[eps, 1 - eps]`` before taking
            the logarithm.

    Returns:
        The cross-entropy averaged over all records.
    """
    y = np.asarray(y, dtype=float)
    # A prediction of exactly 0 or 1 would give log(0) = -inf, and the
    # product 0 * -inf is nan; clipping keeps the result finite.
    clipped = np.clip(pred_y, eps, 1 - eps)
    return np.mean(-y * np.log(clipped) - (1 - y) * np.log(1 - clipped))

In [156]:
# Training configuration
alpha = 0.01  # learning rate
n_epochs = 100  # number of training epochs
np.random.seed(42)  # seed NumPy's global RNG

def train(features, target, n_epochs, alpha):
    """Fit a logistic classifier with batch gradient descent.

    Each epoch predicts every record with the current parameters and then
    moves the weights and bias by ``alpha`` times the *summed* (not
    averaged) residual-weighted features, so the effective step grows with
    the number of records. Every 10th epoch the error and accuracy of the
    predictions made before that epoch's update are printed.

    Args:
        features: 2-D array of shape (n_records, n_features).
        target: 1-D array of labels (expected to be 0/1), one per record.
        n_epochs: Number of passes over the dataset.
        alpha: Learning rate.

    Returns:
        Tuple ``(weights, b)`` holding the learned weight vector and bias.
    """
    _, n_features = features.shape
    # Uniform [0, 1) start drawn from NumPy's global RNG, so a preceding
    # np.random.seed(...) call makes the run reproducible.
    weights = np.random.random(size=n_features)
    b = 0

    for e in range(1, n_epochs + 1):
        predictions = predict(features, weights, b)

        # Predictions are fixed for the whole epoch, so applying the
        # per-record updates one after another equals one step along the
        # sum of the residual-weighted features.
        residuals = target - predictions
        weights = weights + alpha * np.dot(residuals, features)
        b = b + alpha * np.sum(residuals)

        if e % 10 == 0:
            # Threshold the probabilities only when a report is printed.
            p_labels = predictions > 0.5
            print('\n############ Epoch: {} #############\n'.format(e))
            print('The error is: {}'.format(errorFunction(target, predictions)))
            print('Accuracy: {}%'.format(np.mean(target == p_labels) * 100))

    return weights, b


In [157]:
# Run training; error and accuracy are printed every 10 epochs.
train(features, target, n_epochs, alpha)


############ Epoch: 10 #############

The error is: 0.672116920549725
Accuracy: 50.0%

############ Epoch: 20 #############

The error is: 0.5926443672008321
Accuracy: 75.0%

############ Epoch: 30 #############

The error is: 0.5310641090335984
Accuracy: 89.0%

############ Epoch: 40 #############

The error is: 0.48274744202499553
Accuracy: 92.0%

############ Epoch: 50 #############

The error is: 0.4442313214493449
Accuracy: 93.0%

############ Epoch: 60 #############

The error is: 0.4130121666075204
Accuracy: 93.0%

############ Epoch: 70 #############

The error is: 0.3872994534050107
Accuracy: 94.0%

############ Epoch: 80 #############

The error is: 0.36580786617007777
Accuracy: 94.0%

############ Epoch: 90 #############

The error is: 0.3476045761453937
Accuracy: 94.0%

############ Epoch: 100 #############

The error is: 0.33200289193341853
Accuracy: 94.0%
