In [1]:
import numpy as np
import pandas as pd
# Seed NumPy's global random generator so the random draws made through
# np.random later in the notebook (e.g. the np.random.randn weight
# initialisation) are repeatable across "Restart & Run All".
np.random.seed(5)

In [2]:
# Load the Iris table and drop the row-identifier column (not a feature).
iris = pd.read_csv('Iris.csv').drop(columns='Id')

# Encode the species names as integer class labels.
# The mapped column is assigned back explicitly: calling
# `iris.Species.replace(..., inplace=True)` mutates a column accessor (chained
# assignment), which warns in pandas 2.x and is a no-op under Copy-on-Write.
iris['Species'] = iris['Species'].map(
    {'Iris-virginica':0,'Iris-versicolor':1,'Iris-setosa':2}
)

# Binary problem: keep only the classes labelled 0 and 1, then make the label
# dtype explicitly integer (rows with unmapped species became NaN and are
# removed by the filter, so the cast is safe).
iris = iris[iris.Species.isin([0,1])].astype({'Species':'int64'})

# Shuffle the rows (frac=1 returns every row in random order).
iris = iris.sample(frac=1)

# Every column except the last is a feature; the last column is the label.
cols = iris.columns
X,Y = iris[cols[:-1]].values,np.expand_dims(iris[cols[-1]].values,axis=-1)
X.shape,Y.shape

((100, 4), (100, 1))

In [3]:
def sigmoid(z):
    """
    Logistic sigmoid 1 / (1 + exp(-z)), evaluated in a numerically stable way.

    z = scalar or NumPy array of pre-activations
    returns an array (or NumPy scalar) of the same shape with values in [0, 1]

    The naive form overflows in exp(-z) for large negative z and emits a
    RuntimeWarning. Using log(1 + exp(-z)) == logaddexp(0, -z) avoids that:
    sigmoid(z) = exp(-logaddexp(0, -z)).
    """
    return np.exp(-np.logaddexp(0.0, -z))

def sigmoid_derivative(z):
    """
    Derivative of the sigmoid with respect to z.

    Uses the identity sigma'(z) = sigma(z) * (1 - sigma(z)), so the sigmoid is
    evaluated only once.
    """
    sig = sigmoid(z)
    complement = 1 - sig
    return sig * complement

In [4]:
def initialize_weights_and_bais(n):
    """
    Create the starting parameters for a model with n input features.

    n = number of features
    returns (W, b): W is an (n, 1) column of standard-normal draws taken from
    NumPy's global random generator, b is the scalar bias, starting at 0.
    """
    weights = np.random.standard_normal(size=(n, 1))  # same stream as randn(n, 1)
    bias = 0
    return weights, bias

In [5]:
def cost_function(p,Y):
    """
    Mean binary cross-entropy loss.

    p = predicted probabilities of class 1
    Y = true labels (0 or 1), same shape as p
    returns the loss summed over all samples and divided by len(Y)

    Probabilities are clipped into [eps, 1 - eps] before taking logs. Without
    this, a saturated prediction (p exactly 0 or 1) yields log(0) = -inf and
    0 * -inf = NaN, which poisons the cost.
    """
    eps = np.finfo(float).eps
    p = np.clip(np.asarray(p, dtype=float), eps, 1 - eps)
    m = len(Y)
    loss = -( Y*np.log(p) + (1-Y)*np.log(1-p) )
    cost = np.sum(loss)/m
    return cost

#### Probability of the event is defined as

$$ prob_i = p_i = \sigma( X_i.W + b ) = \frac{ e^{ X_i.W + b} }{1 + e^{ X_i.W + b}} $$

$ 1 - p_i = \frac{1}{1 + e^{ X_i.W + b}} $, and $ \frac{p_i}{1-p_i} = e^{ X_i.W + b} $

$$ Cost(J) = CrossEntropy = - \sum_{i=1}^m Y_i \log(p_i) - \sum_{i=1}^m (1-Y_i)\log(1-p_i)  $$

$$ J = -\sum_{i=1}^m \log(1-p_i) - \sum_{i=1}^m Y_i \log\left( \frac{ p_i }{1-p_i} \right) $$

$$ J = -\sum_{i=1}^m \log\left( \frac{1}{1 + e^{ X_i.W + b}} \right) - \sum_{i=1}^m Y_i \log(e^{X_i.W + b}) $$

$$ J = \sum_{i=1}^m \log( 1 + e^{ X_i.W + b} ) - \sum_{i=1}^m Y_i (X_i.W+b) $$

### Let's take the derivative

$$ \frac{\partial J}{ \partial W_j } = \sum_{i=1}^m \frac{1}{1+e^{ X_i.W + b}}e^{ X_i.W+b} X_{ij} - \sum_{i=1}^m Y_i X_{ij} = - \sum_{i=1}^m (Y_i - p_i )X_{ij}$$

$$ \frac{\partial J}{\partial b} = \sum_{i=1}^m \frac{1}{1+e^{ X_i.W + b}}e^{ X_i.W+b} - \sum_{i=1}^m Y_i = - \sum_{i=1}^m(Y_i - p_i) $$

Here $m$ is the number of samples. The code in this notebook divides these sums by $m$, i.e. it works with the mean loss and the mean gradient.

In [6]:
def get_probs(X,W,b):
    """
    Predicted probabilities for every row of X.

    Computes the linear score X.W + b and passes it through the sigmoid.
    """
    scores = np.dot(X, W) + b
    return sigmoid(scores)

def optimize(W,b,X,Y,lr=0.1,lmda=0.1):
    """
    Take one gradient-descent step and return the updated (W, b).

    W = weights
    b = bias
    X = feature matrix, one row per sample
    Y = true labels
    lr = learning rate
    lmda = regularization parameter (applied to W only, scaled by 1/len(Y))
    """
    n_samples = len(Y)
    residual = Y - get_probs(X,W,b)  # true label minus predicted probability

    # Mean gradients of the cross-entropy loss.
    grad_W = -np.dot(X.T, residual)/n_samples  # one entry per feature, same shape as W
    grad_b = -np.sum(residual)/n_samples

    # Weight-decay term; the bias is left unregularized.
    penalty = (lmda/n_samples)*W

    W_next = W - lr*(grad_W + penalty)
    b_next = b - lr*grad_b
    return W_next, b_next

In [7]:
# Train by repeated gradient steps until the cost stops improving by at least
# `tol`, or until `max_iter` iterations have run (safety cap so the cell always
# terminates).
W,b = initialize_weights_and_bais(X.shape[1])
last_cost,tol,max_iter = float('inf'),0.001,10000
for i in range(max_iter):
    p = get_probs(X,W,b)
    cost = cost_function(p,Y)
    # Fraction of samples whose thresholded prediction (p > 0.5) matches the label.
    acc = ((p>.5)*1 == Y).sum()/len(Y)
    print(f"Iteration: {i}, Cost: {cost:.3f}, Accuracy: {acc:.2f}")
    W,b = optimize(W,b,X,Y,lr=1e-1,lmda=0.5)
    # Stop unless the cost improved by at least `tol`. Written as
    # `not (improvement >= tol)` so that a NaN cost also stops the loop;
    # `improvement < tol` is False for NaN and would spin forever.
    if not (last_cost - cost >= tol): break
    last_cost = cost

Iteration: 0, Cost: 1.095, Accuracy: 0.50
Iteration: 1, Cost: 0.881, Accuracy: 0.50
Iteration: 2, Cost: 0.814, Accuracy: 0.54
Iteration: 3, Cost: 0.792, Accuracy: 0.45
Iteration: 4, Cost: 0.775, Accuracy: 0.49
Iteration: 5, Cost: 0.765, Accuracy: 0.42
Iteration: 6, Cost: 0.756, Accuracy: 0.42
Iteration: 7, Cost: 0.749, Accuracy: 0.38
Iteration: 8, Cost: 0.743, Accuracy: 0.37
Iteration: 9, Cost: 0.738, Accuracy: 0.38
Iteration: 10, Cost: 0.733, Accuracy: 0.37
Iteration: 11, Cost: 0.728, Accuracy: 0.39
Iteration: 12, Cost: 0.724, Accuracy: 0.38
Iteration: 13, Cost: 0.719, Accuracy: 0.39
Iteration: 14, Cost: 0.715, Accuracy: 0.38
Iteration: 15, Cost: 0.711, Accuracy: 0.41
Iteration: 16, Cost: 0.707, Accuracy: 0.41
Iteration: 17, Cost: 0.703, Accuracy: 0.43
Iteration: 18, Cost: 0.699, Accuracy: 0.42
Iteration: 19, Cost: 0.695, Accuracy: 0.48
Iteration: 20, Cost: 0.691, Accuracy: 0.44
Iteration: 21, Cost: 0.687, Accuracy: 0.48
Iteration: 22, Cost: 0.684, Accuracy: 0.47
Iteration: 23, Cost: 