In [1]:
import numpy as np
import pandas as pd
# Seed NumPy's global RNG so the np.random.randn weight initialisation is repeatable.
# NOTE(review): presumably this also fixes the row order produced by iris.sample(frac=1),
# which is called without an explicit random_state — confirm.
np.random.seed(5)

In [2]:
# Load the Iris data and drop the row-identifier column (no in-place mutation,
# so re-running this cell always starts from the file contents).
iris = pd.read_csv('Iris.csv').drop(columns='Id')
# Encode the species names as integers. The mapped column is assigned back to
# the frame instead of calling replace(..., inplace=True) on `iris.Species`:
# that chained in-place call does not update `iris` under pandas Copy-on-Write.
iris['Species'] = iris['Species'].map({'Iris-virginica':0,'Iris-versicolor':1,'Iris-setosa':2})
# Keep only the two classes encoded as 0 and 1 (binary classification).
iris = iris[iris.Species.isin([0,1])]
# Shuffle the rows.
iris = iris.sample(frac=1)
cols = iris.columns
# X: every column except the last; Y: the last column as an (m, 1) integer column vector.
X,Y = iris[cols[:-1]].values,np.expand_dims(iris[cols[-1]].values.astype(int),axis=-1)
X.shape,Y.shape

((100, 4), (100, 1))

In [3]:
def sigmoid(z):
    """Logistic function 1 / (1 + e^(-z)), applied element-wise to `z`."""
    denominator = 1 + np.exp(-z)
    return 1 / denominator

def sigmoid_derivative(z):
    """Derivative of the logistic function at `z`: sigmoid(z) * (1 - sigmoid(z))."""
    activation = sigmoid(z)
    complement = 1 - activation
    return activation * complement

In [4]:
def initialize_weights_and_bais(n):
    """
    Create the starting parameters for a model with `n` input features.

    Returns a tuple (W, b): W is an (n, 1) array drawn from np.random.randn
    and b is the integer 0.
    """
    weights = np.random.randn(n, 1)
    bias = 0
    return weights, bias

In [5]:
def cost_function(p,Y,eps=1e-15):
    """
    Mean binary cross-entropy loss.

    p   = predicted probabilities
    Y   = true labels (0 or 1), same shape as p
    eps = probabilities are clipped to [eps, 1 - eps] before taking logs

    Returns the loss summed over all entries and divided by len(Y).
    """
    Y = np.asarray(Y)
    m = len(Y)
    # Without clipping, a saturated prediction (p == 0 or p == 1) gives
    # log(0) = -inf, and 0 * -inf = nan poisons the whole cost.
    p = np.clip(p, eps, 1 - eps)
    loss = -( Y*np.log(p) + (1-Y)*np.log(1-p) )
    cost = np.sum(loss)/m
    return cost

1. $ probs(p) = \sigma( X.W + b ) $
2. $ cost(J) = -\frac{1}{m} \sum {(y\log(p) + (1 - y)\log(1 - p))} $
3. $ \large \frac{dJ}{dw} = \frac{1}{m} \small (X^{T} . (p-Y)) $
4. $ \large \frac{dJ}{db} = \frac{1}{m} \small \sum (p-Y) $

In [20]:
def get_probs(X,W,b):
    """Predicted probabilities: sigmoid of the linear scores np.dot(X, W) + b."""
    scores = np.dot(X, W) + b
    return sigmoid(scores)

def optimize(W,b,X,Y,lr=0.1,lmda=0.1):
    """
    Take one gradient-descent step and return the updated (W, b).

    W = weights
    b = bias
    X = input features
    Y = target labels
    lr = learning rate
    lmda = regularization parameter (applied to W only, scaled by 1/len(Y))
    """
    n_samples = len(Y)
    # Difference between predicted probabilities and targets, shared by both gradients.
    residual = get_probs(X, W, b) - Y

    grad_W = np.dot(X.T, residual) / n_samples
    grad_b = np.sum(residual) / n_samples

    # The weight update adds the regularization term; the bias update does not.
    W = W - lr * (grad_W + (lmda / n_samples) * W)
    b = b - lr * grad_b
    return W, b

In [29]:
# Fixed-length training run: 1000 gradient-descent steps from a fresh initialisation.
# Metrics are computed and logged from the parameters *before* each update.
W, b = initialize_weights_and_bais(X.shape[1])
for i in range(1000):
    p = get_probs(X, W, b)
    cost = cost_function(p, Y)
    # Fraction of samples whose thresholded prediction (p > 0.5) matches the label.
    acc = np.mean((p > .5).astype(int) == Y)
    print(f"Iteration: {i}, Cost: {cost:.3f}, Accuracy: {acc:.2f}")
    W, b = optimize(W, b, X, Y, lr=1e-3, lmda=0.5)

Iteration: 0, Cost: 1.715, Accuracy: 0.50
Iteration: 1, Cost: 1.696, Accuracy: 0.50
Iteration: 2, Cost: 1.677, Accuracy: 0.50
Iteration: 3, Cost: 1.659, Accuracy: 0.50
Iteration: 4, Cost: 1.640, Accuracy: 0.50
Iteration: 5, Cost: 1.621, Accuracy: 0.50
Iteration: 6, Cost: 1.603, Accuracy: 0.50
Iteration: 7, Cost: 1.585, Accuracy: 0.50
Iteration: 8, Cost: 1.566, Accuracy: 0.50
Iteration: 9, Cost: 1.548, Accuracy: 0.50
Iteration: 10, Cost: 1.530, Accuracy: 0.50
Iteration: 11, Cost: 1.513, Accuracy: 0.50
Iteration: 12, Cost: 1.495, Accuracy: 0.50
Iteration: 13, Cost: 1.478, Accuracy: 0.50
Iteration: 14, Cost: 1.460, Accuracy: 0.50
Iteration: 15, Cost: 1.443, Accuracy: 0.50
Iteration: 16, Cost: 1.426, Accuracy: 0.50
Iteration: 17, Cost: 1.409, Accuracy: 0.50
Iteration: 18, Cost: 1.392, Accuracy: 0.50
Iteration: 19, Cost: 1.376, Accuracy: 0.50
Iteration: 20, Cost: 1.360, Accuracy: 0.50
Iteration: 21, Cost: 1.343, Accuracy: 0.50
Iteration: 22, Cost: 1.327, Accuracy: 0.50
Iteration: 23, Cost: 

Iteration: 417, Cost: 0.614, Accuracy: 0.80
Iteration: 418, Cost: 0.614, Accuracy: 0.80
Iteration: 419, Cost: 0.614, Accuracy: 0.80
Iteration: 420, Cost: 0.614, Accuracy: 0.80
Iteration: 421, Cost: 0.614, Accuracy: 0.80
Iteration: 422, Cost: 0.613, Accuracy: 0.80
Iteration: 423, Cost: 0.613, Accuracy: 0.80
Iteration: 424, Cost: 0.613, Accuracy: 0.80
Iteration: 425, Cost: 0.613, Accuracy: 0.80
Iteration: 426, Cost: 0.613, Accuracy: 0.80
Iteration: 427, Cost: 0.613, Accuracy: 0.80
Iteration: 428, Cost: 0.613, Accuracy: 0.80
Iteration: 429, Cost: 0.613, Accuracy: 0.80
Iteration: 430, Cost: 0.613, Accuracy: 0.80
Iteration: 431, Cost: 0.613, Accuracy: 0.80
Iteration: 432, Cost: 0.613, Accuracy: 0.80
Iteration: 433, Cost: 0.613, Accuracy: 0.80
Iteration: 434, Cost: 0.613, Accuracy: 0.80
Iteration: 435, Cost: 0.613, Accuracy: 0.80
Iteration: 436, Cost: 0.613, Accuracy: 0.80
Iteration: 437, Cost: 0.613, Accuracy: 0.80
Iteration: 438, Cost: 0.613, Accuracy: 0.80
Iteration: 439, Cost: 0.613, Acc

In [9]:
# Show the previous and current cost side by side.
# NOTE(review): in the visible part of this notebook `last_cost` is first assigned in the
# tolerance-based training cell further down, so on a fresh top-to-bottom run this cell
# would likely raise NameError — confirm and consider moving it after that cell.
last_cost,cost

(1.0953797493276713, 1.833086900942072)

In [28]:
# Train from a fresh initialisation until the cost stops improving by at least `tol`
# between consecutive iterations.
W,b = initialize_weights_and_bais(X.shape[1])
last_cost,tol,i = float('inf'),0.001,0
while True:
    p = get_probs(X,W,b)
    cost = cost_function(p,Y)
    acc = ((p>.5)*1 == Y).sum()/len(Y)
    print(f"Iteration: {i}, Cost: {cost:.3f}, Accuracy: {acc:.2f}")
    W,b = optimize(W,b,X,Y,lr=1e-3,lmda=0.5)
    # Written as `not (improvement >= tol)` rather than `improvement < tol`:
    # if the cost turns NaN/inf the difference is NaN, every comparison with it
    # is False, and the `<` form would keep this `while True` loop running forever.
    if not (last_cost - cost >= tol):
        print(last_cost - cost)
        break
    last_cost, i = cost, i+1

Iteration: 0, Cost: 2.412, Accuracy: 0.50
Iteration: 1, Cost: 2.396, Accuracy: 0.50
Iteration: 2, Cost: 2.381, Accuracy: 0.50
Iteration: 3, Cost: 2.365, Accuracy: 0.50
Iteration: 4, Cost: 2.350, Accuracy: 0.50
Iteration: 5, Cost: 2.334, Accuracy: 0.50
Iteration: 6, Cost: 2.319, Accuracy: 0.50
Iteration: 7, Cost: 2.303, Accuracy: 0.50
Iteration: 8, Cost: 2.288, Accuracy: 0.50
Iteration: 9, Cost: 2.272, Accuracy: 0.50
Iteration: 10, Cost: 2.257, Accuracy: 0.50
Iteration: 11, Cost: 2.241, Accuracy: 0.50
Iteration: 12, Cost: 2.226, Accuracy: 0.50
Iteration: 13, Cost: 2.210, Accuracy: 0.50
Iteration: 14, Cost: 2.195, Accuracy: 0.50
Iteration: 15, Cost: 2.180, Accuracy: 0.50
Iteration: 16, Cost: 2.164, Accuracy: 0.50
Iteration: 17, Cost: 2.149, Accuracy: 0.50
Iteration: 18, Cost: 2.134, Accuracy: 0.50
Iteration: 19, Cost: 2.118, Accuracy: 0.50
Iteration: 20, Cost: 2.103, Accuracy: 0.50
Iteration: 21, Cost: 2.088, Accuracy: 0.50
Iteration: 22, Cost: 2.072, Accuracy: 0.50
Iteration: 23, Cost: 