In [1]:
from sklearn import datasets
import numpy as np
import matplotlib.pyplot as plt

In [2]:
iris = datasets.load_iris()
iris.keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename'])

In [3]:
X = iris["data"][:,3:]
y = (iris["target"] == 2).astype(int)
print(f"X shape: {X.shape}")
print(f"y shape: {y.shape}")

X shape: (150, 1)
y shape: (150,)


In [5]:
# normalize
X = (X - X.mean())/X.std()

# WITH SKLEARN
---


In [6]:
from sklearn.linear_model import LogisticRegression

log_reg = LogisticRegression()
log_reg.fit(X,y)

LogisticRegression()

In [7]:
y_pred = log_reg.predict(X)

In [8]:
(y_pred == y).mean()

0.96

# WITH NUMPY
---

In [9]:
# initialise theta and X
X = np.concatenate((np.zeros([len(X), 1]), X), axis=1)
theta = np.random.randn(2).reshape(-1,1)

In [10]:
print(X.shape, theta.shape)

(150, 2) (2, 1)


### create the sigmoid fct 
$p_k=\sigma(X\theta)$

In [11]:
def sigmoid(t):
    return 1/(1+np.exp(-t))

In [14]:
n_iter = 1000
lr = 0.1
m = X.shape[0]
epsilon = 1e-7

for i in range(n_iter):
    linear_model = X.dot(theta)
    y_prob = sigmoid(linear_model)
    cost_fct = -np.mean(y*np.log(y_prob+epsilon) + (1-y)*np.log(1-y_prob+epsilon))
    gradient = -(2/m) * X.T.dot(y_pred-y)
    theta -= lr * gradient

ValueError: non-broadcastable output operand with shape (2,1) doesn't match the broadcast shape (2,2)

In [16]:
gradient

array([-0.       ,  0.0017785])

### predict

$ p_k(x) > 0.5 → y=1$  
$ p_k(x) < 0.5 → y=0$  

In [217]:
def predict(y_prob):
    return ( y_prob > .5).astype(int)

predict(y_prob)[:5]

array([[1],
       [1],
       [1],
       [1],
       [1]])

### cost fct 
$J(\theta)=-\dfrac{1}{m}\sum_{i=0}^{m}[ y^{(i)}\log(p_{k}^{(i)}) + (1-y_{(i)})\log(1-p_{k}^{(i)})]$

In [218]:
epsilon = 1e-7

In [219]:
def cost_function(y_prob, y):
    return -np.mean(np.sum(y*np.log(y_prob+ epsilon)+(1-y)*np.log(1 - y_prob + epsilon)))

cost_function(y_prob, y)

15658.2331720867

In [220]:
# gradient 
m = X.shape[0] 
def gradient(y_prob, y):
    residual = y_prob - y[0]

    return (2/m)*(X.T.dot(residual))

gradient(y_prob, y).shape

(2, 1)

## Gradient descent
---

In [221]:
def gradient_descent(X, y, max_iter=1000, learning_rate=0.005):
    theta = np.random.randn(X.shape[1]).reshape(-1,1)
    for step in range(max_iter):
        y_prob = create_model(X, theta)
        grad = gradient(y_prob, y)
        theta = theta - learning_rate * grad
        if step % 250 == 0 :
            print(f"Cost:{cost_function(y_prob, y)}")
            
    return theta

In [222]:
theta = gradient_descent(X, y)

Cost:16904.988641937096
Cost:15996.761766860966
Cost:15712.952008528795
Cost:15629.525321803247


In [226]:
y_prob = create_model(X, theta)
y_pred = predict(y_prob)
# theta

In [227]:
y_pred.ravel()

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0,
       0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [228]:
(y != y_pred).mean()

0.4666666666666667