[View in Colaboratory](https://colab.research.google.com/github/alexyarats/ml/blob/master/logistic_regression.ipynb)

In [0]:
from sklearn import datasets
import matplotlib.pyplot as plt
import numpy as np 
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import SGDClassifier, LogisticRegression
from sklearn.metrics import accuracy_score
%matplotlib inline

**Prepare data**

In [0]:
breast_cancer = datasets.load_breast_cancer()

In [0]:
x_train, x_valid = breast_cancer.data[:450], breast_cancer.data[450:]
y_train, y_valid = breast_cancer.target[:450], breast_cancer.target[450:]

**Logistic Regression**

In [0]:
class Param(object):
  def __init__(self, shape):
    self.data = np.zeros(shape)
    self.grad = np.zeros(shape)

In [0]:
class NumpyLogisticRegression(object):
  def __init__(self):
    self.W = Param(30)
    self.b = Param(1)
    
  def parameters(self):
    return [self.W, self.b]
  
  def forward(self, X):
    h = X.dot(self.W.data) + self.b.data
    return 1./(1. + np.exp(-h))
  
  def backward(self, X, y, y_hat):
    self.W.grad = -np.average(X * (y - y_hat).reshape(y.shape[0], 1), axis=0)
    self.b.grad = -np.average(y - y_hat)

In [0]:
class SGD(object):
  def __init__(self, params, lr=0.00001):
    self.params = params
    self.lr = lr
  
  def zero_grad(self):
    for p in self.params:
      p.grad.fill(0.)
    
  def step(self):
    for p in self.params:
      p.data -= self.lr * p.grad

**Loss and predict**

In [0]:
eps=1e-6

In [0]:
def loss(y, y_hat):
  return -np.average(y * np.log(y_hat.clip(eps, 1 - eps)) +
                     (1 - y) * np.log(1 - y_hat.clip(eps, 1 - eps)))

In [0]:
def predict(model, X):
  return np.array(model.forward(X)) > 0.5

**Model with batches**

In [0]:
m = NumpyLogisticRegression()
optim = SGD(m.parameters(), lr=0.001)

batch_size = 64
loss_valid = None

#idx = np.arange(len(x_train))

print("Learning rate: %.8f" % optim.lr)

for epoch in range(200):
  
  idx = np.arange(len(x_train))
  np.random.shuffle(idx)
  batch = idx[0: batch_size]
    
  optim.zero_grad()
  y_hat = m.forward(x_train[batch])
  m.backward(x_train[batch], y_train[batch], y_hat)
  optim.step()
  
  loss_v = loss(y_valid, m.forward(x_valid))
  
  if epoch % 5 == 0:
    loss_v = loss(y_valid, m.forward(x_valid))
    #print("Loss: %.5f" % loss_v)
    if loss_valid is None or loss_v > loss_valid:
      optim.lr /= 2
      print("Learning rate: %.8f" % optim.lr)
    loss_valid = loss_v
    
  if optim.lr < 1e-8:
    break
    
  y_pred = predict(m, x_valid)
  #print("Epoch: %d, accuracy = %.8f" % (epoch + 1, accuracy_score(y_valid, y_pred)))

print("Loss: %.5f" % loss_v)  
  
print("-------------------------")
forward = m.forward(x_valid)
y_pred = np.array(forward) > 0.5

print("Accuracy: %.5f" % accuracy_score(y_valid, y_pred))

Learning rate: 0.00100000
Learning rate: 0.0005000
Learning rate: 0.0002500
Learning rate: 0.0001250
Learning rate: 0.0000625
Learning rate: 0.0000313
Learning rate: 0.0000156
Learning rate: 0.0000078
Learning rate: 0.0000039
Learning rate: 0.0000020
Learning rate: 0.0000010
Learning rate: 0.0000005
Learning rate: 0.0000002
Learning rate: 0.0000001
Learning rate: 0.0000001
Learning rate: 0.0000000
Learning rate: 0.0000000
Learning rate: 0.0000000
Loss: 1.38447
-------------------------
Accuracy: 0.79832


  # This is added back by InteractiveShellApp.init_path()


**Model withoit batches**

In [0]:
m = NumpyLogisticRegression()
optim = SGD(m.parameters(), lr=0.00001)

loss_valid = None

print("Learning rate: %.8f" % optim.lr)

for epoch in range(50):
    
  optim.zero_grad()
  y_hat = m.forward(x_train)
  m.backward(x_train, y_train, y_hat)
  optim.step()
  
  loss_v = loss(y_valid, m.forward(x_valid))
  
  if epoch % 5 == 0:
    loss_v = loss(y_valid, m.forward(x_valid))
    #print("Loss: %.5f" % loss_v)
    if loss_valid is None or loss_v > loss_valid:
      optim.lr /= 2
      print("Learning rate: %.8f" % optim.lr)
    loss_valid = loss_v
    
  if optim.lr < 1e-8:
    break
    
  y_pred = predict(m, x_valid)
  #print("Epoch: %d, accuracy = %.8f" % (epoch + 1, accuracy_score(y_valid, y_pred)))

print("Loss: %.5f" % loss_v)  
  
print("-------------------------")
forward = m.forward(x_valid)
y_pred = np.array(forward) > 0.5

print("Accuracy: %.5f" % accuracy_score(y_valid, y_pred))

Learning rate: 0.00001000
Learning rate: 0.00000500
Learning rate: 0.00000250
Loss: 0.65052
-------------------------
Accuracy: 0.50420
