# 逻辑回归

![逻辑回归](../images/逻辑回归/逻辑斯蒂回归.jpg)

In [1]:
import time
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

In [2]:
# Load the breast-cancer dataset and hold out a test split.
# A fixed seed makes the split (and every metric computed from it)
# reproducible under Restart Kernel -> Run All.
SEED = 42

data = load_breast_cancer()
X, y = data['data'], data['target']
train_X, test_X, train_y, test_y = train_test_split(X, y, random_state=SEED)
print('train size: %s' % len(train_X))
print('test size: %s' % len(test_X))

train size: 426
test size: 143


In [3]:
class LogisticRegression:
    """Binary logistic regression trained with per-sample gradient ascent.

    The model is fitted immediately on construction. A constant bias
    feature (value 1) is appended as the last column of every sample, so
    the learned weight vector ``w`` has ``n_features + 1`` entries with the
    bias weight last.

    Parameters
    ----------
    train_X : array-like of shape (n_samples, n_features)
        Training samples.
    train_y : array-like of shape (n_samples,)
        Labels, expected to be 0 or 1.
    lr : float, default 0.0001
        Step size applied to every per-sample update.
    iters : int, default 200
        Number of full passes over the training samples.

    Attributes
    ----------
    w : numpy.ndarray
        Learned weights (bias last), set by training.
    """

    def __init__(self, train_X, train_y, lr=0.0001, iters=200):
        self.train_X = train_X
        self.train_y = train_y
        self.lr = lr
        self.iters = iters
        self.__train()

    @staticmethod
    def _sigmoid(z):
        """Return 1 / (1 + exp(-z)) for a scalar ``z`` without overflowing.

        The naive form exp(z) / (1 + exp(z)) evaluates to inf / inf = nan
        once z exceeds ~709. Branching on the sign keeps the argument of
        ``np.exp`` non-positive, so the result is always in [0, 1].
        """
        if z >= 0:
            return 1.0 / (1.0 + np.exp(-z))
        ez = np.exp(z)
        return ez / (1.0 + ez)

    def __train(self):
        """Fit ``self.w`` by sweeping the training set ``self.iters`` times."""
        features = np.asarray(self.train_X, dtype=float)
        labels = np.asarray(self.train_y)
        # Append the bias column so the intercept is learned as a weight.
        train_X = np.hstack([features, np.ones((features.shape[0], 1))])
        w = np.zeros(train_X.shape[1])
        lr = self.lr

        for _ in range(self.iters):
            for j in range(len(train_X)):
                xi = train_X[j]
                yi = labels[j]
                p = self._sigmoid(np.dot(w, xi))
                # Gradient of the log-likelihood for one sample: (y - p) * x.
                w += lr * (yi - p) * xi
        self.w = w

    def __predict(self, x):
        """Return the predicted label (0 or 1) for a single sample ``x``."""
        x = np.append(np.asarray(x, dtype=float), 1.0)  # same bias term as in training
        p = self._sigmoid(np.dot(self.w, x))
        return 1 if p > 0.5 else 0

    def predict(self, test_X):
        """Return a list of predicted labels (0 or 1), one per row of ``test_X``."""
        return [self.__predict(x) for x in test_X]

In [4]:
%time
lr = LogisticRegression(train_X, train_y)
print('算法准确率: %s' % (sum(lr.predict(test_X) == test_y) / len(test_X)))

CPU times: user 2 µs, sys: 1 µs, total: 3 µs
Wall time: 6.91 µs
算法准确率: 0.8881118881118881
