In [1]:
import numpy as np
class MyLogisticRegression:
    def __init__(self,learning_rate=0.001,max_iter=10000):
        self._theta = None
        self.intercept_ = None
        self.coef_ = None
        self.learning_rate = learning_rate
        self.max_iter = max_iter
    
    def _sigmoid(self,z):
        return 1. / (1. + np.exp(-z))
    
    def fit(self,x_train,y_train):
        def J(theta, X_b, y_train):
            y_hat = self._sigmoid(X_b.dot(theta))
            return - np.sum(y_train*np.log(y_hat) + (1-y_train)*np.log(1-y_hat)) / len(y_train)
        
        def dJ(theta, X_b, y_train):
            y_hat = self._sigmoid(X_b.dot(theta))
            return X_b.T.dot(y_hat - y_train) / len(y_train)
        
        
        X_b = np.hstack([np.ones((len(x_train), 1)), x_train])
        self._theta = np.random.randn(X_b.shape[1]) #这里我用了随机初始化，初始化为正态分布
        iter_num = 0
        while iter_num < self.max_iter:
            iter_num += 1
            last_theta = self._theta
            self._theta = self._theta - self.learning_rate * dJ(self._theta,X_b,y_train)
            if (abs(J(self._theta,X_b,y_train) - J(last_theta,X_b,y_train)) < 1e-7):
                break
        
        self.intercept_ = self._theta[0]
        self.coef_ = self._theta[1:]
        return self
    
    def predict(self,x_predict):
        X_b = np.hstack([np.ones((len(x_predict), 1)), x_predict])
        y_predict = self._sigmoid(X_b.dot(self._theta))
        y_predict = np.array(y_predict >= 0.5 , dtype = 'int')
        return y_predict
    
    def score(self,x_test,y_test):
        y_predict = self.predict(x_test)
        sum_acc = np.sum(y_predict==y_test)
        return sum_acc / len(y_test)
        
    def __repr__(self):
        return "LogisticRegression()"

In [2]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

In [3]:
cancer = load_breast_cancer()
x = cancer.data
y = cancer.target

In [4]:
x_train , x_test , y_train, y_test = train_test_split(x,y,test_size=0.2,random_state=666666)

In [5]:
lr = MyLogisticRegression()
lr.fit(x_train,y_train)
y_predict = lr.predict(x_test)
y_test

  # This is added back by InteractiveShellApp.init_path()
  app.launch_new_instance()
  app.launch_new_instance()


array([1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1,
       1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0,
       1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0,
       1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1,
       0, 1, 1, 0])

In [6]:
lr.score(x_test,y_test)

  # This is added back by InteractiveShellApp.init_path()


0.9035087719298246

### 对比了一下效果，还是没人家好

In [7]:
from sklearn.linear_model import LogisticRegression
clf = LogisticRegression()
clf.fit(x_train,y_train)
clf.score(x_test,y_test)



0.956140350877193