In [100]:
import scipy.io as sio
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.optimize as opt

In [86]:
data = sio.loadmat('ex3data1.mat')
raw_X, raw_y = data['X'], data['y']
raw_X.shape, raw_y.shape

((5000, 400), (5000, 1))

In [88]:
X = raw_X
y = raw_y.reshape(raw_y.shape[0])
X.shape, y.shape

((5000, 400), (5000,))

In [96]:
# X增加theta_0 bias
X = np.insert(raw_X, 0, 1, axis=1)
y[y==10] = 0
X.shape, np.unique(y)

((5000, 401), array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=uint8))

In [115]:
# 输出y向量标签化，后面需要用标签化后的向量求 theta_10
y_label = []
for k in range(10):
    y_label.append((y==k).astype(int))
y_label_matrix = np.array(y_label)
y_label_matrix.shape

(10, 5000)

In [124]:
y_label_matrix.T[4900:4902,:]

array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 1]])

In [97]:
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def cost(theta, X, y):
    return np.mean(-y * np.log(sigmoid(X @ theta)) - (1 - y) * np.log(1-sigmoid(X @ theta)))

def gradient(theta, X, y):
    return (1 / len(X)) * X.T @(sigmoid(X @ theta) - y)

def regularized_cost(theta, X, y, lambda_=1):
    theta_1_to_n = theta[1:]
    regularized_term = (lambda_ / ( 2* len(X))) * np.power(theta_1_to_n, 2).sum()
    
    return cost(theta, X, y) + regularized_term

def regularized_gradient(theta, X, y, lambda_=1):
    theta_1_to_n = theta[1:]
    regularized_theta = (lambda_ / len(X)) * theta_1_to_n
    regularized_term = np.concatenate([np.array([0]), regularized_theta])
    
    return gradient(theta, X, y) + regularized_term

def logistic_regression(X, y, lambda_=1):
    theta = np.zeros(X.shape[1])
    result = opt.minimize(fun=regularized_cost,
                          x0=theta,
                          args=(X,y,lambda_),
                          method="TNC",
                          jac=regularized_gradient,
                          options={"disp": True}
    )
    best_theta = result.x
    return best_theta

def predict(x, theta):
    prob = sigmoid(x @ theta)
    return (prob>=.5).astype(int)

In [102]:
theta_10 = np.array([logistic_regression(X, y_label_matrix[k]) for k in range(10)])
theta_10.shape

(10, 401)

In [107]:
prob_matrix = sigmoid(X @ theta_10.T)
prob_matrix = (prob_matrix>=.5).astype(int)
prob_matrix.shape

(5000, 10)

In [108]:
np.unique(prob_matrix)

array([0, 1])

In [123]:
prob_matrix[4900:4902,:]

array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 1]])

In [122]:
result = (y_label_matrix.T == prob_matrix).astype(int)
result = np.sum(result, axis=1) - 10
result = (result==0).astype(int)
np.mean(result)

0.8656

### 另一种比较结果的方法，将pro_matrix逆标签转换成[0,1,2,...9]之后进行比较

In [125]:
result_2 = np.argmax(prob_matrix, axis=1)
result_2.shape

(5000,)

In [126]:
np.unique(result_2)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=int64)

In [127]:
np.mean((y == result_2).astype(int))

0.8816