In [17]:
# 国际惯例，先导入相应的包，并且重命名
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt     

import plotly as py
import plotly.graph_objs as go
import plotly.express as px
from plotly import tools

首先看看数据如何

In [18]:
# Read the first exercise data set and name its three columns explicitly
# (presumably the file has no header row: the first displayed row is numeric data).
data=pd.read_csv('ex2/ex2data1.txt', names=['exm1', 'exm2','result'])
# Show the first rows as the cell output.
data.head()

Unnamed: 0,exm1,exm2,result
0,34.62366,78.024693,0
1,30.286711,43.894998,0
2,35.847409,72.902198,0
3,60.182599,86.308552,1
4,79.032736,75.344376,1


再看看统计数据情况

In [19]:
# Per-column summary statistics (count, mean, std, min, quartiles, max).
data.describe()

Unnamed: 0,exm1,exm2,result
count,100.0,100.0,100.0
mean,65.644274,66.221998,0.6
std,19.458222,18.582783,0.492366
min,30.058822,30.603263,0.0
25%,50.919511,48.179205,0.0
50%,67.032988,67.682381,1.0
75%,80.212529,79.360605,1.0
max,99.827858,98.869436,1.0


## 1.1 可视化数据

把数据可视化

In [20]:
# Rows with label 1 and label 0 respectively.
# NOTE(review): T and F are not referenced again in the visible part of the notebook.
T = data[data['result'] == 1]
F = data[data['result'] == 0]

# Scatter of the two exam columns; marker symbol and colour are both derived from the label.
fig = px.scatter(x=data.exm1, y=data.exm2, symbol=data.result,color=(data.result*-0.8),height=600,width=1000)
# Use the same fixed 0-130 range on both axes.
fig.update_layout(xaxis = dict(range = [0,130]), yaxis = dict(range = [0,130]))
fig.show()


实现Sigmoid函数
$$
g(z)=\frac{1}{1+e^{-z}}
$$

In [21]:
def sigmoid(z):
    """Logistic function g(z) = 1 / (1 + e^(-z)).

    np.exp works element-wise, so z may be a scalar or an array/matrix; the
    result then has the same shape as z, which is what the vectorised
    cost and gradient code relies on.
    """
    denominator = 1 + np.exp(-z)
    return 1 / denominator

可以看到，由于浮点数精度有限，当输入值大于 `36`（例如 `37`）之后，计算结果在数值上已经等于 `1`


In [22]:
# Evaluate the formula directly and through sigmoid() at z = 36 and z = 37;
# both lines are expected to print the same pair of values.
print(1 / ( 1 + np.exp(-36)),' ',1 / ( 1 + np.exp(-37)))
print(sigmoid(36),' ',sigmoid(37))

0.9999999999999998   1.0
0.9999999999999998   1.0


看看Sigmoid的曲线

In [23]:
# Sample z from -50 up to (but not including) 50 in steps of 0.1 and plot sigmoid(z).
t_X = np.arange(-50, 50, 0.1)
fig = px.line(x=t_X,y=sigmoid(t_X),height=600, width=1000)
fig.show()

## 1.2 梯度下降法

逻辑回归选用新的代价函数
$$
J\left( \theta  \right)=\frac{1}{m}\sum\limits_{i=1}^{m}{Cost(h_{\theta }(x^{(i)}),y^{(i)})}
$$

线性回归中使用的是平方误差代价（用于逻辑回归时代价函数非凸，因此不再采用）：
$$
Cost(h_{\theta }(x),y) = \frac{1}{2}{{\left( {{h}_{\theta }}\left( {{x}^{(i)}} \right)-{{y}^{(i)}} \right)}^{2}}
$$
逻辑回归改用如下的代价：

$$
Cost(h_{\theta }(x),y) = 
\begin{cases}
-log(h_{\theta }(x))\quad\quad\quad, y=1 \\
-log(1-h_{\theta }(x))\quad\quad,y=0
\end{cases}
$$

优化代价函数使得其成为一个公式：
$$
Cost(h_{\theta }(x),y) = -y*log(h_{\theta }(x)) -(1-y)*log(1-h_{\theta }(x))
$$

In [24]:
def costFunction(theta, X, y):
    """Mean logistic-regression cost: -y*log(h) - (1-y)*log(1-h), averaged over all entries.

    theta is wrapped with np.matrix(), so a flat array (the form the
    scipy.optimize call in this notebook starts from) or a 1 x n matrix both work.
    X and y are used as given; the notebook passes np.matrix objects.
    """
    theta_row = np.matrix(theta)
    # X @ theta_row.T is a matrix product; sigmoid is then applied element-wise.
    hypothesis = sigmoid(X @ theta_row.T)
    positive_term = np.multiply(-y, np.log(hypothesis))
    negative_term = np.multiply(1 - y, np.log(1 - hypothesis))

    return np.mean(positive_term - negative_term)

处理初始数据,并检查格式

In [25]:
# Add the intercept column once; the guard keeps the cell safe to re-run.
if 'ones' not in data.columns:
    data.insert(0,'ones',1)

# Design matrix (intercept + two exam columns) and label column as np.matrix objects.
X = np.matrix(data.loc[:,['ones','exm1','exm2']])
y = np.matrix(data.loc[:,['result']].values)

# One parameter per column of X, starting at zero.
theta = np.zeros(X.shape[1])

# Display the shapes as a sanity check.
X.shape,y.shape,theta.shape

((100, 3), (100, 1), (3,))

#### 迭代参数
$${{\theta }_{j}}:={{\theta }_{j}}-\alpha \frac{\partial }{\partial {{\theta }_{j}}}J\left( \theta  \right)$$
求导：
$${{\theta }_{j}}:={{\theta }_{j}}-\alpha\frac{1}{m}\sum\limits_{i=1}^{m}{{{\left(\left( {{h}_{\theta }}\left( {{x}^{(i)}} \right)-{{y}^{(i)}} \right)x_j^{(i)}\right)}}}$$

In [26]:
def GD(X, y, theta, alpha, iters):
    """Batch gradient descent for logistic regression.

    Args:
        X: design matrix, one sample per row (the notebook passes an np.matrix).
        y: label column with one row per sample.
        theta: starting parameters; anything np.matrix() turns into a 1 x n row.
        alpha: learning rate.
        iters: number of update steps.

    Returns:
        (theta, history_cost): the final 1 x n parameter matrix and an array of
        length ``iters`` holding the cost after each update.
    """
    theta = np.matrix(theta)
    history_cost = np.zeros(iters)

    for i in range(iters):
        distance = sigmoid(X @ theta.T) - y
        gradient = np.mean(np.multiply(distance, X), axis=0)
        # Step from the current theta.  Accumulating into a separate
        # zero-initialised buffer would throw away a non-zero starting point
        # after the first iteration; for a zero start the result is the same.
        theta = theta - alpha * gradient
        history_cost[i] = costFunction(theta, X, y)

    return theta, history_cost
        

初始化参数，计算初始代价

In [27]:
# Gradient-descent settings: number of iterations and learning rate.
iters=50000
alpha=0.001

# Cost at the initial theta, shown as the cell output.
costFunction(theta, X, y)

0.6931471805599453

In [28]:
# Run gradient descent; g is the fitted parameter row, history_cost the cost per iteration.
g, history_cost = GD(X, y, theta, alpha, iters)
g

matrix([[-2.84946254,  0.03070169,  0.0227595 ]])

In [29]:

diedai= np.linspace(1, iters, iters)    # iteration numbers 1..iters for the x axis
# Cost after each iteration against the iteration number.
fig = px.scatter(x=diedai,y=history_cost,height=600,width=1000)
fig.show()

## 1.3 高级优化法 学习$\theta$参数
  
  在Octave中使用`fminunc`来优化函数、计算成本和梯度参数，而在Python中用`scipy.optimize`来替代，  
  关于`scipy.optimize`的用法也对cost函数有一定要求，要求theta作为第一参数，且theta必须为一个数组格式  
  即shape为(n,)的格式，需要注意  
  具体使用参考以下文章
  ```
  https://www.jb51.net/article/181639.htm
  ```

首先重写一个<font color='#db692c' style=''>**计算梯度**</font>的方法

In [30]:
def cul_Gradient(theta, X, y):
    """Gradient of the unregularised cost: column-wise mean of (h - y) * X.

    theta is wrapped with np.matrix(), so a flat array is accepted.  With the
    np.matrix inputs used in this notebook the result is a 1 x n matrix.
    """
    theta_row = np.matrix(theta)
    residual = sigmoid(X @ theta_row.T) - y
    weighted = np.multiply(residual, X)
    return np.mean(weighted, axis=0)

In [31]:
# Gradient at the initial (all-zero) theta, shown as the cell output.
cul_Gradient(theta, X, y)

matrix([[ -0.1       , -12.00921659, -11.26284221]])

#### 调用`scipy.optimize`

In [32]:
import scipy.optimize as opt

# Minimise costFunction with SciPy's truncated-Newton routine, supplying the analytic gradient.
result = opt.fmin_tnc(func=costFunction, x0=theta, fprime=cul_Gradient, args=(X, y))
# Per the SciPy documentation the tuple is (solution, number of function evaluations, return code).
result

(array([-25.16131863,   0.20623159,   0.20147149]), 36, 0)

计算优化后的参数代价

In [33]:
# Replace theta with the optimised parameters and show the cost they achieve.
theta = result[0]
costFunction(theta, X, y)

0.20349770158947458

### 1.4 评估逻辑回归
  
完成$\theta$参数的学习，即将用得到的参数进行预估学生录取情况  
  
通过用我们的训练数据来计算我们所得模型的精准度  
  
逻辑回归模型的假设函数：
$$h_\theta(x) = \frac{1}{1+e^{-\theta^Tx}}$$
  
当$h_θ$大于等于0.5时，预测 y=1

当$h_θ$小于0.5时，预测 y=0 。

In [34]:
def predict(theta, X):
    """Predict class labels (0.0 or 1.0) for every row of X.

    A sample is labelled 1 when the hypothesis sigmoid(X @ theta^T) is at
    least 0.5 and 0 otherwise.  theta is wrapped with np.matrix(), so a flat
    array is accepted.  The result keeps the float dtype and the column
    layout of the hypothesis.
    """
    probabilities = sigmoid(X @ np.matrix(theta).T)
    # Explicit threshold: np.rint rounds halves to the nearest even value, so
    # a hypothesis of exactly 0.5 would become 0 instead of the intended 1.
    return (probabilities >= 0.5).astype(float)

In [35]:
# 计算预测精度
predictions = predict(theta, X)
correct = [1 if a==b else 0 for (a, b) in zip(predictions, y)]
accuracy = sum(correct) / len(X)
print('精确度为：',accuracy)

精确度为： 0.89


### 绘制决策边界
  
该条边界满足：
$$
\theta_{0}+\theta_{1}x_{1}+\theta_{2}x_{2} = 0
$$

In [36]:
# Solve theta0 + theta1*x1 + theta2*x2 = 0 for x2 over a range of x1 values.
x1 = np.arange(130, step=0.1)
x2 = -(theta[0] + theta[1]*x1) / theta[2]

# Same scatter as before, with the boundary line drawn on top.
fig = px.scatter(x=data.exm1, y=data.exm2, symbol=data.result,color=(data.result*-0.8),height=600,width=1000)
fig.add_trace(go.Scatter(x=x1, y=x2, mode='lines'))
fig.update_layout(xaxis = dict(range = [10,120]), yaxis = dict(range = [10,120]))
fig.show()

### 1.5 多元分类

面对多元分类问题时，可以为每一个类别单独训练一个二分类器（一对多，one-vs-all）；预测时选取输出概率最高的分类器所对应的类别。
  
<div align=center>
<img src="https://cdn.jsdelivr.net/gh/YINGHAIDADA/imageStorege@main/image/1651395095773-2022-5-116:51:36.png" alt="吴恩达" style="zoom:60%;"/>
</div>

# 2. 正则化逻辑回归
通过加入正则项提升逻辑回归算法。简单来说，正则化是代价函数中的一项，它使学习到的模型更倾向于“更简单”的模型（在这种情况下，模型将拥有更小的系数）。这有助于减少过拟合，提高模型的泛化能力。

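$$
J\left( \theta \right)=\frac{1}{m}\sum\limits_{i=1}^{m}{\left[ -{{y}^{(i)}}\log \left( {{h}_{\theta }}\left( {{x}^{(i)}} \right) \right)-\left( 1-{{y}^{(i)}} \right)\log \left( 1-{{h}_{\theta }}\left( {{x}^{(i)}} \right) \right) \right]}+\frac{\lambda }{2m}\sum\limits_{j=1}^{n}{\theta _{j}^{2}}
$$
其中 $\lambda$ 为正则化参数，正则项不包含 $\theta_0$。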

In [37]:
# Read the second exercise data set and name its three columns explicitly.
data2 = pd.read_csv('ex2/ex2data2.txt', names=['test1', 'test2', 'result'])
# Independent copy used only for plotting (its labels are later replaced by
# strings); copying avoids reading the same file from disk a second time.
data2draw = data2.copy()
data2.head()

Unnamed: 0,test1,test2,result
0,0.051267,0.69956,1
1,-0.092742,0.68494,1
2,-0.21371,0.69225,1
3,-0.375,0.50219,1
4,-0.51325,0.46564,1


### 2.1 可视化

In [38]:
# Human-readable class names used for the legend, marker symbol and colour.
result2color={
    1:"ok",
    0:"notok"
}
# Map from the untouched numeric labels in data2 rather than from data2draw
# itself: mapping the column onto itself makes a second run of this cell look
# up the already-mapped strings in the dict and turn every label into NaN.
data2draw["result"]=data2["result"].map(result2color)
fig = px.scatter(data2draw,x="test1", y="test2", symbol="result",color='result',color_discrete_sequence=["red","blue"],height=600,width=1000)

fig.show()

In [39]:
# Confirm that data2 still holds the numeric labels after the plotting cell.
data2.head()

Unnamed: 0,test1,test2,result
0,0.051267,0.69956,1
1,-0.092742,0.68494,1
2,-0.21371,0.69225,1
3,-0.375,0.50219,1
4,-0.51325,0.46564,1


### 2.2 特征映射
更好地拟合数据的一种方法是从每个数据点创建更多的特性。在提供的函数feature_mapping，我们将把特征映射到x1和x2的所有多项式项直到特征的6次方。

In [40]:
def feature_mapping(x1, x2, power):
    """Build every polynomial term x1^a * x2^b with a + b <= power.

    Columns are named "x1^a x2^b" and ordered by total degree, then by
    increasing power of x2.  The first column (degree 0) is all ones.
    Returns a pandas DataFrame with one row per input sample.
    """
    terms = {
        "x1^{} x2^{}".format(degree - k, k): np.power(x1, degree - k) * np.power(x2, k)
        for degree in np.arange(power + 1)
        for k in np.arange(degree + 1)
    }

    return pd.DataFrame(terms)

In [41]:
# Raw feature columns as NumPy arrays.
# Note: this rebinds x1 and x2, which earlier held the points of the linear decision boundary.
x1 = data2['test1'].values
x2 = data2['test2'].values

In [42]:
# Expand the two raw features into all polynomial terms up to degree 6.
newData = feature_mapping(x1,x2,6)
newData.head()

Unnamed: 0,x1^0 x2^0,x1^1 x2^0,x1^0 x2^1,x1^2 x2^0,x1^1 x2^1,x1^0 x2^2,x1^3 x2^0,x1^2 x2^1,x1^1 x2^2,x1^0 x2^3,...,x1^2 x2^3,x1^1 x2^4,x1^0 x2^5,x1^6 x2^0,x1^5 x2^1,x1^4 x2^2,x1^3 x2^3,x1^2 x2^4,x1^1 x2^5,x1^0 x2^6
0,1.0,0.051267,0.69956,0.002628,0.035864,0.489384,0.000135,0.001839,0.025089,0.342354,...,0.0009,0.012278,0.167542,1.81563e-08,2.477505e-07,3e-06,4.6e-05,0.000629,0.008589,0.117206
1,1.0,-0.092742,0.68494,0.008601,-0.063523,0.469143,-0.000798,0.005891,-0.043509,0.321335,...,0.002764,-0.020412,0.150752,6.362953e-07,-4.699318e-06,3.5e-05,-0.000256,0.001893,-0.013981,0.103256
2,1.0,-0.21371,0.69225,0.045672,-0.147941,0.47921,-0.009761,0.031616,-0.102412,0.331733,...,0.015151,-0.049077,0.15897,9.526844e-05,-0.0003085938,0.001,-0.003238,0.010488,-0.033973,0.110047
3,1.0,-0.375,0.50219,0.140625,-0.188321,0.252195,-0.052734,0.07062,-0.094573,0.12665,...,0.01781,-0.023851,0.03194,0.002780914,-0.003724126,0.004987,-0.006679,0.008944,-0.011978,0.01604
4,1.0,-0.51325,0.46564,0.263426,-0.23899,0.216821,-0.135203,0.122661,-0.111283,0.10096,...,0.026596,-0.024128,0.02189,0.0182799,-0.01658422,0.015046,-0.01365,0.012384,-0.011235,0.010193


### 2.3 代价函数和梯度

$$
{{\theta }_{0}}:={{\theta }_{0}}-\alpha\frac{1}{m}\sum\limits_{i=1}^{m}{{{\left(\left( {{h}_{\theta }}\left( {{x}^{(i)}} \right)-{{y}^{(i)}} \right)x_0^{(i)}\right)}}}
$$
  
$${{\theta }_{j}}:={{\theta }_{j}}(1-\alpha\frac{\lambda}{m})-\alpha\frac{1}{m}\sum\limits_{i=1}^{m}{{{\left(\left( {{h}_{\theta }}\left( {{x}^{(i)}} \right)-{{y}^{(i)}} \right)x_j^{(i)}\right)}}}\quad (j\ge 1)$$

In [43]:
def costFunctionReg(theta, X, y, lamta):
    """Regularised logistic-regression cost.

    Adds (lamta / (2 * m)) * sum(theta_j ** 2) for j >= 1 to costFunction,
    where m = len(X); the first parameter (intercept) is not penalised.

    theta may be a flat array or a 1 x n row matrix.  It is flattened before
    slicing because ``theta[1:]`` on a 1 x n matrix selects rows, leaving
    nothing to take the dot product of.
    """
    flat_theta = np.asarray(theta, dtype=float).ravel()
    penalised = flat_theta[1:]
    reg = (lamta / (2 * len(X))) * (penalised @ penalised)

    return costFunction(theta, X, y) + reg

In [46]:
# Rebind X, y and theta for the regularised problem (this replaces the part-1 values).
X = np.matrix(newData)
y = np.matrix(data2.loc[:,['result']].values)

# One parameter per mapped feature column, starting at zero.
theta = np.zeros(X.shape[1])
# Display the shapes as a sanity check.
X.shape, y.shape, theta.shape

((118, 28), (118, 1), (28,))

In [47]:
# Regularised cost at the all-zero theta with lamta = 1.
costFunctionReg(theta, X, y, 1)

0.6931471805599454

In [48]:
def cul_Gradient_reg(theta, X, y, lamta):
    """Gradient of the regularised cost.

    Adds (lamta / m) * theta_j to the unregularised gradient for j >= 1,
    where m = len(X); the first parameter (intercept) gets no penalty.

    theta may be a flat array or a 1 x n row matrix.  It is flattened first
    because, on a 1 x n matrix, ``reg[0] = 0`` would zero the whole row and
    silently drop the regularisation term.  The caller's theta is not modified.
    """
    flat_theta = np.asarray(theta, dtype=float).ravel()
    reg = (lamta / len(X)) * flat_theta  # new array, so the assignment below is local
    reg[0] = 0  # intercept is not regularised
    return cul_Gradient(theta, X, y) + reg

In [49]:
# Regularised gradient at the all-zero theta with lamta = 1.
cul_Gradient_reg(theta, X, y, 1)

matrix([[8.47457627e-03, 1.87880932e-02, 7.77711864e-05, 5.03446395e-02,
         1.15013308e-02, 3.76648474e-02, 1.83559872e-02, 7.32393391e-03,
         8.19244468e-03, 2.34764889e-02, 3.93486234e-02, 2.23923907e-03,
         1.28600503e-02, 3.09593720e-03, 3.93028171e-02, 1.99707467e-02,
         4.32983232e-03, 3.38643902e-03, 5.83822078e-03, 4.47629067e-03,
         3.10079849e-02, 3.10312442e-02, 1.09740238e-03, 6.31570797e-03,
         4.08503006e-04, 7.26504316e-03, 1.37646175e-03, 3.87936363e-02]])

### 2.4 学习参数


In [57]:
# Fit the regularised model with lamta = 2 using SciPy's truncated-Newton routine.
result2 = opt.fmin_tnc(func=costFunctionReg, x0=theta, fprime=cul_Gradient_reg, args=(X, y, 2))
# Not the last expression of the cell, so this line displays nothing.
result2

# NOTE(review): the cost is evaluated with lamta = 1 although the fit above used lamta = 2 — confirm which value is intended.
costFunctionReg(result2[0],X,y,1)

0.5432132702250876

### 2.5 评估逻辑回归正则化


In [58]:
# Parameters found by the optimiser.
final_theta = result2[0]
predictions = predict(final_theta, X)
# Compare prediction and label row by row: 1 for a match, 0 otherwise.
correct = [1 if a==b else 0 for (a, b) in zip(predictions, y)]
# Fraction of matching rows, shown as the cell output.
accuracy = sum(correct) / len(correct)
accuracy

0.8305084745762712

In [59]:
from sklearn.metrics import classification_report

# Per-class precision / recall / f1 of the training-set predictions against the true labels.
print(classification_report(y, predictions))

              precision    recall  f1-score   support

           0       0.92      0.73      0.81        60
           1       0.77      0.93      0.84        58

    accuracy                           0.83       118
   macro avg       0.84      0.83      0.83       118
weighted avg       0.85      0.83      0.83       118



### 2.6 决策边界

In [100]:
# 250 x 250 grid covering [-1, 1.5] along both axes.
x = np.linspace(-1, 1.5, 250)
xx, yy = np.meshgrid(x, x)
# Apply the same degree-6 polynomial mapping that was used for the training features.
z = feature_mapping(xx.ravel(), yy.ravel(), 6).values
# Linear score (features @ theta) for every grid point.
z = z @ final_theta
# Back to the grid shape so it can be drawn as a contour.
z = z.reshape(xx.shape)

array([[ -5.62156343,  -5.50740428,  -5.39655278, ..., -18.56406855,
        -19.11427955, -19.67994208],
       [ -5.5338253 ,  -5.41994185,  -5.30936171, ..., -18.45663888,
        -19.00620321, -19.57120941],
       [ -5.44863502,  -5.33502176,  -5.22470754, ..., -18.35229131,
        -18.90122196, -19.465585  ],
       ...,
       [-17.97995038, -17.83019533, -17.68486511, ..., -44.93906558,
        -45.67872575, -46.43588279],
       [-18.51283583, -18.36239049, -18.21638831, ..., -45.72015594,
        -46.46305018, -47.22347223],
       [-19.06092171, -18.90977969, -18.76309936, ..., -46.52030521,
        -47.26647414, -48.03020209]])

In [112]:
# Colour stops for the contour lines.
colorscale = [[0, 'gold'], [0.5, 'mediumturquoise'], [1, 'black']]

# Scatter of the labelled samples with contour lines of the fitted score on top.
fig = px.scatter(data2draw,x="test1", y="test2", symbol="result",color='result',color_discrete_sequence=["red","blue"],height=600,width=1000)
# NOTE(review): no contour levels are specified here; presumably the decision boundary is only the z = 0 level — confirm whether the other levels are wanted.
fig.add_trace(go.Contour(z=z,x=x,y=x,colorscale=colorscale, contours_coloring='lines'))
fig.show()