In [1]:
%config ZMQInteractiveShell.ast_node_interactivity = "all"
%pprint

Pretty printing has been turned OFF


## logistic

logistic是一种处理2分类的线性模型，其中，$x \in R^D, y \in {0, 1}$，我们希望找到一个非线性函数$g()$，使得$R^D --> {0, 1}$，以此来预测后验概率$P(Y=1|X)$
- 模型：$P(y=1|X) = \frac{1}{1+e^{-w^Tx}}$，$P(y=0|X) = \frac{e^{-w^Tx}}{1+e^{-w^Tx}}$
- 损失函数：$L = -\frac{1}{N} \sum_{n=1}^N y^{(n)} log(\hat y^{(n)}) + (1 - y^{(n)}) log(1 - \hat y^{(n)})$
- 梯度：
    - $\frac{\partial L}{\partial w} = \frac{1}{N} \sum_{n=1}^Nx^{n} (\hat y^{(n)} - y^{(n)})$
    - $\frac{\partial L}{\partial b} = \frac{1}{N} \sum_{n=1}^N (\hat y^{(n)} - y^{(n)})$

以下使用sklearn的鸢尾花数据集来实现logistic

### numpy版

In [127]:
import sys
sys.path.append("../d2l_func/")
import numpy as np
import pandas as pd
from utils import data_iter
from sqdm import sqdm
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

In [128]:
def bootstrap(x, y):
    """自助法实现"""
    data_num = len(y)
    
    # 训练集的index和测试集的index
    batch_index = np.random.choice(data_num, size=data_num, replace=True)
    out_index = np.array(list(set(range(data_num)).difference(set(batch_index))))
    
    # 训练集
    xtrain, ytrain = x[batch_index], y[batch_index]
    # 测试集
    xtest, ytest = x[out_index], y[out_index]
    
    return xtrain, ytrain, xtest, ytest

In [129]:
# 处理数据集
iris = load_iris()
iris_data = np.hstack((iris.data, np.expand_dims(iris.target, 1)))
iris_data = iris_data[iris.target < 2]
xtrain, ytrain, xtest, ytest = bootstrap(iris_data[:, :4], iris_data[:, 4])

In [130]:
class LogisticModel(object):
    def __init__(self, alpha=0.01, weight_decay=0):
        self.w = None
        self.b = 0
        self.alpha = alpha
        self.weight_decay = weight_decay
        self.count = 0
        
    def linreg(self, X):
        return X@self.w + self.b
    
    def sigmoid(self, y):
        return 1 / (1 + np.exp(-y))
    
    def entropy_loss(self, y_pred, y):
        y_pred = np.where(y==0, 1-y_pred, y_pred)
        loss = -(np.log(y_pred).sum())/len(y)
        return loss
        
    def fit(self, X, y):
        fea_num = int(X.size / len(y))
        if self.count == 0:
            self.w = np.zeros(fea_num)
        
        # reshape X and y
        X = X.reshape(len(y), fea_num)
        y = y.reshape(-1)
        
        # predict
        y_pred = self.predict_prob(X)
        
        # update grad
        dw = (X.T@(y_pred - y)).sum()/len(y) + self.weight_decay*self.w
        db = (y_pred - y).sum()/len(y) + self.weight_decay*self.b
        self.w -= self.alpha * dw
        self.b -= self.alpha * db
        self.count += 1
    
    def predict_prob(self, X):
        y_pred = self.sigmoid(self.linreg(X)).reshape(-1)
        return y_pred
    
    def predict(self, X):
        y_pred = self.predict_prob(X)
        y_pred = np.where(y_pred>0.5, 1, 0)
        return y_pred
    
    def score(self, X, y):
        y_pred = self.predict(X)
        acc = (y_pred == y).sum()/len(y)
        return acc

In [131]:
params={
    "model": LogisticModel(alpha=0.02, weight_decay=0),
    "epoch_num": 100,
    "batch_size": 1,
}

process_bar = sqdm()
def train(model, epoch_num, batch_size):
    for epoch in range(epoch_num):
        print(f"Epoch [{epoch+1}/{epoch_num}]")
        for xdata, ydata in data_iter(batch_size, xtrain, ytrain):
            model.fit(xdata, ydata)
            
            # train
            train_pred = model.predict_prob(xdata)
            train_loss = round(model.entropy_loss(train_pred, ydata.reshape(train_pred.shape)), 5)
            train_acc = model.score(xdata, ydata)
            
            # test
            test_pred = model.predict_prob(xtest)
            test_loss = round(model.entropy_loss(test_pred, ytest.reshape(test_pred.shape)), 5)
            test_acc = model.score(xtest, ytest)

            process_bar.show_process(len(ytrain), batch_size, train_loss=train_loss, 
                                    test_loss=test_loss, train_score=train_acc, test_score=test_acc)
            
        print("\n")
    return model
    
model = train(**params)

Epoch [1/100]
100/100 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] - train_loss: 0.1455, train_score: 1.00, test_loss: 0.9576, test_score: 0.44

Epoch [2/100]
100/100 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] - train_loss: 0.1454, train_score: 1.00, test_loss: 0.9418, test_score: 0.44

Epoch [3/100]
100/100 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] - train_loss: 0.1452, train_score: 1.00, test_loss: 0.9263, test_score: 0.44

Epoch [4/100]
100/100 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] - train_loss: 0.1451, train_score: 1.00, test_loss: 0.9111, test_score: 0.44

Epoch [5/100]
100/100 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] - train_loss: 0.1449, train_score: 1.00, test_loss: 0.8963, test_score: 0.44

Epoch [6/100]
100/100 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] - train_loss: 0.1448, train_score: 1.00,

100/100 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] - train_loss: 0.1337, train_score: 1.00, test_loss: 0.4966, test_score: 0.64

Epoch [49/100]
100/100 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] - train_loss: 0.1333, train_score: 1.00, test_loss: 0.4914, test_score: 0.64

Epoch [50/100]
100/100 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] - train_loss: 0.1329, train_score: 1.00, test_loss: 0.4863, test_score: 0.64

Epoch [51/100]
100/100 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] - train_loss: 0.1325, train_score: 1.00, test_loss: 0.4813, test_score: 0.67

Epoch [52/100]
100/100 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] - train_loss: 0.1321, train_score: 1.00, test_loss: 0.4765, test_score: 0.67

Epoch [53/100]
100/100 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] - train_loss: 0.1317, train_score: 1.00, test_los

100/100 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] - train_loss: 0.1164, train_score: 1.00, test_loss: 0.3381, test_score: 0.92

Epoch [96/100]
100/100 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] - train_loss: 0.1161, train_score: 1.00, test_loss: 0.3360, test_score: 0.92

Epoch [97/100]
100/100 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] - train_loss: 0.1158, train_score: 1.00, test_loss: 0.3339, test_score: 0.92

Epoch [98/100]
100/100 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] - train_loss: 0.1155, train_score: 1.00, test_loss: 0.3318, test_score: 0.92

Epoch [99/100]
100/100 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] - train_loss: 0.1152, train_score: 1.00, test_loss: 0.3298, test_score: 0.92

Epoch [100/100]
100/100 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] - train_loss: 0.1149, train_score: 1.00, test_lo

In [132]:
# 测试集结果
model.predict(iris_data[:, :4])
model.score(iris_data[:, :4], iris_data[:, 4])

array([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

0.93

## softmax

softmax可以看成是一个多项的logistic，实际上softmax是一种条件最大熵模型
- 对于某个样本属于第c类的概率为：
    - $P(y=c|x) = \frac{exp(w^T_cx)}{\sum_{c'=1}^{C}exp(w^T_{c'}x)}$
    - 决策函数为
        - $\hat y = \underset{c}{arg min} \  P(y=c|x) = \underset{c}{arg min}\  w^T_cx$
- 损失函数：$L = - \frac{1}{N} \sum_{n=1}^N \sum_{c=1}^{C} y_c^{(n)}log(\hat y_c^{(n)})= - \sum_{n=1}^N  (y^{(n)})^Tlog(\hat y^{(n)})$
- 梯度：
    - $\frac{\partial L}{\partial w} = \frac{1}{N} \sum_{n=1}^N x^{(n)}(\hat y^{(n)} - y^{(n)})^T$
    - $\frac{\partial L}{\partial b} = \frac{1}{N} \sum_{n=1}^N (\hat y^{(n)} - y^{(n)})^T$

还是先以鸢尾花的例子来实现

In [164]:
# 处理数据集
iris = load_iris()
iris_data = np.hstack((iris.data, np.expand_dims(iris.target, 1)))
xtrain, ytrain, xtest, ytest = bootstrap(iris_data[:, :4], iris_data[:, 4])

# 处理标签--> (0 --> [1, 0, 0])
label_dict = {
    0: [1, 0, 0],
    1: [0, 1, 0],
    2: [0, 0, 1]
}

data = np.array(list(map(lambda x: label_dict[x], iris_data[:, 4])))
ytrain = np.array(list(map(lambda x: label_dict[x], list(ytrain))))
ytest = np.array(list(map(lambda x: label_dict[x], list(ytest))))

In [166]:
class SoftmaxModel(object):
    """实现softmax"""
    def __init__(self, fea_num, cate_num, alpha=0.01, weight_decay=0):
        self.w = np.zeros([fea_num, cate_num])
        self.b = np.zeros(cate_num)
        self.fea_num = fea_num
        self.cate_num = cate_num
        self.alpha = alpha
        self.weight_decay = weight_decay
        self.count = 0
        
    def linreg(self, X):
        return X@self.w + self.b
    
    def softmax(self, y):
        return np.exp(y)/np.expand_dims(np.exp(y).sum(axis = 1), 1)
    
    def entropy_loss(self, y_pred, y):
        loss = -(y*np.log(y_pred)).sum()/len(y)
        return loss
    
    def cal_grad(self, X, y_diff):
        result = np.zeros([self.fea_num, self.cate_num])
        for i in range(len(X)):
            result += np.outer(X.T[:, i], y_diff[i, :])
        return result / len(X)
        
        
    def fit(self, X, y):
        # predict
        y_pred = self.predict_prob(X)
        
        # update_grad
        dw = self.cal_grad(X, (y_pred-y)) + self.weight_decay * self.w
        db = (y_pred-y).sum(axis=0)/len(y) + self.weight_decay * self.b
        self.w -= self.alpha * dw
        self.b -= self.alpha * db
        self.count += 1
        
            
    def predict_prob(self, X):
        y_pred = self.softmax(self.linreg(X))
        return y_pred
    
    def predict(self, X):
        y_pred = self.predict_prob(X)
        pred_index = np.argmax(y_pred, axis=1)
        return pred_index
    
    def score(self, X, y):
        y_pred = self.predict_prob(X)
        pred_index = np.argmax(y_pred, axis=1)
        label_index = np.argmax(y, axis = 1)
        acc = (pred_index == label_index).sum()/len(y)
        return acc

In [193]:
params={
    "model": SoftmaxModel(fea_num=4, cate_num=3, alpha=0.01, weight_decay=0),
    "epoch_num": 100,
    "batch_size": 1,
}

process_bar = sqdm()
def train(model, epoch_num, batch_size):
    for epoch in range(epoch_num):
        print(f"Epoch [{epoch+1}/{epoch_num}]")
        for xdata, ydata in data_iter(batch_size, xtrain, ytrain):
            model.fit(xdata, ydata)
            
            # train
            train_pred = model.predict_prob(xdata)
            train_loss = model.entropy_loss(train_pred, ydata.reshape(train_pred.shape))
            train_acc = model.score(xdata, ydata)
            
            # test
            test_pred = model.predict_prob(xtest)
            test_loss = model.entropy_loss(test_pred, ytest.reshape(test_pred.shape))
            test_acc = model.score(xtest, ytest)

            process_bar.show_process(len(ytrain), batch_size, train_loss=train_loss, 
                                    test_loss=test_loss, train_score=train_acc, test_score=test_acc)
            
        print("\n")
    return model
    
model = train(**params)

Epoch [1/100]
150/150 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] - train_loss: 1.3278, train_score: 0.00, test_loss: 0.7245, test_score: 0.69

Epoch [2/100]
150/150 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] - train_loss: 1.2187, train_score: 0.00, test_loss: 0.5962, test_score: 0.69

Epoch [3/100]
150/150 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] - train_loss: 1.1511, train_score: 0.00, test_loss: 0.5335, test_score: 0.69

Epoch [4/100]
150/150 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] - train_loss: 1.1013, train_score: 0.00, test_loss: 0.4920, test_score: 0.69

Epoch [5/100]
150/150 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] - train_loss: 1.0615, train_score: 0.00, test_loss: 0.4608, test_score: 0.71

Epoch [6/100]
150/150 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] - train_loss: 1.0283, train_score: 0.00,

150/150 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] - train_loss: 0.7161, train_score: 0.00, test_loss: 0.2254, test_score: 0.92

Epoch [49/100]
150/150 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] - train_loss: 0.7146, train_score: 0.00, test_loss: 0.2245, test_score: 0.92

Epoch [50/100]
150/150 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] - train_loss: 0.7132, train_score: 0.00, test_loss: 0.2237, test_score: 0.92

Epoch [51/100]
150/150 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] - train_loss: 0.7118, train_score: 0.00, test_loss: 0.2229, test_score: 0.90

Epoch [52/100]
150/150 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] - train_loss: 0.7106, train_score: 0.00, test_loss: 0.2221, test_score: 0.90

Epoch [53/100]
150/150 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] - train_loss: 0.7093, train_score: 0.00, test_los

150/150 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] - train_loss: 0.6851, train_score: 1.00, test_loss: 0.2079, test_score: 0.92

Epoch [96/100]
150/150 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] - train_loss: 0.6849, train_score: 1.00, test_loss: 0.2077, test_score: 0.92

Epoch [97/100]
150/150 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] - train_loss: 0.6847, train_score: 1.00, test_loss: 0.2076, test_score: 0.92

Epoch [98/100]
150/150 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] - train_loss: 0.6845, train_score: 1.00, test_loss: 0.2075, test_score: 0.92

Epoch [99/100]
150/150 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] - train_loss: 0.6843, train_score: 1.00, test_loss: 0.2074, test_score: 0.92

Epoch [100/100]
150/150 [>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] - train_loss: 0.6841, train_score: 1.00, test_lo

In [194]:
# 测试集结果
model.predict(iris_data[:, :4])
model.score(iris_data[:, :4], data)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 1, 2, 2, 1, 2, 1,
       2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

0.9466666666666667