# 导入numpy计算库

In [1]:
import numpy as np

# 任务1：实现线性模型

## 1.1 Softmax计算

- 输入x：一个K维的向量
- 输出o：一个K维的向量
- 计算过程
    - $o = \exp(x)$ 这是一个向量
    - $sum\_o = \text{sum}(o)$ 这是一个标量
    - $o = \frac{o}{sum\_o}$
- 使用的函数请参考文档，包括np.exp、np.sum等

In [2]:
def softmax(x):
    """Return the softmax of ``x`` as an array of probabilities.

    The maximum entry is subtracted before exponentiating. Softmax is
    invariant to adding a constant to every input, so the result is
    mathematically unchanged, but ``np.exp`` no longer overflows to ``inf``
    (which would turn the output into ``nan``) for large scores.

    Args:
        x: Array-like of scores (the notebook passes a 1-D vector).

    Returns:
        ``np.ndarray`` with the same shape as ``x``. For finite inputs the
        entries are non-negative and sum to 1 over the whole array.
    """
    x = np.asarray(x)
    if x.size == 0:
        # Nothing to normalise, and np.max would raise on an empty array.
        return np.exp(x)
    o = np.exp(x - np.max(x))
    return o / np.sum(o)

## 1.2 验证softmax模块

输出结果大致为$[0.032,  0.087,  0.237,  0.644]$

In [3]:
# Sanity check: softmax of a small vector, compared with reference values.
x = np.array([1, 2, 3, 4])
o = softmax(x)
print(o)
# Fail loudly if the result drifts from the expected probabilities instead of
# relying on a visual comparison of the printed numbers.
np.testing.assert_allclose(o, [0.0320586, 0.08714432, 0.23688282, 0.64391426], atol=1e-6)

[0.0320586  0.08714432 0.23688282 0.64391426]


## 1.3 模型正向预测
- 输入x：一个K维的向量
- 输入weight：一个N * K维的矩阵
- 输入bias：一个N维的向量
- 输出o：一个N维的概率向量
- 计算过程：
    - 利用线性模型得到输出：$z = weight \cdot x + bias$（一个N维向量）
    - 用你实现的softmax对输出进行处理，得到概率向量
    
可能用到np.matmul等函数。

In [4]:
def forward(x, weight, bias):
    """Run the linear model on ``x`` and return class probabilities.

    Args:
        x: Input feature vector (K-dimensional per the notebook text).
        weight: Weight matrix (N x K per the notebook text).
        bias: Bias vector (N-dimensional per the notebook text).

    Returns:
        The result of applying ``softmax`` to ``weight . x + bias``.
    """
    # Linear scores first; np.transpose leaves a 1-D x unchanged.
    logits = np.matmul(weight, np.transpose(x)) + bias
    return softmax(logits)

## 1.4 验证正向预测过程

输出结果大致为[9.11e-04 9.99e-01]

In [5]:
# Sanity check of the forward pass on a tiny 2x2 model.
w = np.array([[1, 2], [3, 4]])
b = np.array([1, 2])
x = np.array([1, 2])
y = forward(x, w, b)
print(y)
# Assert against the reference probabilities so a regression cannot pass
# unnoticed.
np.testing.assert_allclose(y, [9.11051194e-04, 9.99088949e-01], atol=1e-6)

[9.11051194e-04 9.99088949e-01]


# 任务2：计算损失函数
- 输入y：一个整数，表示真实标签
- 输入o：一个N维向量，表示模型的预测概率（已经经过softmax处理了的模型输出结果）
- 输出loss：一个实数，表示损失函数的计算结果

这里可能用到np.log等函数

In [6]:
def cross_entropy_loss(y, o):
    """Cross-entropy loss of predicted probabilities ``o`` for true label ``y``.

    With a one-hot target, the cross-entropy sum ``-sum(onehot * log(o))``
    has a single non-zero term, ``-log(o[y])``, so that term is read
    directly. Multiplying the whole vector by a one-hot mask instead
    evaluates ``0 * log(0)`` for any non-target class whose probability is
    exactly 0, which is ``nan`` and poisons the loss.

    Args:
        y: Integer index of the true class.
        o: 1-D array-like of predicted probabilities.

    Returns:
        The scalar loss ``-log(o[y])`` (``inf`` when ``o[y]`` is 0).
    """
    o = np.asarray(o)
    return -np.log(o[y])

## 验证损失函数计算
输出结果大致为0.6931

In [7]:
# Sanity check: the loss for a true class with probability 0.5 is ln(2).
y = 0
o = np.array([0.5, 0.25, 0.25])
loss = cross_entropy_loss(y, o)
print(loss)
# Assert against the reference value rather than only printing it.
np.testing.assert_allclose(loss, 0.6931471805599453, atol=1e-9)

0.6931471805599453


# 任务3：实现梯度计算过程

- 输入x：一个K维的向量
- 输入o：一个N维的向量
- 输入y：一个整数的标签
- 输入weight：一个N * K维的矩阵
- 输入bias：一个N维的向量
- 输出weight_grad：一个N * K维的矩阵，即loss对weight的梯度
- 输出bias_grad：一个N维的向量，即loss对bias的梯度

这里可能会用到np.outer函数。

In [8]:
def backward(x, o, y, weight, bias):
    """Compute the gradients of the loss with respect to the model parameters.

    Args:
        x: Input feature vector.
        o: Predicted probability vector for ``x``.
        y: Integer index of the true class.
        weight: Not used by the computation; kept in the signature.
        bias: Not used by the computation; kept in the signature.

    Returns:
        Tuple ``(weight_grad, bias_grad)`` where ``bias_grad`` is
        ``o - onehot(y)`` and ``weight_grad`` is its outer product with ``x``.
    """
    # One-hot encoding of the true label, same length as the prediction.
    target = np.zeros(o.shape[0])
    target[y] = 1

    # Both gradients share the prediction error (o - target).
    delta = o - target
    return np.outer(delta, x), delta

## 验证梯度计算公式

输出结果大致为(array([[ 0.00091105,  0.0018221 ],
       [-0.00091105, -0.0018221 ]]), array([ 0.00091105, -0.00091105]))

In [9]:
# Sanity check of the gradient formulas on the tiny 2x2 model.
w = np.array([[1, 2], [3, 4]])
b = np.array([1, 2])
x = np.array([1, 2])
y = 1
o = forward(x, w, b)
grads = backward(x, o, y, w, b)
print(grads)
# Assert against the reference gradients instead of only printing them.
np.testing.assert_allclose(
    grads[0],
    [[0.00091105, 0.0018221], [-0.00091105, -0.0018221]],
    atol=1e-7,
)
np.testing.assert_allclose(grads[1], [0.00091105, -0.00091105], atol=1e-7)

(array([[ 0.00091105,  0.0018221 ],
       [-0.00091105, -0.0018221 ]]), array([ 0.00091105, -0.00091105]))


# 任务4：模型训练



## 准备步骤

加载数据，初始化模型参数，定义关键变量

In [10]:
# Problem dimensions: number of classes and number of input features.
label_num = 3
input_size = 13

# Optimisation hyper-parameters.
epoches = 1000
learning_rate = 0.002

# Fix the RNG seed so the random initialisation below, and therefore the
# whole training run, is reproducible under Restart & Run All.
np.random.seed(0)

# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects,
# which can execute code; only load these files from a trusted source.
trainset = np.load("trainset.npy", allow_pickle=True)
testset = np.load("testset.npy", allow_pickle=True)

# Small uniform random initial parameters drawn from [-0.1, 0.1).
weight = np.random.uniform(-0.1, 0.1, (label_num, input_size))
bias = np.random.uniform(-0.1, 0.1, label_num)

## 模型训练

如果上述三个模块实现正确，loss将越来越小、逐渐收敛。

In [11]:
for epoch in range(1, epoches + 1):

    # Running sums for this epoch; the scalar 0 is replaced by an array the
    # first time a gradient is added to it.
    total_loss = 0
    total_weight_grad = 0
    total_bias_grad = 0
    count = 0

    for count, (x, y) in enumerate(trainset, start=1):
        # Forward pass: predicted class probabilities for this sample.
        o = forward(x, weight, bias)

        # Loss and parameter gradients for this sample.
        loss = cross_entropy_loss(y, o)
        weight_grad, bias_grad = backward(x, o, y, weight, bias)

        total_loss += loss
        total_weight_grad += weight_grad
        total_bias_grad += bias_grad

    # Gradient-descent step using total_weight_grad and total_bias_grad, the
    # gradients summed over the whole epoch.
    weight, bias = (
        weight - learning_rate * total_weight_grad,
        bias - learning_rate * total_bias_grad,
    )

    avg_loss = total_loss / count
    print('epoch=%d, averaged loss=%.3f' % (epoch, avg_loss))
    
    

epoch=1, averaged loss=1.120
epoch=2, averaged loss=1.091
epoch=3, averaged loss=1.067
epoch=4, averaged loss=1.046
epoch=5, averaged loss=1.027
epoch=6, averaged loss=1.008
epoch=7, averaged loss=0.991
epoch=8, averaged loss=0.974
epoch=9, averaged loss=0.957
epoch=10, averaged loss=0.941
epoch=11, averaged loss=0.926
epoch=12, averaged loss=0.911
epoch=13, averaged loss=0.896
epoch=14, averaged loss=0.882
epoch=15, averaged loss=0.868
epoch=16, averaged loss=0.855
epoch=17, averaged loss=0.841
epoch=18, averaged loss=0.829
epoch=19, averaged loss=0.816
epoch=20, averaged loss=0.804
epoch=21, averaged loss=0.793
epoch=22, averaged loss=0.781
epoch=23, averaged loss=0.770
epoch=24, averaged loss=0.760
epoch=25, averaged loss=0.749
epoch=26, averaged loss=0.739
epoch=27, averaged loss=0.729
epoch=28, averaged loss=0.720
epoch=29, averaged loss=0.710
epoch=30, averaged loss=0.701
epoch=31, averaged loss=0.692
epoch=32, averaged loss=0.684
epoch=33, averaged loss=0.675
epoch=34, averaged 


epoch=183, averaged loss=0.268
epoch=184, averaged loss=0.267
epoch=185, averaged loss=0.266
epoch=186, averaged loss=0.265
epoch=187, averaged loss=0.265
epoch=188, averaged loss=0.264
epoch=189, averaged loss=0.263
epoch=190, averaged loss=0.262
epoch=191, averaged loss=0.261
epoch=192, averaged loss=0.260
epoch=193, averaged loss=0.260
epoch=194, averaged loss=0.259
epoch=195, averaged loss=0.258
epoch=196, averaged loss=0.257
epoch=197, averaged loss=0.256
epoch=198, averaged loss=0.256
epoch=199, averaged loss=0.255
epoch=200, averaged loss=0.254
epoch=201, averaged loss=0.253
epoch=202, averaged loss=0.253
epoch=203, averaged loss=0.252
epoch=204, averaged loss=0.251
epoch=205, averaged loss=0.250
epoch=206, averaged loss=0.250
epoch=207, averaged loss=0.249
epoch=208, averaged loss=0.248
epoch=209, averaged loss=0.248
epoch=210, averaged loss=0.247
epoch=211, averaged loss=0.246
epoch=212, averaged loss=0.245
epoch=213, averaged loss=0.245
epoch=214, averaged loss=0.244
epoch=2

# 任务5：验证模型准确度

## 实现准确度计算模块

In [12]:
def calc_accuracy(weight, bias, dataset):
    """Return the fraction of samples in ``dataset`` the model classifies correctly.

    Args:
        weight: Model weight matrix passed to ``forward``.
        bias: Model bias vector passed to ``forward``.
        dataset: Iterable of ``(x, y)`` pairs of features and true label.

    Returns:
        ``correct / count``: the number of samples whose highest-probability
        class equals ``y``, divided by the number of samples.
    """
    correct = 0
    count = 0
    for count, (x, y) in enumerate(dataset, start=1):
        # The predicted class is the index of the largest probability.
        predicted = np.argmax(forward(x, weight, bias))
        correct += bool(predicted == y)

    return correct / count

## 计算准确度 
计算并输出模型在trainset和testset上的预测准确率

In [13]:
# Evaluate the trained parameters on the training split, then the test split.
train_acc, test_acc = (
    calc_accuracy(weight, bias, split) for split in (trainset, testset)
)

print(train_acc, test_acc)

0.9866666666666667 1.0
