# 导入numpy计算库

In [1]:
import numpy as np

# 任务1：实现线性模型

## 1.1 Softmax计算

- 输入x：一个K维的向量
- 输出o：一个K维的向量
- 计算过程
    - $o = \exp(x)$ 这是一个向量
    - $sum\_o = \text{sum}(o)$ 这是一个标量
    - $o = \frac{o}{sum\_o}$
- 使用的函数请参考文档，包括np.exp、np.sum等

In [2]:
def softmax(x):
    # TODO: 计算输入Numpy向量x的softmax函数计算结果
    x_max = np.max(x)
    x = x - x_max
    o = np.exp(x - np.max(x))
    sum_o = np.sum(o)
    o = o/sum_o
    return o

## 1.2 验证softmax模块

输出结果大致为$[0.032,  0.087,  0.237,  0.644]$

In [3]:
x = np.array([1, 2, 3, 4])
o = softmax(x)
print(o)

[0.0320586  0.08714432 0.23688282 0.64391426]


## 1.3 模型正向预测
- 输入x：一个K维的向量
- 输入weight：一个N * K维的矩阵
- 输入bias：一个N维的向量
- 输出o：一个N维的概率向量
- 计算过程：
    - 利用线性模型得到输出
    - 用你实现的softmax对输出进行处理，得到概率向量
    
可能用到np.matmul等函数。

In [4]:
def forward(x, weight, bias):
    # TODO: 给定模型参数，计算模型预测结果o
    f = np.matmul(weight,x) + bias
    o = softmax(f)
    return o

## 1.4 验证正向预测过程

输出结果大致为[9.11e-04 9.99e-01]

In [5]:
w = np.array([[1, 2], [3, 4]])
b = np.array([1, 2])
x = np.array([1, 2])
y = forward(x, w, b)
print(y)

[9.11051194e-04 9.99088949e-01]


# 任务2：计算损失函数
- 输入y：一个整数，表示真实标签
- 输入o：一个N维向量，表示模型的预测概率（已经经过softmax处理了的模型输出结果）
- 输出loss：一个实数，表示损失函数的计算结果

这里可能用到np.log等函数

In [6]:
def cross_entropy_loss(y, o):
    label_num = o.shape[0]
    y_onehot = np.zeros(label_num)
    
    # TODO: 把y转换到y_onehot上，然后计算交叉熵loss
    y_onehot[y] = 1
    loss = np.sum(-np.matmul(y_onehot,np.log(o)))
    return loss

## 验证损失函数计算
输出结果大致为0.6931

In [7]:
y = 0
o = np.array([0.5, 0.25, 0.25])
print(cross_entropy_loss(y, o))

0.6931471805599453


# 任务3：实现梯度计算过程

- 输入x：一个K维的向量
- 输入o：一个N维的向量
- 输入y：一个整数的标签
- 输入weight：一个N * K维的矩阵
- 输入bias：一个N维的向量
- 输出weight_grad：一个N * K维的矩阵，是由loss计算到的weight的梯度
- 输出bias_grad：一个N维的向量，是由loss计算到的bias的梯度

这里可能会用到np.outer函数。

In [8]:
def backward(x, o, y, weight, bias):
    label_num = o.shape[0]
    y_onehot = np.zeros(label_num)
    
    # TODO: 把y转换到y_onehot上，然后计算模型参数的梯度
    y_onehot[y] = 1   
    f = forward(x, weight, bias)
    weight_grad = - np.outer(y_onehot - f,x)
    bias_grad = - (y_onehot - f)
    return weight_grad, bias_grad

## 验证梯度计算公式

输出结果大致为(array([[ 0.00091105,  0.0018221 ],
       [-0.00091105, -0.0018221 ]]), array([ 0.00091105, -0.00091105]))

In [9]:
w = np.array([[1, 2], [3, 4]])
b = np.array([1, 2])
x = np.array([1, 2])
y = 1
o = forward(x, w, b)
print(backward(x, o, y, w, b))

(array([[ 0.00091105,  0.0018221 ],
       [-0.00091105, -0.0018221 ]]), array([ 0.00091105, -0.00091105]))


# 任务4：模型训练



## 准备步骤

加载数据，初始化模型参数，定义关键变量

In [10]:
label_num = 3
input_size = 13

# epoches = 1000
epoches = 100
learning_rate = 0.01

trainset = np.load("trainset.npy", allow_pickle=True)
testset = np.load("testset.npy", allow_pickle=True)

weight = np.random.uniform(-0.1, 0.1, (label_num, input_size))
bias = np.random.uniform(-0.1, 0.1, label_num)

## 模型训练

如果上述三个模块实现正确，loss将越来越小、逐渐收敛。

In [11]:
for epoch in range(1, epoches+1):
    
    total_loss = total_weight_grad = total_bias_grad = 0
    count = 0
    
    for x, y in trainset:
        
        # TODO: 得到模型预测结果o
        o = forward(x,weight,bias)
        
        # TODO: 计算损失函数loss
        loss = cross_entropy_loss(y, o)
        
        # TODO: 计算梯度
        weight_grad, bias_grad = backward(x, o, y, weight, bias)
        
        count += 1
        total_loss += loss
        total_weight_grad += weight_grad
        total_bias_grad += bias_grad
    
    # TODO：实现SGD公式
    weight = weight - total_weight_grad
    bias = bias - total_bias_grad
    
    avg_loss = total_loss / count
    print('epoch=%d, averaged loss=%.3f' % (epoch, avg_loss))
    
    

epoch=1, averaged loss=1.115
epoch=2, averaged loss=14.940
epoch=3, averaged loss=140.572
epoch=4, averaged loss=184.728
epoch=5, averaged loss=74.109
epoch=6, averaged loss=165.228
epoch=7, averaged loss=24.811
epoch=8, averaged loss=24.039
epoch=9, averaged loss=49.186
epoch=10, averaged loss=50.616
epoch=11, averaged loss=16.507
epoch=12, averaged loss=40.704
epoch=13, averaged loss=19.816
epoch=14, averaged loss=32.606
epoch=15, averaged loss=10.149
epoch=16, averaged loss=7.238
epoch=17, averaged loss=1.616
epoch=18, averaged loss=0.547
epoch=19, averaged loss=0.426
epoch=20, averaged loss=0.385
epoch=21, averaged loss=0.356
epoch=22, averaged loss=0.328
epoch=23, averaged loss=0.458
epoch=24, averaged loss=0.415
epoch=25, averaged loss=0.418
epoch=26, averaged loss=0.306
epoch=27, averaged loss=0.293
epoch=28, averaged loss=0.227
epoch=29, averaged loss=0.179
epoch=30, averaged loss=0.186
epoch=31, averaged loss=0.335
epoch=32, averaged loss=0.460
epoch=33, averaged loss=0.206
ep

# 任务5：验证模型准确度

## 实现准确度计算模块

In [12]:
def calc_accuracy(weight, bias, dataset):
    # 返回模型在数据集上的预测准确率
    
    correct = 0
    count = 0
    for x, y in dataset:
        
        # TODO：获取模型预测结果
        o = forward(x,weight,bias)
        # print(o)
        # print(y)
        # TODO: 取出模型置信度最大是哪个维度
        maxdim = np.argmax(o)
        # TODO：比较模型预测结果，计算correct（正确计数）和count（总量计数）
        if y == maxdim:
            correct += 1
        count += 1

    acc = correct / count
    return acc

## 计算准确度 
计算并输出模型在trainset和test上的预测准确率

In [13]:
# TODO: 统计训练集和测试集上的准确率
train_acc = calc_accuracy(weight, bias, trainset)
test_acc = calc_accuracy(weight, bias, testset)

print(train_acc, test_acc)

1.0 1.0
