In [38]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader
import time

# 设置平台  GPU/ CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
num_workers=16
start_time = time.time()



cuda


In [None]:
# 定义前馈神经网络模型
class FeedforwardNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(FeedforwardNN, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)
        
    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out


# 手动实现dropout

In [39]:
#drop_prob=0.5 ##定义丢弃率
def dropout(X,drop_prob):
    X=X.float()
    #检查丢弃概率是否在0到1之间
    assert 0<=drop_prob<=1
    keep_prob = 1-drop_prob
    #这种情况下吧全部元素都丢弃
    if keep_prob == 0:
        return torch.zeros_like(X)
    #生成mask矩阵（向量）
    mask=(torch.rand(X.shape)<keep_prob).float()
    mask = mask.to(X.device)
    #按照mask进行对X进行变换
    return mask*X/keep_prob

# 定义使用dropout的网络模型，两个隐藏层的丢弃率分别为0.2和0.5

In [40]:
# 定义前馈神经网络模型
class FeedforwardNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes, is_training=True,dropout_prob=0.2):
        super(FeedforwardNN, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.is_training = is_training
        self.fc2 = nn.Linear(hidden_size, num_classes)
        self.dropout_prob = dropout_prob


    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        if self.is_training and self.dropout_prob > 0:  # 如果是在训练中使用dropout且丢弃概率大于0
            out = dropout(out, drop_prob=self.dropout_prob)
        out = self.fc2(out)
        return out

In [41]:
# 定义超参数
input_size = 28 * 28  # 输入特征的维度
hidden_size = 128  # 隐藏层的大小
num_classes = 10  # 类别的数量
learning_rate = 0.001  # 学习率
num_epochs = 10  # 迭代次数
batch_size = 64  # 批次大小


In [42]:
# 加载数据集
train_dataset = MNIST(root='~/Datasets/MNIST', train=True, transform=ToTensor(), download=True)
test_dataset = MNIST(root='~/Datasets/MNIST', train=False, transform=ToTensor(), download=False)

train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True,num_workers=num_workers)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False,num_workers=num_workers)
# 初始化模型
model = FeedforwardNN(input_size, hidden_size, num_classes).to(device)




## 分别实现momentum、rmsprop、adam优化器

In [35]:
def init_momentum_states(params):
    v_w1,v_b1,v_w2,v_b2 = torch.zeros(params[0].shape),torch.zeros(params[1].shape),torch.zeros(params[2].shape),torch.zeros(params[3].shape)
    return (v_w1,v_b1,v_w2,v_b2)
def sgd_momentum(params,states,lr,momentum):
    for p,v in zip(params,states):
        with torch.no_grad():
            v[:]=momentum*v - p.grad
            p[:]+=lr*v
        p.grad.data.zero_()

In [36]:
def init_rmsprop_states(params):
    s_w1, s_b1, s_w2, s_b2 = torch.zeros(params[0].shape), torch.zeros(params[1].shape), torch.zeros(params[2].shape), torch.zeros(params[3].shape)
    return (s_w1, s_b1, s_w2, s_b2)


def rmsprop(params, states, lr,gamma):
    gamma, eps = gamma , 1e-6
    for p, s in zip(params, states):
        with torch.no_grad():
            s[:] = gamma * s + (1 - gamma) * torch.square(p.grad)
            p[:] -= lr * p.grad / torch.sqrt(s + eps)
        p.grad.data.zero_()

In [41]:
def init_adam_states(params):
    v_w1,v_b1,v_w2,v_b2=torch.zeros(params[0].shape),torch.zeros(params[1].shape),torch.zeros(params[2].shape),torch.zeros(params[3].shape)
    s_w1,s_b1,s_w2,s_b2 = torch.zeros(params[0].shape),torch.zeros(params[1].shape),torch.zeros(params[2].shape),torch.zeros(params[3].shape)
    return ((v_w1,s_w1),(v_b1,s_b1),(v_w2,s_w2),(v_b2,s_b2))
def adam(params,states,lr,t):
    beta1,beta2,eps = 0.9,0.999,1e-6
    t+=1#更新迭代次数
    
    for p,(v,s) in zip(params,states):
        with torch.no_grad():
        #补全  
            # 更新动量和二阶动量
            v.data = beta1 * v + (1 - beta1) * p.grad.data
            s.data = beta2 * s + (1 - beta2) * p.grad.data**2
            # 进行偏差修正
            v_hat = v / (1 - beta1**t)
            s_hat = s / (1 - beta2**t)
            # 更新参数
            p.data -= lr * v_hat / (torch.sqrt(s_hat) + eps)
        p.grad.data.zero_()
    t+=1

## 使用torch.nn实现adam优化器

In [43]:


# 定义损失函数和优化器
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# 训练模型
for epoch in range(num_epochs):
    model.train()
    for batch_idx, (data, targets) in enumerate(train_loader):
        data = data.view(-1, input_size).to(device)
        targets=targets.to(device)
        # 前向传播
        outputs = model(data)
        loss = criterion(outputs, targets)
        
        # 反向传播和优化
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if (batch_idx+1) % 100 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{batch_idx+1}/{len(train_loader)}], Loss: {loss.item():.4f}')
    
    # 在测试集上评估模型
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for data, targets in test_loader:
            data = data.view(-1, input_size).to(device)
            targets=targets.to(device)
            outputs = model(data)
            _, predicted = torch.max(outputs.data, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()
        
        accuracy = correct / total
        print(f'Test Accuracy: {accuracy:.4f}')


Epoch [1/10], Step [100/938], Loss: 0.4915
Epoch [1/10], Step [200/938], Loss: 0.3527
Epoch [1/10], Step [300/938], Loss: 0.4273
Epoch [1/10], Step [400/938], Loss: 0.2500
Epoch [1/10], Step [500/938], Loss: 0.1820
Epoch [1/10], Step [600/938], Loss: 0.2745
Epoch [1/10], Step [700/938], Loss: 0.2237
Epoch [1/10], Step [800/938], Loss: 0.2364
Epoch [1/10], Step [900/938], Loss: 0.1363
Test Accuracy: 0.9394
Epoch [2/10], Step [100/938], Loss: 0.1590
Epoch [2/10], Step [200/938], Loss: 0.2235
Epoch [2/10], Step [300/938], Loss: 0.1372
Epoch [2/10], Step [400/938], Loss: 0.1229
Epoch [2/10], Step [500/938], Loss: 0.1700
Epoch [2/10], Step [600/938], Loss: 0.1567
Epoch [2/10], Step [700/938], Loss: 0.0472
Epoch [2/10], Step [800/938], Loss: 0.2243
Epoch [2/10], Step [900/938], Loss: 0.0812
Test Accuracy: 0.9522
Epoch [3/10], Step [100/938], Loss: 0.0891
Epoch [3/10], Step [200/938], Loss: 0.0889
Epoch [3/10], Step [300/938], Loss: 0.1128
Epoch [3/10], Step [400/938], Loss: 0.0776
Epoch [3/1

## 使用torch.nn实现rmsprop做优化器

In [None]:


# 定义损失函数和优化器
criterion = nn.CrossEntropyLoss()
optimizer = optim.RMSprop(model.parameters(), lr=learning_rate)

# 训练模型
for epoch in range(num_epochs):
    model.train()
    for batch_idx, (data, targets) in enumerate(train_loader):
        data = data.view(-1, input_size).to(device)
        targets=targets.to(device)
        # 前向传播
        outputs = model(data)
        loss = criterion(outputs, targets)
        
        # 反向传播和优化
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if (batch_idx+1) % 100 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{batch_idx+1}/{len(train_loader)}], Loss: {loss.item():.4f}')
    
    # 在测试集上评估模型
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for data, targets in test_loader:
            data = data.view(-1, input_size).to(device)
            targets=targets.to(device)
            outputs = model(data)
            _, predicted = torch.max(outputs.data, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()
        
        accuracy = correct / total
        print(f'Test Accuracy: {accuracy:.4f}')


## 使用torch.nn实现momentum做优化器

In [51]:
learning_rate=0.001
num_epochs=50

momentum=0.8
# 定义损失函数和优化器
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)
# 训练模型
for epoch in range(num_epochs):
    model.train()
    for batch_idx, (data, targets) in enumerate(train_loader):
        data = data.view(-1, input_size).to(device)
        targets=targets.to(device)
        # 前向传播
        outputs = model(data)
        loss = criterion(outputs, targets)
        
        # 反向传播和优化
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if (batch_idx+1) % 100 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{batch_idx+1}/{len(train_loader)}], Loss: {loss.item():.4f}')
    
    # 在测试集上评估模型
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for data, targets in test_loader:
            data = data.view(-1, input_size).to(device)
            targets=targets.to(device)
            outputs = model(data)
            _, predicted = torch.max(outputs.data, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()
        
        accuracy = correct / total
        print(f'Test Accuracy: {accuracy:.4f}')


Epoch [1/50], Step [100/938], Loss: 2.2493
Epoch [1/50], Step [200/938], Loss: 2.1096
Epoch [1/50], Step [300/938], Loss: 2.0142
Epoch [1/50], Step [400/938], Loss: 1.8844
Epoch [1/50], Step [500/938], Loss: 1.6792
Epoch [1/50], Step [600/938], Loss: 1.6195
Epoch [1/50], Step [700/938], Loss: 1.5504
Epoch [1/50], Step [800/938], Loss: 1.1844
Epoch [1/50], Step [900/938], Loss: 1.0321
Test Accuracy: 0.7985
Epoch [2/50], Step [100/938], Loss: 0.9886
Epoch [2/50], Step [200/938], Loss: 0.7797
Epoch [2/50], Step [300/938], Loss: 0.7063
Epoch [2/50], Step [400/938], Loss: 0.8301
Epoch [2/50], Step [500/938], Loss: 0.7299
Epoch [2/50], Step [600/938], Loss: 0.7665
Epoch [2/50], Step [700/938], Loss: 0.7775
Epoch [2/50], Step [800/938], Loss: 0.6572
Epoch [2/50], Step [900/938], Loss: 0.8585
Test Accuracy: 0.8606
Epoch [3/50], Step [100/938], Loss: 0.6768
Epoch [3/50], Step [200/938], Loss: 0.6606
Epoch [3/50], Step [300/938], Loss: 0.4216
Epoch [3/50], Step [400/938], Loss: 0.7141
Epoch [3/5

Epoch [20/50], Step [900/938], Loss: 0.2430
Test Accuracy: 0.9295
Epoch [21/50], Step [100/938], Loss: 0.2879
Epoch [21/50], Step [200/938], Loss: 0.4706
Epoch [21/50], Step [300/938], Loss: 0.1725
Epoch [21/50], Step [400/938], Loss: 0.2831
Epoch [21/50], Step [500/938], Loss: 0.3509
Epoch [21/50], Step [600/938], Loss: 0.2923
Epoch [21/50], Step [700/938], Loss: 0.1936
Epoch [21/50], Step [800/938], Loss: 0.3400
Epoch [21/50], Step [900/938], Loss: 0.1445
Test Accuracy: 0.9303
Epoch [22/50], Step [100/938], Loss: 0.3303
Epoch [22/50], Step [200/938], Loss: 0.2277
Epoch [22/50], Step [300/938], Loss: 0.2351
Epoch [22/50], Step [400/938], Loss: 0.1011
Epoch [22/50], Step [500/938], Loss: 0.3336
Epoch [22/50], Step [600/938], Loss: 0.2213
Epoch [22/50], Step [700/938], Loss: 0.4092
Epoch [22/50], Step [800/938], Loss: 0.4772
Epoch [22/50], Step [900/938], Loss: 0.3888
Test Accuracy: 0.9315
Epoch [23/50], Step [100/938], Loss: 0.3413
Epoch [23/50], Step [200/938], Loss: 0.0878
Epoch [23/

Epoch [40/50], Step [600/938], Loss: 0.1674
Epoch [40/50], Step [700/938], Loss: 0.1886
Epoch [40/50], Step [800/938], Loss: 0.1216
Epoch [40/50], Step [900/938], Loss: 0.1443
Test Accuracy: 0.9472
Epoch [41/50], Step [100/938], Loss: 0.0861
Epoch [41/50], Step [200/938], Loss: 0.1039
Epoch [41/50], Step [300/938], Loss: 0.1540
Epoch [41/50], Step [400/938], Loss: 0.1482
Epoch [41/50], Step [500/938], Loss: 0.1390
Epoch [41/50], Step [600/938], Loss: 0.2241
Epoch [41/50], Step [700/938], Loss: 0.1770
Epoch [41/50], Step [800/938], Loss: 0.2754
Epoch [41/50], Step [900/938], Loss: 0.1539
Test Accuracy: 0.9486
Epoch [42/50], Step [100/938], Loss: 0.1371
Epoch [42/50], Step [200/938], Loss: 0.0990
Epoch [42/50], Step [300/938], Loss: 0.1778
Epoch [42/50], Step [400/938], Loss: 0.1020
Epoch [42/50], Step [500/938], Loss: 0.2769
Epoch [42/50], Step [600/938], Loss: 0.1611
Epoch [42/50], Step [700/938], Loss: 0.1086
Epoch [42/50], Step [800/938], Loss: 0.2256
Epoch [42/50], Step [900/938], L

In [44]:
# 在测试集上评估模型
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for data, targets in test_loader:
        data = data.view(-1, input_size)
        outputs = model(data)
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += (predicted == targets).sum().item()

    accuracy = correct / total
    print(f'Test Accuracy: {accuracy:.4f}')

Test Accuracy: 0.9136


In [11]:
# 加载数据集
train_dataset = MNIST(root='~/Datasets/MNIST', train=True, transform=ToTensor(), download=True)
test_dataset = MNIST(root='~/Datasets/MNIST', train=False, transform=ToTensor(), download=False)

train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)



## 手动实现momentum 做优化器

In [12]:


momentum=0.9
##定义损失函数
# 定义损失函数和优化器
criterion = nn.CrossEntropyLoss()

params=list(model.parameters())
states = init_momentum_states(params)
for epoch in range(num_epochs):
    train_l_sum,train_acc_sum,n, c = 0.0,0.0,0,0
    for X, y in train_loader:
        # 将图像数据展平为向量
        X = X.view(-1, input_size)

        y_hat = model.forward (X)
        l = criterion(y_hat,y).sum()
        l.backward()
        sgd_momentum(model.parameters(),states,learning_rate,momentum)
        for param in model.parameters():
            param.grad.data.zero_()
            
        train_l_sum += l.item()
        train_acc_sum += (y_hat.argmax(dim=1) == y ).sum( ).item()
        n += y.shape[0]
        c += 1
        
        # 清空梯度
        for param in model.parameters():
            param.grad.data.zero_()
    # 打印每个 epoch 的训练损失和准确率
    train_loss = train_l_sum / n
    train_acc = train_acc_sum / n
    print(f"Epoch [{epoch+1}/{num_epochs}] - Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}")


Epoch [1/10] - Train Loss: 0.0205, Train Acc: 0.7039
Epoch [2/10] - Train Loss: 0.0079, Train Acc: 0.8713
Epoch [3/10] - Train Loss: 0.0062, Train Acc: 0.8909
Epoch [4/10] - Train Loss: 0.0056, Train Acc: 0.8996
Epoch [5/10] - Train Loss: 0.0052, Train Acc: 0.9058
Epoch [6/10] - Train Loss: 0.0049, Train Acc: 0.9106
Epoch [7/10] - Train Loss: 0.0047, Train Acc: 0.9146
Epoch [8/10] - Train Loss: 0.0045, Train Acc: 0.9180
Epoch [9/10] - Train Loss: 0.0043, Train Acc: 0.9219
Epoch [10/10] - Train Loss: 0.0041, Train Acc: 0.9244


In [39]:
# 加载数据集
train_dataset = MNIST(root='~/Datasets/MNIST', train=True, transform=ToTensor(), download=True)
test_dataset = MNIST(root='~/Datasets/MNIST', train=False, transform=ToTensor(), download=False)

train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)



## 手动实现RMSprop 做优化器

In [23]:
###使用RMSprop优化算法
##定义损失函数
# 定义损失函数和优化器

gamma=0.9
criterion = nn.CrossEntropyLoss()

params=list(model.parameters())
states = init_rmsprop_states(params)
##初始化
for epoch in range(num_epochs):
    train_l_sum,train_acc_sum,n, c = 0.0,0.0,0,0
    for X, y in train_loader:
        # 将图像数据展平为向量
        X = X.view(-1, input_size)

        y_hat = model.forward (X)
        l = criterion(y_hat,y).sum()
        l.backward()
        rmsprop(model.parameters(),states,learning_rate,gamma)
        for param in model.parameters():
            param.grad.data.zero_()
            
        train_l_sum += l.item()
        train_acc_sum += (y_hat.argmax(dim=1) == y ).sum( ).item()
        n += y.shape[0]
        c += 1
        
        # 清空梯度
        for param in model.parameters():
            param.grad.data.zero_()
    # 打印每个 epoch 的训练损失和准确率
    train_loss = train_l_sum / n
    train_acc = train_acc_sum / n
    print(f"Epoch [{epoch+1}/{num_epochs}] - Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}")


Epoch [1/10] - Train Loss: 0.0055, Train Acc: 0.9043
Epoch [2/10] - Train Loss: 0.0026, Train Acc: 0.9523
Epoch [3/10] - Train Loss: 0.0018, Train Acc: 0.9673
Epoch [4/10] - Train Loss: 0.0014, Train Acc: 0.9745
Epoch [5/10] - Train Loss: 0.0011, Train Acc: 0.9799
Epoch [6/10] - Train Loss: 0.0009, Train Acc: 0.9824
Epoch [7/10] - Train Loss: 0.0008, Train Acc: 0.9852
Epoch [8/10] - Train Loss: 0.0007, Train Acc: 0.9875
Epoch [9/10] - Train Loss: 0.0006, Train Acc: 0.9894
Epoch [10/10] - Train Loss: 0.0005, Train Acc: 0.9907


# 在测试集上评估模型

In [25]:
# 在测试集上评估模型
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for data, targets in test_loader:
        data = data.view(-1, input_size)
        outputs = model(data)
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += (predicted == targets).sum().item()

    accuracy = correct / total
    print(f'Test Accuracy: {accuracy:.4f}')

Test Accuracy: 0.9785


## 手动实现Adam做优化器

In [42]:

###使用Adam优化算法
##定义损失函数
# 定义损失函数和优化器

t=0
criterion = nn.CrossEntropyLoss()

params=list(model.parameters())
states = init_adam_states(params)
##初始化
for epoch in range(num_epochs):
    train_l_sum,train_acc_sum,n, c = 0.0,0.0,0,0
    for X, y in train_loader:
        # 将图像数据展平为向量
        X = X.view(-1, input_size)

        y_hat = model.forward (X)
        l = criterion(y_hat,y).sum()
        l.backward()
        adam(model.parameters(),states,learning_rate,t)
        for param in model.parameters():
            param.grad.data.zero_()
            
        train_l_sum += l.item()
        train_acc_sum += (y_hat.argmax(dim=1) == y ).sum( ).item()
        n += y.shape[0]
        c += 1
        
        # 清空梯度
        for param in model.parameters():
            param.grad.data.zero_()
    # 打印每个 epoch 的训练损失和准确率
    train_loss = train_l_sum / n
    train_acc = train_acc_sum / n
    print(f"Epoch [{epoch+1}/{num_epochs}] - Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}")


Epoch [1/10] - Train Loss: 0.0053, Train Acc: 0.9052
Epoch [2/10] - Train Loss: 0.0033, Train Acc: 0.9403
Epoch [3/10] - Train Loss: 0.0027, Train Acc: 0.9516
Epoch [4/10] - Train Loss: 0.0023, Train Acc: 0.9588
Epoch [5/10] - Train Loss: 0.0019, Train Acc: 0.9649
Epoch [6/10] - Train Loss: 0.0017, Train Acc: 0.9700
Epoch [7/10] - Train Loss: 0.0015, Train Acc: 0.9734
Epoch [8/10] - Train Loss: 0.0013, Train Acc: 0.9758
Epoch [9/10] - Train Loss: 0.0012, Train Acc: 0.9781
Epoch [10/10] - Train Loss: 0.0011, Train Acc: 0.9806


In [10]:
params

[Parameter containing:
 tensor([[-0.0122,  0.0327,  0.0321,  ...,  0.0134, -0.0317, -0.0327],
         [-0.0354,  0.0173, -0.0207,  ...,  0.0203, -0.0192, -0.0034],
         [ 0.0129, -0.0164, -0.0002,  ...,  0.0019, -0.0271, -0.0014],
         ...,
         [-0.0139,  0.0092, -0.0231,  ...,  0.0089, -0.0245,  0.0085],
         [ 0.0206, -0.0255,  0.0088,  ..., -0.0238,  0.0314, -0.0311],
         [-0.0239, -0.0125, -0.0284,  ...,  0.0146, -0.0071,  0.0175]],
        requires_grad=True),
 Parameter containing:
 tensor([ 0.0092, -0.0109, -0.0352,  0.0090,  0.0267, -0.0016,  0.0292,  0.0075,
         -0.0229, -0.0115,  0.0184, -0.0191, -0.0315, -0.0332, -0.0198,  0.0305,
         -0.0140,  0.0231,  0.0070,  0.0120, -0.0244,  0.0092,  0.0350, -0.0111,
         -0.0304, -0.0224,  0.0123,  0.0063, -0.0074,  0.0230,  0.0049,  0.0190,
          0.0242, -0.0157,  0.0172, -0.0150,  0.0133,  0.0147,  0.0052,  0.0344,
         -0.0252, -0.0259, -0.0036, -0.0356, -0.0063, -0.0309, -0.0224, -0.0153

In [2]:
# 保存模型
torch.save(model.state_dict(), 'multi_class_model2.pth')

# 在多分类任务实验中分别手动和用torch.nn实现L2正则化

In [18]:
# 自定义数据集类和模型类，请根据你的数据和模型进行相应的定义

# 定义手动实现 L2 正则化的损失函数
def manual_l2_loss(model, l2_lambda):
    l2_reg = 0.0
    for param in model.parameters():
        l2_reg += torch.sum(param**2)
    return l2_lambda * l2_reg

# 其他代码不变，只修改损失函数的计算和反向传播部分

# 定义损失函数和优化器
wd = 0.001
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# 训练模型
start_time = time.time()
for epoch in range(num_epochs):
    model.train()
    for batch_idx, (data, targets) in enumerate(train_loader):
        data = data.view(-1, input_size).to(device)
        targets = targets.to(device)
        # 前向传播
        outputs = model(data)
        loss = criterion(outputs, targets)
        
        # 添加 L2 正则化项
        loss += manual_l2_loss(model, wd)
        
        # 反向传播和优化
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if (batch_idx + 1) % 100 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{batch_idx+1}/{len(train_loader)}], Loss: {loss.item():.4f}')

    # 在训练集上计算准确率并记录
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for data, targets in train_loader:
            data = data.view(-1, input_size).to(device)
            targets = targets.to(device)
            outputs = model(data)
            _, predicted = torch.max(outputs.data, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()
        train_accuracy = correct / total
        train_accuracy_list.append(train_accuracy)

    # 在测试集上评估模型并计算准确率并记录
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for data, targets in test_loader:
            data = data.view(-1, input_size).to(device)
            targets = targets.to(device)
            outputs = model(data)
            _, predicted = torch.max(outputs.data, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()
        
        test_accuracy = correct / total
        test_accuracy_list.append(test_accuracy)
        
        print(f'Epoch [{epoch+1}/{num_epochs}], Test Accuracy: {test_accuracy:.4f}')

end_time = time.time()
training_time = end_time - start_time
print("模型的training_time为" + str(round(training_time, 2)) + "秒")

# 绘制训练集和测试集准确率图表
plt.figure(figsize=(8, 6))
epochs = range(1, num_epochs + 1)
plt.plot(epochs, train_accuracy_list, label='Train Accuracy', color='blue')
plt.plot(epochs, test_accuracy_list, label='Test Accuracy', color='red')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Training and Test Accuracy')
plt.legend()
plt.show()


AttributeError: 'FeedforwardNN' object has no attribute 'dropout_prob'

## 以前馈神经网络为例，当使用dropout时，前馈神经网络隐藏层中的隐藏单元hi有一定概率被丢弃掉。

# 设丢弃概率为p，那么有p的概率hi会被清0，有1-p的概率hi会被除以1-p做拉伸。由此定义进行dropout操作的函数

In [4]:
def dropout(X,drop_prob):
    X=X.float()
    #检查丢弃概率是否在0到1之间
    assert 0<=drop_prob<=1
    keep_prob = 1-drop_prob
    #这种情况下吧全部元素都丢弃
    if keep_prob == 0:
        return torch.zeros_like(X)
    #生成mask矩阵（向量）
    mask=(torch.rand(X.shape)<keep_prob).float()
    #按照mask进行对X进行变换
    return mask*X/keep_prob



# 初始化一个向量X，对X进行dropout,分别设置丢弃率为0、0.5、1.实验结果如下：

In [2]:
import torch
X = torch.arange(10).view(2,5)

# 快捷键P+M可以把单元格切换成markdown的格式

In [5]:
print(dropout(X,0),'\n')
print(dropout(X,0.5),'\n')
print(dropout(X,1),'\n')

tensor([[0., 1., 2., 3., 4.],
        [5., 6., 7., 8., 9.]]) 

tensor([[ 0.,  2.,  0.,  6.,  8.],
        [ 0., 12.,  0., 16.,  0.]]) 

tensor([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]]) 



In [None]:
drop_prob1,drop_prob2=0.2,0.5

def net(X,is_training=True):
    X=X.view(-1,num_inputs)
    H1 = (torch.)