In [None]:
# Environment check (shell commands): show the GPUs/driver visible to this
# session, then the version of the installed CUDA compiler.
!nvidia-smi
!nvcc --version

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
# import torchvision.transforms as transforms
import torchsummary
from tqdm import tqdm

In [None]:
# 1) Pick the compute device: the GPU when CUDA is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using {device} device")

# Mini-batch size shared by the training and evaluation loaders.
# The evaluation loader used to request batch_size=1000, a setting that suits
# 28x28 MNIST digits. For 3x224x224 float32 images one such batch is
# 1000 * 3 * 224 * 224 * 4 bytes ~= 602 MB, and every DataLoader worker
# prefetches batches of that size. The accuracy helper only counts per-sample
# argmax matches, so a smaller evaluation batch does not change the metric.
BATCH_SIZE = 64

# <1> Preprocessing: resize every image to 224x224 and convert it to a float
# tensor (ToTensor scales pixel values to [0, 1]).
transform = torchvision.transforms.Compose([
        torchvision.transforms.Resize((224, 224)),
        torchvision.transforms.ToTensor()  ])

# <2> Load the ImageNet-10 images (ImageFolder: one sub-directory per class).
dataset = torchvision.datasets.ImageFolder(root="/kaggle/input/imgnet-10/ImageNet-10", transform=transform)
# 80% / 20% train/test split; the absolute counts depend on len(dataset).
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
# Seed the global RNG so the random split is the same on every run.
torch.manual_seed(42)
trainset, testset = torch.utils.data.random_split(dataset, [train_size, test_size])
trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)
testloader = torch.utils.data.DataLoader(testset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

In [None]:
# <3> 定义AlexNet模型
# AlexNet as a flat nn.Sequential. The shape notes assume a 3x224x224 input,
# which is also the size passed to torchsummary below.
alexnet = nn.Sequential(
    # --- convolutional feature extractor ---
    nn.Conv2d(in_channels=3, out_channels=96, kernel_size=11, stride=4, padding=1),  # -> 96 x 54 x 54
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2),                                           # -> 96 x 26 x 26
    nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, padding=2),           # -> 256 x 26 x 26
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2),                                           # -> 256 x 12 x 12
    nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, padding=1),          # -> 384 x 12 x 12
    nn.ReLU(),
    nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, padding=1),          # -> 384 x 12 x 12
    nn.ReLU(),
    nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, padding=1),          # -> 256 x 12 x 12
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2),                                           # -> 256 x 5 x 5
    # --- fully connected classifier ---
    nn.Flatten(),                                                                    # -> 6400 (= 256 * 5 * 5)
    nn.Linear(in_features=6400, out_features=4096),
    nn.ReLU(),
    nn.Dropout(0.5),  # regularisation: zero each activation with probability 0.5 while training
    nn.Linear(in_features=4096, out_features=4096),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(in_features=4096, out_features=10),  # one raw score (logit) per class
)
# 2) Move the model's parameters onto the selected device.
alexnet = alexnet.to(device)

# Print a per-layer table of output shapes and parameter counts.
torchsummary.summary(alexnet, input_size=(3, 224, 224))

In [None]:
# <4> 损失函数
# Cross-entropy between the network's raw class scores and the integer labels.
criterion = nn.CrossEntropyLoss()

# <5> Optimiser: Adam over every AlexNet parameter, learning rate 1e-4.
optimizer = optim.Adam(params=alexnet.parameters(), lr=1e-4)

In [None]:
# <6> 自定义计算准确率函数
def test(model, testloader):
    # <6-1> 进入评估模式
    model.eval()

    # <6-2> 计算输出
    correct = total = 0
    with torch.no_grad():   # 关闭梯度计算
        # 从测试集中一批一批地取数据, 每批64个, 一共取260/64=4次+1次
        for images, labels in testloader:

            # 4) 将测试数据加载到GPU上
            images, labels = images.to(device), labels.to(device)

            # 计算预测值
            outputs = model(images)

            # <6-3> 计算准确率
            # 按行(=1)取最大值，返回最大值、最大值的索引(预测结果)
            _, predicted = torch.max(outputs.data, 1)

            # 预测值与真实值比较后, 再求和
            correct += (predicted == labels).sum().item()
            # 测试集总数
            total += labels.size(0)

    # 准确率
    accuracy = correct / total

    # 返回：准确率
    return accuracy

In [None]:
# <7> 训练模型
# 初始化训练过程中的指标：训练精度、测试精度、损失，用于绘图
# One entry per epoch in each list; they are plotted after training.
train_acc_history = []
test_acc_history = []
loss_history = []

for epoch in tqdm(range(10)):  # 10 passes over the training set
    # Back to training mode: the accuracy helper leaves the model in eval mode.
    alexnet.train()
    epoch_loss = 0.0    # sum of the batch losses of this epoch
    running_loss = 0.0  # sum of the batch losses since the last progress line

    # len(trainloader) batches per epoch; the last one may be smaller.
    for i, data in enumerate(trainloader):
        # 3) Move the images and labels of this batch onto the model's device.
        X_train, y_train = data[0].to(device), data[1].to(device)

        # One optimisation step: reset gradients, forward, backward, update.
        optimizer.zero_grad()
        outputs = alexnet(X_train)
        loss = criterion(outputs, y_train)
        loss.backward()
        optimizer.step()

        # Read the scalar loss once and feed both accumulators.
        batch_loss = loss.item()
        epoch_loss += batch_loss
        running_loss += batch_loss

        # Progress line with the mean loss of the last 100 batches.
        if (i + 1) % 100 == 0:
            print(f"[Epoch {epoch + 1}, Batch {i + 1}] loss: {running_loss / 100:.8f}")
            running_loss = 0.0

    # End of epoch: score the model (in eval mode) on both splits and record
    # the mean batch loss. Training accuracy needs an extra full pass over
    # the training set.
    train_acc_history.append(test(alexnet, trainloader))
    test_acc_history.append(test(alexnet, testloader))
    loss_history.append(epoch_loss / len(trainloader))
    print(f"Epoch {epoch + 1} loss: {epoch_loss / len(trainloader):.8f}, train accuracy: {train_acc_history[-1]:.3%}, test accuracy: {test_acc_history[-1]:.3%}")

# Ask PyTorch's caching allocator to release unused GPU memory.
torch.cuda.empty_cache()

# Report the GPU memory still occupied by live tensors.
print(f"CUDA memory allocated: {torch.cuda.memory_allocated(device) / 1024**2:.0f} MB")

print("Finished Training")

In [None]:
# <8> 绘制训练过程中的指标
import matplotlib.pyplot as plt
from matplotlib import rcParams  # 设置全局参数，为了设置中文字体


# 定义绘制函数
def draw_plot(train_acc_history, test_acc_history, loss_history):
    """Draw the three per-epoch metric series as lines on one set of axes.

    Args:
        train_acc_history: Sequence of training accuracies, one per epoch.
        test_acc_history: Sequence of test accuracies, one per epoch.
        loss_history: Sequence of mean training losses, one per epoch.

    The figure is shown immediately; nothing is returned. All three curves
    share the same y-axis.
    """
    fig, ax = plt.subplots()
    curves = (
        (train_acc_history, "train accuracy"),
        (test_acc_history, "test accuracy"),
        (loss_history, "loss"),
    )
    for values, name in curves:
        ax.plot(values, label=name)
    ax.legend()  # show which line is which
    ax.set_xlabel("迭代次数")
    plt.show()


# Plot the metric histories currently held in the notebook, then print the
# last recorded accuracies as percentages with two decimals.
draw_plot(train_acc_history, test_acc_history, loss_history)
print(f"最终训练精度: {train_acc_history[-1]:.2%}")  # label reads "final training accuracy"
print(f"最终测试精度: {test_acc_history[-1]:.2%}")  # label reads "final test accuracy"

In [None]:
from PIL import Image
import torch
import torchvision.transforms as transforms
import torchvision.models as models

# Open the image to classify and force it to 3-channel RGB.
# Without the conversion a grayscale, palette or RGBA file would reach the
# network with the wrong number of channels and fail in the first convolution
# (which expects 3 input channels). The training images come through
# ImageFolder, whose default loader performs the same RGB conversion.
image_path = "/kaggle/input/predict/chicken.jpg"
image = Image.open(image_path).convert("RGB")
display(image)

# Apply the same preprocessing as for training (resize to 224x224, to tensor),
# then add a leading batch dimension: (3, 224, 224) -> (1, 3, 224, 224).
image = transform(image)
image = image.unsqueeze(0)

# Evaluation mode: dropout is disabled for inference.
alexnet.eval()

# Forward pass without gradient tracking.
with torch.no_grad():
    output = alexnet(image.to(device))

# The predicted class is the index of the largest score.
# NOTE(review): this prints the numeric class index; it presumably maps to a
# class name via dataset.classes[index] - confirm before showing names.
_, predicted = torch.max(output, 1)
print(f"预测类别: {predicted.item()}")

In [None]:
# <3> 定义ResNet34模型
# Build ResNet-34 through the `torchvision` package imported in the notebook's
# import cell, so this cell no longer depends on the `models` alias that is
# only imported inside the single-image prediction cell.
# NOTE: no weights are requested, so the network starts from random
# initialisation - it is NOT pretrained.
# TODO(review): if transfer learning was intended, pass pretrained weights
# explicitly.
resnet34 = torchvision.models.resnet34()
# Replace the final fully connected layer so the network outputs 10 class scores.
num_ftrs = resnet34.fc.in_features
resnet34.fc = nn.Linear(num_ftrs, 10)
# 2) Move the model's parameters onto the selected device.
resnet34 = resnet34.to(device)

# Print a per-layer table of output shapes and parameter counts.
torchsummary.summary(resnet34, (3, 224, 224))

In [None]:
# <4> 损失函数
# Cross-entropy between the network's raw class scores and the integer labels.
criterion = nn.CrossEntropyLoss()

# <5> Optimiser: Adam over every ResNet-34 parameter, learning rate 1e-4.
# This rebinds `optimizer`, which until now pointed at the AlexNet optimiser.
optimizer = optim.Adam(params=resnet34.parameters(), lr=1e-4)

In [None]:
# <7> 训练模型
# 初始化训练过程中的指标：训练精度、测试精度、损失，用于绘图
# One entry per epoch in each list. These rebind the history names used by
# the AlexNet run, so the earlier curves are no longer reachable through them.
train_acc_history = []
test_acc_history = []
loss_history = []

print("Total number of images for training: ", len(trainset))

for epoch in range(10):  # 10 passes over the training set
    # Back to training mode: the accuracy helper leaves the model in eval mode.
    resnet34.train()
    epoch_loss = 0.0    # sum of the batch losses of this epoch
    running_loss = 0.0  # sum of the batch losses since the last progress line

    # len(trainloader) batches per epoch; the last one may be smaller.
    for i, data in enumerate(trainloader):
        # 3) Move the images and labels of this batch onto the model's device.
        X_train, y_train = data[0].to(device), data[1].to(device)

        # One optimisation step: reset gradients, forward, backward, update.
        optimizer.zero_grad()
        outputs = resnet34(X_train)
        loss = criterion(outputs, y_train)
        loss.backward()
        optimizer.step()

        # Read the scalar loss once and feed both accumulators.
        batch_loss = loss.item()
        epoch_loss += batch_loss
        running_loss += batch_loss

        # Progress line with the mean loss of the last 100 batches.
        if (i + 1) % 100 == 0:
            print(f"[Epoch {epoch + 1}, Batch {i + 1}] loss: {running_loss / 100:.8f}")
            running_loss = 0.0

    # End of epoch: score the model (in eval mode) on both splits and record
    # the mean batch loss. Training accuracy needs an extra full pass over
    # the training set.
    train_acc_history.append(test(resnet34, trainloader))
    test_acc_history.append(test(resnet34, testloader))
    loss_history.append(epoch_loss / len(trainloader))
    print(
        f"Epoch {epoch + 1} loss: {epoch_loss / len(trainloader):.8f}, train accuracy: {train_acc_history[-1]:.3%}, test accuracy: {test_acc_history[-1]:.3%}"
    )

# Ask PyTorch's caching allocator to release unused GPU memory.
torch.cuda.empty_cache()

# Report the GPU memory still occupied by live tensors.
print(f"CUDA memory allocated: {torch.cuda.memory_allocated(device) / 1024**2:.0f} MB")

print("Finished Training")