In [1]:
# import necessary libraries
import torch
import os
import shutil
import copy
import datetime
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, models, datasets
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from copy import deepcopy
from torch.optim import lr_scheduler
from torchvision.datasets import ImageFolder


In [3]:
# Pick the best available accelerator instead of hard-coding 'mps', so the
# cell also runs on machines without Apple-silicon GPU support.
if torch.backends.mps.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Smoke-test the device with a one-element tensor. The previous version
# allocated a 12288 x 12288 float32 tensor (~576 MB) that stayed resident
# for the whole session just to print the device name.
x = torch.ones(1, dtype=torch.float32, device=device)
print("MPS Device:", x.device)
print(f"Using device: {device}")

# Load an ImageNet-pretrained ResNet-18. `pretrained=True` is deprecated in
# torchvision; IMAGENET1K_V1 is the weight set that flag used to select.
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

# Replace the classification head so it emits one logit per target class.
num_classes = 200
model.fc = nn.Linear(model.fc.in_features, num_classes)

# Move the model to the selected device.
model = model.to(device)

# Loss function.
criterion = nn.CrossEntropyLoss()

# Layer-wise learning rates: the freshly initialised head learns fastest and
# each earlier stage gets a 10x smaller step.
# NOTE: the stem (model.conv1 / model.bn1) is not passed to the optimizer, so
# its weights are never updated by optimizer.step().
optimizer = optim.SGD([
    {'params': model.fc.parameters(), 'lr': 1e-2},
    {'params': model.layer4.parameters(), 'lr': 1e-3},
    {'params': model.layer3.parameters(), 'lr': 1e-4},
    {'params': model.layer2.parameters(), 'lr': 1e-5},
    {'params': model.layer1.parameters(), 'lr': 1e-6},
], momentum=0.9)


MPS Device: mps:0
Using device: mps


In [12]:
# Dataset location and the two index files this cell reads.
# NOTE(review): absolute, machine-specific path; a later cell uses the
# relative path 'CUB_200_2011' for the same data - consider unifying them.
data_dir = '/Users/fuchenxu/Desktop/computer vision/midterm_project/task_1/CUB_200_2011'
images_dir = os.path.join(data_dir, 'images')
split_file = os.path.join(data_dir, 'train_test_split.txt')
images_file = os.path.join(data_dir, 'images.txt')

# Create the train and val output directories.
train_dir = os.path.join(data_dir, 'train')
val_dir = os.path.join(data_dir, 'val')
os.makedirs(train_dir, exist_ok=True)
os.makedirs(val_dir, exist_ok=True)

# images.txt: one "<image id> <path relative to images/>" pair per line.
image_paths = {}
with open(images_file, 'r') as f:
    for line in f:  # stream the file instead of materialising it with readlines()
        fields = line.split()
        if not fields:
            continue  # tolerate blank lines (e.g. a trailing empty line)
        img_id, img_path = fields
        image_paths[int(img_id)] = img_path

# train_test_split.txt: one "<image id> <flag>" pair per line; flag 1 sends
# the image to train/, anything else sends it to val/.
with open(split_file, 'r') as f:
    for line in f:
        fields = line.split()
        if not fields:
            continue
        img_id, is_train = fields
        img_id = int(img_id)
        is_train = int(is_train)

        # Source image path.
        img_file = os.path.join(images_dir, image_paths[img_id])

        # Destination split.
        target_dir = train_dir if is_train == 1 else val_dir

        # Mirror the per-class sub-directory (the image's parent folder name).
        class_name = os.path.basename(os.path.dirname(img_file))
        target_class_dir = os.path.join(target_dir, class_name)
        os.makedirs(target_class_dir, exist_ok=True)

        # Copy only when the file is not there yet, so re-running the cell
        # does not re-copy the whole dataset.
        target_file = os.path.join(target_class_dir, os.path.basename(img_file))
        if not os.path.exists(target_file):
            shutil.copy(img_file, target_class_dir)

print("Data split completed!")



Data split completed!


In [4]:
# 1. Preprocessing pipelines. Both splits end with the same tensor conversion
#    and per-channel normalisation; only the cropping strategy differs.
imagenet_normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
data_transforms = {
    # Training: random 224x224 crop plus random horizontal flip for augmentation.
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        imagenet_normalize,
    ]),
    # Validation: deterministic resize followed by a central 224x224 crop.
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        imagenet_normalize,
    ]),
}

# 2. Dataset location (relative to the current working directory).
data_dir = 'CUB_200_2011'
train_dir = os.path.join(data_dir, 'train')
val_dir = os.path.join(data_dir, 'val')

# Fail early with a clear message if either split directory is missing.
for split_label, split_path in (('Training', train_dir), ('Validation', val_dir)):
    if not os.path.exists(split_path):
        raise FileNotFoundError(f'{split_label} directory not found: {split_path}')

# 3. One ImageFolder dataset per split, each with its own transform pipeline.
split_dirs = {'train': train_dir, 'val': val_dir}
image_datasets = {
    phase: ImageFolder(root=split_dirs[phase], transform=data_transforms[phase])
    for phase in ('train', 'val')
}

# 4. Data loaders: only the training split is shuffled.
dataloaders = {
    phase: DataLoader(
        image_datasets[phase],
        batch_size=32,
        shuffle=(phase == 'train'),
        num_workers=4,
    )
    for phase in ('train', 'val')
}

# 5. Number of samples per split.
dataset_sizes = {phase: len(dataset) for phase, dataset in image_datasets.items()}
print(f"Training dataset size: {dataset_sizes['train']}")
print(f"Validation dataset size: {dataset_sizes['val']}")

# 6. Class names as discovered by ImageFolder in the training split.
class_names = image_datasets['train'].classes
print(f"Class names: {class_names}")



Training dataset size: 5994
Validation dataset size: 5794
Class names: ['001.Black_footed_Albatross', '002.Laysan_Albatross', '003.Sooty_Albatross', '004.Groove_billed_Ani', '005.Crested_Auklet', '006.Least_Auklet', '007.Parakeet_Auklet', '008.Rhinoceros_Auklet', '009.Brewer_Blackbird', '010.Red_winged_Blackbird', '011.Rusty_Blackbird', '012.Yellow_headed_Blackbird', '013.Bobolink', '014.Indigo_Bunting', '015.Lazuli_Bunting', '016.Painted_Bunting', '017.Cardinal', '018.Spotted_Catbird', '019.Gray_Catbird', '020.Yellow_breasted_Chat', '021.Eastern_Towhee', '022.Chuck_will_Widow', '023.Brandt_Cormorant', '024.Red_faced_Cormorant', '025.Pelagic_Cormorant', '026.Bronzed_Cowbird', '027.Shiny_Cowbird', '028.Brown_Creeper', '029.American_Crow', '030.Fish_Crow', '031.Black_billed_Cuckoo', '032.Mangrove_Cuckoo', '033.Yellow_billed_Cuckoo', '034.Gray_crowned_Rosy_Finch', '035.Purple_Finch', '036.Northern_Flicker', '037.Acadian_Flycatcher', '038.Great_Crested_Flycatcher', '039.Least_Flycatcher', 

In [14]:
def train_model(model, criterion, optimizer, dataloaders, dataset_sizes, device, num_epochs=20):
    """Train ``model`` and return it loaded with its best-validation weights.

    Every epoch runs a 'train' phase (with gradient updates) followed by a
    'val' phase (evaluation only). Per-phase loss and accuracy are printed
    and logged to TensorBoard via a default ``SummaryWriter``.

    Args:
        model: Network to train. This function does not move it to ``device``.
        criterion: Callable ``criterion(outputs, labels)`` returning the loss.
        optimizer: Optimizer; ``zero_grad()`` is called for every batch and
            ``step()`` for every training batch.
        dataloaders: Mapping with 'train' and 'val' iterables that yield
            ``(inputs, labels)`` batches.
        dataset_sizes: Mapping with the sample count of 'train' and 'val',
            used to average the loss and accuracy.
        device: Device each batch is moved to.
        num_epochs: Number of epochs to run.

    Returns:
        The same ``model`` object, with the weights of the epoch that reached
        the highest validation accuracy loaded (or the starting weights if no
        epoch exceeded 0.0).
    """
    # Snapshot the starting weights so there is always a valid state to
    # restore, even if validation accuracy never rises above 0.0.
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    # TensorBoard writer; closed in `finally` so it is released on errors too.
    writer = SummaryWriter()
    try:
        for epoch in range(num_epochs):
            print(f'Epoch {epoch}/{num_epochs - 1}')
            print('-' * 10)

            for phase in ['train', 'val']:
                if phase == 'train':
                    model.train()
                else:
                    model.eval()

                running_loss = 0.0
                # Plain int so the epoch statistics work even for an empty loader.
                running_corrects = 0

                for inputs, labels in dataloaders[phase]:
                    inputs = inputs.to(device)
                    labels = labels.to(device)
                    optimizer.zero_grad()

                    # Only build the autograd graph during training.
                    with torch.set_grad_enabled(phase == 'train'):
                        outputs = model(inputs)
                        _, preds = torch.max(outputs, 1)
                        loss = criterion(outputs, labels)

                        if phase == 'train':
                            loss.backward()
                            optimizer.step()

                    # Weight the batch loss by batch size so the epoch value
                    # is a per-sample average.
                    running_loss += loss.item() * inputs.size(0)
                    running_corrects += int((preds == labels).sum().item())

                epoch_loss = running_loss / dataset_sizes[phase]
                epoch_acc = running_corrects / dataset_sizes[phase]

                print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

                # Log loss and accuracy to TensorBoard.
                writer.add_scalar(f'{phase}_Loss', epoch_loss, epoch)
                writer.add_scalar(f'{phase}_Accuracy', epoch_acc, epoch)

                # Keep a copy of the weights from the best validation epoch.
                if phase == 'val' and epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_model_wts = copy.deepcopy(model.state_dict())
    finally:
        writer.close()

    print(f'Best val Acc: {best_acc:.4f}')

    # Restore the best weights before handing the model back.
    model.load_state_dict(best_model_wts)
    return model


In [8]:
# Launch TensorBoard on the project directory via a shell escape. The command
# runs in the foreground, so this cell blocks until it is interrupted.
!tensorboard --logdir="/Users/fuchenxu/Desktop/computer vision/midterm_project/task_1"


Serving TensorBoard on localhost; to expose to the network, use a proxy or pass --bind_all
TensorBoard 2.16.2 at http://localhost:6007/ (Press CTRL+C to quit)
^C


In [11]:

# Timestamp (second resolution) used to give each saved checkpoint its own name.
current_time = f"{datetime.datetime.now():%Y%m%d-%H%M%S}"
filename = 'best_model_' + current_time + '.pth'

# Save only the weights (state_dict) of whatever `model` is currently bound
# in the kernel.
torch.save(model.state_dict(), filename)
print(f"Model saved as '{filename}'")


Model saved as 'best_model_20240520-093549.pth'


In [10]:
# Train the model; train_model returns the same model after loading the
# weights it recorded as best on the validation split.
num_epochs = 20
model = train_model(
    model,
    criterion,
    optimizer,
    dataloaders,
    dataset_sizes,
    device,
    num_epochs=num_epochs,
)

Epoch 0/19
----------
train Loss: 4.2920 Acc: 0.1253
val Loss: 2.5077 Acc: 0.3771
Epoch 1/19
----------
train Loss: 2.4599 Acc: 0.4164
val Loss: 1.7314 Acc: 0.5352
Epoch 2/19
----------
train Loss: 1.8941 Acc: 0.5255
val Loss: 1.4016 Acc: 0.6170
Epoch 3/19
----------
train Loss: 1.6167 Acc: 0.5874
val Loss: 1.2796 Acc: 0.6486
Epoch 4/19
----------
train Loss: 1.4183 Acc: 0.6428
val Loss: 1.2165 Acc: 0.6609
Epoch 5/19
----------
train Loss: 1.3027 Acc: 0.6678
val Loss: 1.1157 Acc: 0.6873
Epoch 6/19
----------
train Loss: 1.1491 Acc: 0.7032
val Loss: 1.1183 Acc: 0.6873
Epoch 7/19
----------
train Loss: 1.1370 Acc: 0.7119
val Loss: 1.0885 Acc: 0.6974
Epoch 8/19
----------
train Loss: 1.0257 Acc: 0.7426
val Loss: 1.0672 Acc: 0.7016
Epoch 9/19
----------
train Loss: 0.9851 Acc: 0.7466
val Loss: 1.0539 Acc: 0.7113
Epoch 10/19
----------
train Loss: 0.9204 Acc: 0.7629
val Loss: 1.0484 Acc: 0.7083
Epoch 11/19
----------
train Loss: 0.8982 Acc: 0.7688
val Loss: 1.0504 Acc: 0.7175
Epoch 12/19
--

In [11]:

def initialize_model(num_classes, use_pretrained=True):
    """Build a ResNet-18 whose final layer outputs ``num_classes`` logits.

    Args:
        num_classes: Number of output units of the replacement ``fc`` layer.
        use_pretrained: If True, start from the ImageNet (IMAGENET1K_V1)
            weights; if False, use random initialisation. Previously this
            flag was ignored and the network was always randomly initialised.

    Returns:
        The ResNet-18 module (not yet moved to any device).
    """
    weights = models.ResNet18_Weights.IMAGENET1K_V1 if use_pretrained else None
    model = models.resnet18(weights=weights)
    # The replacement head is always freshly initialised.
    model.fc = nn.Linear(model.fc.in_features, num_classes)
    return model

def set_optimizer(model, learning_rate):
    """Return an SGD optimizer (momentum 0.9) over all parameters of ``model``.

    Every parameter shares the single ``learning_rate``.
    """
    return optim.SGD(params=model.parameters(), momentum=0.9, lr=learning_rate)

# Setup for the from-scratch experiment: a ResNet-18 requested without
# pretrained weights, trained with one learning rate for every layer.
num_classes = 200
num_epochs = 20
learning_rate = 1e-2  # single learning rate, for simplicity

criterion = nn.CrossEntropyLoss()
model = initialize_model(num_classes=num_classes, use_pretrained=False)
model = model.to(device)
optimizer = set_optimizer(model, learning_rate)

In [None]:
# Run the training loop on the model currently bound to `model`.
training_from_random_initialization = train_model(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    dataloaders=dataloaders,
    dataset_sizes=dataset_sizes,
    device=device,
    num_epochs=num_epochs,
)

In [6]:
def train_model2(model, criterion, optimizer, dataloaders, dataset_sizes, device, num_epochs=20,
                 step_size=5, gamma=0.1):
    """Train ``model`` with step learning-rate decay and return it with its best weights.

    Every epoch runs a 'train' phase (with gradient updates) followed by a
    'val' phase (evaluation only), then advances a ``StepLR`` scheduler.
    Per-phase loss and accuracy are printed and logged to TensorBoard via a
    default ``SummaryWriter``.

    Args:
        model: Network to train. This function does not move it to ``device``.
        criterion: Callable ``criterion(outputs, labels)`` returning the loss.
        optimizer: Optimizer; ``zero_grad()`` is called for every batch and
            ``step()`` for every training batch. Its learning rates are
            modified by the scheduler.
        dataloaders: Mapping with 'train' and 'val' iterables that yield
            ``(inputs, labels)`` batches.
        dataset_sizes: Mapping with the sample count of 'train' and 'val',
            used to average the loss and accuracy.
        device: Device each batch is moved to.
        num_epochs: Number of epochs to run.
        step_size: ``StepLR`` period in epochs (default 5, the previously
            hard-coded value).
        gamma: ``StepLR`` multiplicative decay factor (default 0.1, the
            previously hard-coded value).

    Returns:
        The same ``model`` object, with the weights of the epoch that reached
        the highest validation accuracy loaded (or the starting weights if no
        epoch exceeded 0.0).
    """
    # Start from the current weights so there is always a state to restore.
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    # TensorBoard writer; closed in `finally` so it is released on errors too.
    writer = SummaryWriter()

    # Learning-rate decay schedule.
    scheduler = lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)

    try:
        for epoch in range(num_epochs):
            print(f'Epoch {epoch}/{num_epochs - 1}')
            print('-' * 10)

            # Each epoch has a training and a validation phase.
            for phase in ['train', 'val']:
                if phase == 'train':
                    model.train()  # training mode
                else:
                    model.eval()   # evaluation mode

                running_loss = 0.0
                # Plain int so the epoch statistics work even for an empty loader.
                running_corrects = 0

                for inputs, labels in dataloaders[phase]:
                    inputs = inputs.to(device)
                    labels = labels.to(device)

                    # Reset gradients accumulated by the previous batch.
                    optimizer.zero_grad()

                    # Forward pass; only build the autograd graph in training.
                    with torch.set_grad_enabled(phase == 'train'):
                        outputs = model(inputs)
                        _, preds = torch.max(outputs, 1)
                        loss = criterion(outputs, labels)

                        # Backward pass and parameter update in training only.
                        if phase == 'train':
                            loss.backward()
                            optimizer.step()

                    # Weight the batch loss by batch size so the epoch value
                    # is a per-sample average.
                    running_loss += loss.item() * inputs.size(0)
                    running_corrects += int((preds == labels).sum().item())

                epoch_loss = running_loss / dataset_sizes[phase]
                epoch_acc = running_corrects / dataset_sizes[phase]

                print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

                # Log loss and accuracy to TensorBoard.
                writer.add_scalar(f'{phase}_Loss', epoch_loss, epoch)
                writer.add_scalar(f'{phase}_Accuracy', epoch_acc, epoch)

                # Keep a copy of the weights from the best validation epoch.
                if phase == 'val' and epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_model_wts = copy.deepcopy(model.state_dict())

            # Advance the learning-rate schedule once per epoch.
            scheduler.step()
    finally:
        writer.close()

    print(f'Best val Acc: {best_acc:.4f}')

    # Restore the best weights before handing the model back.
    model.load_state_dict(best_model_wts)
    return model

criterion = nn.CrossEntropyLoss()

In [7]:
# Train with the step-decay schedule; train_model2 returns the same model
# after loading the weights it recorded as best on the validation split.
num_epochs = 20
model = train_model2(
    model,
    criterion,
    optimizer,
    dataloaders,
    dataset_sizes,
    device,
    num_epochs=num_epochs,
)

Epoch 0/19
----------
train Loss: 4.2656 Acc: 0.1376
val Loss: 2.5544 Acc: 0.3769
Epoch 1/19
----------
train Loss: 2.4822 Acc: 0.4101
val Loss: 1.7429 Acc: 0.5259
Epoch 2/19
----------
train Loss: 1.8937 Acc: 0.5232
val Loss: 1.3962 Acc: 0.6206
Epoch 3/19
----------
train Loss: 1.6329 Acc: 0.5889
val Loss: 1.2781 Acc: 0.6422
Epoch 4/19
----------
train Loss: 1.4293 Acc: 0.6400
val Loss: 1.1714 Acc: 0.6678
Epoch 5/19
----------
train Loss: 1.1833 Acc: 0.7187
val Loss: 1.0351 Acc: 0.7190
Epoch 6/19
----------
train Loss: 1.1190 Acc: 0.7389
val Loss: 1.0330 Acc: 0.7201
Epoch 7/19
----------
train Loss: 1.1579 Acc: 0.7326
val Loss: 1.0201 Acc: 0.7209
Epoch 8/19
----------
train Loss: 1.1176 Acc: 0.7372
val Loss: 1.0157 Acc: 0.7201
Epoch 9/19
----------
train Loss: 1.1017 Acc: 0.7431
val Loss: 1.0172 Acc: 0.7228
Epoch 10/19
----------
train Loss: 1.1061 Acc: 0.7471
val Loss: 1.0143 Acc: 0.7232
Epoch 11/19
----------
train Loss: 1.0790 Acc: 0.7526
val Loss: 1.0120 Acc: 0.7237
Epoch 12/19
--