In [4]:
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
from torchvision.models import resnet18
from torch.utils.tensorboard import SummaryWriter
import numpy as np
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import pickle
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torch.nn.functional as F
from torchvision.transforms.functional import to_pil_image, to_tensor
import random
import gc

In [5]:
# 读取并解析二进制文件
train_path = '/Users/fuchenxu/Desktop/computer_vision/final_project/task_1/SIFAR-100/cifar-100-python/train'
test_path = '/Users/fuchenxu/Desktop/computer_vision/final_project/task_1/SIFAR-100/cifar-100-python/test'

with open(train_path, 'rb') as f:
    train_data = pickle.load(f, encoding='latin1')
with open(test_path, 'rb') as f:
    test_data = pickle.load(f, encoding='latin1')

# 检查 train_data 的内容
print(type(train_data),type(test_data))
print(len(train_data),len(test_data))
print(train_data.keys() if isinstance(train_data, dict) else "Not a dictionary")
print(test_data.keys() if isinstance(test_data, dict) else "Not a dictionary")

<class 'dict'> <class 'dict'>
5 5
dict_keys(['filenames', 'batch_label', 'fine_labels', 'coarse_labels', 'data'])
dict_keys(['filenames', 'batch_label', 'fine_labels', 'coarse_labels', 'data'])


In [6]:
# 自定义数据集类定义
class CustomBinaryDataset(Dataset):
    def __init__(self, data_dict, transform=None):
        self.filenames = data_dict['filenames']
        self.images = data_dict['data']
        self.labels = data_dict['fine_labels']
        self.transform = transform

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image = self.images[idx]
        label = self.labels[idx]

        # 将图像数据从一维数组转换为 32x32x3 的形状
        image = image.reshape(3, 32, 32).transpose(1, 2, 0)
        image = Image.fromarray(image)

        if self.transform:
            image = self.transform(image)

        return image, label

# CutMix 数据增强函数
def cutmix(data, targets, alpha=1.0):
    indices = torch.randperm(data.size(0))
    shuffled_data = data[indices]
    shuffled_targets = targets[indices]

    lam = np.random.beta(alpha, alpha)

    bbx1, bby1, bbx2, bby2 = rand_bbox(data.size(), lam)
    data[:, :, bbx1:bbx2, bby1:bby2] = shuffled_data[:, :, bbx1:bbx2, bby1:bby2]

    targets_a, targets_b = targets, shuffled_targets
    return data, targets_a, targets_b, lam

def rand_bbox(size, lam):
    W = size[2]
    H = size[3]
    cut_rat = np.sqrt(1. - lam)
    cut_w = int(W * cut_rat)  
    cut_h = int(H * cut_rat)  

    cx = np.random.randint(W)
    cy = np.random.randint(H)

    bbx1 = np.clip(cx - cut_w // 2, 0, W)
    bby1 = np.clip(cy - cut_h // 2, 0, H)
    bbx2 = np.clip(cx + cut_w // 2, 0, W)
    bby2 = np.clip(cy + cut_h // 2, 0, H)

    return bbx1, bby1, bbx2, bby2


# 数据增强和标准化
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.5071, 0.4865, 0.4409), (0.2673, 0.2564, 0.2761)),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5071, 0.4865, 0.4409), (0.2673, 0.2564, 0.2761)),
])

# 加载训练集数据
train_dataset = CustomBinaryDataset(data_dict=train_data, transform=transform_train)
train_loader = DataLoader(train_dataset, batch_size=256, shuffle=True)

# 加载测试集数据
test_dataset = CustomBinaryDataset(data_dict=test_data, transform=transform_test)
test_loader = DataLoader(test_dataset, batch_size=256, shuffle=False)

# 验证数据加载器是否正常工作
for images, labels in train_loader:
    print(images.shape, labels.shape)
    break
for images, labels in test_loader:
    print(images.shape, labels.shape)
    break

torch.Size([256, 3, 32, 32]) torch.Size([256])
torch.Size([256, 3, 32, 32]) torch.Size([256])


In [7]:
def train(model, train_loader, optimizer, criterion, device):
    model.train()
    total_loss = 0
    total_correct = 0
    for i, (images, labels) in enumerate(train_loader):
        images, labels = images.to(device), labels.to(device)
        
        # CutMix 数据增强
        images, targets_a, targets_b, lam = cutmix(images, labels, alpha=1.0)
        
        optimizer.zero_grad()
        outputs = model(images)
        
        loss = lam * criterion(outputs, targets_a) + (1 - lam) * criterion(outputs, targets_b)
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        total_correct += (lam * predicted.eq(targets_a).sum().item() + (1 - lam) * predicted.eq(targets_b).sum().item())
        
    avg_loss = total_loss / len(train_loader)
    accuracy = total_correct / len(train_loader.dataset)
    return avg_loss, accuracy

def test(model, test_loader, criterion, device):
    model.eval()
    total_loss = 0
    total_correct = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            
            total_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total_correct += predicted.eq(labels).sum().item()
            
    avg_loss = total_loss / len(test_loader)
    accuracy = total_correct / len(test_loader.dataset)
    return avg_loss, accuracy

def train_and_evaluate(model, train_loader, test_loader, params):
    device = torch.device(params['device'])
    model.to(device)
    
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=params['learning_rate'])
    
    writer = SummaryWriter()
    
    for epoch in range(params['num_epochs']):
        train_loss, train_accuracy = train(model, train_loader, optimizer, criterion, device)
        test_loss, test_accuracy = test(model, test_loader, criterion, device)
        
        print(f'Epoch [{epoch+1}/{params["num_epochs"]}], Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}')
        print(f'Epoch [{epoch+1}/{params["num_epochs"]}], Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}')
        
        writer.add_scalar('Loss/train', train_loss, epoch)
        writer.add_scalar('Loss/test', test_loss, epoch)
        writer.add_scalar('Accuracy/train', train_accuracy, epoch)
        writer.add_scalar('Accuracy/test', test_accuracy, epoch)
    
    torch.save(model.state_dict(), params['model_save_path'])
    writer.close()

In [15]:
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(256 * 4 * 4, 1024)
        self.fc2 = nn.Linear(1024, 100)
        self.pool = nn.MaxPool2d(2, 2)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.conv1(x))
        x = self.pool(x)
        x = self.relu(self.conv2(x))
        x = self.pool(x)
        x = self.relu(self.conv3(x))
        x = self.pool(x)
        x = x.view(-1, 256 * 4 * 4)
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [17]:
params_CNN = {
    'device': 'mps',
    'learning_rate': 1e-4,
    'num_epochs': 15,
    'batch_size': 256,
    'model_save_path': 'cnn_cifar100.pth'
}

In [18]:
cnn_model = SimpleCNN()
train_and_evaluate(cnn_model, train_loader, test_loader, params_CNN)


Epoch [1/15], Train Loss: 4.4342, Train Accuracy: 0.1133
Epoch [1/15], Test Loss: 4.0322, Test Accuracy: 0.2805
Epoch [2/15], Train Loss: 4.2466, Train Accuracy: 0.2024
Epoch [2/15], Test Loss: 3.7853, Test Accuracy: 0.4185
Epoch [3/15], Train Loss: 4.1604, Train Accuracy: 0.2442
Epoch [3/15], Test Loss: 3.6275, Test Accuracy: 0.4833
Epoch [4/15], Train Loss: 4.0780, Train Accuracy: 0.2896
Epoch [4/15], Test Loss: 3.5295, Test Accuracy: 0.5505
Epoch [5/15], Train Loss: 4.0391, Train Accuracy: 0.3107
Epoch [5/15], Test Loss: 3.3461, Test Accuracy: 0.6600
Epoch [6/15], Train Loss: 3.9801, Train Accuracy: 0.3407
Epoch [6/15], Test Loss: 3.2857, Test Accuracy: 0.6804
Epoch [7/15], Train Loss: 3.9650, Train Accuracy: 0.3512
Epoch [7/15], Test Loss: 3.1866, Test Accuracy: 0.7299
Epoch [8/15], Train Loss: 3.9353, Train Accuracy: 0.3705
Epoch [8/15], Test Loss: 3.2004, Test Accuracy: 0.7539
Epoch [9/15], Train Loss: 3.9089, Train Accuracy: 0.3878
Epoch [9/15], Test Loss: 3.1289, Test Accuracy:

In [8]:
class SimpleTransformer(nn.Module):
    def __init__(self):
        super(SimpleTransformer, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.transformer = nn.Transformer(d_model=64, nhead=8, num_encoder_layers=3)
        self.fc1 = nn.Linear(64 * 32 * 32, 1024)
        self.fc2 = nn.Linear(1024, 100)
        self.pool = nn.MaxPool2d(2, 2)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.conv1(x))
        x = x.view(x.size(0), -1, 64)
        x = self.transformer(x, x)
        x = x.view(-1, 64 * 32 * 32)
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [18]:
class SimpleTransformer(nn.Module):
    def __init__(self):
        super(SimpleTransformer, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)  
        self.transformer = nn.Transformer(d_model=32, nhead=4, num_encoder_layers=2)  
        self.fc1 = nn.Linear(32 * 32 * 32, 512)  
        self.fc2 = nn.Linear(512, 100)
        self.pool = nn.MaxPool2d(2, 2)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.conv1(x))
        x = x.view(x.size(0), -1, 32)
        x = self.transformer(x, x)
        x = x.view(-1, 32 * 32 * 32)
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x


In [19]:
params_Transformer = {
    'device': 'mps',
    'learning_rate': 1e-4,
    'num_epochs': 15,
    'batch_size': 256,
    'model_save_path': 'transformer_cifar100.pth'
}

In [None]:
transformer_model = SimpleTransformer()
train_and_evaluate(transformer_model, train_loader, test_loader, params_Transformer)


In [20]:
!tensorboard --logdir="/Users/fuchenxu/Desktop/computer_vision/final_project/task_2"

TensorFlow installation not found - running with reduced feature set.
Serving TensorBoard on localhost; to expose to the network, use a proxy or pass --bind_all
TensorBoard 2.14.0 at http://localhost:6006/ (Press CTRL+C to quit)
E0615 12:48:14.710119 6141571072 _internal.py:97] Error on request:
Traceback (most recent call last):
  File "/opt/anaconda3/envs/final/lib/python3.8/site-packages/werkzeug/serving.py", line 363, in run_wsgi
    execute(self.server.app)
  File "/opt/anaconda3/envs/final/lib/python3.8/site-packages/werkzeug/serving.py", line 324, in execute
    application_iter = app(environ, start_response)
  File "/opt/anaconda3/envs/final/lib/python3.8/site-packages/tensorboard/backend/application.py", line 528, in __call__
    return self._app(environ, start_response)
  File "/opt/anaconda3/envs/final/lib/python3.8/site-packages/tensorboard/backend/application.py", line 569, in wrapper
    return wsgi_app(environ, start_response)
  File "/opt/anaconda3/envs/final/lib/python