In [0]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [0]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy

In [3]:
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'test':  transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.4914, 0.48216, 0.44653],[0.24703, 0.24349, 0.26159])
    ])
}

trainset = torchvision.datasets.CIFAR10(root='./',
                    train=True,download=True,
                    transform=data_transforms['train'])
valset=torchvision.datasets.CIFAR10(root='./',
                  train=True,download=True,
                  transform=data_transforms['val'])

indices = np.arange(50000)
np.random.shuffle(indices)
train_loader = torch.utils.data.DataLoader(trainset,batch_size=100, shuffle=False, sampler=torch.utils.data.SubsetRandomSampler(indices[:45000]))
val_loader = torch.utils.data.DataLoader(valset,batch_size=100, shuffle=False, sampler=torch.utils.data.SubsetRandomSampler(indices[45000:50000]))



Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./cifar-10-python.tar.gz to ./
Files already downloaded and verified


In [0]:
class_names = trainset.classes
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# device = torch.device("cpu")
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
dataloaders={'train':train_loader,'val':val_loader}
dataset_sizes = {'train':45000,'val':5000}

In [5]:
class_names

['airplane',
 'automobile',
 'bird',
 'cat',
 'deer',
 'dog',
 'frog',
 'horse',
 'ship',
 'truck']

In [0]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            
            print(phase,':',time.strftime("%Y--%m--%d %H:%M:%S", time.localtime(int(time.time()))))
            if phase == 'train':
                scheduler.step()
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
#                 print('2:',time.time())
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
#                     print('3:',time.time())
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
#                     print('4:',time.time())
                    loss = criterion(outputs, labels)
#                     print('5:',time.time())
                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

In [7]:
alex = models.alexnet(pretrained=True)
#pthfile = r'E:\KaiKeBa\基础班\Python\第六章\第四节\4-CNN(2)\models\alexnet-owt-4df8aa71.pth'
#alex.load_state_dict(torch.load(pthfile))
for param, name in zip(alex.parameters(), alex.state_dict().keys()):
    param.requires_grad = False
    
alex.classifier[6] = nn.Linear(4096, 10)

# for param, name in zip(alex.parameters(), alex.state_dict().keys()):
#     print(name,":", param.requires_grad)
# print(alex)
criterion = nn.CrossEntropyLoss()
# # Observe that all parameters are being optimized
optimizer = optim.SGD(alex.parameters(), lr=0.001, momentum=0.9)  # 随机梯度下降
# optimizer = optim.Adam(alex.parameters(), lr=3e-4)
# # Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

Downloading: "https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth" to /root/.cache/torch/checkpoints/alexnet-owt-4df8aa71.pth


HBox(children=(FloatProgress(value=0.0, max=244418560.0), HTML(value='')))




In [8]:
train_model(alex.to(device), criterion, optimizer, exp_lr_scheduler, num_epochs=10)

Epoch 0/9
----------
train : 2020--06--11 04:58:23




train Loss: 1.2690 Acc: 0.5478
val : 2020--06--11 05:00:07
val Loss: 0.7101 Acc: 0.7520

Epoch 1/9
----------
train : 2020--06--11 05:00:18
train Loss: 1.1295 Acc: 0.5967
val : 2020--06--11 05:01:59
val Loss: 0.6543 Acc: 0.7758

Epoch 2/9
----------
train : 2020--06--11 05:02:11
train Loss: 1.1001 Acc: 0.6069
val : 2020--06--11 05:03:50
val Loss: 0.6637 Acc: 0.7666

Epoch 3/9
----------
train : 2020--06--11 05:04:02
train Loss: 1.0928 Acc: 0.6094
val : 2020--06--11 05:05:42
val Loss: 0.6210 Acc: 0.7852

Epoch 4/9
----------
train : 2020--06--11 05:05:53
train Loss: 1.0745 Acc: 0.6171
val : 2020--06--11 05:07:33
val Loss: 0.6091 Acc: 0.7892

Epoch 5/9
----------
train : 2020--06--11 05:07:44
train Loss: 1.0691 Acc: 0.6184
val : 2020--06--11 05:09:23
val Loss: 0.5857 Acc: 0.7986

Epoch 6/9
----------
train : 2020--06--11 05:09:34
train Loss: 1.0382 Acc: 0.6318
val : 2020--06--11 05:11:13
val Loss: 0.5911 Acc: 0.7968

Epoch 7/9
----------
train : 2020--06--11 05:11:25
train Loss: 1.0407 A

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 